summary | refs | log | tree | commit | diff
path: root/lib/rss
diff options
context:
space:
mode:
author: kou <kou@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2006-06-17 00:51:31 +0000
committer: kou <kou@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2006-06-17 00:51:31 +0000
commit: 88dd1e4c993e661b0d52c40d0eff567b00bf6ffd (patch)
tree: e8298605f2f0e29f2ec2befeda793b46194e4ddd /lib/rss
parent: e85f8c782958c0a3e6ea296401a08d5fe9f8e550 (diff)
* lib/rss, test/rss: backported from trunk. (2005-11-16 - now)
* lib/rss/parser.rb: added entity handling type predicate.
* lib/rss/rexmlparser.rb: ditto.
* lib/rss/xmlparser.rb: ditto.
* lib/rss/xmlscanner.rb: ditto.
* lib/rss/xmlscanner.rb: more robust entity handling.
* test/rss/test_parser.rb: added an entity handling test.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_1_8@10295 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'lib/rss')
-rw-r--r--  lib/rss/parser.rb       13
-rw-r--r--  lib/rss/rexmlparser.rb  17
-rw-r--r--  lib/rss/xmlparser.rb    10
-rw-r--r--  lib/rss/xmlscanner.rb   35
4 files changed, 56 insertions, 19 deletions
diff --git a/lib/rss/parser.rb b/lib/rss/parser.rb
index 4b7000d81e..73cce6b7a0 100644
--- a/lib/rss/parser.rb
+++ b/lib/rss/parser.rb
@@ -69,7 +69,6 @@ module RSS
parser.ignore_unknown_element = ignore_unknown_element
parser.parse
end
-
end
def_delegators(:@parser, :parse, :rss,
@@ -113,8 +112,14 @@ module RSS
class BaseParser
+ class << self
+ def raise_for_undefined_entity?
+ listener.raise_for_undefined_entity?
+ end
+ end
+
def initialize(rss)
- @listener = listener.new
+ @listener = self.class.listener.new
@rss = rss
end
@@ -205,6 +210,10 @@ module RSS
def_get_text_element(uri, name, *get_file_and_line_from_caller(1))
end
+ def raise_for_undefined_entity?
+ true
+ end
+
private
def def_get_text_element(uri, name, file, line)
diff --git a/lib/rss/rexmlparser.rb b/lib/rss/rexmlparser.rb
index 73e352a731..4dabf59199 100644
--- a/lib/rss/rexmlparser.rb
+++ b/lib/rss/rexmlparser.rb
@@ -10,12 +10,13 @@ module RSS
class REXMLParser < BaseParser
- private
-
- def listener
- REXMLListener
+ class << self
+ def listener
+ REXMLListener
+ end
end
-
+
+ private
def _parse
begin
REXML::Document.parse_stream(@rss, @listener)
@@ -35,6 +36,12 @@ module RSS
include REXML::StreamListener
include ListenerMixin
+ class << self
+ def raise_for_undefined_entity?
+ false
+ end
+ end
+
def xmldecl(version, encoding, standalone)
super(version, encoding, standalone == "yes")
# Encoding is converted to UTF-8 when REXML parse XML.
diff --git a/lib/rss/xmlparser.rb b/lib/rss/xmlparser.rb
index 64737b821b..3dfe7d461a 100644
--- a/lib/rss/xmlparser.rb
+++ b/lib/rss/xmlparser.rb
@@ -59,11 +59,13 @@ module RSS
class XMLParserParser < BaseParser
- private
- def listener
- XMLParserListener
+ class << self
+ def listener
+ XMLParserListener
+ end
end
-
+
+ private
def _parse
begin
parser = REXMLLikeXMLParser.new
diff --git a/lib/rss/xmlscanner.rb b/lib/rss/xmlscanner.rb
index c5a11bad18..61b9fa6bf4 100644
--- a/lib/rss/xmlscanner.rb
+++ b/lib/rss/xmlscanner.rb
@@ -1,19 +1,29 @@
require 'xmlscan/scanner'
+require 'stringio'
module RSS
class XMLScanParser < BaseParser
- private
- def listener
- XMLScanListener
+ class << self
+ def listener
+ XMLScanListener
+ end
end
-
+
+ private
def _parse
begin
- XMLScan::XMLScanner.new(@listener).parse(@rss)
+ if @rss.is_a?(String)
+ input = StringIO.new(@rss)
+ else
+ input = @rss
+ end
+ scanner = XMLScan::XMLScanner.new(@listener)
+ scanner.parse(input)
rescue XMLScan::Error => e
- raise NotWellFormedError.new(e.lineno){e.message}
+ lineno = e.lineno || scanner.lineno || input.lineno
+ raise NotWellFormedError.new(lineno){e.message}
end
end
@@ -57,7 +67,7 @@ module RSS
end
def on_entityref(ref)
- text(ENTITIES[ref])
+ text(entity(ref))
end
def on_charref(code)
@@ -79,7 +89,7 @@ module RSS
end
def on_attr_entityref(ref)
- @current_attr << ENTITIES[ref]
+ @current_attr << entity(ref)
end
def on_attr_charref(code)
@@ -97,6 +107,15 @@ module RSS
tag_end(name)
end
+ private
+ def entity(ref)
+ ent = ENTITIES[ref]
+ if ent
+ ent
+ else
+ wellformed_error("undefined entity: #{ref}")
+ end
+ end
end
end