summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 11
-rw-r--r-- lib/rss/parser.rb 13
-rw-r--r-- lib/rss/rexmlparser.rb 17
-rw-r--r-- lib/rss/xmlparser.rb 10
-rw-r--r-- lib/rss/xmlscanner.rb 35
-rw-r--r-- test/rss/test_parser.rb 13
6 files changed, 80 insertions, 19 deletions
diff --git a/ChangeLog b/ChangeLog
index 276f5c6d15d..1053f65e453 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,14 @@
+Wed Nov 23 22:34:15 2005 Kouhei Sutou <kou@cozmixng.org>
+
+ * lib/rss/parser.rb: added entity handling type predicate.
+ * lib/rss/rexmlparser.rb: ditto.
+ * lib/rss/xmlparser.rb: ditto.
+ * lib/rss/xmlscanner.rb: ditto.
+
+ * lib/rss/xmlscanner.rb: more robust entity handling.
+
+ * test/rss/test_parser.rb: added an entity handling test.
+
Wed Nov 23 20:59:01 2005 Hidetoshi NAGAI <nagai@ai.kyutech.ac.jp>
* ext/tk/lib/tk.rb: add Tk.pkgconfig_list and Tk.pkgconfig_get
diff --git a/lib/rss/parser.rb b/lib/rss/parser.rb
index a328146c4a7..5308211b9d8 100644
--- a/lib/rss/parser.rb
+++ b/lib/rss/parser.rb
@@ -69,7 +69,6 @@ module RSS
parser.ignore_unknown_element = ignore_unknown_element
parser.parse
end
-
end
def_delegators(:@parser, :parse, :rss,
@@ -113,8 +112,14 @@ module RSS
class BaseParser
+ class << self
+ def raise_for_undefined_entity?
+ listener.raise_for_undefined_entity?
+ end
+ end
+
def initialize(rss)
- @listener = listener.new
+ @listener = self.class.listener.new
@rss = rss
end
@@ -205,6 +210,10 @@ module RSS
def_get_text_element(uri, name, *get_file_and_line_from_caller(1))
end
+ def raise_for_undefined_entity?
+ true
+ end
+
private
def def_get_text_element(uri, name, file, line)
diff --git a/lib/rss/rexmlparser.rb b/lib/rss/rexmlparser.rb
index 73e352a7318..4dabf59199f 100644
--- a/lib/rss/rexmlparser.rb
+++ b/lib/rss/rexmlparser.rb
@@ -10,12 +10,13 @@ module RSS
class REXMLParser < BaseParser
- private
-
- def listener
- REXMLListener
+ class << self
+ def listener
+ REXMLListener
+ end
end
-
+
+ private
def _parse
begin
REXML::Document.parse_stream(@rss, @listener)
@@ -35,6 +36,12 @@ module RSS
include REXML::StreamListener
include ListenerMixin
+ class << self
+ def raise_for_undefined_entity?
+ false
+ end
+ end
+
def xmldecl(version, encoding, standalone)
super(version, encoding, standalone == "yes")
# Encoding is converted to UTF-8 when REXML parse XML.
diff --git a/lib/rss/xmlparser.rb b/lib/rss/xmlparser.rb
index 64737b821b1..3dfe7d461a5 100644
--- a/lib/rss/xmlparser.rb
+++ b/lib/rss/xmlparser.rb
@@ -59,11 +59,13 @@ module RSS
class XMLParserParser < BaseParser
- private
- def listener
- XMLParserListener
+ class << self
+ def listener
+ XMLParserListener
+ end
end
-
+
+ private
def _parse
begin
parser = REXMLLikeXMLParser.new
diff --git a/lib/rss/xmlscanner.rb b/lib/rss/xmlscanner.rb
index c5a11bad181..61b9fa6bf49 100644
--- a/lib/rss/xmlscanner.rb
+++ b/lib/rss/xmlscanner.rb
@@ -1,19 +1,29 @@
require 'xmlscan/scanner'
+require 'stringio'
module RSS
class XMLScanParser < BaseParser
- private
- def listener
- XMLScanListener
+ class << self
+ def listener
+ XMLScanListener
+ end
end
-
+
+ private
def _parse
begin
- XMLScan::XMLScanner.new(@listener).parse(@rss)
+ if @rss.is_a?(String)
+ input = StringIO.new(@rss)
+ else
+ input = @rss
+ end
+ scanner = XMLScan::XMLScanner.new(@listener)
+ scanner.parse(input)
rescue XMLScan::Error => e
- raise NotWellFormedError.new(e.lineno){e.message}
+ lineno = e.lineno || scanner.lineno || input.lineno
+ raise NotWellFormedError.new(lineno){e.message}
end
end
@@ -57,7 +67,7 @@ module RSS
end
def on_entityref(ref)
- text(ENTITIES[ref])
+ text(entity(ref))
end
def on_charref(code)
@@ -79,7 +89,7 @@ module RSS
end
def on_attr_entityref(ref)
- @current_attr << ENTITIES[ref]
+ @current_attr << entity(ref)
end
def on_attr_charref(code)
@@ -97,6 +107,15 @@ module RSS
tag_end(name)
end
+ private
+ def entity(ref)
+ ent = ENTITIES[ref]
+ if ent
+ ent
+ else
+ wellformed_error("undefined entity: #{ref}")
+ end
+ end
end
end
diff --git a/test/rss/test_parser.rb b/test/rss/test_parser.rb
index adf0864d199..d6de0699a09 100644
--- a/test/rss/test_parser.rb
+++ b/test/rss/test_parser.rb
@@ -122,7 +122,20 @@ EOR
end
assert_parse(rss, :nothing_raised)
end
+ end
+ def test_undefined_entity
+ return unless RSS::Parser.default_parser.raise_for_undefined_entity?
+ assert_parse(make_RDF(<<-EOR), :raises, RSS::NotWellFormedError)
+#{make_channel}
+#{make_image}
+<item rdf:about="#{RDF_ABOUT}">
+ <title>#{TITLE_VALUE} &UNKNOWN_ENTITY;</title>
+ <link>#{LINK_VALUE}</link>
+ <description>#{DESCRIPTION_VALUE}</description>
+</item>
+#{make_textinput}
+EOR
end
def test_channel