From 88dd1e4c993e661b0d52c40d0eff567b00bf6ffd Mon Sep 17 00:00:00 2001 From: kou Date: Sat, 17 Jun 2006 00:51:31 +0000 Subject: * lib/rss, test/rss: backported from trunk. (2005-11-16 - now) * lib/rss/parser.rb: added entity handling type predicate. * lib/rss/rexmlparser.rb: ditto. * lib/rss/xmlparser.rb: ditto. * lib/rss/xmlscanner.rb: ditto. * lib/rss/xmlscanner.rb: more robust entity handling. * test/rss/test_parser.rb: added an entity handling test. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_1_8@10295 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- lib/rss/parser.rb | 13 +++++++++++-- lib/rss/rexmlparser.rb | 17 ++++++++++++----- lib/rss/xmlparser.rb | 10 ++++++---- lib/rss/xmlscanner.rb | 35 +++++++++++++++++++++++++++-------- 4 files changed, 56 insertions(+), 19 deletions(-) (limited to 'lib/rss') diff --git a/lib/rss/parser.rb b/lib/rss/parser.rb index 4b7000d81e..73cce6b7a0 100644 --- a/lib/rss/parser.rb +++ b/lib/rss/parser.rb @@ -69,7 +69,6 @@ module RSS parser.ignore_unknown_element = ignore_unknown_element parser.parse end - end def_delegators(:@parser, :parse, :rss, @@ -113,8 +112,14 @@ module RSS class BaseParser + class << self + def raise_for_undefined_entity? + listener.raise_for_undefined_entity? + end + end + def initialize(rss) - @listener = listener.new + @listener = self.class.listener.new @rss = rss end @@ -205,6 +210,10 @@ module RSS def_get_text_element(uri, name, *get_file_and_line_from_caller(1)) end + def raise_for_undefined_entity? + true + end + private def def_get_text_element(uri, name, file, line) diff --git a/lib/rss/rexmlparser.rb b/lib/rss/rexmlparser.rb index 73e352a731..4dabf59199 100644 --- a/lib/rss/rexmlparser.rb +++ b/lib/rss/rexmlparser.rb @@ -10,12 +10,13 @@ module RSS class REXMLParser < BaseParser - private - - def listener - REXMLListener + class << self + def listener + REXMLListener + end end - + + private def _parse begin REXML::Document.parse_stream(@rss, @listener) @@ -35,6 +36,12 @@ module RSS include REXML::StreamListener include ListenerMixin + class << self + def raise_for_undefined_entity? + false + end + end + def xmldecl(version, encoding, standalone) super(version, encoding, standalone == "yes") # Encoding is converted to UTF-8 when REXML parse XML. diff --git a/lib/rss/xmlparser.rb b/lib/rss/xmlparser.rb index 64737b821b..3dfe7d461a 100644 --- a/lib/rss/xmlparser.rb +++ b/lib/rss/xmlparser.rb @@ -59,11 +59,13 @@ module RSS class XMLParserParser < BaseParser - private - def listener - XMLParserListener + class << self + def listener + XMLParserListener + end end - + + private def _parse begin parser = REXMLLikeXMLParser.new diff --git a/lib/rss/xmlscanner.rb b/lib/rss/xmlscanner.rb index c5a11bad18..61b9fa6bf4 100644 --- a/lib/rss/xmlscanner.rb +++ b/lib/rss/xmlscanner.rb @@ -1,19 +1,29 @@ require 'xmlscan/scanner' +require 'stringio' module RSS class XMLScanParser < BaseParser - private - def listener - XMLScanListener + class << self + def listener + XMLScanListener + end end - + + private def _parse begin - XMLScan::XMLScanner.new(@listener).parse(@rss) + if @rss.is_a?(String) + input = StringIO.new(@rss) + else + input = @rss + end + scanner = XMLScan::XMLScanner.new(@listener) + scanner.parse(input) rescue XMLScan::Error => e - raise NotWellFormedError.new(e.lineno){e.message} + lineno = e.lineno || scanner.lineno || input.lineno + raise NotWellFormedError.new(lineno){e.message} end end @@ -57,7 +67,7 @@ module RSS end def on_entityref(ref) - text(ENTITIES[ref]) + text(entity(ref)) end def on_charref(code) @@ -79,7 +89,7 @@ module RSS end def on_attr_entityref(ref) - @current_attr << ENTITIES[ref] + @current_attr << entity(ref) end def on_attr_charref(code) @@ -97,6 +107,15 @@ module RSS tag_end(name) end + private + def entity(ref) + ent = ENTITIES[ref] + if ent + ent + else + wellformed_error("undefined entity: #{ref}") + end + end end end -- cgit v1.2.3