From 7d21c237ccd46ec1d56639ce53b5882bf97d9de3 Mon Sep 17 00:00:00 2001 From: ser Date: Fri, 10 Oct 2003 12:54:46 +0000 Subject: * Changes to the encoding mechanism. If iconv is found, it is used first for encoding changes. This should be the case on all 1.8 installations. When it isn't found (<1.6), the native REXML encoding mechanism is used. This cleaned out some files, and tightened up the code a bit; and iconv should be faster than the pure Ruby code. * Changed deprecated assert_not_nil to assert throughout the tests. * Parse exceptions are a little more verbose, and extend RuntimeError. * Bug fixes to XPathParser * The Light API is still shifting, like the sands of the desert. * Fixed a new Ruby 1.8.0 warning, added some speed optimizations, and tightened error reporting in the base parser git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@4737 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- lib/rexml/parsers/baseparser.rb | 53 +++++++++++++++++++++++----------------- lib/rexml/parsers/lightparser.rb | 14 +++++------ 2 files changed, 37 insertions(+), 30 deletions(-) (limited to 'lib/rexml/parsers') diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb index d6e04c7817..27c9642a68 100644 --- a/lib/rexml/parsers/baseparser.rb +++ b/lib/rexml/parsers/baseparser.rb @@ -89,10 +89,10 @@ module REXML EREFERENCE = /&(?!#{NAME};)/ DEFAULT_ENTITIES = { - 'gt' => [/&gt;/, '&gt;', '>'], - 'lt' => [/&lt;/, '&lt;', '<'], - 'quot' => [/&quot;/, '&quot;', '"'], - "apos" => [/&apos;/, "&apos;", "'"] + 'gt' => [/&gt;/, '&gt;', '>', />/], + 'lt' => [/&lt;/, '&lt;', '<', /</], + 'quot' => [/&quot;/, '&quot;', '"', /"/], + "apos" => [/&apos;/, "&apos;", "'", /'/] } def initialize( source ) @@ -126,6 +126,7 @@ module REXML # Returns true if there are more events. Synonymous with !empty? def has_next? + return true if @closed @source.read if @source.buffer.size==0 and !@source.empty? (!@source.empty?
and @source.buffer.strip.size>0) or @stack.size>0 or @closed end @@ -143,7 +144,7 @@ module REXML # event, so you can effectively pre-parse the entire document (pull the # entire thing into memory) using this method. def peek depth=0 - raise 'Illegal argument "#{depth}"' if depth < -1 + raise %Q[Illegal argument "#{depth}"] if depth < -1 temp = [] if depth == -1 temp.push(pull()) until empty? @@ -166,8 +167,9 @@ module REXML return @stack.shift if @stack.size > 0 @source.read if @source.buffer.size==0 if @document_status == nil - @source.match( /^\s*/um, true ) - word = @source.match( /^\s*(<.*?)>/um ) + @source.consume( /^\s*/um ) + word = @source.match( /(<.*?)>/um ) + #word = @source.match_to( '>', /(<.*?)>/um ) word = word[1] unless word.nil? case word when COMMENT_START @@ -190,7 +192,7 @@ module REXML close = md[2] identity =~ IDENTITY name = $1 - raise "DOCTYPE is missing a name" if name.nil? + raise REXML::ParseException("DOCTYPE is missing a name") if name.nil? pub_sys = $2.nil? ? nil : $2.strip long_name = $3.nil? ? nil : $3.strip uri = $4.nil? ? 
nil : $4.strip @@ -274,10 +276,11 @@ module REXML return [ :end_doctype ] end end - begin + begin if @source.buffer[0] == ?< if @source.buffer[1] == ?/ last_tag = @tags.pop + #md = @source.match_to_consume( '>', CLOSE_MATCH) md = @source.match( CLOSE_MATCH, true ) raise REXML::ParseException.new( "Missing end tag for '#{last_tag}' "+ "(got \"#{md[1]}\")", @source) unless last_tag == md[1] @@ -286,18 +289,20 @@ module REXML md = @source.match(/\A(\s*[^>]*>)/um) #puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}" raise REXML::ParseException.new("Malformed node", @source) unless md - case md[1] - when CDATA_START - return [ :cdata, @source.match( CDATA_PATTERN, true )[1] ] - when COMMENT_START - return [ :comment, @source.match( COMMENT_PATTERN, true )[1] ] + if md[0][2] == ?- + md = @source.match( COMMENT_PATTERN, true ) + return [ :comment, md[1] ] if md else - raise REXML::ParseException.new( "Declarations can only occur "+ - "in the doctype declaration.", @source) + md = @source.match( CDATA_PATTERN, true ) + return [ :cdata, md[1] ] if md end + raise REXML::ParseException.new( "Declarations can only occur "+ + "in the doctype declaration.", @source) elsif @source.buffer[1] == ?? 
md = @source.match( INSTRUCTION_PATTERN, true ) - return [ :processing_instruction, md[1], md[2] ] + return [ :processing_instruction, md[1], md[2] ] if md + raise REXML::ParseException.new( "Bad instruction declaration", + @source) else # Get the next tag md = @source.match(TAG_MATCH, true) @@ -318,17 +323,19 @@ module REXML return [ :start_element, md[1], attributes ] end else - md = @source.match(TEXT_PATTERN, true) - raise "no text to add" if md[0].length == 0 + md = @source.match( TEXT_PATTERN, true ) + #md = @source.match_to_consume( '<', TEXT_PATTERN ) + #@source.read + raise REXML::ParseException("no text to add") if md[0].length == 0 # unnormalized = Text::unnormalize( md[1], self ) # return PullEvent.new( :text, md[1], unnormalized ) return [ :text, md[1] ] end - rescue REXML::ParseException - raise $! + rescue REXML::ParseException + raise rescue Exception, NameError => error raise REXML::ParseException.new( "Exception parsing", - @source, self, error ) + @source, self, (error ? error : $!) 
) end return [ :dummy ] end @@ -354,7 +361,7 @@ module REXML end if entities copy.gsub!( EREFERENCE, '&amp;' ) DEFAULT_ENTITIES.each do |key, value| - copy.gsub!( value[2], value[1] ) + copy.gsub!( value[3], value[1] ) end copy end diff --git a/lib/rexml/parsers/lightparser.rb b/lib/rexml/parsers/lightparser.rb index e2f083bc8e..8c555f7960 100644 --- a/lib/rexml/parsers/lightparser.rb +++ b/lib/rexml/parsers/lightparser.rb @@ -16,25 +16,25 @@ module REXML end def parse - root = context = REXML::Light::Node.new([ :document ]) + root = context = [ :document ] while true event = @parser.pull case event[0] when :end_document break when :end_doctype - context = context.parent + context = context[1] when :start_element, :start_doctype - new_node = REXML::Light::Node.new(event) + new_node = event context << new_node - new_node.parent = context + new_node[1,0] = [context] context = new_node when :end_element, :end_doctype - context = context.parent + context = context[1] else - new_node = REXML::Light::Node.new(event) + new_node = event context << new_node - new_node.parent = context + new_node[1,0] = [context] end end root -- cgit v1.2.3