diff options
author | ser <ser@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2006-04-14 02:56:44 +0000 |
---|---|---|
committer | ser <ser@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2006-04-14 02:56:44 +0000 |
commit | 5f4bf329291f885d23f4d6277b4a22862a291687 (patch) | |
tree | 0ce7091ae84f46a62452f6671c28ad8aac834d68 /lib/rexml/parsers | |
parent | bec759abcc335aabde7c0dcd8c85c18223446644 (diff) |
Short summary:
This is a version bump to REXML 3.1.4. It includes numerous bug fixes and is
a pretty big patch, but is nonetheless a minor revision bump, since the API
hasn't changed.
For more information, see:
http://www.germane-software.com/projects/rexml/milestone/3.1.4
For all tickets, see:
http://www.germane-software.com/projects/rexml/ticket/#
Where '#' is replaced with the ticket number.
Changelog:
* Fixed the documentation WRT the raw mode of text nodes (ticket #4)
* Fixes roundup ticket #43: substring-after bug.
* Fixed ticket #44, Element#xpath
* Patch submitted by an anonymous donor to allow parsing of Tempfiles. I was
hoping that, by now, that whole Source thing would have been changed to use
duck typing and avoid this sort of ticket... but in the meantime, the patch
has been applied.
* Fixes ticket:30, XPath default namespace bug. The fix was provided
by Lucas Nussbaum.
* Aliases #size to #length, as per zdennis's request.
* Fixes typo from previous commit
* Fixes ticket #32, preceding-sibling fails attempting delete_if on nil nodeset
* Merges a user-contributed patch for ticket #40
* Adds a forgotten-to-commit unit test for ticket #32
* Changes Date, Version, and Copyright to upper case, to avoid conflicts with
the Date class. All of the other changes in the altered files are because
Subversion doesn't allow block-level commits, like it should. English cased
Version and Copyright are aliased to the upper case versions, for partial
backward compatibility.
* Minor, yet incomplete, documentation changes. Again, these are in this patch
because of Subversion's glaring lack of block-level commits.
* Resolves ticket #34, SAX parser change makes it impossible to parse IO feeds.
* Moves parser.source.position() to parser.position()
* Fixes ticket:48, repeated writes munging text content
* Fixes ticket:46, adding methods for accessing notation DTD information.
* Encodes some characters and removes a broken link in the documentation
* Deals with carriage returns after XML declarations
* Improved doctype handling
* Whitespace handling changes
* Applies a patch by David Tardon, which (incidentally) fixes ticket:50
* Closes #26, allowing anything that walks like an IO to be a source.
* Ticket #31 - One unescape too many
This wasn't really a bug, per se... "value" always returns
a normalized string, and "value" is the method used to get
the text() of an element. However, entities have no meaning
in CDATA sections, so there's no justification for value
to be normalizing the content of CData objects. This behavior
has therefore been changed.
* Ticket #45 -- Now parses notation declarations in DTDs properly.
* Resolves ticket #49, Document.parse_stream returns ArgumentError
* Adds documentation to clarify how XMLDecl works, to avoid invalid bug reports.
* Addresses ticket #10, fixing the StreamParser API for DTDs.
* Fixes ticket #42, XPath node-set function 'name' fails with relative node
set parameter
* Good patch by Aaron to fix ticket #53: REXML ignoring unbalanced tags
at the end of a document.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_1_8@10090 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'lib/rexml/parsers')
-rw-r--r-- | lib/rexml/parsers/baseparser.rb | 46 | ||||
-rw-r--r-- | lib/rexml/parsers/streamparser.rb | 74 | ||||
-rw-r--r-- | lib/rexml/parsers/treeparser.rb | 10 |
3 files changed, 66 insertions, 64 deletions
diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb index c898ba0b60..bce4ba4c20 100644 --- a/lib/rexml/parsers/baseparser.rb +++ b/lib/rexml/parsers/baseparser.rb @@ -42,7 +42,7 @@ module REXML CDATA_END = /^\s*\]\s*>/um CDATA_PATTERN = /<!\[CDATA\[(.*?)\]\]>/um XMLDECL_START = /\A<\?xml\s/u; - XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>*/um + XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>/um INSTRUCTION_START = /\A<\?/u INSTRUCTION_PATTERN = /<\?(.*?)(\s+.*?)?\?>/um TAG_MATCH = /^<((?>#{NAME_STR}))\s*((?>\s+#{NAME_STR}\s*=\s*(["']).*?\3)*)\s*(\/)?>/um @@ -68,8 +68,8 @@ module REXML ATTLISTDECL_START = /^\s*<!ATTLIST/um ATTLISTDECL_PATTERN = /^\s*<!ATTLIST\s+#{NAME}(?:#{ATTDEF})*\s*>/um NOTATIONDECL_START = /^\s*<!NOTATION/um - PUBLIC = /^\s*<!NOTATION\s+(\w[\-\w]*)\s+(PUBLIC)\s+((["']).*?\4)\s*>/um - SYSTEM = /^\s*<!NOTATION\s+(\w[\-\w]*)\s+(SYSTEM)\s+((["']).*?\4)\s*>/um + PUBLIC = /^\s*<!NOTATION\s+(\w[\-\w]*)\s+(PUBLIC)\s+(["'])(.*?)\3(?:\s+(["'])(.*?)\5)?\s*>/um + SYSTEM = /^\s*<!NOTATION\s+(\w[\-\w]*)\s+(SYSTEM)\s+(["'])(.*?)\3\s*>/um TEXT_PATTERN = /\A([^<]*)/um @@ -120,20 +120,7 @@ module REXML attr_reader :source def stream=( source ) - if source.kind_of? String - @source = Source.new(source) - elsif source.kind_of? IO - @source = IOSource.new(source) - elsif source.kind_of? Source - @source = source - elsif defined? StringIO and source.kind_of? StringIO - @source = IOSource.new(source) - elsif defined? Tempfile and source.kind_of? Tempfile - @source = IOSource.new(source) - else - raise "#{source.class} is not a valid input stream. It must be \n"+ - "either a String, IO, StringIO or Source." - end + @source = SourceFactory.create_from( source ) @closed = nil @document_status = nil @tags = [] @@ -152,8 +139,8 @@ module REXML # Returns true if there are no more events def empty? - #puts "@source.empty? = #{@source.empty?}" - #puts "@stack.empty? = #{@stack.empty?}" + #STDERR.puts "@source.empty? = #{@source.empty?}" + #STDERR.puts "@stack.empty? 
= #{@stack.empty?}" return (@source.empty? and @stack.empty?) end @@ -197,14 +184,17 @@ module REXML return [ :end_document ] if empty? return @stack.shift if @stack.size > 0 @source.read if @source.buffer.size<2 + #STDERR.puts "BUFFER = #{@source.buffer.inspect}" if @document_status == nil - @source.consume( /^\s*/um ) - word = @source.match( /(<[^>]*)>/um ) + #@source.consume( /^\s*/um ) + word = @source.match( /^((?:\s+)|(?:<[^>]*>))/um ) word = word[1] unless word.nil? + #STDERR.puts "WORD = #{word.inspect}" case word when COMMENT_START return [ :comment, @source.match( COMMENT_PATTERN, true )[1] ] when XMLDECL_START + #STDERR.puts "XMLDECL" results = @source.match( XMLDECL_PATTERN, true )[1] version = VERSION.match( results ) version = version[1] unless version.nil? @@ -213,7 +203,7 @@ module REXML @source.encoding = encoding standalone = STANDALONE.match(results) standalone = standalone[1] unless standalone.nil? - return [ :xmldecl, version, encoding, standalone] + return [ :xmldecl, version, encoding, standalone ] when INSTRUCTION_START return [ :processing_instruction, *@source.match(INSTRUCTION_PATTERN, true)[1,2] ] when DOCTYPE_START @@ -236,6 +226,7 @@ module REXML @document_status = :in_doctype end return args + when /^\s+/ else @document_status = :after_doctype @source.read if @source.buffer.size<2 @@ -299,12 +290,14 @@ module REXML md = nil if @source.match( PUBLIC ) md = @source.match( PUBLIC, true ) + vals = [md[1],md[2],md[4],md[6]] elsif @source.match( SYSTEM ) md = @source.match( SYSTEM, true ) + vals = [md[1],md[2],nil,md[4]] else raise REXML::ParseException.new( "error parsing notation: no matching pattern", @source ) end - return [ :notationdecl, md[1], md[2], md[3] ] + return [ :notationdecl, *vals ] when CDATA_END @document_status = :after_doctype @source.match( CDATA_END, true ) @@ -323,7 +316,7 @@ module REXML return [ :end_element, last_tag ] elsif @source.buffer[1] == ?! 
md = @source.match(/\A(\s*[^>]*>)/um) - #puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}" + #STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}" raise REXML::ParseException.new("Malformed node", @source) unless md if md[0][2] == ?- md = @source.match( COMMENT_PATTERN, true ) @@ -361,10 +354,11 @@ module REXML else md = @source.match( TEXT_PATTERN, true ) if md[0].length == 0 - #puts "EMPTY = #{empty?}" - #puts "BUFFER = \"#{@source.buffer}\"" + puts "EMPTY = #{empty?}" + puts "BUFFER = \"#{@source.buffer}\"" @source.match( /(\s+)/, true ) end + #STDERR.puts "GOT #{md[1].inspect}" unless md[0].length == 0 #return [ :text, "" ] if md[0].length == 0 # unnormalized = Text::unnormalize( md[1], self ) # return PullEvent.new( :text, md[1], unnormalized ) diff --git a/lib/rexml/parsers/streamparser.rb b/lib/rexml/parsers/streamparser.rb index 996d613e15..256d0f611c 100644 --- a/lib/rexml/parsers/streamparser.rb +++ b/lib/rexml/parsers/streamparser.rb @@ -1,42 +1,46 @@ module REXML - module Parsers - class StreamParser - def initialize source, listener - @listener = listener - @parser = BaseParser.new( source ) - end - + module Parsers + class StreamParser + def initialize source, listener + @listener = listener + @parser = BaseParser.new( source ) + end + def add_listener( listener ) @parser.add_listener( listener ) end - - def parse - # entity string - while true - event = @parser.pull - case event[0] - when :end_document - return - when :start_element - attrs = event[2].each do |n, v| - event[2][n] = @parser.unnormalize( v ) - end - @listener.tag_start( event[1], attrs ) - when :end_element - @listener.tag_end( event[1] ) - when :text - normalized = @parser.unnormalize( event[1] ) - @listener.text( normalized ) - when :processing_instruction - @listener.instruction( *event[1,2] ) + + def parse + # entity string + while true + event = @parser.pull + case event[0] + when :end_document + return + when :start_element + attrs = event[2].each do 
|n, v| + event[2][n] = @parser.unnormalize( v ) + end + @listener.tag_start( event[1], attrs ) + when :end_element + @listener.tag_end( event[1] ) + when :text + normalized = @parser.unnormalize( event[1] ) + @listener.text( normalized ) + when :processing_instruction + @listener.instruction( *event[1,2] ) when :start_doctype @listener.doctype( *event[1..-1] ) - when :comment, :attlistdecl, :notationdecl, :elementdecl, - :entitydecl, :cdata, :xmldecl, :attlistdecl - @listener.send( event[0].to_s, *event[1..-1] ) - end - end - end - end - end + when :end_doctype + # FIXME: remove this condition for milestone:3.2 + @listener.doctype_end if @listener.respond_to? :doctype_end + when :comment, :attlistdecl, :cdata, :xmldecl, :elementdecl + @listener.send( event[0].to_s, *event[1..-1] ) + when :entitydecl, :notationdecl + @listener.send( event[0].to_s, event[1..-1] ) + end + end + end + end + end end diff --git a/lib/rexml/parsers/treeparser.rb b/lib/rexml/parsers/treeparser.rb index 57d11f7e23..500a53f426 100644 --- a/lib/rexml/parsers/treeparser.rb +++ b/lib/rexml/parsers/treeparser.rb @@ -19,8 +19,12 @@ module REXML begin while true event = @parser.pull + #STDERR.puts "TREEPARSER GOT #{event.inspect}" case event[0] when :end_document + unless tag_stack.empty? + raise ParseException.new("No close tag for #{tag_stack.inspect}") + end return when :start_element tag_stack.push(event[1]) @@ -35,10 +39,10 @@ module REXML @build_context[-1] << event[1] else @build_context.add( - Text.new( event[1], @build_context.whitespace, nil, true ) + Text.new(event[1], @build_context.whitespace, nil, true) ) unless ( - event[1].strip.size==0 and - @build_context.ignore_whitespace_nodes + @build_context.ignore_whitespace_nodes and + event[1].strip.size==0 ) end end |