From a8379e7f1855c999b931a76b19f9c0922a9f13a4 Mon Sep 17 00:00:00 2001 From: ser Date: Sun, 4 Jul 2004 15:39:05 +0000 Subject: * Added the treeparser, and added the file to the MANIFEST. r1002 | ser | 2004-06-07 07:45:53 -0400 (Mon, 07 Jun 2004) | 2 lines * Workin' in the coal mine, goin' down, down, down... r1003 | ser | 2004-06-08 22:24:08 -0400 (Tue, 08 Jun 2004) | 7 lines * Entirely rewrote the validation code; the finite state machine, while cool, didn't survive the encounter with Interleave. It was getting sort of hacky, too. The new mechanism is less elegant, but is basically still a FSM, and is more flexible without having to add hacks to extend it. Large chunks of the FSM may be reusable in other validation mechanisms. * Added interleave support r1004 | ser | 2004-06-09 07:24:17 -0400 (Wed, 09 Jun 2004) | 2 lines * Added support for mixed r1005 | ser | 2004-06-09 08:01:33 -0400 (Wed, 09 Jun 2004) | 3 lines * Added Kou's patch to normalize attribute values passed through the SAX2 and Stream parsers. r1006 | ser | 2004-06-09 08:12:35 -0400 (Wed, 09 Jun 2004) | 2 lines * Applied Kou's preceding-sibling patch, which fixes the order of the axis results r1009 | ser | 2004-06-20 11:02:55 -0400 (Sun, 20 Jun 2004) | 8 lines * Redesigned and rewrote the RelaxNG code. It isn't elegant, but it works. Particular problems encountered were interleave and ref. Interleave means I can't use a clean FSM design, and ref means the dirty FSM design has to be modified during validation. There's a lot of code that could be cleaned up in here. However, I'm pretty sure that this design is reasonably fast and space efficient. I'm not entirely convinced that it is correct; more tests are required. * This version adds support for defines and refs. 
r1011 | ser | 2004-06-20 11:20:07 -0400 (Sun, 20 Jun 2004) | 3 lines * Removed debugging output from unit test * Moved ">" in Element.inspect r1014 | ser | 2004-06-20 11:40:30 -0400 (Sun, 20 Jun 2004) | 2 lines * Minor bug in missing includes for validation rules r1023 | ser | 2004-07-03 08:57:34 -0400 (Sat, 03 Jul 2004) | 2 lines * Fixed bug #34, typo in xpath_parser. r1024 | ser | 2004-07-03 10:22:08 -0400 (Sat, 03 Jul 2004) | 9 lines * Previous fix, (include? -> includes?) was incorrect. * Added another test for encoding * Started AnyName support in RelaxNG * Added Element#Attributes#to_a, so that it does something intelligent. This was needed by XPath, for '@*' * Fixed XPath so that @* works. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_1_8@6579 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- lib/rexml/attribute.rb | 6 ++ lib/rexml/document.rb | 73 +-------------- lib/rexml/element.rb | 33 ++++++- lib/rexml/parsers/baseparser.rb | 44 ++++++++- lib/rexml/parsers/lightparser.rb | 4 + lib/rexml/parsers/pullparser.rb | 6 ++ lib/rexml/parsers/sax2parser.rb | 5 ++ lib/rexml/parsers/streamparser.rb | 9 +- lib/rexml/parsers/ultralightparser.rb | 4 + lib/rexml/parsers/xpathparser.rb | 164 ++++++++++++++++++++++++++-------- lib/rexml/rexml.rb | 8 +- lib/rexml/text.rb | 8 ++ lib/rexml/xpath_parser.rb | 28 +++--- 13 files changed, 261 insertions(+), 131 deletions(-) (limited to 'lib/rexml') diff --git a/lib/rexml/attribute.rb b/lib/rexml/attribute.rb index 9eb3c211ea..a5a58055b8 100644 --- a/lib/rexml/attribute.rb +++ b/lib/rexml/attribute.rb @@ -152,6 +152,12 @@ module REXML write( rv ) rv end + + def xpath + path = @element.xpath + path += "/@#{self.expanded_name}" + return path + end end end #vim:ts=2 sw=2 noexpandtab: diff --git a/lib/rexml/document.rb b/lib/rexml/document.rb index 1378bb212c..084ebd2a39 100644 --- a/lib/rexml/document.rb +++ b/lib/rexml/document.rb @@ -9,6 +9,7 @@ require "rexml/parseexception" require "rexml/output" require 
"rexml/parsers/baseparser" require "rexml/parsers/streamparser" +require "rexml/parsers/treeparser" module REXML # Represents a full XML document, including PIs, a doctype, etc. A @@ -171,77 +172,7 @@ module REXML private def build( source ) - build_context = self - parser = Parsers::BaseParser.new( source ) - tag_stack = [] - in_doctype = false - entities = nil - begin - while true - event = parser.pull - case event[0] - when :end_document - return - when :start_element - tag_stack.push(event[1]) - # find the observers for namespaces - build_context = build_context.add_element( event[1], event[2] ) - when :end_element - tag_stack.pop - build_context = build_context.parent - when :text - if not in_doctype - if build_context[-1].instance_of? Text - build_context[-1] << event[1] - else - build_context.add( - Text.new( event[1], build_context.whitespace, nil, true ) - ) unless ( - event[1].strip.size==0 and - build_context.ignore_whitespace_nodes - ) - end - end - when :comment - c = Comment.new( event[1] ) - build_context.add( c ) - when :cdata - c = CData.new( event[1] ) - build_context.add( c ) - when :processing_instruction - build_context.add( Instruction.new( event[1], event[2] ) ) - when :end_doctype - in_doctype = false - entities.each { |k,v| entities[k] = build_context.entities[k].value } - build_context = build_context.parent - when :start_doctype - doctype = DocType.new( event[1..-1], build_context ) - build_context = doctype - entities = {} - in_doctype = true - when :attlistdecl - n = AttlistDecl.new( event[1..-1] ) - build_context.add( n ) - when :externalentity - n = ExternalEntity.new( event[1] ) - build_context.add( n ) - when :elementdecl - n = ElementDecl.new( event[1] ) - build_context.add(n) - when :entitydecl - entities[ event[1] ] = event[2] unless event[2] =~ /PUBLIC|SYSTEM/ - build_context.add(Entity.new(event)) - when :notationdecl - n = NotationDecl.new( *event[1..-1] ) - build_context.add( n ) - when :xmldecl - x = XMLDecl.new( event[1], 
event[2], event[3] ) - build_context.add( x ) - end - end - rescue - raise ParseException.new( $!.message, parser.source, parser, $! ) - end + Parsers::TreeParser.new( source, self ).parse end end end diff --git a/lib/rexml/element.rb b/lib/rexml/element.rb index ffc81bed91..25e530de41 100644 --- a/lib/rexml/element.rb +++ b/lib/rexml/element.rb @@ -73,10 +73,10 @@ module REXML @attributes.each_attribute do |attr| rv << " " attr.write( rv, 0 ) - end unless @attributes.empty? + end if children.size > 0 - rv << " ... " + rv << "> ... " else rv << "/>" end @@ -517,6 +517,17 @@ module REXML :element end + def xpath + path_elements = [] + cur = self + path_elements << __to_xpath_helper( self ) + while cur.parent + cur = cur.parent + path_elements << __to_xpath_helper( cur ) + end + return path_elements.reverse.join( "/" ) + end + ################################################# # Attributes # ################################################# @@ -677,6 +688,20 @@ module REXML private + def __to_xpath_helper node + rv = node.expanded_name + if node.parent + results = node.parent.find_all {|n| + n.kind_of?(REXML::Element) and n.expanded_name == node.expanded_name + } + if results.length > 1 + idx = results.index( node ) + rv << "[#{idx+1}]" + end + end + rv + end + # A private helper method def each_with_something( test, max=0, name=nil ) num = 0 @@ -950,6 +975,10 @@ module REXML return nil end + def to_a + values.flatten + end + # Returns the number of attributes the owning Element contains. 
# doc = Document "" # doc.root.attributes.length #-> 3 diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb index fbb1ec06a8..e95cba8904 100644 --- a/lib/rexml/parsers/baseparser.rb +++ b/lib/rexml/parsers/baseparser.rb @@ -100,6 +100,23 @@ module REXML self.stream = source end + def add_listener( listener ) + if !defined?(@listeners) or !@listeners + @listeners = [] + instance_eval <<-EOL + alias :_old_pull :pull + def pull + event = _old_pull + @listeners.each do |listener| + listener.receive event + end + event + end + EOL + end + @listeners << listener + end + attr_reader :source def stream=( source ) @@ -162,11 +179,11 @@ module REXML # Returns the next event. This is a +PullEvent+ object. def pull - return [ :end_document ] if empty? if @closed x, @closed = @closed, nil return [ :end_element, x ] end + return [ :end_document ] if empty? return @stack.shift if @stack.size > 0 @source.read if @source.buffer.size<2 if @document_status == nil @@ -289,8 +306,9 @@ module REXML last_tag = @tags.pop #md = @source.match_to_consume( '>', CLOSE_MATCH) md = @source.match( CLOSE_MATCH, true ) - raise REXML::ParseException.new( "Missing end tag for '#{last_tag}' "+ - "(got \"#{md[1]}\")", @source) unless last_tag == md[1] + raise REXML::ParseException.new( "Missing end tag for "+ + "'#{last_tag}' (got \"#{md[1]}\")", + @source) unless last_tag == md[1] return [ :end_element, last_tag ] elsif @source.buffer[1] == ?! 
md = @source.match(/\A(\s*[^>]*>)/um) @@ -411,3 +429,23 @@ module REXML end end end + +=begin + case event[0] + when :start_element + when :text + when :end_element + when :processing_instruction + when :cdata + when :comment + when :xmldecl + when :start_doctype + when :end_doctype + when :externalentity + when :elementdecl + when :entity + when :attlistdecl + when :notationdecl + when :end_doctype + end +=end diff --git a/lib/rexml/parsers/lightparser.rb b/lib/rexml/parsers/lightparser.rb index 8c555f7960..0f35034993 100644 --- a/lib/rexml/parsers/lightparser.rb +++ b/lib/rexml/parsers/lightparser.rb @@ -10,6 +10,10 @@ module REXML @parser = REXML::Parsers::BaseParser.new( stream ) end + def add_listener( listener ) + @parser.add_listener( listener ) + end + def rewind @stream.rewind @parser.stream = @stream diff --git a/lib/rexml/parsers/pullparser.rb b/lib/rexml/parsers/pullparser.rb index aeda6251fe..fe4d41c959 100644 --- a/lib/rexml/parsers/pullparser.rb +++ b/lib/rexml/parsers/pullparser.rb @@ -29,8 +29,14 @@ module REXML def initialize stream super @entities = {} + @listeners = nil end + def add_listener( listener ) + @listeners = [] unless @listeners + @listeners << listener + end + def each while has_next? yield self.pull diff --git a/lib/rexml/parsers/sax2parser.rb b/lib/rexml/parsers/sax2parser.rb index 8c82cf8fc1..96440d17bf 100644 --- a/lib/rexml/parsers/sax2parser.rb +++ b/lib/rexml/parsers/sax2parser.rb @@ -14,6 +14,10 @@ module REXML @tag_stack = [] end + def add_listener( listener ) + @parser.add_listener( listener ) + end + # Listen arguments: # # Symbol, Array, Block @@ -89,6 +93,7 @@ module REXML if procs or listeners # break out the namespace declarations # The attributes live in event[2] + event[2].each {|n, v| event[2][n] = @parser.normalize(v)} nsdecl = event[2].find_all { |n, value| n =~ /^xmlns(:|$)/ } nsdecl.collect! 
{ |n, value| [ n[6..-1], value ] } @namespace_stack.push({}) diff --git a/lib/rexml/parsers/streamparser.rb b/lib/rexml/parsers/streamparser.rb index 49bef0d8fa..357cc186e6 100644 --- a/lib/rexml/parsers/streamparser.rb +++ b/lib/rexml/parsers/streamparser.rb @@ -6,6 +6,10 @@ module REXML @parser = BaseParser.new( source ) end + def add_listener( listener ) + @parser.add_listener( listener ) + end + def parse # entity string while true @@ -14,7 +18,10 @@ module REXML when :end_document return when :start_element - @listener.tag_start( event[1], event[2] ) + attrs = event[2].each do |n, v| + event[2][n] = @parser.unnormalize( v ) + end + @listener.tag_start( event[1], attrs ) when :end_element @listener.tag_end( event[1] ) when :text diff --git a/lib/rexml/parsers/ultralightparser.rb b/lib/rexml/parsers/ultralightparser.rb index f3b208bf0f..adc4af18e2 100644 --- a/lib/rexml/parsers/ultralightparser.rb +++ b/lib/rexml/parsers/ultralightparser.rb @@ -9,6 +9,10 @@ module REXML @parser = REXML::Parsers::BaseParser.new( stream ) end + def add_listener( listener ) + @parser.add_listener( listener ) + end + def rewind @stream.rewind @parser.stream = @stream diff --git a/lib/rexml/parsers/xpathparser.rb b/lib/rexml/parsers/xpathparser.rb index 082f8255ad..41b2b8a5c1 100644 --- a/lib/rexml/parsers/xpathparser.rb +++ b/lib/rexml/parsers/xpathparser.rb @@ -30,46 +30,126 @@ module REXML parsed end - def to_string( path ) + def abbreviate( path ) + path = path.kind_of?(String) ? 
parse( path ) : path string = "" + document = false while path.size > 0 - case path[0] - when :ancestor, :ancestor_or_self, :attribute, :child, :descendant, :descendant_or_self, :following, :following_sibling, :namespace, :parent, :preceding, :preceding_sibling, :self - op = path.shift + op = path.shift + case op + when :node + when :attribute + string << "/" if string.size > 0 + string << "@" + when :child + string << "/" if string.size > 0 + when :descendant_or_self + string << "/" + when :self + string << "." + when :parent + string << ".." + when :any + string << "*" + when :text + string << "text()" + when :following, :following_sibling, + :ancestor, :ancestor_or_self, :descendant, + :namespace, :preceding, :preceding_sibling + string << "/" unless string.size == 0 + string << op.to_s.tr("_", "-") + string << "::" + when :qname + prefix = path.shift + name = path.shift + string << prefix+":" if prefix.size > 0 + string << name + when :predicate + string << '[' + string << predicate_to_string( path.shift ) {|x| abbreviate( x ) } + string << ']' + when :document + document = true + when :function + string << path.shift + string << "( " + string << predicate_to_string( path.shift[0] ) {|x| abbreviate( x )} + string << " )" + when :literal + string << %Q{ "#{path.shift}" } + else + string << "/" unless string.size == 0 + string << "UNKNOWN(" + string << op.inspect + string << ")" + end + end + string = "/"+string if document + return string + end + + def expand( path ) + path = path.kind_of?(String) ? 
parse( path ) : path + string = "" + document = false + while path.size > 0 + op = path.shift + case op + when :node + string << "node()" + when :attribute, :child, :following, :following_sibling, + :ancestor, :ancestor_or_self, :descendant, :descendant_or_self, + :namespace, :preceding, :preceding_sibling, :self, :parent string << "/" unless string.size == 0 - string << op.to_s + string << op.to_s.tr("_", "-") string << "::" when :any - path.shift string << "*" when :qname - path.shift prefix = path.shift name = path.shift string << prefix+":" if prefix.size > 0 string << name when :predicate - path.shift string << '[' - string << predicate_to_string( path.shift ) - string << ' ]' + string << predicate_to_string( path.shift ) { |x| expand(x) } + string << ']' + when :document + document = true else string << "/" unless string.size == 0 string << "UNKNOWN(" - string << path.shift.inspect + string << op.inspect string << ")" end end + string = "/"+string if document return string end - def predicate_to_string( path ) + def predicate_to_string( path, &block ) string = "" case path[0] - when :and, :or, :mult, :plus, :minus, :neq, :eq, :lt, :gt, :lteq, :gteq, :div, :mod, :neq, :union + when :and, :or, :mult, :plus, :minus, :neq, :eq, :lt, :gt, :lteq, :gteq, :div, :mod, :union op = path.shift - left = predicate_to_string( path.shift ) - right = predicate_to_string( path.shift ) + case op + when :eq + op = "=" + when :lt + op = "<" + when :gt + op = ">" + when :lteq + op = "<=" + when :gteq + op = ">=" + when :neq + op = "!=" + when :union + op = "|" + end + left = predicate_to_string( path.shift, &block ) + right = predicate_to_string( path.shift, &block ) string << " " string << left string << " " @@ -82,7 +162,7 @@ module REXML name = path.shift string << name string << "( " - string << predicate_to_string( path.shift ) + string << predicate_to_string( path.shift, &block ) string << " )" when :literal path.shift @@ -91,7 +171,7 @@ module REXML string << " " else 
string << " " - string << to_string( path ) + string << yield( path ) string << " " end return string.squeeze(" ") @@ -534,7 +614,6 @@ module REXML #| FUNCTION_NAME '(' ( expr ( ',' expr )* )? ')' def FunctionCall rest, parsed path, arguments = parse_args(rest) - #puts "Function call >>> (#{arguments.inspect})" argset = [] for argument in arguments args = [] @@ -567,28 +646,39 @@ module REXML def parse_args( string ) arguments = [] ind = 0 + inquot = false + inapos = false depth = 1 begin case string[ind] - when ?( - depth += 1 - if depth == 1 - string = string[1..-1] - ind -= 1 - end - when ?) - depth -= 1 - if depth == 0 - s = string[0,ind].strip - arguments << s unless s == "" - string = string[ind+1..-1] - end - when ?, - if depth == 1 - s = string[0,ind].strip - arguments << s unless s == "" - string = string[ind+1..-1] - ind = 0 + when ?" + inquot = !inquot unless inapos + when ?' + inapos = !inapos unless inquot + else + unless inquot or inapos + case string[ind] + when ?( + depth += 1 + if depth == 1 + string = string[1..-1] + ind -= 1 + end + when ?) 
+ depth -= 1 + if depth == 0 + s = string[0,ind].strip + arguments << s unless s == "" + string = string[ind+1..-1] + end + when ?, + if depth == 1 + s = string[0,ind].strip + arguments << s unless s == "" + string = string[ind+1..-1] + ind = -1 + end + end end end ind += 1 diff --git a/lib/rexml/rexml.rb b/lib/rexml/rexml.rb index 6403a785c0..f3ad57b8bc 100644 --- a/lib/rexml/rexml.rb +++ b/lib/rexml/rexml.rb @@ -10,8 +10,8 @@ # # Main page:: http://www.germane-software.com/software/rexml # Author:: Sean Russell -# Version:: 3.0.9 -# Date:: +2004/137 +# Version:: @ANT_VERSION@ +# Date:: @ANT_DATE@ # # This API documentation can be downloaded from the REXML home page, or can # be accessed online[http://www.germane-software.com/software/rexml_doc] @@ -21,6 +21,6 @@ # online[http://www.germane-software.com/software/rexml/docs/tutorial.html] module REXML Copyright = "Copyright © 2001, 2002, 2003, 2004 Sean Russell " - Date = "+2004/137" - Version = "3.0.9" + Date = "+2004/186" + Version = "3.1.2" end diff --git a/lib/rexml/text.rb b/lib/rexml/text.rb index 388256ac81..3e5fcc23b6 100644 --- a/lib/rexml/text.rb +++ b/lib/rexml/text.rb @@ -218,6 +218,14 @@ module REXML writer << s end + # FIXME + # This probably won't work properly + def xpath + path = @parent.xpath + path += "/text()" + return path + end + # Writes out text, substituting special characters beforehand. 
# +out+ A String, IO, or any other object supporting <<( String ) # +input+ the text to substitute and the write out diff --git a/lib/rexml/xpath_parser.rb b/lib/rexml/xpath_parser.rb index 8aadb8ef86..5a976d5e82 100644 --- a/lib/rexml/xpath_parser.rb +++ b/lib/rexml/xpath_parser.rb @@ -43,8 +43,6 @@ module REXML @variables[ variable_name ] = value end - private - def match( path_stack, nodeset ) while ( path_stack.size > 0 and nodeset.size > 0 ) #puts "PARSE: #{path_stack.inspect} '#{nodeset.collect{|n|n.class}.inspect}'" @@ -55,6 +53,8 @@ module REXML nodeset end + private + def internal_parse path_stack, nodeset #puts "INTERNAL_PARSE RETURNING WITH NO RESULTS" if nodeset.size == 0 or path_stack.size == 0 return nodeset if nodeset.size == 0 or path_stack.size == 0 @@ -151,13 +151,14 @@ module REXML end end when :any + #puts "ANY" for element in nodeset if element.node_type == :element - attr = element.attributes + new_nodeset += element.attributes.to_a end end end - #puts "RETURNING #{new_nodeset.collect{|n|n.to_s}.inspect}" + #puts "RETURNING #{new_nodeset.collect{|n|n.to_s}.inspect}" return new_nodeset when :parent @@ -182,7 +183,7 @@ module REXML new_nodeset << node while ( node.parent ) node = node.parent - new_nodeset << node unless new_nodeset.includes? node + new_nodeset << node unless new_nodeset.include? node end end end @@ -423,17 +424,15 @@ module REXML current_index = all_siblings.index( node ) preceding_siblings = all_siblings[ 0 .. current_index-1 ] - preceding_siblings.reverse! preceding = [] - recurse( preceding_siblings ) { |node| preceding << node } - preceding.reverse + recurse( preceding_siblings ) { |node| preceding.unshift( node ) } + preceding end def equality_relational_compare( set1, op, set2 ) - #puts "EQ_REL_COMP: #{set1.to_s}, #{op}, #{set2.to_s}" - #puts "#{set1.class.name} #{op} #{set2.class.name}" + #puts "#"*80 if set1.kind_of? Array and set2.kind_of? 
Array - #puts "#{set1.size} & #{set2.size}" + #puts "#{set1.size} & #{set2.size}" if set1.size == 1 and set2.size == 1 set1 = set1[0] set2 = set2[0] @@ -451,6 +450,7 @@ module REXML return false end end + #puts "EQ_REL_COMP: #{set1.class.name} #{set1.inspect}, #{op}, #{set2.class.name} #{set2.inspect}" #puts "COMPARING VALUES" # If one is nodeset and other is number, compare number to each item # in nodeset s.t. number op number(string(item)) @@ -459,7 +459,7 @@ module REXML # If one is nodeset and other is boolean, compare boolean to each item # in nodeset s.t. boolean op boolean(item) if set1.kind_of? Array or set2.kind_of? Array - #puts "ISA ARRAY" + #puts "ISA ARRAY" if set1.kind_of? Array a = set1 b = set2.to_s @@ -486,8 +486,10 @@ module REXML return true if compare( v, op, b ) end else + #puts "Functions::string( #{b}(#{b.class.name}) ) = #{Functions::string(b)}" b = Functions::string( b ) for v in a + #puts "v = #{v.class.name} #{v.inspect}" v = Functions::string(v) return true if compare( v, op, b ) end @@ -530,7 +532,7 @@ module REXML end def compare a, op, b - #puts "COMPARE #{a.to_s} #{op} #{b.to_s}" + #puts "COMPARE #{a.to_s}(#{a.class.name}) #{op} #{b.to_s}(#{a.class.name})" case op when :eq a == b -- cgit v1.2.3