From a8379e7f1855c999b931a76b19f9c0922a9f13a4 Mon Sep 17 00:00:00 2001 From: ser Date: Sun, 4 Jul 2004 15:39:05 +0000 Subject: * Added the treeparser, and added the file to the MANIFEST. r1002 | ser | 2004-06-07 07:45:53 -0400 (Mon, 07 Jun 2004) | 2 lines * Workin' in the coal mine, goin' down, down, down... r1003 | ser | 2004-06-08 22:24:08 -0400 (Tue, 08 Jun 2004) | 7 lines * Entirely rewrote the validation code; the finite state machine, while cool, didn't survive the encounter with Interleave. It was getting sort of hacky, too. The new mechanism is less elegant, but is basically still a FSM, and is more flexible without having to add hacks to extend it. Large chunks of the FSM may be reusable in other validation mechanisms. * Added interleave support r1004 | ser | 2004-06-09 07:24:17 -0400 (Wed, 09 Jun 2004) | 2 lines * Added suppert for mixed r1005 | ser | 2004-06-09 08:01:33 -0400 (Wed, 09 Jun 2004) | 3 lines * Added Kou's patch to normalize attribute values passed through the SAX2 and Stream parsers. r1006 | ser | 2004-06-09 08:12:35 -0400 (Wed, 09 Jun 2004) | 2 lines * Applied Kou's preceding-sibling patch, which fixes the order of the axe results r1009 | ser | 2004-06-20 11:02:55 -0400 (Sun, 20 Jun 2004) | 8 lines * Redesigned and rewrote the RelaxNG code. It isn't elegant, but it works. Particular problems encountered were interleave and ref. Interleave means I can't use a clean FSM design, and ref means the dirty FSM design has to be modified during validation. There's a lot of code that could be cleaned up in here. However, I'm pretty sure that this design is reasonably fast and space efficient. I'm not entirely convinced that it is correct; more tests are required. * This version adds support for defines and refs. r1011 | ser | 2004-06-20 11:20:07 -0400 (Sun, 20 Jun 2004) | 3 lines * Removed debugging output from unit test * Moved ">" in Element.inspect r1014 | ser | 2004-06-20 11:40:30 -0400 (Sun, 20 Jun 2004) | 2 lines * Minor big in missing includes for validation rules r1023 | ser | 2004-07-03 08:57:34 -0400 (Sat, 03 Jul 2004) | 2 lines * Fixed bug #34, typo in xpath_parser. r1024 | ser | 2004-07-03 10:22:08 -0400 (Sat, 03 Jul 2004) | 9 lines * Previous fix, (include? -> includes?) was incorrect. * Added another test for encoding * Started AnyName support in RelaxNG * Added Element#Attributes#to_a, so that it does something intelligent. This was needed by XPath, for '@*' * Fixed XPath so that @* works. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_1_8@6579 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- lib/rexml/parsers/baseparser.rb | 44 ++++++++- lib/rexml/parsers/lightparser.rb | 4 + lib/rexml/parsers/pullparser.rb | 6 ++ lib/rexml/parsers/sax2parser.rb | 5 ++ lib/rexml/parsers/streamparser.rb | 9 +- lib/rexml/parsers/ultralightparser.rb | 4 + lib/rexml/parsers/xpathparser.rb | 164 ++++++++++++++++++++++++++-------- 7 files changed, 195 insertions(+), 41 deletions(-) (limited to 'lib/rexml/parsers') diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb index fbb1ec06a8..e95cba8904 100644 --- a/lib/rexml/parsers/baseparser.rb +++ b/lib/rexml/parsers/baseparser.rb @@ -100,6 +100,23 @@ module REXML self.stream = source end + def add_listener( listener ) + if !defined?(@listeners) or !@listeners + @listeners = [] + instance_eval <<-EOL + alias :_old_pull :pull + def pull + event = _old_pull + @listeners.each do |listener| + listener.receive event + end + event + end + EOL + end + @listeners << listener + end + attr_reader :source def stream=( source ) @@ -162,11 +179,11 @@ module REXML # Returns the next event. This is a +PullEvent+ object. def pull - return [ :end_document ] if empty? if @closed x, @closed = @closed, nil return [ :end_element, x ] end + return [ :end_document ] if empty? return @stack.shift if @stack.size > 0 @source.read if @source.buffer.size<2 if @document_status == nil @@ -289,8 +306,9 @@ module REXML last_tag = @tags.pop #md = @source.match_to_consume( '>', CLOSE_MATCH) md = @source.match( CLOSE_MATCH, true ) - raise REXML::ParseException.new( "Missing end tag for '#{last_tag}' "+ - "(got \"#{md[1]}\")", @source) unless last_tag == md[1] + raise REXML::ParseException.new( "Missing end tag for "+ + "'#{last_tag}' (got \"#{md[1]}\")", + @source) unless last_tag == md[1] return [ :end_element, last_tag ] elsif @source.buffer[1] == ?! md = @source.match(/\A(\s*[^>]*>)/um) @@ -411,3 +429,23 @@ module REXML end end end + +=begin + case event[0] + when :start_element + when :text + when :end_element + when :processing_instruction + when :cdata + when :comment + when :xmldecl + when :start_doctype + when :end_doctype + when :externalentity + when :elementdecl + when :entity + when :attlistdecl + when :notationdecl + when :end_doctype + end +=end diff --git a/lib/rexml/parsers/lightparser.rb b/lib/rexml/parsers/lightparser.rb index 8c555f7960..0f35034993 100644 --- a/lib/rexml/parsers/lightparser.rb +++ b/lib/rexml/parsers/lightparser.rb @@ -10,6 +10,10 @@ module REXML @parser = REXML::Parsers::BaseParser.new( stream ) end + def add_listener( listener ) + @parser.add_listener( listener ) + end + def rewind @stream.rewind @parser.stream = @stream diff --git a/lib/rexml/parsers/pullparser.rb b/lib/rexml/parsers/pullparser.rb index aeda6251fe..fe4d41c959 100644 --- a/lib/rexml/parsers/pullparser.rb +++ b/lib/rexml/parsers/pullparser.rb @@ -29,8 +29,14 @@ module REXML def initialize stream super @entities = {} + @listeners = nil end + def add_listener( listener ) + @listeners = [] unless @listeners + @listeners << listener + end + def each while has_next? yield self.pull diff --git a/lib/rexml/parsers/sax2parser.rb b/lib/rexml/parsers/sax2parser.rb index 8c82cf8fc1..96440d17bf 100644 --- a/lib/rexml/parsers/sax2parser.rb +++ b/lib/rexml/parsers/sax2parser.rb @@ -14,6 +14,10 @@ module REXML @tag_stack = [] end + def add_listener( listener ) + @parser.add_listener( listener ) + end + # Listen arguments: # # Symbol, Array, Block @@ -89,6 +93,7 @@ module REXML if procs or listeners # break out the namespace declarations # The attributes live in event[2] + event[2].each {|n, v| event[2][n] = @parser.normalize(v)} nsdecl = event[2].find_all { |n, value| n =~ /^xmlns(:|$)/ } nsdecl.collect! { |n, value| [ n[6..-1], value ] } @namespace_stack.push({}) diff --git a/lib/rexml/parsers/streamparser.rb b/lib/rexml/parsers/streamparser.rb index 49bef0d8fa..357cc186e6 100644 --- a/lib/rexml/parsers/streamparser.rb +++ b/lib/rexml/parsers/streamparser.rb @@ -6,6 +6,10 @@ module REXML @parser = BaseParser.new( source ) end + def add_listener( listener ) + @parser.add_listener( listener ) + end + def parse # entity string while true @@ -14,7 +18,10 @@ module REXML when :end_document return when :start_element - @listener.tag_start( event[1], event[2] ) + attrs = event[2].each do |n, v| + event[2][n] = @parser.unnormalize( v ) + end + @listener.tag_start( event[1], attrs ) when :end_element @listener.tag_end( event[1] ) when :text diff --git a/lib/rexml/parsers/ultralightparser.rb b/lib/rexml/parsers/ultralightparser.rb index f3b208bf0f..adc4af18e2 100644 --- a/lib/rexml/parsers/ultralightparser.rb +++ b/lib/rexml/parsers/ultralightparser.rb @@ -9,6 +9,10 @@ module REXML @parser = REXML::Parsers::BaseParser.new( stream ) end + def add_listener( listener ) + @parser.add_listener( listener ) + end + def rewind @stream.rewind @parser.stream = @stream diff --git a/lib/rexml/parsers/xpathparser.rb b/lib/rexml/parsers/xpathparser.rb index 082f8255ad..41b2b8a5c1 100644 --- a/lib/rexml/parsers/xpathparser.rb +++ b/lib/rexml/parsers/xpathparser.rb @@ -30,46 +30,126 @@ module REXML parsed end - def to_string( path ) + def abbreviate( path ) + path = path.kind_of?(String) ? parse( path ) : path string = "" + document = false while path.size > 0 - case path[0] - when :ancestor, :ancestor_or_self, :attribute, :child, :descendant, :descendant_or_self, :following, :following_sibling, :namespace, :parent, :preceding, :preceding_sibling, :self - op = path.shift + op = path.shift + case op + when :node + when :attribute + string << "/" if string.size > 0 + string << "@" + when :child + string << "/" if string.size > 0 + when :descendant_or_self + string << "/" + when :self + string << "." + when :parent + string << ".." + when :any + string << "*" + when :text + string << "text()" + when :following, :following_sibling, + :ancestor, :ancestor_or_self, :descendant, + :namespace, :preceding, :preceding_sibling + string << "/" unless string.size == 0 + string << op.to_s.tr("_", "-") + string << "::" + when :qname + prefix = path.shift + name = path.shift + string << prefix+":" if prefix.size > 0 + string << name + when :predicate + string << '[' + string << predicate_to_string( path.shift ) {|x| abbreviate( x ) } + string << ']' + when :document + document = true + when :function + string << path.shift + string << "( " + string << predicate_to_string( path.shift[0] ) {|x| abbreviate( x )} + string << " )" + when :literal + string << %Q{ "#{path.shift}" } + else + string << "/" unless string.size == 0 + string << "UNKNOWN(" + string << op.inspect + string << ")" + end + end + string = "/"+string if document + return string + end + + def expand( path ) + path = path.kind_of?(String) ? parse( path ) : path + string = "" + document = false + while path.size > 0 + op = path.shift + case op + when :node + string << "node()" + when :attribute, :child, :following, :following_sibling, + :ancestor, :ancestor_or_self, :descendant, :descendant_or_self, + :namespace, :preceding, :preceding_sibling, :self, :parent string << "/" unless string.size == 0 - string << op.to_s + string << op.to_s.tr("_", "-") string << "::" when :any - path.shift string << "*" when :qname - path.shift prefix = path.shift name = path.shift string << prefix+":" if prefix.size > 0 string << name when :predicate - path.shift string << '[' - string << predicate_to_string( path.shift ) - string << ' ]' + string << predicate_to_string( path.shift ) { |x| expand(x) } + string << ']' + when :document + document = true else string << "/" unless string.size == 0 string << "UNKNOWN(" - string << path.shift.inspect + string << op.inspect string << ")" end end + string = "/"+string if document return string end - def predicate_to_string( path ) + def predicate_to_string( path, &block ) string = "" case path[0] - when :and, :or, :mult, :plus, :minus, :neq, :eq, :lt, :gt, :lteq, :gteq, :div, :mod, :neq, :union + when :and, :or, :mult, :plus, :minus, :neq, :eq, :lt, :gt, :lteq, :gteq, :div, :mod, :union op = path.shift - left = predicate_to_string( path.shift ) - right = predicate_to_string( path.shift ) + case op + when :eq + op = "=" + when :lt + op = "<" + when :gt + op = ">" + when :lteq + op = "<=" + when :gteq + op = ">=" + when :neq + op = "!=" + when :union + op = "|" + end + left = predicate_to_string( path.shift, &block ) + right = predicate_to_string( path.shift, &block ) string << " " string << left string << " " @@ -82,7 +162,7 @@ module REXML name = path.shift string << name string << "( " - string << predicate_to_string( path.shift ) + string << predicate_to_string( path.shift, &block ) string << " )" when :literal path.shift @@ -91,7 +171,7 @@ module REXML string << " " else string << " " - string << to_string( path ) + string << yield( path ) string << " " end return string.squeeze(" ") @@ -534,7 +614,6 @@ module REXML #| FUNCTION_NAME '(' ( expr ( ',' expr )* )? ')' def FunctionCall rest, parsed path, arguments = parse_args(rest) - #puts "Function call >>> (#{arguments.inspect})" argset = [] for argument in arguments args = [] @@ -567,28 +646,39 @@ module REXML def parse_args( string ) arguments = [] ind = 0 + inquot = false + inapos = false depth = 1 begin case string[ind] - when ?( - depth += 1 - if depth == 1 - string = string[1..-1] - ind -= 1 - end - when ?) - depth -= 1 - if depth == 0 - s = string[0,ind].strip - arguments << s unless s == "" - string = string[ind+1..-1] - end - when ?, - if depth == 1 - s = string[0,ind].strip - arguments << s unless s == "" - string = string[ind+1..-1] - ind = 0 + when ?" + inquot = !inquot unless inapos + when ?' + inapos = !inapos unless inquot + else + unless inquot or inapos + case string[ind] + when ?( + depth += 1 + if depth == 1 + string = string[1..-1] + ind -= 1 + end + when ?) + depth -= 1 + if depth == 0 + s = string[0,ind].strip + arguments << s unless s == "" + string = string[ind+1..-1] + end + when ?, + if depth == 1 + s = string[0,ind].strip + arguments << s unless s == "" + string = string[ind+1..-1] + ind = -1 + end + end end end ind += 1 -- cgit v1.2.3