require 'rexml/namespace' require 'rexml/xmltokens' module REXML module Parsers # You don't want to use this class. Really. Use XPath, which is a wrapper # for this class. Believe me. You don't want to poke around in here. # There is strange, dark magic at work in this code. Beware. Go back! Go # back while you still can! class XPathParser include XMLTokens LITERAL = /^'([^']*)'|^"([^"]*)"/u def namespaces=( namespaces ) Functions::namespace_context = namespaces @namespaces = namespaces end def parse path path.gsub!(/([\(\[])\s+/, '\1') # Strip ignorable spaces path.gsub!( /\s+([\]\)])/, '\1' ) parsed = [] path = OrExpr(path, parsed) parsed end def predicate path parsed = [] Predicate( "[#{path}]", parsed ) parsed end def abbreviate( path ) path = path.kind_of?(String) ? parse( path ) : path string = "" document = false while path.size > 0 op = path.shift case op when :node when :attribute string << "/" if string.size > 0 string << "@" when :child string << "/" if string.size > 0 when :descendant_or_self string << "/" when :self string << "." when :parent string << ".." when :any string << "*" when :text string << "text()" when :following, :following_sibling, :ancestor, :ancestor_or_self, :descendant, :namespace, :preceding, :preceding_sibling string << "/" unless string.size == 0 string << op.to_s.tr("_", "-") string << "::" when :qname prefix = path.shift name = path.shift string << prefix+":" if prefix.size > 0 string << name when :predicate string << '[' string << predicate_to_string( path.shift ) {|x| abbreviate( x ) } string << ']' when :document document = true when :function string << path.shift string << "( " string << predicate_to_string( path.shift[0] ) {|x| abbreviate( x )} string << " )" when :literal string << %Q{ "#{path.shift}" } else string << "/" unless string.size == 0 string << "UNKNOWN(" string << op.inspect string << ")" end end string = "/"+string if document return string end def expand( path ) path = path.kind_of?(String) ? parse( path ) : path string = "" document = false while path.size > 0 op = path.shift case op when :node string << "node()" when :attribute, :child, :following, :following_sibling, :ancestor, :ancestor_or_self, :descendant, :descendant_or_self, :namespace, :preceding, :preceding_sibling, :self, :parent string << "/" unless string.size == 0 string << op.to_s.tr("_", "-") string << "::" when :any string << "*" when :qname prefix = path.shift name = path.shift string << prefix+":" if prefix.size > 0 string << name when :predicate string << '[' string << predicate_to_string( path.shift ) { |x| expand(x) } string << ']' when :document document = true else string << "/" unless string.size == 0 string << "UNKNOWN(" string << op.inspect string << ")" end end string = "/"+string if document return string end def predicate_to_string( path, &block ) string = "" case path[0] when :and, :or, :mult, :plus, :minus, :neq, :eq, :lt, :gt, :lteq, :gteq, :div, :mod, :union op = path.shift case op when :eq op = "=" when :lt op = "<" when :gt op = ">" when :lteq op = "<=" when :gteq op = ">=" when :neq op = "!=" when :union op = "|" end left = predicate_to_string( path.shift, &block ) right = predicate_to_string( path.shift, &block ) string << " " string << left string << " " string << op.to_s string << " " string << right string << " " when :function path.shift name = path.shift string << name string << "( " string << predicate_to_string( path.shift, &block ) string << " )" when :literal path.shift string << " " string << path.shift.inspect string << " " else string << " " string << yield( path ) string << " " end return string.squeeze(" ") end private #LocationPath # | RelativeLocationPath # | '/' RelativeLocationPath? # | '//' RelativeLocationPath def LocationPath path, parsed #puts "LocationPath '#{path}'" path = path.strip if path[0] == ?/ parsed << :document if path[1] == ?/ parsed << :descendant_or_self parsed << :node path = path[2..-1] else path = path[1..-1] end end #puts parsed.inspect return RelativeLocationPath( path, parsed ) if path.size > 0 end #RelativeLocationPath # | Step # | (AXIS_NAME '::' | '@' | '') AxisSpecifier # NodeTest # Predicate # | '.' | '..' AbbreviatedStep # | RelativeLocationPath '/' Step # | RelativeLocationPath '//' Step AXIS = /^(ancestor|ancestor-or-self|attribute|child|descendant|descendant-or-self|following|following-sibling|namespace|parent|preceding|preceding-sibling|self)::/ def RelativeLocationPath path, parsed #puts "RelativeLocationPath #{path}" while path.size > 0 # (axis or @ or ) nodetest predicate > # OR > / Step # (. or ..) > if path[0] == ?. if path[1] == ?. parsed << :parent parsed << :node path = path[2..-1] else parsed << :self parsed << :node path = path[1..-1] end else if path[0] == ?@ #puts "ATTRIBUTE" parsed << :attribute path = path[1..-1] # Goto Nodetest elsif path =~ AXIS parsed << $1.tr('-','_').intern path = $' # Goto Nodetest else parsed << :child end #puts "NODETESTING '#{path}'" n = [] path = NodeTest( path, n) #puts "NODETEST RETURNED '#{path}'" if path[0] == ?[ path = Predicate( path, n ) end parsed.concat(n) end if path.size > 0 if path[0] == ?/ if path[1] == ?/ parsed << :descendant_or_self parsed << :node path = path[2..-1] else path = path[1..-1] end else return path end end end return path end # Returns a 1-1 map of the nodeset # The contents of the resulting array are either: # true/false, if a positive match # String, if a name match #NodeTest # | ('*' | NCNAME ':' '*' | QNAME) NameTest # | NODE_TYPE '(' ')' NodeType # | PI '(' LITERAL ')' PI # | '[' expr ']' Predicate NCNAMETEST= /^(#{NCNAME_STR}):\*/u QNAME = Namespace::NAMESPLIT NODE_TYPE = /^(comment|text|node)\(\s*\)/m PI = /^processing-instruction\(/ def NodeTest path, parsed #puts "NodeTest with #{path}" res = nil case path when /^\*/ path = $' parsed << :any when NODE_TYPE type = $1 path = $' parsed << type.tr('-', '_').intern when PI path = $' literal = nil if path !~ /^\s*\)/ path =~ LITERAL literal = $1 path = $' raise ParseException.new("Missing ')' after processing instruction") if path[0] != ?) path = path[1..-1] end parsed << :processing_instruction parsed << (literal || '') when NCNAMETEST #puts "NCNAMETEST" prefix = $1 path = $' parsed << :namespace parsed << prefix when QNAME #puts "QNAME" prefix = $1 name = $2 path = $' prefix = "" unless prefix parsed << :qname parsed << prefix parsed << name end return path end # Filters the supplied nodeset on the predicate(s) def Predicate path, parsed #puts "PREDICATE with #{path}" return nil unless path[0] == ?[ predicates = [] while path[0] == ?[ path, expr = get_group(path) predicates << expr[1..-2] if expr end #puts "PREDICATES = #{predicates.inspect}" predicates.each{ |expr| #puts "ORING #{expr}" preds = [] parsed << :predicate parsed << preds OrExpr(expr, preds) } #puts "PREDICATES = #{predicates.inspect}" path end # The following return arrays of true/false, a 1-1 mapping of the # supplied nodeset, except for axe(), which returns a filtered # nodeset #| OrExpr S 'or' S AndExpr #| AndExpr def OrExpr path, parsed #puts "OR >>> #{path}" n = [] rest = AndExpr( path, n ) #puts "OR <<< #{rest}" if rest != path while rest =~ /^\s*( or )/ n = [ :or, n, [] ] rest = AndExpr( $', n[-1] ) end end if parsed.size == 0 and n.size != 0 parsed.replace(n) elsif n.size > 0 parsed << n end rest end #| AndExpr S 'and' S EqualityExpr #| EqualityExpr def AndExpr path, parsed #puts "AND >>> #{path}" n = [] rest = EqualityExpr( path, n ) #puts "AND <<< #{rest}" if rest != path while rest =~ /^\s*( and )/ n = [ :and, n, [] ] #puts "AND >>> #{rest}" rest = EqualityExpr( $', n[-1] ) #puts "AND <<< #{rest}" end end if parsed.size == 0 and n.size != 0 parsed.replace(n) elsif n.size > 0 parsed << n end rest end #| EqualityExpr ('=' | '!=') RelationalExpr #| RelationalExpr def EqualityExpr path, parsed #puts "EQUALITY >>> #{path}" n = [] rest = RelationalExpr( path, n ) #puts "EQUALITY <<< #{rest}" if rest != path while rest =~ /^\s*(!?=)\s*/ if $1[0] == ?! n = [ :neq, n, [] ] else n = [ :eq, n, [] ] end rest = RelationalExpr( $', n[-1] ) end end if parsed.size == 0 and n.size != 0 parsed.replace(n) elsif n.size > 0 parsed << n end rest end #| RelationalExpr ('<' | '>' | '<=' | '>=') AdditiveExpr #| AdditiveExpr def RelationalExpr path, parsed #puts "RELATION >>> #{path}" n = [] rest = AdditiveExpr( path, n ) #puts "RELATION <<< #{rest}" if rest != path while rest =~ /^\s*([<>]=?)\s*/ if $1[0] == ?< sym = "lt" else sym = "gt" end sym << "eq" if $1[-1] == ?= n = [ sym.intern, n, [] ] rest = AdditiveExpr( $', n[-1] ) end end if parsed.size == 0 and n.size != 0 parsed.replace(n) elsif n.size > 0 parsed << n end rest end #| AdditiveExpr ('+' | S '-') MultiplicativeExpr #| MultiplicativeExpr def AdditiveExpr path, parsed #puts "ADDITIVE >>> #{path}" n = [] rest = MultiplicativeExpr( path, n ) #puts "ADDITIVE <<< #{rest}" if rest != path while rest =~ /^\s*(\+| -)\s*/ if $1[0] == ?+ n = [ :plus, n, [] ] else n = [ :minus, n, [] ] end rest = MultiplicativeExpr( $', n[-1] ) end end if parsed.size == 0 and n.size != 0 parsed.replace(n) elsif n.size > 0 parsed << n end rest end #| MultiplicativeExpr ('*' | S ('div' | 'mod') S) UnaryExpr #| UnaryExpr def MultiplicativeExpr path, parsed #puts "MULT >>> #{path}" n = [] rest = UnaryExpr( path, n ) #puts "MULT <<< #{rest}" if rest != path while rest =~ /^\s*(\*| div | mod )\s*/ if $1[0] == ?* n = [ :mult, n, [] ] elsif $1.include?( "div" ) n = [ :div, n, [] ] else n = [ :mod, n, [] ] end rest = UnaryExpr( $', n[-1] ) end end if parsed.size == 0 and n.size != 0 parsed.replace(n) elsif n.size > 0 parsed << n end rest end #| '-' UnaryExpr #| UnionExpr def UnaryExpr path, parsed path =~ /^(\-*)/ path = $' if $1 and (($1.size % 2) != 0) mult = -1 else mult = 1 end parsed << :neg if mult < 0 #puts "UNARY >>> #{path}" n = [] path = UnionExpr( path, n ) #puts "UNARY <<< #{path}" parsed.concat( n ) path end #| UnionExpr '|' PathExpr #| PathExpr def UnionExpr path, parsed #puts "UNION >>> #{path}" n = [] rest = PathExpr( path, n ) #puts "UNION <<< #{rest}" if rest != path while rest =~ /^\s*(\|)\s*/ n = [ :union, n, [] ] rest = PathExpr( $', n[-1] ) end end if parsed.size == 0 and n.size != 0 parsed.replace( n ) elsif n.size > 0 parsed << n end rest end #| LocationPath #| FilterExpr ('/' | '//') RelativeLocationPath def PathExpr path, parsed path =~ /^\s*/ path = $' #puts "PATH >>> #{path}" n = [] rest = FilterExpr( path, n ) #puts "PATH <<< '#{rest}'" if rest != path if rest and rest[0] == ?/ return RelativeLocationPath(rest, n) end end #puts "BEFORE WITH '#{rest}'" rest = LocationPath(rest, n) if rest =~ /\A[\/\.\@\[\w_*]/ parsed.concat(n) return rest end #| FilterExpr Predicate #| PrimaryExpr def FilterExpr path, parsed #puts "FILTER >>> #{path}" n = [] path = PrimaryExpr( path, n ) #puts "FILTER <<< #{path}" path = Predicate(path, n) if path and path[0] == ?[ #puts "FILTER <<< #{path}" parsed.concat(n) path end #| VARIABLE_REFERENCE #| '(' expr ')' #| LITERAL #| NUMBER #| FunctionCall VARIABLE_REFERENCE = /^\$(#{NAME_STR})/u NUMBER = /^(\d*\.?\d+)/ NT = /^comment|text|processing-instruction|node$/ def PrimaryExpr path, parsed arry = [] case path when VARIABLE_REFERENCE varname = $1 path = $' parsed << :variable parsed << varname #arry << @variables[ varname ] when /^(\w[-\w]*)(?:\()/ #puts "PrimaryExpr :: Function >>> #$1 -- '#$''" fname = $1 tmp = $' #puts "#{fname} =~ #{NT.inspect}" return path if fname =~ NT path = tmp parsed << :function parsed << fname path = FunctionCall(path, parsed) when NUMBER #puts "LITERAL or NUMBER: #$1" varname = $1.nil? ? $2 : $1 path = $' parsed << :literal parsed << (varname.include?('.') ? varname.to_f : varname.to_i) when LITERAL #puts "LITERAL or NUMBER: #$1" varname = $1.nil? ? $2 : $1 path = $' parsed << :literal parsed << varname when /^\(/ #/ path, contents = get_group(path) contents = contents[1..-2] n = [] OrExpr( contents, n ) parsed.concat(n) end path end #| FUNCTION_NAME '(' ( expr ( ',' expr )* )? ')' def FunctionCall rest, parsed path, arguments = parse_args(rest) argset = [] for argument in arguments args = [] OrExpr( argument, args ) argset << args end parsed << argset path end # get_group( '[foo]bar' ) -> ['bar', '[foo]'] def get_group string ind = 0 depth = 0 st = string[0,1] en = (st == "(" ? ")" : "]") begin case string[ind,1] when st depth += 1 when en depth -= 1 end ind += 1 end while depth > 0 and ind < string.length return nil unless depth==0 [string[ind..-1], string[0..ind-1]] end def parse_args( string ) arguments = [] ind = 0 inquot = false inapos = false depth = 1 begin case string[ind] when ?" inquot = !inquot unless inapos when ?' inapos = !inapos unless inquot else unless inquot or inapos case string[ind] when ?( depth += 1 if depth == 1 string = string[1..-1] ind -= 1 end when ?) depth -= 1 if depth == 0 s = string[0,ind].strip arguments << s unless s == "" string = string[ind+1..-1] end when ?, if depth == 1 s = string[0,ind].strip arguments << s unless s == "" string = string[ind+1..-1] ind = -1 end end end end ind += 1 end while depth > 0 and ind < string.length return nil unless depth==0 [string,arguments] end end end end