diff options
Diffstat (limited to 'ruby_1_8_6/lib/rexml/xpath_parser.rb')
-rw-r--r-- | ruby_1_8_6/lib/rexml/xpath_parser.rb | 792 |
1 files changed, 792 insertions, 0 deletions
diff --git a/ruby_1_8_6/lib/rexml/xpath_parser.rb b/ruby_1_8_6/lib/rexml/xpath_parser.rb new file mode 100644 index 0000000000..eb608fdb34 --- /dev/null +++ b/ruby_1_8_6/lib/rexml/xpath_parser.rb @@ -0,0 +1,792 @@ +require 'rexml/namespace' +require 'rexml/xmltokens' +require 'rexml/attribute' +require 'rexml/syncenumerator' +require 'rexml/parsers/xpathparser' + +class Object + def dclone + clone + end +end +class Symbol + def dclone ; self ; end +end +class Fixnum + def dclone ; self ; end +end +class Float + def dclone ; self ; end +end +class Array + def dclone + klone = self.clone + klone.clear + self.each{|v| klone << v.dclone} + klone + end +end + +module REXML + # You don't want to use this class. Really. Use XPath, which is a wrapper + # for this class. Believe me. You don't want to poke around in here. + # There is strange, dark magic at work in this code. Beware. Go back! Go + # back while you still can! + class XPathParser + include XMLTokens + LITERAL = /^'([^']*)'|^"([^"]*)"/u + + def initialize( ) + @parser = REXML::Parsers::XPathParser.new + @namespaces = nil + @variables = {} + end + + def namespaces=( namespaces={} ) + Functions::namespace_context = namespaces + @namespaces = namespaces + end + + def variables=( vars={} ) + Functions::variables = vars + @variables = vars + end + + def parse path, nodeset + #puts "#"*40 + path_stack = @parser.parse( path ) + #puts "PARSE: #{path} => #{path_stack.inspect}" + #puts "PARSE: nodeset = #{nodeset.inspect}" + match( path_stack, nodeset ) + end + + def get_first path, nodeset + #puts "#"*40 + path_stack = @parser.parse( path ) + #puts "PARSE: #{path} => #{path_stack.inspect}" + #puts "PARSE: nodeset = #{nodeset.inspect}" + first( path_stack, nodeset ) + end + + def predicate path, nodeset + path_stack = @parser.parse( path ) + expr( path_stack, nodeset ) + end + + def []=( variable_name, value ) + @variables[ variable_name ] = value + end + + + # Performs a depth-first (document order) XPath search, and returns the + # first match. This is the fastest, lightest way to return a single result. + # + # FIXME: This method is incomplete! + def first( path_stack, node ) + #puts "#{depth}) Entering match( #{path.inspect}, #{tree.inspect} )" + return nil if path.size == 0 + + case path[0] + when :document + # do nothing + return first( path[1..-1], node ) + when :child + for c in node.children + #puts "#{depth}) CHILD checking #{name(c)}" + r = first( path[1..-1], c ) + #puts "#{depth}) RETURNING #{r.inspect}" if r + return r if r + end + when :qname + name = path[2] + #puts "#{depth}) QNAME #{name(tree)} == #{name} (path => #{path.size})" + if node.name == name + #puts "#{depth}) RETURNING #{tree.inspect}" if path.size == 3 + return node if path.size == 3 + return first( path[3..-1], node ) + else + return nil + end + when :descendant_or_self + r = first( path[1..-1], node ) + return r if r + for c in node.children + r = first( path, c ) + return r if r + end + when :node + return first( path[1..-1], node ) + when :any + return first( path[1..-1], node ) + end + return nil + end + + + def match( path_stack, nodeset ) + #puts "MATCH: path_stack = #{path_stack.inspect}" + #puts "MATCH: nodeset = #{nodeset.inspect}" + r = expr( path_stack, nodeset ) + #puts "MAIN EXPR => #{r.inspect}" + r + end + + private + + + # Returns a String namespace for a node, given a prefix + # The rules are: + # + # 1. Use the supplied namespace mapping first. + # 2. If no mapping was supplied, use the context node to look up the namespace + def get_namespace( node, prefix ) + if @namespaces + return @namespaces[prefix] || '' + else + return node.namespace( prefix ) if node.node_type == :element + return '' + end + end + + + # Expr takes a stack of path elements and a set of nodes (either a Parent + # or an Array and returns an Array of matching nodes + ALL = [ :attribute, :element, :text, :processing_instruction, :comment ] + ELEMENTS = [ :element ] + def expr( path_stack, nodeset, context=nil ) + #puts "#"*15 + #puts "In expr with #{path_stack.inspect}" + #puts "Returning" if path_stack.length == 0 || nodeset.length == 0 + node_types = ELEMENTS + return nodeset if path_stack.length == 0 || nodeset.length == 0 + while path_stack.length > 0 + #puts "#"*5 + #puts "Path stack = #{path_stack.inspect}" + #puts "Nodeset is #{nodeset.inspect}" + if nodeset.length == 0 + path_stack.clear + return [] + end + case (op = path_stack.shift) + when :document + nodeset = [ nodeset[0].root_node ] + #puts ":document, nodeset = #{nodeset.inspect}" + + when :qname + #puts "IN QNAME" + prefix = path_stack.shift + name = path_stack.shift + nodeset.delete_if do |node| + # FIXME: This DOUBLES the time XPath searches take + ns = get_namespace( node, prefix ) + #puts "NS = #{ns.inspect}" + #puts "node.node_type == :element => #{node.node_type == :element}" + if node.node_type == :element + #puts "node.name == #{name} => #{node.name == name}" + if node.name == name + #puts "node.namespace == #{ns.inspect} => #{node.namespace == ns}" + end + end + !(node.node_type == :element and + node.name == name and + node.namespace == ns ) + end + node_types = ELEMENTS + + when :any + #puts "ANY 1: nodeset = #{nodeset.inspect}" + #puts "ANY 1: node_types = #{node_types.inspect}" + nodeset.delete_if { |node| !node_types.include?(node.node_type) } + #puts "ANY 2: nodeset = #{nodeset.inspect}" + + when :self + # This space left intentionally blank + + when :processing_instruction + target = path_stack.shift + nodeset.delete_if do |node| + (node.node_type != :processing_instruction) or + ( target!='' and ( node.target != target ) ) + end + + when :text + nodeset.delete_if { |node| node.node_type != :text } + + when :comment + nodeset.delete_if { |node| node.node_type != :comment } + + when :node + # This space left intentionally blank + node_types = ALL + + when :child + new_nodeset = [] + nt = nil + for node in nodeset + nt = node.node_type + new_nodeset += node.children if nt == :element or nt == :document + end + nodeset = new_nodeset + node_types = ELEMENTS + + when :literal + return path_stack.shift + + when :attribute + new_nodeset = [] + case path_stack.shift + when :qname + prefix = path_stack.shift + name = path_stack.shift + for element in nodeset + if element.node_type == :element + #puts "Element name = #{element.name}" + #puts "get_namespace( #{element.inspect}, #{prefix} ) = #{get_namespace(element, prefix)}" + attrib = element.attribute( name, get_namespace(element, prefix) ) + #puts "attrib = #{attrib.inspect}" + new_nodeset << attrib if attrib + end + end + when :any + #puts "ANY" + for element in nodeset + if element.node_type == :element + new_nodeset += element.attributes.to_a + end + end + end + nodeset = new_nodeset + + when :parent + #puts "PARENT 1: nodeset = #{nodeset}" + nodeset = nodeset.collect{|n| n.parent}.compact + #nodeset = expr(path_stack.dclone, nodeset.collect{|n| n.parent}.compact) + #puts "PARENT 2: nodeset = #{nodeset.inspect}" + node_types = ELEMENTS + + when :ancestor + new_nodeset = [] + for node in nodeset + while node.parent + node = node.parent + new_nodeset << node unless new_nodeset.include? node + end + end + nodeset = new_nodeset + node_types = ELEMENTS + + when :ancestor_or_self + new_nodeset = [] + for node in nodeset + if node.node_type == :element + new_nodeset << node + while ( node.parent ) + node = node.parent + new_nodeset << node unless new_nodeset.include? node + end + end + end + nodeset = new_nodeset + node_types = ELEMENTS + + when :predicate + new_nodeset = [] + subcontext = { :size => nodeset.size } + pred = path_stack.shift + nodeset.each_with_index { |node, index| + subcontext[ :node ] = node + #puts "PREDICATE SETTING CONTEXT INDEX TO #{index+1}" + subcontext[ :index ] = index+1 + pc = pred.dclone + #puts "#{node.hash}) Recursing with #{pred.inspect} and [#{node.inspect}]" + result = expr( pc, [node], subcontext ) + result = result[0] if result.kind_of? Array and result.length == 1 + #puts "#{node.hash}) Result = #{result.inspect} (#{result.class.name})" + if result.kind_of? Numeric + #puts "Adding node #{node.inspect}" if result == (index+1) + new_nodeset << node if result == (index+1) + elsif result.instance_of? Array + if result.size > 0 and result.inject(false) {|k,s| s or k} + #puts "Adding node #{node.inspect}" if result.size > 0 + new_nodeset << node if result.size > 0 + end + else + #puts "Adding node #{node.inspect}" if result + new_nodeset << node if result + end + } + #puts "New nodeset = #{new_nodeset.inspect}" + #puts "Path_stack = #{path_stack.inspect}" + nodeset = new_nodeset +=begin + predicate = path_stack.shift + ns = nodeset.clone + result = expr( predicate, ns ) + #puts "Result = #{result.inspect} (#{result.class.name})" + #puts "nodeset = #{nodeset.inspect}" + if result.kind_of? Array + nodeset = result.zip(ns).collect{|m,n| n if m}.compact + else + nodeset = result ? nodeset : [] + end + #puts "Outgoing NS = #{nodeset.inspect}" +=end + + when :descendant_or_self + rv = descendant_or_self( path_stack, nodeset ) + path_stack.clear + nodeset = rv + node_types = ELEMENTS + + when :descendant + results = [] + nt = nil + for node in nodeset + nt = node.node_type + results += expr( path_stack.dclone.unshift( :descendant_or_self ), + node.children ) if nt == :element or nt == :document + end + nodeset = results + node_types = ELEMENTS + + when :following_sibling + #puts "FOLLOWING_SIBLING 1: nodeset = #{nodeset}" + results = [] + nodeset.each do |node| + next if node.parent.nil? + all_siblings = node.parent.children + current_index = all_siblings.index( node ) + following_siblings = all_siblings[ current_index+1 .. -1 ] + results += expr( path_stack.dclone, following_siblings ) + end + #puts "FOLLOWING_SIBLING 2: nodeset = #{nodeset}" + nodeset = results + + when :preceding_sibling + results = [] + nodeset.each do |node| + next if node.parent.nil? + all_siblings = node.parent.children + current_index = all_siblings.index( node ) + preceding_siblings = all_siblings[ 0, current_index ].reverse + results += preceding_siblings + end + nodeset = results + node_types = ELEMENTS + + when :preceding + new_nodeset = [] + for node in nodeset + new_nodeset += preceding( node ) + end + #puts "NEW NODESET => #{new_nodeset.inspect}" + nodeset = new_nodeset + node_types = ELEMENTS + + when :following + new_nodeset = [] + for node in nodeset + new_nodeset += following( node ) + end + nodeset = new_nodeset + node_types = ELEMENTS + + when :namespace + #puts "In :namespace" + new_nodeset = [] + prefix = path_stack.shift + for node in nodeset + if (node.node_type == :element or node.node_type == :attribute) + if @namespaces + namespaces = @namespaces + elsif (node.node_type == :element) + namespaces = node.namespaces + else + namespaces = node.element.namesapces + end + #puts "Namespaces = #{namespaces.inspect}" + #puts "Prefix = #{prefix.inspect}" + #puts "Node.namespace = #{node.namespace}" + if (node.namespace == namespaces[prefix]) + new_nodeset << node + end + end + end + nodeset = new_nodeset + + when :variable + var_name = path_stack.shift + return @variables[ var_name ] + + # :and, :or, :eq, :neq, :lt, :lteq, :gt, :gteq + # TODO: Special case for :or and :and -- not evaluate the right + # operand if the left alone determines result (i.e. is true for + # :or and false for :and). + when :eq, :neq, :lt, :lteq, :gt, :gteq, :and, :or + left = expr( path_stack.shift, nodeset.dup, context ) + #puts "LEFT => #{left.inspect} (#{left.class.name})" + right = expr( path_stack.shift, nodeset.dup, context ) + #puts "RIGHT => #{right.inspect} (#{right.class.name})" + res = equality_relational_compare( left, op, right ) + #puts "RES => #{res.inspect}" + return res + + when :and + left = expr( path_stack.shift, nodeset.dup, context ) + #puts "LEFT => #{left.inspect} (#{left.class.name})" + if left == false || left.nil? || !left.inject(false) {|a,b| a | b} + return [] + end + right = expr( path_stack.shift, nodeset.dup, context ) + #puts "RIGHT => #{right.inspect} (#{right.class.name})" + res = equality_relational_compare( left, op, right ) + #puts "RES => #{res.inspect}" + return res + + when :div + left = Functions::number(expr(path_stack.shift, nodeset, context)).to_f + right = Functions::number(expr(path_stack.shift, nodeset, context)).to_f + return (left / right) + + when :mod + left = Functions::number(expr(path_stack.shift, nodeset, context )).to_f + right = Functions::number(expr(path_stack.shift, nodeset, context )).to_f + return (left % right) + + when :mult + left = Functions::number(expr(path_stack.shift, nodeset, context )).to_f + right = Functions::number(expr(path_stack.shift, nodeset, context )).to_f + return (left * right) + + when :plus + left = Functions::number(expr(path_stack.shift, nodeset, context )).to_f + right = Functions::number(expr(path_stack.shift, nodeset, context )).to_f + return (left + right) + + when :minus + left = Functions::number(expr(path_stack.shift, nodeset, context )).to_f + right = Functions::number(expr(path_stack.shift, nodeset, context )).to_f + return (left - right) + + when :union + left = expr( path_stack.shift, nodeset, context ) + right = expr( path_stack.shift, nodeset, context ) + return (left | right) + + when :neg + res = expr( path_stack, nodeset, context ) + return -(res.to_f) + + when :not + when :function + func_name = path_stack.shift.tr('-','_') + arguments = path_stack.shift + #puts "FUNCTION 0: #{func_name}(#{arguments.collect{|a|a.inspect}.join(', ')})" + subcontext = context ? nil : { :size => nodeset.size } + + res = [] + cont = context + nodeset.each_with_index { |n, i| + if subcontext + subcontext[:node] = n + subcontext[:index] = i + cont = subcontext + end + arg_clone = arguments.dclone + args = arg_clone.collect { |arg| + #puts "FUNCTION 1: Calling expr( #{arg.inspect}, [#{n.inspect}] )" + expr( arg, [n], cont ) + } + #puts "FUNCTION 2: #{func_name}(#{args.collect{|a|a.inspect}.join(', ')})" + Functions.context = cont + res << Functions.send( func_name, *args ) + #puts "FUNCTION 3: #{res[-1].inspect}" + } + return res + + end + end # while + #puts "EXPR returning #{nodeset.inspect}" + return nodeset + end + + + ########################################################## + # FIXME + # The next two methods are BAD MOJO! + # This is my achilles heel. If anybody thinks of a better + # way of doing this, be my guest. This really sucks, but + # it is a wonder it works at all. + # ######################################################## + + def descendant_or_self( path_stack, nodeset ) + rs = [] + #puts "#"*80 + #puts "PATH_STACK = #{path_stack.inspect}" + #puts "NODESET = #{nodeset.collect{|n|n.inspect}.inspect}" + d_o_s( path_stack, nodeset, rs ) + #puts "RS = #{rs.collect{|n|n.inspect}.inspect}" + document_order(rs.flatten.compact) + #rs.flatten.compact + end + + def d_o_s( p, ns, r ) + #puts "IN DOS with #{ns.inspect}; ALREADY HAVE #{r.inspect}" + nt = nil + ns.each_index do |i| + n = ns[i] + #puts "P => #{p.inspect}" + x = expr( p.dclone, [ n ] ) + nt = n.node_type + d_o_s( p, n.children, x ) if nt == :element or nt == :document and n.children.size > 0 + r.concat(x) if x.size > 0 + end + end + + + # Reorders an array of nodes so that they are in document order + # It tries to do this efficiently. + # + # FIXME: I need to get rid of this, but the issue is that most of the XPath + # interpreter functions as a filter, which means that we lose context going + # in and out of function calls. If I knew what the index of the nodes was, + # I wouldn't have to do this. Maybe add a document IDX for each node? + # Problems with mutable documents. Or, rewrite everything. + def document_order( array_of_nodes ) + new_arry = [] + array_of_nodes.each { |node| + node_idx = [] + np = node.node_type == :attribute ? node.element : node + while np.parent and np.parent.node_type == :element + node_idx << np.parent.index( np ) + np = np.parent + end + new_arry << [ node_idx.reverse, node ] + } + #puts "new_arry = #{new_arry.inspect}" + new_arry.sort{ |s1, s2| s1[0] <=> s2[0] }.collect{ |s| s[1] } + end + + + def recurse( nodeset, &block ) + for node in nodeset + yield node + recurse( node, &block ) if node.node_type == :element + end + end + + + + # Builds a nodeset of all of the preceding nodes of the supplied node, + # in reverse document order + # preceding:: includes every element in the document that precedes this node, + # except for ancestors + def preceding( node ) + #puts "IN PRECEDING" + ancestors = [] + p = node.parent + while p + ancestors << p + p = p.parent + end + + acc = [] + p = preceding_node_of( node ) + #puts "P = #{p.inspect}" + while p + if ancestors.include? p + ancestors.delete(p) + else + acc << p + end + p = preceding_node_of( p ) + #puts "P = #{p.inspect}" + end + acc + end + + def preceding_node_of( node ) + #puts "NODE: #{node.inspect}" + #puts "PREVIOUS NODE: #{node.previous_sibling_node.inspect}" + #puts "PARENT NODE: #{node.parent}" + psn = node.previous_sibling_node + if psn.nil? + if node.parent.nil? or node.parent.class == Document + return nil + end + return node.parent + #psn = preceding_node_of( node.parent ) + end + while psn and psn.kind_of? Element and psn.children.size > 0 + psn = psn.children[-1] + end + psn + end + + def following( node ) + #puts "IN PRECEDING" + acc = [] + p = next_sibling_node( node ) + #puts "P = #{p.inspect}" + while p + acc << p + p = following_node_of( p ) + #puts "P = #{p.inspect}" + end + acc + end + + def following_node_of( node ) + #puts "NODE: #{node.inspect}" + #puts "PREVIOUS NODE: #{node.previous_sibling_node.inspect}" + #puts "PARENT NODE: #{node.parent}" + if node.kind_of? Element and node.children.size > 0 + return node.children[0] + end + return next_sibling_node(node) + end + + def next_sibling_node(node) + psn = node.next_sibling_node + while psn.nil? + if node.parent.nil? or node.parent.class == Document + return nil + end + node = node.parent + psn = node.next_sibling_node + #puts "psn = #{psn.inspect}" + end + return psn + end + + def norm b + case b + when true, false + return b + when 'true', 'false' + return Functions::boolean( b ) + when /^\d+(\.\d+)?$/ + return Functions::number( b ) + else + return Functions::string( b ) + end + end + + def equality_relational_compare( set1, op, set2 ) + #puts "EQ_REL_COMP(#{set1.inspect} #{op.inspect} #{set2.inspect})" + if set1.kind_of? Array and set2.kind_of? Array + #puts "#{set1.size} & #{set2.size}" + if set1.size == 1 and set2.size == 1 + set1 = set1[0] + set2 = set2[0] + elsif set1.size == 0 or set2.size == 0 + nd = set1.size==0 ? set2 : set1 + rv = nd.collect { |il| compare( il, op, nil ) } + #puts "RV = #{rv.inspect}" + return rv + else + res = [] + enum = SyncEnumerator.new( set1, set2 ).each { |i1, i2| + #puts "i1 = #{i1.inspect} (#{i1.class.name})" + #puts "i2 = #{i2.inspect} (#{i2.class.name})" + i1 = norm( i1 ) + i2 = norm( i2 ) + res << compare( i1, op, i2 ) + } + return res + end + end + #puts "EQ_REL_COMP: #{set1.inspect} (#{set1.class.name}), #{op}, #{set2.inspect} (#{set2.class.name})" + #puts "COMPARING VALUES" + # If one is nodeset and other is number, compare number to each item + # in nodeset s.t. number op number(string(item)) + # If one is nodeset and other is string, compare string to each item + # in nodeset s.t. string op string(item) + # If one is nodeset and other is boolean, compare boolean to each item + # in nodeset s.t. boolean op boolean(item) + if set1.kind_of? Array or set2.kind_of? Array + #puts "ISA ARRAY" + if set1.kind_of? Array + a = set1 + b = set2 + else + a = set2 + b = set1 + end + + case b + when true, false + return a.collect {|v| compare( Functions::boolean(v), op, b ) } + when Numeric + return a.collect {|v| compare( Functions::number(v), op, b )} + when /^\d+(\.\d+)?$/ + b = Functions::number( b ) + #puts "B = #{b.inspect}" + return a.collect {|v| compare( Functions::number(v), op, b )} + else + #puts "Functions::string( #{b}(#{b.class.name}) ) = #{Functions::string(b)}" + b = Functions::string( b ) + return a.collect { |v| compare( Functions::string(v), op, b ) } + end + else + # If neither is nodeset, + # If op is = or != + # If either boolean, convert to boolean + # If either number, convert to number + # Else, convert to string + # Else + # Convert both to numbers and compare + s1 = set1.to_s + s2 = set2.to_s + #puts "EQ_REL_COMP: #{set1}=>#{s1}, #{set2}=>#{s2}" + if s1 == 'true' or s1 == 'false' or s2 == 'true' or s2 == 'false' + #puts "Functions::boolean(#{set1})=>#{Functions::boolean(set1)}" + #puts "Functions::boolean(#{set2})=>#{Functions::boolean(set2)}" + set1 = Functions::boolean( set1 ) + set2 = Functions::boolean( set2 ) + else + if op == :eq or op == :neq + if s1 =~ /^\d+(\.\d+)?$/ or s2 =~ /^\d+(\.\d+)?$/ + set1 = Functions::number( s1 ) + set2 = Functions::number( s2 ) + else + set1 = Functions::string( set1 ) + set2 = Functions::string( set2 ) + end + else + set1 = Functions::number( set1 ) + set2 = Functions::number( set2 ) + end + end + #puts "EQ_REL_COMP: #{set1} #{op} #{set2}" + #puts ">>> #{compare( set1, op, set2 )}" + return compare( set1, op, set2 ) + end + return false + end + + def compare a, op, b + #puts "COMPARE #{a.inspect}(#{a.class.name}) #{op} #{b.inspect}(#{b.class.name})" + case op + when :eq + a == b + when :neq + a != b + when :lt + a < b + when :lteq + a <= b + when :gt + a > b + when :gteq + a >= b + when :and + a and b + when :or + a or b + else + false + end + end + end +end |