summaryrefslogtreecommitdiff
path: root/trunk/lib/rexml/xpath_parser.rb
diff options
context:
space:
mode:
Diffstat (limited to 'trunk/lib/rexml/xpath_parser.rb')
-rw-r--r--trunk/lib/rexml/xpath_parser.rb792
1 files changed, 0 insertions, 792 deletions
diff --git a/trunk/lib/rexml/xpath_parser.rb b/trunk/lib/rexml/xpath_parser.rb
deleted file mode 100644
index 4275c2db0e..0000000000
--- a/trunk/lib/rexml/xpath_parser.rb
+++ /dev/null
@@ -1,792 +0,0 @@
-require 'rexml/namespace'
-require 'rexml/xmltokens'
-require 'rexml/attribute'
-require 'rexml/syncenumerator'
-require 'rexml/parsers/xpathparser'
-
-class Object
- def dclone
- clone
- end
-end
-class Symbol
- def dclone ; self ; end
-end
-class Fixnum
- def dclone ; self ; end
-end
-class Float
- def dclone ; self ; end
-end
-class Array
- def dclone
- klone = self.clone
- klone.clear
- self.each{|v| klone << v.dclone}
- klone
- end
-end
-
-module REXML
- # You don't want to use this class. Really. Use XPath, which is a wrapper
- # for this class. Believe me. You don't want to poke around in here.
- # There is strange, dark magic at work in this code. Beware. Go back! Go
- # back while you still can!
- class XPathParser
- include XMLTokens
- LITERAL = /^'([^']*)'|^"([^"]*)"/u
-
- def initialize( )
- @parser = REXML::Parsers::XPathParser.new
- @namespaces = nil
- @variables = {}
- end
-
- def namespaces=( namespaces={} )
- Functions::namespace_context = namespaces
- @namespaces = namespaces
- end
-
- def variables=( vars={} )
- Functions::variables = vars
- @variables = vars
- end
-
- def parse path, nodeset
- #puts "#"*40
- path_stack = @parser.parse( path )
- #puts "PARSE: #{path} => #{path_stack.inspect}"
- #puts "PARSE: nodeset = #{nodeset.inspect}"
- match( path_stack, nodeset )
- end
-
- def get_first path, nodeset
- #puts "#"*40
- path_stack = @parser.parse( path )
- #puts "PARSE: #{path} => #{path_stack.inspect}"
- #puts "PARSE: nodeset = #{nodeset.inspect}"
- first( path_stack, nodeset )
- end
-
- def predicate path, nodeset
- path_stack = @parser.parse( path )
- expr( path_stack, nodeset )
- end
-
- def []=( variable_name, value )
- @variables[ variable_name ] = value
- end
-
-
- # Performs a depth-first (document order) XPath search, and returns the
- # first match. This is the fastest, lightest way to return a single result.
- #
- # FIXME: This method is incomplete!
- def first( path_stack, node )
- #puts "#{depth}) Entering match( #{path.inspect}, #{tree.inspect} )"
- return nil if path.size == 0
-
- case path[0]
- when :document
- # do nothing
- return first( path[1..-1], node )
- when :child
- for c in node.children
- #puts "#{depth}) CHILD checking #{name(c)}"
- r = first( path[1..-1], c )
- #puts "#{depth}) RETURNING #{r.inspect}" if r
- return r if r
- end
- when :qname
- name = path[2]
- #puts "#{depth}) QNAME #{name(tree)} == #{name} (path => #{path.size})"
- if node.name == name
- #puts "#{depth}) RETURNING #{tree.inspect}" if path.size == 3
- return node if path.size == 3
- return first( path[3..-1], node )
- else
- return nil
- end
- when :descendant_or_self
- r = first( path[1..-1], node )
- return r if r
- for c in node.children
- r = first( path, c )
- return r if r
- end
- when :node
- return first( path[1..-1], node )
- when :any
- return first( path[1..-1], node )
- end
- return nil
- end
-
-
- def match( path_stack, nodeset )
- #puts "MATCH: path_stack = #{path_stack.inspect}"
- #puts "MATCH: nodeset = #{nodeset.inspect}"
- r = expr( path_stack, nodeset )
- #puts "MAIN EXPR => #{r.inspect}"
- r
- end
-
- private
-
-
- # Returns a String namespace for a node, given a prefix
- # The rules are:
- #
- # 1. Use the supplied namespace mapping first.
- # 2. If no mapping was supplied, use the context node to look up the namespace
- def get_namespace( node, prefix )
- if @namespaces
- return @namespaces[prefix] || ''
- else
- return node.namespace( prefix ) if node.node_type == :element
- return ''
- end
- end
-
-
- # Expr takes a stack of path elements and a set of nodes (either a Parent
- # or an Array and returns an Array of matching nodes
- ALL = [ :attribute, :element, :text, :processing_instruction, :comment ]
- ELEMENTS = [ :element ]
- def expr( path_stack, nodeset, context=nil )
- #puts "#"*15
- #puts "In expr with #{path_stack.inspect}"
- #puts "Returning" if path_stack.length == 0 || nodeset.length == 0
- node_types = ELEMENTS
- return nodeset if path_stack.length == 0 || nodeset.length == 0
- while path_stack.length > 0
- #puts "#"*5
- #puts "Path stack = #{path_stack.inspect}"
- #puts "Nodeset is #{nodeset.inspect}"
- if nodeset.length == 0
- path_stack.clear
- return []
- end
- case (op = path_stack.shift)
- when :document
- nodeset = [ nodeset[0].root_node ]
- #puts ":document, nodeset = #{nodeset.inspect}"
-
- when :qname
- #puts "IN QNAME"
- prefix = path_stack.shift
- name = path_stack.shift
- nodeset.delete_if do |node|
- # FIXME: This DOUBLES the time XPath searches take
- ns = get_namespace( node, prefix )
- #puts "NS = #{ns.inspect}"
- #puts "node.node_type == :element => #{node.node_type == :element}"
- if node.node_type == :element
- #puts "node.name == #{name} => #{node.name == name}"
- if node.name == name
- #puts "node.namespace == #{ns.inspect} => #{node.namespace == ns}"
- end
- end
- !(node.node_type == :element and
- node.name == name and
- node.namespace == ns )
- end
- node_types = ELEMENTS
-
- when :any
- #puts "ANY 1: nodeset = #{nodeset.inspect}"
- #puts "ANY 1: node_types = #{node_types.inspect}"
- nodeset.delete_if { |node| !node_types.include?(node.node_type) }
- #puts "ANY 2: nodeset = #{nodeset.inspect}"
-
- when :self
- # This space left intentionally blank
-
- when :processing_instruction
- target = path_stack.shift
- nodeset.delete_if do |node|
- (node.node_type != :processing_instruction) or
- ( target!='' and ( node.target != target ) )
- end
-
- when :text
- nodeset.delete_if { |node| node.node_type != :text }
-
- when :comment
- nodeset.delete_if { |node| node.node_type != :comment }
-
- when :node
- # This space left intentionally blank
- node_types = ALL
-
- when :child
- new_nodeset = []
- nt = nil
- nodeset.each do |node|
- nt = node.node_type
- new_nodeset += node.children if nt == :element or nt == :document
- end
- nodeset = new_nodeset
- node_types = ELEMENTS
-
- when :literal
- return path_stack.shift
-
- when :attribute
- new_nodeset = []
- case path_stack.shift
- when :qname
- prefix = path_stack.shift
- name = path_stack.shift
- for element in nodeset
- if element.node_type == :element
- #puts "Element name = #{element.name}"
- #puts "get_namespace( #{element.inspect}, #{prefix} ) = #{get_namespace(element, prefix)}"
- attrib = element.attribute( name, get_namespace(element, prefix) )
- #puts "attrib = #{attrib.inspect}"
- new_nodeset << attrib if attrib
- end
- end
- when :any
- #puts "ANY"
- for element in nodeset
- if element.node_type == :element
- new_nodeset += element.attributes.to_a
- end
- end
- end
- nodeset = new_nodeset
-
- when :parent
- #puts "PARENT 1: nodeset = #{nodeset}"
- nodeset = nodeset.collect{|n| n.parent}.compact
- #nodeset = expr(path_stack.dclone, nodeset.collect{|n| n.parent}.compact)
- #puts "PARENT 2: nodeset = #{nodeset.inspect}"
- node_types = ELEMENTS
-
- when :ancestor
- new_nodeset = []
- nodeset.each do |node|
- while node.parent
- node = node.parent
- new_nodeset << node unless new_nodeset.include? node
- end
- end
- nodeset = new_nodeset
- node_types = ELEMENTS
-
- when :ancestor_or_self
- new_nodeset = []
- nodeset.each do |node|
- if node.node_type == :element
- new_nodeset << node
- while ( node.parent )
- node = node.parent
- new_nodeset << node unless new_nodeset.include? node
- end
- end
- end
- nodeset = new_nodeset
- node_types = ELEMENTS
-
- when :predicate
- new_nodeset = []
- subcontext = { :size => nodeset.size }
- pred = path_stack.shift
- nodeset.each_with_index { |node, index|
- subcontext[ :node ] = node
- #puts "PREDICATE SETTING CONTEXT INDEX TO #{index+1}"
- subcontext[ :index ] = index+1
- pc = pred.dclone
- #puts "#{node.hash}) Recursing with #{pred.inspect} and [#{node.inspect}]"
- result = expr( pc, [node], subcontext )
- result = result[0] if result.kind_of? Array and result.length == 1
- #puts "#{node.hash}) Result = #{result.inspect} (#{result.class.name})"
- if result.kind_of? Numeric
- #puts "Adding node #{node.inspect}" if result == (index+1)
- new_nodeset << node if result == (index+1)
- elsif result.instance_of? Array
- if result.size > 0 and result.inject(false) {|k,s| s or k}
- #puts "Adding node #{node.inspect}" if result.size > 0
- new_nodeset << node if result.size > 0
- end
- else
- #puts "Adding node #{node.inspect}" if result
- new_nodeset << node if result
- end
- }
- #puts "New nodeset = #{new_nodeset.inspect}"
- #puts "Path_stack = #{path_stack.inspect}"
- nodeset = new_nodeset
-=begin
- predicate = path_stack.shift
- ns = nodeset.clone
- result = expr( predicate, ns )
- #puts "Result = #{result.inspect} (#{result.class.name})"
- #puts "nodeset = #{nodeset.inspect}"
- if result.kind_of? Array
- nodeset = result.zip(ns).collect{|m,n| n if m}.compact
- else
- nodeset = result ? nodeset : []
- end
- #puts "Outgoing NS = #{nodeset.inspect}"
-=end
-
- when :descendant_or_self
- rv = descendant_or_self( path_stack, nodeset )
- path_stack.clear
- nodeset = rv
- node_types = ELEMENTS
-
- when :descendant
- results = []
- nt = nil
- nodeset.each do |node|
- nt = node.node_type
- results += expr( path_stack.dclone.unshift( :descendant_or_self ),
- node.children ) if nt == :element or nt == :document
- end
- nodeset = results
- node_types = ELEMENTS
-
- when :following_sibling
- #puts "FOLLOWING_SIBLING 1: nodeset = #{nodeset}"
- results = []
- nodeset.each do |node|
- next if node.parent.nil?
- all_siblings = node.parent.children
- current_index = all_siblings.index( node )
- following_siblings = all_siblings[ current_index+1 .. -1 ]
- results += expr( path_stack.dclone, following_siblings )
- end
- #puts "FOLLOWING_SIBLING 2: nodeset = #{nodeset}"
- nodeset = results
-
- when :preceding_sibling
- results = []
- nodeset.each do |node|
- next if node.parent.nil?
- all_siblings = node.parent.children
- current_index = all_siblings.index( node )
- preceding_siblings = all_siblings[ 0, current_index ].reverse
- results += preceding_siblings
- end
- nodeset = results
- node_types = ELEMENTS
-
- when :preceding
- new_nodeset = []
- nodeset.each do |node|
- new_nodeset += preceding( node )
- end
- #puts "NEW NODESET => #{new_nodeset.inspect}"
- nodeset = new_nodeset
- node_types = ELEMENTS
-
- when :following
- new_nodeset = []
- nodeset.each do |node|
- new_nodeset += following( node )
- end
- nodeset = new_nodeset
- node_types = ELEMENTS
-
- when :namespace
- #puts "In :namespace"
- new_nodeset = []
- prefix = path_stack.shift
- nodeset.each do |node|
- if (node.node_type == :element or node.node_type == :attribute)
- if @namespaces
- namespaces = @namespaces
- elsif (node.node_type == :element)
- namespaces = node.namespaces
- else
- namespaces = node.element.namesapces
- end
- #puts "Namespaces = #{namespaces.inspect}"
- #puts "Prefix = #{prefix.inspect}"
- #puts "Node.namespace = #{node.namespace}"
- if (node.namespace == namespaces[prefix])
- new_nodeset << node
- end
- end
- end
- nodeset = new_nodeset
-
- when :variable
- var_name = path_stack.shift
- return @variables[ var_name ]
-
- # :and, :or, :eq, :neq, :lt, :lteq, :gt, :gteq
- # TODO: Special case for :or and :and -- not evaluate the right
- # operand if the left alone determines result (i.e. is true for
- # :or and false for :and).
- when :eq, :neq, :lt, :lteq, :gt, :gteq, :and, :or
- left = expr( path_stack.shift, nodeset.dup, context )
- #puts "LEFT => #{left.inspect} (#{left.class.name})"
- right = expr( path_stack.shift, nodeset.dup, context )
- #puts "RIGHT => #{right.inspect} (#{right.class.name})"
- res = equality_relational_compare( left, op, right )
- #puts "RES => #{res.inspect}"
- return res
-
- when :and
- left = expr( path_stack.shift, nodeset.dup, context )
- #puts "LEFT => #{left.inspect} (#{left.class.name})"
- if left == false || left.nil? || !left.inject(false) {|a,b| a | b}
- return []
- end
- right = expr( path_stack.shift, nodeset.dup, context )
- #puts "RIGHT => #{right.inspect} (#{right.class.name})"
- res = equality_relational_compare( left, op, right )
- #puts "RES => #{res.inspect}"
- return res
-
- when :div
- left = Functions::number(expr(path_stack.shift, nodeset, context)).to_f
- right = Functions::number(expr(path_stack.shift, nodeset, context)).to_f
- return (left / right)
-
- when :mod
- left = Functions::number(expr(path_stack.shift, nodeset, context )).to_f
- right = Functions::number(expr(path_stack.shift, nodeset, context )).to_f
- return (left % right)
-
- when :mult
- left = Functions::number(expr(path_stack.shift, nodeset, context )).to_f
- right = Functions::number(expr(path_stack.shift, nodeset, context )).to_f
- return (left * right)
-
- when :plus
- left = Functions::number(expr(path_stack.shift, nodeset, context )).to_f
- right = Functions::number(expr(path_stack.shift, nodeset, context )).to_f
- return (left + right)
-
- when :minus
- left = Functions::number(expr(path_stack.shift, nodeset, context )).to_f
- right = Functions::number(expr(path_stack.shift, nodeset, context )).to_f
- return (left - right)
-
- when :union
- left = expr( path_stack.shift, nodeset, context )
- right = expr( path_stack.shift, nodeset, context )
- return (left | right)
-
- when :neg
- res = expr( path_stack, nodeset, context )
- return -(res.to_f)
-
- when :not
- when :function
- func_name = path_stack.shift.tr('-','_')
- arguments = path_stack.shift
- #puts "FUNCTION 0: #{func_name}(#{arguments.collect{|a|a.inspect}.join(', ')})"
- subcontext = context ? nil : { :size => nodeset.size }
-
- res = []
- cont = context
- nodeset.each_with_index { |n, i|
- if subcontext
- subcontext[:node] = n
- subcontext[:index] = i
- cont = subcontext
- end
- arg_clone = arguments.dclone
- args = arg_clone.collect { |arg|
- #puts "FUNCTION 1: Calling expr( #{arg.inspect}, [#{n.inspect}] )"
- expr( arg, [n], cont )
- }
- #puts "FUNCTION 2: #{func_name}(#{args.collect{|a|a.inspect}.join(', ')})"
- Functions.context = cont
- res << Functions.send( func_name, *args )
- #puts "FUNCTION 3: #{res[-1].inspect}"
- }
- return res
-
- end
- end # while
- #puts "EXPR returning #{nodeset.inspect}"
- return nodeset
- end
-
-
- ##########################################################
- # FIXME
- # The next two methods are BAD MOJO!
- # This is my achilles heel. If anybody thinks of a better
- # way of doing this, be my guest. This really sucks, but
- # it is a wonder it works at all.
- # ########################################################
-
- def descendant_or_self( path_stack, nodeset )
- rs = []
- #puts "#"*80
- #puts "PATH_STACK = #{path_stack.inspect}"
- #puts "NODESET = #{nodeset.collect{|n|n.inspect}.inspect}"
- d_o_s( path_stack, nodeset, rs )
- #puts "RS = #{rs.collect{|n|n.inspect}.inspect}"
- document_order(rs.flatten.compact)
- #rs.flatten.compact
- end
-
- def d_o_s( p, ns, r )
- #puts "IN DOS with #{ns.inspect}; ALREADY HAVE #{r.inspect}"
- nt = nil
- ns.each_index do |i|
- n = ns[i]
- #puts "P => #{p.inspect}"
- x = expr( p.dclone, [ n ] )
- nt = n.node_type
- d_o_s( p, n.children, x ) if nt == :element or nt == :document and n.children.size > 0
- r.concat(x) if x.size > 0
- end
- end
-
-
- # Reorders an array of nodes so that they are in document order
- # It tries to do this efficiently.
- #
- # FIXME: I need to get rid of this, but the issue is that most of the XPath
- # interpreter functions as a filter, which means that we lose context going
- # in and out of function calls. If I knew what the index of the nodes was,
- # I wouldn't have to do this. Maybe add a document IDX for each node?
- # Problems with mutable documents. Or, rewrite everything.
- def document_order( array_of_nodes )
- new_arry = []
- array_of_nodes.each { |node|
- node_idx = []
- np = node.node_type == :attribute ? node.element : node
- while np.parent and np.parent.node_type == :element
- node_idx << np.parent.index( np )
- np = np.parent
- end
- new_arry << [ node_idx.reverse, node ]
- }
- #puts "new_arry = #{new_arry.inspect}"
- new_arry.sort{ |s1, s2| s1[0] <=> s2[0] }.collect{ |s| s[1] }
- end
-
-
- def recurse( nodeset, &block )
- for node in nodeset
- yield node
- recurse( node, &block ) if node.node_type == :element
- end
- end
-
-
-
- # Builds a nodeset of all of the preceding nodes of the supplied node,
- # in reverse document order
- # preceding:: includes every element in the document that precedes this node,
- # except for ancestors
- def preceding( node )
- #puts "IN PRECEDING"
- ancestors = []
- p = node.parent
- while p
- ancestors << p
- p = p.parent
- end
-
- acc = []
- p = preceding_node_of( node )
- #puts "P = #{p.inspect}"
- while p
- if ancestors.include? p
- ancestors.delete(p)
- else
- acc << p
- end
- p = preceding_node_of( p )
- #puts "P = #{p.inspect}"
- end
- acc
- end
-
- def preceding_node_of( node )
- #puts "NODE: #{node.inspect}"
- #puts "PREVIOUS NODE: #{node.previous_sibling_node.inspect}"
- #puts "PARENT NODE: #{node.parent}"
- psn = node.previous_sibling_node
- if psn.nil?
- if node.parent.nil? or node.parent.class == Document
- return nil
- end
- return node.parent
- #psn = preceding_node_of( node.parent )
- end
- while psn and psn.kind_of? Element and psn.children.size > 0
- psn = psn.children[-1]
- end
- psn
- end
-
- def following( node )
- #puts "IN PRECEDING"
- acc = []
- p = next_sibling_node( node )
- #puts "P = #{p.inspect}"
- while p
- acc << p
- p = following_node_of( p )
- #puts "P = #{p.inspect}"
- end
- acc
- end
-
- def following_node_of( node )
- #puts "NODE: #{node.inspect}"
- #puts "PREVIOUS NODE: #{node.previous_sibling_node.inspect}"
- #puts "PARENT NODE: #{node.parent}"
- if node.kind_of? Element and node.children.size > 0
- return node.children[0]
- end
- return next_sibling_node(node)
- end
-
- def next_sibling_node(node)
- psn = node.next_sibling_node
- while psn.nil?
- if node.parent.nil? or node.parent.class == Document
- return nil
- end
- node = node.parent
- psn = node.next_sibling_node
- #puts "psn = #{psn.inspect}"
- end
- return psn
- end
-
- def norm b
- case b
- when true, false
- return b
- when 'true', 'false'
- return Functions::boolean( b )
- when /^\d+(\.\d+)?$/
- return Functions::number( b )
- else
- return Functions::string( b )
- end
- end
-
- def equality_relational_compare( set1, op, set2 )
- #puts "EQ_REL_COMP(#{set1.inspect} #{op.inspect} #{set2.inspect})"
- if set1.kind_of? Array and set2.kind_of? Array
- #puts "#{set1.size} & #{set2.size}"
- if set1.size == 1 and set2.size == 1
- set1 = set1[0]
- set2 = set2[0]
- elsif set1.size == 0 or set2.size == 0
- nd = set1.size==0 ? set2 : set1
- rv = nd.collect { |il| compare( il, op, nil ) }
- #puts "RV = #{rv.inspect}"
- return rv
- else
- res = []
- enum = SyncEnumerator.new( set1, set2 ).each { |i1, i2|
- #puts "i1 = #{i1.inspect} (#{i1.class.name})"
- #puts "i2 = #{i2.inspect} (#{i2.class.name})"
- i1 = norm( i1 )
- i2 = norm( i2 )
- res << compare( i1, op, i2 )
- }
- return res
- end
- end
- #puts "EQ_REL_COMP: #{set1.inspect} (#{set1.class.name}), #{op}, #{set2.inspect} (#{set2.class.name})"
- #puts "COMPARING VALUES"
- # If one is nodeset and other is number, compare number to each item
- # in nodeset s.t. number op number(string(item))
- # If one is nodeset and other is string, compare string to each item
- # in nodeset s.t. string op string(item)
- # If one is nodeset and other is boolean, compare boolean to each item
- # in nodeset s.t. boolean op boolean(item)
- if set1.kind_of? Array or set2.kind_of? Array
- #puts "ISA ARRAY"
- if set1.kind_of? Array
- a = set1
- b = set2
- else
- a = set2
- b = set1
- end
-
- case b
- when true, false
- return a.collect {|v| compare( Functions::boolean(v), op, b ) }
- when Numeric
- return a.collect {|v| compare( Functions::number(v), op, b )}
- when /^\d+(\.\d+)?$/
- b = Functions::number( b )
- #puts "B = #{b.inspect}"
- return a.collect {|v| compare( Functions::number(v), op, b )}
- else
- #puts "Functions::string( #{b}(#{b.class.name}) ) = #{Functions::string(b)}"
- b = Functions::string( b )
- return a.collect { |v| compare( Functions::string(v), op, b ) }
- end
- else
- # If neither is nodeset,
- # If op is = or !=
- # If either boolean, convert to boolean
- # If either number, convert to number
- # Else, convert to string
- # Else
- # Convert both to numbers and compare
- s1 = set1.to_s
- s2 = set2.to_s
- #puts "EQ_REL_COMP: #{set1}=>#{s1}, #{set2}=>#{s2}"
- if s1 == 'true' or s1 == 'false' or s2 == 'true' or s2 == 'false'
- #puts "Functions::boolean(#{set1})=>#{Functions::boolean(set1)}"
- #puts "Functions::boolean(#{set2})=>#{Functions::boolean(set2)}"
- set1 = Functions::boolean( set1 )
- set2 = Functions::boolean( set2 )
- else
- if op == :eq or op == :neq
- if s1 =~ /^\d+(\.\d+)?$/ or s2 =~ /^\d+(\.\d+)?$/
- set1 = Functions::number( s1 )
- set2 = Functions::number( s2 )
- else
- set1 = Functions::string( set1 )
- set2 = Functions::string( set2 )
- end
- else
- set1 = Functions::number( set1 )
- set2 = Functions::number( set2 )
- end
- end
- #puts "EQ_REL_COMP: #{set1} #{op} #{set2}"
- #puts ">>> #{compare( set1, op, set2 )}"
- return compare( set1, op, set2 )
- end
- return false
- end
-
- def compare a, op, b
- #puts "COMPARE #{a.inspect}(#{a.class.name}) #{op} #{b.inspect}(#{b.class.name})"
- case op
- when :eq
- a == b
- when :neq
- a != b
- when :lt
- a < b
- when :lteq
- a <= b
- when :gt
- a > b
- when :gteq
- a >= b
- when :and
- a and b
- when :or
- a or b
- else
- false
- end
- end
- end
-end