summaryrefslogtreecommitdiff
path: root/ruby_1_8_6/lib/rexml/parsers
diff options
context:
space:
mode:
Diffstat (limited to 'ruby_1_8_6/lib/rexml/parsers')
-rw-r--r--ruby_1_8_6/lib/rexml/parsers/baseparser.rb503
-rw-r--r--ruby_1_8_6/lib/rexml/parsers/lightparser.rb60
-rw-r--r--ruby_1_8_6/lib/rexml/parsers/pullparser.rb196
-rw-r--r--ruby_1_8_6/lib/rexml/parsers/sax2parser.rb238
-rw-r--r--ruby_1_8_6/lib/rexml/parsers/streamparser.rb46
-rw-r--r--ruby_1_8_6/lib/rexml/parsers/treeparser.rb97
-rw-r--r--ruby_1_8_6/lib/rexml/parsers/ultralightparser.rb56
-rw-r--r--ruby_1_8_6/lib/rexml/parsers/xpathparser.rb698
8 files changed, 0 insertions, 1894 deletions
diff --git a/ruby_1_8_6/lib/rexml/parsers/baseparser.rb b/ruby_1_8_6/lib/rexml/parsers/baseparser.rb
deleted file mode 100644
index fc2354a67f..0000000000
--- a/ruby_1_8_6/lib/rexml/parsers/baseparser.rb
+++ /dev/null
@@ -1,503 +0,0 @@
-require 'rexml/parseexception'
-require 'rexml/undefinednamespaceexception'
-require 'rexml/source'
-require 'set'
-
-module REXML
- module Parsers
- # = Using the Pull Parser
- # <em>This API is experimental, and subject to change.</em>
- # parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
- # while parser.has_next?
- # res = parser.next
- # puts res[1]['att'] if res.start_tag? and res[0] == 'b'
- # end
- # See the PullEvent class for information on the content of the results.
- # The data is identical to the arguments passed for the various events to
- # the StreamListener API.
- #
- # Notice that:
- # parser = PullParser.new( "<a>BAD DOCUMENT" )
- # while parser.has_next?
- # res = parser.next
- # raise res[1] if res.error?
- # end
- #
- # Nat Price gave me some good ideas for the API.
- class BaseParser
- # Building blocks for XML names, kept as strings so they can be
- # interpolated into the patterns below. NAME_STR has two capture groups
- # (optional prefix, local part); UNAME_STR has the same shape without
- # capture groups.
- NCNAME_STR= '[\w:][\-\w\d.]*'
- NAME_STR= "(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})"
- UNAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}"
-
- # Name, name-token and entity/character reference fragments.
- NAMECHAR = '[\-\w\d\.:]'
- NAME = "([\\w:]#{NAMECHAR}*)"
- NMTOKEN = "(?:#{NAMECHAR})+"
- NMTOKENS = "#{NMTOKEN}(\\s+#{NMTOKEN})*"
- REFERENCE = "(?:&#{NAME};|&#\\d+;|&#x[0-9a-fA-F]+;)"
- REFERENCE_RE = /#{REFERENCE}/
-
- # Markup patterns. In #pull the *_START patterns select a branch and the
- # matching *_PATTERN is then used to consume the construct.
- DOCTYPE_START = /\A\s*<!DOCTYPE\s/um
- DOCTYPE_PATTERN = /\s*<!DOCTYPE\s+(.*?)(\[|>)/um
- ATTRIBUTE_PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\4/um
- COMMENT_START = /\A<!--/u
- COMMENT_PATTERN = /<!--(.*?)-->/um
- CDATA_START = /\A<!\[CDATA\[/u
- # Despite its name, #pull uses CDATA_END to recognise the "]>" that
- # closes a doctype's internal subset (it emits :end_doctype on a match).
- CDATA_END = /^\s*\]\s*>/um
- CDATA_PATTERN = /<!\[CDATA\[(.*?)\]\]>/um
- XMLDECL_START = /\A<\?xml\s/u;
- XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>/um
- INSTRUCTION_START = /\A<\?/u
- INSTRUCTION_PATTERN = /<\?(.*?)(\s+.*?)?\?>/um
- # Start tag. As read by #pull: group 1 is the full tag name, group 2 its
- # prefix, group 4 the raw attribute text and group 6 the "/" of an
- # empty-element tag.
- TAG_MATCH = /^<((?>#{NAME_STR}))\s*((?>\s+#{UNAME_STR}\s*=\s*(["']).*?\5)*)\s*(\/)?>/um
- CLOSE_MATCH = /^\s*<\/(#{NAME_STR})\s*>/um
-
-      # Patterns that pick the pseudo-attributes out of the body of an XML
-      # declaration (the text between "<?xml" and "?>"). Capture group 1 is
-      # the attribute value. Whitespace on either side of "=" is optional
-      # for all three; STANDALONE previously demanded exactly one whitespace
-      # character after "=", so standalone="yes" was never recognised.
-      VERSION = /\bversion\s*=\s*["'](.*?)['"]/um
-      ENCODING = /\bencoding\s*=\s*["'](.*?)['"]/um
-      STANDALONE = /\bstandalone\s*=\s*["'](.*?)['"]/um
-
- # Patterns for declarations inside a doctype's internal subset. #pull
- # tests the *_START patterns against the next declaration to pick a
- # branch.
- ENTITY_START = /^\s*<!ENTITY/
- # Applied by #pull to the text after "<!DOCTYPE"; it reads group 1 as the
- # name and groups 2, 4 and 6 as the remaining doctype fields.
- IDENTITY = /^([!\*\w\-]+)(\s+#{NCNAME_STR})?(\s+["'](.*?)['"])?(\s+['"](.*?)["'])?/u
- ELEMENTDECL_START = /^\s*<!ELEMENT/um
- ELEMENTDECL_PATTERN = /^\s*(<!ELEMENT.*?)>/um
- SYSTEMENTITY = /^\s*(%.*?;)\s*$/um
- # String fragments from which the attribute-definition pattern is built.
- ENUMERATION = "\\(\\s*#{NMTOKEN}(?:\\s*\\|\\s*#{NMTOKEN})*\\s*\\)"
- NOTATIONTYPE = "NOTATION\\s+\\(\\s*#{NAME}(?:\\s*\\|\\s*#{NAME})*\\s*\\)"
- ENUMERATEDTYPE = "(?:(?:#{NOTATIONTYPE})|(?:#{ENUMERATION}))"
- ATTTYPE = "(CDATA|ID|IDREF|IDREFS|ENTITY|ENTITIES|NMTOKEN|NMTOKENS|#{ENUMERATEDTYPE})"
- ATTVALUE = "(?:\"((?:[^<&\"]|#{REFERENCE})*)\")|(?:'((?:[^<&']|#{REFERENCE})*)')"
- DEFAULTDECL = "(#REQUIRED|#IMPLIED|(?:(#FIXED\\s+)?#{ATTVALUE}))"
- ATTDEF = "\\s+#{NAME}\\s+#{ATTTYPE}\\s+#{DEFAULTDECL}"
- ATTDEF_RE = /#{ATTDEF}/
- ATTLISTDECL_START = /^\s*<!ATTLIST/um
- ATTLISTDECL_PATTERN = /^\s*<!ATTLIST\s+#{NAME}(?:#{ATTDEF})*\s*>/um
- NOTATIONDECL_START = /^\s*<!NOTATION/um
- # The two accepted forms of a NOTATION declaration; #pull tries PUBLIC
- # first and then SYSTEM.
- PUBLIC = /^\s*<!NOTATION\s+(\w[\-\w]*)\s+(PUBLIC)\s+(["'])(.*?)\3(?:\s+(["'])(.*?)\5)?\s*>/um
- SYSTEM = /^\s*<!NOTATION\s+(\w[\-\w]*)\s+(SYSTEM)\s+(["'])(.*?)\3\s*>/um
-
- # Character data: everything up to (not including) the next "<".
- TEXT_PATTERN = /\A([^<]*)/um
-
- # Entity constants
- PUBIDCHAR = "\x20\x0D\x0Aa-zA-Z0-9\\-()+,./:=?;!*@$_%#"
- SYSTEMLITERAL = %Q{((?:"[^"]*")|(?:'[^']*'))}
- PUBIDLITERAL = %Q{("[#{PUBIDCHAR}']*"|'[#{PUBIDCHAR}]*')}
- EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))"
- NDATADECL = "\\s+NDATA\\s+#{NAME}"
- PEREFERENCE = "%#{NAME};"
- # NOTE(review): the single-quoted alternative uses a capturing group where
- # the double-quoted one does not; #pull's `match.pop if match.size == 4`
- # looks like it compensates for the extra capture -- confirm before
- # changing either side.
- ENTITYVALUE = %Q{((?:"(?:[^%&"]|#{PEREFERENCE}|#{REFERENCE})*")|(?:'([^%&']|#{PEREFERENCE}|#{REFERENCE})*'))}
- PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})"
- ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
- PEDECL = "<!ENTITY\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
- GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
- ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um
-
- # An "&" that does not start a named entity reference; #normalize
- # rewrites every such "&" as "&amp;".
- EREFERENCE = /&(?!#{NAME};)/
-
- # The predefined entities. Each value is
- # [ pattern matching the reference, the reference text,
- #   the literal character, pattern matching the literal character ].
- DEFAULT_ENTITIES = {
- 'gt' => [/&gt;/, '&gt;', '>', />/],
- 'lt' => [/&lt;/, '&lt;', '<', /</],
- 'quot' => [/&quot;/, '&quot;', '"', /"/],
- "apos" => [/&apos;/, "&apos;", "'", /'/]
- }
-
-
- ######################################################################
- # These are patterns to identify common markup errors, to make the
- # error messages more informative.
- ######################################################################
- MISSING_ATTRIBUTE_QUOTES = /^<#{NAME_STR}\s+#{NAME_STR}\s*=\s*[^"']/um
-
-      # Builds a parser for +source+. All per-document state is set up by
-      # the stream= writer, so construction is a single assignment.
-      def initialize(source)
-        self.stream = source
-      end
-
- # Registers +listener+ to be told about every event this parser produces.
- #
- # On the first registration the parser's own #pull is wrapped (for this
- # instance only, via instance_eval): the original is kept as _old_pull and
- # the replacement passes each event to every listener's #receive before
- # returning it to the caller.
- def add_listener( listener )
- if !defined?(@listeners) or !@listeners
- @listeners = []
- instance_eval <<-EOL
- alias :_old_pull :pull
- def pull
- event = _old_pull
- @listeners.each do |listener|
- listener.receive event
- end
- event
- end
- EOL
- end
- @listeners << listener
- end
-
- attr_reader :source
-
-      # Points the parser at a new input and resets every piece of parsing
-      # state: the pending self-closing tag, the document phase, the open-tag
-      # list, the pushed-back event stack, the entities and the namespace
-      # stack.
-      def stream=(source)
-        @source = SourceFactory.create_from(source)
-        @closed = @document_status = nil
-        @tags, @stack, @entities = [], [], []
-        @nsstack = []
-      end
-
-      # Current position as reported by the source, or 0 when the source
-      # does not respond to #position.
-      def position
-        # FIXME
-        return 0 unless @source.respond_to? :position
-        @source.position
-      end
-
-      # True when nothing is left to deliver: the source is exhausted and no
-      # pushed-back events are waiting on the stack.
-      def empty?
-        @source.empty? && @stack.empty?
-      end
-
-      # True while the source still has input or pushed-back events are
-      # waiting. Synonymous with !empty?
-      def has_next?
-        !@source.empty? || !@stack.empty?
-      end
-
-      # Puts +token+ back at the head of the event stream so that it is the
-      # next event returned. Any number of events may be pushed back.
-      def unshift(token)
-        @stack.insert(0, token)
-      end
-
-      # Looks ahead at the event +depth+ positions from the head of the
-      # stream without consuming it; depth 0 is the next event. With a
-      # +depth+ of -1 the whole input is parsed and the last event obtained
-      # is returned. Anything below -1 raises.
-      #
-      # Every event that has to be parsed to satisfy the request is kept on
-      # the internal stack, so a deep peek holds that much of the document
-      # in memory.
-      def peek(depth = 0)
-        raise %Q[Illegal argument "#{depth}"] if depth < -1
-        pulled = []
-        if depth == -1
-          pulled << pull until empty?
-        else
-          pulled << pull while @stack.size + pulled.size <= depth
-        end
-        @stack += pulled unless pulled.empty?
-        @stack[depth]
-      end
-
- # Returns the next event as an Array whose first element is a Symbol
- # naming the event type (:start_element, :text, :end_document, ...) and
- # whose remaining elements are that event's data.
- #
- # Order of precedence on each call:
- # 1. the :end_element owed for a previously returned empty-element tag;
- # 2. :end_document when #empty? is true;
- # 3. an event pushed back with #unshift or buffered by #peek;
- # 4. the next construct parsed from the source.
- #
- # Raises REXML::ParseException (or UndefinedNamespaceException) on
- # malformed input.
- def pull
- # A self-closing tag was returned last time; emit its end event now.
- if @closed
- x, @closed = @closed, nil
- return [ :end_element, x ]
- end
- return [ :end_document ] if empty?
- return @stack.shift if @stack.size > 0
- #STDERR.puts @source.encoding
- @source.read if @source.buffer.size<2
- #STDERR.puts "BUFFER = #{@source.buffer.inspect}"
- # Prolog: nothing but comments, the XML declaration, processing
- # instructions, whitespace and the doctype has been seen so far.
- if @document_status == nil
- #@source.consume( /^\s*/um )
- word = @source.match( /^((?:\s+)|(?:<[^>]*>))/um )
- word = word[1] unless word.nil?
- #STDERR.puts "WORD = #{word.inspect}"
- case word
- when COMMENT_START
- return [ :comment, @source.match( COMMENT_PATTERN, true )[1] ]
- when XMLDECL_START
- #STDERR.puts "XMLDECL"
- results = @source.match( XMLDECL_PATTERN, true )[1]
- version = VERSION.match( results )
- version = version[1] unless version.nil?
- encoding = ENCODING.match(results)
- encoding = encoding[1] unless encoding.nil?
- # The declared encoding (possibly nil) is handed to the source.
- @source.encoding = encoding
- standalone = STANDALONE.match(results)
- standalone = standalone[1] unless standalone.nil?
- return [ :xmldecl, version, encoding, standalone ]
- when INSTRUCTION_START
- return [ :processing_instruction, *@source.match(INSTRUCTION_PATTERN, true)[1,2] ]
- when DOCTYPE_START
- md = @source.match( DOCTYPE_PATTERN, true )
- @nsstack.unshift(curr_ns=Set.new)
- identity = md[1]
- close = md[2]
- identity =~ IDENTITY
- name = $1
- raise REXML::ParseException.new("DOCTYPE is missing a name") if name.nil?
- pub_sys = $2.nil? ? nil : $2.strip
- long_name = $4.nil? ? nil : $4.strip
- uri = $6.nil? ? nil : $6.strip
- args = [ :start_doctype, name, pub_sys, long_name, uri ]
- # ">" means the doctype has no internal subset: queue the matching
- # :end_doctype right away. Otherwise ("[") switch to subset parsing.
- if close == ">"
- @document_status = :after_doctype
- @source.read if @source.buffer.size<2
- md = @source.match(/^\s*/um, true)
- @stack << [ :end_doctype ]
- else
- @document_status = :in_doctype
- end
- return args
- when /^\s+/
- # Leading whitespace: stay in the prolog and fall through to the
- # general parsing below.
- else
- # Anything else ends the prolog.
- @document_status = :after_doctype
- @source.read if @source.buffer.size<2
- md = @source.match(/\s*/um, true)
- end
- end
- # Inside the doctype's internal subset: one declaration per call.
- if @document_status == :in_doctype
- # NOTE(review): md is used without a nil check; if nothing up to a ">"
- # can be matched this looks like it raises NoMethodError outside the
- # begin/rescue below -- confirm.
- md = @source.match(/\s*(.*?>)/um)
- case md[1]
- when SYSTEMENTITY
- match = @source.match( SYSTEMENTITY, true )[1]
- return [ :externalentity, match ]
-
- when ELEMENTDECL_START
- return [ :elementdecl, @source.match( ELEMENTDECL_PATTERN, true )[1] ]
-
- when ENTITY_START
- # The compacted capture list becomes the event; slot 0 (the whole
- # match) is overwritten with the event type.
- match = @source.match( ENTITYDECL, true ).to_a.compact
- match[0] = :entitydecl
- ref = false
- # A leading "%" marks a parameter entity; it is moved to the end.
- if match[1] == '%'
- ref = true
- match.delete_at 1
- end
- # Now we have to sort out what kind of entity reference this is
- if match[2] == 'SYSTEM'
- # External reference
- match[3] = match[3][1..-2] # PUBID
- match.delete_at(4) if match.size > 4 # Chop out NDATA decl
- # match is [ :entity, name, SYSTEM, pubid(, ndata)? ]
- elsif match[2] == 'PUBLIC'
- # External reference
- match[3] = match[3][1..-2] # PUBID
- match[4] = match[4][1..-2] # HREF
- # match is [ :entity, name, PUBLIC, pubid, href ]
- else
- # Internal entity: strip the surrounding quotes from the value.
- match[2] = match[2][1..-2]
- match.pop if match.size == 4
- # match is [ :entity, name, value ]
- end
- match << '%' if ref
- return match
- when ATTLISTDECL_START
- md = @source.match( ATTLISTDECL_PATTERN, true )
- raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
- element = md[1]
- contents = md[0]
-
- # Collect attribute defaults, skipping #IMPLIED attributes. Any
- # xmlns:prefix attribute declares that prefix in the current scope.
- pairs = {}
- values = md[0].scan( ATTDEF_RE )
- values.each do |attdef|
- unless attdef[3] == "#IMPLIED"
- attdef.compact!
- val = attdef[3]
- val = attdef[4] if val == "#FIXED "
- pairs[attdef[0]] = val
- if attdef[0] =~ /^xmlns:(.*)/
- @nsstack[0] << $1
- end
- end
- end
- return [ :attlistdecl, element, pairs, contents ]
- when NOTATIONDECL_START
- md = nil
- if @source.match( PUBLIC )
- md = @source.match( PUBLIC, true )
- vals = [md[1],md[2],md[4],md[6]]
- elsif @source.match( SYSTEM )
- md = @source.match( SYSTEM, true )
- vals = [md[1],md[2],nil,md[4]]
- else
- raise REXML::ParseException.new( "error parsing notation: no matching pattern", @source )
- end
- return [ :notationdecl, *vals ]
- when CDATA_END
- # "]>" closes the internal subset.
- @document_status = :after_doctype
- @source.match( CDATA_END, true )
- return [ :end_doctype ]
- end
- end
- # Document content: tags, comments, CDATA, instructions and text.
- begin
- if @source.buffer[0] == ?<
- if @source.buffer[1] == ?/
- # End tag: must name the most recently opened element.
- @nsstack.shift
- last_tag = @tags.pop
- #md = @source.match_to_consume( '>', CLOSE_MATCH)
- md = @source.match( CLOSE_MATCH, true )
- raise REXML::ParseException.new( "Missing end tag for "+
- "'#{last_tag}' (got \"#{md[1]}\")",
- @source) unless last_tag == md[1]
- return [ :end_element, last_tag ]
- elsif @source.buffer[1] == ?!
- # "<!": a comment if the third character is "-", otherwise CDATA.
- md = @source.match(/\A(\s*[^>]*>)/um)
- #STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
- raise REXML::ParseException.new("Malformed node", @source) unless md
- if md[0][2] == ?-
- md = @source.match( COMMENT_PATTERN, true )
- return [ :comment, md[1] ] if md
- else
- md = @source.match( CDATA_PATTERN, true )
- return [ :cdata, md[1] ] if md
- end
- raise REXML::ParseException.new( "Declarations can only occur "+
- "in the doctype declaration.", @source)
- elsif @source.buffer[1] == ??
- md = @source.match( INSTRUCTION_PATTERN, true )
- return [ :processing_instruction, md[1], md[2] ] if md
- raise REXML::ParseException.new( "Bad instruction declaration",
- @source)
- else
- # Get the next tag
- md = @source.match(TAG_MATCH, true)
- unless md
- # Check for missing attribute quotes
- raise REXML::ParseException.new("missing attribute quote", @source) if @source.match(MISSING_ATTRIBUTE_QUOTES )
- raise REXML::ParseException.new("malformed XML: missing tag start", @source)
- end
- attributes = {}
- # Prefixes used by the tag and its attributes; each must be declared
- # somewhere on the namespace stack.
- prefixes = Set.new
- prefixes << md[2] if md[2]
- @nsstack.unshift(curr_ns=Set.new)
- if md[4].size > 0
- attrs = md[4].scan( ATTRIBUTE_PATTERN )
- raise REXML::ParseException.new( "error parsing attributes: [#{attrs.join ', '}], excess = \"#$'\"", @source) if $' and $'.strip.size > 0
- # a = full attribute name, b = prefix, c = local name,
- # d = quote character, e = value (capture order of ATTRIBUTE_PATTERN).
- # NOTE(review): the 'xml' namespace check compares d, which by that
- # capture order is the quote character rather than the value --
- # confirm whether e was intended.
- attrs.each { |a,b,c,d,e|
- if b == "xmlns"
- if c == "xml"
- if d != "http://www.w3.org/XML/1998/namespace"
- msg = "The 'xml' prefix must not be bound to any other namespace "+
- "(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
- raise REXML::ParseException.new( msg, @source, self )
- end
- elsif c == "xmlns"
- msg = "The 'xmlns' prefix must not be declared "+
- "(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
- raise REXML::ParseException.new( msg, @source, self)
- end
- curr_ns << c
- elsif b
- prefixes << b unless b == "xml"
- end
- attributes[a] = e
- }
- end
-
- # Verify that all of the prefixes have been defined
- for prefix in prefixes
- unless @nsstack.find{|k| k.member?(prefix)}
- raise UndefinedNamespaceException.new(prefix,@source,self)
- end
- end
-
- # Empty-element tag: remember the name so the next call returns the
- # matching :end_element. Otherwise record the tag as open.
- if md[6]
- @closed = md[1]
- @nsstack.shift
- else
- @tags.push( md[1] )
- end
- return [ :start_element, md[1], attributes ]
- end
- else
- # Character data up to the next "<".
- md = @source.match( TEXT_PATTERN, true )
- if md[0].length == 0
- @source.match( /(\s+)/, true )
- end
- #STDERR.puts "GOT #{md[1].inspect}" unless md[0].length == 0
- #return [ :text, "" ] if md[0].length == 0
- # unnormalized = Text::unnormalize( md[1], self )
- # return PullEvent.new( :text, md[1], unnormalized )
- return [ :text, md[1] ]
- end
- rescue REXML::UndefinedNamespaceException
- raise
- rescue REXML::ParseException
- raise
- # Any other failure is reported as a ParseException wrapping the cause.
- rescue Exception, NameError => error
- raise REXML::ParseException.new( "Exception parsing",
- @source, self, (error ? error : $!) )
- end
- # NOTE(review): every path through the begin block above appears to
- # return or raise, so this looks unreachable -- confirm.
- return [ :dummy ]
- end
-
-      # Resolves the entity named +reference+ to its replacement text.
-      #
-      # +entities+ (name => value, may be nil) is consulted first; if it has
-      # no truthy value for the name, the predefined entities are tried. The
-      # value found is passed through #unnormalize before being returned.
-      # Returns nil when the name is unknown.
-      def entity(reference, entities)
-        value = entities[reference] if entities
-        unless value
-          predefined = DEFAULT_ENTITIES[reference]
-          value = predefined[2] if predefined
-        end
-        unnormalize(value, entities) if value
-      end
-
-      # Escapes +input+ for inclusion in XML text and returns the escaped
-      # copy; +input+ itself is not modified.
-      #
-      # input::         the String to escape.
-      # entities::      optional name => value mapping; each occurrence of a
-      #                 value is replaced by the reference "&name;".
-      # entity_filter:: optional collection of entity names that must NOT be
-      #                 substituted.
-      #
-      # Bare ampersands become "&amp;", then the +entities+ substitutions are
-      # made, then the characters covered by DEFAULT_ENTITIES are replaced
-      # by their references.
-      def normalize( input, entities=nil, entity_filter=nil )
-        copy = input.clone
-        # Doing it like this rather than in a loop improves the speed
-        copy.gsub!( EREFERENCE, '&amp;' )
-        entities.each do |key, value|
-          # The filter lists entity names, so test the name being processed.
-          # (This used to call the #entity method with no arguments, which
-          # raised ArgumentError whenever a filter was supplied.)
-          next if entity_filter and entity_filter.include?(key)
-          copy.gsub!( value, "&#{key};" )
-        end if entities
-        # The substitutions above may have exposed new bare ampersands.
-        copy.gsub!( EREFERENCE, '&amp;' )
-        DEFAULT_ENTITIES.each do |key, value|
-          copy.gsub!( value[3], value[1] )
-        end
-        copy
-      end
-
- # Unescapes all possible entities
- #
- # Returns a copy of +string+ with line ends normalised to "\n" and
- # references expanded. +entities+ is an optional name => value mapping
- # passed on to #entity; +filter+ is an optional collection of entity names
- # that are left unexpanded.
- def unnormalize( string, entities=nil, filter=nil )
- rv = string.clone
- rv.gsub!( /\r\n?/, "\n" )
- matches = rv.scan( REFERENCE_RE )
- # Fast path: nothing that looks like a reference.
- return rv if matches.size == 0
- # Numeric character references (decimal or hex) become the UTF-8
- # encoding of the code point.
- rv.gsub!( /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/ ) {|m|
- m=$1
- m = "0#{m}" if m[0] == ?x
- [Integer(m)].pack('U*')
- }
- # Keep only the names of named references (numeric references have no
- # name capture and are dropped by compact!).
- matches.collect!{|x|x[0]}.compact!
- if matches.size > 0
- # First pass: expand each name through #entity.
- matches.each do |entity_reference|
- unless filter and filter.include?(entity_reference)
- entity_value = entity( entity_reference, entities )
- if entity_value
- re = /&#{entity_reference};/
- rv.gsub!( re, entity_value )
- end
- end
- end
- # Second pass: expand the predefined entities.
- matches.each do |entity_reference|
- unless filter and filter.include?(entity_reference)
- er = DEFAULT_ENTITIES[entity_reference]
- rv.gsub!( er[0], er[2] ) if er
- end
- end
- # "&amp;" is expanded last.
- rv.gsub!( /&amp;/, '&' )
- end
- rv
- end
- end
- end
-end
-
-=begin
- case event[0]
- when :start_element
- when :text
- when :end_element
- when :processing_instruction
- when :cdata
- when :comment
- when :xmldecl
- when :start_doctype
- when :end_doctype
- when :externalentity
- when :elementdecl
- when :entity
- when :attlistdecl
- when :notationdecl
- when :end_doctype
- end
-=end
diff --git a/ruby_1_8_6/lib/rexml/parsers/lightparser.rb b/ruby_1_8_6/lib/rexml/parsers/lightparser.rb
deleted file mode 100644
index 0f35034993..0000000000
--- a/ruby_1_8_6/lib/rexml/parsers/lightparser.rb
+++ /dev/null
@@ -1,60 +0,0 @@
-require 'rexml/parsers/streamparser'
-require 'rexml/parsers/baseparser'
-require 'rexml/light/node'
-
-module REXML
- module Parsers
-    # Builds a lightweight tree of nested Arrays from the events of a
-    # BaseParser.
-    class LightParser
-      # +stream+ is the input handed to the underlying BaseParser. It is
-      # kept so that #rewind can restart parsing from the beginning.
-      def initialize(stream)
-        @stream = stream
-        @parser = REXML::Parsers::BaseParser.new( stream )
-      end
-
-      # Registers +listener+ with the underlying parser.
-      def add_listener( listener )
-        @parser.add_listener( listener )
-      end
-
-      # Rewinds the input stream and resets the parser onto it.
-      def rewind
-        @stream.rewind
-        @parser.stream = @stream
-      end
-
-      # Parses the whole input and returns the root node, an Array whose
-      # first element is :document followed by the top-level nodes.
-      #
-      # Every node is the parser's event Array with a reference to its
-      # parent inserted at index 1, so a node reads
-      # [ type, parent, *event_data, *children ].
-      def parse
-        root = context = [ :document ]
-        while true
-          event = @parser.pull
-          case event[0]
-          when :end_document
-            break
-          when :start_element, :start_doctype
-            # Container node: attach to the parent, then descend into it.
-            context << event
-            event[1,0] = [context]
-            context = event
-          when :end_element, :end_doctype
-            # Leave the current container; index 1 holds its parent.
-            # (:end_doctype used to be listed in two separate branches, the
-            # second of which could never be reached.)
-            context = context[1]
-          else
-            # Leaf node: attach to the current container.
-            context << event
-            event[1,0] = [context]
-          end
-        end
-        root
-      end
-    end
-
- # An element is an array. The array contains:
- # 0 The parent element
- # 1 The tag name
- # 2 A hash of attributes
- # 3..-1 The child elements
- # An element is an array of size > 3
- # Text is a String
- # PIs are [ :processing_instruction, target, data ]
- # Comments are [ :comment, data ]
- # DocTypes are DocType structs
- # The root is an array with XMLDecls, Text, DocType, Array, Text
- end
-end
diff --git a/ruby_1_8_6/lib/rexml/parsers/pullparser.rb b/ruby_1_8_6/lib/rexml/parsers/pullparser.rb
deleted file mode 100644
index 36dc7160c3..0000000000
--- a/ruby_1_8_6/lib/rexml/parsers/pullparser.rb
+++ /dev/null
@@ -1,196 +0,0 @@
-require 'forwardable'
-
-require 'rexml/parseexception'
-require 'rexml/parsers/baseparser'
-require 'rexml/xmltokens'
-
-module REXML
- module Parsers
- # = Using the Pull Parser
- # <em>This API is experimental, and subject to change.</em>
- # parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
- # while parser.has_next?
- # res = parser.next
- # puts res[1]['att'] if res.start_tag? and res[0] == 'b'
- # end
- # See the PullEvent class for information on the content of the results.
- # The data is identical to the arguments passed for the various events to
- # the StreamListener API.
- #
- # Notice that:
- # parser = PullParser.new( "<a>BAD DOCUMENT" )
- # while parser.has_next?
- # res = parser.next
- # raise res[1] if res.error?
- # end
- #
- # Nat Price gave me some good ideas for the API.
-    class PullParser
-      include XMLTokens
-      extend Forwardable
-
-      def_delegators( :@parser, :entity )
-      def_delegators( :@parser, :source )
-
-      # +stream+ is the input handed to the underlying BaseParser.
-      def initialize stream
-        @entities = {}
-        @listeners = nil
-        @parser = BaseParser.new( stream )
-        @my_stack = []
-      end
-
-      # Records +listener+.
-      # NOTE(review): nothing in this class notifies the recorded listeners;
-      # the sibling parsers forward to the BaseParser instead -- confirm
-      # which behaviour is intended.
-      def add_listener( listener )
-        @listeners = [] unless @listeners
-        @listeners << listener
-      end
-
-      # True when neither buffered events (from #peek / #unshift) nor the
-      # underlying parser have anything left. Previously this was delegated
-      # to the parser alone, so buffered events were not counted.
-      def empty?
-        @my_stack.empty? and @parser.empty?
-      end
-
-      # True while there is at least one more event to #pull.
-      def has_next?
-        !empty?
-      end
-
-      # Yields each remaining event in turn.
-      def each
-        while has_next?
-          yield self.pull
-        end
-      end
-
-      # Returns the event +depth+ positions ahead (0 is the next event)
-      # without consuming it. Events read to satisfy the request are
-      # buffered and go through the same processing as in #pull, so a peeked
-      # event is identical to the one #pull later returns.
-      def peek depth=0
-        @my_stack.push( next_event ) while @my_stack.length <= depth
-        @my_stack[depth]
-      end
-
-      # Returns the next event as a PullEvent, taking buffered events first.
-      def pull
-        return @my_stack.shift if @my_stack.length > 0
-        next_event
-      end
-
-      # Pushes +token+ back so that it is the next event returned.
-      def unshift token
-        @my_stack.unshift token
-      end
-
-      private
-
-      # Reads one event from the underlying parser and wraps it in a
-      # PullEvent. Entity declarations whose value does not match
-      # PUBLIC/SYSTEM are remembered, and text events get their unnormalized
-      # text appended as an extra element.
-      def next_event
-        event = @parser.pull
-        case event[0]
-        when :entitydecl
-          @entities[ event[1] ] =
-            event[2] unless event[2] =~ /PUBLIC|SYSTEM/
-        when :text
-          unnormalized = @parser.unnormalize( event[1], @entities )
-          event << unnormalized
-        end
-        PullEvent.new( event )
-      end
-    end
-
-    # A parsing event. The contents of the event are accessed as an +Array+,
-    # and the type is given either by the ...? methods, or by calling
-    # +event_type+. The contents of this object vary from event to event,
-    # but are identical to the arguments passed to +StreamListener+s for each
-    # event.
-    class PullEvent
-      # +arg+ is the raw event Array: the event type Symbol followed by the
-      # event's contents.
-      def initialize(arg)
-        @contents = arg
-      end
-
-      # Array-style access to the event contents. Index 0 is the first
-      # content item; the type Symbol is not part of the indexed contents.
-      #
-      # Accepts an Integer index, an (index, length) pair or a Range, with
-      # the same meaning as for Array#slice. Negative indices count from the
-      # end of the contents. Raises for any other kind of argument.
-      #
-      # A Range used to have only its start shifted past the type slot, so
-      # event[0..1] returned a single item; both ends are now shifted.
-      def []( start, endd=nil)
-        if start.kind_of? Range
-          @contents.slice( Range.new( content_index( start.begin ),
-                                      content_index( start.end ),
-                                      start.exclude_end? ) )
-        elsif start.kind_of? Numeric
-          if endd.nil?
-            @contents.slice( content_index( start ) )
-          else
-            @contents.slice( content_index( start ), endd )
-          end
-        else
-          raise "Illegal argument #{start.inspect} (#{start.class})"
-        end
-      end
-
-      # The type of this event, as a Symbol (for example :start_element,
-      # :end_element, :text, :processing_instruction, :comment, :cdata,
-      # :xmldecl, :start_doctype, :attlistdecl, :elementdecl, :entitydecl
-      # or :notationdecl).
-      def event_type
-        @contents[0]
-      end
-
-      # Content: [ String tag_name, Hash attributes ]
-      def start_element?
-        @contents[0] == :start_element
-      end
-
-      # Content: [ String tag_name ]
-      def end_element?
-        @contents[0] == :end_element
-      end
-
-      # Content: [ String raw_text, String unnormalized_text ]
-      def text?
-        @contents[0] == :text
-      end
-
-      # Content: [ String target, String content ]
-      def instruction?
-        @contents[0] == :processing_instruction
-      end
-
-      # Content: [ String text ]
-      def comment?
-        @contents[0] == :comment
-      end
-
-      # Content: [ String name, String pub_sys, String long_name, String uri ]
-      def doctype?
-        @contents[0] == :start_doctype
-      end
-
-      # Content: [ String element_name, Hash defaults, String declaration ]
-      def attlistdecl?
-        @contents[0] == :attlistdecl
-      end
-
-      # Content: [ String text ]
-      def elementdecl?
-        @contents[0] == :elementdecl
-      end
-
-      # Due to the wonders of DTDs, an entity declaration can be just about
-      # anything. There's no way to normalize it; you'll have to interpret the
-      # content yourself. However, the following is true:
-      #
-      # * If the entity declaration is an internal entity:
-      #   [ String name, String value ]
-      def entitydecl?
-        @contents[0] == :entitydecl
-      end
-
-      # Content: [ String name, String "PUBLIC" or "SYSTEM",
-      #            String public_id (nil for SYSTEM), String system_id ]
-      def notationdecl?
-        @contents[0] == :notationdecl
-      end
-
-      # True for an event of type :entity.
-      def entity?
-        @contents[0] == :entity
-      end
-
-      # Content: [ String text ]
-      def cdata?
-        @contents[0] == :cdata
-      end
-
-      # Content: [ String version, String encoding, String standalone ]
-      def xmldecl?
-        @contents[0] == :xmldecl
-      end
-
-      # True for an event of type :error.
-      def error?
-        @contents[0] == :error
-      end
-
-      # "type: [contents...]"
-      def inspect
-        @contents[0].to_s + ": " + @contents[1..-1].inspect
-      end
-
-      private
-
-      # Maps an index into the event contents onto an index into @contents,
-      # whose slot 0 holds the event type. Negative indices count from the
-      # end and therefore need no adjustment.
-      def content_index( index )
-        index < 0 ? index : index + 1
-      end
-    end
- end
-end
diff --git a/ruby_1_8_6/lib/rexml/parsers/sax2parser.rb b/ruby_1_8_6/lib/rexml/parsers/sax2parser.rb
deleted file mode 100644
index e402eb7747..0000000000
--- a/ruby_1_8_6/lib/rexml/parsers/sax2parser.rb
+++ /dev/null
@@ -1,238 +0,0 @@
-require 'rexml/parsers/baseparser'
-require 'rexml/parseexception'
-require 'rexml/namespace'
-require 'rexml/text'
-
-module REXML
- module Parsers
- # SAX2Parser
- class SAX2Parser
-      # Creates a SAX2 parser over +source+ with no observers registered.
-      def initialize(source)
-        @parser = BaseParser.new(source)
-        @listeners, @procs = [], []
-        @namespace_stack, @tag_stack = [], []
-        @has_listeners = false
-        @entities = {}
-      end
-
-      # The source object of the underlying BaseParser.
-      def source
-        return @parser.source
-      end
-
-      # Registers +listener+ directly with the underlying BaseParser.
-      def add_listener(listener)
-        @parser.add_listener(listener)
-      end
-
-      # Registers an observer. Accepted argument combinations:
-      #
-      # Symbol, Array, Block
-      #   Listen to Symbol events on Array elements
-      # Symbol, Block
-      #   Listen to Symbol events
-      # Array, Listener
-      #   Listen to all events on Array elements
-      # Array, Block
-      #   Listen to :start_element events on Array elements
-      # Listener
-      #   Listen to All events
-      #
-      # Symbol can be one of: :start_element, :end_element,
-      # :start_prefix_mapping, :end_prefix_mapping, :characters,
-      # :processing_instruction, :doctype, :attlistdecl, :elementdecl,
-      # :entitydecl, :notationdecl, :cdata, :xmldecl, :comment
-      #
-      # There is an additional symbol that can be listened for: :progress.
-      # This will be called for every event generated, passing in the current
-      # stream position.
-      #
-      # Array contains regular expressions or strings which will be matched
-      # against fully qualified element names.
-      #
-      # Listener must implement the methods in SAX2Listener
-      #
-      # Block will be passed the same arguments as a SAX2Listener method would
-      # be, where the method name is the same as the matched Symbol.
-      # See the SAX2Listener for more information.
-      def listen(*args, &blok)
-        first, second = args
-        with_second = args.size == 2
-        case first
-        when Symbol
-          if with_second
-            second.each { |match| @procs << [first, match, blok] }
-          else
-            add([first, nil, blok])
-          end
-        when Array
-          if with_second
-            first.each { |match| add([nil, match, second]) }
-          else
-            first.each { |match| add([:start_element, match, blok]) }
-          end
-        else
-          add([nil, nil, first])
-        end
-      end
-
-      # Removes observers. With a +listener+, every registration whose
-      # observer equals it is dropped from the listener list; without one,
-      # registrations whose observer equals the given block are dropped from
-      # the proc list.
-      def deafen(listener = nil, &blok)
-        unless listener
-          return @procs.delete_if { |entry| entry[-1] == blok }
-        end
-        @listeners.delete_if { |entry| entry[-1] == listener }
-        @has_listeners = false if @listeners.size == 0
-      end
-
- # Parses the whole source, notifying the registered procs and listeners
- # of each event. Returns when the underlying parser reports
- # :end_document. A :progress notification carrying the parser position
- # follows every event except :end_document.
- def parse
- # Start-of-document notifications are issued here rather than through
- # #handle.
- @procs.each { |sym,match,block| block.call if sym == :start_document }
- @listeners.each { |sym,match,block|
- block.start_document if sym == :start_document or sym.nil?
- }
- # NOTE(review): root/context are only touched here and on :end_doctype
- # and are never read otherwise -- they look vestigial; confirm.
- root = context = []
- while true
- event = @parser.pull
- case event[0]
- when :end_document
- handle( :end_document )
- break
- when :start_doctype
- handle( :doctype, *event[1..-1])
- when :end_doctype
- context = context[1]
- when :start_element
- @tag_stack.push(event[1])
- # find the observers for namespaces
- procs = get_procs( :start_prefix_mapping, event[1] )
- listeners = get_listeners( :start_prefix_mapping, event[1] )
- # NOTE(review): a namespace scope is pushed only inside this branch,
- # but the :end_element branch pops one unconditionally -- confirm the
- # stack stays balanced when nothing observes :start_prefix_mapping.
- if procs or listeners
- # break out the namespace declarations
- # The attributes live in event[2]
- event[2].each {|n, v| event[2][n] = @parser.normalize(v)}
- nsdecl = event[2].find_all { |n, value| n =~ /^xmlns(:|$)/ }
- # Strip the "xmlns:" part; a bare "xmlns" yields a nil prefix.
- nsdecl.collect! { |n, value| [ n[6..-1], value ] }
- @namespace_stack.push({})
- nsdecl.each do |n,v|
- @namespace_stack[-1][n] = v
- # notify observers of namespaces
- procs.each { |ob| ob.call( n, v ) } if procs
- listeners.each { |ob| ob.start_prefix_mapping(n, v) } if listeners
- end
- end
- # Split the qualified name into prefix and local part.
- event[1] =~ Namespace::NAMESPLIT
- prefix = $1
- local = $2
- uri = get_namespace(prefix)
- # find the observers for start_element
- procs = get_procs( :start_element, event[1] )
- listeners = get_listeners( :start_element, event[1] )
- # notify observers
- procs.each { |ob| ob.call( uri, local, event[1], event[2] ) } if procs
- listeners.each { |ob|
- ob.start_element( uri, local, event[1], event[2] )
- } if listeners
- when :end_element
- @tag_stack.pop
- event[1] =~ Namespace::NAMESPLIT
- prefix = $1
- local = $2
- uri = get_namespace(prefix)
- # find the observers for end_element
- procs = get_procs( :end_element, event[1] )
- listeners = get_listeners( :end_element, event[1] )
- # notify observers
- procs.each { |ob| ob.call( uri, local, event[1] ) } if procs
- listeners.each { |ob|
- ob.end_element( uri, local, event[1] )
- } if listeners
-
- namespace_mapping = @namespace_stack.pop
- # find the observers for namespaces
- procs = get_procs( :end_prefix_mapping, event[1] )
- listeners = get_listeners( :end_prefix_mapping, event[1] )
- if procs or listeners
- namespace_mapping.each do |prefix, uri|
- # notify observers of namespaces
- procs.each { |ob| ob.call( prefix ) } if procs
- listeners.each { |ob| ob.end_prefix_mapping(prefix) } if listeners
- end
- end
- when :text
- #normalized = @parser.normalize( event[1] )
- #handle( :characters, normalized )
- # Expand declared entities and numeric character references on a copy
- # of the text before reporting it as :characters.
- copy = event[1].clone
- @entities.each { |key, value| copy = copy.gsub("&#{key};", value) }
- copy.gsub!( Text::NUMERICENTITY ) {|m|
- m=$1
- m = "0#{m}" if m[0] == ?x
- [Integer(m)].pack('U*')
- }
- handle( :characters, copy )
- when :entitydecl
- # Only three-element declarations ([ :entitydecl, name, value ]) are
- # remembered for expansion in text.
- @entities[ event[1] ] = event[2] if event.size == 3
- handle( *event )
- when :processing_instruction, :comment, :attlistdecl,
- :elementdecl, :cdata, :notationdecl, :xmldecl
- handle( *event )
- end
- handle( :progress, @parser.position )
- end
- end
-
- private
-      # Dispatches the event +symbol+ with +arguments+ to every proc and
-      # listener registered for it in the context of the innermost open tag.
-      # Procs are called with the arguments; listeners receive a call to the
-      # method named after the symbol.
-      def handle(symbol, *arguments)
-        current_tag = @tag_stack.last
-        procs = get_procs(symbol, current_tag)
-        listeners = get_listeners(symbol, current_tag)
-        # notify observers
-        if procs
-          procs.each { |ob| ob.call(*arguments) }
-        end
-        listeners.each { |l| l.send(symbol.to_s, *arguments) } if listeners
-      end
-
-      # The following methods are duplicates, but it is faster than using
-      # a helper
-      #
-      # Returns the procs registered for event +symbol+ on the element
-      # +name+, or nil when no procs are registered at all. A registration
-      # applies when its symbol is nil or equal to +symbol+, and its match is
-      # nil, equal to +name+, or a Regexp matching +name+.
-      def get_procs(symbol, name)
-        return nil if @procs.size == 0
-        applicable = @procs.find_all do |sym, match, block|
-          (sym.nil? || symbol == sym) &&
-            (match.nil? || name == match ||
-              (match.kind_of?(Regexp) && name =~ match))
-        end
-        applicable.collect { |entry| entry[-1] }
-      end
-      # Returns the listeners registered for event +symbol+ on the element
-      # +name+, or nil when no listeners are registered at all. Uses the same
-      # matching rule as get_procs: the registered symbol is nil or equal to
-      # +symbol+, and the registered match is nil, equal to +name+, or a
-      # Regexp matching +name+.
-      def get_listeners(symbol, name)
-        return nil if @listeners.size == 0
-        applicable = @listeners.find_all do |sym, match, block|
-          (sym.nil? || symbol == sym) &&
-            (match.nil? || name == match ||
-              (match.kind_of?(Regexp) && name =~ match))
-        end
-        applicable.collect { |entry| entry[-1] }
-      end
-
-      # Stores one registration, [ symbol, match, observer ]. Observers that
-      # respond to #call go to the proc list, everything else to the
-      # listener list; a registration already present is not added again.
-      def add(pair)
-        unless pair[-1].respond_to?(:call)
-          @listeners.push(pair) unless @listeners.include?(pair)
-          return @has_listeners = true
-        end
-        @procs.push(pair) unless @procs.include?(pair)
-      end
-
-      # Returns the namespace URI bound to +prefix+ by the innermost scope on
-      # the namespace stack that declares it, or nil when no scope does. A
-      # nil +prefix+ looks up the default namespace, which is stored under
-      # the nil key.
-      #
-      # The previous version had an `|| find { ... }` fallback that could
-      # never run, because find_all always returns an Array; it is dropped.
-      def get_namespace( prefix )
-        scope = @namespace_stack.reverse.find { |ns| not ns[prefix].nil? }
-        scope[prefix] if scope
-      end
- end
- end
-end
diff --git a/ruby_1_8_6/lib/rexml/parsers/streamparser.rb b/ruby_1_8_6/lib/rexml/parsers/streamparser.rb
deleted file mode 100644
index 256d0f611c..0000000000
--- a/ruby_1_8_6/lib/rexml/parsers/streamparser.rb
+++ /dev/null
@@ -1,46 +0,0 @@
-module REXML
- module Parsers
-    # Drives a listener object with one callback per event pulled from a
-    # BaseParser.
-    class StreamParser
-      # +source+ is handed to a BaseParser; +listener+ receives the
-      # callbacks issued by #parse.
-      def initialize(source, listener)
-        @listener = listener
-        @parser = BaseParser.new(source)
-      end
-
-      # Registers +listener+ with the underlying parser.
-      def add_listener(listener)
-        @parser.add_listener(listener)
-      end
-
-      # Pulls events until :end_document and translates each one into a
-      # call on the listener. Attribute values and text are unnormalized
-      # first; attribute values are replaced in place in the event's hash.
-      def parse
-        while (event = @parser.pull)[0] != :end_document
-          kind = event[0]
-          case kind
-          when :start_element
-            attributes = event[2]
-            attributes.each do |name, raw|
-              attributes[name] = @parser.unnormalize(raw)
-            end
-            @listener.tag_start(event[1], attributes)
-          when :end_element
-            @listener.tag_end(event[1])
-          when :text
-            @listener.text(@parser.unnormalize(event[1]))
-          when :processing_instruction
-            @listener.instruction(*event[1, 2])
-          when :start_doctype
-            @listener.doctype(*event[1..-1])
-          when :end_doctype
-            # FIXME: remove this condition for milestone:3.2
-            @listener.doctype_end if @listener.respond_to? :doctype_end
-          when :comment, :attlistdecl, :cdata, :xmldecl, :elementdecl
-            # Contents are passed as separate arguments...
-            @listener.send(kind.to_s, *event[1..-1])
-          when :entitydecl, :notationdecl
-            # ...whereas these two receive their contents as one Array.
-            @listener.send(kind.to_s, event[1..-1])
-          end
-        end
-        nil
-      end
-    end
- end
-end
diff --git a/ruby_1_8_6/lib/rexml/parsers/treeparser.rb b/ruby_1_8_6/lib/rexml/parsers/treeparser.rb
deleted file mode 100644
index 5c3e142ea7..0000000000
--- a/ruby_1_8_6/lib/rexml/parsers/treeparser.rb
+++ /dev/null
@@ -1,97 +0,0 @@
-require 'rexml/validation/validationexception'
-require 'rexml/undefinednamespaceexception'
-
-module REXML
-  module Parsers
-    # Pulls events from a BaseParser and turns them into nodes added beneath
-    # a build context (a new Document unless one is supplied).
-    class TreeParser
-      # source::        passed on to Parsers::BaseParser.new
-      # build_context:: node that receives the parsed content
-      def initialize( source, build_context = Document.new )
-        @build_context = build_context
-        @parser = Parsers::BaseParser.new( source )
-      end
-
-      # Forwards +listener+ to the underlying BaseParser.
-      def add_listener( listener )
-        @parser.add_listener( listener )
-      end
-
-      # Consumes events until :end_document, moving @build_context down on
-      # every opening event and back to the parent on every closing one.
-      #
-      # Raises ParseException when the document ends while elements are still
-      # open. ValidationException and UndefinedNamespaceException pass through
-      # untouched; any other error is wrapped in a ParseException that carries
-      # the parser and its source.
-      def parse
-        open_tags = []
-        inside_doctype = false
-        entities = nil
-        begin
-          while true
-            event = @parser.pull
-            case event[0]
-            when :end_document
-              unless open_tags.empty?
-                raise ParseException.new("No close tag for #{@build_context.xpath}")
-              end
-              return
-            when :start_element
-              open_tags.push( event[1] )
-              @build_context = @build_context.add_element( event[1], event[2] )
-            when :end_element
-              open_tags.pop
-              @build_context = @build_context.parent
-            when :text
-              # text events are ignored while a doctype is open
-              unless inside_doctype
-                if @build_context[-1].instance_of? Text
-                  # extend the preceding text node instead of adding another
-                  @build_context[-1] << event[1]
-                elsif not (@build_context.ignore_whitespace_nodes and
-                           event[1].strip.size==0)
-                  @build_context.add(
-                    Text.new(event[1], @build_context.whitespace, nil, true)
-                  )
-                end
-              end
-            when :comment
-              @build_context.add( Comment.new( event[1] ) )
-            when :cdata
-              @build_context.add( CData.new( event[1] ) )
-            when :processing_instruction
-              @build_context.add( Instruction.new( event[1], event[2] ) )
-            when :end_doctype
-              inside_doctype = false
-              # only existing keys are reassigned, so iterating is safe
-              entities.each_key do |name|
-                entities[name] = @build_context.entities[name].value
-              end
-              @build_context = @build_context.parent
-            when :start_doctype
-              @build_context = DocType.new( event[1..-1], @build_context )
-              entities = {}
-              inside_doctype = true
-            when :attlistdecl
-              @build_context.add( AttlistDecl.new( event[1..-1] ) )
-            when :externalentity
-              @build_context.add( ExternalEntity.new( event[1] ) )
-            when :elementdecl
-              @build_context.add( ElementDecl.new( event[1] ) )
-            when :entitydecl
-              entities[ event[1] ] = event[2] unless event[2] =~ /PUBLIC|SYSTEM/
-              @build_context.add( Entity.new( event ) )
-            when :notationdecl
-              @build_context.add( NotationDecl.new( *event[1..-1] ) )
-            when :xmldecl
-              @build_context.add( XMLDecl.new( event[1], event[2], event[3] ) )
-            end
-          end
-        rescue REXML::Validation::ValidationException,
-               REXML::UndefinedNamespaceException
-          raise
-        rescue
-          raise ParseException.new( $!.message, @parser.source, @parser, $! )
-        end
-      end
-    end
-  end
-end
diff --git a/ruby_1_8_6/lib/rexml/parsers/ultralightparser.rb b/ruby_1_8_6/lib/rexml/parsers/ultralightparser.rb
deleted file mode 100644
index adc4af18e2..0000000000
--- a/ruby_1_8_6/lib/rexml/parsers/ultralightparser.rb
+++ /dev/null
@@ -1,56 +0,0 @@
-require 'rexml/parsers/streamparser'
-require 'rexml/parsers/baseparser'
-
-module REXML
-  module Parsers
-    # Builds a lightweight tree of nested Arrays from the events pulled out
-    # of a BaseParser.
-    #
-    # Every node is the event array returned by the parser. Events that open
-    # a scope (:start_element and :start_doctype) get their parent array
-    # inserted at index 1, so an element looks like
-    #   [ :start_element, parent, tag_name, attributes, child, ... ]
-    # Every other event is appended unchanged to the array of the scope it
-    # occurs in, e.g. [ :text, string ] or [ :comment, data ]. The root is a
-    # plain Array holding the top-level events.
-    class UltraLightParser
-      # stream:: source handed to BaseParser.new; kept so #rewind can reuse it
-      def initialize stream
-        @stream = stream
-        @parser = REXML::Parsers::BaseParser.new( stream )
-      end
-
-      # Forwards +listener+ to the underlying BaseParser.
-      def add_listener( listener )
-        @parser.add_listener( listener )
-      end
-
-      # Rewinds the stream and hands it to the parser again.
-      def rewind
-        @stream.rewind
-        @parser.stream = @stream
-      end
-
-      # Pulls events until :end_document and returns the root array.
-      #
-      # The scope-opening branch tests :start_doctype, which pairs with the
-      # :end_doctype branch below. Testing :doctype instead would leave the
-      # doctype scope unopened while :end_doctype still popped one level,
-      # replacing the current scope with an arbitrary entry of the root array.
-      def parse
-        root = context = []
-        while true
-          event = @parser.pull
-          case event[0]
-          when :end_document
-            break
-          when :end_doctype, :end_element
-            # index 1 of a scope-opening event is its parent
-            context = context[1]
-          when :start_element, :start_doctype
-            context << event
-            event[1,0] = [context]
-            context = event
-          else
-            context << event
-          end
-        end
-        root
-      end
-    end
-  end
-end
diff --git a/ruby_1_8_6/lib/rexml/parsers/xpathparser.rb b/ruby_1_8_6/lib/rexml/parsers/xpathparser.rb
deleted file mode 100644
index de2530e347..0000000000
--- a/ruby_1_8_6/lib/rexml/parsers/xpathparser.rb
+++ /dev/null
@@ -1,698 +0,0 @@
-require 'rexml/namespace'
-require 'rexml/xmltokens'
-
-module REXML
- module Parsers
- # You don't want to use this class. Really. Use XPath, which is a wrapper
- # for this class. Believe me. You don't want to poke around in here.
- # There is strange, dark magic at work in this code. Beware. Go back! Go
- # back while you still can!
- class XPathParser
- include XMLTokens
- # Matches a string literal at the start of the input. Capture 1 holds the
- # body of a single-quoted literal, capture 2 the body of a double-quoted
- # one; only one of the two is set for any given match.
- LITERAL = /^'([^']*)'|^"([^"]*)"/u
-
-      # Installs +namespaces+ as the namespace context of Functions and also
-      # remembers it on this parser instance.
-      def namespaces=( namespaces )
-        Functions.namespace_context = namespaces
-        @namespaces = namespaces
-      end
-
-      # Parses the XPath expression +path+ and returns the parsed form built
-      # by OrExpr (an Array).
-      #
-      # Whitespace directly inside "(" / "[" and directly before ")" / "]" is
-      # ignorable and is stripped first. The stripping works on copies, so
-      # the caller's string is never modified and frozen strings are accepted.
-      def parse path
-        path = path.gsub( /([\(\[])\s+/, '\1' )
-        path = path.gsub( /\s+([\]\)])/, '\1' )
-        parsed = []
-        OrExpr( path, parsed )
-        parsed
-      end
-
-      # Parses +path+ as the body of a predicate: it is wrapped in square
-      # brackets, handed to Predicate, and the collected result is returned.
-      def predicate path
-        result = []
-        Predicate( "[#{path}]", result )
-        result
-      end
-
-      # Renders a parsed path (or a String, which is parsed first) in
-      # abbreviated XPath syntax and returns the resulting String.
-      #
-      # The parsed array is consumed with shift while it is rendered. A
-      # leading "/" is added when a :document marker was seen. Operators
-      # without an abbreviated spelling are written as "axis-name::", and
-      # unrecognised entries as "UNKNOWN(<inspect>)".
-      def abbreviate( path )
-        path = parse( path ) if path.kind_of?( String )
-        out = ""
-        absolute = false
-        until path.size == 0
-          op = path.shift
-          case op
-          when :node
-            # contributes nothing to the abbreviated form
-          when :attribute
-            out << "/" unless out.size == 0
-            out << "@"
-          when :child
-            out << "/" unless out.size == 0
-          when :descendant_or_self
-            out << "/"
-          when :self
-            out << "."
-          when :parent
-            out << ".."
-          when :any
-            out << "*"
-          when :text
-            out << "text()"
-          when :following, :following_sibling,
-               :ancestor, :ancestor_or_self, :descendant,
-               :namespace, :preceding, :preceding_sibling
-            out << "/" unless out.size == 0
-            out << op.to_s.tr("_", "-")
-            out << "::"
-          when :qname
-            # a qname is followed by its prefix and its local name
-            prefix = path.shift
-            name = path.shift
-            out << prefix+":" if prefix.size > 0
-            out << name
-          when :predicate
-            body = predicate_to_string( path.shift ) {|x| abbreviate( x ) }
-            out << '['
-            out << body
-            out << ']'
-          when :document
-            absolute = true
-          when :function
-            out << path.shift
-            out << "( "
-            out << predicate_to_string( path.shift[0] ) {|x| abbreviate( x )}
-            out << " )"
-          when :literal
-            out << %Q{ "#{path.shift}" }
-          else
-            out << "/" unless out.size == 0
-            out << "UNKNOWN("
-            out << op.inspect
-            out << ")"
-          end
-        end
-        absolute ? "/"+out : out
-      end
-
-      # Renders a parsed path (or a String, which is parsed first) in the
-      # unabbreviated "axis::test" syntax and returns the resulting String.
-      #
-      # The parsed array is consumed with shift while it is rendered. A
-      # leading "/" is added when a :document marker was seen; unrecognised
-      # entries are written as "UNKNOWN(<inspect>)".
-      def expand( path )
-        path = parse( path ) if path.kind_of?( String )
-        out = ""
-        absolute = false
-        until path.size == 0
-          op = path.shift
-          case op
-          when :document
-            absolute = true
-          when :node
-            out << "node()"
-          when :any
-            out << "*"
-          when :qname
-            # a qname is followed by its prefix and its local name
-            prefix = path.shift
-            name = path.shift
-            out << prefix+":" if prefix.size > 0
-            out << name
-          when :predicate
-            body = predicate_to_string( path.shift ) { |x| expand(x) }
-            out << '['
-            out << body
-            out << ']'
-          when :attribute, :child, :following, :following_sibling,
-               :ancestor, :ancestor_or_self, :descendant, :descendant_or_self,
-               :namespace, :preceding, :preceding_sibling, :self, :parent
-            out << "/" unless out.size == 0
-            out << op.to_s.tr("_", "-")
-            out << "::"
-          else
-            out << "/" unless out.size == 0
-            out << "UNKNOWN("
-            out << op.inspect
-            out << ")"
-          end
-        end
-        absolute ? "/"+out : out
-      end
-
-      # Renders one parsed predicate expression as a String.
-      #
-      # * binary operators: both operands are rendered recursively and joined
-      #   around the operator, which is spelled as a symbol where one exists
-      #   (= < > <= >= != |) and by its name otherwise;
-      # * :function  -> "name( argument )";
-      # * :literal   -> the inspect form of the value;
-      # * anything else is yielded to the block, which must return a String.
-      #
-      # The consumed parts are removed from +path+ with shift, and runs of
-      # spaces in the result are squeezed to a single space.
-      def predicate_to_string( path, &block )
-        spellings = { :eq => "=", :lt => "<", :gt => ">", :lteq => "<=",
-                      :gteq => ">=", :neq => "!=", :union => "|" }
-        out = ""
-        case path[0]
-        when :and, :or, :mult, :plus, :minus, :neq, :eq, :lt, :gt, :lteq, :gteq, :div, :mod, :union
-          op = path.shift
-          left = predicate_to_string( path.shift, &block )
-          right = predicate_to_string( path.shift, &block )
-          out << " "
-          out << left
-          out << " "
-          out << spellings.fetch( op, op ).to_s
-          out << " "
-          out << right
-          out << " "
-        when :function
-          path.shift
-          out << path.shift
-          out << "( "
-          out << predicate_to_string( path.shift, &block )
-          out << " )"
-        when :literal
-          path.shift
-          out << " "
-          out << path.shift.inspect
-          out << " "
-        else
-          out << " "
-          out << yield( path )
-          out << " "
-        end
-        out.squeeze(" ")
-      end
-
- private
- #LocationPath
- # | RelativeLocationPath
- # | '/' RelativeLocationPath?
- # | '//' RelativeLocationPath
-      # Parses a location path. A leading "/" records :document; a leading
-      # "//" additionally records :descendant_or_self and :node. Whatever
-      # follows is delegated to RelativeLocationPath, whose return value is
-      # passed back. Returns nil when nothing is left after the leading
-      # slashes (or when the stripped path is empty).
-      def LocationPath path, parsed
-        path = path.strip
-        if path[0] == ?/
-          parsed << :document
-          consumed = 1
-          if path[1] == ?/
-            parsed << :descendant_or_self
-            parsed << :node
-            consumed = 2
-          end
-          path = path[consumed..-1]
-        end
-        return nil unless path.size > 0
-        RelativeLocationPath( path, parsed )
-      end
-
- #RelativeLocationPath
- # | Step
- # | (AXIS_NAME '::' | '@' | '') AxisSpecifier
- # NodeTest
- # Predicate
- # | '.' | '..' AbbreviatedStep
- # | RelativeLocationPath '/' Step
- # | RelativeLocationPath '//' Step
- # Matches an explicit axis specifier ("axis-name::") at the start of the
- # input; capture 1 is the axis name.
- AXIS = /^(ancestor|ancestor-or-self|attribute|child|descendant|descendant-or-self|following|following-sibling|namespace|parent|preceding|preceding-sibling|self)::/
-      # Parses a sequence of steps separated by "/" or "//" and appends the
-      # result to +parsed+. Returns the unconsumed remainder of +path+.
-      #
-      # Each step is either an abbreviated step ("." or "..") or an axis
-      # ("@", an explicit "axis::", or the implicit child axis) followed by a
-      # node test and optional predicates. Parsing stops at the first
-      # character after a step that is not "/".
-      def RelativeLocationPath path, parsed
-        while path.size > 0
-          if path[0] == ?.
-            # abbreviated step
-            if path[1] == ?.
-              parsed << :parent
-              parsed << :node
-              path = path[2..-1]
-            else
-              parsed << :self
-              parsed << :node
-              path = path[1..-1]
-            end
-          else
-            # axis specifier
-            if path[0] == ?@
-              parsed << :attribute
-              path = path[1..-1]
-            elsif path =~ AXIS
-              parsed << $1.tr('-','_').intern
-              path = $'
-            else
-              parsed << :child
-            end
-
-            # node test plus any predicates, gathered separately and then
-            # appended in one go
-            step = []
-            path = NodeTest( path, step )
-            path = Predicate( path, step ) if path[0] == ?[
-            parsed.concat( step )
-          end
-
-          if path.size > 0
-            # anything but a step separator ends the relative path
-            return path unless path[0] == ?/
-            if path[1] == ?/
-              parsed << :descendant_or_self
-              parsed << :node
-              path = path[2..-1]
-            else
-              path = path[1..-1]
-            end
-          end
-        end
-        return path
-      end
-
- # Returns a 1-1 map of the nodeset
- # The contents of the resulting array are either:
- # true/false, if a positive match
- # String, if a name match
- #NodeTest
- # | ('*' | NCNAME ':' '*' | QNAME) NameTest
- # | NODE_TYPE '(' ')' NodeType
- # | PI '(' LITERAL ')' PI
- # | '[' expr ']' Predicate
- # Patterns tried by NodeTest against the start of the remaining path.
- # NCNAMETEST: a prefix followed by ":*"; capture 1 is the prefix.
- NCNAMETEST= /^(#{NCNAME_STR}):\*/u
- # QNAME: alias of Namespace::NAMESPLIT; NodeTest reads capture 1 as the
- # prefix (may be unset) and capture 2 as the name.
- QNAME = Namespace::NAMESPLIT
- # NODE_TYPE: "comment()", "text()" or "node()", with optional whitespace
- # between the parentheses; capture 1 is the type name.
- NODE_TYPE = /^(comment|text|node)\(\s*\)/m
- # PI: only the opening "processing-instruction(" - the argument and the
- # closing parenthesis are handled inside NodeTest.
- PI = /^processing-instruction\(/
-      # Parses one node test at the start of +path+, appends its parsed form
-      # to +parsed+ and returns the unconsumed remainder. When nothing
-      # matches, +path+ is returned unchanged and +parsed+ is left alone.
-      #
-      #   *                              -> :any
-      #   comment() / text() / node()    -> the type as a symbol
-      #   processing-instruction(LIT?)   -> :processing_instruction, target
-      #                                     ('' when no target is given)
-      #   prefix:*                       -> :namespace, prefix
-      #   prefix:name / name             -> :qname, prefix ('' if none), name
-      #
-      # For processing-instruction the target may be quoted with either kind
-      # of quote, and the closing parenthesis is always consumed so that
-      # predicates or further steps following the test are still seen by the
-      # caller. Raises ParseException when the closing parenthesis is missing.
-      def NodeTest path, parsed
-        case path
-        when /^\*/
-          path = $'
-          parsed << :any
-        when NODE_TYPE
-          type = $1
-          path = $'
-          parsed << type.tr('-', '_').intern
-        when PI
-          path = $'
-          literal = nil
-          if path =~ /^\s*\)/
-            # no target given: just step over the closing parenthesis
-            path = $'
-          else
-            if path =~ LITERAL
-              # capture 1 is set for '...' literals, capture 2 for "..."
-              literal = $1 || $2
-              path = $'
-            end
-            raise ParseException.new("Missing ')' after processing instruction") if path[0] != ?)
-            path = path[1..-1]
-          end
-          parsed << :processing_instruction
-          parsed << (literal || '')
-        when NCNAMETEST
-          prefix = $1
-          path = $'
-          parsed << :namespace
-          parsed << prefix
-        when QNAME
-          prefix = $1
-          name = $2
-          path = $'
-          prefix = "" unless prefix
-          parsed << :qname
-          parsed << prefix
-          parsed << name
-        end
-        return path
-      end
-
- # Filters the supplied nodeset on the predicate(s)
-      # Parses every bracketed predicate at the start of +path+. For each
-      # one, :predicate and the array filled in by OrExpr are appended to
-      # +parsed+. Returns the text after the last predicate, or nil when
-      # +path+ does not start with "[".
-      def Predicate path, parsed
-        return nil unless path[0] == ?[
-        bodies = []
-        # first split off all bracket groups, then parse their contents
-        while path[0] == ?[
-          path, group = get_group(path)
-          bodies << group[1..-2] if group
-        end
-        bodies.each do |body|
-          tree = []
-          parsed << :predicate
-          parsed << tree
-          OrExpr(body, tree)
-        end
-        path
-      end
-
- # The following return arrays of true/false, a 1-1 mapping of the
- # supplied nodeset, except for axe(), which returns a filtered
- # nodeset
-
- #| OrExpr S 'or' S AndExpr
- #| AndExpr
-      # Parses "AndExpr ( ' or ' AndExpr )*". Operands are combined
-      # left-associatively as [ :or, left, right ]. The result replaces
-      # +parsed+ when that is still empty and is appended to it otherwise.
-      # Returns the unconsumed remainder of +path+.
-      def OrExpr path, parsed
-        tree = []
-        rest = AndExpr( path, tree )
-        unless rest == path
-          while rest =~ /^\s*( or )/
-            tree = [ :or, tree, [] ]
-            rest = AndExpr( $', tree[-1] )
-          end
-        end
-        unless tree.size == 0
-          if parsed.size == 0
-            parsed.replace( tree )
-          else
-            parsed << tree
-          end
-        end
-        rest
-      end
-
- #| AndExpr S 'and' S EqualityExpr
- #| EqualityExpr
-      # Parses "EqualityExpr ( ' and ' EqualityExpr )*". Operands are
-      # combined left-associatively as [ :and, left, right ]. The result
-      # replaces +parsed+ when that is still empty and is appended to it
-      # otherwise. Returns the unconsumed remainder of +path+.
-      def AndExpr path, parsed
-        tree = []
-        rest = EqualityExpr( path, tree )
-        unless rest == path
-          while rest =~ /^\s*( and )/
-            tree = [ :and, tree, [] ]
-            rest = EqualityExpr( $', tree[-1] )
-          end
-        end
-        unless tree.size == 0
-          if parsed.size == 0
-            parsed.replace( tree )
-          else
-            parsed << tree
-          end
-        end
-        rest
-      end
-
- #| EqualityExpr ('=' | '!=') RelationalExpr
- #| RelationalExpr
-      # Parses "RelationalExpr ( ('=' | '!=') RelationalExpr )*". Operands
-      # are combined left-associatively as [ :eq, l, r ] or [ :neq, l, r ].
-      # The result replaces +parsed+ when that is still empty and is appended
-      # to it otherwise. Returns the unconsumed remainder of +path+.
-      def EqualityExpr path, parsed
-        tree = []
-        rest = RelationalExpr( path, tree )
-        unless rest == path
-          while rest =~ /^\s*(!?=)\s*/
-            operator = ($1[0] == ?! ? :neq : :eq)
-            tree = [ operator, tree, [] ]
-            rest = RelationalExpr( $', tree[-1] )
-          end
-        end
-        unless tree.size == 0
-          if parsed.size == 0
-            parsed.replace( tree )
-          else
-            parsed << tree
-          end
-        end
-        rest
-      end
-
- #| RelationalExpr ('<' | '>' | '<=' | '>=') AdditiveExpr
- #| AdditiveExpr
-      # Parses "AdditiveExpr ( ('<' | '>' | '<=' | '>=') AdditiveExpr )*".
-      # Operands are combined left-associatively under :lt, :gt, :lteq or
-      # :gteq. The result replaces +parsed+ when that is still empty and is
-      # appended to it otherwise. Returns the unconsumed remainder of +path+.
-      def RelationalExpr path, parsed
-        tree = []
-        rest = AdditiveExpr( path, tree )
-        unless rest == path
-          while rest =~ /^\s*([<>]=?)\s*/
-            token = $1
-            name = (token[0] == ?< ? "lt" : "gt")
-            name += "eq" if token[-1] == ?=
-            tree = [ name.intern, tree, [] ]
-            rest = AdditiveExpr( $', tree[-1] )
-          end
-        end
-        unless tree.size == 0
-          if parsed.size == 0
-            parsed.replace( tree )
-          else
-            parsed << tree
-          end
-        end
-        rest
-      end
-
- #| AdditiveExpr ('+' | S '-') MultiplicativeExpr
- #| MultiplicativeExpr
-      # Parses "MultiplicativeExpr ( ('+' | ' -') MultiplicativeExpr )*".
-      # A minus sign only counts as an operator when a space precedes it.
-      # Operands are combined left-associatively as [ :plus, l, r ] or
-      # [ :minus, l, r ]. The result replaces +parsed+ when that is still
-      # empty and is appended to it otherwise. Returns the unconsumed
-      # remainder of +path+.
-      def AdditiveExpr path, parsed
-        tree = []
-        rest = MultiplicativeExpr( path, tree )
-        unless rest == path
-          while rest =~ /^\s*(\+| -)\s*/
-            operator = ($1[0] == ?+ ? :plus : :minus)
-            tree = [ operator, tree, [] ]
-            rest = MultiplicativeExpr( $', tree[-1] )
-          end
-        end
-        unless tree.size == 0
-          if parsed.size == 0
-            parsed.replace( tree )
-          else
-            parsed << tree
-          end
-        end
-        rest
-      end
-
- #| MultiplicativeExpr ('*' | S ('div' | 'mod') S) UnaryExpr
- #| UnaryExpr
-      # Parses "UnaryExpr ( ('*' | ' div ' | ' mod ') UnaryExpr )*".
-      # Operands are combined left-associatively as [ :mult, l, r ],
-      # [ :div, l, r ] or [ :mod, l, r ]. The result replaces +parsed+ when
-      # that is still empty and is appended to it otherwise. Returns the
-      # unconsumed remainder of +path+.
-      def MultiplicativeExpr path, parsed
-        tree = []
-        rest = UnaryExpr( path, tree )
-        unless rest == path
-          while rest =~ /^\s*(\*| div | mod )\s*/
-            token = $1
-            operator = if token[0] == ?*
-                         :mult
-                       elsif token.include?( "div" )
-                         :div
-                       else
-                         :mod
-                       end
-            tree = [ operator, tree, [] ]
-            rest = UnaryExpr( $', tree[-1] )
-          end
-        end
-        unless tree.size == 0
-          if parsed.size == 0
-            parsed.replace( tree )
-          else
-            parsed << tree
-          end
-        end
-        rest
-      end
-
- #| '-' UnaryExpr
- #| UnionExpr
-      # Parses "'-'* UnionExpr". The leading run of minus signs is removed;
-      # when its length is odd, :neg is appended to +parsed+ ahead of the
-      # operand. Returns whatever UnionExpr leaves unconsumed.
-      def UnaryExpr path, parsed
-        path =~ /^(\-*)/
-        minus_signs = $1
-        path = $'
-        # an even number of minus signs cancels out
-        parsed << :neg if minus_signs and (minus_signs.size % 2) != 0
-
-        operand = []
-        path = UnionExpr( path, operand )
-        parsed.concat( operand )
-        path
-      end
-
- #| UnionExpr '|' PathExpr
- #| PathExpr
-      # Parses "PathExpr ( '|' PathExpr )*". Operands are combined
-      # left-associatively as [ :union, left, right ]. The result replaces
-      # +parsed+ when that is still empty and is appended to it otherwise.
-      # Returns the unconsumed remainder of +path+.
-      def UnionExpr path, parsed
-        tree = []
-        rest = PathExpr( path, tree )
-        unless rest == path
-          while rest =~ /^\s*(\|)\s*/
-            tree = [ :union, tree, [] ]
-            rest = PathExpr( $', tree[-1] )
-          end
-        end
-        unless tree.size == 0
-          if parsed.size == 0
-            parsed.replace( tree )
-          else
-            parsed << tree
-          end
-        end
-        rest
-      end
-
- #| LocationPath
- #| FilterExpr ('/' | '//') RelativeLocationPath
-      # Parses either a location path or a filter expression optionally
-      # followed by "/" or "//" and a relative location path. The parsed
-      # pieces are appended to +parsed+; the unconsumed remainder is returned.
-      #
-      # Leading whitespace is skipped first. When FilterExpr consumed
-      # something and a "/" follows, the rest is handed to
-      # RelativeLocationPath using the same collecting array, so the filter
-      # expression and the steps after it both end up in +parsed+.
-      def PathExpr path, parsed
-        path =~ /^\s*/
-        path = $'
-        n = []
-        rest = FilterExpr( path, n )
-        if rest != path and rest and rest[0] == ?/
-          rest = RelativeLocationPath( rest, n )
-        elsif rest =~ /\A[\/\.\@\[\w_*]/
-          rest = LocationPath( rest, n )
-        end
-        # every branch must publish what was collected
-        parsed.concat( n )
-        rest
-      end
-
- #| FilterExpr Predicate
- #| PrimaryExpr
-      # Parses a primary expression followed by optional predicates and
-      # appends the parsed pieces to +parsed+. Returns the unconsumed
-      # remainder of +path+.
-      def FilterExpr path, parsed
-        collected = []
-        rest = PrimaryExpr( path, collected )
-        rest = Predicate( rest, collected ) if rest and rest[0] == ?[
-        parsed.concat( collected )
-        rest
-      end
-
- #| VARIABLE_REFERENCE
- #| '(' expr ')'
- #| LITERAL
- #| NUMBER
- #| FunctionCall
- # "$" followed by a name at the start of the input; capture 1 is the name.
- VARIABLE_REFERENCE = /^\$(#{NAME_STR})/u
- # Unsigned decimal number: optional integer digits, an optional ".", and at
- # least one trailing digit; capture 1 is the matched number text.
- NUMBER = /^(\d*\.?\d+)/
- # Node-type names that must not be parsed as function calls. The
- # alternation is grouped so that the anchors apply to every alternative;
- # ungrouped, "^" binds only to the first name and "$" only to the last, and
- # a function name that merely contains "text" would be taken for a node type.
- NT = /^(?:comment|text|processing-instruction|node)$/
-      # Parses one primary expression at the start of +path+, appends its
-      # parsed form to +parsed+ and returns the unconsumed remainder:
-      #
-      #   $name        -> :variable, name
-      #   name( ...    -> :function, name, then whatever FunctionCall adds;
-      #                   node-type names (see NT) are NOT functions, and the
-      #                   path is returned untouched for them
-      #   number       -> :literal, Float if it contains "." else Integer
-      #   'str'/"str"  -> :literal, the string body
-      #   ( expr )     -> the parsed contents of the group
-      #
-      # When nothing matches, +path+ is returned unchanged.
-      def PrimaryExpr path, parsed
-        case path
-        when VARIABLE_REFERENCE
-          name = $1
-          path = $'
-          parsed << :variable
-          parsed << name
-        when /^(\w[-\w]*)(?:\()/
-          fname = $1
-          # saved before the NT match below overwrites the match globals
-          after_paren = $'
-          return path if fname =~ NT
-          parsed << :function
-          parsed << fname
-          path = FunctionCall(after_paren, parsed)
-        when NUMBER
-          digits = $1
-          path = $'
-          parsed << :literal
-          parsed << (digits.include?('.') ? digits.to_f : digits.to_i)
-        when LITERAL
-          # capture 1 is set for '...' literals, capture 2 for "..."
-          text = $1 || $2
-          path = $'
-          parsed << :literal
-          parsed << text
-        when /^\(/ #/
-          path, group = get_group(path)
-          inner = []
-          OrExpr( group[1..-2], inner )
-          parsed.concat(inner)
-        end
-        path
-      end
-
- #| FUNCTION_NAME '(' ( expr ( ',' expr )* )? ')'
-      # Parses the argument list of a function call. +rest+ is the text
-      # following the opening parenthesis. Each argument found by parse_args
-      # is parsed with OrExpr into its own array, and the array of those
-      # arrays is appended to +parsed+. Returns the text after the closing
-      # parenthesis as reported by parse_args.
-      def FunctionCall rest, parsed
-        path, arguments = parse_args(rest)
-        argset = []
-        arguments.each do |argument|
-          parsed_argument = []
-          OrExpr( argument, parsed_argument )
-          argset << parsed_argument
-        end
-        parsed << argset
-        path
-      end
-
- # get_group( '[foo]bar' ) -> ['bar', '[foo]']
-      # Splits a leading bracketed group off +string+.
-      #
-      # The first character is taken as the opening delimiter; the closing
-      # one is ")" for "(" and "]" for anything else. Nested delimiters of
-      # the same kind are balanced. Returns [remainder, group], with the
-      # delimiters included in group, or nil when the group is never closed:
-      #   get_group( '[foo]bar' ) -> ['bar', '[foo]']
-      def get_group string
-        opener = string[0,1]
-        closer = (opener == "(" ? ")" : "]")
-        depth = 0
-        pos = 0
-        while true
-          ch = string[pos,1]
-          if opener == ch
-            depth += 1
-          elsif closer == ch
-            depth -= 1
-          end
-          pos += 1
-          break unless depth > 0 and pos < string.length
-        end
-        return nil unless depth == 0
-        [string[pos..-1], string[0...pos]]
-      end
-
-      # Splits a function argument list into its top-level arguments.
-      #
-      # +string+ is the text following the opening parenthesis of the call.
-      # Commas separate arguments only at the call's own nesting level and
-      # only outside of quoted literals; parentheses inside quotes are not
-      # counted either. Arguments are stripped, and empty ones are dropped.
-      #
-      # Returns [remainder, arguments], where remainder is the text after the
-      # matching closing parenthesis, or nil when that parenthesis is missing.
-      def parse_args( string )
-        arguments = []
-        ind = 0
-        inquot = false
-        inapos = false
-        depth = 1
-        while true
-          quoted = (inquot or inapos)
-          case string[ind]
-          when ?"
-            inquot = !inquot unless inapos
-          when ?'
-            inapos = !inapos unless inquot
-          when ?(
-            # depth is at least 1 while scanning, so an opening parenthesis
-            # can only ever start a nested group
-            depth += 1 unless quoted
-          when ?)
-            unless quoted
-              depth -= 1
-              if depth == 0
-                s = string[0,ind].strip
-                arguments << s unless s == ""
-                string = string[ind+1..-1]
-              end
-            end
-          when ?,
-            if depth == 1 and not quoted
-              s = string[0,ind].strip
-              arguments << s unless s == ""
-              # continue scanning from the start of the text after the comma
-              string = string[ind+1..-1]
-              ind = -1
-            end
-          end
-          ind += 1
-          break unless depth > 0 and ind < string.length
-        end
-        return nil unless depth==0
-        [string,arguments]
-      end
- end
- end
-end