summaryrefslogtreecommitdiff
path: root/lib/rexml/parsers
diff options
context:
space:
mode:
authorser <ser@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2003-06-10 01:31:01 +0000
committerser <ser@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2003-06-10 01:31:01 +0000
commitea7a527a2ae7024a5cf2885dee8f7a5c21fedd5d (patch)
treed3e1f95a5acf262a9dd46e9663b7034bb285b406 /lib/rexml/parsers
parentca02190d8887ecd852e4e3f18f3a3ea91e9c6f7a (diff)
Initial revision
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@3925 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'lib/rexml/parsers')
-rw-r--r--lib/rexml/parsers/baseparser.rb391
-rw-r--r--lib/rexml/parsers/lightparser.rb56
-rw-r--r--lib/rexml/parsers/pullparser.rb143
-rw-r--r--lib/rexml/parsers/sax2parser.rb204
-rw-r--r--lib/rexml/parsers/streamparser.rb33
-rw-r--r--lib/rexml/parsers/ultralightparser.rb52
-rw-r--r--lib/rexml/parsers/xpathparser.rb598
7 files changed, 1477 insertions, 0 deletions
diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb
new file mode 100644
index 0000000000..e5c2cf7d96
--- /dev/null
+++ b/lib/rexml/parsers/baseparser.rb
@@ -0,0 +1,391 @@
+require 'rexml/parseexception'
+require 'rexml/source'
+
+module REXML
+ module Parsers
+ # = Using the Pull Parser
+ # <em>This API is experimental, and subject to change.</em>
+ # parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
+ # while parser.has_next?
+ # res = parser.next
+ # puts res[1]['att'] if res.start_tag? and res[0] == 'b'
+ # end
+ # See the PullEvent class for information on the content of the results.
+ # The data is identical to the arguments passed for the various events to
+ # the StreamListener API.
+ #
+ # Notice that:
+ # parser = PullParser.new( "<a>BAD DOCUMENT" )
+ # while parser.has_next?
+ # res = parser.next
+ # raise res[1] if res.error?
+ # end
+ #
+ # Nat Price gave me some good ideas for the API.
+ class BaseParser
+ NCNAME_STR= '[\w:][-\w\d.]*'
+ NAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}"
+
+ NAMECHAR = '[-\w\d\.:]'
+ NAME = "([\\w:]#{NAMECHAR}*)"
+ NMTOKEN = "(?:#{NAMECHAR})+"
+ NMTOKENS = "#{NMTOKEN}(\\s+#{NMTOKEN})*"
+ REFERENCE = "(?:&#{NAME};|&#\\d+;|&#x[0-9a-fA-F]+;)"
+
+ DOCTYPE_START = /\A\s*<!DOCTYPE\s/um
+ DOCTYPE_PATTERN = /\s*<!DOCTYPE\s+(.*?)(\[|>)/um
+ ATTRIBUTE_PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\2/um
+ COMMENT_START = /\A<!--/u
+ COMMENT_PATTERN = /<!--(.*?)-->/um
+ CDATA_START = /\A<!\[CDATA\[/u
+ CDATA_PATTERN = /<!\[CDATA\[(.*?)\]\]>/um
+ XMLDECL_START = /\A<\?xml\s/u;
+ XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>*/um
+ INSTRUCTION_START = /\A<\?/u
+ INSTRUCTION_PATTERN = /<\?(.*?)(\s+.*?)?\?>/um
+ TAG_MATCH = /^<((?>#{NAME_STR}))\s*((?>\s+#{NAME_STR}\s*=\s*(["']).*?\3)*)\s*(\/)?>/um
+ CLOSE_MATCH = /^\s*<\/(#{NAME_STR})\s*>/um
+
+ VERSION = /\bversion\s*=\s*["'](.*?)['"]/um
+ ENCODING = /\bencoding=["'](.*?)['"]/um
+ STANDALONE = /\bstandalone=["'](.*?)['"]/um
+
+ ENTITY_START = /^\s*<!ENTITY/
+ IDENTITY = /^([!\*\w-]+)(\s+#{NCNAME_STR})?(\s+["'].*?['"])?(\s+['"].*?["'])?/u
+ ELEMENTDECL_START = /^\s*<!ELEMENT/um
+ ELEMENTDECL_PATTERN = /^\s*(<!ELEMENT.*?)>/um
+ ENUMERATION = "\\(\\s*#{NMTOKEN}(?:\\s*\\|\\s*#{NMTOKEN})*\\s*\\)"
+ NOTATIONTYPE = "NOTATION\\s+\\(\\s*#{NAME}(?:\\s*\\|\\s*#{NAME})*\\s*\\)"
+ ENUMERATEDTYPE = "(?:(?:#{NOTATIONTYPE})|(?:#{ENUMERATION}))"
+ ATTTYPE = "(CDATA|ID|IDREF|IDREFS|ENTITY|ENTITIES|NMTOKEN|NMTOKENS|#{ENUMERATEDTYPE})"
+ ATTVALUE = "(?:\"((?:[^<&\"]|#{REFERENCE})*)\")|(?:'((?:[^<&']|#{REFERENCE})*)')"
+ DEFAULTDECL = "(#REQUIRED|#IMPLIED|(?:(#FIXED\\s+)?#{ATTVALUE}))"
+ ATTDEF = "\\s+#{NAME}\\s+#{ATTTYPE}\\s+#{DEFAULTDECL}"
+ ATTLISTDECL_START = /^\s*<!ATTLIST/um
+ ATTLISTDECL_PATTERN = /^\s*<!ATTLIST\s+#{NAME}(?:#{ATTDEF})*\s*>/um
+ NOTATIONDECL_START = /^\s*<!NOTATION/um
+ PUBLIC = /^\s*<!NOTATION\s+(\w[-\w]*)\s+(PUBLIC)\s+((["']).*?\4)\s*>/um
+ SYSTEM = /^\s*<!NOTATION\s+(\w[-\w]*)\s+(SYSTEM)\s+((["']).*?\4)\s*>/um
+
+ TEXT_PATTERN = /\A([^<]*)/um
+
+ # Entity constants
+ PUBIDCHAR = "\x20\x0D\x0Aa-zA-Z0-9-()+,./:=?;!*@$_%#"
+ SYSTEMLITERAL = %Q{((?:"[^"]*")|(?:'[^']*'))}
+ PUBIDLITERAL = %Q{("[#{PUBIDCHAR}']*"|'[#{PUBIDCHAR}]*')}
+ EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))"
+ NDATADECL = "\\s+NDATA\\s+#{NAME}"
+ PEREFERENCE = "%#{NAME};"
+ ENTITYVALUE = %Q{((?:"(?:[^%&"]|#{PEREFERENCE}|#{REFERENCE})*")|(?:'([^%&']|#{PEREFERENCE}|#{REFERENCE})*'))}
+ PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})"
+ ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
+ PEDECL = "<!ENTITY\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
+ GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
+ ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um
+
+ EREFERENCE = /&(?!#{NAME};)/
+
+ DEFAULT_ENTITIES = {
+ 'gt' => [/&gt;/, '&gt;', '>'],
+ 'lt' => [/&lt;/, '&lt;', '<'],
+ 'quot' => [/&quot;/, '&quot;', '"'],
+ "apos" => [/&apos;/, "&apos;", "'"]
+ }
+
+ def initialize( source )
+ self.stream = source
+ end
+
+ def stream=( source )
+ if source.kind_of? String
+ @source = Source.new(source)
+ elsif source.kind_of? IO
+ @source = IOSource.new(source)
+ elsif source.kind_of? Source
+ @source = source
+ else
+ raise "#{source.type} is not a valid input stream. It must be \n"+
+ "either a String, IO, or Source."
+ end
+ @closed = nil
+ @document_status = nil
+ @tags = []
+ @stack = []
+ @entities = []
+ end
+
+ # Returns true if there are no more events
+ def empty?
+ !has_next?
+ end
+
+ # Returns true if there are more events. Synonymous with !empty?
+ def has_next?
+ @source.read if @source.buffer.size==0 and !@source.empty?
+ (!@source.empty? and @source.buffer.strip.size>0) or @stack.size>0 or @closed
+ end
+
+ # Push an event back on the head of the stream. This method
+ # has (theoretically) infinite depth.
+ def unshift token
+ @stack.unshift(token)
+ end
+
+ # Peek at the +depth+ event in the stack. The first element on the stack
+ # is at depth 0. If +depth+ is -1, will parse to the end of the input
+ # stream and return the last event, which is always :end_document.
+ # Be aware that this causes the stream to be parsed up to the +depth+
+ # event, so you can effectively pre-parse the entire document (pull the
+ # entire thing into memory) using this method.
+ def peek depth=0
+ raise 'Illegal argument "#{depth}"' if depth < -1
+ temp = []
+ if depth == -1
+ temp.push(pull()) until empty?
+ else
+ while @stack.size+temp.size < depth+1
+ temp.push(pull())
+ end
+ end
+ @stack += temp if temp.size > 0
+ @stack[depth]
+ end
+
+ # Returns the next event. This is a +PullEvent+ object.
+ def pull
+ return [ :end_document ] if empty?
+ if @closed
+ x, @closed = @closed, nil
+ return [ :end_element, x ]
+ end
+ return @stack.shift if @stack.size > 0
+ @source.read if @source.buffer.size==0
+ if @document_status == nil
+ @source.match( /^\s*/um, true )
+ word = @source.match( /^\s*(<.*?)>/um )
+ word = word[1] unless word.nil?
+ case word
+ when COMMENT_START
+ return [ :comment, @source.match( COMMENT_PATTERN, true )[1] ]
+ when XMLDECL_START
+ results = @source.match( XMLDECL_PATTERN, true )[1]
+ version = VERSION.match( results )
+ version = version[1] unless version.nil?
+ encoding = ENCODING.match(results)
+ encoding = encoding[1] unless encoding.nil?
+ @source.encoding = encoding
+ standalone = STANDALONE.match(results)
+ standalone = standalone[1] unless standalone.nil?
+ return [ :xmldecl, version, encoding, standalone]
+ when INSTRUCTION_START
+ return [ :processing_instruction, *@source.match(INSTRUCTION_PATTERN, true)[1,2] ]
+ when DOCTYPE_START
+ md = @source.match( DOCTYPE_PATTERN, true )
+ identity = md[1]
+ close = md[2]
+ identity =~ IDENTITY
+ name = $1
+ raise "DOCTYPE is missing a name" if name.nil?
+ pub_sys = $2.nil? ? nil : $2.strip
+ long_name = $3.nil? ? nil : $3.strip
+ uri = $4.nil? ? nil : $4.strip
+ args = [ :start_doctype, name, pub_sys, long_name, uri ]
+ if close == ">"
+ @document_status = :after_doctype
+ @source.read if @source.buffer.size==0
+ md = @source.match(/^\s*/um, true)
+ @stack << [ :end_doctype ]
+ else
+ @document_status = :in_doctype
+ end
+ return args
+ else
+ @document_status = :after_doctype
+ @source.read if @source.buffer.size==0
+ md = @source.match(/\s*/um, true)
+ end
+ end
+ if @document_status == :in_doctype
+ md = @source.match(/\s*(.*?>)/um)
+ case md[1]
+ when ELEMENTDECL_START
+ return [ :elementdecl, @source.match( ELEMENTDECL_PATTERN, true )[1] ]
+ when ENTITY_START
+ match = @source.match( ENTITYDECL, true ).to_a.compact
+ match[0] = :entitydecl
+ ref = false
+ if match[1] == '%'
+ ref = true
+ match.delete_at 1
+ end
+ # Now we have to sort out what kind of entity reference this is
+ if match[2] == 'SYSTEM'
+ # External reference
+ match[3] = match[3][1..-2] # PUBID
+ match.delete_at(4) if match.size > 4 # Chop out NDATA decl
+ # match is [ :entity, name, SYSTEM, pubid(, ndata)? ]
+ elsif match[2] == 'PUBLIC'
+ # External reference
+ match[3] = match[3][1..-2] # PUBID
+ match[4] = match[4][1..-2] # HREF
+ # match is [ :entity, name, PUBLIC, pubid, href ]
+ else
+ match[2] = match[2][1..-2]
+ match.pop if match.size == 4
+ # match is [ :entity, name, value ]
+ end
+ match << '%' if ref
+ return match
+ when ATTLISTDECL_START
+ md = @source.match( ATTLISTDECL_PATTERN, true )
+ raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
+ element = md[1]
+ contents = md[0]
+
+ pairs = {}
+ values = md[0].scan( ATTDEF )
+ values.each do |attdef|
+ unless attdef[3] == "#IMPLIED"
+ attdef.compact!
+ val = attdef[3]
+ val = attdef[4] if val == "#FIXED "
+ pairs[attdef[0]] = val
+ end
+ end
+ return [ :attlistdecl, element, pairs, contents ]
+ when NOTATIONDECL_START
+ md = nil
+ if @source.match( PUBLIC )
+ md = @source.match( PUBLIC, true )
+ elsif @source.match( SYSTEM )
+ md = @source.match( SYSTEM, true )
+ else
+ raise REXML::ParseException.new( "error parsing notation: no matching pattern", @source )
+ end
+ return [ :notationdecl, md[1], md[2], md[3] ]
+ when /^\s*]\s*>/um
+ @document_status = :after_doctype
+ @source.match( /^\s*]\s*>/um, true )
+ return [ :end_doctype ]
+ end
+ end
+ begin
+ if @source.buffer[0] == ?<
+ if @source.buffer[1] == ?/
+ last_tag = @tags.pop
+ md = @source.match( CLOSE_MATCH, true )
+ raise REXML::ParseException.new( "Missing end tag for '#{last_tag}' "+
+ "(got \"#{md[1]}\")", @source) unless last_tag == md[1]
+ return [ :end_element, last_tag ]
+ elsif @source.buffer[1] == ?!
+ md = @source.match(/\A(\s*[^>]*>)/um)
+ #puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
+ raise REXML::ParseException.new("Malformed node", @source) unless md
+ case md[1]
+ when CDATA_START
+ return [ :cdata, @source.match( CDATA_PATTERN, true )[1] ]
+ when COMMENT_START
+ return [ :comment, @source.match( COMMENT_PATTERN, true )[1] ]
+ else
+ raise REXML::ParseException.new( "Declarations can only occur "+
+ "in the doctype declaration.", @source)
+ end
+ elsif @source.buffer[1] == ??
+ md = @source.match( INSTRUCTION_PATTERN, true )
+ return [ :processing_instruction, md[1], md[2] ]
+ else
+ # Get the next tag
+ md = @source.match(TAG_MATCH, true)
+ raise REXML::ParseException.new("malformed XML: missing tag start", @source) unless md
+ attrs = []
+ if md[2].size > 0
+ attrs = md[2].scan( ATTRIBUTE_PATTERN )
+ raise REXML::ParseException.new( "error parsing attributes: [#{attrs.join ', '}], excess = \"#$'\"", @source) if $' and $'.strip.size > 0
+ end
+
+ if md[4]
+ @closed = md[1]
+ else
+ @tags.push( md[1] )
+ end
+ attributes = {}
+ attrs.each { |a,b,c| attributes[a] = c }
+ return [ :start_element, md[1], attributes ]
+ end
+ else
+ md = @source.match(TEXT_PATTERN, true)
+ raise "no text to add" if md[0].length == 0
+ # unnormalized = Text::unnormalize( md[1], self )
+ # return PullEvent.new( :text, md[1], unnormalized )
+ return [ :text, md[1] ]
+ end
+ rescue REXML::ParseException
+ raise $!
+ rescue Exception, NameError => error
+ raise REXML::ParseException.new( "Exception parsing",
+ @source, self, error )
+ end
+ return [ :dummy ]
+ end
+
+ def entity( reference, entities )
+ value = nil
+ value = entities[ reference ] if entities
+ if not value
+ value = DEFAULT_ENTITIES[ reference ]
+ value = value[2] if value
+ end
+ unnormalize( value, entities ) if value
+ end
+
+ # Escapes all possible entities
+ def normalize( input, entities=nil, entity_filter=nil )
+ copy = input.clone
+ # Doing it like this rather than in a loop improves the speed
+ copy.gsub!( EREFERENCE, '&amp;' )
+ entities.each do |key, value|
+ copy.gsub!( value, "&#{key};" ) unless entity_filter and
+ entity_filter.include?(entity)
+ end if entities
+ copy.gsub!( EREFERENCE, '&amp;' )
+ DEFAULT_ENTITIES.each do |key, value|
+ copy.gsub!( value[2], value[1] )
+ end
+ copy
+ end
+
+ # Unescapes all possible entities
+ def unnormalize( string, entities=nil, filter=nil )
+ rv = string.clone
+ rv.gsub!( /\r\n?/, "\n" )
+ matches = rv.scan( REFERENCE)
+ return rv if matches.size == 0
+ rv.gsub!( /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/ ) {|m|
+ m=$1
+ m = "0#{m}" if m[0] == ?x
+ [Integer(m)].pack('U*')
+ }
+ matches.collect!{|x|x[0]}.compact!
+ if matches.size > 0
+ matches.each do |entity_reference|
+ unless filter and filter.include?(entity_reference)
+ entity_value = entity( entity_reference, entities )
+ if entity_value
+ re = /&#{entity_reference};/
+ rv.gsub!( re, entity_value )
+ end
+ end
+ end
+ matches.each do |entity_reference|
+ unless filter and filter.include?(entity_reference)
+ er = DEFAULT_ENTITIES[entity_reference]
+ rv.gsub!( er[0], er[2] ) if er
+ end
+ end
+ rv.gsub!( /&amp;/, '&' )
+ end
+ rv
+ end
+ end
+ end
+end
diff --git a/lib/rexml/parsers/lightparser.rb b/lib/rexml/parsers/lightparser.rb
new file mode 100644
index 0000000000..e2f083bc8e
--- /dev/null
+++ b/lib/rexml/parsers/lightparser.rb
@@ -0,0 +1,56 @@
+require 'rexml/parsers/streamparser'
+require 'rexml/parsers/baseparser'
+require 'rexml/light/node'
+
+module REXML
+ module Parsers
+ class LightParser
+ def initialize stream
+ @stream = stream
+ @parser = REXML::Parsers::BaseParser.new( stream )
+ end
+
+ def rewind
+ @stream.rewind
+ @parser.stream = @stream
+ end
+
+ def parse
+ root = context = REXML::Light::Node.new([ :document ])
+ while true
+ event = @parser.pull
+ case event[0]
+ when :end_document
+ break
+ when :end_doctype
+ context = context.parent
+ when :start_element, :start_doctype
+ new_node = REXML::Light::Node.new(event)
+ context << new_node
+ new_node.parent = context
+ context = new_node
+ when :end_element, :end_doctype
+ context = context.parent
+ else
+ new_node = REXML::Light::Node.new(event)
+ context << new_node
+ new_node.parent = context
+ end
+ end
+ root
+ end
+ end
+
+ # An element is an array. The array contains:
+ # 0 The parent element
+ # 1 The tag name
+ # 2 A hash of attributes
+ # 3..-1 The child elements
+ # An element is an array of size > 3
+ # Text is a String
+ # PIs are [ :processing_instruction, target, data ]
+ # Comments are [ :comment, data ]
+ # DocTypes are DocType structs
+ # The root is an array with XMLDecls, Text, DocType, Array, Text
+ end
+end
diff --git a/lib/rexml/parsers/pullparser.rb b/lib/rexml/parsers/pullparser.rb
new file mode 100644
index 0000000000..aeda6251fe
--- /dev/null
+++ b/lib/rexml/parsers/pullparser.rb
@@ -0,0 +1,143 @@
+require 'rexml/parseexception'
+require 'rexml/parsers/baseparser'
+require 'rexml/xmltokens'
+
+module REXML
+ module Parsers
+ # = Using the Pull Parser
+ # <em>This API is experimental, and subject to change.</em>
+ # parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
+ # while parser.has_next?
+ # res = parser.next
+ # puts res[1]['att'] if res.start_tag? and res[0] == 'b'
+ # end
+ # See the PullEvent class for information on the content of the results.
+ # The data is identical to the arguments passed for the various events to
+ # the StreamListener API.
+ #
+ # Notice that:
+ # parser = PullParser.new( "<a>BAD DOCUMENT" )
+ # while parser.has_next?
+ # res = parser.next
+ # raise res[1] if res.error?
+ # end
+ #
+ # Nat Price gave me some good ideas for the API.
+ class PullParser < BaseParser
+ include XMLTokens
+
+ def initialize stream
+ super
+ @entities = {}
+ end
+
+ def each
+ while has_next?
+ yield self.pull
+ end
+ end
+
+ def peek depth=0
+ PullEvent.new(super)
+ end
+
+ def pull
+ event = super
+ case event[0]
+ when :entitydecl
+ @entities[ event[1] ] =
+ event[2] unless event[2] =~ /PUBLIC|SYSTEM/
+ when :text
+ unnormalized = unnormalize( event[1], @entities )
+ event << unnormalized
+ end
+ PullEvent.new( event )
+ end
+ end
+
+ # A parsing event. The contents of the event are accessed as an +Array?,
+ # and the type is given either by the ...? methods, or by accessing the
+ # +type+ accessor. The contents of this object vary from event to event,
+ # but are identical to the arguments passed to +StreamListener+s for each
+ # event.
+ class PullEvent
+ # The type of this event. Will be one of :tag_start, :tag_end, :text,
+ # :processing_instruction, :comment, :doctype, :attlistdecl, :entitydecl,
+ # :notationdecl, :entity, :cdata, :xmldecl, or :error.
+ def initialize(arg)
+ @contents = arg
+ end
+ def []( index )
+ @contents[index+1]
+ end
+ def event_type
+ @contents[0]
+ end
+ # Content: [ String tag_name, Hash attributes ]
+ def start_element?
+ @contents[0] == :start_element
+ end
+ # Content: [ String tag_name ]
+ def end_element?
+ @contents[0] == :end_element
+ end
+ # Content: [ String raw_text, String unnormalized_text ]
+ def text?
+ @contents[0] == :text
+ end
+ # Content: [ String text ]
+ def instruction?
+ @contents[0] == :processing_instruction
+ end
+ # Content: [ String text ]
+ def comment?
+ @contents[0] == :comment
+ end
+ # Content: [ String name, String pub_sys, String long_name, String uri ]
+ def doctype?
+ @contents[0] == :start_doctype
+ end
+ # Content: [ String text ]
+ def attlistdecl?
+ @contents[0] == :attlistdecl
+ end
+ # Content: [ String text ]
+ def elementdecl?
+ @contents[0] == :elementdecl
+ end
+ # Due to the wonders of DTDs, an entity declaration can be just about
+ # anything. There's no way to normalize it; you'll have to interpret the
+ # content yourself. However, the following is true:
+ #
+ # * If the entity declaration is an internal entity:
+ # [ String name, String value ]
+ # Content: [ String text ]
+ def entitydecl?
+ @contents[0] == :entitydecl
+ end
+ # Content: [ String text ]
+ def notationdecl?
+ @contents[0] == :notationdecl
+ end
+ # Content: [ String text ]
+ def entity?
+ @contents[0] == :entity
+ end
+ # Content: [ String text ]
+ def cdata?
+ @contents[0] == :cdata
+ end
+ # Content: [ String version, String encoding, String standalone ]
+ def xmldecl?
+ @contents[0] == :xmldecl
+ end
+ def error?
+ @contents[0] == :error
+ end
+
+ def inspect
+ @contents[0].to_s + ": " + @contents[1..-1].inspect
+ end
+ end
+ end
+end
diff --git a/lib/rexml/parsers/sax2parser.rb b/lib/rexml/parsers/sax2parser.rb
new file mode 100644
index 0000000000..8598fd43e9
--- /dev/null
+++ b/lib/rexml/parsers/sax2parser.rb
@@ -0,0 +1,204 @@
+module REXML
+ module Parsers
+ class SAX2Parser
+ def initialize source
+ @parser = BaseParser.new(source)
+ @listeners = []
+ @procs = []
+ @namespace_stack = []
+ @has_listeners = false
+ @tag_stack = []
+ end
+
+ # Listen arguments:
+ #
+ # Symbol, Array, Block
+ # Listen to Symbol events on Array elements
+ # Symbol, Block
+ # Listen to Symbol events
+ # Array, Listener
+ # Listen to all events on Array elements
+ # Array, Block
+ # Listen to :start_element events on Array elements
+ # Listener
+ # Listen to All events
+ #
+ # Symbol can be one of: :start_element, :end_element,
+ # :start_prefix_mapping, :end_prefix_mapping, :characters,
+ # :processing_instruction, :doctype, :attlistdecl, :elementdecl,
+ # :entitydecl, :notationdecl, :cdata, :xmldecl, :comment
+ #
+ # Array contains regular expressions or strings which will be matched
+ # against fully qualified element names.
+ #
+ # Listener must implement the methods in SAX2Listener
+ #
+ # Block will be passed the same arguments as a SAX2Listener method would
+ # be, where the method name is the same as the matched Symbol.
+ # See the SAX2Listener for more information.
+ def listen( *args, &blok )
+ if args[0].kind_of? Symbol
+ if args.size == 2
+ args[1].each { |match| @procs << [args[0], match, blok] }
+ else
+ add( [args[0], /.*/, blok] )
+ end
+ elsif args[0].kind_of? Array
+ if args.size == 2
+ args[0].each { |match| add( [nil, match, args[1]] ) }
+ else
+ args[0].each { |match| add( [ :start_element, match, blok ] ) }
+ end
+ else
+ add([nil, /.*/, args[0]])
+ end
+ end
+
+ def deafen( listener=nil, &blok )
+ if listener
+ @listeners.delete_if {|item| item[-1] == listener }
+ @has_listeners = false if @listeners.size == 0
+ else
+ @procs.delete_if {|item| item[-1] == blok }
+ end
+ end
+
+ def parse
+ @procs.each { |sym,match,block| block.call if sym == :start_document }
+ @listeners.each { |sym,match,block|
+ block.start_document if sym == :start_document or sym.nil?
+ }
+ root = context = []
+ while true
+ event = @parser.pull
+ case event[0]
+ when :end_document
+ handle( :end_document )
+ break
+ when :end_doctype
+ context = context[1]
+ when :start_element
+ @tag_stack.push(event[1])
+ # find the observers for namespaces
+ procs = get_procs( :start_prefix_mapping, event[1] )
+ listeners = get_listeners( :start_prefix_mapping, event[1] )
+ if procs or listeners
+ # break out the namespace declarations
+ # The attributes live in event[2]
+ nsdecl = event[2].find_all { |n, value| n =~ /^xmlns:/ }
+ nsdecl.collect! { |n, value| [ n[6..-1], value ] }
+ @namespace_stack.push({})
+ nsdecl.each do |n,v|
+ @namespace_stack[-1][n] = v
+ # notify observers of namespaces
+ procs.each { |ob| ob.call( n, v ) } if procs
+ listeners.each { |ob| ob.start_prefix_mapping(n, v) } if listeners
+ end
+ end
+ event[1] =~ Namespace::NAMESPLIT
+ prefix = $1
+ local = $2
+ uri = get_namespace(prefix)
+ # find the observers for start_element
+ procs = get_procs( :start_element, event[1] )
+ listeners = get_listeners( :start_element, event[1] )
+ # notify observers
+ procs.each { |ob| ob.call( uri, local, event[1], event[2] ) } if procs
+ listeners.each { |ob|
+ ob.start_element( uri, local, event[1], event[2] )
+ } if listeners
+ when :end_element
+ @tag_stack.pop
+ event[1] =~ Namespace::NAMESPLIT
+ prefix = $1
+ local = $2
+ uri = get_namespace(prefix)
+ # find the observers for start_element
+ procs = get_procs( :end_element, event[1] )
+ listeners = get_listeners( :end_element, event[1] )
+ # notify observers
+ procs.each { |ob| ob.call( uri, local, event[1] ) } if procs
+ listeners.each { |ob|
+ ob.end_element( uri, local, event[1] )
+ } if listeners
+
+ namespace_mapping = @namespace_stack.pop
+ # find the observers for namespaces
+ procs = get_procs( :end_prefix_mapping, event[1] )
+ listeners = get_listeners( :end_prefix_mapping, event[1] )
+ if procs or listeners
+ namespace_mapping.each do |prefix, uri|
+ # notify observers of namespaces
+ procs.each { |ob| ob.call( prefix ) } if procs
+ listeners.each { |ob| ob.end_prefix_mapping(prefix) } if listeners
+ end
+ end
+ when :text
+ normalized = @parser.normalize( event[1] )
+ handle( :characters, normalized )
+ when :processing_instruction, :comment, :doctype, :attlistdecl,
+ :elementdecl, :entitydecl, :cdata, :notationdecl, :xmldecl
+ handle( *event )
+ end
+ end
+ end
+
+ private
+ def handle( symbol, *arguments )
+ tag = @tag_stack[-1]
+ procs = get_procs( symbol, tag )
+ listeners = get_listeners( symbol, tag )
+ # notify observers
+ procs.each { |ob| ob.call( *arguments ) } if procs
+ listeners.each { |l|
+ l.send( symbol.to_s, *arguments )
+ } if listeners
+ end
+
+ # The following methods are duplicates, but it is faster than using
+ # a helper
+ def get_procs( symbol, name )
+ return nil if @procs.size == 0
+ @procs.find_all do |sym, match, block|
+ (
+ (sym.nil? or symbol == sym) and
+ (name.nil? or (
+ (name == match) or
+ (match.kind_of? Regexp and name =~ match)
+ )
+ )
+ )
+ end.collect{|x| x[-1]}
+ end
+ def get_listeners( symbol, name )
+ return nil if @listeners.size == 0
+ @listeners.find_all do |sym, match, block|
+ (
+ (sym.nil? or symbol == sym) and
+ (name.nil? or (
+ (name == match) or
+ (match.kind_of? Regexp and name =~ match)
+ )
+ )
+ )
+ end.collect{|x| x[-1]}
+ end
+
+ def add( pair )
+ if pair[-1].kind_of? Proc
+ @procs << pair unless @procs.include? pair
+ else
+ @listeners << pair unless @listeners.include? pair
+ @has_listeners = true
+ end
+ end
+
+ def get_namespace( prefix )
+ uri = @namespace_stack.find do |ns|
+ not ns[prefix].nil?
+ end
+ uri[prefix] unless uri.nil?
+ end
+ end
+ end
+end
diff --git a/lib/rexml/parsers/streamparser.rb b/lib/rexml/parsers/streamparser.rb
new file mode 100644
index 0000000000..51441289d9
--- /dev/null
+++ b/lib/rexml/parsers/streamparser.rb
@@ -0,0 +1,33 @@
+module REXML
+ module Parsers
+ class StreamParser
+ def initialize source, listener
+ @listener = listener
+ @parser = BaseParser.new( source )
+ end
+
+ def parse
+ # entity string
+ while true
+ event = @parser.pull
+ case event[0]
+ when :end_document
+ return
+ when :start_element
+ @listener.tag_start( event[1], event[2] )
+ when :end_element
+ @listener.tag_end( event[1] )
+ when :text
+ normalized = @parser.unnormalize( event[1] )
+ @listener.text( normalized )
+ when :processing_instruction
+ @listener.instruction( *event[1,2] )
+ when :comment, :doctype, :attlistdecl,
+ :elementdecl, :entitydecl, :cdata, :notationdecl, :xmldecl
+ @listener.send( event[0].to_s, *event[1..-1] )
+ end
+ end
+ end
+ end
+ end
+end
diff --git a/lib/rexml/parsers/ultralightparser.rb b/lib/rexml/parsers/ultralightparser.rb
new file mode 100644
index 0000000000..f3b208bf0f
--- /dev/null
+++ b/lib/rexml/parsers/ultralightparser.rb
@@ -0,0 +1,52 @@
+require 'rexml/parsers/streamparser'
+require 'rexml/parsers/baseparser'
+
+module REXML
+ module Parsers
+ class UltraLightParser
+ def initialize stream
+ @stream = stream
+ @parser = REXML::Parsers::BaseParser.new( stream )
+ end
+
+ def rewind
+ @stream.rewind
+ @parser.stream = @stream
+ end
+
+ def parse
+ root = context = []
+ while true
+ event = @parser.pull
+ case event[0]
+ when :end_document
+ break
+ when :end_doctype
+ context = context[1]
+ when :start_element, :doctype
+ context << event
+ event[1,0] = [context]
+ context = event
+ when :end_element
+ context = context[1]
+ else
+ context << event
+ end
+ end
+ root
+ end
+ end
+
+ # An element is an array. The array contains:
+ # 0 The parent element
+ # 1 The tag name
+ # 2 A hash of attributes
+ # 3..-1 The child elements
+ # An element is an array of size > 3
+ # Text is a String
+ # PIs are [ :processing_instruction, target, data ]
+ # Comments are [ :comment, data ]
+ # DocTypes are DocType structs
+ # The root is an array with XMLDecls, Text, DocType, Array, Text
+ end
+end
diff --git a/lib/rexml/parsers/xpathparser.rb b/lib/rexml/parsers/xpathparser.rb
new file mode 100644
index 0000000000..da27e7c705
--- /dev/null
+++ b/lib/rexml/parsers/xpathparser.rb
@@ -0,0 +1,598 @@
+require 'rexml/namespace'
+require 'rexml/xmltokens'
+
+module REXML
+ module Parsers
+ # You don't want to use this class. Really. Use XPath, which is a wrapper
+ # for this class. Believe me. You don't want to poke around in here.
+ # There is strange, dark magic at work in this code. Beware. Go back! Go
+ # back while you still can!
+ class XPathParser
+ include XMLTokens
+ LITERAL = /^'([^']*)'|^"([^"]*)"/u
+
+ def namespaces=( namespaces )
+ Functions::namespace_context = namespaces
+ @namespaces = namespaces
+ end
+
+ def parse path
+ path.gsub!(/([\(\[])\s+/, '\1') # Strip ignorable spaces
+ path.gsub!( /\s+([\]\)])/, '\1' )
+ parsed = []
+ path = LocationPath(path, parsed)
+ parsed
+ end
+
+ def predicate path
+ parsed = []
+ Predicate( "[#{path}]", parsed )
+ parsed
+ end
+
+ def to_string( path )
+ string = ""
+ while path.size > 0
+ case path[0]
+ when :ancestor, :ancestor_or_self, :attribute, :child, :descendant, :descendant_or_self, :following, :following_sibling, :namespace, :parent, :preceding, :preceding_sibling, :self
+ op = path.shift
+ string << "/" unless string.size == 0
+ string << op.to_s
+ string << "::"
+ when :any
+ path.shift
+ string << "*"
+ when :qname
+ path.shift
+ prefix = path.shift
+ name = path.shift
+ string << prefix+":" if prefix.size > 0
+ string << name
+ when :predicate
+ path.shift
+ string << '['
+ string << predicate_to_string( path.shift )
+ string << ' ]'
+ else
+ string << "/" unless string.size == 0
+ string << "UNKNOWN("
+ string << path.shift.inspect
+ string << ")"
+ end
+ end
+ return string
+ end
+
+ def predicate_to_string( path )
+ string = ""
+ case path[0]
+ when :and, :or, :mult, :plus, :minus, :neq, :eq, :lt, :gt, :lteq, :gteq, :div, :mod, :neq, :union
+ op = path.shift
+ left = predicate_to_string( path.shift )
+ right = predicate_to_string( path.shift )
+ string << " "
+ string << left
+ string << " "
+ string << op.to_s
+ string << " "
+ string << right
+ string << " "
+ when :function
+ path.shift
+ name = path.shift
+ string << name
+ string << "( "
+ string << predicate_to_string( path.shift )
+ string << " )"
+ when :literal
+ path.shift
+ string << " "
+ string << path.shift.inspect
+ string << " "
+ else
+ string << " "
+ string << to_string( path )
+ string << " "
+ end
+ return string.squeeze(" ")
+ end
+
+ private
+ #LocationPath
+ # | RelativeLocationPath
+ # | '/' RelativeLocationPath?
+ # | '//' RelativeLocationPath
+ def LocationPath path, parsed
+ #puts "LocationPath '#{path}'"
+ path = path.strip
+ if path[0] == ?/
+ parsed << :document
+ if path[1] == ?/
+ parsed << :descendant_or_self
+ parsed << :node
+ path = path[2..-1]
+ else
+ path = path[1..-1]
+ end
+ end
+ #puts parsed.inspect
+ return RelativeLocationPath( path, parsed ) if path.size > 0
+ end
+
+ #RelativeLocationPath
+ # | Step
+ # | (AXIS_NAME '::' | '@' | '') AxisSpecifier
+ # NodeTest
+ # Predicate
+ # | '.' | '..' AbbreviatedStep
+ # | RelativeLocationPath '/' Step
+ # | RelativeLocationPath '//' Step
+ AXIS = /^(ancestor|ancestor-or-self|attribute|child|descendant|descendant-or-self|following|following-sibling|namespace|parent|preceding|preceding-sibling|self)::/
+ def RelativeLocationPath path, parsed
+ #puts "RelativeLocationPath #{path}"
+ while path.size > 0
+ # (axis or @ or <child::>) nodetest predicate >
+ # OR > / Step
+ # (. or ..) >
+ if path[0] == ?.
+ if path[1] == ?.
+ parsed << :parent
+ parsed << :node
+ path = path[2..-1]
+ else
+ parsed << :self
+ parsed << :node
+ path = path[1..-1]
+ end
+ else
+ if path[0] == ?@
+ #puts "ATTRIBUTE"
+ parsed << :attribute
+ path = path[1..-1]
+ # Goto Nodetest
+ elsif path =~ AXIS
+ parsed << $1.tr('-','_').intern
+ path = $'
+ # Goto Nodetest
+ else
+ parsed << :child
+ end
+
+ #puts "NODETESTING '#{path}'"
+ n = []
+ path = NodeTest( path, n)
+ #puts "NODETEST RETURNED '#{path}'"
+
+ if path[0] == ?[
+ path = Predicate( path, n )
+ end
+
+ parsed.concat(n)
+ end
+
+ if path.size > 0
+ if path[0] == ?/
+ if path[1] == ?/
+ parsed << :descendant_or_self
+ parsed << :node
+ path = path[2..-1]
+ else
+ path = path[1..-1]
+ end
+ else
+ return path
+ end
+ end
+ end
+ return path
+ end
+
+ # Returns a 1-1 map of the nodeset
+ # The contents of the resulting array are either:
+ # true/false, if a positive match
+ # String, if a name match
+ #NodeTest
+ # | ('*' | NCNAME ':' '*' | QNAME) NameTest
+ # | NODE_TYPE '(' ')' NodeType
+ # | PI '(' LITERAL ')' PI
+ # | '[' expr ']' Predicate
+ NCNAMETEST= /^(#{NCNAME_STR}):\*/u
+ QNAME = Namespace::NAMESPLIT
+ NODE_TYPE = /^(comment|text|node)\(\s*\)/m
+ PI = /^processing-instruction\(/
+ def NodeTest path, parsed
+ #puts "NodeTest with #{path}"
+ res = nil
+ case path
+ when /^\*/
+ path = $'
+ parsed << :any
+ when NODE_TYPE
+ type = $1
+ path = $'
+ parsed << type.tr('-', '_').intern
+ when PI
+ path = $'
+ literal = nil
+ if path !~ /^\s*\)/
+ path =~ LITERAL
+ literal = $1
+ path = $'
+ raise ParseException.new("Missing ')' after processing instruction") if path[0] != ?)
+ path = path[1..-1]
+ end
+ parsed << :processing_instruction
+ parsed << literal
+ when NCNAMETEST
+ #puts "NCNAMETEST"
+ prefix = $1
+ path = $'
+ parsed << :namespace
+ parsed << prefix
+ when QNAME
+ #puts "QNAME"
+ prefix = $1
+ name = $2
+ path = $'
+ prefix = "" unless prefix
+ parsed << :qname
+ parsed << prefix
+ parsed << name
+ end
+ return path
+ end
+
+ # Filters the supplied nodeset on the predicate(s)
+ def Predicate path, parsed
+ #puts "PREDICATE with #{path}"
+ return nil unless path[0] == ?[
+ predicates = []
+ while path[0] == ?[
+ path, expr = get_group(path)
+ predicates << expr[1..-2] if expr
+ end
+ #puts "PREDICATES = #{predicates.inspect}"
+ predicates.each{ |expr|
+ #puts "ORING #{expr}"
+ preds = []
+ parsed << :predicate
+ parsed << preds
+ OrExpr(expr, preds)
+ }
+ #puts "PREDICATES = #{predicates.inspect}"
+ path
+ end
+
+ # The following return arrays of true/false, a 1-1 mapping of the
+ # supplied nodeset, except for axe(), which returns a filtered
+ # nodeset
+
+ #| OrExpr S 'or' S AndExpr
+ #| AndExpr
+ def OrExpr path, parsed
+ #puts "OR >>> #{path}"
+ n = []
+ rest = AndExpr( path, n )
+ #puts "OR <<< #{rest}"
+ if rest != path
+ while rest =~ /^\s*( or )/
+ n = [ :or, n, [] ]
+ rest = AndExpr( $', n[-1] )
+ end
+ end
+ if parsed.size == 0 and n.size != 0
+ parsed.replace(n)
+ elsif n.size > 0
+ parsed << n
+ end
+ rest
+ end
+
+ #| AndExpr S 'and' S EqualityExpr
+ #| EqualityExpr
+ def AndExpr path, parsed
+ #puts "AND >>> #{path}"
+ n = []
+ rest = EqualityExpr( path, n )
+ #puts "AND <<< #{rest}"
+ if rest != path
+ while rest =~ /^\s*( and )/
+ n = [ :and, n, [] ]
+ #puts "AND >>> #{rest}"
+ rest = EqualityExpr( $', n[-1] )
+ #puts "AND <<< #{rest}"
+ end
+ end
+ if parsed.size == 0 and n.size != 0
+ parsed.replace(n)
+ elsif n.size > 0
+ parsed << n
+ end
+ rest
+ end
+
+ #| EqualityExpr ('=' | '!=') RelationalExpr
+ #| RelationalExpr
+ def EqualityExpr path, parsed
+ #puts "EQUALITY >>> #{path}"
+ n = []
+ rest = RelationalExpr( path, n )
+ #puts "EQUALITY <<< #{rest}"
+ if rest != path
+ while rest =~ /^\s*(!?=)\s*/
+ if $1[0] == ?!
+ n = [ :neq, n, [] ]
+ else
+ n = [ :eq, n, [] ]
+ end
+ rest = RelationalExpr( $', n[-1] )
+ end
+ end
+ if parsed.size == 0 and n.size != 0
+ parsed.replace(n)
+ elsif n.size > 0
+ parsed << n
+ end
+ rest
+ end
+
+ #| RelationalExpr ('<' | '>' | '<=' | '>=') AdditiveExpr
+ #| AdditiveExpr
+ def RelationalExpr path, parsed
+ #puts "RELATION >>> #{path}"
+ n = []
+ rest = AdditiveExpr( path, n )
+ #puts "RELATION <<< #{rest}"
+ if rest != path
+ while rest =~ /^\s*([<>]=?)\s*/
+ if $1[0] == ?<
+ sym = "lt"
+ else
+ sym = "gt"
+ end
+ sym << "eq" if $1[-1] == ?=
+ n = [ sym.intern, n, [] ]
+ rest = AdditiveExpr( $', n[-1] )
+ end
+ end
+ if parsed.size == 0 and n.size != 0
+ parsed.replace(n)
+ elsif n.size > 0
+ parsed << n
+ end
+ rest
+ end
+
+ #| AdditiveExpr ('+' | S '-') MultiplicativeExpr
+ #| MultiplicativeExpr
+ def AdditiveExpr path, parsed
+ #puts "ADDITIVE >>> #{path}"
+ n = []
+ rest = MultiplicativeExpr( path, n )
+ #puts "ADDITIVE <<< #{rest}"
+ if rest != path
+ while rest =~ /^\s*(\+| -)\s*/
+ if $1[0] == ?+
+ n = [ :plus, n, [] ]
+ else
+ n = [ :minus, n, [] ]
+ end
+ rest = MultiplicativeExpr( $', n[-1] )
+ end
+ end
+ if parsed.size == 0 and n.size != 0
+ parsed.replace(n)
+ elsif n.size > 0
+ parsed << n
+ end
+ rest
+ end
+
+ #| MultiplicativeExpr ('*' | S ('div' | 'mod') S) UnaryExpr
+ #| UnaryExpr
+ def MultiplicativeExpr path, parsed
+ #puts "MULT >>> #{path}"
+ n = []
+ rest = UnaryExpr( path, n )
+ #puts "MULT <<< #{rest}"
+ if rest != path
+ while rest =~ /^\s*(\*| div | mod )\s*/
+ if $1[0] == ?*
+ n = [ :mult, n, [] ]
+ elsif $1.include?( "div" )
+ n = [ :div, n, [] ]
+ else
+ n = [ :mod, n, [] ]
+ end
+ rest = UnaryExpr( $', n[-1] )
+ end
+ end
+ if parsed.size == 0 and n.size != 0
+ parsed.replace(n)
+ elsif n.size > 0
+ parsed << n
+ end
+ rest
+ end
+
+ #| '-' UnaryExpr
+ #| UnionExpr
+ def UnaryExpr path, parsed
+ path =~ /^(\-*)/
+ path = $'
+ if $1 and (($1.size % 2) != 0)
+ mult = -1
+ else
+ mult = 1
+ end
+ parsed << :neg if mult < 0
+
+ #puts "UNARY >>> #{path}"
+ n = []
+ path = UnionExpr( path, n )
+ #puts "UNARY <<< #{path}"
+ parsed.concat( n )
+ path
+ end
+
+ #| UnionExpr '|' PathExpr
+ #| PathExpr
+ def UnionExpr path, parsed
+ #puts "UNION >>> #{path}"
+ n = []
+ rest = PathExpr( path, n )
+ #puts "UNION <<< #{rest}"
+ if rest != path
+ while rest =~ /^\s*(\|)\s*/
+ n = [ :union, n, [] ]
+ rest = PathExpr( $', n[-1] )
+ end
+ end
+ if parsed.size == 0 and n.size != 0
+ parsed.replace( n )
+ elsif n.size > 0
+ parsed << n
+ end
+ rest
+ end
+
+ #| LocationPath
+ #| FilterExpr ('/' | '//') RelativeLocationPath
+ def PathExpr path, parsed
+ path =~ /^\s*/
+ path = $'
+ #puts "PATH >>> #{path}"
+ n = []
+ rest = FilterExpr( path, n )
+ #puts "PATH <<< '#{rest}'"
+ if rest != path
+ if rest and rest[0] == ?/
+ return RelativeLocationPath(rest, n)
+ end
+ end
+ #puts "BEFORE WITH '#{rest}'"
+ rest = LocationPath(rest, n) if rest =~ /^[\/\.\@\[\w_*]/
+ parsed.concat(n)
+ return rest
+ end
+
+ #| FilterExpr Predicate
+ #| PrimaryExpr
+ def FilterExpr path, parsed
+ #puts "FILTER >>> #{path}"
+ n = []
+ path = PrimaryExpr( path, n )
+ #puts "FILTER <<< #{path}"
+ path = Predicate(path, n) if path and path[0] == ?[
+ #puts "FILTER <<< #{path}"
+ parsed.concat(n)
+ path
+ end
+
+ #| VARIABLE_REFERENCE
+ #| '(' expr ')'
+ #| LITERAL
+ #| NUMBER
+ #| FunctionCall
+ VARIABLE_REFERENCE = /^\$(#{NAME_STR})/u
+ NUMBER = /^(\d*\.?\d+)/
+ NT = /^comment|text|processing-instruction|node$/
+ def PrimaryExpr path, parsed
+ arry = []
+ case path
+ when VARIABLE_REFERENCE
+ varname = $1
+ path = $'
+ parsed << :variable
+ parsed << varname
+ #arry << @variables[ varname ]
+ when /^(\w[-\w]*)(?:\()/
+ fname = $1
+ path = $'
+ return nil if fname =~ NT
+ parsed << :function
+ parsed << fname
+ path = FunctionCall(path, parsed)
+ when LITERAL, NUMBER
+ #puts "LITERAL or NUMBER: #$1"
+ varname = $1.nil? ? $2 : $1
+ path = $'
+ parsed << :literal
+ parsed << varname
+ when /^\(/ #/
+ path, contents = get_group(path)
+ contents = contents[1..-2]
+ n = []
+ OrExpr( contents, n )
+ parsed.concat(n)
+ end
+ path
+ end
+
+ #| FUNCTION_NAME '(' ( expr ( ',' expr )* )? ')'
+ def FunctionCall rest, parsed
+ path, arguments = parse_args(rest)
+ argset = []
+ for argument in arguments
+ args = []
+ OrExpr( argument, args )
+ argset << args
+ end
+ parsed << argset
+ path
+ end
+
+ # get_group( '[foo]bar' ) -> ['bar', '[foo]']
+ def get_group string
+ ind = 0
+ depth = 0
+ st = string[0,1]
+ en = (st == "(" ? ")" : "]")
+ begin
+ case string[ind,1]
+ when st
+ depth += 1
+ when en
+ depth -= 1
+ end
+ ind += 1
+ end while depth > 0 and ind < string.length
+ return nil unless depth==0
+ [string[ind..-1], string[0..ind-1]]
+ end
+
+ def parse_args( string )
+ arguments = []
+ ind = 0
+ depth = 1
+ begin
+ case string[ind]
+ when ?(
+ depth += 1
+ if depth == 1
+ string = string[1..-1]
+ ind -= 1
+ end
+ when ?)
+ depth -= 1
+ if depth == 0
+ s = string[0,ind].strip
+ arguments << s unless s == ""
+ string = string[ind+1..-1]
+ end
+ when ?,
+ if depth == 1
+ s = string[0,ind].strip
+ arguments << s unless s == ""
+ string = string[ind+1..-1]
+ ind = 0
+ end
+ end
+ ind += 1
+ end while depth > 0 and ind < string.length
+ return nil unless depth==0
+ [string,arguments]
+ end
+ end
+ end
+end