summaryrefslogtreecommitdiff
path: root/lib/rexml/parsers
diff options
context:
space:
mode:
authorser <ser@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2004-07-04 15:39:05 +0000
committerser <ser@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2004-07-04 15:39:05 +0000
commita8379e7f1855c999b931a76b19f9c0922a9f13a4 (patch)
tree526863f5aa43f0870a611103ca307043d81e4904 /lib/rexml/parsers
parentdddfbb3897b89619214d90aefb79a6685a4ae911 (diff)
* Added the treeparser, and added the file to the MANIFEST.
r1002 | ser | 2004-06-07 07:45:53 -0400 (Mon, 07 Jun 2004) | 2 lines * Workin' in the coal mine, goin' down, down, down... r1003 | ser | 2004-06-08 22:24:08 -0400 (Tue, 08 Jun 2004) | 7 lines * Entirely rewrote the validation code; the finite state machine, while cool, didn't survive the encounter with Interleave. It was getting sort of hacky, too. The new mechanism is less elegant, but is basically still a FSM, and is more flexible without having to add hacks to extend it. Large chunks of the FSM may be reusable in other validation mechanisms. * Added interleave support r1004 | ser | 2004-06-09 07:24:17 -0400 (Wed, 09 Jun 2004) | 2 lines * Added suppert for mixed r1005 | ser | 2004-06-09 08:01:33 -0400 (Wed, 09 Jun 2004) | 3 lines * Added Kou's patch to normalize attribute values passed through the SAX2 and Stream parsers. r1006 | ser | 2004-06-09 08:12:35 -0400 (Wed, 09 Jun 2004) | 2 lines * Applied Kou's preceding-sibling patch, which fixes the order of the axe results r1009 | ser | 2004-06-20 11:02:55 -0400 (Sun, 20 Jun 2004) | 8 lines * Redesigned and rewrote the RelaxNG code. It isn't elegant, but it works. Particular problems encountered were interleave and ref. Interleave means I can't use a clean FSM design, and ref means the dirty FSM design has to be modified during validation. There's a lot of code that could be cleaned up in here. However, I'm pretty sure that this design is reasonably fast and space efficient. I'm not entirely convinced that it is correct; more tests are required. * This version adds support for defines and refs. r1011 | ser | 2004-06-20 11:20:07 -0400 (Sun, 20 Jun 2004) | 3 lines * Removed debugging output from unit test * Moved ">" in Element.inspect r1014 | ser | 2004-06-20 11:40:30 -0400 (Sun, 20 Jun 2004) | 2 lines * Minor big in missing includes for validation rules r1023 | ser | 2004-07-03 08:57:34 -0400 (Sat, 03 Jul 2004) | 2 lines * Fixed bug #34, typo in xpath_parser. r1024 | ser | 2004-07-03 10:22:08 -0400 (Sat, 03 Jul 2004) | 9 lines * Previous fix, (include? -> includes?) was incorrect. * Added another test for encoding * Started AnyName support in RelaxNG * Added Element#Attributes#to_a, so that it does something intelligent. This was needed by XPath, for '@*' * Fixed XPath so that @* works. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_1_8@6579 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'lib/rexml/parsers')
-rw-r--r--lib/rexml/parsers/baseparser.rb44
-rw-r--r--lib/rexml/parsers/lightparser.rb4
-rw-r--r--lib/rexml/parsers/pullparser.rb6
-rw-r--r--lib/rexml/parsers/sax2parser.rb5
-rw-r--r--lib/rexml/parsers/streamparser.rb9
-rw-r--r--lib/rexml/parsers/ultralightparser.rb4
-rw-r--r--lib/rexml/parsers/xpathparser.rb164
7 files changed, 195 insertions, 41 deletions
diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb
index fbb1ec06a8..e95cba8904 100644
--- a/lib/rexml/parsers/baseparser.rb
+++ b/lib/rexml/parsers/baseparser.rb
@@ -100,6 +100,23 @@ module REXML
self.stream = source
end
+ def add_listener( listener )
+ if !defined?(@listeners) or !@listeners
+ @listeners = []
+ instance_eval <<-EOL
+ alias :_old_pull :pull
+ def pull
+ event = _old_pull
+ @listeners.each do |listener|
+ listener.receive event
+ end
+ event
+ end
+ EOL
+ end
+ @listeners << listener
+ end
+
attr_reader :source
def stream=( source )
@@ -162,11 +179,11 @@ module REXML
# Returns the next event. This is a +PullEvent+ object.
def pull
- return [ :end_document ] if empty?
if @closed
x, @closed = @closed, nil
return [ :end_element, x ]
end
+ return [ :end_document ] if empty?
return @stack.shift if @stack.size > 0
@source.read if @source.buffer.size<2
if @document_status == nil
@@ -289,8 +306,9 @@ module REXML
last_tag = @tags.pop
#md = @source.match_to_consume( '>', CLOSE_MATCH)
md = @source.match( CLOSE_MATCH, true )
- raise REXML::ParseException.new( "Missing end tag for '#{last_tag}' "+
- "(got \"#{md[1]}\")", @source) unless last_tag == md[1]
+ raise REXML::ParseException.new( "Missing end tag for "+
+ "'#{last_tag}' (got \"#{md[1]}\")",
+ @source) unless last_tag == md[1]
return [ :end_element, last_tag ]
elsif @source.buffer[1] == ?!
md = @source.match(/\A(\s*[^>]*>)/um)
@@ -411,3 +429,23 @@ module REXML
end
end
end
+
+=begin
+ case event[0]
+ when :start_element
+ when :text
+ when :end_element
+ when :processing_instruction
+ when :cdata
+ when :comment
+ when :xmldecl
+ when :start_doctype
+ when :end_doctype
+ when :externalentity
+ when :elementdecl
+ when :entity
+ when :attlistdecl
+ when :notationdecl
+ when :end_doctype
+ end
+=end
diff --git a/lib/rexml/parsers/lightparser.rb b/lib/rexml/parsers/lightparser.rb
index 8c555f7960..0f35034993 100644
--- a/lib/rexml/parsers/lightparser.rb
+++ b/lib/rexml/parsers/lightparser.rb
@@ -10,6 +10,10 @@ module REXML
@parser = REXML::Parsers::BaseParser.new( stream )
end
+ def add_listener( listener )
+ @parser.add_listener( listener )
+ end
+
def rewind
@stream.rewind
@parser.stream = @stream
diff --git a/lib/rexml/parsers/pullparser.rb b/lib/rexml/parsers/pullparser.rb
index aeda6251fe..fe4d41c959 100644
--- a/lib/rexml/parsers/pullparser.rb
+++ b/lib/rexml/parsers/pullparser.rb
@@ -29,8 +29,14 @@ module REXML
def initialize stream
super
@entities = {}
+ @listeners = nil
end
+ def add_listener( listener )
+ @listeners = [] unless @listeners
+ @listeners << listener
+ end
+
def each
while has_next?
yield self.pull
diff --git a/lib/rexml/parsers/sax2parser.rb b/lib/rexml/parsers/sax2parser.rb
index 8c82cf8fc1..96440d17bf 100644
--- a/lib/rexml/parsers/sax2parser.rb
+++ b/lib/rexml/parsers/sax2parser.rb
@@ -14,6 +14,10 @@ module REXML
@tag_stack = []
end
+ def add_listener( listener )
+ @parser.add_listener( listener )
+ end
+
# Listen arguments:
#
# Symbol, Array, Block
@@ -89,6 +93,7 @@ module REXML
if procs or listeners
# break out the namespace declarations
# The attributes live in event[2]
+ event[2].each {|n, v| event[2][n] = @parser.normalize(v)}
nsdecl = event[2].find_all { |n, value| n =~ /^xmlns(:|$)/ }
nsdecl.collect! { |n, value| [ n[6..-1], value ] }
@namespace_stack.push({})
diff --git a/lib/rexml/parsers/streamparser.rb b/lib/rexml/parsers/streamparser.rb
index 49bef0d8fa..357cc186e6 100644
--- a/lib/rexml/parsers/streamparser.rb
+++ b/lib/rexml/parsers/streamparser.rb
@@ -6,6 +6,10 @@ module REXML
@parser = BaseParser.new( source )
end
+ def add_listener( listener )
+ @parser.add_listener( listener )
+ end
+
def parse
# entity string
while true
@@ -14,7 +18,10 @@ module REXML
when :end_document
return
when :start_element
- @listener.tag_start( event[1], event[2] )
+ attrs = event[2].each do |n, v|
+ event[2][n] = @parser.unnormalize( v )
+ end
+ @listener.tag_start( event[1], attrs )
when :end_element
@listener.tag_end( event[1] )
when :text
diff --git a/lib/rexml/parsers/ultralightparser.rb b/lib/rexml/parsers/ultralightparser.rb
index f3b208bf0f..adc4af18e2 100644
--- a/lib/rexml/parsers/ultralightparser.rb
+++ b/lib/rexml/parsers/ultralightparser.rb
@@ -9,6 +9,10 @@ module REXML
@parser = REXML::Parsers::BaseParser.new( stream )
end
+ def add_listener( listener )
+ @parser.add_listener( listener )
+ end
+
def rewind
@stream.rewind
@parser.stream = @stream
diff --git a/lib/rexml/parsers/xpathparser.rb b/lib/rexml/parsers/xpathparser.rb
index 082f8255ad..41b2b8a5c1 100644
--- a/lib/rexml/parsers/xpathparser.rb
+++ b/lib/rexml/parsers/xpathparser.rb
@@ -30,46 +30,126 @@ module REXML
parsed
end
- def to_string( path )
+ def abbreviate( path )
+ path = path.kind_of?(String) ? parse( path ) : path
string = ""
+ document = false
while path.size > 0
- case path[0]
- when :ancestor, :ancestor_or_self, :attribute, :child, :descendant, :descendant_or_self, :following, :following_sibling, :namespace, :parent, :preceding, :preceding_sibling, :self
- op = path.shift
+ op = path.shift
+ case op
+ when :node
+ when :attribute
+ string << "/" if string.size > 0
+ string << "@"
+ when :child
+ string << "/" if string.size > 0
+ when :descendant_or_self
+ string << "/"
+ when :self
+ string << "."
+ when :parent
+ string << ".."
+ when :any
+ string << "*"
+ when :text
+ string << "text()"
+ when :following, :following_sibling,
+ :ancestor, :ancestor_or_self, :descendant,
+ :namespace, :preceding, :preceding_sibling
+ string << "/" unless string.size == 0
+ string << op.to_s.tr("_", "-")
+ string << "::"
+ when :qname
+ prefix = path.shift
+ name = path.shift
+ string << prefix+":" if prefix.size > 0
+ string << name
+ when :predicate
+ string << '['
+ string << predicate_to_string( path.shift ) {|x| abbreviate( x ) }
+ string << ']'
+ when :document
+ document = true
+ when :function
+ string << path.shift
+ string << "( "
+ string << predicate_to_string( path.shift[0] ) {|x| abbreviate( x )}
+ string << " )"
+ when :literal
+ string << %Q{ "#{path.shift}" }
+ else
+ string << "/" unless string.size == 0
+ string << "UNKNOWN("
+ string << op.inspect
+ string << ")"
+ end
+ end
+ string = "/"+string if document
+ return string
+ end
+
+ def expand( path )
+ path = path.kind_of?(String) ? parse( path ) : path
+ string = ""
+ document = false
+ while path.size > 0
+ op = path.shift
+ case op
+ when :node
+ string << "node()"
+ when :attribute, :child, :following, :following_sibling,
+ :ancestor, :ancestor_or_self, :descendant, :descendant_or_self,
+ :namespace, :preceding, :preceding_sibling, :self, :parent
string << "/" unless string.size == 0
- string << op.to_s
+ string << op.to_s.tr("_", "-")
string << "::"
when :any
- path.shift
string << "*"
when :qname
- path.shift
prefix = path.shift
name = path.shift
string << prefix+":" if prefix.size > 0
string << name
when :predicate
- path.shift
string << '['
- string << predicate_to_string( path.shift )
- string << ' ]'
+ string << predicate_to_string( path.shift ) { |x| expand(x) }
+ string << ']'
+ when :document
+ document = true
else
string << "/" unless string.size == 0
string << "UNKNOWN("
- string << path.shift.inspect
+ string << op.inspect
string << ")"
end
end
+ string = "/"+string if document
return string
end
- def predicate_to_string( path )
+ def predicate_to_string( path, &block )
string = ""
case path[0]
- when :and, :or, :mult, :plus, :minus, :neq, :eq, :lt, :gt, :lteq, :gteq, :div, :mod, :neq, :union
+ when :and, :or, :mult, :plus, :minus, :neq, :eq, :lt, :gt, :lteq, :gteq, :div, :mod, :union
op = path.shift
- left = predicate_to_string( path.shift )
- right = predicate_to_string( path.shift )
+ case op
+ when :eq
+ op = "="
+ when :lt
+ op = "<"
+ when :gt
+ op = ">"
+ when :lteq
+ op = "<="
+ when :gteq
+ op = ">="
+ when :neq
+ op = "!="
+ when :union
+ op = "|"
+ end
+ left = predicate_to_string( path.shift, &block )
+ right = predicate_to_string( path.shift, &block )
string << " "
string << left
string << " "
@@ -82,7 +162,7 @@ module REXML
name = path.shift
string << name
string << "( "
- string << predicate_to_string( path.shift )
+ string << predicate_to_string( path.shift, &block )
string << " )"
when :literal
path.shift
@@ -91,7 +171,7 @@ module REXML
string << " "
else
string << " "
- string << to_string( path )
+ string << yield( path )
string << " "
end
return string.squeeze(" ")
@@ -534,7 +614,6 @@ module REXML
#| FUNCTION_NAME '(' ( expr ( ',' expr )* )? ')'
def FunctionCall rest, parsed
path, arguments = parse_args(rest)
- #puts "Function call >>> (#{arguments.inspect})"
argset = []
for argument in arguments
args = []
@@ -567,28 +646,39 @@ module REXML
def parse_args( string )
arguments = []
ind = 0
+ inquot = false
+ inapos = false
depth = 1
begin
case string[ind]
- when ?(
- depth += 1
- if depth == 1
- string = string[1..-1]
- ind -= 1
- end
- when ?)
- depth -= 1
- if depth == 0
- s = string[0,ind].strip
- arguments << s unless s == ""
- string = string[ind+1..-1]
- end
- when ?,
- if depth == 1
- s = string[0,ind].strip
- arguments << s unless s == ""
- string = string[ind+1..-1]
- ind = 0
+ when ?"
+ inquot = !inquot unless inapos
+ when ?'
+ inapos = !inapos unless inquot
+ else
+ unless inquot or inapos
+ case string[ind]
+ when ?(
+ depth += 1
+ if depth == 1
+ string = string[1..-1]
+ ind -= 1
+ end
+ when ?)
+ depth -= 1
+ if depth == 0
+ s = string[0,ind].strip
+ arguments << s unless s == ""
+ string = string[ind+1..-1]
+ end
+ when ?,
+ if depth == 1
+ s = string[0,ind].strip
+ arguments << s unless s == ""
+ string = string[ind+1..-1]
+ ind = -1
+ end
+ end
end
end
ind += 1