summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/rexml/attribute.rb6
-rw-r--r--lib/rexml/cdata.rb4
-rw-r--r--lib/rexml/doctype.rb433
-rw-r--r--lib/rexml/document.rb302
-rw-r--r--lib/rexml/element.rb21
-rw-r--r--lib/rexml/encoding.rb110
-rw-r--r--lib/rexml/functions.rb18
-rw-r--r--lib/rexml/instruction.rb4
-rw-r--r--lib/rexml/parent.rb319
-rw-r--r--lib/rexml/parsers/baseparser.rb761
-rw-r--r--lib/rexml/parsers/pullparser.rb318
-rw-r--r--lib/rexml/parsers/sax2parser.rb7
-rw-r--r--lib/rexml/parsers/streamparser.rb74
-rw-r--r--lib/rexml/parsers/treeparser.rb10
-rw-r--r--lib/rexml/rexml.rb13
-rw-r--r--lib/rexml/sax2listener.rb3
-rw-r--r--lib/rexml/source.rb27
-rw-r--r--lib/rexml/streamlistener.rb3
-rw-r--r--lib/rexml/text.rb14
-rw-r--r--lib/rexml/validation/validation.rb5
-rw-r--r--lib/rexml/xmldecl.rb125
-rw-r--r--lib/rexml/xpath.rb117
-rw-r--r--lib/rexml/xpath_parser.rb29
23 files changed, 1436 insertions, 1287 deletions
diff --git a/lib/rexml/attribute.rb b/lib/rexml/attribute.rb
index a5a58055b8..a169148f32 100644
--- a/lib/rexml/attribute.rb
+++ b/lib/rexml/attribute.rb
@@ -101,20 +101,20 @@ module REXML
end
@unnormalized = nil
- @value = @normalized = Text::normalize( @value, doctype )
+ @normalized = Text::normalize( @value, doctype )
end
# Returns the UNNORMALIZED value of this attribute. That is, entities
# have been expanded to their values
def value
- @unnormalized if @unnormalized
+ return @unnormalized if @unnormalized
doctype = nil
if @element
doc = @element.document
doctype = doc.doctype if doc
end
@normalized = nil
- @value = @unnormalized = Text::unnormalize( @value, doctype )
+ @unnormalized = Text::unnormalize( @value, doctype )
end
# Returns a copy of this attribute
diff --git a/lib/rexml/cdata.rb b/lib/rexml/cdata.rb
index ffedac1b53..046012ba61 100644
--- a/lib/rexml/cdata.rb
+++ b/lib/rexml/cdata.rb
@@ -35,6 +35,10 @@ module REXML
@string
end
+ def value
+ @string
+ end
+
# Generates XML output of this object
#
# output::
diff --git a/lib/rexml/doctype.rb b/lib/rexml/doctype.rb
index 652a04fce2..4a1ffb4336 100644
--- a/lib/rexml/doctype.rb
+++ b/lib/rexml/doctype.rb
@@ -6,55 +6,55 @@ require 'rexml/attlistdecl'
require 'rexml/xmltokens'
module REXML
- # Represents an XML DOCTYPE declaration; that is, the contents of <!DOCTYPE
- # ... >. DOCTYPES can be used to declare the DTD of a document, as well as
- # being used to declare entities used in the document.
- class DocType < Parent
- include XMLTokens
- START = "<!DOCTYPE"
- STOP = ">"
- SYSTEM = "SYSTEM"
- PUBLIC = "PUBLIC"
- DEFAULT_ENTITIES = {
- 'gt'=>EntityConst::GT,
- 'lt'=>EntityConst::LT,
- 'quot'=>EntityConst::QUOT,
- "apos"=>EntityConst::APOS
- }
-
- # name is the name of the doctype
- # external_id is the referenced DTD, if given
- attr_reader :name, :external_id, :entities, :namespaces
-
- # Constructor
- #
- # dt = DocType.new( 'foo', '-//I/Hate/External/IDs' )
- # # <!DOCTYPE foo '-//I/Hate/External/IDs'>
- # dt = DocType.new( doctype_to_clone )
- # # Incomplete. Shallow clone of doctype
+ # Represents an XML DOCTYPE declaration; that is, the contents of <!DOCTYPE
+ # ... >. DOCTYPES can be used to declare the DTD of a document, as well as
+ # being used to declare entities used in the document.
+ class DocType < Parent
+ include XMLTokens
+ START = "<!DOCTYPE"
+ STOP = ">"
+ SYSTEM = "SYSTEM"
+ PUBLIC = "PUBLIC"
+ DEFAULT_ENTITIES = {
+ 'gt'=>EntityConst::GT,
+ 'lt'=>EntityConst::LT,
+ 'quot'=>EntityConst::QUOT,
+ "apos"=>EntityConst::APOS
+ }
+
+ # name is the name of the doctype
+ # external_id is the referenced DTD, if given
+ attr_reader :name, :external_id, :entities, :namespaces
+
+ # Constructor
+ #
+ # dt = DocType.new( 'foo', '-//I/Hate/External/IDs' )
+ # # <!DOCTYPE foo '-//I/Hate/External/IDs'>
+ # dt = DocType.new( doctype_to_clone )
+ # # Incomplete. Shallow clone of doctype
#
# +Note+ that the constructor:
#
# Doctype.new( Source.new( "<!DOCTYPE foo 'bar'>" ) )
#
# is _deprecated_. Do not use it. It will probably disappear.
- def initialize( first, parent=nil )
- @entities = DEFAULT_ENTITIES
- @long_name = @uri = nil
- if first.kind_of? String
- super()
- @name = first
- @external_id = parent
- elsif first.kind_of? DocType
- super( parent )
- @name = first.name
- @external_id = first.external_id
- elsif first.kind_of? Array
- super( parent )
- @name = first[0]
- @external_id = first[1]
- @long_name = first[2]
- @uri = first[3]
+ def initialize( first, parent=nil )
+ @entities = DEFAULT_ENTITIES
+ @long_name = @uri = nil
+ if first.kind_of? String
+ super()
+ @name = first
+ @external_id = parent
+ elsif first.kind_of? DocType
+ super( parent )
+ @name = first.name
+ @external_id = first.external_id
+ elsif first.kind_of? Array
+ super( parent )
+ @name = first[0]
+ @external_id = first[1]
+ @long_name = first[2]
+ @uri = first[3]
elsif first.kind_of? Source
super( parent )
parser = Parsers::BaseParser.new( first )
@@ -64,150 +64,215 @@ module REXML
end
else
super()
- end
- end
-
- def node_type
- :doctype
- end
-
- def attributes_of element
- rv = []
- each do |child|
- child.each do |key,val|
- rv << Attribute.new(key,val)
- end if child.kind_of? AttlistDecl and child.element_name == element
- end
- rv
- end
-
- def attribute_of element, attribute
- att_decl = find do |child|
- child.kind_of? AttlistDecl and
- child.element_name == element and
- child.include? attribute
- end
- return nil unless att_decl
- att_decl[attribute]
- end
-
- def clone
- DocType.new self
- end
-
- # output::
- # Where to write the string
- # indent::
- # An integer. If -1, no indenting will be used; otherwise, the
- # indentation will be this number of spaces, and children will be
- # indented an additional amount.
- # transitive::
- # If transitive is true and indent is >= 0, then the output will be
- # pretty-printed in such a way that the added whitespace does not affect
- # the absolute *value* of the document -- that is, it leaves the value
- # and number of Text nodes in the document unchanged.
- # ie_hack::
- # Internet Explorer is the worst piece of crap to have ever been
- # written, with the possible exception of Windows itself. Since IE is
- # unable to parse proper XML, we have to provide a hack to generate XML
- # that IE's limited abilities can handle. This hack inserts a space
- # before the /> on empty tags.
- #
- def write( output, indent=0, transitive=false, ie_hack=false )
- indent( output, indent )
- output << START
- output << ' '
- output << @name
- output << " #@external_id" if @external_id
- output << " #@long_name" if @long_name
- output << " #@uri" if @uri
- unless @children.empty?
- next_indent = indent + 1
- output << ' ['
- child = nil # speed
- @children.each { |child|
- output << "\n"
- child.write( output, next_indent )
- }
- output << "\n"
- #output << ' '*next_indent
- output << "]"
- end
- output << STOP
- end
+ end
+ end
+
+ def node_type
+ :doctype
+ end
+
+ def attributes_of element
+ rv = []
+ each do |child|
+ child.each do |key,val|
+ rv << Attribute.new(key,val)
+ end if child.kind_of? AttlistDecl and child.element_name == element
+ end
+ rv
+ end
+
+ def attribute_of element, attribute
+ att_decl = find do |child|
+ child.kind_of? AttlistDecl and
+ child.element_name == element and
+ child.include? attribute
+ end
+ return nil unless att_decl
+ att_decl[attribute]
+ end
+
+ def clone
+ DocType.new self
+ end
+
+ # output::
+ # Where to write the string
+ # indent::
+ # An integer. If -1, no indenting will be used; otherwise, the
+ # indentation will be this number of spaces, and children will be
+ # indented an additional amount.
+ # transitive::
+ # If transitive is true and indent is >= 0, then the output will be
+ # pretty-printed in such a way that the added whitespace does not affect
+ # the absolute *value* of the document -- that is, it leaves the value
+ # and number of Text nodes in the document unchanged.
+ # ie_hack::
+ # Internet Explorer is the worst piece of crap to have ever been
+ # written, with the possible exception of Windows itself. Since IE is
+ # unable to parse proper XML, we have to provide a hack to generate XML
+ # that IE's limited abilities can handle. This hack inserts a space
+ # before the /> on empty tags.
+ #
+ def write( output, indent=0, transitive=false, ie_hack=false )
+ indent( output, indent )
+ output << START
+ output << ' '
+ output << @name
+ output << " #@external_id" if @external_id
+ output << " #@long_name" if @long_name
+ output << " #@uri" if @uri
+ unless @children.empty?
+ next_indent = indent + 1
+ output << ' ['
+ child = nil # speed
+ @children.each { |child|
+ output << "\n"
+ child.write( output, next_indent )
+ }
+ #output << ' '*next_indent
+ output << "\n]"
+ end
+ output << STOP
+ end
def context
@parent.context
end
- def entity( name )
- @entities[name].unnormalized if @entities[name]
- end
-
- def add child
- super(child)
- @entities = DEFAULT_ENTITIES.clone if @entities == DEFAULT_ENTITIES
- @entities[ child.name ] = child if child.kind_of? Entity
- end
- end
-
- # We don't really handle any of these since we're not a validating
- # parser, so we can be pretty dumb about them. All we need to be able
- # to do is spew them back out on a write()
-
- # This is an abstract class. You never use this directly; it serves as a
- # parent class for the specific declarations.
- class Declaration < Child
- def initialize src
- super()
- @string = src
- end
-
- def to_s
- @string+'>'
- end
-
- def write( output, indent )
- output << (' '*indent) if indent > 0
- output << to_s
- end
- end
-
- public
- class ElementDecl < Declaration
- def initialize( src )
- super
- end
- end
-
- class ExternalEntity < Child
- def initialize( src )
- super()
- @entity = src
- end
- def to_s
- @entity
- end
- def write( output, indent )
- output << @entity
- output << "\n"
- end
- end
-
- class NotationDecl < Child
- def initialize name, middle, rest
- @name = name
- @middle = middle
- @rest = rest
- end
-
- def to_s
- "<!NOTATION #@name '#@middle #@rest'>"
- end
-
- def write( output, indent=-1 )
- output << (' '*indent) if indent > 0
- output << to_s
- end
- end
+ def entity( name )
+ @entities[name].unnormalized if @entities[name]
+ end
+
+ def add child
+ super(child)
+ @entities = DEFAULT_ENTITIES.clone if @entities == DEFAULT_ENTITIES
+ @entities[ child.name ] = child if child.kind_of? Entity
+ end
+
+ # This method retrieves the public identifier identifying the document's
+ # DTD.
+ #
+ # Method contributed by Henrik Martensson
+ def public
+ case @external_id
+ when "SYSTEM"
+ nil
+ when "PUBLIC"
+ strip_quotes(@long_name)
+ end
+ end
+
+ # This method retrieves the system identifier identifying the document's DTD
+ #
+ # Method contributed by Henrik Martensson
+ def system
+ case @external_id
+ when "SYSTEM"
+ strip_quotes(@long_name)
+ when "PUBLIC"
+ @uri.kind_of?(String) ? strip_quotes(@uri) : nil
+ end
+ end
+
+ # This method returns a list of notations that have been declared in the
+ # _internal_ DTD subset. Notations in the external DTD subset are not
+ # listed.
+ #
+ # Method contributed by Henrik Martensson
+ def notations
+ children().select {|node| node.kind_of?(REXML::NotationDecl)}
+ end
+
+ # Retrieves a named notation. Only notations declared in the internal
+ # DTD subset can be retrieved.
+ #
+ # Method contributed by Henrik Martensson
+ def notation(name)
+ notations.find { |notation_decl|
+ notation_decl.name == name
+ }
+ end
+
+ private
+
+ # Method contributed by Henrik Martensson
+ def strip_quotes(quoted_string)
+ quoted_string =~ /^[\'\"].*[\´\"]$/ ?
+ quoted_string[1, quoted_string.length-2] :
+ quoted_string
+ end
+ end
+
+ # We don't really handle any of these since we're not a validating
+ # parser, so we can be pretty dumb about them. All we need to be able
+ # to do is spew them back out on a write()
+
+ # This is an abstract class. You never use this directly; it serves as a
+ # parent class for the specific declarations.
+ class Declaration < Child
+ def initialize src
+ super()
+ @string = src
+ end
+
+ def to_s
+ @string+'>'
+ end
+
+ def write( output, indent )
+ output << (' '*indent) if indent > 0
+ output << to_s
+ end
+ end
+
+ public
+ class ElementDecl < Declaration
+ def initialize( src )
+ super
+ end
+ end
+
+ class ExternalEntity < Child
+ def initialize( src )
+ super()
+ @entity = src
+ end
+ def to_s
+ @entity
+ end
+ def write( output, indent )
+ output << @entity
+ end
+ end
+
+ class NotationDecl < Child
+ attr_accessor :public, :system
+ def initialize name, middle, pub, sys
+ super(nil)
+ @name = name
+ @middle = middle
+ @public = pub
+ @system = sys
+ end
+
+ def to_s
+ "<!NOTATION #@name #@middle#{
+ @public ? ' ' + public.inspect : ''
+ }#{
+ @system ? ' ' +@system.inspect : ''
+ }>"
+ end
+
+ def write( output, indent=-1 )
+ output << (' '*indent) if indent > 0
+ output << to_s
+ end
+
+ # This method retrieves the name of the notation.
+ #
+ # Method contributed by Henrik Martensson
+ def name
+ @name
+ end
+ end
end
diff --git a/lib/rexml/document.rb b/lib/rexml/document.rb
index 8755e04de1..619a844257 100644
--- a/lib/rexml/document.rb
+++ b/lib/rexml/document.rb
@@ -16,166 +16,178 @@ module REXML
# Document has a single child that can be accessed by root().
# Note that if you want to have an XML declaration written for a document
# you create, you must add one; REXML documents do not write a default
- # declaration for you. See |DECLARATION| and |write|.
- class Document < Element
- # A convenient default XML declaration. If you want an XML declaration,
- # the easiest way to add one is mydoc << Document::DECLARATION
+ # declaration for you. See |DECLARATION| and |write|.
+ class Document < Element
+ # A convenient default XML declaration. If you want an XML declaration,
+ # the easiest way to add one is mydoc << Document::DECLARATION
# +DEPRECATED+
# Use: mydoc << XMLDecl.default
- DECLARATION = XMLDecl.default
-
- # Constructor
- # @param source if supplied, must be a Document, String, or IO.
- # Documents have their context and Element attributes cloned.
- # Strings are expected to be valid XML documents. IOs are expected
- # to be sources of valid XML documents.
- # @param context if supplied, contains the context of the document;
- # this should be a Hash.
- # NOTE that I'm not sure what the context is for; I cloned it out of
- # the Electric XML API (in which it also seems to do nothing), and it
- # is now legacy. It may do something, someday... it may disappear.
- def initialize( source = nil, context = {} )
- super()
- @context = context
- return if source.nil?
- if source.kind_of? Document
- @context = source.context
- super source
- else
- build( source )
- end
- end
+ DECLARATION = XMLDecl.default
+
+ # Constructor
+ # @param source if supplied, must be a Document, String, or IO.
+ # Documents have their context and Element attributes cloned.
+ # Strings are expected to be valid XML documents. IOs are expected
+ # to be sources of valid XML documents.
+ # @param context if supplied, contains the context of the document;
+ # this should be a Hash.
+ # NOTE that I'm not sure what the context is for; I cloned it out of
+ # the Electric XML API (in which it also seems to do nothing), and it
+ # is now legacy. It may do something, someday... it may disappear.
+ def initialize( source = nil, context = {} )
+ super()
+ @context = context
+ return if source.nil?
+ if source.kind_of? Document
+ @context = source.context
+ super source
+ else
+ build( source )
+ end
+ end
def node_type
:document
end
- # Should be obvious
- def clone
- Document.new self
- end
-
- # According to the XML spec, a root node has no expanded name
- def expanded_name
- ''
- #d = doc_type
- #d ? d.name : "UNDEFINED"
- end
-
- alias :name :expanded_name
-
- # We override this, because XMLDecls and DocTypes must go at the start
- # of the document
- def add( child )
- if child.kind_of? XMLDecl
- @children.unshift child
- elsif child.kind_of? DocType
- if @children[0].kind_of? XMLDecl
- @children[1,0] = child
- else
- @children.unshift child
+ # Should be obvious
+ def clone
+ Document.new self
+ end
+
+ # According to the XML spec, a root node has no expanded name
+ def expanded_name
+ ''
+ #d = doc_type
+ #d ? d.name : "UNDEFINED"
+ end
+
+ alias :name :expanded_name
+
+ # We override this, because XMLDecls and DocTypes must go at the start
+ # of the document
+ def add( child )
+ if child.kind_of? XMLDecl
+ @children.unshift child
+ elsif child.kind_of? DocType
+ # Find first Element or DocType node and insert the decl right
+ # before it. If there is no such node, just insert the child at the
+ # end. If there is a child and it is an DocType, then replace it.
+ insert_before_index = 0
+ @children.find { |x|
+ insert_before_index += 1
+ x.kind_of?(Element) || x.kind_of?(DocType)
+ }
+ if @children[ insert_before_index ] # Not null = not end of list
+ if @children[ insert_before_index ].kind_of DocType
+ @children[ insert_before_index ] = child
+ else
+ @children[ index_before_index-1, 0 ] = child
+ end
+ else # Insert at end of list
+ @children[insert_before_index] = child
end
- child.parent = self
- else
- rv = super
- raise "attempted adding second root element to document" if @elements.size > 1
- rv
- end
- end
- alias :<< :add
-
- def add_element(arg=nil, arg2=nil)
- rv = super
- raise "attempted adding second root element to document" if @elements.size > 1
- rv
- end
-
- # @return the root Element of the document, or nil if this document
- # has no children.
- def root
+ child.parent = self
+ else
+ rv = super
+ raise "attempted adding second root element to document" if @elements.size > 1
+ rv
+ end
+ end
+ alias :<< :add
+
+ def add_element(arg=nil, arg2=nil)
+ rv = super
+ raise "attempted adding second root element to document" if @elements.size > 1
+ rv
+ end
+
+ # @return the root Element of the document, or nil if this document
+ # has no children.
+ def root
elements[1]
#self
#@children.find { |item| item.kind_of? Element }
- end
-
- # @return the DocType child of the document, if one exists,
- # and nil otherwise.
- def doctype
- @children.find { |item| item.kind_of? DocType }
- end
-
- # @return the XMLDecl of this document; if no XMLDecl has been
- # set, the default declaration is returned.
- def xml_decl
- rv = @children[0]
+ end
+
+ # @return the DocType child of the document, if one exists,
+ # and nil otherwise.
+ def doctype
+ @children.find { |item| item.kind_of? DocType }
+ end
+
+ # @return the XMLDecl of this document; if no XMLDecl has been
+ # set, the default declaration is returned.
+ def xml_decl
+ rv = @children[0]
return rv if rv.kind_of? XMLDecl
rv = @children.unshift(XMLDecl.default)[0]
- end
-
- # @return the XMLDecl version of this document as a String.
- # If no XMLDecl has been set, returns the default version.
- def version
- xml_decl().version
- end
-
- # @return the XMLDecl encoding of this document as a String.
- # If no XMLDecl has been set, returns the default encoding.
- def encoding
- xml_decl().encoding
- end
-
- # @return the XMLDecl standalone value of this document as a String.
- # If no XMLDecl has been set, returns the default setting.
- def stand_alone?
- xml_decl().stand_alone?
- end
-
- # Write the XML tree out, optionally with indent. This writes out the
- # entire XML document, including XML declarations, doctype declarations,
- # and processing instructions (if any are given).
- # A controversial point is whether Document should always write the XML
- # declaration (<?xml version='1.0'?>) whether or not one is given by the
- # user (or source document). REXML does not write one if one was not
- # specified, because it adds unneccessary bandwidth to applications such
- # as XML-RPC.
- #
- #
- # output::
- # output an object which supports '<< string'; this is where the
- # document will be written.
- # indent::
- # An integer. If -1, no indenting will be used; otherwise, the
- # indentation will be this number of spaces, and children will be
- # indented an additional amount. Defaults to -1
- # transitive::
- # If transitive is true and indent is >= 0, then the output will be
- # pretty-printed in such a way that the added whitespace does not affect
- # the absolute *value* of the document -- that is, it leaves the value
- # and number of Text nodes in the document unchanged.
- # ie_hack::
- # Internet Explorer is the worst piece of crap to have ever been
- # written, with the possible exception of Windows itself. Since IE is
- # unable to parse proper XML, we have to provide a hack to generate XML
- # that IE's limited abilities can handle. This hack inserts a space
- # before the /> on empty tags. Defaults to false
- def write( output=$stdout, indent_level=-1, transitive=false, ie_hack=false )
- output = Output.new( output, xml_decl.encoding ) if xml_decl.encoding != "UTF-8" && !output.kind_of?(Output)
- @children.each { |node|
- indent( output, indent_level ) if node.node_type == :element
- if node.write( output, indent_level, transitive, ie_hack )
- output << "\n" unless indent_level<0 or node == @children[-1]
+ end
+
+ # @return the XMLDecl version of this document as a String.
+ # If no XMLDecl has been set, returns the default version.
+ def version
+ xml_decl().version
+ end
+
+ # @return the XMLDecl encoding of this document as a String.
+ # If no XMLDecl has been set, returns the default encoding.
+ def encoding
+ xml_decl().encoding
+ end
+
+ # @return the XMLDecl standalone value of this document as a String.
+ # If no XMLDecl has been set, returns the default setting.
+ def stand_alone?
+ xml_decl().stand_alone?
+ end
+
+ # Write the XML tree out, optionally with indent. This writes out the
+ # entire XML document, including XML declarations, doctype declarations,
+ # and processing instructions (if any are given).
+ # A controversial point is whether Document should always write the XML
+ # declaration (<?xml version='1.0'?>) whether or not one is given by the
+ # user (or source document). REXML does not write one if one was not
+ # specified, because it adds unneccessary bandwidth to applications such
+ # as XML-RPC.
+ #
+ #
+ # output::
+ # output an object which supports '<< string'; this is where the
+ # document will be written.
+ # indent::
+ # An integer. If -1, no indenting will be used; otherwise, the
+ # indentation will be this number of spaces, and children will be
+ # indented an additional amount. Defaults to -1
+ # transitive::
+ # If transitive is true and indent is >= 0, then the output will be
+ # pretty-printed in such a way that the added whitespace does not affect
+ # the absolute *value* of the document -- that is, it leaves the value
+ # and number of Text nodes in the document unchanged.
+ # ie_hack::
+ # Internet Explorer is the worst piece of crap to have ever been
+ # written, with the possible exception of Windows itself. Since IE is
+ # unable to parse proper XML, we have to provide a hack to generate XML
+ # that IE's limited abilities can handle. This hack inserts a space
+ # before the /> on empty tags. Defaults to false
+ def write( output=$stdout, indent=-1, transitive=false, ie_hack=false )
+ output = Output.new( output, xml_decl.encoding ) if xml_decl.encoding != "UTF-8" && !output.kind_of?(Output)
+ @children.each { |node|
+ indent( output, indent ) if node.node_type == :element
+ if node.write( output, indent, transitive, ie_hack )
+ output << "\n" unless indent<0 or node == @children[-1]
end
- }
- end
+ }
+ end
-
- def Document::parse_stream( source, listener )
- Parsers::StreamParser.new( source, listener ).parse
- end
+
+ def Document::parse_stream( source, listener )
+ Parsers::StreamParser.new( source, listener ).parse
+ end
- private
- def build( source )
+ private
+ def build( source )
Parsers::TreeParser.new( source, self ).parse
- end
- end
+ end
+ end
end
diff --git a/lib/rexml/element.rb b/lib/rexml/element.rb
index 7f578ecb3d..80463d95b7 100644
--- a/lib/rexml/element.rb
+++ b/lib/rexml/element.rb
@@ -36,8 +36,6 @@ module REXML
# If an Element, the object will be shallowly cloned; name,
# attributes, and namespaces will be copied. Children will +not+ be
# copied.
- # If a Source, the source will be scanned and parsed for an Element,
- # and all child elements will be recursively parsed as well.
# parent::
# if supplied, must be a Parent, and will be used as
# the parent of this object.
@@ -223,7 +221,7 @@ module REXML
# b.namespace("y") # -> '2'
def namespace(prefix=nil)
if prefix.nil?
- prefix = self.prefix()
+ prefix = prefix()
end
if prefix == ''
prefix = "xmlns"
@@ -715,7 +713,7 @@ module REXML
private
def __to_xpath_helper node
- rv = node.expanded_name
+ rv = node.expanded_name.clone
if node.parent
results = node.parent.find_all {|n|
n.kind_of?(REXML::Element) and n.expanded_name == node.expanded_name
@@ -1226,5 +1224,20 @@ module REXML
rv.each{ |attr| attr.remove }
return rv
end
+
+ # The +get_attribute_ns+ method retrieves a method by its namespace
+ # and name. Thus it is possible to reliably identify an attribute
+ # even if an XML processor has changed the prefix.
+ #
+ # Method contributed by Henrik Martensson
+ def get_attribute_ns(namespace, name)
+ each_attribute() { |attribute|
+ if name == attribute.name &&
+ namespace == attribute.namespace()
+ return attribute
+ end
+ }
+ nil
+ end
end
end
diff --git a/lib/rexml/encoding.rb b/lib/rexml/encoding.rb
index edd3e80dfe..644957439e 100644
--- a/lib/rexml/encoding.rb
+++ b/lib/rexml/encoding.rb
@@ -1,58 +1,64 @@
# -*- mode: ruby; ruby-indent-level: 2; indent-tabs-mode: t; tab-width: 2 -*- vim: sw=2 ts=2
module REXML
- module Encoding
- @encoding_methods = {}
- def self.register(enc, &block)
- @encoding_methods[enc] = block
- end
- def self.apply(obj, enc)
- @encoding_methods[enc][obj]
- end
- def self.encoding_method(enc)
- @encoding_methods[enc]
- end
+ module Encoding
+ @encoding_methods = {}
+ def self.register(enc, &block)
+ @encoding_methods[enc] = block
+ end
+ def self.apply(obj, enc)
+ @encoding_methods[enc][obj]
+ end
+ def self.encoding_method(enc)
+ @encoding_methods[enc]
+ end
- # Native, default format is UTF-8, so it is declared here rather than in
- # an encodings/ definition.
- UTF_8 = 'UTF-8'
- UTF_16 = 'UTF-16'
- UNILE = 'UNILE'
+ # Native, default format is UTF-8, so it is declared here rather than in
+ # an encodings/ definition.
+ UTF_8 = 'UTF-8'
+ UTF_16 = 'UTF-16'
+ UNILE = 'UNILE'
- # ID ---> Encoding name
- attr_reader :encoding
- def encoding=( enc )
- old_verbosity = $VERBOSE
- begin
- $VERBOSE = false
- return if defined? @encoding and enc == @encoding
- if enc
- raise ArgumentError, "Bad encoding name #{enc}" unless /\A[\w-]+\z/n =~ enc
- @encoding = enc.upcase.untaint
- else
- @encoding = UTF_8
- end
- err = nil
- [@encoding, "ICONV"].each do |enc|
- begin
- require File.join("rexml", "encodings", "#{enc}.rb")
- return Encoding.apply(self, enc)
- rescue LoadError, Exception => err
- end
- end
- puts err.message
- raise ArgumentError, "No decoder found for encoding #@encoding. Please install iconv."
- ensure
- $VERBOSE = old_verbosity
- end
- end
+ # ID ---> Encoding name
+ attr_reader :encoding
+ def encoding=( enc )
+ old_verbosity = $VERBOSE
+ begin
+ $VERBOSE = false
+ return if defined? @encoding and enc == @encoding
+ if enc and enc != UTF_8
+ @encoding = enc.upcase
+ begin
+ require 'rexml/encodings/ICONV.rb'
+ Encoding.apply(self, "ICONV")
+ rescue LoadError, Exception => err
+ raise ArgumentError, "Bad encoding name #@encoding" unless @encoding =~ /^[\w-]+$/
+ @encoding.untaint
+ enc_file = File.join( "rexml", "encodings", "#@encoding.rb" )
+ begin
+ require enc_file
+ Encoding.apply(self, @encoding)
+ rescue LoadError
+ puts $!.message
+ raise ArgumentError, "No decoder found for encoding #@encoding. Please install iconv."
+ end
+ end
+ else
+ @encoding = UTF_8
+ require 'rexml/encodings/UTF-8.rb'
+ Encoding.apply(self, @encoding)
+ end
+ ensure
+ $VERBOSE = old_verbosity
+ end
+ end
- def check_encoding str
- # We have to recognize UTF-16, LSB UTF-16, and UTF-8
- return UTF_16 if str[0] == 254 && str[1] == 255
- return UNILE if str[0] == 255 && str[1] == 254
- str =~ /^\s*<?xml\s*version=(['"]).*?\2\s*encoding=(["'])(.*?)\2/um
- return $1.upcase if $1
- return UTF_8
- end
- end
+ def check_encoding str
+ # We have to recognize UTF-16, LSB UTF-16, and UTF-8
+ return UTF_16 if str[0] == 254 && str[1] == 255
+ return UNILE if str[0] == 255 && str[1] == 254
+ str =~ /^\s*<?xml\s*version=(['"]).*?\2\s*encoding=(["'])(.*?)\2/um
+ return $1.upcase if $1
+ return UTF_8
+ end
+ end
end
diff --git a/lib/rexml/functions.rb b/lib/rexml/functions.rb
index 7a2fb996a0..c09ffdeae7 100644
--- a/lib/rexml/functions.rb
+++ b/lib/rexml/functions.rb
@@ -67,11 +67,10 @@ module REXML
if node_set == nil
yield @@context[:node] if defined? @@context[:node].namespace
else
- if node_set.namespace
- yield node_set
- else
- return unless node_set.kind_of? Enumerable
+ if node_set.respond_to? :each
node_set.each { |node| yield node if defined? node.namespace }
+ elsif node_set.respond_to? :namespace
+ yield node_set
end
end
end
@@ -157,12 +156,9 @@ module REXML
# Kouhei fixed this too
def Functions::substring_after( string, test )
ruby_string = string(string)
- ruby_index = ruby_string.index(string(test))
- if ruby_index.nil?
- ""
- else
- ruby_string[ ruby_index+1..-1 ]
- end
+ test_string = string(test)
+ return $1 if ruby_string =~ /#{test}(.*)/
+ ""
end
# Take equal portions of Mike Stok and Sean Russell; mix
@@ -339,6 +335,8 @@ module REXML
end
def Functions::sum( nodes )
+ nodes = [nodes] unless nodes.kind_of? Array
+ nodes.inject(0) { |r,n| r += number(string(n)) }
end
def Functions::floor( number )
diff --git a/lib/rexml/instruction.rb b/lib/rexml/instruction.rb
index ed4f604c74..f24f7786f7 100644
--- a/lib/rexml/instruction.rb
+++ b/lib/rexml/instruction.rb
@@ -38,8 +38,8 @@ module REXML
Instruction.new self
end
- def write writer, indent_level=-1, transitive=false, ie_hack=false
- indent(writer, indent_level)
+ def write writer, indent=-1, transitive=false, ie_hack=false
+ indent(writer, indent)
writer << START.sub(/\\/u, '')
writer << @target
writer << ' '
diff --git a/lib/rexml/parent.rb b/lib/rexml/parent.rb
index 5c1ed97324..a20aaaef6b 100644
--- a/lib/rexml/parent.rb
+++ b/lib/rexml/parent.rb
@@ -1,165 +1,166 @@
require "rexml/child"
module REXML
- # A parent has children, and has methods for accessing them. The Parent
+ # A parent has children, and has methods for accessing them. The Parent
# class is never encountered except as the superclass for some other
# object.
- class Parent < Child
- include Enumerable
-
- # Constructor
- # @param parent if supplied, will be set as the parent of this object
- def initialize parent=nil
- super(parent)
- @children = []
- end
-
- def add( object )
- #puts "PARENT GOTS #{size} CHILDREN"
- object.parent = self
- @children << object
- #puts "PARENT NOW GOTS #{size} CHILDREN"
- object
- end
-
- alias :push :add
- alias :<< :push
-
- def unshift( object )
- object.parent = self
- @children.unshift object
- end
-
- def delete( object )
- return unless @children.include? object
- @children.delete object
- object.parent = nil
- end
-
- def each(&block)
- @children.each(&block)
- end
-
- def delete_if( &block )
- @children.delete_if(&block)
- end
-
- def delete_at( index )
- @children.delete_at index
- end
-
- def each_index( &block )
- @children.each_index(&block)
- end
-
- # Fetches a child at a given index
- # @param index the Integer index of the child to fetch
- def []( index )
- @children[index]
- end
-
- alias :each_child :each
-
-
-
- # Set an index entry. See Array.[]=
- # @param index the index of the element to set
- # @param opt either the object to set, or an Integer length
- # @param child if opt is an Integer, this is the child to set
- # @return the parent (self)
- def []=( *args )
- args[-1].parent = self
- @children[*args[0..-2]] = args[-1]
- end
-
- # Inserts an child before another child
- # @param child1 this is either an xpath or an Element. If an Element,
- # child2 will be inserted before child1 in the child list of the parent.
- # If an xpath, child2 will be inserted before the first child to match
- # the xpath.
- # @param child2 the child to insert
- # @return the parent (self)
- def insert_before( child1, child2 )
- if child1.kind_of? String
- child1 = XPath.first( self, child1 )
- child1.parent.insert_before child1, child2
- else
- ind = index(child1)
- child2.parent.delete(child2) if child2.parent
- @children[ind,0] = child2
- child2.parent = self
- end
- self
- end
-
- # Inserts an child after another child
- # @param child1 this is either an xpath or an Element. If an Element,
- # child2 will be inserted after child1 in the child list of the parent.
- # If an xpath, child2 will be inserted after the first child to match
- # the xpath.
- # @param child2 the child to insert
- # @return the parent (self)
- def insert_after( child1, child2 )
- if child1.kind_of? String
- child1 = XPath.first( self, child1 )
- child1.parent.insert_after child1, child2
- else
- ind = index(child1)+1
- child2.parent.delete(child2) if child2.parent
- @children[ind,0] = child2
- child2.parent = self
- end
- self
- end
-
- def to_a
- @children.dup
- end
-
- # Fetches the index of a given child
- # @param child the child to get the index of
- # @return the index of the child, or nil if the object is not a child
- # of this parent.
- def index( child )
- count = -1
- @children.find { |i| count += 1 ; i.hash == child.hash }
- count
- end
-
- # @return the number of children of this parent
- def size
- @children.size
- end
-
- # Replaces one child with another, making sure the nodelist is correct
- # @param to_replace the child to replace (must be a Child)
- # @param replacement the child to insert into the nodelist (must be a
- # Child)
- def replace_child( to_replace, replacement )
- ind = @children.index( to_replace )
- to_replace.parent = nil
- @children[ind,0] = replacement
- replacement.parent = self
- end
-
- # Deeply clones this object. This creates a complete duplicate of this
- # Parent, including all descendants.
- def deep_clone
- cl = clone()
- each do |child|
- if child.kind_of? Parent
- cl << child.deep_clone
- else
- cl << child.clone
- end
- end
- cl
- end
-
- alias :children :to_a
-
- def parent?
- true
- end
- end
+ class Parent < Child
+ include Enumerable
+
+ # Constructor
+ # @param parent if supplied, will be set as the parent of this object
+ def initialize parent=nil
+ super(parent)
+ @children = []
+ end
+
+ def add( object )
+ #puts "PARENT GOTS #{size} CHILDREN"
+ object.parent = self
+ @children << object
+ #puts "PARENT NOW GOTS #{size} CHILDREN"
+ object
+ end
+
+ alias :push :add
+ alias :<< :push
+
+ def unshift( object )
+ object.parent = self
+ @children.unshift object
+ end
+
+ def delete( object )
+ found = false
+ @children.delete_if {|c| c.equal?(object) and found = true }
+ object.parent = nil if found
+ end
+
+ def each(&block)
+ @children.each(&block)
+ end
+
+ def delete_if( &block )
+ @children.delete_if(&block)
+ end
+
+ def delete_at( index )
+ @children.delete_at index
+ end
+
+ def each_index( &block )
+ @children.each_index(&block)
+ end
+
+ # Fetches a child at a given index
+ # @param index the Integer index of the child to fetch
+ def []( index )
+ @children[index]
+ end
+
+ alias :each_child :each
+
+
+
+ # Set an index entry. See Array.[]=
+ # @param index the index of the element to set
+ # @param opt either the object to set, or an Integer length
+ # @param child if opt is an Integer, this is the child to set
+ # @return the parent (self)
+ def []=( *args )
+ args[-1].parent = self
+ @children[*args[0..-2]] = args[-1]
+ end
+
+ # Inserts an child before another child
+ # @param child1 this is either an xpath or an Element. If an Element,
+ # child2 will be inserted before child1 in the child list of the parent.
+ # If an xpath, child2 will be inserted before the first child to match
+ # the xpath.
+ # @param child2 the child to insert
+ # @return the parent (self)
+ def insert_before( child1, child2 )
+ if child1.kind_of? String
+ child1 = XPath.first( self, child1 )
+ child1.parent.insert_before child1, child2
+ else
+ ind = index(child1)
+ child2.parent.delete(child2) if child2.parent
+ @children[ind,0] = child2
+ child2.parent = self
+ end
+ self
+ end
+
+ # Inserts an child after another child
+ # @param child1 this is either an xpath or an Element. If an Element,
+ # child2 will be inserted after child1 in the child list of the parent.
+ # If an xpath, child2 will be inserted after the first child to match
+ # the xpath.
+ # @param child2 the child to insert
+ # @return the parent (self)
+ def insert_after( child1, child2 )
+ if child1.kind_of? String
+ child1 = XPath.first( self, child1 )
+ child1.parent.insert_after child1, child2
+ else
+ ind = index(child1)+1
+ child2.parent.delete(child2) if child2.parent
+ @children[ind,0] = child2
+ child2.parent = self
+ end
+ self
+ end
+
+ def to_a
+ @children.dup
+ end
+
+ # Fetches the index of a given child
+ # @param child the child to get the index of
+ # @return the index of the child, or nil if the object is not a child
+ # of this parent.
+ def index( child )
+ count = -1
+ @children.find { |i| count += 1 ; i.hash == child.hash }
+ count
+ end
+
+ # @return the number of children of this parent
+ def size
+ @children.size
+ end
+
+ alias :length :size
+
+ # Replaces one child with another, making sure the nodelist is correct
+ # @param to_replace the child to replace (must be a Child)
+ # @param replacement the child to insert into the nodelist (must be a
+ # Child)
+ def replace_child( to_replace, replacement )
+ @children.map! {|c| c.equal?( to_replace ) ? replacement : c }
+ to_replace.parent = nil
+ replacement.parent = self
+ end
+
+ # Deeply clones this object. This creates a complete duplicate of this
+ # Parent, including all descendants.
+ def deep_clone
+ cl = clone()
+ each do |child|
+ if child.kind_of? Parent
+ cl << child.deep_clone
+ else
+ cl << child.clone
+ end
+ end
+ cl
+ end
+
+ alias :children :to_a
+
+ def parent?
+ true
+ end
+ end
end
diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb
index e95cba8904..bce4ba4c20 100644
--- a/lib/rexml/parsers/baseparser.rb
+++ b/lib/rexml/parsers/baseparser.rb
@@ -2,103 +2,103 @@ require 'rexml/parseexception'
require 'rexml/source'
module REXML
- module Parsers
- # = Using the Pull Parser
- # <em>This API is experimental, and subject to change.</em>
- # parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
- # while parser.has_next?
- # res = parser.next
- # puts res[1]['att'] if res.start_tag? and res[0] == 'b'
- # end
- # See the PullEvent class for information on the content of the results.
- # The data is identical to the arguments passed for the various events to
- # the StreamListener API.
- #
- # Notice that:
- # parser = PullParser.new( "<a>BAD DOCUMENT" )
- # while parser.has_next?
- # res = parser.next
- # raise res[1] if res.error?
- # end
- #
- # Nat Price gave me some good ideas for the API.
- class BaseParser
- NCNAME_STR= '[\w:][\-\w\d.]*'
- NAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}"
+ module Parsers
+ # = Using the Pull Parser
+ # <em>This API is experimental, and subject to change.</em>
+ # parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
+ # while parser.has_next?
+ # res = parser.next
+ # puts res[1]['att'] if res.start_tag? and res[0] == 'b'
+ # end
+ # See the PullEvent class for information on the content of the results.
+ # The data is identical to the arguments passed for the various events to
+ # the StreamListener API.
+ #
+ # Notice that:
+ # parser = PullParser.new( "<a>BAD DOCUMENT" )
+ # while parser.has_next?
+ # res = parser.next
+ # raise res[1] if res.error?
+ # end
+ #
+ # Nat Price gave me some good ideas for the API.
+ class BaseParser
+ NCNAME_STR= '[\w:][\-\w\d.]*'
+ NAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}"
- NAMECHAR = '[\-\w\d\.:]'
- NAME = "([\\w:]#{NAMECHAR}*)"
- NMTOKEN = "(?:#{NAMECHAR})+"
- NMTOKENS = "#{NMTOKEN}(\\s+#{NMTOKEN})*"
- REFERENCE = "(?:&#{NAME};|&#\\d+;|&#x[0-9a-fA-F]+;)"
- REFERENCE_RE = /#{REFERENCE}/
+ NAMECHAR = '[\-\w\d\.:]'
+ NAME = "([\\w:]#{NAMECHAR}*)"
+ NMTOKEN = "(?:#{NAMECHAR})+"
+ NMTOKENS = "#{NMTOKEN}(\\s+#{NMTOKEN})*"
+ REFERENCE = "(?:&#{NAME};|&#\\d+;|&#x[0-9a-fA-F]+;)"
+ REFERENCE_RE = /#{REFERENCE}/
- DOCTYPE_START = /\A\s*<!DOCTYPE\s/um
- DOCTYPE_PATTERN = /\s*<!DOCTYPE\s+(.*?)(\[|>)/um
- ATTRIBUTE_PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\2/um
- COMMENT_START = /\A<!--/u
- COMMENT_PATTERN = /<!--(.*?)-->/um
- CDATA_START = /\A<!\[CDATA\[/u
- CDATA_END = /^\s*\]\s*>/um
- CDATA_PATTERN = /<!\[CDATA\[(.*?)\]\]>/um
- XMLDECL_START = /\A<\?xml\s/u;
- XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>*/um
- INSTRUCTION_START = /\A<\?/u
- INSTRUCTION_PATTERN = /<\?(.*?)(\s+.*?)?\?>/um
- TAG_MATCH = /^<((?>#{NAME_STR}))\s*((?>\s+#{NAME_STR}\s*=\s*(["']).*?\3)*)\s*(\/)?>/um
- CLOSE_MATCH = /^\s*<\/(#{NAME_STR})\s*>/um
+ DOCTYPE_START = /\A\s*<!DOCTYPE\s/um
+ DOCTYPE_PATTERN = /\s*<!DOCTYPE\s+(.*?)(\[|>)/um
+ ATTRIBUTE_PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\2/um
+ COMMENT_START = /\A<!--/u
+ COMMENT_PATTERN = /<!--(.*?)-->/um
+ CDATA_START = /\A<!\[CDATA\[/u
+ CDATA_END = /^\s*\]\s*>/um
+ CDATA_PATTERN = /<!\[CDATA\[(.*?)\]\]>/um
+ XMLDECL_START = /\A<\?xml\s/u;
+ XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>/um
+ INSTRUCTION_START = /\A<\?/u
+ INSTRUCTION_PATTERN = /<\?(.*?)(\s+.*?)?\?>/um
+ TAG_MATCH = /^<((?>#{NAME_STR}))\s*((?>\s+#{NAME_STR}\s*=\s*(["']).*?\3)*)\s*(\/)?>/um
+ CLOSE_MATCH = /^\s*<\/(#{NAME_STR})\s*>/um
- VERSION = /\bversion\s*=\s*["'](.*?)['"]/um
- ENCODING = /\bencoding=["'](.*?)['"]/um
- STANDALONE = /\bstandalone=["'](.*?)['"]/um
+ VERSION = /\bversion\s*=\s*["'](.*?)['"]/um
+ ENCODING = /\bencoding=["'](.*?)['"]/um
+ STANDALONE = /\bstandalone=["'](.*?)['"]/um
- ENTITY_START = /^\s*<!ENTITY/
- IDENTITY = /^([!\*\w\-]+)(\s+#{NCNAME_STR})?(\s+["'].*?['"])?(\s+['"].*?["'])?/u
- ELEMENTDECL_START = /^\s*<!ELEMENT/um
- ELEMENTDECL_PATTERN = /^\s*(<!ELEMENT.*?)>/um
- SYSTEMENTITY = /^\s*(%.*?;)\s*$/um
- ENUMERATION = "\\(\\s*#{NMTOKEN}(?:\\s*\\|\\s*#{NMTOKEN})*\\s*\\)"
- NOTATIONTYPE = "NOTATION\\s+\\(\\s*#{NAME}(?:\\s*\\|\\s*#{NAME})*\\s*\\)"
- ENUMERATEDTYPE = "(?:(?:#{NOTATIONTYPE})|(?:#{ENUMERATION}))"
- ATTTYPE = "(CDATA|ID|IDREF|IDREFS|ENTITY|ENTITIES|NMTOKEN|NMTOKENS|#{ENUMERATEDTYPE})"
- ATTVALUE = "(?:\"((?:[^<&\"]|#{REFERENCE})*)\")|(?:'((?:[^<&']|#{REFERENCE})*)')"
- DEFAULTDECL = "(#REQUIRED|#IMPLIED|(?:(#FIXED\\s+)?#{ATTVALUE}))"
- ATTDEF = "\\s+#{NAME}\\s+#{ATTTYPE}\\s+#{DEFAULTDECL}"
- ATTDEF_RE = /#{ATTDEF}/
- ATTLISTDECL_START = /^\s*<!ATTLIST/um
- ATTLISTDECL_PATTERN = /^\s*<!ATTLIST\s+#{NAME}(?:#{ATTDEF})*\s*>/um
- NOTATIONDECL_START = /^\s*<!NOTATION/um
- PUBLIC = /^\s*<!NOTATION\s+(\w[\-\w]*)\s+(PUBLIC)\s+((["']).*?\4)\s*>/um
- SYSTEM = /^\s*<!NOTATION\s+(\w[\-\w]*)\s+(SYSTEM)\s+((["']).*?\4)\s*>/um
+ ENTITY_START = /^\s*<!ENTITY/
+ IDENTITY = /^([!\*\w\-]+)(\s+#{NCNAME_STR})?(\s+["'].*?['"])?(\s+['"].*?["'])?/u
+ ELEMENTDECL_START = /^\s*<!ELEMENT/um
+ ELEMENTDECL_PATTERN = /^\s*(<!ELEMENT.*?)>/um
+ SYSTEMENTITY = /^\s*(%.*?;)\s*$/um
+ ENUMERATION = "\\(\\s*#{NMTOKEN}(?:\\s*\\|\\s*#{NMTOKEN})*\\s*\\)"
+ NOTATIONTYPE = "NOTATION\\s+\\(\\s*#{NAME}(?:\\s*\\|\\s*#{NAME})*\\s*\\)"
+ ENUMERATEDTYPE = "(?:(?:#{NOTATIONTYPE})|(?:#{ENUMERATION}))"
+ ATTTYPE = "(CDATA|ID|IDREF|IDREFS|ENTITY|ENTITIES|NMTOKEN|NMTOKENS|#{ENUMERATEDTYPE})"
+ ATTVALUE = "(?:\"((?:[^<&\"]|#{REFERENCE})*)\")|(?:'((?:[^<&']|#{REFERENCE})*)')"
+ DEFAULTDECL = "(#REQUIRED|#IMPLIED|(?:(#FIXED\\s+)?#{ATTVALUE}))"
+ ATTDEF = "\\s+#{NAME}\\s+#{ATTTYPE}\\s+#{DEFAULTDECL}"
+ ATTDEF_RE = /#{ATTDEF}/
+ ATTLISTDECL_START = /^\s*<!ATTLIST/um
+ ATTLISTDECL_PATTERN = /^\s*<!ATTLIST\s+#{NAME}(?:#{ATTDEF})*\s*>/um
+ NOTATIONDECL_START = /^\s*<!NOTATION/um
+ PUBLIC = /^\s*<!NOTATION\s+(\w[\-\w]*)\s+(PUBLIC)\s+(["'])(.*?)\3(?:\s+(["'])(.*?)\5)?\s*>/um
+ SYSTEM = /^\s*<!NOTATION\s+(\w[\-\w]*)\s+(SYSTEM)\s+(["'])(.*?)\3\s*>/um
- TEXT_PATTERN = /\A([^<]*)/um
+ TEXT_PATTERN = /\A([^<]*)/um
- # Entity constants
- PUBIDCHAR = "\x20\x0D\x0Aa-zA-Z0-9\\-()+,./:=?;!*@$_%#"
- SYSTEMLITERAL = %Q{((?:"[^"]*")|(?:'[^']*'))}
- PUBIDLITERAL = %Q{("[#{PUBIDCHAR}']*"|'[#{PUBIDCHAR}]*')}
- EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))"
- NDATADECL = "\\s+NDATA\\s+#{NAME}"
- PEREFERENCE = "%#{NAME};"
- ENTITYVALUE = %Q{((?:"(?:[^%&"]|#{PEREFERENCE}|#{REFERENCE})*")|(?:'([^%&']|#{PEREFERENCE}|#{REFERENCE})*'))}
- PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})"
- ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
- PEDECL = "<!ENTITY\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
- GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
- ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um
+ # Entity constants
+ PUBIDCHAR = "\x20\x0D\x0Aa-zA-Z0-9\\-()+,./:=?;!*@$_%#"
+ SYSTEMLITERAL = %Q{((?:"[^"]*")|(?:'[^']*'))}
+ PUBIDLITERAL = %Q{("[#{PUBIDCHAR}']*"|'[#{PUBIDCHAR}]*')}
+ EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))"
+ NDATADECL = "\\s+NDATA\\s+#{NAME}"
+ PEREFERENCE = "%#{NAME};"
+ ENTITYVALUE = %Q{((?:"(?:[^%&"]|#{PEREFERENCE}|#{REFERENCE})*")|(?:'([^%&']|#{PEREFERENCE}|#{REFERENCE})*'))}
+ PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})"
+ ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
+ PEDECL = "<!ENTITY\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
+ GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
+ ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um
- EREFERENCE = /&(?!#{NAME};)/
+ EREFERENCE = /&(?!#{NAME};)/
- DEFAULT_ENTITIES = {
- 'gt' => [/&gt;/, '&gt;', '>', />/],
- 'lt' => [/&lt;/, '&lt;', '<', /</],
- 'quot' => [/&quot;/, '&quot;', '"', /"/],
- "apos" => [/&apos;/, "&apos;", "'", /'/]
- }
+ DEFAULT_ENTITIES = {
+ 'gt' => [/&gt;/, '&gt;', '>', />/],
+ 'lt' => [/&lt;/, '&lt;', '<', /</],
+ 'quot' => [/&quot;/, '&quot;', '"', /"/],
+ "apos" => [/&apos;/, "&apos;", "'", /'/]
+ }
- def initialize( source )
- self.stream = source
- end
+ def initialize( source )
+ self.stream = source
+ end
def add_listener( listener )
if !defined?(@listeners) or !@listeners
@@ -119,315 +119,320 @@ module REXML
attr_reader :source
- def stream=( source )
- if source.kind_of? String
- @source = Source.new(source)
- elsif source.kind_of? IO
- @source = IOSource.new(source)
- elsif source.kind_of? Source
- @source = source
- elsif defined? StringIO and source.kind_of? StringIO
- @source = IOSource.new(source)
- else
- raise "#{source.class} is not a valid input stream. It must be \n"+
- "either a String, IO, StringIO or Source."
- end
- @closed = nil
- @document_status = nil
- @tags = []
- @stack = []
- @entities = []
- end
+ def stream=( source )
+ @source = SourceFactory.create_from( source )
+ @closed = nil
+ @document_status = nil
+ @tags = []
+ @stack = []
+ @entities = []
+ end
+
+ def position
+ if @source.respond_to? :position
+ @source.position
+ else
+ # FIXME
+ 0
+ end
+ end
- # Returns true if there are no more events
- def empty?
- #puts "@source.empty? = #{@source.empty?}"
- #puts "@stack.empty? = #{@stack.empty?}"
+ # Returns true if there are no more events
+ def empty?
+ #STDERR.puts "@source.empty? = #{@source.empty?}"
+ #STDERR.puts "@stack.empty? = #{@stack.empty?}"
return (@source.empty? and @stack.empty?)
- end
+ end
- # Returns true if there are more events. Synonymous with !empty?
- def has_next?
+ # Returns true if there are more events. Synonymous with !empty?
+ def has_next?
return !(@source.empty? and @stack.empty?)
- end
+ end
- # Push an event back on the head of the stream. This method
- # has (theoretically) infinite depth.
- def unshift token
- @stack.unshift(token)
- end
+ # Push an event back on the head of the stream. This method
+ # has (theoretically) infinite depth.
+ def unshift token
+ @stack.unshift(token)
+ end
- # Peek at the +depth+ event in the stack. The first element on the stack
- # is at depth 0. If +depth+ is -1, will parse to the end of the input
- # stream and return the last event, which is always :end_document.
- # Be aware that this causes the stream to be parsed up to the +depth+
- # event, so you can effectively pre-parse the entire document (pull the
- # entire thing into memory) using this method.
- def peek depth=0
- raise %Q[Illegal argument "#{depth}"] if depth < -1
- temp = []
- if depth == -1
- temp.push(pull()) until empty?
- else
- while @stack.size+temp.size < depth+1
- temp.push(pull())
- end
- end
- @stack += temp if temp.size > 0
- @stack[depth]
- end
+ # Peek at the +depth+ event in the stack. The first element on the stack
+ # is at depth 0. If +depth+ is -1, will parse to the end of the input
+ # stream and return the last event, which is always :end_document.
+ # Be aware that this causes the stream to be parsed up to the +depth+
+ # event, so you can effectively pre-parse the entire document (pull the
+ # entire thing into memory) using this method.
+ def peek depth=0
+ raise %Q[Illegal argument "#{depth}"] if depth < -1
+ temp = []
+ if depth == -1
+ temp.push(pull()) until empty?
+ else
+ while @stack.size+temp.size < depth+1
+ temp.push(pull())
+ end
+ end
+ @stack += temp if temp.size > 0
+ @stack[depth]
+ end
- # Returns the next event. This is a +PullEvent+ object.
- def pull
- if @closed
- x, @closed = @closed, nil
- return [ :end_element, x ]
- end
- return [ :end_document ] if empty?
- return @stack.shift if @stack.size > 0
- @source.read if @source.buffer.size<2
- if @document_status == nil
- @source.consume( /^\s*/um )
- word = @source.match( /(<[^>]*)>/um )
- word = word[1] unless word.nil?
- case word
- when COMMENT_START
- return [ :comment, @source.match( COMMENT_PATTERN, true )[1] ]
- when XMLDECL_START
- results = @source.match( XMLDECL_PATTERN, true )[1]
- version = VERSION.match( results )
- version = version[1] unless version.nil?
- encoding = ENCODING.match(results)
- encoding = encoding[1] unless encoding.nil?
- @source.encoding = encoding
- standalone = STANDALONE.match(results)
- standalone = standalone[1] unless standalone.nil?
- return [ :xmldecl, version, encoding, standalone]
- when INSTRUCTION_START
- return [ :processing_instruction, *@source.match(INSTRUCTION_PATTERN, true)[1,2] ]
- when DOCTYPE_START
- md = @source.match( DOCTYPE_PATTERN, true )
- identity = md[1]
- close = md[2]
- identity =~ IDENTITY
- name = $1
- raise REXML::ParseException("DOCTYPE is missing a name") if name.nil?
- pub_sys = $2.nil? ? nil : $2.strip
- long_name = $3.nil? ? nil : $3.strip
- uri = $4.nil? ? nil : $4.strip
- args = [ :start_doctype, name, pub_sys, long_name, uri ]
- if close == ">"
- @document_status = :after_doctype
- @source.read if @source.buffer.size<2
- md = @source.match(/^\s*/um, true)
- @stack << [ :end_doctype ]
- else
- @document_status = :in_doctype
- end
- return args
- else
- @document_status = :after_doctype
- @source.read if @source.buffer.size<2
- md = @source.match(/\s*/um, true)
- end
- end
- if @document_status == :in_doctype
- md = @source.match(/\s*(.*?>)/um)
- case md[1]
- when SYSTEMENTITY
- match = @source.match( SYSTEMENTITY, true )[1]
- return [ :externalentity, match ]
+ # Returns the next event. This is a +PullEvent+ object.
+ def pull
+ if @closed
+ x, @closed = @closed, nil
+ return [ :end_element, x ]
+ end
+ return [ :end_document ] if empty?
+ return @stack.shift if @stack.size > 0
+ @source.read if @source.buffer.size<2
+ #STDERR.puts "BUFFER = #{@source.buffer.inspect}"
+ if @document_status == nil
+ #@source.consume( /^\s*/um )
+ word = @source.match( /^((?:\s+)|(?:<[^>]*>))/um )
+ word = word[1] unless word.nil?
+ #STDERR.puts "WORD = #{word.inspect}"
+ case word
+ when COMMENT_START
+ return [ :comment, @source.match( COMMENT_PATTERN, true )[1] ]
+ when XMLDECL_START
+ #STDERR.puts "XMLDECL"
+ results = @source.match( XMLDECL_PATTERN, true )[1]
+ version = VERSION.match( results )
+ version = version[1] unless version.nil?
+ encoding = ENCODING.match(results)
+ encoding = encoding[1] unless encoding.nil?
+ @source.encoding = encoding
+ standalone = STANDALONE.match(results)
+ standalone = standalone[1] unless standalone.nil?
+ return [ :xmldecl, version, encoding, standalone ]
+ when INSTRUCTION_START
+ return [ :processing_instruction, *@source.match(INSTRUCTION_PATTERN, true)[1,2] ]
+ when DOCTYPE_START
+ md = @source.match( DOCTYPE_PATTERN, true )
+ identity = md[1]
+ close = md[2]
+ identity =~ IDENTITY
+ name = $1
+ raise REXML::ParseException("DOCTYPE is missing a name") if name.nil?
+ pub_sys = $2.nil? ? nil : $2.strip
+ long_name = $3.nil? ? nil : $3.strip
+ uri = $4.nil? ? nil : $4.strip
+ args = [ :start_doctype, name, pub_sys, long_name, uri ]
+ if close == ">"
+ @document_status = :after_doctype
+ @source.read if @source.buffer.size<2
+ md = @source.match(/^\s*/um, true)
+ @stack << [ :end_doctype ]
+ else
+ @document_status = :in_doctype
+ end
+ return args
+ when /^\s+/
+ else
+ @document_status = :after_doctype
+ @source.read if @source.buffer.size<2
+ md = @source.match(/\s*/um, true)
+ end
+ end
+ if @document_status == :in_doctype
+ md = @source.match(/\s*(.*?>)/um)
+ case md[1]
+ when SYSTEMENTITY
+ match = @source.match( SYSTEMENTITY, true )[1]
+ return [ :externalentity, match ]
- when ELEMENTDECL_START
- return [ :elementdecl, @source.match( ELEMENTDECL_PATTERN, true )[1] ]
+ when ELEMENTDECL_START
+ return [ :elementdecl, @source.match( ELEMENTDECL_PATTERN, true )[1] ]
- when ENTITY_START
- match = @source.match( ENTITYDECL, true ).to_a.compact
- match[0] = :entitydecl
- ref = false
- if match[1] == '%'
- ref = true
- match.delete_at 1
- end
- # Now we have to sort out what kind of entity reference this is
- if match[2] == 'SYSTEM'
- # External reference
- match[3] = match[3][1..-2] # PUBID
- match.delete_at(4) if match.size > 4 # Chop out NDATA decl
- # match is [ :entity, name, SYSTEM, pubid(, ndata)? ]
- elsif match[2] == 'PUBLIC'
- # External reference
- match[3] = match[3][1..-2] # PUBID
- match[4] = match[4][1..-2] # HREF
- # match is [ :entity, name, PUBLIC, pubid, href ]
- else
- match[2] = match[2][1..-2]
- match.pop if match.size == 4
- # match is [ :entity, name, value ]
- end
- match << '%' if ref
- return match
- when ATTLISTDECL_START
- md = @source.match( ATTLISTDECL_PATTERN, true )
- raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
- element = md[1]
- contents = md[0]
+ when ENTITY_START
+ match = @source.match( ENTITYDECL, true ).to_a.compact
+ match[0] = :entitydecl
+ ref = false
+ if match[1] == '%'
+ ref = true
+ match.delete_at 1
+ end
+ # Now we have to sort out what kind of entity reference this is
+ if match[2] == 'SYSTEM'
+ # External reference
+ match[3] = match[3][1..-2] # PUBID
+ match.delete_at(4) if match.size > 4 # Chop out NDATA decl
+ # match is [ :entity, name, SYSTEM, pubid(, ndata)? ]
+ elsif match[2] == 'PUBLIC'
+ # External reference
+ match[3] = match[3][1..-2] # PUBID
+ match[4] = match[4][1..-2] # HREF
+ # match is [ :entity, name, PUBLIC, pubid, href ]
+ else
+ match[2] = match[2][1..-2]
+ match.pop if match.size == 4
+ # match is [ :entity, name, value ]
+ end
+ match << '%' if ref
+ return match
+ when ATTLISTDECL_START
+ md = @source.match( ATTLISTDECL_PATTERN, true )
+ raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
+ element = md[1]
+ contents = md[0]
- pairs = {}
- values = md[0].scan( ATTDEF_RE )
- values.each do |attdef|
- unless attdef[3] == "#IMPLIED"
- attdef.compact!
- val = attdef[3]
- val = attdef[4] if val == "#FIXED "
- pairs[attdef[0]] = val
- end
- end
- return [ :attlistdecl, element, pairs, contents ]
- when NOTATIONDECL_START
- md = nil
- if @source.match( PUBLIC )
- md = @source.match( PUBLIC, true )
- elsif @source.match( SYSTEM )
- md = @source.match( SYSTEM, true )
- else
- raise REXML::ParseException.new( "error parsing notation: no matching pattern", @source )
- end
- return [ :notationdecl, md[1], md[2], md[3] ]
- when CDATA_END
- @document_status = :after_doctype
- @source.match( CDATA_END, true )
- return [ :end_doctype ]
- end
- end
- begin
- if @source.buffer[0] == ?<
- if @source.buffer[1] == ?/
- last_tag = @tags.pop
- #md = @source.match_to_consume( '>', CLOSE_MATCH)
- md = @source.match( CLOSE_MATCH, true )
- raise REXML::ParseException.new( "Missing end tag for "+
+ pairs = {}
+ values = md[0].scan( ATTDEF_RE )
+ values.each do |attdef|
+ unless attdef[3] == "#IMPLIED"
+ attdef.compact!
+ val = attdef[3]
+ val = attdef[4] if val == "#FIXED "
+ pairs[attdef[0]] = val
+ end
+ end
+ return [ :attlistdecl, element, pairs, contents ]
+ when NOTATIONDECL_START
+ md = nil
+ if @source.match( PUBLIC )
+ md = @source.match( PUBLIC, true )
+ vals = [md[1],md[2],md[4],md[6]]
+ elsif @source.match( SYSTEM )
+ md = @source.match( SYSTEM, true )
+ vals = [md[1],md[2],nil,md[4]]
+ else
+ raise REXML::ParseException.new( "error parsing notation: no matching pattern", @source )
+ end
+ return [ :notationdecl, *vals ]
+ when CDATA_END
+ @document_status = :after_doctype
+ @source.match( CDATA_END, true )
+ return [ :end_doctype ]
+ end
+ end
+ begin
+ if @source.buffer[0] == ?<
+ if @source.buffer[1] == ?/
+ last_tag = @tags.pop
+ #md = @source.match_to_consume( '>', CLOSE_MATCH)
+ md = @source.match( CLOSE_MATCH, true )
+ raise REXML::ParseException.new( "Missing end tag for "+
"'#{last_tag}' (got \"#{md[1]}\")",
@source) unless last_tag == md[1]
- return [ :end_element, last_tag ]
- elsif @source.buffer[1] == ?!
- md = @source.match(/\A(\s*[^>]*>)/um)
- #puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
- raise REXML::ParseException.new("Malformed node", @source) unless md
- if md[0][2] == ?-
- md = @source.match( COMMENT_PATTERN, true )
- return [ :comment, md[1] ] if md
- else
- md = @source.match( CDATA_PATTERN, true )
- return [ :cdata, md[1] ] if md
- end
- raise REXML::ParseException.new( "Declarations can only occur "+
- "in the doctype declaration.", @source)
- elsif @source.buffer[1] == ??
- md = @source.match( INSTRUCTION_PATTERN, true )
- return [ :processing_instruction, md[1], md[2] ] if md
- raise REXML::ParseException.new( "Bad instruction declaration",
- @source)
- else
- # Get the next tag
- md = @source.match(TAG_MATCH, true)
- raise REXML::ParseException.new("malformed XML: missing tag start", @source) unless md
- attrs = []
- if md[2].size > 0
- attrs = md[2].scan( ATTRIBUTE_PATTERN )
- raise REXML::ParseException.new( "error parsing attributes: [#{attrs.join ', '}], excess = \"#$'\"", @source) if $' and $'.strip.size > 0
- end
-
- if md[4]
- @closed = md[1]
- else
- @tags.push( md[1] )
- end
- attributes = {}
- attrs.each { |a,b,c| attributes[a] = c }
- return [ :start_element, md[1], attributes ]
- end
- else
- md = @source.match( TEXT_PATTERN, true )
+ return [ :end_element, last_tag ]
+ elsif @source.buffer[1] == ?!
+ md = @source.match(/\A(\s*[^>]*>)/um)
+ #STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
+ raise REXML::ParseException.new("Malformed node", @source) unless md
+ if md[0][2] == ?-
+ md = @source.match( COMMENT_PATTERN, true )
+ return [ :comment, md[1] ] if md
+ else
+ md = @source.match( CDATA_PATTERN, true )
+ return [ :cdata, md[1] ] if md
+ end
+ raise REXML::ParseException.new( "Declarations can only occur "+
+ "in the doctype declaration.", @source)
+ elsif @source.buffer[1] == ??
+ md = @source.match( INSTRUCTION_PATTERN, true )
+ return [ :processing_instruction, md[1], md[2] ] if md
+ raise REXML::ParseException.new( "Bad instruction declaration",
+ @source)
+ else
+ # Get the next tag
+ md = @source.match(TAG_MATCH, true)
+ raise REXML::ParseException.new("malformed XML: missing tag start", @source) unless md
+ attrs = []
+ if md[2].size > 0
+ attrs = md[2].scan( ATTRIBUTE_PATTERN )
+ raise REXML::ParseException.new( "error parsing attributes: [#{attrs.join ', '}], excess = \"#$'\"", @source) if $' and $'.strip.size > 0
+ end
+
+ if md[4]
+ @closed = md[1]
+ else
+ @tags.push( md[1] )
+ end
+ attributes = {}
+ attrs.each { |a,b,c| attributes[a] = c }
+ return [ :start_element, md[1], attributes ]
+ end
+ else
+ md = @source.match( TEXT_PATTERN, true )
if md[0].length == 0
- #puts "EMPTY = #{empty?}"
- #puts "BUFFER = \"#{@source.buffer}\""
+ puts "EMPTY = #{empty?}"
+ puts "BUFFER = \"#{@source.buffer}\""
@source.match( /(\s+)/, true )
end
+ #STDERR.puts "GOT #{md[1].inspect}" unless md[0].length == 0
#return [ :text, "" ] if md[0].length == 0
- # unnormalized = Text::unnormalize( md[1], self )
- # return PullEvent.new( :text, md[1], unnormalized )
- return [ :text, md[1] ]
- end
- rescue REXML::ParseException
- raise
- rescue Exception, NameError => error
- raise REXML::ParseException.new( "Exception parsing",
- @source, self, (error ? error : $!) )
- end
- return [ :dummy ]
- end
+ # unnormalized = Text::unnormalize( md[1], self )
+ # return PullEvent.new( :text, md[1], unnormalized )
+ return [ :text, md[1] ]
+ end
+ rescue REXML::ParseException
+ raise
+ rescue Exception, NameError => error
+ raise REXML::ParseException.new( "Exception parsing",
+ @source, self, (error ? error : $!) )
+ end
+ return [ :dummy ]
+ end
- def entity( reference, entities )
- value = nil
- value = entities[ reference ] if entities
- if not value
- value = DEFAULT_ENTITIES[ reference ]
- value = value[2] if value
- end
- unnormalize( value, entities ) if value
- end
+ def entity( reference, entities )
+ value = nil
+ value = entities[ reference ] if entities
+ if not value
+ value = DEFAULT_ENTITIES[ reference ]
+ value = value[2] if value
+ end
+ unnormalize( value, entities ) if value
+ end
- # Escapes all possible entities
- def normalize( input, entities=nil, entity_filter=nil )
- copy = input.clone
- # Doing it like this rather than in a loop improves the speed
- copy.gsub!( EREFERENCE, '&amp;' )
- entities.each do |key, value|
- copy.gsub!( value, "&#{key};" ) unless entity_filter and
- entity_filter.include?(entity)
- end if entities
- copy.gsub!( EREFERENCE, '&amp;' )
- DEFAULT_ENTITIES.each do |key, value|
- copy.gsub!( value[3], value[1] )
- end
- copy
- end
+ # Escapes all possible entities
+ def normalize( input, entities=nil, entity_filter=nil )
+ copy = input.clone
+ # Doing it like this rather than in a loop improves the speed
+ copy.gsub!( EREFERENCE, '&amp;' )
+ entities.each do |key, value|
+ copy.gsub!( value, "&#{key};" ) unless entity_filter and
+ entity_filter.include?(entity)
+ end if entities
+ copy.gsub!( EREFERENCE, '&amp;' )
+ DEFAULT_ENTITIES.each do |key, value|
+ copy.gsub!( value[3], value[1] )
+ end
+ copy
+ end
- # Unescapes all possible entities
- def unnormalize( string, entities=nil, filter=nil )
- rv = string.clone
- rv.gsub!( /\r\n?/, "\n" )
- matches = rv.scan( REFERENCE_RE )
- return rv if matches.size == 0
- rv.gsub!( /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/ ) {|m|
- m=$1
- m = "0#{m}" if m[0] == ?x
- [Integer(m)].pack('U*')
- }
- matches.collect!{|x|x[0]}.compact!
- if matches.size > 0
- matches.each do |entity_reference|
- unless filter and filter.include?(entity_reference)
- entity_value = entity( entity_reference, entities )
- if entity_value
- re = /&#{entity_reference};/
- rv.gsub!( re, entity_value )
- end
- end
- end
- matches.each do |entity_reference|
- unless filter and filter.include?(entity_reference)
- er = DEFAULT_ENTITIES[entity_reference]
- rv.gsub!( er[0], er[2] ) if er
- end
- end
- rv.gsub!( /&amp;/, '&' )
- end
- rv
- end
- end
- end
+ # Unescapes all possible entities
+ def unnormalize( string, entities=nil, filter=nil )
+ rv = string.clone
+ rv.gsub!( /\r\n?/, "\n" )
+ matches = rv.scan( REFERENCE_RE )
+ return rv if matches.size == 0
+ rv.gsub!( /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/ ) {|m|
+ m=$1
+ m = "0#{m}" if m[0] == ?x
+ [Integer(m)].pack('U*')
+ }
+ matches.collect!{|x|x[0]}.compact!
+ if matches.size > 0
+ matches.each do |entity_reference|
+ unless filter and filter.include?(entity_reference)
+ entity_value = entity( entity_reference, entities )
+ if entity_value
+ re = /&#{entity_reference};/
+ rv.gsub!( re, entity_value )
+ end
+ end
+ end
+ matches.each do |entity_reference|
+ unless filter and filter.include?(entity_reference)
+ er = DEFAULT_ENTITIES[entity_reference]
+ rv.gsub!( er[0], er[2] ) if er
+ end
+ end
+ rv.gsub!( /&amp;/, '&' )
+ end
+ rv
+ end
+ end
+ end
end
=begin
diff --git a/lib/rexml/parsers/pullparser.rb b/lib/rexml/parsers/pullparser.rb
index 0a328ea8fc..36dc7160c3 100644
--- a/lib/rexml/parsers/pullparser.rb
+++ b/lib/rexml/parsers/pullparser.rb
@@ -1,95 +1,99 @@
+require 'forwardable'
+
require 'rexml/parseexception'
require 'rexml/parsers/baseparser'
require 'rexml/xmltokens'
module REXML
- module Parsers
- # = Using the Pull Parser
- # <em>This API is experimental, and subject to change.</em>
- # parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
- # while parser.has_next?
- # res = parser.next
- # puts res[1]['att'] if res.start_tag? and res[0] == 'b'
- # end
- # See the PullEvent class for information on the content of the results.
- # The data is identical to the arguments passed for the various events to
- # the StreamListener API.
- #
- # Notice that:
- # parser = PullParser.new( "<a>BAD DOCUMENT" )
- # while parser.has_next?
- # res = parser.next
- # raise res[1] if res.error?
- # end
- #
- # Nat Price gave me some good ideas for the API.
- class PullParser
- include XMLTokens
-
- def initialize stream
- @entities = {}
+ module Parsers
+ # = Using the Pull Parser
+ # <em>This API is experimental, and subject to change.</em>
+ # parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
+ # while parser.has_next?
+ # res = parser.next
+ # puts res[1]['att'] if res.start_tag? and res[0] == 'b'
+ # end
+ # See the PullEvent class for information on the content of the results.
+ # The data is identical to the arguments passed for the various events to
+ # the StreamListener API.
+ #
+ # Notice that:
+ # parser = PullParser.new( "<a>BAD DOCUMENT" )
+ # while parser.has_next?
+ # res = parser.next
+ # raise res[1] if res.error?
+ # end
+ #
+ # Nat Price gave me some good ideas for the API.
+ class PullParser
+ include XMLTokens
+ extend Forwardable
+
+ def_delegators( :@parser, :has_next? )
+ def_delegators( :@parser, :entity )
+ def_delegators( :@parser, :empty? )
+ def_delegators( :@parser, :source )
+
+ def initialize stream
+ @entities = {}
@listeners = nil
@parser = BaseParser.new( stream )
- end
+ @my_stack = []
+ end
def add_listener( listener )
@listeners = [] unless @listeners
@listeners << listener
end
- def each
- while has_next?
- yield self.pull
- end
- end
-
- def peek depth=0
- PullEvent.new(@parser.peek(depth))
- end
-
- def has_next?
- @parser.has_next?
+ def each
+ while has_next?
+ yield self.pull
+ end
end
- def pull
- event = @parser.pull
- case event[0]
- when :entitydecl
- @entities[ event[1] ] =
- event[2] unless event[2] =~ /PUBLIC|SYSTEM/
- when :text
- unnormalized = @parser.unnormalize( event[1], @entities )
- event << unnormalized
- end
- PullEvent.new( event )
- end
-
- def unshift token
- @parser.unshift token
+ def peek depth=0
+ if @my_stack.length <= depth
+ (depth - @my_stack.length + 1).times {
+ e = PullEvent.new(@parser.pull)
+ @my_stack.push(e)
+ }
+ end
+ @my_stack[depth]
end
- def entity reference
- @parser.entity( reference )
+ def pull
+ return @my_stack.shift if @my_stack.length > 0
+
+ event = @parser.pull
+ case event[0]
+ when :entitydecl
+ @entities[ event[1] ] =
+ event[2] unless event[2] =~ /PUBLIC|SYSTEM/
+ when :text
+ unnormalized = @parser.unnormalize( event[1], @entities )
+ event << unnormalized
+ end
+ PullEvent.new( event )
end
- def empty?
- @parser.empty?
+ def unshift token
+ @my_stack.unshift token
+ end
+ end
+
+ # A parsing event. The contents of the event are accessed as an +Array?,
+ # and the type is given either by the ...? methods, or by accessing the
+ # +type+ accessor. The contents of this object vary from event to event,
+ # but are identical to the arguments passed to +StreamListener+s for each
+ # event.
+ class PullEvent
+ # The type of this event. Will be one of :tag_start, :tag_end, :text,
+ # :processing_instruction, :comment, :doctype, :attlistdecl, :entitydecl,
+ # :notationdecl, :entity, :cdata, :xmldecl, or :error.
+ def initialize(arg)
+ @contents = arg
end
-
- end
-
- # A parsing event. The contents of the event are accessed as an +Array?,
- # and the type is given either by the ...? methods, or by accessing the
- # +type+ accessor. The contents of this object vary from event to event,
- # but are identical to the arguments passed to +StreamListener+s for each
- # event.
- class PullEvent
- # The type of this event. Will be one of :tag_start, :tag_end, :text,
- # :processing_instruction, :comment, :doctype, :attlistdecl, :entitydecl,
- # :notationdecl, :entity, :cdata, :xmldecl, or :error.
- def initialize(arg)
- @contents = arg
- end
def []( start, endd=nil)
if start.kind_of? Range
@@ -103,90 +107,90 @@ module REXML
else
raise "Illegal argument #{start.inspect} (#{start.class})"
end
- end
-
- def event_type
- @contents[0]
- end
-
- # Content: [ String tag_name, Hash attributes ]
- def start_element?
- @contents[0] == :start_element
- end
-
- # Content: [ String tag_name ]
- def end_element?
- @contents[0] == :end_element
- end
-
- # Content: [ String raw_text, String unnormalized_text ]
- def text?
- @contents[0] == :text
- end
-
- # Content: [ String text ]
- def instruction?
- @contents[0] == :processing_instruction
- end
-
- # Content: [ String text ]
- def comment?
- @contents[0] == :comment
- end
-
- # Content: [ String name, String pub_sys, String long_name, String uri ]
- def doctype?
- @contents[0] == :start_doctype
- end
-
- # Content: [ String text ]
- def attlistdecl?
- @contents[0] == :attlistdecl
- end
-
- # Content: [ String text ]
- def elementdecl?
- @contents[0] == :elementdecl
- end
-
- # Due to the wonders of DTDs, an entity declaration can be just about
- # anything. There's no way to normalize it; you'll have to interpret the
- # content yourself. However, the following is true:
- #
- # * If the entity declaration is an internal entity:
- # [ String name, String value ]
- # Content: [ String text ]
- def entitydecl?
- @contents[0] == :entitydecl
- end
-
- # Content: [ String text ]
- def notationdecl?
- @contents[0] == :notationdecl
- end
-
- # Content: [ String text ]
- def entity?
- @contents[0] == :entity
- end
-
- # Content: [ String text ]
- def cdata?
- @contents[0] == :cdata
- end
-
- # Content: [ String version, String encoding, String standalone ]
- def xmldecl?
- @contents[0] == :xmldecl
- end
-
- def error?
- @contents[0] == :error
- end
-
- def inspect
+ end
+
+ def event_type
+ @contents[0]
+ end
+
+ # Content: [ String tag_name, Hash attributes ]
+ def start_element?
+ @contents[0] == :start_element
+ end
+
+ # Content: [ String tag_name ]
+ def end_element?
+ @contents[0] == :end_element
+ end
+
+ # Content: [ String raw_text, String unnormalized_text ]
+ def text?
+ @contents[0] == :text
+ end
+
+ # Content: [ String text ]
+ def instruction?
+ @contents[0] == :processing_instruction
+ end
+
+ # Content: [ String text ]
+ def comment?
+ @contents[0] == :comment
+ end
+
+ # Content: [ String name, String pub_sys, String long_name, String uri ]
+ def doctype?
+ @contents[0] == :start_doctype
+ end
+
+ # Content: [ String text ]
+ def attlistdecl?
+ @contents[0] == :attlistdecl
+ end
+
+ # Content: [ String text ]
+ def elementdecl?
+ @contents[0] == :elementdecl
+ end
+
+ # Due to the wonders of DTDs, an entity declaration can be just about
+ # anything. There's no way to normalize it; you'll have to interpret the
+ # content yourself. However, the following is true:
+ #
+ # * If the entity declaration is an internal entity:
+ # [ String name, String value ]
+ # Content: [ String text ]
+ def entitydecl?
+ @contents[0] == :entitydecl
+ end
+
+ # Content: [ String text ]
+ def notationdecl?
+ @contents[0] == :notationdecl
+ end
+
+ # Content: [ String text ]
+ def entity?
+ @contents[0] == :entity
+ end
+
+ # Content: [ String text ]
+ def cdata?
+ @contents[0] == :cdata
+ end
+
+ # Content: [ String version, String encoding, String standalone ]
+ def xmldecl?
+ @contents[0] == :xmldecl
+ end
+
+ def error?
+ @contents[0] == :error
+ end
+
+ def inspect
@contents[0].to_s + ": " + @contents[1..-1].inspect
- end
- end
- end
+ end
+ end
+ end
end
diff --git a/lib/rexml/parsers/sax2parser.rb b/lib/rexml/parsers/sax2parser.rb
index d5ee1bcfcd..61a216cec1 100644
--- a/lib/rexml/parsers/sax2parser.rb
+++ b/lib/rexml/parsers/sax2parser.rb
@@ -1,9 +1,11 @@
require 'rexml/parsers/baseparser'
require 'rexml/parseexception'
require 'rexml/namespace'
+require 'rexml/text'
module REXML
module Parsers
+ # SAX2Parser
class SAX2Parser
def initialize source
@parser = BaseParser.new(source)
@@ -36,6 +38,10 @@ module REXML
# :start_prefix_mapping, :end_prefix_mapping, :characters,
# :processing_instruction, :doctype, :attlistdecl, :elementdecl,
# :entitydecl, :notationdecl, :cdata, :xmldecl, :comment
+ #
+ # There is an additional symbol that can be listened for: :progress.
+ # This will be called for every event generated, passing in the current
+ # stream position.
#
# Array contains regular expressions or strings which will be matched
# against fully qualified element names.
@@ -161,6 +167,7 @@ module REXML
:elementdecl, :cdata, :notationdecl, :xmldecl
handle( *event )
end
+ handle( :progress, @parser.position )
end
end
diff --git a/lib/rexml/parsers/streamparser.rb b/lib/rexml/parsers/streamparser.rb
index 996d613e15..256d0f611c 100644
--- a/lib/rexml/parsers/streamparser.rb
+++ b/lib/rexml/parsers/streamparser.rb
@@ -1,42 +1,46 @@
module REXML
- module Parsers
- class StreamParser
- def initialize source, listener
- @listener = listener
- @parser = BaseParser.new( source )
- end
-
+ module Parsers
+ class StreamParser
+ def initialize source, listener
+ @listener = listener
+ @parser = BaseParser.new( source )
+ end
+
def add_listener( listener )
@parser.add_listener( listener )
end
-
- def parse
- # entity string
- while true
- event = @parser.pull
- case event[0]
- when :end_document
- return
- when :start_element
- attrs = event[2].each do |n, v|
- event[2][n] = @parser.unnormalize( v )
- end
- @listener.tag_start( event[1], attrs )
- when :end_element
- @listener.tag_end( event[1] )
- when :text
- normalized = @parser.unnormalize( event[1] )
- @listener.text( normalized )
- when :processing_instruction
- @listener.instruction( *event[1,2] )
+
+ def parse
+ # entity string
+ while true
+ event = @parser.pull
+ case event[0]
+ when :end_document
+ return
+ when :start_element
+ attrs = event[2].each do |n, v|
+ event[2][n] = @parser.unnormalize( v )
+ end
+ @listener.tag_start( event[1], attrs )
+ when :end_element
+ @listener.tag_end( event[1] )
+ when :text
+ normalized = @parser.unnormalize( event[1] )
+ @listener.text( normalized )
+ when :processing_instruction
+ @listener.instruction( *event[1,2] )
when :start_doctype
@listener.doctype( *event[1..-1] )
- when :comment, :attlistdecl, :notationdecl, :elementdecl,
- :entitydecl, :cdata, :xmldecl, :attlistdecl
- @listener.send( event[0].to_s, *event[1..-1] )
- end
- end
- end
- end
- end
+ when :end_doctype
+ # FIXME: remove this condition for milestone:3.2
+ @listener.doctype_end if @listener.respond_to? :doctype_end
+ when :comment, :attlistdecl, :cdata, :xmldecl, :elementdecl
+ @listener.send( event[0].to_s, *event[1..-1] )
+ when :entitydecl, :notationdecl
+ @listener.send( event[0].to_s, event[1..-1] )
+ end
+ end
+ end
+ end
+ end
end
diff --git a/lib/rexml/parsers/treeparser.rb b/lib/rexml/parsers/treeparser.rb
index 57d11f7e23..500a53f426 100644
--- a/lib/rexml/parsers/treeparser.rb
+++ b/lib/rexml/parsers/treeparser.rb
@@ -19,8 +19,12 @@ module REXML
begin
while true
event = @parser.pull
+ #STDERR.puts "TREEPARSER GOT #{event.inspect}"
case event[0]
when :end_document
+ unless tag_stack.empty?
+ raise ParseException.new("No close tag for #{tag_stack.inspect}")
+ end
return
when :start_element
tag_stack.push(event[1])
@@ -35,10 +39,10 @@ module REXML
@build_context[-1] << event[1]
else
@build_context.add(
- Text.new( event[1], @build_context.whitespace, nil, true )
+ Text.new(event[1], @build_context.whitespace, nil, true)
) unless (
- event[1].strip.size==0 and
- @build_context.ignore_whitespace_nodes
+ @build_context.ignore_whitespace_nodes and
+ event[1].strip.size==0
)
end
end
diff --git a/lib/rexml/rexml.rb b/lib/rexml/rexml.rb
index 00fd50ad02..ca154443b5 100644
--- a/lib/rexml/rexml.rb
+++ b/lib/rexml/rexml.rb
@@ -10,8 +10,8 @@
#
# Main page:: http://www.germane-software.com/software/rexml
# Author:: Sean Russell <serATgermaneHYPHENsoftwareDOTcom>
-# Version:: 3.1.3
-# Date:: +2005/139
+# Version:: 3.1.4
+# Date:: 2006/104
#
# This API documentation can be downloaded from the REXML home page, or can
# be accessed online[http://www.germane-software.com/software/rexml_doc]
@@ -20,7 +20,10 @@
# or can be accessed
# online[http://www.germane-software.com/software/rexml/docs/tutorial.html]
module REXML
- Copyright = "Copyright © 2001-2005 Sean Russell <ser@germane-software.com>"
- Date = "+2005/139"
- Version = "3.1.3"
+ COPYRIGHT = "Copyright © 2001-2006 Sean Russell <ser@germane-software.com>"
+ DATE = "2006/104"
+ VERSION = "3.1.4"
+
+ Copyright = COPYRIGHT
+ Version = VERSION
end
diff --git a/lib/rexml/sax2listener.rb b/lib/rexml/sax2listener.rb
index 40a77ed464..9a992917e6 100644
--- a/lib/rexml/sax2listener.rb
+++ b/lib/rexml/sax2listener.rb
@@ -84,11 +84,14 @@ module REXML
# @p version the version attribute value. EG, "1.0"
# @p encoding the encoding attribute value, or nil. EG, "utf"
# @p standalone the standalone attribute value, or nil. EG, nil
+ # @p spaced the declaration is followed by a line break
def xmldecl version, encoding, standalone
end
# Called when a comment is encountered.
# @p comment The content of the comment
def comment comment
end
+ def progress position
+ end
end
end
diff --git a/lib/rexml/source.rb b/lib/rexml/source.rb
index 7251666160..cacab221db 100644
--- a/lib/rexml/source.rb
+++ b/lib/rexml/source.rb
@@ -7,12 +7,19 @@ module REXML
# @param arg Either a String, or an IO
# @return a Source, or nil if a bad argument was given
def SourceFactory::create_from arg#, slurp=true
- if arg.kind_of? String
- source = Source.new(arg)
- elsif arg.kind_of? IO
- source = IOSource.new(arg)
- end
- source
+ if arg.kind_of? String
+ Source.new(arg)
+ elsif arg.respond_to? :read and
+ arg.respond_to? :readline and
+ arg.respond_to? :nil? and
+ arg.respond_to? :eof?
+ IOSource.new(arg)
+ elsif arg.kind_of? Source
+ arg
+ else
+ raise "#{source.class} is not a valid input stream. It must walk \n"+
+ "like either a String, IO, or Source."
+ end
end
end
@@ -98,6 +105,10 @@ module REXML
@buffer == ""
end
+ def position
+ @orig.index( @buffer )
+ end
+
# @return the current line in the source
def current_line
lines = @orig.split
@@ -194,6 +205,10 @@ module REXML
super and ( @source.nil? || @source.eof? )
end
+ def position
+ @er_source.stat.pipe? ? 0 : @er_source.pos
+ end
+
# @return the current line in the source
def current_line
begin
diff --git a/lib/rexml/streamlistener.rb b/lib/rexml/streamlistener.rb
index 3c3c5e3684..6f401125b5 100644
--- a/lib/rexml/streamlistener.rb
+++ b/lib/rexml/streamlistener.rb
@@ -39,6 +39,9 @@ module REXML
# @p uri the uri of the doctype, or nil. EG, "bar"
def doctype name, pub_sys, long_name, uri
end
+ # Called when the doctype is done
+ def doctype_end
+ end
# If a doctype includes an ATTLIST declaration, it will cause this
# method to be called. The content is the declaration itself, unparsed.
# EG, <!ATTLIST el attr CDATA #REQUIRED> will come to this method as "el
diff --git a/lib/rexml/text.rb b/lib/rexml/text.rb
index 9a83121af8..55bc9f50f8 100644
--- a/lib/rexml/text.rb
+++ b/lib/rexml/text.rb
@@ -39,8 +39,10 @@ module REXML
# text. If this value is nil (the default), then the raw value of the
# parent will be used as the raw value for this node. If there is no raw
# value for the parent, and no value is supplied, the default is false.
+ # Use this field if you have entities defined for some text, and you don't
+ # want REXML to escape that text in output.
# Text.new( "<&", false, nil, false ) #-> "&lt;&amp;"
- # Text.new( "<&", false, nil, true ) #-> IllegalArgumentException
+ # Text.new( "<&", false, nil, true ) #-> Parse exception
# Text.new( "&lt;&amp;", false, nil, true ) #-> "&lt;&amp;"
# # Assume that the entity "s" is defined to be "sean"
# # and that the entity "r" is defined to be "russell"
@@ -156,11 +158,11 @@ module REXML
# # Assume that the entity "s" is defined to be "sean", and that the
# # entity "r" is defined to be "russell"
# t = Text.new( "< & sean russell", false, nil, false, ['s'] )
- # t.string #-> "< & sean russell"
+ # t.value #-> "< & sean russell"
# t = Text.new( "< & &s; russell", false, nil, false )
- # t.string #-> "< & sean russell"
+ # t.value #-> "< & sean russell"
# u = Text.new( "sean russell", false, nil, true )
- # u.string #-> "sean russell"
+ # u.value #-> "sean russell"
def value
@unnormalized if @unnormalized
doctype = nil
@@ -282,9 +284,10 @@ module REXML
EREFERENCE = /&(?!#{Entity::NAME};)/
# Escapes all possible entities
def Text::normalize( input, doctype=nil, entity_filter=nil )
- copy = input.clone
+ copy = input
# Doing it like this rather than in a loop improves the speed
if doctype
+ # Replace all ampersands that aren't part of an entity
copy = copy.gsub( EREFERENCE, '&amp;' )
doctype.entities.each_value do |entity|
copy = copy.gsub( entity.value,
@@ -292,6 +295,7 @@ module REXML
not( entity_filter and entity_filter.include?(entity) )
end
else
+ # Replace all ampersands that aren't part of an entity
copy = copy.gsub( EREFERENCE, '&amp;' )
DocType::DEFAULT_ENTITIES.each_value do |entity|
copy = copy.gsub(entity.value, "&#{entity.name};" )
diff --git a/lib/rexml/validation/validation.rb b/lib/rexml/validation/validation.rb
index fbee315f0b..160ea96b31 100644
--- a/lib/rexml/validation/validation.rb
+++ b/lib/rexml/validation/validation.rb
@@ -82,10 +82,13 @@ module REXML
@event_arg = event_arg
end
- attr_reader :done?
attr_reader :event_type
attr_accessor :event_arg
+ def done?
+ @done
+ end
+
def single?
return (@event_type != :start_element and @event_type != :start_attribute)
end
diff --git a/lib/rexml/xmldecl.rb b/lib/rexml/xmldecl.rb
index 47131ac816..b65604b762 100644
--- a/lib/rexml/xmldecl.rb
+++ b/lib/rexml/xmldecl.rb
@@ -2,71 +2,71 @@ require 'rexml/encoding'
require 'rexml/source'
module REXML
- # NEEDS DOCUMENTATION
- class XMLDecl < Child
- include Encoding
+ # NEEDS DOCUMENTATION
+ class XMLDecl < Child
+ include Encoding
- DEFAULT_VERSION = "1.0";
- DEFAULT_ENCODING = "UTF-8";
- DEFAULT_STANDALONE = "no";
- START = '<\?xml';
- STOP = '\?>';
+ DEFAULT_VERSION = "1.0";
+ DEFAULT_ENCODING = "UTF-8";
+ DEFAULT_STANDALONE = "no";
+ START = '<\?xml';
+ STOP = '\?>';
- attr_accessor :version, :standalone
+ attr_accessor :version, :standalone
attr_reader :writeencoding
- def initialize(version=DEFAULT_VERSION, encoding=nil, standalone=nil)
+ def initialize(version=DEFAULT_VERSION, encoding=nil, standalone=nil)
@writethis = true
@writeencoding = !encoding.nil?
- if version.kind_of? XMLDecl
- super()
- @version = version.version
- self.encoding = version.encoding
+ if version.kind_of? XMLDecl
+ super()
+ @version = version.version
+ self.encoding = version.encoding
@writeencoding = version.writeencoding
- @standalone = version.standalone
- else
- super()
- @version = version
- self.encoding = encoding
- @standalone = standalone
- end
- @version = DEFAULT_VERSION if @version.nil?
- end
-
- def clone
- XMLDecl.new(self)
- end
-
- def write writer, indent_level=-1, transitive=false, ie_hack=false
+ @standalone = version.standalone
+ else
+ super()
+ @version = version
+ self.encoding = encoding
+ @standalone = standalone
+ end
+ @version = DEFAULT_VERSION if @version.nil?
+ end
+
+ def clone
+ XMLDecl.new(self)
+ end
+
+ def write writer, indent=-1, transitive=false, ie_hack=false
return nil unless @writethis or writer.kind_of? Output
- indent( writer, indent_level )
- writer << START.sub(/\\/u, '')
+ indent( writer, indent )
+ writer << START.sub(/\\/u, '')
if writer.kind_of? Output
writer << " #{content writer.encoding}"
else
writer << " #{content encoding}"
end
- writer << STOP.sub(/\\/u, '')
- end
-
- def ==( other )
- other.kind_of?(XMLDecl) and
- other.version == @version and
- other.encoding == self.encoding and
- other.standalone == @standalone
- end
-
- def xmldecl version, encoding, standalone
- @version = version
- self.encoding = encoding
- @standalone = standalone
- end
-
- def node_type
- :xmldecl
- end
-
- alias :stand_alone? :standalone
+ writer << STOP.sub(/\\/u, '')
+ end
+
+ def ==( other )
+ other.kind_of?(XMLDecl) and
+ other.version == @version and
+ other.encoding == self.encoding and
+ other.standalone == @standalone
+ end
+
+ def xmldecl version, encoding, standalone
+ @version = version
+ self.encoding = encoding
+ @standalone = standalone
+ end
+
+ def node_type
+ :xmldecl
+ end
+
+ alias :stand_alone? :standalone
alias :old_enc= :encoding=
def encoding=( enc )
@@ -80,6 +80,11 @@ module REXML
self.dowrite
end
+ # Only use this if you do not want the XML declaration to be written;
+ # this object is ignored by the XML writer. Otherwise, instantiate your
+ # own XMLDecl and add it to the document.
+ #
+ # Note that XML 1.1 documents *must* include an XML declaration
def XMLDecl.default
rv = XMLDecl.new( "1.0" )
rv.nowrite
@@ -98,12 +103,12 @@ module REXML
START.sub(/\\/u, '') + " ... " + STOP.sub(/\\/u, '')
end
- private
- def content(enc)
- rv = "version='#@version'"
- rv << " encoding='#{enc}'" if @writeencoding || enc !~ /utf-8/i
- rv << " standalone='#@standalone'" if @standalone
- rv
- end
- end
+ private
+ def content(enc)
+ rv = "version='#@version'"
+ rv << " encoding='#{enc}'" if @writeencoding || enc !~ /utf-8/i
+ rv << " standalone='#@standalone'" if @standalone
+ rv
+ end
+ end
end
diff --git a/lib/rexml/xpath.rb b/lib/rexml/xpath.rb
index 6875f038e0..1ed440868b 100644
--- a/lib/rexml/xpath.rb
+++ b/lib/rexml/xpath.rb
@@ -2,76 +2,65 @@ require 'rexml/functions'
require 'rexml/xpath_parser'
module REXML
- # Wrapper class. Use this class to access the XPath functions.
- class XPath
- include Functions
- EMPTY_HASH = {}
+ # Wrapper class. Use this class to access the XPath functions.
+ class XPath
+ include Functions
+ EMPTY_HASH = {}
- # Finds and returns the first node that matches the supplied xpath.
- # element::
- # The context element
- # path::
- # The xpath to search for. If not supplied or nil, returns the first
- # node matching '*'.
- # namespaces::
- # If supplied, a Hash which defines a namespace mapping.
- #
- # XPath.first( node )
- # XPath.first( doc, "//b"} )
- # XPath.first( node, "a/x:b", { "x"=>"http://doofus" } )
+ # Finds and returns the first node that matches the supplied xpath.
+ # element::
+ # The context element
+ # path::
+ # The xpath to search for. If not supplied or nil, returns the first
+ # node matching '*'.
+ # namespaces::
+ # If supplied, a Hash which defines a namespace mapping.
+ #
+ # XPath.first( node )
+ # XPath.first( doc, "//b"} )
+ # XPath.first( node, "a/x:b", { "x"=>"http://doofus" } )
def XPath::first element, path=nil, namespaces={}, variables={}
-=begin
raise "The namespaces argument, if supplied, must be a hash object." unless namespaces.kind_of? Hash
raise "The variables argument, if supplied, must be a hash object." unless variables.kind_of? Hash
- parser = XPathParser.new
- parser.namespaces = namespaces
- parser.variables = variables
- path = "*" unless path
- parser.first( path, element );
-=end
-#=begin
- raise "The namespaces argument, if supplied, must be a hash object." unless namespaces.kind_of? Hash
- raise "The variables argument, if supplied, must be a hash object." unless variables.kind_of? Hash
- parser = XPathParser.new
- parser.namespaces = namespaces
- parser.variables = variables
- path = "*" unless path
- element = [element] unless element.kind_of? Array
- parser.parse(path, element).flatten[0]
-#=end
- end
+ parser = XPathParser.new
+ parser.namespaces = namespaces
+ parser.variables = variables
+ path = "*" unless path
+ element = [element] unless element.kind_of? Array
+ parser.parse(path, element).flatten[0]
+ end
- # Itterates over nodes that match the given path, calling the supplied
- # block with the match.
- # element::
- # The context element
- # path::
- # The xpath to search for. If not supplied or nil, defaults to '*'
- # namespaces::
- # If supplied, a Hash which defines a namespace mapping
- #
- # XPath.each( node ) { |el| ... }
- # XPath.each( node, '/*[@attr='v']' ) { |el| ... }
- # XPath.each( node, 'ancestor::x' ) { |el| ... }
- def XPath::each element, path=nil, namespaces={}, variables={}, &block
+ # Itterates over nodes that match the given path, calling the supplied
+ # block with the match.
+ # element::
+ # The context element
+ # path::
+ # The xpath to search for. If not supplied or nil, defaults to '*'
+ # namespaces::
+ # If supplied, a Hash which defines a namespace mapping
+ #
+ # XPath.each( node ) { |el| ... }
+ # XPath.each( node, '/*[@attr='v']' ) { |el| ... }
+ # XPath.each( node, 'ancestor::x' ) { |el| ... }
+ def XPath::each element, path=nil, namespaces={}, variables={}, &block
raise "The namespaces argument, if supplied, must be a hash object." unless namespaces.kind_of? Hash
raise "The variables argument, if supplied, must be a hash object." unless variables.kind_of? Hash
- parser = XPathParser.new
- parser.namespaces = namespaces
- parser.variables = variables
- path = "*" unless path
- element = [element] unless element.kind_of? Array
- parser.parse(path, element).each( &block )
- end
+ parser = XPathParser.new
+ parser.namespaces = namespaces
+ parser.variables = variables
+ path = "*" unless path
+ element = [element] unless element.kind_of? Array
+ parser.parse(path, element).each( &block )
+ end
- # Returns an array of nodes matching a given XPath.
- def XPath::match element, path=nil, namespaces={}, variables={}
- parser = XPathParser.new
- parser.namespaces = namespaces
- parser.variables = variables
- path = "*" unless path
- element = [element] unless element.kind_of? Array
- parser.parse(path,element)
- end
- end
+ # Returns an array of nodes matching a given XPath.
+ def XPath::match element, path=nil, namespaces={}, variables={}
+ parser = XPathParser.new
+ parser.namespaces = namespaces
+ parser.variables = variables
+ path = "*" unless path
+ element = [element] unless element.kind_of? Array
+ parser.parse(path,element)
+ end
+ end
end
diff --git a/lib/rexml/xpath_parser.rb b/lib/rexml/xpath_parser.rb
index 91b8ad48c8..98ed70cc10 100644
--- a/lib/rexml/xpath_parser.rb
+++ b/lib/rexml/xpath_parser.rb
@@ -76,6 +76,8 @@ module REXML
# Performs a depth-first (document order) XPath search, and returns the
# first match. This is the fastest, lightest way to return a single result.
+ #
+ # FIXME: This method is incomplete!
def first( path_stack, node )
#puts "#{depth}) Entering match( #{path.inspect}, #{tree.inspect} )"
return nil if path.size == 0
@@ -123,14 +125,6 @@ module REXML
r = expr( path_stack, nodeset )
#puts "MAIN EXPR => #{r.inspect}"
r
-
- #while ( path_stack.size > 0 and nodeset.size > 0 )
- # #puts "MATCH: #{path_stack.inspect} '#{nodeset.collect{|n|n.class}.inspect}'"
- # nodeset = expr( path_stack, nodeset )
- # #puts "NODESET: #{nodeset.inspect}"
- # #puts "PATH_STACK: #{path_stack.inspect}"
- #end
- #nodeset
end
private
@@ -158,9 +152,10 @@ module REXML
#puts "IN QNAME"
prefix = path_stack.shift
name = path_stack.shift
- ns = @namespaces[prefix]
- ns = ns ? ns : ''
+ default_ns = @namespaces[prefix]
+ default_ns = default_ns ? default_ns : ''
nodeset.delete_if do |node|
+ ns = default_ns
# FIXME: This DOUBLES the time XPath searches take
ns = node.namespace( prefix ) if node.node_type == :element and ns == ''
#puts "NS = #{ns.inspect}"
@@ -353,7 +348,7 @@ module REXML
preceding_siblings = all_siblings[ 0 .. current_index-1 ].reverse
#results += expr( path_stack.dclone, preceding_siblings )
end
- nodeset = preceding_siblings
+ nodeset = preceding_siblings || []
node_types = ELEMENTS
when :preceding
@@ -385,10 +380,13 @@ module REXML
return @variables[ var_name ]
# :and, :or, :eq, :neq, :lt, :lteq, :gt, :gteq
+ # TODO: Special case for :or and :and -- not evaluate the right
+ # operand if the left alone determines result (i.e. is true for
+ # :or and false for :and).
when :eq, :neq, :lt, :lteq, :gt, :gteq, :and, :or
- left = expr( path_stack.shift, nodeset, context )
+ left = expr( path_stack.shift, nodeset.dup, context )
#puts "LEFT => #{left.inspect} (#{left.class.name})"
- right = expr( path_stack.shift, nodeset, context )
+ right = expr( path_stack.shift, nodeset.dup, context )
#puts "RIGHT => #{right.inspect} (#{right.class.name})"
res = equality_relational_compare( left, op, right )
#puts "RES => #{res.inspect}"
@@ -472,8 +470,11 @@ module REXML
def descendant_or_self( path_stack, nodeset )
rs = []
+ #puts "#"*80
+ #puts "PATH_STACK = #{path_stack.inspect}"
+ #puts "NODESET = #{nodeset.collect{|n|n.inspect}.inspect}"
d_o_s( path_stack, nodeset, rs )
- #puts "RS = #{rs.collect{|n|n.to_s}.inspect}"
+ #puts "RS = #{rs.collect{|n|n.inspect}.inspect}"
document_order(rs.flatten.compact)
#rs.flatten.compact
end