summary | refs | log | tree | commit | diff
path: root/lib/rexml
diff options
context:
space:
mode:
author ser <ser@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2005-08-12 12:08:52 +0000
committer ser <ser@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2005-08-12 12:08:52 +0000
commit 69e5c7d29797fe085d0ae919ebadc80a69f0f331 (patch)
tree 8c1af789ecbdd10533ff280b1f686bd89df2999d /lib/rexml
parent 1b12d598f8f6ed6cde7122e9f8b50238d19e5cf0 (diff)
* Fixed the inheritance bug in the pull parser that James Britt found.
* Indentation changes, and changed some exceptions to runtime exceptions.
* Backed out the patch that changed the versions
* Wasn't including Text class.
* Fixes issue:25 (Trac)
* Fixes ticket:3 (Issue38 in Roundup.)
* Numerous fixes in the XPath interpreter correcting, among other things, ordering bugs and some incorrect behavior.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_1_8@8973 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'lib/rexml')
-rw-r--r-- lib/rexml/attribute.rb | 6
-rw-r--r-- lib/rexml/document.rb | 292
-rw-r--r-- lib/rexml/element.rb | 4
-rw-r--r-- lib/rexml/functions.rb | 2
-rw-r--r-- lib/rexml/instruction.rb | 4
-rw-r--r-- lib/rexml/parsers/baseparser.rb | 748
-rw-r--r-- lib/rexml/parsers/pullparser.rb | 317
-rw-r--r-- lib/rexml/parsers/sax2parser.rb | 7
-rw-r--r-- lib/rexml/rexml.rb | 6
-rw-r--r-- lib/rexml/sax2listener.rb | 2
-rw-r--r-- lib/rexml/source.rb | 8
-rw-r--r-- lib/rexml/validation/validation.rb | 5
-rw-r--r-- lib/rexml/xmldecl.rb | 120
-rw-r--r-- lib/rexml/xpath.rb | 117
-rw-r--r-- lib/rexml/xpath_parser.rb | 10
15 files changed, 827 insertions, 821 deletions
diff --git a/lib/rexml/attribute.rb b/lib/rexml/attribute.rb
index a5a58055b8..a169148f32 100644
--- a/lib/rexml/attribute.rb
+++ b/lib/rexml/attribute.rb
@@ -101,20 +101,20 @@ module REXML
end
@unnormalized = nil
- @value = @normalized = Text::normalize( @value, doctype )
+ @normalized = Text::normalize( @value, doctype )
end
# Returns the UNNORMALIZED value of this attribute. That is, entities
# have been expanded to their values
def value
- @unnormalized if @unnormalized
+ return @unnormalized if @unnormalized
doctype = nil
if @element
doc = @element.document
doctype = doc.doctype if doc
end
@normalized = nil
- @value = @unnormalized = Text::unnormalize( @value, doctype )
+ @unnormalized = Text::unnormalize( @value, doctype )
end
# Returns a copy of this attribute
diff --git a/lib/rexml/document.rb b/lib/rexml/document.rb
index 8755e04de1..1a63501905 100644
--- a/lib/rexml/document.rb
+++ b/lib/rexml/document.rb
@@ -16,166 +16,166 @@ module REXML
# Document has a single child that can be accessed by root().
# Note that if you want to have an XML declaration written for a document
# you create, you must add one; REXML documents do not write a default
- # declaration for you. See |DECLARATION| and |write|.
- class Document < Element
- # A convenient default XML declaration. If you want an XML declaration,
- # the easiest way to add one is mydoc << Document::DECLARATION
+ # declaration for you. See |DECLARATION| and |write|.
+ class Document < Element
+ # A convenient default XML declaration. If you want an XML declaration,
+ # the easiest way to add one is mydoc << Document::DECLARATION
# +DEPRECATED+
# Use: mydoc << XMLDecl.default
- DECLARATION = XMLDecl.default
-
- # Constructor
- # @param source if supplied, must be a Document, String, or IO.
- # Documents have their context and Element attributes cloned.
- # Strings are expected to be valid XML documents. IOs are expected
- # to be sources of valid XML documents.
- # @param context if supplied, contains the context of the document;
- # this should be a Hash.
- # NOTE that I'm not sure what the context is for; I cloned it out of
- # the Electric XML API (in which it also seems to do nothing), and it
- # is now legacy. It may do something, someday... it may disappear.
- def initialize( source = nil, context = {} )
- super()
- @context = context
- return if source.nil?
- if source.kind_of? Document
- @context = source.context
- super source
- else
- build( source )
- end
- end
+ DECLARATION = XMLDecl.default
+
+ # Constructor
+ # @param source if supplied, must be a Document, String, or IO.
+ # Documents have their context and Element attributes cloned.
+ # Strings are expected to be valid XML documents. IOs are expected
+ # to be sources of valid XML documents.
+ # @param context if supplied, contains the context of the document;
+ # this should be a Hash.
+ # NOTE that I'm not sure what the context is for; I cloned it out of
+ # the Electric XML API (in which it also seems to do nothing), and it
+ # is now legacy. It may do something, someday... it may disappear.
+ def initialize( source = nil, context = {} )
+ super()
+ @context = context
+ return if source.nil?
+ if source.kind_of? Document
+ @context = source.context
+ super source
+ else
+ build( source )
+ end
+ end
def node_type
:document
end
- # Should be obvious
- def clone
- Document.new self
- end
-
- # According to the XML spec, a root node has no expanded name
- def expanded_name
- ''
- #d = doc_type
- #d ? d.name : "UNDEFINED"
- end
-
- alias :name :expanded_name
-
- # We override this, because XMLDecls and DocTypes must go at the start
- # of the document
- def add( child )
- if child.kind_of? XMLDecl
- @children.unshift child
- elsif child.kind_of? DocType
- if @children[0].kind_of? XMLDecl
- @children[1,0] = child
- else
- @children.unshift child
- end
- child.parent = self
- else
- rv = super
- raise "attempted adding second root element to document" if @elements.size > 1
- rv
- end
- end
- alias :<< :add
-
- def add_element(arg=nil, arg2=nil)
- rv = super
- raise "attempted adding second root element to document" if @elements.size > 1
- rv
- end
-
- # @return the root Element of the document, or nil if this document
- # has no children.
- def root
+ # Should be obvious
+ def clone
+ Document.new self
+ end
+
+ # According to the XML spec, a root node has no expanded name
+ def expanded_name
+ ''
+ #d = doc_type
+ #d ? d.name : "UNDEFINED"
+ end
+
+ alias :name :expanded_name
+
+ # We override this, because XMLDecls and DocTypes must go at the start
+ # of the document
+ def add( child )
+ if child.kind_of? XMLDecl
+ @children.unshift child
+ elsif child.kind_of? DocType
+ if @children[0].kind_of? XMLDecl
+ @children[1,0] = child
+ else
+ @children.unshift child
+ end
+ child.parent = self
+ else
+ rv = super
+ raise "attempted adding second root element to document" if @elements.size > 1
+ rv
+ end
+ end
+ alias :<< :add
+
+ def add_element(arg=nil, arg2=nil)
+ rv = super
+ raise "attempted adding second root element to document" if @elements.size > 1
+ rv
+ end
+
+ # @return the root Element of the document, or nil if this document
+ # has no children.
+ def root
elements[1]
#self
#@children.find { |item| item.kind_of? Element }
- end
-
- # @return the DocType child of the document, if one exists,
- # and nil otherwise.
- def doctype
- @children.find { |item| item.kind_of? DocType }
- end
-
- # @return the XMLDecl of this document; if no XMLDecl has been
- # set, the default declaration is returned.
- def xml_decl
- rv = @children[0]
+ end
+
+ # @return the DocType child of the document, if one exists,
+ # and nil otherwise.
+ def doctype
+ @children.find { |item| item.kind_of? DocType }
+ end
+
+ # @return the XMLDecl of this document; if no XMLDecl has been
+ # set, the default declaration is returned.
+ def xml_decl
+ rv = @children[0]
return rv if rv.kind_of? XMLDecl
rv = @children.unshift(XMLDecl.default)[0]
- end
-
- # @return the XMLDecl version of this document as a String.
- # If no XMLDecl has been set, returns the default version.
- def version
- xml_decl().version
- end
-
- # @return the XMLDecl encoding of this document as a String.
- # If no XMLDecl has been set, returns the default encoding.
- def encoding
- xml_decl().encoding
- end
-
- # @return the XMLDecl standalone value of this document as a String.
- # If no XMLDecl has been set, returns the default setting.
- def stand_alone?
- xml_decl().stand_alone?
- end
-
- # Write the XML tree out, optionally with indent. This writes out the
- # entire XML document, including XML declarations, doctype declarations,
- # and processing instructions (if any are given).
- # A controversial point is whether Document should always write the XML
- # declaration (<?xml version='1.0'?>) whether or not one is given by the
- # user (or source document). REXML does not write one if one was not
- # specified, because it adds unneccessary bandwidth to applications such
- # as XML-RPC.
- #
- #
- # output::
- # output an object which supports '<< string'; this is where the
- # document will be written.
- # indent::
- # An integer. If -1, no indenting will be used; otherwise, the
- # indentation will be this number of spaces, and children will be
- # indented an additional amount. Defaults to -1
- # transitive::
- # If transitive is true and indent is >= 0, then the output will be
- # pretty-printed in such a way that the added whitespace does not affect
- # the absolute *value* of the document -- that is, it leaves the value
- # and number of Text nodes in the document unchanged.
- # ie_hack::
- # Internet Explorer is the worst piece of crap to have ever been
- # written, with the possible exception of Windows itself. Since IE is
- # unable to parse proper XML, we have to provide a hack to generate XML
- # that IE's limited abilities can handle. This hack inserts a space
- # before the /> on empty tags. Defaults to false
- def write( output=$stdout, indent_level=-1, transitive=false, ie_hack=false )
- output = Output.new( output, xml_decl.encoding ) if xml_decl.encoding != "UTF-8" && !output.kind_of?(Output)
- @children.each { |node|
- indent( output, indent_level ) if node.node_type == :element
- if node.write( output, indent_level, transitive, ie_hack )
- output << "\n" unless indent_level<0 or node == @children[-1]
+ end
+
+ # @return the XMLDecl version of this document as a String.
+ # If no XMLDecl has been set, returns the default version.
+ def version
+ xml_decl().version
+ end
+
+ # @return the XMLDecl encoding of this document as a String.
+ # If no XMLDecl has been set, returns the default encoding.
+ def encoding
+ xml_decl().encoding
+ end
+
+ # @return the XMLDecl standalone value of this document as a String.
+ # If no XMLDecl has been set, returns the default setting.
+ def stand_alone?
+ xml_decl().stand_alone?
+ end
+
+ # Write the XML tree out, optionally with indent. This writes out the
+ # entire XML document, including XML declarations, doctype declarations,
+ # and processing instructions (if any are given).
+ # A controversial point is whether Document should always write the XML
+ # declaration (<?xml version='1.0'?>) whether or not one is given by the
+ # user (or source document). REXML does not write one if one was not
+ # specified, because it adds unneccessary bandwidth to applications such
+ # as XML-RPC.
+ #
+ #
+ # output::
+ # output an object which supports '<< string'; this is where the
+ # document will be written.
+ # indent::
+ # An integer. If -1, no indenting will be used; otherwise, the
+ # indentation will be this number of spaces, and children will be
+ # indented an additional amount. Defaults to -1
+ # transitive::
+ # If transitive is true and indent is >= 0, then the output will be
+ # pretty-printed in such a way that the added whitespace does not affect
+ # the absolute *value* of the document -- that is, it leaves the value
+ # and number of Text nodes in the document unchanged.
+ # ie_hack::
+ # Internet Explorer is the worst piece of crap to have ever been
+ # written, with the possible exception of Windows itself. Since IE is
+ # unable to parse proper XML, we have to provide a hack to generate XML
+ # that IE's limited abilities can handle. This hack inserts a space
+ # before the /> on empty tags. Defaults to false
+ def write( output=$stdout, indent=-1, transitive=false, ie_hack=false )
+ output = Output.new( output, xml_decl.encoding ) if xml_decl.encoding != "UTF-8" && !output.kind_of?(Output)
+ @children.each { |node|
+ indent( output, indent ) if node.node_type == :element
+ if node.write( output, indent, transitive, ie_hack )
+ output << "\n" unless indent<0 or node == @children[-1]
end
- }
- end
+ }
+ end
-
- def Document::parse_stream( source, listener )
- Parsers::StreamParser.new( source, listener ).parse
- end
+
+ def Document::parse_stream( source, listener )
+ Parsers::StreamParser.new( source, listener ).parse
+ end
- private
- def build( source )
+ private
+ def build( source )
Parsers::TreeParser.new( source, self ).parse
- end
- end
+ end
+ end
end
diff --git a/lib/rexml/element.rb b/lib/rexml/element.rb
index 7f578ecb3d..e7754da2c5 100644
--- a/lib/rexml/element.rb
+++ b/lib/rexml/element.rb
@@ -36,8 +36,6 @@ module REXML
# If an Element, the object will be shallowly cloned; name,
# attributes, and namespaces will be copied. Children will +not+ be
# copied.
- # If a Source, the source will be scanned and parsed for an Element,
- # and all child elements will be recursively parsed as well.
# parent::
# if supplied, must be a Parent, and will be used as
# the parent of this object.
@@ -223,7 +221,7 @@ module REXML
# b.namespace("y") # -> '2'
def namespace(prefix=nil)
if prefix.nil?
- prefix = self.prefix()
+ prefix = prefix()
end
if prefix == ''
prefix = "xmlns"
diff --git a/lib/rexml/functions.rb b/lib/rexml/functions.rb
index 7a2fb996a0..0db9b98a53 100644
--- a/lib/rexml/functions.rb
+++ b/lib/rexml/functions.rb
@@ -339,6 +339,8 @@ module REXML
end
def Functions::sum( nodes )
+ nodes = [nodes] unless nodes.kind_of? Array
+ nodes.inject(0) { |r,n| r += number(string(n)) }
end
def Functions::floor( number )
diff --git a/lib/rexml/instruction.rb b/lib/rexml/instruction.rb
index ed4f604c74..f24f7786f7 100644
--- a/lib/rexml/instruction.rb
+++ b/lib/rexml/instruction.rb
@@ -38,8 +38,8 @@ module REXML
Instruction.new self
end
- def write writer, indent_level=-1, transitive=false, ie_hack=false
- indent(writer, indent_level)
+ def write writer, indent=-1, transitive=false, ie_hack=false
+ indent(writer, indent)
writer << START.sub(/\\/u, '')
writer << @target
writer << ' '
diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb
index e95cba8904..cb33a64908 100644
--- a/lib/rexml/parsers/baseparser.rb
+++ b/lib/rexml/parsers/baseparser.rb
@@ -2,103 +2,103 @@ require 'rexml/parseexception'
require 'rexml/source'
module REXML
- module Parsers
- # = Using the Pull Parser
- # <em>This API is experimental, and subject to change.</em>
- # parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
- # while parser.has_next?
- # res = parser.next
- # puts res[1]['att'] if res.start_tag? and res[0] == 'b'
- # end
- # See the PullEvent class for information on the content of the results.
- # The data is identical to the arguments passed for the various events to
- # the StreamListener API.
- #
- # Notice that:
- # parser = PullParser.new( "<a>BAD DOCUMENT" )
- # while parser.has_next?
- # res = parser.next
- # raise res[1] if res.error?
- # end
- #
- # Nat Price gave me some good ideas for the API.
- class BaseParser
- NCNAME_STR= '[\w:][\-\w\d.]*'
- NAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}"
+ module Parsers
+ # = Using the Pull Parser
+ # <em>This API is experimental, and subject to change.</em>
+ # parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
+ # while parser.has_next?
+ # res = parser.next
+ # puts res[1]['att'] if res.start_tag? and res[0] == 'b'
+ # end
+ # See the PullEvent class for information on the content of the results.
+ # The data is identical to the arguments passed for the various events to
+ # the StreamListener API.
+ #
+ # Notice that:
+ # parser = PullParser.new( "<a>BAD DOCUMENT" )
+ # while parser.has_next?
+ # res = parser.next
+ # raise res[1] if res.error?
+ # end
+ #
+ # Nat Price gave me some good ideas for the API.
+ class BaseParser
+ NCNAME_STR= '[\w:][\-\w\d.]*'
+ NAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}"
- NAMECHAR = '[\-\w\d\.:]'
- NAME = "([\\w:]#{NAMECHAR}*)"
- NMTOKEN = "(?:#{NAMECHAR})+"
- NMTOKENS = "#{NMTOKEN}(\\s+#{NMTOKEN})*"
- REFERENCE = "(?:&#{NAME};|&#\\d+;|&#x[0-9a-fA-F]+;)"
- REFERENCE_RE = /#{REFERENCE}/
+ NAMECHAR = '[\-\w\d\.:]'
+ NAME = "([\\w:]#{NAMECHAR}*)"
+ NMTOKEN = "(?:#{NAMECHAR})+"
+ NMTOKENS = "#{NMTOKEN}(\\s+#{NMTOKEN})*"
+ REFERENCE = "(?:&#{NAME};|&#\\d+;|&#x[0-9a-fA-F]+;)"
+ REFERENCE_RE = /#{REFERENCE}/
- DOCTYPE_START = /\A\s*<!DOCTYPE\s/um
- DOCTYPE_PATTERN = /\s*<!DOCTYPE\s+(.*?)(\[|>)/um
- ATTRIBUTE_PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\2/um
- COMMENT_START = /\A<!--/u
- COMMENT_PATTERN = /<!--(.*?)-->/um
- CDATA_START = /\A<!\[CDATA\[/u
- CDATA_END = /^\s*\]\s*>/um
- CDATA_PATTERN = /<!\[CDATA\[(.*?)\]\]>/um
- XMLDECL_START = /\A<\?xml\s/u;
- XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>*/um
- INSTRUCTION_START = /\A<\?/u
- INSTRUCTION_PATTERN = /<\?(.*?)(\s+.*?)?\?>/um
- TAG_MATCH = /^<((?>#{NAME_STR}))\s*((?>\s+#{NAME_STR}\s*=\s*(["']).*?\3)*)\s*(\/)?>/um
- CLOSE_MATCH = /^\s*<\/(#{NAME_STR})\s*>/um
+ DOCTYPE_START = /\A\s*<!DOCTYPE\s/um
+ DOCTYPE_PATTERN = /\s*<!DOCTYPE\s+(.*?)(\[|>)/um
+ ATTRIBUTE_PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\2/um
+ COMMENT_START = /\A<!--/u
+ COMMENT_PATTERN = /<!--(.*?)-->/um
+ CDATA_START = /\A<!\[CDATA\[/u
+ CDATA_END = /^\s*\]\s*>/um
+ CDATA_PATTERN = /<!\[CDATA\[(.*?)\]\]>/um
+ XMLDECL_START = /\A<\?xml\s/u;
+ XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>*/um
+ INSTRUCTION_START = /\A<\?/u
+ INSTRUCTION_PATTERN = /<\?(.*?)(\s+.*?)?\?>/um
+ TAG_MATCH = /^<((?>#{NAME_STR}))\s*((?>\s+#{NAME_STR}\s*=\s*(["']).*?\3)*)\s*(\/)?>/um
+ CLOSE_MATCH = /^\s*<\/(#{NAME_STR})\s*>/um
- VERSION = /\bversion\s*=\s*["'](.*?)['"]/um
- ENCODING = /\bencoding=["'](.*?)['"]/um
- STANDALONE = /\bstandalone=["'](.*?)['"]/um
+ VERSION = /\bversion\s*=\s*["'](.*?)['"]/um
+ ENCODING = /\bencoding=["'](.*?)['"]/um
+ STANDALONE = /\bstandalone=["'](.*?)['"]/um
- ENTITY_START = /^\s*<!ENTITY/
- IDENTITY = /^([!\*\w\-]+)(\s+#{NCNAME_STR})?(\s+["'].*?['"])?(\s+['"].*?["'])?/u
- ELEMENTDECL_START = /^\s*<!ELEMENT/um
- ELEMENTDECL_PATTERN = /^\s*(<!ELEMENT.*?)>/um
- SYSTEMENTITY = /^\s*(%.*?;)\s*$/um
- ENUMERATION = "\\(\\s*#{NMTOKEN}(?:\\s*\\|\\s*#{NMTOKEN})*\\s*\\)"
- NOTATIONTYPE = "NOTATION\\s+\\(\\s*#{NAME}(?:\\s*\\|\\s*#{NAME})*\\s*\\)"
- ENUMERATEDTYPE = "(?:(?:#{NOTATIONTYPE})|(?:#{ENUMERATION}))"
- ATTTYPE = "(CDATA|ID|IDREF|IDREFS|ENTITY|ENTITIES|NMTOKEN|NMTOKENS|#{ENUMERATEDTYPE})"
- ATTVALUE = "(?:\"((?:[^<&\"]|#{REFERENCE})*)\")|(?:'((?:[^<&']|#{REFERENCE})*)')"
- DEFAULTDECL = "(#REQUIRED|#IMPLIED|(?:(#FIXED\\s+)?#{ATTVALUE}))"
- ATTDEF = "\\s+#{NAME}\\s+#{ATTTYPE}\\s+#{DEFAULTDECL}"
- ATTDEF_RE = /#{ATTDEF}/
- ATTLISTDECL_START = /^\s*<!ATTLIST/um
- ATTLISTDECL_PATTERN = /^\s*<!ATTLIST\s+#{NAME}(?:#{ATTDEF})*\s*>/um
- NOTATIONDECL_START = /^\s*<!NOTATION/um
- PUBLIC = /^\s*<!NOTATION\s+(\w[\-\w]*)\s+(PUBLIC)\s+((["']).*?\4)\s*>/um
- SYSTEM = /^\s*<!NOTATION\s+(\w[\-\w]*)\s+(SYSTEM)\s+((["']).*?\4)\s*>/um
+ ENTITY_START = /^\s*<!ENTITY/
+ IDENTITY = /^([!\*\w\-]+)(\s+#{NCNAME_STR})?(\s+["'].*?['"])?(\s+['"].*?["'])?/u
+ ELEMENTDECL_START = /^\s*<!ELEMENT/um
+ ELEMENTDECL_PATTERN = /^\s*(<!ELEMENT.*?)>/um
+ SYSTEMENTITY = /^\s*(%.*?;)\s*$/um
+ ENUMERATION = "\\(\\s*#{NMTOKEN}(?:\\s*\\|\\s*#{NMTOKEN})*\\s*\\)"
+ NOTATIONTYPE = "NOTATION\\s+\\(\\s*#{NAME}(?:\\s*\\|\\s*#{NAME})*\\s*\\)"
+ ENUMERATEDTYPE = "(?:(?:#{NOTATIONTYPE})|(?:#{ENUMERATION}))"
+ ATTTYPE = "(CDATA|ID|IDREF|IDREFS|ENTITY|ENTITIES|NMTOKEN|NMTOKENS|#{ENUMERATEDTYPE})"
+ ATTVALUE = "(?:\"((?:[^<&\"]|#{REFERENCE})*)\")|(?:'((?:[^<&']|#{REFERENCE})*)')"
+ DEFAULTDECL = "(#REQUIRED|#IMPLIED|(?:(#FIXED\\s+)?#{ATTVALUE}))"
+ ATTDEF = "\\s+#{NAME}\\s+#{ATTTYPE}\\s+#{DEFAULTDECL}"
+ ATTDEF_RE = /#{ATTDEF}/
+ ATTLISTDECL_START = /^\s*<!ATTLIST/um
+ ATTLISTDECL_PATTERN = /^\s*<!ATTLIST\s+#{NAME}(?:#{ATTDEF})*\s*>/um
+ NOTATIONDECL_START = /^\s*<!NOTATION/um
+ PUBLIC = /^\s*<!NOTATION\s+(\w[\-\w]*)\s+(PUBLIC)\s+((["']).*?\4)\s*>/um
+ SYSTEM = /^\s*<!NOTATION\s+(\w[\-\w]*)\s+(SYSTEM)\s+((["']).*?\4)\s*>/um
- TEXT_PATTERN = /\A([^<]*)/um
+ TEXT_PATTERN = /\A([^<]*)/um
- # Entity constants
- PUBIDCHAR = "\x20\x0D\x0Aa-zA-Z0-9\\-()+,./:=?;!*@$_%#"
- SYSTEMLITERAL = %Q{((?:"[^"]*")|(?:'[^']*'))}
- PUBIDLITERAL = %Q{("[#{PUBIDCHAR}']*"|'[#{PUBIDCHAR}]*')}
- EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))"
- NDATADECL = "\\s+NDATA\\s+#{NAME}"
- PEREFERENCE = "%#{NAME};"
- ENTITYVALUE = %Q{((?:"(?:[^%&"]|#{PEREFERENCE}|#{REFERENCE})*")|(?:'([^%&']|#{PEREFERENCE}|#{REFERENCE})*'))}
- PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})"
- ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
- PEDECL = "<!ENTITY\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
- GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
- ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um
+ # Entity constants
+ PUBIDCHAR = "\x20\x0D\x0Aa-zA-Z0-9\\-()+,./:=?;!*@$_%#"
+ SYSTEMLITERAL = %Q{((?:"[^"]*")|(?:'[^']*'))}
+ PUBIDLITERAL = %Q{("[#{PUBIDCHAR}']*"|'[#{PUBIDCHAR}]*')}
+ EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))"
+ NDATADECL = "\\s+NDATA\\s+#{NAME}"
+ PEREFERENCE = "%#{NAME};"
+ ENTITYVALUE = %Q{((?:"(?:[^%&"]|#{PEREFERENCE}|#{REFERENCE})*")|(?:'([^%&']|#{PEREFERENCE}|#{REFERENCE})*'))}
+ PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})"
+ ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
+ PEDECL = "<!ENTITY\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
+ GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
+ ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um
- EREFERENCE = /&(?!#{NAME};)/
+ EREFERENCE = /&(?!#{NAME};)/
- DEFAULT_ENTITIES = {
- 'gt' => [/&gt;/, '&gt;', '>', />/],
- 'lt' => [/&lt;/, '&lt;', '<', /</],
- 'quot' => [/&quot;/, '&quot;', '"', /"/],
- "apos" => [/&apos;/, "&apos;", "'", /'/]
- }
+ DEFAULT_ENTITIES = {
+ 'gt' => [/&gt;/, '&gt;', '>', />/],
+ 'lt' => [/&lt;/, '&lt;', '<', /</],
+ 'quot' => [/&quot;/, '&quot;', '"', /"/],
+ "apos" => [/&apos;/, "&apos;", "'", /'/]
+ }
- def initialize( source )
- self.stream = source
- end
+ def initialize( source )
+ self.stream = source
+ end
def add_listener( listener )
if !defined?(@listeners) or !@listeners
@@ -119,315 +119,315 @@ module REXML
attr_reader :source
- def stream=( source )
- if source.kind_of? String
- @source = Source.new(source)
- elsif source.kind_of? IO
- @source = IOSource.new(source)
- elsif source.kind_of? Source
- @source = source
- elsif defined? StringIO and source.kind_of? StringIO
- @source = IOSource.new(source)
- else
- raise "#{source.class} is not a valid input stream. It must be \n"+
- "either a String, IO, StringIO or Source."
- end
- @closed = nil
- @document_status = nil
- @tags = []
- @stack = []
- @entities = []
- end
+ def stream=( source )
+ if source.kind_of? String
+ @source = Source.new(source)
+ elsif source.kind_of? IO
+ @source = IOSource.new(source)
+ elsif source.kind_of? Source
+ @source = source
+ elsif defined? StringIO and source.kind_of? StringIO
+ @source = IOSource.new(source)
+ else
+ raise "#{source.class} is not a valid input stream. It must be \n"+
+ "either a String, IO, StringIO or Source."
+ end
+ @closed = nil
+ @document_status = nil
+ @tags = []
+ @stack = []
+ @entities = []
+ end
- # Returns true if there are no more events
- def empty?
+ # Returns true if there are no more events
+ def empty?
#puts "@source.empty? = #{@source.empty?}"
#puts "@stack.empty? = #{@stack.empty?}"
return (@source.empty? and @stack.empty?)
- end
+ end
- # Returns true if there are more events. Synonymous with !empty?
- def has_next?
+ # Returns true if there are more events. Synonymous with !empty?
+ def has_next?
return !(@source.empty? and @stack.empty?)
- end
+ end
- # Push an event back on the head of the stream. This method
- # has (theoretically) infinite depth.
- def unshift token
- @stack.unshift(token)
- end
+ # Push an event back on the head of the stream. This method
+ # has (theoretically) infinite depth.
+ def unshift token
+ @stack.unshift(token)
+ end
- # Peek at the +depth+ event in the stack. The first element on the stack
- # is at depth 0. If +depth+ is -1, will parse to the end of the input
- # stream and return the last event, which is always :end_document.
- # Be aware that this causes the stream to be parsed up to the +depth+
- # event, so you can effectively pre-parse the entire document (pull the
- # entire thing into memory) using this method.
- def peek depth=0
- raise %Q[Illegal argument "#{depth}"] if depth < -1
- temp = []
- if depth == -1
- temp.push(pull()) until empty?
- else
- while @stack.size+temp.size < depth+1
- temp.push(pull())
- end
- end
- @stack += temp if temp.size > 0
- @stack[depth]
- end
+ # Peek at the +depth+ event in the stack. The first element on the stack
+ # is at depth 0. If +depth+ is -1, will parse to the end of the input
+ # stream and return the last event, which is always :end_document.
+ # Be aware that this causes the stream to be parsed up to the +depth+
+ # event, so you can effectively pre-parse the entire document (pull the
+ # entire thing into memory) using this method.
+ def peek depth=0
+ raise %Q[Illegal argument "#{depth}"] if depth < -1
+ temp = []
+ if depth == -1
+ temp.push(pull()) until empty?
+ else
+ while @stack.size+temp.size < depth+1
+ temp.push(pull())
+ end
+ end
+ @stack += temp if temp.size > 0
+ @stack[depth]
+ end
- # Returns the next event. This is a +PullEvent+ object.
- def pull
- if @closed
- x, @closed = @closed, nil
- return [ :end_element, x ]
- end
- return [ :end_document ] if empty?
- return @stack.shift if @stack.size > 0
- @source.read if @source.buffer.size<2
- if @document_status == nil
- @source.consume( /^\s*/um )
- word = @source.match( /(<[^>]*)>/um )
- word = word[1] unless word.nil?
- case word
- when COMMENT_START
- return [ :comment, @source.match( COMMENT_PATTERN, true )[1] ]
- when XMLDECL_START
- results = @source.match( XMLDECL_PATTERN, true )[1]
- version = VERSION.match( results )
- version = version[1] unless version.nil?
- encoding = ENCODING.match(results)
- encoding = encoding[1] unless encoding.nil?
- @source.encoding = encoding
- standalone = STANDALONE.match(results)
- standalone = standalone[1] unless standalone.nil?
- return [ :xmldecl, version, encoding, standalone]
- when INSTRUCTION_START
- return [ :processing_instruction, *@source.match(INSTRUCTION_PATTERN, true)[1,2] ]
- when DOCTYPE_START
- md = @source.match( DOCTYPE_PATTERN, true )
- identity = md[1]
- close = md[2]
- identity =~ IDENTITY
- name = $1
- raise REXML::ParseException("DOCTYPE is missing a name") if name.nil?
- pub_sys = $2.nil? ? nil : $2.strip
- long_name = $3.nil? ? nil : $3.strip
- uri = $4.nil? ? nil : $4.strip
- args = [ :start_doctype, name, pub_sys, long_name, uri ]
- if close == ">"
- @document_status = :after_doctype
- @source.read if @source.buffer.size<2
- md = @source.match(/^\s*/um, true)
- @stack << [ :end_doctype ]
- else
- @document_status = :in_doctype
- end
- return args
- else
- @document_status = :after_doctype
- @source.read if @source.buffer.size<2
- md = @source.match(/\s*/um, true)
- end
- end
- if @document_status == :in_doctype
- md = @source.match(/\s*(.*?>)/um)
- case md[1]
- when SYSTEMENTITY
- match = @source.match( SYSTEMENTITY, true )[1]
- return [ :externalentity, match ]
+ # Returns the next event. This is a +PullEvent+ object.
+ def pull
+ if @closed
+ x, @closed = @closed, nil
+ return [ :end_element, x ]
+ end
+ return [ :end_document ] if empty?
+ return @stack.shift if @stack.size > 0
+ @source.read if @source.buffer.size<2
+ if @document_status == nil
+ @source.consume( /^\s*/um )
+ word = @source.match( /(<[^>]*)>/um )
+ word = word[1] unless word.nil?
+ case word
+ when COMMENT_START
+ return [ :comment, @source.match( COMMENT_PATTERN, true )[1] ]
+ when XMLDECL_START
+ results = @source.match( XMLDECL_PATTERN, true )[1]
+ version = VERSION.match( results )
+ version = version[1] unless version.nil?
+ encoding = ENCODING.match(results)
+ encoding = encoding[1] unless encoding.nil?
+ @source.encoding = encoding
+ standalone = STANDALONE.match(results)
+ standalone = standalone[1] unless standalone.nil?
+ return [ :xmldecl, version, encoding, standalone]
+ when INSTRUCTION_START
+ return [ :processing_instruction, *@source.match(INSTRUCTION_PATTERN, true)[1,2] ]
+ when DOCTYPE_START
+ md = @source.match( DOCTYPE_PATTERN, true )
+ identity = md[1]
+ close = md[2]
+ identity =~ IDENTITY
+ name = $1
+ raise REXML::ParseException("DOCTYPE is missing a name") if name.nil?
+ pub_sys = $2.nil? ? nil : $2.strip
+ long_name = $3.nil? ? nil : $3.strip
+ uri = $4.nil? ? nil : $4.strip
+ args = [ :start_doctype, name, pub_sys, long_name, uri ]
+ if close == ">"
+ @document_status = :after_doctype
+ @source.read if @source.buffer.size<2
+ md = @source.match(/^\s*/um, true)
+ @stack << [ :end_doctype ]
+ else
+ @document_status = :in_doctype
+ end
+ return args
+ else
+ @document_status = :after_doctype
+ @source.read if @source.buffer.size<2
+ md = @source.match(/\s*/um, true)
+ end
+ end
+ if @document_status == :in_doctype
+ md = @source.match(/\s*(.*?>)/um)
+ case md[1]
+ when SYSTEMENTITY
+ match = @source.match( SYSTEMENTITY, true )[1]
+ return [ :externalentity, match ]
- when ELEMENTDECL_START
- return [ :elementdecl, @source.match( ELEMENTDECL_PATTERN, true )[1] ]
+ when ELEMENTDECL_START
+ return [ :elementdecl, @source.match( ELEMENTDECL_PATTERN, true )[1] ]
- when ENTITY_START
- match = @source.match( ENTITYDECL, true ).to_a.compact
- match[0] = :entitydecl
- ref = false
- if match[1] == '%'
- ref = true
- match.delete_at 1
- end
- # Now we have to sort out what kind of entity reference this is
- if match[2] == 'SYSTEM'
- # External reference
- match[3] = match[3][1..-2] # PUBID
- match.delete_at(4) if match.size > 4 # Chop out NDATA decl
- # match is [ :entity, name, SYSTEM, pubid(, ndata)? ]
- elsif match[2] == 'PUBLIC'
- # External reference
- match[3] = match[3][1..-2] # PUBID
- match[4] = match[4][1..-2] # HREF
- # match is [ :entity, name, PUBLIC, pubid, href ]
- else
- match[2] = match[2][1..-2]
- match.pop if match.size == 4
- # match is [ :entity, name, value ]
- end
- match << '%' if ref
- return match
- when ATTLISTDECL_START
- md = @source.match( ATTLISTDECL_PATTERN, true )
- raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
- element = md[1]
- contents = md[0]
+ when ENTITY_START
+ match = @source.match( ENTITYDECL, true ).to_a.compact
+ match[0] = :entitydecl
+ ref = false
+ if match[1] == '%'
+ ref = true
+ match.delete_at 1
+ end
+ # Now we have to sort out what kind of entity reference this is
+ if match[2] == 'SYSTEM'
+ # External reference
+ match[3] = match[3][1..-2] # PUBID
+ match.delete_at(4) if match.size > 4 # Chop out NDATA decl
+ # match is [ :entity, name, SYSTEM, pubid(, ndata)? ]
+ elsif match[2] == 'PUBLIC'
+ # External reference
+ match[3] = match[3][1..-2] # PUBID
+ match[4] = match[4][1..-2] # HREF
+ # match is [ :entity, name, PUBLIC, pubid, href ]
+ else
+ match[2] = match[2][1..-2]
+ match.pop if match.size == 4
+ # match is [ :entity, name, value ]
+ end
+ match << '%' if ref
+ return match
+ when ATTLISTDECL_START
+ md = @source.match( ATTLISTDECL_PATTERN, true )
+ raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
+ element = md[1]
+ contents = md[0]
- pairs = {}
- values = md[0].scan( ATTDEF_RE )
- values.each do |attdef|
- unless attdef[3] == "#IMPLIED"
- attdef.compact!
- val = attdef[3]
- val = attdef[4] if val == "#FIXED "
- pairs[attdef[0]] = val
- end
- end
- return [ :attlistdecl, element, pairs, contents ]
- when NOTATIONDECL_START
- md = nil
- if @source.match( PUBLIC )
- md = @source.match( PUBLIC, true )
- elsif @source.match( SYSTEM )
- md = @source.match( SYSTEM, true )
- else
- raise REXML::ParseException.new( "error parsing notation: no matching pattern", @source )
- end
- return [ :notationdecl, md[1], md[2], md[3] ]
- when CDATA_END
- @document_status = :after_doctype
- @source.match( CDATA_END, true )
- return [ :end_doctype ]
- end
- end
- begin
- if @source.buffer[0] == ?<
- if @source.buffer[1] == ?/
- last_tag = @tags.pop
- #md = @source.match_to_consume( '>', CLOSE_MATCH)
- md = @source.match( CLOSE_MATCH, true )
- raise REXML::ParseException.new( "Missing end tag for "+
+ pairs = {}
+ values = md[0].scan( ATTDEF_RE )
+ values.each do |attdef|
+ unless attdef[3] == "#IMPLIED"
+ attdef.compact!
+ val = attdef[3]
+ val = attdef[4] if val == "#FIXED "
+ pairs[attdef[0]] = val
+ end
+ end
+ return [ :attlistdecl, element, pairs, contents ]
+ when NOTATIONDECL_START
+ md = nil
+ if @source.match( PUBLIC )
+ md = @source.match( PUBLIC, true )
+ elsif @source.match( SYSTEM )
+ md = @source.match( SYSTEM, true )
+ else
+ raise REXML::ParseException.new( "error parsing notation: no matching pattern", @source )
+ end
+ return [ :notationdecl, md[1], md[2], md[3] ]
+ when CDATA_END
+ @document_status = :after_doctype
+ @source.match( CDATA_END, true )
+ return [ :end_doctype ]
+ end
+ end
+ begin
+ if @source.buffer[0] == ?<
+ if @source.buffer[1] == ?/
+ last_tag = @tags.pop
+ #md = @source.match_to_consume( '>', CLOSE_MATCH)
+ md = @source.match( CLOSE_MATCH, true )
+ raise REXML::ParseException.new( "Missing end tag for "+
"'#{last_tag}' (got \"#{md[1]}\")",
@source) unless last_tag == md[1]
- return [ :end_element, last_tag ]
- elsif @source.buffer[1] == ?!
- md = @source.match(/\A(\s*[^>]*>)/um)
- #puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
- raise REXML::ParseException.new("Malformed node", @source) unless md
- if md[0][2] == ?-
- md = @source.match( COMMENT_PATTERN, true )
- return [ :comment, md[1] ] if md
- else
- md = @source.match( CDATA_PATTERN, true )
- return [ :cdata, md[1] ] if md
- end
- raise REXML::ParseException.new( "Declarations can only occur "+
- "in the doctype declaration.", @source)
- elsif @source.buffer[1] == ??
- md = @source.match( INSTRUCTION_PATTERN, true )
- return [ :processing_instruction, md[1], md[2] ] if md
- raise REXML::ParseException.new( "Bad instruction declaration",
- @source)
- else
- # Get the next tag
- md = @source.match(TAG_MATCH, true)
- raise REXML::ParseException.new("malformed XML: missing tag start", @source) unless md
- attrs = []
- if md[2].size > 0
- attrs = md[2].scan( ATTRIBUTE_PATTERN )
- raise REXML::ParseException.new( "error parsing attributes: [#{attrs.join ', '}], excess = \"#$'\"", @source) if $' and $'.strip.size > 0
- end
-
- if md[4]
- @closed = md[1]
- else
- @tags.push( md[1] )
- end
- attributes = {}
- attrs.each { |a,b,c| attributes[a] = c }
- return [ :start_element, md[1], attributes ]
- end
- else
- md = @source.match( TEXT_PATTERN, true )
+ return [ :end_element, last_tag ]
+ elsif @source.buffer[1] == ?!
+ md = @source.match(/\A(\s*[^>]*>)/um)
+ #puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
+ raise REXML::ParseException.new("Malformed node", @source) unless md
+ if md[0][2] == ?-
+ md = @source.match( COMMENT_PATTERN, true )
+ return [ :comment, md[1] ] if md
+ else
+ md = @source.match( CDATA_PATTERN, true )
+ return [ :cdata, md[1] ] if md
+ end
+ raise REXML::ParseException.new( "Declarations can only occur "+
+ "in the doctype declaration.", @source)
+ elsif @source.buffer[1] == ??
+ md = @source.match( INSTRUCTION_PATTERN, true )
+ return [ :processing_instruction, md[1], md[2] ] if md
+ raise REXML::ParseException.new( "Bad instruction declaration",
+ @source)
+ else
+ # Get the next tag
+ md = @source.match(TAG_MATCH, true)
+ raise REXML::ParseException.new("malformed XML: missing tag start", @source) unless md
+ attrs = []
+ if md[2].size > 0
+ attrs = md[2].scan( ATTRIBUTE_PATTERN )
+ raise REXML::ParseException.new( "error parsing attributes: [#{attrs.join ', '}], excess = \"#$'\"", @source) if $' and $'.strip.size > 0
+ end
+
+ if md[4]
+ @closed = md[1]
+ else
+ @tags.push( md[1] )
+ end
+ attributes = {}
+ attrs.each { |a,b,c| attributes[a] = c }
+ return [ :start_element, md[1], attributes ]
+ end
+ else
+ md = @source.match( TEXT_PATTERN, true )
if md[0].length == 0
#puts "EMPTY = #{empty?}"
#puts "BUFFER = \"#{@source.buffer}\""
@source.match( /(\s+)/, true )
end
#return [ :text, "" ] if md[0].length == 0
- # unnormalized = Text::unnormalize( md[1], self )
- # return PullEvent.new( :text, md[1], unnormalized )
- return [ :text, md[1] ]
- end
- rescue REXML::ParseException
- raise
- rescue Exception, NameError => error
- raise REXML::ParseException.new( "Exception parsing",
- @source, self, (error ? error : $!) )
- end
- return [ :dummy ]
- end
+ # unnormalized = Text::unnormalize( md[1], self )
+ # return PullEvent.new( :text, md[1], unnormalized )
+ return [ :text, md[1] ]
+ end
+ rescue REXML::ParseException
+ raise
+ rescue Exception, NameError => error
+ raise REXML::ParseException.new( "Exception parsing",
+ @source, self, (error ? error : $!) )
+ end
+ return [ :dummy ]
+ end
- def entity( reference, entities )
- value = nil
- value = entities[ reference ] if entities
- if not value
- value = DEFAULT_ENTITIES[ reference ]
- value = value[2] if value
- end
- unnormalize( value, entities ) if value
- end
+ def entity( reference, entities )
+ value = nil
+ value = entities[ reference ] if entities
+ if not value
+ value = DEFAULT_ENTITIES[ reference ]
+ value = value[2] if value
+ end
+ unnormalize( value, entities ) if value
+ end
- # Escapes all possible entities
- def normalize( input, entities=nil, entity_filter=nil )
- copy = input.clone
- # Doing it like this rather than in a loop improves the speed
- copy.gsub!( EREFERENCE, '&amp;' )
- entities.each do |key, value|
- copy.gsub!( value, "&#{key};" ) unless entity_filter and
- entity_filter.include?(entity)
- end if entities
- copy.gsub!( EREFERENCE, '&amp;' )
- DEFAULT_ENTITIES.each do |key, value|
- copy.gsub!( value[3], value[1] )
- end
- copy
- end
+ # Escapes all possible entities
+ def normalize( input, entities=nil, entity_filter=nil )
+ copy = input.clone
+ # Doing it like this rather than in a loop improves the speed
+ copy.gsub!( EREFERENCE, '&amp;' )
+ entities.each do |key, value|
+ copy.gsub!( value, "&#{key};" ) unless entity_filter and
+ entity_filter.include?(entity)
+ end if entities
+ copy.gsub!( EREFERENCE, '&amp;' )
+ DEFAULT_ENTITIES.each do |key, value|
+ copy.gsub!( value[3], value[1] )
+ end
+ copy
+ end
- # Unescapes all possible entities
- def unnormalize( string, entities=nil, filter=nil )
- rv = string.clone
- rv.gsub!( /\r\n?/, "\n" )
- matches = rv.scan( REFERENCE_RE )
- return rv if matches.size == 0
- rv.gsub!( /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/ ) {|m|
- m=$1
- m = "0#{m}" if m[0] == ?x
- [Integer(m)].pack('U*')
- }
- matches.collect!{|x|x[0]}.compact!
- if matches.size > 0
- matches.each do |entity_reference|
- unless filter and filter.include?(entity_reference)
- entity_value = entity( entity_reference, entities )
- if entity_value
- re = /&#{entity_reference};/
- rv.gsub!( re, entity_value )
- end
- end
- end
- matches.each do |entity_reference|
- unless filter and filter.include?(entity_reference)
- er = DEFAULT_ENTITIES[entity_reference]
- rv.gsub!( er[0], er[2] ) if er
- end
- end
- rv.gsub!( /&amp;/, '&' )
- end
- rv
- end
- end
- end
+ # Unescapes all possible entities
+ def unnormalize( string, entities=nil, filter=nil )
+ rv = string.clone
+ rv.gsub!( /\r\n?/, "\n" )
+ matches = rv.scan( REFERENCE_RE )
+ return rv if matches.size == 0
+ rv.gsub!( /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/ ) {|m|
+ m=$1
+ m = "0#{m}" if m[0] == ?x
+ [Integer(m)].pack('U*')
+ }
+ matches.collect!{|x|x[0]}.compact!
+ if matches.size > 0
+ matches.each do |entity_reference|
+ unless filter and filter.include?(entity_reference)
+ entity_value = entity( entity_reference, entities )
+ if entity_value
+ re = /&#{entity_reference};/
+ rv.gsub!( re, entity_value )
+ end
+ end
+ end
+ matches.each do |entity_reference|
+ unless filter and filter.include?(entity_reference)
+ er = DEFAULT_ENTITIES[entity_reference]
+ rv.gsub!( er[0], er[2] ) if er
+ end
+ end
+ rv.gsub!( /&amp;/, '&' )
+ end
+ rv
+ end
+ end
+ end
end
=begin
diff --git a/lib/rexml/parsers/pullparser.rb b/lib/rexml/parsers/pullparser.rb
index 0a328ea8fc..09ac8948f6 100644
--- a/lib/rexml/parsers/pullparser.rb
+++ b/lib/rexml/parsers/pullparser.rb
@@ -1,95 +1,98 @@
+require 'forwardable'
+
require 'rexml/parseexception'
require 'rexml/parsers/baseparser'
require 'rexml/xmltokens'
module REXML
- module Parsers
- # = Using the Pull Parser
- # <em>This API is experimental, and subject to change.</em>
- # parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
- # while parser.has_next?
- # res = parser.next
- # puts res[1]['att'] if res.start_tag? and res[0] == 'b'
- # end
- # See the PullEvent class for information on the content of the results.
- # The data is identical to the arguments passed for the various events to
- # the StreamListener API.
- #
- # Notice that:
- # parser = PullParser.new( "<a>BAD DOCUMENT" )
- # while parser.has_next?
- # res = parser.next
- # raise res[1] if res.error?
- # end
- #
- # Nat Price gave me some good ideas for the API.
- class PullParser
- include XMLTokens
-
- def initialize stream
- @entities = {}
+ module Parsers
+ # = Using the Pull Parser
+ # <em>This API is experimental, and subject to change.</em>
+ # parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
+ # while parser.has_next?
+ # res = parser.next
+ # puts res[1]['att'] if res.start_tag? and res[0] == 'b'
+ # end
+ # See the PullEvent class for information on the content of the results.
+ # The data is identical to the arguments passed for the various events to
+ # the StreamListener API.
+ #
+ # Notice that:
+ # parser = PullParser.new( "<a>BAD DOCUMENT" )
+ # while parser.has_next?
+ # res = parser.next
+ # raise res[1] if res.error?
+ # end
+ #
+ # Nat Price gave me some good ideas for the API.
+ class PullParser
+ include XMLTokens
+ extend Forwardable
+
+ def_delegators( :@parser, :has_next? )
+ def_delegators( :@parser, :entity )
+ def_delegators( :@parser, :empty? )
+
+ def initialize stream
+ @entities = {}
@listeners = nil
@parser = BaseParser.new( stream )
- end
+ @my_stack = []
+ end
def add_listener( listener )
@listeners = [] unless @listeners
@listeners << listener
end
- def each
- while has_next?
- yield self.pull
- end
- end
-
- def peek depth=0
- PullEvent.new(@parser.peek(depth))
- end
-
- def has_next?
- @parser.has_next?
+ def each
+ while has_next?
+ yield self.pull
+ end
end
- def pull
- event = @parser.pull
- case event[0]
- when :entitydecl
- @entities[ event[1] ] =
- event[2] unless event[2] =~ /PUBLIC|SYSTEM/
- when :text
- unnormalized = @parser.unnormalize( event[1], @entities )
- event << unnormalized
- end
- PullEvent.new( event )
- end
-
- def unshift token
- @parser.unshift token
+ def peek depth=0
+ if @my_stack.length <= depth
+ (depth - @my_stack.length + 1).times {
+ e = PullEvent.new(@parser.pull)
+ @my_stack.push(e)
+ }
+ end
+ @my_stack[depth]
end
- def entity reference
- @parser.entity( reference )
+ def pull
+ return @my_stack.shift if @my_stack.length > 0
+
+ event = @parser.pull
+ case event[0]
+ when :entitydecl
+ @entities[ event[1] ] =
+ event[2] unless event[2] =~ /PUBLIC|SYSTEM/
+ when :text
+ unnormalized = @parser.unnormalize( event[1], @entities )
+ event << unnormalized
+ end
+ PullEvent.new( event )
end
- def empty?
- @parser.empty?
+ def unshift token
+ @my_stack.unshift token
+ end
+ end
+
+ # A parsing event. The contents of the event are accessed as an +Array+,
+ # and the type is given either by the ...? methods, or by accessing the
+ # +type+ accessor. The contents of this object vary from event to event,
+ # but are identical to the arguments passed to +StreamListener+s for each
+ # event.
+ class PullEvent
+ # The type of this event. Will be one of :start_element, :end_element, :text,
+ # :processing_instruction, :comment, :start_doctype, :end_doctype, :attlistdecl,
+ # :elementdecl, :entitydecl, :notationdecl, :entity, :cdata, :xmldecl, or :error.
+ def initialize(arg)
+ @contents = arg
end
-
- end
-
- # A parsing event. The contents of the event are accessed as an +Array+,
- # and the type is given either by the ...? methods, or by accessing the
- # +type+ accessor. The contents of this object vary from event to event,
- # but are identical to the arguments passed to +StreamListener+s for each
- # event.
- class PullEvent
- # The type of this event. Will be one of :start_element, :end_element, :text,
- # :processing_instruction, :comment, :start_doctype, :end_doctype, :attlistdecl,
- # :elementdecl, :entitydecl, :notationdecl, :entity, :cdata, :xmldecl, or :error.
- def initialize(arg)
- @contents = arg
- end
def []( start, endd=nil)
if start.kind_of? Range
@@ -103,90 +106,90 @@ module REXML
else
raise "Illegal argument #{start.inspect} (#{start.class})"
end
- end
-
- def event_type
- @contents[0]
- end
-
- # Content: [ String tag_name, Hash attributes ]
- def start_element?
- @contents[0] == :start_element
- end
-
- # Content: [ String tag_name ]
- def end_element?
- @contents[0] == :end_element
- end
-
- # Content: [ String raw_text, String unnormalized_text ]
- def text?
- @contents[0] == :text
- end
-
- # Content: [ String text ]
- def instruction?
- @contents[0] == :processing_instruction
- end
-
- # Content: [ String text ]
- def comment?
- @contents[0] == :comment
- end
-
- # Content: [ String name, String pub_sys, String long_name, String uri ]
- def doctype?
- @contents[0] == :start_doctype
- end
-
- # Content: [ String text ]
- def attlistdecl?
- @contents[0] == :attlistdecl
- end
-
- # Content: [ String text ]
- def elementdecl?
- @contents[0] == :elementdecl
- end
-
- # Due to the wonders of DTDs, an entity declaration can be just about
- # anything. There's no way to normalize it; you'll have to interpret the
- # content yourself. However, the following is true:
- #
- # * If the entity declaration is an internal entity:
- # [ String name, String value ]
- # Content: [ String text ]
- def entitydecl?
- @contents[0] == :entitydecl
- end
-
- # Content: [ String text ]
- def notationdecl?
- @contents[0] == :notationdecl
- end
-
- # Content: [ String text ]
- def entity?
- @contents[0] == :entity
- end
-
- # Content: [ String text ]
- def cdata?
- @contents[0] == :cdata
- end
-
- # Content: [ String version, String encoding, String standalone ]
- def xmldecl?
- @contents[0] == :xmldecl
- end
-
- def error?
- @contents[0] == :error
- end
-
- def inspect
+ end
+
+ def event_type
+ @contents[0]
+ end
+
+ # Content: [ String tag_name, Hash attributes ]
+ def start_element?
+ @contents[0] == :start_element
+ end
+
+ # Content: [ String tag_name ]
+ def end_element?
+ @contents[0] == :end_element
+ end
+
+ # Content: [ String raw_text, String unnormalized_text ]
+ def text?
+ @contents[0] == :text
+ end
+
+ # Content: [ String text ]
+ def instruction?
+ @contents[0] == :processing_instruction
+ end
+
+ # Content: [ String text ]
+ def comment?
+ @contents[0] == :comment
+ end
+
+ # Content: [ String name, String pub_sys, String long_name, String uri ]
+ def doctype?
+ @contents[0] == :start_doctype
+ end
+
+ # Content: [ String text ]
+ def attlistdecl?
+ @contents[0] == :attlistdecl
+ end
+
+ # Content: [ String text ]
+ def elementdecl?
+ @contents[0] == :elementdecl
+ end
+
+ # Due to the wonders of DTDs, an entity declaration can be just about
+ # anything. There's no way to normalize it; you'll have to interpret the
+ # content yourself. However, the following is true:
+ #
+ # * If the entity declaration is an internal entity:
+ # [ String name, String value ]
+ # Content: [ String text ]
+ def entitydecl?
+ @contents[0] == :entitydecl
+ end
+
+ # Content: [ String text ]
+ def notationdecl?
+ @contents[0] == :notationdecl
+ end
+
+ # Content: [ String text ]
+ def entity?
+ @contents[0] == :entity
+ end
+
+ # Content: [ String text ]
+ def cdata?
+ @contents[0] == :cdata
+ end
+
+ # Content: [ String version, String encoding, String standalone ]
+ def xmldecl?
+ @contents[0] == :xmldecl
+ end
+
+ def error?
+ @contents[0] == :error
+ end
+
+ def inspect
@contents[0].to_s + ": " + @contents[1..-1].inspect
- end
- end
- end
+ end
+ end
+ end
end
diff --git a/lib/rexml/parsers/sax2parser.rb b/lib/rexml/parsers/sax2parser.rb
index d5ee1bcfcd..f1b8246902 100644
--- a/lib/rexml/parsers/sax2parser.rb
+++ b/lib/rexml/parsers/sax2parser.rb
@@ -1,9 +1,11 @@
require 'rexml/parsers/baseparser'
require 'rexml/parseexception'
require 'rexml/namespace'
+require 'rexml/text'
module REXML
module Parsers
+ # SAX2Parser
class SAX2Parser
def initialize source
@parser = BaseParser.new(source)
@@ -36,6 +38,10 @@ module REXML
# :start_prefix_mapping, :end_prefix_mapping, :characters,
# :processing_instruction, :doctype, :attlistdecl, :elementdecl,
# :entitydecl, :notationdecl, :cdata, :xmldecl, :comment
+ #
+ # There is an additional symbol that can be listened for: :progress.
+ # This will be called for every event generated, passing in the current
+ # stream position.
#
# Array contains regular expressions or strings which will be matched
# against fully qualified element names.
@@ -161,6 +167,7 @@ module REXML
:elementdecl, :cdata, :notationdecl, :xmldecl
handle( *event )
end
+ handle( :progress, @parser.source.position )
end
end
diff --git a/lib/rexml/rexml.rb b/lib/rexml/rexml.rb
index fcffe13d21..86690b8488 100644
--- a/lib/rexml/rexml.rb
+++ b/lib/rexml/rexml.rb
@@ -11,7 +11,7 @@
# Main page:: http://www.germane-software.com/software/rexml
# Author:: Sean Russell <serATgermaneHYPHENsoftwareDOTcom>
# Version:: 3.1.3
-# Date:: +2005/138
+# Date:: 2005/224
#
# This API documentation can be downloaded from the REXML home page, or can
# be accessed online[http://www.germane-software.com/software/rexml_doc]
@@ -20,7 +20,7 @@
# or can be accessed
# online[http://www.germane-software.com/software/rexml/docs/tutorial.html]
module REXML
- Copyright = "Copyright © 2001-2005 Sean Russell <ser@germane-software.com>"
- Date = "+2005/138"
+ Copyright = "Copyright © 2001, 2002, 2003, 2004 Sean Russell <ser@germane-software.com>"
+ Date = "2005/224"
Version = "3.1.3"
end
diff --git a/lib/rexml/sax2listener.rb b/lib/rexml/sax2listener.rb
index 40a77ed464..339ae64121 100644
--- a/lib/rexml/sax2listener.rb
+++ b/lib/rexml/sax2listener.rb
@@ -90,5 +90,7 @@ module REXML
# @p comment The content of the comment
def comment comment
end
+ def progress position
+ end
end
end
diff --git a/lib/rexml/source.rb b/lib/rexml/source.rb
index 7251666160..f599d2276c 100644
--- a/lib/rexml/source.rb
+++ b/lib/rexml/source.rb
@@ -98,6 +98,10 @@ module REXML
@buffer == ""
end
+ def position
+ @orig.index( @buffer )
+ end
+
# @return the current line in the source
def current_line
lines = @orig.split
@@ -194,6 +198,10 @@ module REXML
super and ( @source.nil? || @source.eof? )
end
+ def position
+ @er_source.pos
+ end
+
# @return the current line in the source
def current_line
begin
diff --git a/lib/rexml/validation/validation.rb b/lib/rexml/validation/validation.rb
index fbee315f0b..160ea96b31 100644
--- a/lib/rexml/validation/validation.rb
+++ b/lib/rexml/validation/validation.rb
@@ -82,10 +82,13 @@ module REXML
@event_arg = event_arg
end
- attr_reader :done?
attr_reader :event_type
attr_accessor :event_arg
+ def done?
+ @done
+ end
+
def single?
return (@event_type != :start_element and @event_type != :start_attribute)
end
diff --git a/lib/rexml/xmldecl.rb b/lib/rexml/xmldecl.rb
index 47131ac816..1b7ef813d1 100644
--- a/lib/rexml/xmldecl.rb
+++ b/lib/rexml/xmldecl.rb
@@ -2,71 +2,71 @@ require 'rexml/encoding'
require 'rexml/source'
module REXML
- # NEEDS DOCUMENTATION
- class XMLDecl < Child
- include Encoding
+ # NEEDS DOCUMENTATION
+ class XMLDecl < Child
+ include Encoding
- DEFAULT_VERSION = "1.0";
- DEFAULT_ENCODING = "UTF-8";
- DEFAULT_STANDALONE = "no";
- START = '<\?xml';
- STOP = '\?>';
+ DEFAULT_VERSION = "1.0";
+ DEFAULT_ENCODING = "UTF-8";
+ DEFAULT_STANDALONE = "no";
+ START = '<\?xml';
+ STOP = '\?>';
- attr_accessor :version, :standalone
+ attr_accessor :version, :standalone
attr_reader :writeencoding
- def initialize(version=DEFAULT_VERSION, encoding=nil, standalone=nil)
+ def initialize(version=DEFAULT_VERSION, encoding=nil, standalone=nil)
@writethis = true
@writeencoding = !encoding.nil?
- if version.kind_of? XMLDecl
- super()
- @version = version.version
- self.encoding = version.encoding
+ if version.kind_of? XMLDecl
+ super()
+ @version = version.version
+ self.encoding = version.encoding
@writeencoding = version.writeencoding
- @standalone = version.standalone
- else
- super()
- @version = version
- self.encoding = encoding
- @standalone = standalone
- end
- @version = DEFAULT_VERSION if @version.nil?
- end
-
- def clone
- XMLDecl.new(self)
- end
-
- def write writer, indent_level=-1, transitive=false, ie_hack=false
+ @standalone = version.standalone
+ else
+ super()
+ @version = version
+ self.encoding = encoding
+ @standalone = standalone
+ end
+ @version = DEFAULT_VERSION if @version.nil?
+ end
+
+ def clone
+ XMLDecl.new(self)
+ end
+
+ def write writer, indent=-1, transitive=false, ie_hack=false
return nil unless @writethis or writer.kind_of? Output
- indent( writer, indent_level )
- writer << START.sub(/\\/u, '')
+ indent( writer, indent )
+ writer << START.sub(/\\/u, '')
if writer.kind_of? Output
writer << " #{content writer.encoding}"
else
writer << " #{content encoding}"
end
- writer << STOP.sub(/\\/u, '')
- end
-
- def ==( other )
- other.kind_of?(XMLDecl) and
- other.version == @version and
- other.encoding == self.encoding and
- other.standalone == @standalone
- end
-
- def xmldecl version, encoding, standalone
- @version = version
- self.encoding = encoding
- @standalone = standalone
- end
-
- def node_type
- :xmldecl
- end
-
- alias :stand_alone? :standalone
+ writer << STOP.sub(/\\/u, '')
+ end
+
+ def ==( other )
+ other.kind_of?(XMLDecl) and
+ other.version == @version and
+ other.encoding == self.encoding and
+ other.standalone == @standalone
+ end
+
+ def xmldecl version, encoding, standalone
+ @version = version
+ self.encoding = encoding
+ @standalone = standalone
+ end
+
+ def node_type
+ :xmldecl
+ end
+
+ alias :stand_alone? :standalone
alias :old_enc= :encoding=
def encoding=( enc )
@@ -98,12 +98,12 @@ module REXML
START.sub(/\\/u, '') + " ... " + STOP.sub(/\\/u, '')
end
- private
- def content(enc)
- rv = "version='#@version'"
- rv << " encoding='#{enc}'" if @writeencoding || enc !~ /utf-8/i
- rv << " standalone='#@standalone'" if @standalone
- rv
- end
- end
+ private
+ def content(enc)
+ rv = "version='#@version'"
+ rv << " encoding='#{enc}'" if @writeencoding || enc !~ /utf-8/i
+ rv << " standalone='#@standalone'" if @standalone
+ rv
+ end
+ end
end
diff --git a/lib/rexml/xpath.rb b/lib/rexml/xpath.rb
index 6875f038e0..1ed440868b 100644
--- a/lib/rexml/xpath.rb
+++ b/lib/rexml/xpath.rb
@@ -2,76 +2,65 @@ require 'rexml/functions'
require 'rexml/xpath_parser'
module REXML
- # Wrapper class. Use this class to access the XPath functions.
- class XPath
- include Functions
- EMPTY_HASH = {}
+ # Wrapper class. Use this class to access the XPath functions.
+ class XPath
+ include Functions
+ EMPTY_HASH = {}
- # Finds and returns the first node that matches the supplied xpath.
- # element::
- # The context element
- # path::
- # The xpath to search for. If not supplied or nil, returns the first
- # node matching '*'.
- # namespaces::
- # If supplied, a Hash which defines a namespace mapping.
- #
- # XPath.first( node )
- # XPath.first( doc, "//b" )
- # XPath.first( node, "a/x:b", { "x"=>"http://doofus" } )
+ # Finds and returns the first node that matches the supplied xpath.
+ # element::
+ # The context element
+ # path::
+ # The xpath to search for. If not supplied or nil, returns the first
+ # node matching '*'.
+ # namespaces::
+ # If supplied, a Hash which defines a namespace mapping.
+ #
+ # XPath.first( node )
+ # XPath.first( doc, "//b" )
+ # XPath.first( node, "a/x:b", { "x"=>"http://doofus" } )
def XPath::first element, path=nil, namespaces={}, variables={}
-=begin
raise "The namespaces argument, if supplied, must be a hash object." unless namespaces.kind_of? Hash
raise "The variables argument, if supplied, must be a hash object." unless variables.kind_of? Hash
- parser = XPathParser.new
- parser.namespaces = namespaces
- parser.variables = variables
- path = "*" unless path
- parser.first( path, element );
-=end
-#=begin
- raise "The namespaces argument, if supplied, must be a hash object." unless namespaces.kind_of? Hash
- raise "The variables argument, if supplied, must be a hash object." unless variables.kind_of? Hash
- parser = XPathParser.new
- parser.namespaces = namespaces
- parser.variables = variables
- path = "*" unless path
- element = [element] unless element.kind_of? Array
- parser.parse(path, element).flatten[0]
-#=end
- end
+ parser = XPathParser.new
+ parser.namespaces = namespaces
+ parser.variables = variables
+ path = "*" unless path
+ element = [element] unless element.kind_of? Array
+ parser.parse(path, element).flatten[0]
+ end
- # Iterates over nodes that match the given path, calling the supplied
- # block with the match.
- # element::
- # The context element
- # path::
- # The xpath to search for. If not supplied or nil, defaults to '*'
- # namespaces::
- # If supplied, a Hash which defines a namespace mapping
- #
- # XPath.each( node ) { |el| ... }
- # XPath.each( node, "/*[@attr='v']" ) { |el| ... }
- # XPath.each( node, 'ancestor::x' ) { |el| ... }
- def XPath::each element, path=nil, namespaces={}, variables={}, &block
+ # Iterates over nodes that match the given path, calling the supplied
+ # block with the match.
+ # element::
+ # The context element
+ # path::
+ # The xpath to search for. If not supplied or nil, defaults to '*'
+ # namespaces::
+ # If supplied, a Hash which defines a namespace mapping
+ #
+ # XPath.each( node ) { |el| ... }
+ # XPath.each( node, "/*[@attr='v']" ) { |el| ... }
+ # XPath.each( node, 'ancestor::x' ) { |el| ... }
+ def XPath::each element, path=nil, namespaces={}, variables={}, &block
raise "The namespaces argument, if supplied, must be a hash object." unless namespaces.kind_of? Hash
raise "The variables argument, if supplied, must be a hash object." unless variables.kind_of? Hash
- parser = XPathParser.new
- parser.namespaces = namespaces
- parser.variables = variables
- path = "*" unless path
- element = [element] unless element.kind_of? Array
- parser.parse(path, element).each( &block )
- end
+ parser = XPathParser.new
+ parser.namespaces = namespaces
+ parser.variables = variables
+ path = "*" unless path
+ element = [element] unless element.kind_of? Array
+ parser.parse(path, element).each( &block )
+ end
- # Returns an array of nodes matching a given XPath.
- def XPath::match element, path=nil, namespaces={}, variables={}
- parser = XPathParser.new
- parser.namespaces = namespaces
- parser.variables = variables
- path = "*" unless path
- element = [element] unless element.kind_of? Array
- parser.parse(path,element)
- end
- end
+ # Returns an array of nodes matching a given XPath.
+ def XPath::match element, path=nil, namespaces={}, variables={}
+ parser = XPathParser.new
+ parser.namespaces = namespaces
+ parser.variables = variables
+ path = "*" unless path
+ element = [element] unless element.kind_of? Array
+ parser.parse(path,element)
+ end
+ end
end
diff --git a/lib/rexml/xpath_parser.rb b/lib/rexml/xpath_parser.rb
index 91b8ad48c8..7c0d1dc358 100644
--- a/lib/rexml/xpath_parser.rb
+++ b/lib/rexml/xpath_parser.rb
@@ -76,6 +76,8 @@ module REXML
# Performs a depth-first (document order) XPath search, and returns the
# first match. This is the fastest, lightest way to return a single result.
+ #
+ # FIXME: This method is incomplete!
def first( path_stack, node )
#puts "#{depth}) Entering match( #{path.inspect}, #{tree.inspect} )"
return nil if path.size == 0
@@ -123,14 +125,6 @@ module REXML
r = expr( path_stack, nodeset )
#puts "MAIN EXPR => #{r.inspect}"
r
-
- #while ( path_stack.size > 0 and nodeset.size > 0 )
- # #puts "MATCH: #{path_stack.inspect} '#{nodeset.collect{|n|n.class}.inspect}'"
- # nodeset = expr( path_stack, nodeset )
- # #puts "NODESET: #{nodeset.inspect}"
- # #puts "PATH_STACK: #{path_stack.inspect}"
- #end
- #nodeset
end
private