summary refs log tree commit diff
path: root/lib/rexml
diff options
context:
space:
mode:
Diffstat (limited to 'lib/rexml')
-rw-r--r-- lib/rexml/attlistdecl.rb 113
-rw-r--r-- lib/rexml/attribute.rb 305
-rw-r--r-- lib/rexml/cdata.rb 99
-rw-r--r-- lib/rexml/child.rb 165
-rw-r--r-- lib/rexml/comment.rb 120
-rw-r--r-- lib/rexml/doctype.rb 91
-rw-r--r-- lib/rexml/document.rb 333
-rw-r--r-- lib/rexml/dtd/attlistdecl.rb 15
-rw-r--r-- lib/rexml/dtd/dtd.rb 78
-rw-r--r-- lib/rexml/dtd/elementdecl.rb 27
-rw-r--r-- lib/rexml/dtd/entitydecl.rb 99
-rw-r--r-- lib/rexml/dtd/notationdecl.rb 65
-rw-r--r-- lib/rexml/element.rb 262
-rw-r--r-- lib/rexml/encoding.rb 96
-rw-r--r-- lib/rexml/encodings/CP-1252.rb 103
-rw-r--r-- lib/rexml/encodings/EUC-JP.rb 35
-rw-r--r-- lib/rexml/encodings/ICONV.rb 22
-rw-r--r-- lib/rexml/encodings/ISO-8859-1.rb 7
-rw-r--r-- lib/rexml/encodings/ISO-8859-15.rb 72
-rw-r--r-- lib/rexml/encodings/SHIFT-JIS.rb 37
-rw-r--r-- lib/rexml/encodings/SHIFT_JIS.rb 1
-rw-r--r-- lib/rexml/encodings/UNILE.rb 34
-rw-r--r-- lib/rexml/encodings/US-ASCII.rb 30
-rw-r--r-- lib/rexml/encodings/UTF-16.rb 35
-rw-r--r-- lib/rexml/encodings/UTF-8.rb 18
-rw-r--r-- lib/rexml/entity.rb 292
-rw-r--r-- lib/rexml/formatters/default.rb 13
-rw-r--r-- lib/rexml/formatters/pretty.rb 19
-rw-r--r-- lib/rexml/formatters/transitive.rb 8
-rw-r--r-- lib/rexml/functions.rb 97
-rw-r--r-- lib/rexml/instruction.rb 101
-rw-r--r-- lib/rexml/light/node.rb 382
-rw-r--r-- lib/rexml/namespace.rb 79
-rw-r--r-- lib/rexml/node.rb 79
-rw-r--r-- lib/rexml/output.rb 30
-rw-r--r-- lib/rexml/parent.rb 54
-rw-r--r-- lib/rexml/parseexception.rb 9
-rw-r--r-- lib/rexml/parsers/baseparser.rb 431
-rw-r--r-- lib/rexml/parsers/lightparser.rb 85
-rw-r--r-- lib/rexml/parsers/pullparser.rb 3
-rw-r--r-- lib/rexml/parsers/sax2parser.rb 433
-rw-r--r-- lib/rexml/parsers/streamparser.rb 19
-rw-r--r-- lib/rexml/parsers/treeparser.rb 16
-rw-r--r-- lib/rexml/parsers/ultralightparser.rb 81
-rw-r--r-- lib/rexml/parsers/xpathparser.rb 145
-rw-r--r-- lib/rexml/quickpath.rb 496
-rw-r--r-- lib/rexml/rexml.rb 22
-rw-r--r-- lib/rexml/sax2listener.rb 185
-rw-r--r-- lib/rexml/security.rb 28
-rw-r--r-- lib/rexml/source.rb 148
-rw-r--r-- lib/rexml/streamlistener.rb 181
-rw-r--r-- lib/rexml/syncenumerator.rb 8
-rw-r--r-- lib/rexml/text.rb 238
-rw-r--r-- lib/rexml/undefinednamespaceexception.rb 1
-rw-r--r-- lib/rexml/validation/relaxng.rb 58
-rw-r--r-- lib/rexml/validation/validation.rb 23
-rw-r--r-- lib/rexml/validation/validationexception.rb 1
-rw-r--r-- lib/rexml/xmldecl.rb 123
-rw-r--r-- lib/rexml/xmltokens.rb 95
-rw-r--r-- lib/rexml/xpath.rb 121
-rw-r--r-- lib/rexml/xpath_parser.rb 186
61 files changed, 3351 insertions, 3201 deletions
diff --git a/lib/rexml/attlistdecl.rb b/lib/rexml/attlistdecl.rb
index ef4721b5ce..dc1d2add0b 100644
--- a/lib/rexml/attlistdecl.rb
+++ b/lib/rexml/attlistdecl.rb
@@ -1,62 +1,63 @@
+# frozen_string_literal: false
#vim:ts=2 sw=2 noexpandtab:
require 'rexml/child'
require 'rexml/source'
module REXML
- # This class needs:
- # * Documentation
- # * Work! Not all types of attlists are intelligently parsed, so we just
- # spew back out what we get in. This works, but it would be better if
- # we formatted the output ourselves.
- #
- # AttlistDecls provide *just* enough support to allow namespace
- # declarations. If you need some sort of generalized support, or have an
- # interesting idea about how to map the hideous, terrible design of DTD
- # AttlistDecls onto an intuitive Ruby interface, let me know. I'm desperate
- # for anything to make DTDs more palateable.
- class AttlistDecl < Child
- include Enumerable
-
- # What is this? Got me.
- attr_reader :element_name
-
- # Create an AttlistDecl, pulling the information from a Source. Notice
- # that this isn't very convenient; to create an AttlistDecl, you basically
- # have to format it yourself, and then have the initializer parse it.
- # Sorry, but for the forseeable future, DTD support in REXML is pretty
- # weak on convenience. Have I mentioned how much I hate DTDs?
- def initialize(source)
- super()
- if (source.kind_of? Array)
- @element_name, @pairs, @contents = *source
- end
- end
-
- # Access the attlist attribute/value pairs.
- # value = attlist_decl[ attribute_name ]
- def [](key)
- @pairs[key]
- end
-
- # Whether an attlist declaration includes the given attribute definition
- # if attlist_decl.include? "xmlns:foobar"
- def include?(key)
- @pairs.keys.include? key
- end
-
- # Iterate over the key/value pairs:
- # attlist_decl.each { |attribute_name, attribute_value| ... }
- def each(&block)
- @pairs.each(&block)
- end
-
- # Write out exactly what we got in.
- def write out, indent=-1
- out << @contents
- end
-
- def node_type
- :attlistdecl
- end
- end
+ # This class needs:
+ # * Documentation
+ # * Work! Not all types of attlists are intelligently parsed, so we just
+ # spew back out what we get in. This works, but it would be better if
+ # we formatted the output ourselves.
+ #
+ # AttlistDecls provide *just* enough support to allow namespace
+ # declarations. If you need some sort of generalized support, or have an
+ # interesting idea about how to map the hideous, terrible design of DTD
+ # AttlistDecls onto an intuitive Ruby interface, let me know. I'm desperate
+ # for anything to make DTDs more palateable.
+ class AttlistDecl < Child
+ include Enumerable
+
+ # What is this? Got me.
+ attr_reader :element_name
+
+ # Create an AttlistDecl, pulling the information from a Source. Notice
+ # that this isn't very convenient; to create an AttlistDecl, you basically
+ # have to format it yourself, and then have the initializer parse it.
+ # Sorry, but for the foreseeable future, DTD support in REXML is pretty
+ # weak on convenience. Have I mentioned how much I hate DTDs?
+ def initialize(source)
+ super()
+ if (source.kind_of? Array)
+ @element_name, @pairs, @contents = *source
+ end
+ end
+
+ # Access the attlist attribute/value pairs.
+ # value = attlist_decl[ attribute_name ]
+ def [](key)
+ @pairs[key]
+ end
+
+ # Whether an attlist declaration includes the given attribute definition
+ # if attlist_decl.include? "xmlns:foobar"
+ def include?(key)
+ @pairs.keys.include? key
+ end
+
+ # Iterate over the key/value pairs:
+ # attlist_decl.each { |attribute_name, attribute_value| ... }
+ def each(&block)
+ @pairs.each(&block)
+ end
+
+ # Write out exactly what we got in.
+ def write out, indent=-1
+ out << @contents
+ end
+
+ def node_type
+ :attlistdecl
+ end
+ end
end
diff --git a/lib/rexml/attribute.rb b/lib/rexml/attribute.rb
index 89c1ada36c..ca5984e178 100644
--- a/lib/rexml/attribute.rb
+++ b/lib/rexml/attribute.rb
@@ -1,169 +1,176 @@
+# frozen_string_literal: false
require "rexml/namespace"
require 'rexml/text'
module REXML
- # Defines an Element Attribute; IE, a attribute=value pair, as in:
- # <element attribute="value"/>. Attributes can be in their own
- # namespaces. General users of REXML will not interact with the
- # Attribute class much.
- class Attribute
- include Node
- include Namespace
-
- # The element to which this attribute belongs
- attr_reader :element
- # The normalized value of this attribute. That is, the attribute with
- # entities intact.
- attr_writer :normalized
- PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\2/um
-
- # Constructor.
+ # Defines an Element Attribute; IE, a attribute=value pair, as in:
+ # <element attribute="value"/>. Attributes can be in their own
+ # namespaces. General users of REXML will not interact with the
+ # Attribute class much.
+ class Attribute
+ include Node
+ include Namespace
+
+ # The element to which this attribute belongs
+ attr_reader :element
+ # The normalized value of this attribute. That is, the attribute with
+ # entities intact.
+ attr_writer :normalized
+ PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\2/um
+
+ NEEDS_A_SECOND_CHECK = /(<|&((#{Entity::NAME});|(#0*((?:\d+)|(?:x[a-fA-F0-9]+)));)?)/um
+
+ # Constructor.
# FIXME: The parser doesn't catch illegal characters in attributes
#
- # first::
+ # first::
# Either: an Attribute, which this new attribute will become a
# clone of; or a String, which is the name of this attribute
# second::
# If +first+ is an Attribute, then this may be an Element, or nil.
# If nil, then the Element parent of this attribute is the parent
- # of the +first+ Attribute. If the first argument is a String,
- # then this must also be a String, and is the content of the attribute.
+ # of the +first+ Attribute. If the first argument is a String,
+ # then this must also be a String, and is the content of the attribute.
# If this is the content, it must be fully normalized (contain no
# illegal characters).
# parent::
- # Ignored unless +first+ is a String; otherwise, may be the Element
+ # Ignored unless +first+ is a String; otherwise, may be the Element
# parent of this attribute, or nil.
#
- #
- # Attribute.new( attribute_to_clone )
- # Attribute.new( attribute_to_clone, parent_element )
- # Attribute.new( "attr", "attr_value" )
- # Attribute.new( "attr", "attr_value", parent_element )
- def initialize( first, second=nil, parent=nil )
- @normalized = @unnormalized = @element = nil
- if first.kind_of? Attribute
- self.name = first.expanded_name
- @unnormalized = first.value
- if second.kind_of? Element
- @element = second
- else
- @element = first.element
- end
- elsif first.kind_of? String
- @element = parent
- self.name = first
- @normalized = second.to_s
- else
- raise "illegal argument #{first.class.name} to Attribute constructor"
- end
- end
-
- # Returns the namespace of the attribute.
- #
- # e = Element.new( "elns:myelement" )
- # e.add_attribute( "nsa:a", "aval" )
- # e.add_attribute( "b", "bval" )
- # e.attributes.get_attribute( "a" ).prefix # -> "nsa"
- # e.attributes.get_attribute( "b" ).prefix # -> "elns"
- # a = Attribute.new( "x", "y" )
- # a.prefix # -> ""
- def prefix
- pf = super
- if pf == ""
- pf = @element.prefix if @element
- end
- pf
- end
-
- # Returns the namespace URL, if defined, or nil otherwise
- #
- # e = Element.new("el")
- # e.add_attributes({"xmlns:ns", "http://url"})
- # e.namespace( "ns" ) # -> "http://url"
- def namespace arg=nil
- arg = prefix if arg.nil?
- @element.namespace arg
- end
-
- # Returns true if other is an Attribute and has the same name and value,
- # false otherwise.
- def ==( other )
- other.kind_of?(Attribute) and other.name==name and other.value==value
- end
-
- # Creates (and returns) a hash from both the name and value
- def hash
- name.hash + value.hash
- end
-
- # Returns this attribute out as XML source, expanding the name
- #
- # a = Attribute.new( "x", "y" )
- # a.to_string # -> "x='y'"
- # b = Attribute.new( "ns:x", "y" )
- # b.to_string # -> "ns:x='y'"
- def to_string
- if @element and @element.context and @element.context[:attribute_quote] == :quote
- %Q^#@expanded_name="#{to_s().gsub(/"/, '&quote;')}"^
- else
- "#@expanded_name='#{to_s().gsub(/'/, '&apos;')}'"
- end
- end
-
- # Returns the attribute value, with entities replaced
- def to_s
- return @normalized if @normalized
-
- doctype = nil
- if @element
- doc = @element.document
- doctype = doc.doctype if doc
- end
-
- @normalized = Text::normalize( @unnormalized, doctype )
- @unnormalized = nil
+ #
+ # Attribute.new( attribute_to_clone )
+ # Attribute.new( attribute_to_clone, parent_element )
+ # Attribute.new( "attr", "attr_value" )
+ # Attribute.new( "attr", "attr_value", parent_element )
+ def initialize( first, second=nil, parent=nil )
+ @normalized = @unnormalized = @element = nil
+ if first.kind_of? Attribute
+ self.name = first.expanded_name
+ @unnormalized = first.value
+ if second.kind_of? Element
+ @element = second
+ else
+ @element = first.element
+ end
+ elsif first.kind_of? String
+ @element = parent
+ self.name = first
+ @normalized = second.to_s
+ else
+ raise "illegal argument #{first.class.name} to Attribute constructor"
+ end
+ end
+
+ # Returns the namespace of the attribute.
+ #
+ # e = Element.new( "elns:myelement" )
+ # e.add_attribute( "nsa:a", "aval" )
+ # e.add_attribute( "b", "bval" )
+ # e.attributes.get_attribute( "a" ).prefix # -> "nsa"
+ # e.attributes.get_attribute( "b" ).prefix # -> "elns"
+ # a = Attribute.new( "x", "y" )
+ # a.prefix # -> ""
+ def prefix
+ pf = super
+ if pf == ""
+ pf = @element.prefix if @element
+ end
+ pf
+ end
+
+ # Returns the namespace URL, if defined, or nil otherwise
+ #
+ # e = Element.new("el")
+ # e.add_namespace("ns", "http://url")
+ # e.add_attribute("ns:a", "b")
+ # e.add_attribute("nsx:a", "c")
+ # e.attribute("ns:a").namespace # => "http://url"
+ # e.attribute("nsx:a").namespace # => nil
+ def namespace arg=nil
+ arg = prefix if arg.nil?
+ @element.namespace arg
+ end
+
+ # Returns true if other is an Attribute and has the same name and value,
+ # false otherwise.
+ def ==( other )
+ other.kind_of?(Attribute) and other.name==name and other.value==value
+ end
+
+ # Creates (and returns) a hash from both the name and value
+ def hash
+ name.hash + value.hash
+ end
+
+ # Returns this attribute out as XML source, expanding the name
+ #
+ # a = Attribute.new( "x", "y" )
+ # a.to_string # -> "x='y'"
+ # b = Attribute.new( "ns:x", "y" )
+ # b.to_string # -> "ns:x='y'"
+ def to_string
+ if @element and @element.context and @element.context[:attribute_quote] == :quote
+ %Q^#@expanded_name="#{to_s().gsub(/"/, '&quot;')}"^
+ else
+ "#@expanded_name='#{to_s().gsub(/'/, '&apos;')}'"
+ end
+ end
+
+ def doctype
+ if @element
+ doc = @element.document
+ doc.doctype if doc
+ end
+ end
+
+ # Returns the attribute value, with entities replaced
+ def to_s
+ return @normalized if @normalized
+
+ @normalized = Text::normalize( @unnormalized, doctype )
+ @unnormalized = nil
@normalized
- end
-
- # Returns the UNNORMALIZED value of this attribute. That is, entities
- # have been expanded to their values
- def value
- return @unnormalized if @unnormalized
- doctype = nil
- if @element
- doc = @element.document
- doctype = doc.doctype if doc
- end
- @unnormalized = Text::unnormalize( @normalized, doctype )
- @normalized = nil
+ end
+
+ # Returns the UNNORMALIZED value of this attribute. That is, entities
+ # have been expanded to their values
+ def value
+ return @unnormalized if @unnormalized
+ @unnormalized = Text::unnormalize( @normalized, doctype )
+ @normalized = nil
@unnormalized
- end
-
- # Returns a copy of this attribute
- def clone
- Attribute.new self
- end
-
- # Sets the element of which this object is an attribute. Normally, this
- # is not directly called.
- #
- # Returns this attribute
- def element=( element )
- @element = element
- self
- end
-
- # Removes this Attribute from the tree, and returns true if successfull
- #
- # This method is usually not called directly.
- def remove
- @element.attributes.delete self.name unless @element.nil?
- end
-
- # Writes this attribute (EG, puts 'key="value"' to the output)
- def write( output, indent=-1 )
- output << to_string
- end
+ end
+
+ # Returns a copy of this attribute
+ def clone
+ Attribute.new self
+ end
+
+ # Sets the element of which this object is an attribute. Normally, this
+ # is not directly called.
+ #
+ # Returns this attribute
+ def element=( element )
+ @element = element
+
+ if @normalized
+ Text.check( @normalized, NEEDS_A_SECOND_CHECK, doctype )
+ end
+
+ self
+ end
+
+ # Removes this Attribute from the tree, and returns true if successful
+ #
+ # This method is usually not called directly.
+ def remove
+ @element.attributes.delete self.name unless @element.nil?
+ end
+
+ # Writes this attribute (EG, puts 'key="value"' to the output)
+ def write( output, indent=-1 )
+ output << to_string
+ end
def node_type
:attribute
@@ -180,6 +187,6 @@ module REXML
path += "/@#{self.expanded_name}"
return path
end
- end
+ end
end
#vim:ts=2 sw=2 noexpandtab:
diff --git a/lib/rexml/cdata.rb b/lib/rexml/cdata.rb
index efcb71160a..2238446dc4 100644
--- a/lib/rexml/cdata.rb
+++ b/lib/rexml/cdata.rb
@@ -1,39 +1,40 @@
+# frozen_string_literal: false
require "rexml/text"
module REXML
- class CData < Text
- START = '<![CDATA['
- STOP = ']]>'
- ILLEGAL = /(\]\]>)/
+ class CData < Text
+ START = '<![CDATA['
+ STOP = ']]>'
+ ILLEGAL = /(\]\]>)/
- # Constructor. CData is data between <![CDATA[ ... ]]>
- #
- # _Examples_
- # CData.new( source )
- # CData.new( "Here is some CDATA" )
- # CData.new( "Some unprocessed data", respect_whitespace_TF, parent_element )
- def initialize( first, whitespace=true, parent=nil )
- super( first, whitespace, parent, true, true, ILLEGAL )
- end
+ # Constructor. CData is data between <![CDATA[ ... ]]>
+ #
+ # _Examples_
+ # CData.new( source )
+ # CData.new( "Here is some CDATA" )
+ # CData.new( "Some unprocessed data", respect_whitespace_TF, parent_element )
+ def initialize( first, whitespace=true, parent=nil )
+ super( first, whitespace, parent, false, true, ILLEGAL )
+ end
- # Make a copy of this object
- #
- # _Examples_
- # c = CData.new( "Some text" )
- # d = c.clone
- # d.to_s # -> "Some text"
- def clone
- CData.new self
- end
+ # Make a copy of this object
+ #
+ # _Examples_
+ # c = CData.new( "Some text" )
+ # d = c.clone
+ # d.to_s # -> "Some text"
+ def clone
+ CData.new self
+ end
- # Returns the content of this CData object
- #
- # _Examples_
- # c = CData.new( "Some text" )
- # c.to_s # -> "Some text"
- def to_s
- @string
- end
+ # Returns the content of this CData object
+ #
+ # _Examples_
+ # c = CData.new( "Some text" )
+ # c.to_s # -> "Some text"
+ def to_s
+ @string
+ end
def value
@string
@@ -42,26 +43,26 @@ module REXML
# == DEPRECATED
# See the rexml/formatters package
#
- # Generates XML output of this object
- #
- # output::
- # Where to write the string. Defaults to $stdout
- # indent::
+ # Generates XML output of this object
+ #
+ # output::
+ # Where to write the string. Defaults to $stdout
+ # indent::
# The amount to indent this node by
- # transitive::
+ # transitive::
# Ignored
- # ie_hack::
+ # ie_hack::
# Ignored
- #
- # _Examples_
- # c = CData.new( " Some text " )
- # c.write( $stdout ) #-> <![CDATA[ Some text ]]>
- def write( output=$stdout, indent=-1, transitive=false, ie_hack=false )
- Kernel.warn( "#{self.class.name}.write is deprecated" )
- indent( output, indent )
- output << START
- output << @string
- output << STOP
- end
- end
+ #
+ # _Examples_
+ # c = CData.new( " Some text " )
+ # c.write( $stdout ) #-> <![CDATA[ Some text ]]>
+ def write( output=$stdout, indent=-1, transitive=false, ie_hack=false )
+ Kernel.warn( "#{self.class.name}.write is deprecated", uplevel: 1)
+ indent( output, indent )
+ output << START
+ output << @string
+ output << STOP
+ end
+ end
end
diff --git a/lib/rexml/child.rb b/lib/rexml/child.rb
index 6d3c9df5e6..d23451e71e 100644
--- a/lib/rexml/child.rb
+++ b/lib/rexml/child.rb
@@ -1,96 +1,97 @@
+# frozen_string_literal: false
require "rexml/node"
module REXML
- ##
- # A Child object is something contained by a parent, and this class
- # contains methods to support that. Most user code will not use this
- # class directly.
- class Child
- include Node
- attr_reader :parent # The Parent of this object
+ ##
+ # A Child object is something contained by a parent, and this class
+ # contains methods to support that. Most user code will not use this
+ # class directly.
+ class Child
+ include Node
+ attr_reader :parent # The Parent of this object
- # Constructor. Any inheritors of this class should call super to make
- # sure this method is called.
- # parent::
- # if supplied, the parent of this child will be set to the
- # supplied value, and self will be added to the parent
- def initialize( parent = nil )
- @parent = nil
- # Declare @parent, but don't define it. The next line sets the
- # parent.
- parent.add( self ) if parent
- end
+ # Constructor. Any inheritors of this class should call super to make
+ # sure this method is called.
+ # parent::
+ # if supplied, the parent of this child will be set to the
+ # supplied value, and self will be added to the parent
+ def initialize( parent = nil )
+ @parent = nil
+ # Declare @parent, but don't define it. The next line sets the
+ # parent.
+ parent.add( self ) if parent
+ end
- # Replaces this object with another object. Basically, calls
- # Parent.replace_child
- #
- # Returns:: self
- def replace_with( child )
- @parent.replace_child( self, child )
- self
- end
+ # Replaces this object with another object. Basically, calls
+ # Parent.replace_child
+ #
+ # Returns:: self
+ def replace_with( child )
+ @parent.replace_child( self, child )
+ self
+ end
- # Removes this child from the parent.
- #
- # Returns:: self
- def remove
- unless @parent.nil?
- @parent.delete self
- end
- self
- end
+ # Removes this child from the parent.
+ #
+ # Returns:: self
+ def remove
+ unless @parent.nil?
+ @parent.delete self
+ end
+ self
+ end
- # Sets the parent of this child to the supplied argument.
- #
- # other::
- # Must be a Parent object. If this object is the same object as the
- # existing parent of this child, no action is taken. Otherwise, this
- # child is removed from the current parent (if one exists), and is added
- # to the new parent.
- # Returns:: The parent added
- def parent=( other )
- return @parent if @parent == other
- @parent.delete self if defined? @parent and @parent
- @parent = other
- end
+ # Sets the parent of this child to the supplied argument.
+ #
+ # other::
+ # Must be a Parent object. If this object is the same object as the
+ # existing parent of this child, no action is taken. Otherwise, this
+ # child is removed from the current parent (if one exists), and is added
+ # to the new parent.
+ # Returns:: The parent added
+ def parent=( other )
+ return @parent if @parent == other
+ @parent.delete self if defined? @parent and @parent
+ @parent = other
+ end
- alias :next_sibling :next_sibling_node
- alias :previous_sibling :previous_sibling_node
+ alias :next_sibling :next_sibling_node
+ alias :previous_sibling :previous_sibling_node
- # Sets the next sibling of this child. This can be used to insert a child
- # after some other child.
- # a = Element.new("a")
- # b = a.add_element("b")
- # c = Element.new("c")
- # b.next_sibling = c
- # # => <a><b/><c/></a>
- def next_sibling=( other )
- parent.insert_after self, other
- end
+ # Sets the next sibling of this child. This can be used to insert a child
+ # after some other child.
+ # a = Element.new("a")
+ # b = a.add_element("b")
+ # c = Element.new("c")
+ # b.next_sibling = c
+ # # => <a><b/><c/></a>
+ def next_sibling=( other )
+ parent.insert_after self, other
+ end
- # Sets the previous sibling of this child. This can be used to insert a
- # child before some other child.
- # a = Element.new("a")
- # b = a.add_element("b")
- # c = Element.new("c")
- # b.previous_sibling = c
- # # => <a><b/><c/></a>
- def previous_sibling=(other)
- parent.insert_before self, other
- end
+ # Sets the previous sibling of this child. This can be used to insert a
+ # child before some other child.
+ # a = Element.new("a")
+ # b = a.add_element("b")
+ # c = Element.new("c")
+ # b.previous_sibling = c
+ # # => <a><b/><c/></a>
+ def previous_sibling=(other)
+ parent.insert_before self, other
+ end
- # Returns:: the document this child belongs to, or nil if this child
- # belongs to no document
- def document
- return parent.document unless parent.nil?
- nil
- end
+ # Returns:: the document this child belongs to, or nil if this child
+ # belongs to no document
+ def document
+ return parent.document unless parent.nil?
+ nil
+ end
- # This doesn't yet handle encodings
- def bytes
- encoding = document.encoding
+ # This doesn't yet handle encodings
+ def bytes
+ document.encoding
- to_s
- end
- end
+ to_s
+ end
+ end
end
diff --git a/lib/rexml/comment.rb b/lib/rexml/comment.rb
index 2b9b4b89c9..822fe0d586 100644
--- a/lib/rexml/comment.rb
+++ b/lib/rexml/comment.rb
@@ -1,80 +1,80 @@
+# frozen_string_literal: false
require "rexml/child"
module REXML
- ##
- # Represents an XML comment; that is, text between \<!-- ... -->
- class Comment < Child
- include Comparable
- START = "<!--"
- STOP = "-->"
+ ##
+ # Represents an XML comment; that is, text between \<!-- ... -->
+ class Comment < Child
+ include Comparable
+ START = "<!--"
+ STOP = "-->"
- # The content text
+ # The content text
- attr_accessor :string
+ attr_accessor :string
- ##
- # Constructor. The first argument can be one of three types:
- # @param first If String, the contents of this comment are set to the
- # argument. If Comment, the argument is duplicated. If
- # Source, the argument is scanned for a comment.
- # @param second If the first argument is a Source, this argument
- # should be nil, not supplied, or a Parent to be set as the parent
- # of this object
- def initialize( first, second = nil )
- #puts "IN COMMENT CONSTRUCTOR; SECOND IS #{second.type}"
- super(second)
- if first.kind_of? String
- @string = first
- elsif first.kind_of? Comment
- @string = first.string
- end
- end
+ ##
+ # Constructor. The first argument can be one of three types:
+ # @param first If String, the contents of this comment are set to the
+ # argument. If Comment, the argument is duplicated. If
+ # Source, the argument is scanned for a comment.
+ # @param second If the first argument is a Source, this argument
+ # should be nil, not supplied, or a Parent to be set as the parent
+ # of this object
+ def initialize( first, second = nil )
+ super(second)
+ if first.kind_of? String
+ @string = first
+ elsif first.kind_of? Comment
+ @string = first.string
+ end
+ end
- def clone
- Comment.new self
- end
+ def clone
+ Comment.new self
+ end
# == DEPRECATED
# See REXML::Formatters
#
- # output::
- # Where to write the string
- # indent::
- # An integer. If -1, no indenting will be used; otherwise, the
- # indentation will be this number of spaces, and children will be
- # indented an additional amount.
- # transitive::
- # Ignored by this class. The contents of comments are never modified.
- # ie_hack::
- # Needed for conformity to the child API, but not used by this class.
- def write( output, indent=-1, transitive=false, ie_hack=false )
- Kernel.warn("Comment.write is deprecated. See REXML::Formatters")
- indent( output, indent )
- output << START
- output << @string
- output << STOP
- end
+ # output::
+ # Where to write the string
+ # indent::
+ # An integer. If -1, no indenting will be used; otherwise, the
+ # indentation will be this number of spaces, and children will be
+ # indented an additional amount.
+ # transitive::
+ # Ignored by this class. The contents of comments are never modified.
+ # ie_hack::
+ # Needed for conformity to the child API, but not used by this class.
+ def write( output, indent=-1, transitive=false, ie_hack=false )
+ Kernel.warn("Comment.write is deprecated. See REXML::Formatters", uplevel: 1)
+ indent( output, indent )
+ output << START
+ output << @string
+ output << STOP
+ end
- alias :to_s :string
+ alias :to_s :string
- ##
- # Compares this Comment to another; the contents of the comment are used
- # in the comparison.
- def <=>(other)
- other.to_s <=> @string
- end
+ ##
+ # Compares this Comment to another; the contents of the comment are used
+ # in the comparison.
+ def <=>(other)
+ other.to_s <=> @string
+ end
- ##
- # Compares this Comment to another; the contents of the comment are used
- # in the comparison.
- def ==( other )
- other.kind_of? Comment and
- (other <=> self) == 0
- end
+ ##
+ # Compares this Comment to another; the contents of the comment are used
+ # in the comparison.
+ def ==( other )
+ other.kind_of? Comment and
+ (other <=> self) == 0
+ end
def node_type
:comment
end
- end
+ end
end
#vim:ts=2 sw=2 noexpandtab:
diff --git a/lib/rexml/doctype.rb b/lib/rexml/doctype.rb
index 05cd4ab331..cb9bf57406 100644
--- a/lib/rexml/doctype.rb
+++ b/lib/rexml/doctype.rb
@@ -1,3 +1,4 @@
+# frozen_string_literal: false
require "rexml/parent"
require "rexml/parseexception"
require "rexml/namespace"
@@ -6,6 +7,39 @@ require 'rexml/attlistdecl'
require 'rexml/xmltokens'
module REXML
+ class ReferenceWriter
+ def initialize(id_type,
+ public_id_literal,
+ system_literal)
+ @id_type = id_type
+ @public_id_literal = public_id_literal
+ @system_literal = system_literal
+ @default_quote = "\""
+ end
+
+ def write(output)
+ output << " #{@id_type}"
+ if @public_id_literal
+ if @public_id_literal.include?("'")
+ quote = "\""
+ else
+ quote = @default_quote
+ end
+ output << " #{quote}#{@public_id_literal}#{quote}"
+ end
+ if @system_literal
+ if @system_literal.include?("'")
+ quote = "\""
+ elsif @system_literal.include?("\"")
+ quote = "'"
+ else
+ quote = @default_quote
+ end
+ output << " #{quote}#{@system_literal}#{quote}"
+ end
+ end
+ end
+
# Represents an XML DOCTYPE declaration; that is, the contents of <!DOCTYPE
# ... >. DOCTYPES can be used to declare the DTD of a document, as well as
# being used to declare entities used in the document.
@@ -15,11 +49,11 @@ module REXML
STOP = ">"
SYSTEM = "SYSTEM"
PUBLIC = "PUBLIC"
- DEFAULT_ENTITIES = {
- 'gt'=>EntityConst::GT,
- 'lt'=>EntityConst::LT,
- 'quot'=>EntityConst::QUOT,
- "apos"=>EntityConst::APOS
+ DEFAULT_ENTITIES = {
+ 'gt'=>EntityConst::GT,
+ 'lt'=>EntityConst::LT,
+ 'quot'=>EntityConst::QUOT,
+ "apos"=>EntityConst::APOS
}
# name is the name of the doctype
@@ -33,7 +67,7 @@ module REXML
# dt = DocType.new( doctype_to_clone )
# # Incomplete. Shallow clone of doctype
#
- # +Note+ that the constructor:
+ # +Note+ that the constructor:
#
# Doctype.new( Source.new( "<!DOCTYPE foo 'bar'>" ) )
#
@@ -49,6 +83,8 @@ module REXML
super( parent )
@name = first.name
@external_id = first.external_id
+ @long_name = first.instance_variable_get(:@long_name)
+ @uri = first.instance_variable_get(:@uri)
elsif first.kind_of? Array
super( parent )
@name = first[0]
@@ -111,13 +147,14 @@ module REXML
output << START
output << ' '
output << @name
- output << " #@external_id" if @external_id
- output << " #{@long_name.inspect}" if @long_name
- output << " #{@uri.inspect}" if @uri
+ if @external_id
+ reference_writer = ReferenceWriter.new(@external_id,
+ @long_name,
+ @uri)
+ reference_writer.write(output)
+ end
unless @children.empty?
- next_indent = indent + 1
output << ' ['
- child = nil # speed
@children.each { |child|
output << "\n"
f.write( child, output )
@@ -140,8 +177,8 @@ module REXML
@entities = DEFAULT_ENTITIES.clone if @entities == DEFAULT_ENTITIES
@entities[ child.name ] = child if child.kind_of? Entity
end
-
- # This method retrieves the public identifier identifying the document's
+
+ # This method retrieves the public identifier identifying the document's
# DTD.
#
# Method contributed by Henrik Martensson
@@ -153,7 +190,7 @@ module REXML
strip_quotes(@long_name)
end
end
-
+
# This method retrieves the system identifier identifying the document's DTD
#
# Method contributed by Henrik Martensson
@@ -165,16 +202,16 @@ module REXML
@uri.kind_of?(String) ? strip_quotes(@uri) : nil
end
end
-
+
# This method returns a list of notations that have been declared in the
- # _internal_ DTD subset. Notations in the external DTD subset are not
+ # _internal_ DTD subset. Notations in the external DTD subset are not
# listed.
#
# Method contributed by Henrik Martensson
def notations
children().select {|node| node.kind_of?(REXML::NotationDecl)}
end
-
+
# Retrieves a named notation. Only notations declared in the internal
# DTD subset can be retrieved.
#
@@ -184,12 +221,12 @@ module REXML
notation_decl.name == name
}
end
-
+
private
-
+
# Method contributed by Henrik Martensson
def strip_quotes(quoted_string)
- quoted_string =~ /^[\'\"].*[\´\"]$/ ?
+ quoted_string =~ /^[\'\"].*[\'\"]$/ ?
quoted_string[1, quoted_string.length-2] :
quoted_string
end
@@ -218,7 +255,7 @@ module REXML
output << to_s
end
end
-
+
public
class ElementDecl < Declaration
def initialize( src )
@@ -250,17 +287,17 @@ module REXML
end
def to_s
- "<!NOTATION #@name #@middle#{
- @public ? ' ' + public.inspect : ''
- }#{
- @system ? ' ' +@system.inspect : ''
- }>"
+ notation = "<!NOTATION #{@name}"
+ reference_writer = ReferenceWriter.new(@middle, @public, @system)
+ reference_writer.write(notation)
+ notation << ">"
+ notation
end
def write( output, indent=-1 )
output << to_s
end
-
+
# This method retrieves the name of the notation.
#
# Method contributed by Henrik Martensson
diff --git a/lib/rexml/document.rb b/lib/rexml/document.rb
index 54aa691ad8..806bc499cd 100644
--- a/lib/rexml/document.rb
+++ b/lib/rexml/document.rb
@@ -1,3 +1,5 @@
+# frozen_string_literal: false
+require "rexml/security"
require "rexml/element"
require "rexml/xmldecl"
require "rexml/source"
@@ -16,130 +18,138 @@ module REXML
# Document has a single child that can be accessed by root().
# Note that if you want to have an XML declaration written for a document
# you create, you must add one; REXML documents do not write a default
- # declaration for you. See |DECLARATION| and |write|.
- class Document < Element
- # A convenient default XML declaration. If you want an XML declaration,
- # the easiest way to add one is mydoc << Document::DECLARATION
+ # declaration for you. See |DECLARATION| and |write|.
+ class Document < Element
+ # A convenient default XML declaration. If you want an XML declaration,
+ # the easiest way to add one is mydoc << Document::DECLARATION
# +DEPRECATED+
# Use: mydoc << XMLDecl.default
- DECLARATION = XMLDecl.default
-
- # Constructor
- # @param source if supplied, must be a Document, String, or IO.
- # Documents have their context and Element attributes cloned.
- # Strings are expected to be valid XML documents. IOs are expected
- # to be sources of valid XML documents.
- # @param context if supplied, contains the context of the document;
- # this should be a Hash.
- def initialize( source = nil, context = {} )
- super()
- @context = context
- return if source.nil?
- if source.kind_of? Document
- @context = source.context
- super source
- else
- build( source )
- end
- end
+ DECLARATION = XMLDecl.default
+
+ # Constructor
+ # @param source if supplied, must be a Document, String, or IO.
+ # Documents have their context and Element attributes cloned.
+ # Strings are expected to be valid XML documents. IOs are expected
+ # to be sources of valid XML documents.
+ # @param context if supplied, contains the context of the document;
+ # this should be a Hash.
+ def initialize( source = nil, context = {} )
+ @entity_expansion_count = 0
+ super()
+ @context = context
+ return if source.nil?
+ if source.kind_of? Document
+ @context = source.context
+ super source
+ else
+ build( source )
+ end
+ end
def node_type
:document
end
- # Should be obvious
- def clone
- Document.new self
- end
+ # Should be obvious
+ def clone
+ Document.new self
+ end
- # According to the XML spec, a root node has no expanded name
- def expanded_name
- ''
- #d = doc_type
- #d ? d.name : "UNDEFINED"
- end
+ # According to the XML spec, a root node has no expanded name
+ def expanded_name
+ ''
+ #d = doc_type
+ #d ? d.name : "UNDEFINED"
+ end
- alias :name :expanded_name
+ alias :name :expanded_name
- # We override this, because XMLDecls and DocTypes must go at the start
- # of the document
- def add( child )
- if child.kind_of? XMLDecl
- @children.unshift child
+ # We override this, because XMLDecls and DocTypes must go at the start
+ # of the document
+ def add( child )
+ if child.kind_of? XMLDecl
+ if @children[0].kind_of? XMLDecl
+ @children[0] = child
+ else
+ @children.unshift child
+ end
child.parent = self
- elsif child.kind_of? DocType
- # Find first Element or DocType node and insert the decl right
+ elsif child.kind_of? DocType
+ # Find first Element or DocType node and insert the decl right
# before it. If there is no such node, just insert the child at the
# end. If there is a child and it is an DocType, then replace it.
- insert_before_index = 0
- @children.find { |x|
- insert_before_index += 1
+ insert_before_index = @children.find_index { |x|
x.kind_of?(Element) || x.kind_of?(DocType)
}
- if @children[ insert_before_index ] # Not null = not end of list
- if @children[ insert_before_index ].kind_of DocType
+ if insert_before_index # Not null = not end of list
+ if @children[ insert_before_index ].kind_of? DocType
@children[ insert_before_index ] = child
else
- @children[ index_before_index-1, 0 ] = child
+ @children[ insert_before_index-1, 0 ] = child
end
else # Insert at end of list
- @children[insert_before_index] = child
+ @children << child
end
- child.parent = self
- else
- rv = super
- raise "attempted adding second root element to document" if @elements.size > 1
- rv
- end
- end
- alias :<< :add
-
- def add_element(arg=nil, arg2=nil)
- rv = super
- raise "attempted adding second root element to document" if @elements.size > 1
- rv
- end
-
- # @return the root Element of the document, or nil if this document
- # has no children.
- def root
+ child.parent = self
+ else
+ rv = super
+ raise "attempted adding second root element to document" if @elements.size > 1
+ rv
+ end
+ end
+ alias :<< :add
+
+ def add_element(arg=nil, arg2=nil)
+ rv = super
+ raise "attempted adding second root element to document" if @elements.size > 1
+ rv
+ end
+
+ # @return the root Element of the document, or nil if this document
+ # has no children.
+ def root
elements[1]
#self
#@children.find { |item| item.kind_of? Element }
- end
-
- # @return the DocType child of the document, if one exists,
- # and nil otherwise.
- def doctype
- @children.find { |item| item.kind_of? DocType }
- end
-
- # @return the XMLDecl of this document; if no XMLDecl has been
- # set, the default declaration is returned.
- def xml_decl
- rv = @children[0]
+ end
+
+ # @return the DocType child of the document, if one exists,
+ # and nil otherwise.
+ def doctype
+ @children.find { |item| item.kind_of? DocType }
+ end
+
+ # @return the XMLDecl of this document; if no XMLDecl has been
+ # set, the default declaration is returned.
+ def xml_decl
+ rv = @children[0]
return rv if rv.kind_of? XMLDecl
- rv = @children.unshift(XMLDecl.default)[0]
- end
-
- # @return the XMLDecl version of this document as a String.
- # If no XMLDecl has been set, returns the default version.
- def version
- xml_decl().version
- end
-
- # @return the XMLDecl encoding of this document as a String.
- # If no XMLDecl has been set, returns the default encoding.
- def encoding
- xml_decl().encoding
- end
-
- # @return the XMLDecl standalone value of this document as a String.
- # If no XMLDecl has been set, returns the default setting.
- def stand_alone?
- xml_decl().stand_alone?
- end
+ @children.unshift(XMLDecl.default)[0]
+ end
+
+ # @return the XMLDecl version of this document as a String.
+ # If no XMLDecl has been set, returns the default version.
+ def version
+ xml_decl().version
+ end
+
+ # @return the XMLDecl encoding of this document as an
+ # Encoding object.
+ # If no XMLDecl has been set, returns the default encoding.
+ def encoding
+ xml_decl().encoding
+ end
+
+ # @return the XMLDecl standalone value of this document as a String.
+ # If no XMLDecl has been set, returns the default setting.
+ def stand_alone?
+ xml_decl().stand_alone?
+ end
+ # :call-seq:
+ # doc.write(output=$stdout, indent=-1, transitive=false, ie_hack=false, encoding=nil)
+ # doc.write(options={:output => $stdout, :indent => -1, :transitive => false, :ie_hack => false, :encoding => nil})
+ #
# Write the XML tree out, optionally with indent. This writes out the
# entire XML document, including XML declarations, doctype declarations,
# and processing instructions (if any are given).
@@ -150,41 +160,73 @@ module REXML
# specified, because it adds unnecessary bandwidth to applications such
# as XML-RPC.
#
+ # Accepts either positional arguments or a single options Hash.
+ # The options Hash style is recommended whenever one or more
+ # arguments are given.
+ #
+ # _Examples_
+ # Document.new("<a><b/></a>").write
+ #
+ # output = ""
+ # Document.new("<a><b/></a>").write(output)
+ #
+ # output = ""
+ # Document.new("<a><b/></a>").write(:output => output, :indent => 2)
+ #
# See also the classes in the rexml/formatters package for the proper way
- # to change the default formatting of XML output
+ # to change the default formatting of XML output.
#
# _Examples_
- # Document.new("<a><b/></a>").serialize
#
- # output_string = ""
- # tr = Transitive.new( output_string )
- # Document.new("<a><b/></a>").serialize( tr )
+ # output = ""
+ # tr = Transitive.new
+ # tr.write(Document.new("<a><b/></a>"), output)
#
# output::
- # output an object which supports '<< string'; this is where the
+ # output an object which supports '<< string'; this is where the
# document will be written.
# indent::
# An integer. If -1, no indenting will be used; otherwise, the
# indentation will be twice this number of spaces, and children will be
- # indented an additional amount. For a value of 3, every item will be
+ # indented an additional amount. For a value of 3, every item will be
# indented 3 more levels, or 6 more spaces (2 * 3). Defaults to -1
- # trans::
+ # transitive::
# If transitive is true and indent is >= 0, then the output will be
# pretty-printed in such a way that the added whitespace does not affect
# the absolute *value* of the document -- that is, it leaves the value
# and number of Text nodes in the document unchanged.
# ie_hack::
- # Internet Explorer is the worst piece of crap to have ever been
- # written, with the possible exception of Windows itself. Since IE is
- # unable to parse proper XML, we have to provide a hack to generate XML
- # that IE's limited abilities can handle. This hack inserts a space
- # before the /> on empty tags. Defaults to false
- def write( output=$stdout, indent=-1, trans=false, ie_hack=false )
- if xml_decl.encoding != "UTF-8" && !output.kind_of?(Output)
- output = Output.new( output, xml_decl.encoding )
+ # This hack inserts a space before the /> on empty tags to address
+ # a limitation of Internet Explorer. Defaults to false
+ # encoding::
+ # Encoding name as String. Change output encoding to specified encoding
+ # instead of encoding in XML declaration.
+ # Defaults to nil. It means encoding in XML declaration is used.
+ def write(*arguments)
+ if arguments.size == 1 and arguments[0].class == Hash
+ options = arguments[0]
+
+ output = options[:output]
+ indent = options[:indent]
+ transitive = options[:transitive]
+ ie_hack = options[:ie_hack]
+ encoding = options[:encoding]
+ else
+ output, indent, transitive, ie_hack, encoding, = *arguments
+ end
+
+ output ||= $stdout
+ indent ||= -1
+ transitive = false if transitive.nil?
+ ie_hack = false if ie_hack.nil?
+ encoding ||= xml_decl.encoding
+
+ if encoding != 'UTF-8' && !output.kind_of?(Output)
+ output = Output.new( output, encoding )
end
formatter = if indent > -1
- if trans
+ if transitive
+ require "rexml/formatters/transitive"
REXML::Formatters::Transitive.new( indent, ie_hack )
else
REXML::Formatters::Pretty.new( indent, ie_hack )
@@ -193,16 +235,57 @@ module REXML
REXML::Formatters::Default.new( ie_hack )
end
formatter.write( self, output )
- end
+ end
+
+
+ def Document::parse_stream( source, listener )
+ Parsers::StreamParser.new( source, listener ).parse
+ end
+
+ # Set the entity expansion limit. By default the limit is set to 10000.
+ #
+ # Deprecated. Use REXML::Security.entity_expansion_limit= instead.
+ def Document::entity_expansion_limit=( val )
+ Security.entity_expansion_limit = val
+ end
+
+ # Get the entity expansion limit. By default the limit is set to 10000.
+ #
+ # Deprecated. Use REXML::Security.entity_expansion_limit instead.
+ def Document::entity_expansion_limit
+ return Security.entity_expansion_limit
+ end
+
+ # Set the entity expansion text limit. By default the limit is set to 10240.
+ #
+ # Deprecated. Use REXML::Security.entity_expansion_text_limit= instead.
+ def Document::entity_expansion_text_limit=( val )
+ Security.entity_expansion_text_limit = val
+ end
+
+ # Get the entity expansion text limit. By default the limit is set to 10240.
+ #
+ # Deprecated. Use REXML::Security.entity_expansion_text_limit instead.
+ def Document::entity_expansion_text_limit
+ return Security.entity_expansion_text_limit
+ end
+
+ attr_reader :entity_expansion_count
-
- def Document::parse_stream( source, listener )
- Parsers::StreamParser.new( source, listener ).parse
- end
+ def record_entity_expansion
+ @entity_expansion_count += 1
+ if @entity_expansion_count > Security.entity_expansion_limit
+ raise "number of entity expansions exceeded, processing aborted."
+ end
+ end
- private
- def build( source )
+ def document
+ self
+ end
+
+ private
+ def build( source )
Parsers::TreeParser.new( source, self ).parse
- end
- end
+ end
+ end
end
diff --git a/lib/rexml/dtd/attlistdecl.rb b/lib/rexml/dtd/attlistdecl.rb
index e176bb0749..32847daadb 100644
--- a/lib/rexml/dtd/attlistdecl.rb
+++ b/lib/rexml/dtd/attlistdecl.rb
@@ -1,10 +1,11 @@
+# frozen_string_literal: false
require "rexml/child"
module REXML
- module DTD
- class AttlistDecl < Child
- START = "<!ATTLIST"
- START_RE = /^\s*#{START}/um
- PATTERN_RE = /\s*(#{START}.*?>)/um
- end
- end
+ module DTD
+ class AttlistDecl < Child
+ START = "<!ATTLIST"
+ START_RE = /^\s*#{START}/um
+ PATTERN_RE = /\s*(#{START}.*?>)/um
+ end
+ end
end
diff --git a/lib/rexml/dtd/dtd.rb b/lib/rexml/dtd/dtd.rb
index 4f735d4812..927d5d847b 100644
--- a/lib/rexml/dtd/dtd.rb
+++ b/lib/rexml/dtd/dtd.rb
@@ -1,3 +1,4 @@
+# frozen_string_literal: false
require "rexml/dtd/elementdecl"
require "rexml/dtd/entitydecl"
require "rexml/comment"
@@ -6,46 +7,41 @@ require "rexml/dtd/attlistdecl"
require "rexml/parent"
module REXML
- module DTD
- class Parser
- def Parser.parse( input )
- case input
- when String
- parse_helper input
- when File
- parse_helper input.read
- end
- end
+ module DTD
+ class Parser
+ def Parser.parse( input )
+ case input
+ when String
+ parse_helper input
+ when File
+ parse_helper input.read
+ end
+ end
- # Takes a String and parses it out
- def Parser.parse_helper( input )
- contents = Parent.new
- while input.size > 0
- case input
- when ElementDecl.PATTERN_RE
- match = $&
- source = $'
- contents << ElementDecl.new( match )
- when AttlistDecl.PATTERN_RE
- matchdata = $~
- source = $'
- contents << AttlistDecl.new( matchdata )
- when EntityDecl.PATTERN_RE
- matchdata = $~
- source = $'
- contents << EntityDecl.new( matchdata )
- when Comment.PATTERN_RE
- matchdata = $~
- source = $'
- contents << Comment.new( matchdata )
- when NotationDecl.PATTERN_RE
- matchdata = $~
- source = $'
- contents << NotationDecl.new( matchdata )
- end
- end
- contents
- end
- end
- end
+ # Takes a String and parses it out
+ def Parser.parse_helper( input )
+ contents = Parent.new
+ while input.size > 0
+ case input
+ when ElementDecl.PATTERN_RE
+ match = $&
+ contents << ElementDecl.new( match )
+ when AttlistDecl.PATTERN_RE
+ matchdata = $~
+ contents << AttlistDecl.new( matchdata )
+ when EntityDecl.PATTERN_RE
+ matchdata = $~
+ contents << EntityDecl.new( matchdata )
+ when Comment.PATTERN_RE
+ matchdata = $~
+ contents << Comment.new( matchdata )
+ when NotationDecl.PATTERN_RE
+ matchdata = $~
+ contents << NotationDecl.new( matchdata )
+ end
+ end
+ contents
+ end
+ end
+ end
end
diff --git a/lib/rexml/dtd/elementdecl.rb b/lib/rexml/dtd/elementdecl.rb
index c4e620f389..119fd41a8f 100644
--- a/lib/rexml/dtd/elementdecl.rb
+++ b/lib/rexml/dtd/elementdecl.rb
@@ -1,17 +1,18 @@
+# frozen_string_literal: false
require "rexml/child"
module REXML
- module DTD
- class ElementDecl < Child
- START = "<!ELEMENT"
- START_RE = /^\s*#{START}/um
- PATTERN_RE = /^\s*(#{START}.*?)>/um
- PATTERN_RE = /^\s*#{START}\s+((?:[:\w_][-\.\w_]*:)?[-!\*\.\w_]*)(.*?)>/
- #\s*((((["']).*?\5)|[^\/'">]*)*?)(\/)?>/um, true)
+ module DTD
+ class ElementDecl < Child
+ START = "<!ELEMENT"
+ START_RE = /^\s*#{START}/um
+ # PATTERN_RE = /^\s*(#{START}.*?)>/um
+ PATTERN_RE = /^\s*#{START}\s+((?:[:\w][-\.\w]*:)?[-!\*\.\w]*)(.*?)>/
+ #\s*((((["']).*?\5)|[^\/'">]*)*?)(\/)?>/um, true)
- def initialize match
- @name = match[1]
- @rest = match[2]
- end
- end
- end
+ def initialize match
+ @name = match[1]
+ @rest = match[2]
+ end
+ end
+ end
end
diff --git a/lib/rexml/dtd/entitydecl.rb b/lib/rexml/dtd/entitydecl.rb
index a5f1520f2b..45707e2f42 100644
--- a/lib/rexml/dtd/entitydecl.rb
+++ b/lib/rexml/dtd/entitydecl.rb
@@ -1,56 +1,57 @@
+# frozen_string_literal: false
require "rexml/child"
module REXML
- module DTD
- class EntityDecl < Child
- START = "<!ENTITY"
- START_RE = /^\s*#{START}/um
- PUBLIC = /^\s*#{START}\s+(?:%\s+)?(\w+)\s+PUBLIC\s+((["']).*?\3)\s+((["']).*?\5)\s*>/um
- SYSTEM = /^\s*#{START}\s+(?:%\s+)?(\w+)\s+SYSTEM\s+((["']).*?\3)(?:\s+NDATA\s+\w+)?\s*>/um
- PLAIN = /^\s*#{START}\s+(\w+)\s+((["']).*?\3)\s*>/um
- PERCENT = /^\s*#{START}\s+%\s+(\w+)\s+((["']).*?\3)\s*>/um
- # <!ENTITY name SYSTEM "...">
- # <!ENTITY name "...">
- def initialize src
- super()
- md = nil
- if src.match( PUBLIC )
- md = src.match( PUBLIC, true )
- @middle = "PUBLIC"
- @content = "#{md[2]} #{md[4]}"
- elsif src.match( SYSTEM )
- md = src.match( SYSTEM, true )
- @middle = "SYSTEM"
- @content = md[2]
- elsif src.match( PLAIN )
- md = src.match( PLAIN, true )
- @middle = ""
- @content = md[2]
- elsif src.match( PERCENT )
- md = src.match( PERCENT, true )
- @middle = ""
- @content = md[2]
- end
- raise ParseException.new("failed Entity match", src) if md.nil?
- @name = md[1]
- end
+ module DTD
+ class EntityDecl < Child
+ START = "<!ENTITY"
+ START_RE = /^\s*#{START}/um
+ PUBLIC = /^\s*#{START}\s+(?:%\s+)?(\w+)\s+PUBLIC\s+((["']).*?\3)\s+((["']).*?\5)\s*>/um
+ SYSTEM = /^\s*#{START}\s+(?:%\s+)?(\w+)\s+SYSTEM\s+((["']).*?\3)(?:\s+NDATA\s+\w+)?\s*>/um
+ PLAIN = /^\s*#{START}\s+(\w+)\s+((["']).*?\3)\s*>/um
+ PERCENT = /^\s*#{START}\s+%\s+(\w+)\s+((["']).*?\3)\s*>/um
+ # <!ENTITY name SYSTEM "...">
+ # <!ENTITY name "...">
+ def initialize src
+ super()
+ md = nil
+ if src.match( PUBLIC )
+ md = src.match( PUBLIC, true )
+ @middle = "PUBLIC"
+ @content = "#{md[2]} #{md[4]}"
+ elsif src.match( SYSTEM )
+ md = src.match( SYSTEM, true )
+ @middle = "SYSTEM"
+ @content = md[2]
+ elsif src.match( PLAIN )
+ md = src.match( PLAIN, true )
+ @middle = ""
+ @content = md[2]
+ elsif src.match( PERCENT )
+ md = src.match( PERCENT, true )
+ @middle = ""
+ @content = md[2]
+ end
+ raise ParseException.new("failed Entity match", src) if md.nil?
+ @name = md[1]
+ end
- def to_s
- rv = "<!ENTITY #@name "
- rv << "#@middle " if @middle.size > 0
- rv << @content
- rv
- end
+ def to_s
+ rv = "<!ENTITY #@name "
+ rv << "#@middle " if @middle.size > 0
+ rv << @content
+ rv
+ end
- def write( output, indent )
+ def write( output, indent )
indent( output, indent )
- output << to_s
- end
+ output << to_s
+ end
- def EntityDecl.parse_source source, listener
- md = source.match( PATTERN_RE, true )
- thing = md[0].squeeze(" \t\n\r")
- listener.send inspect.downcase, thing
- end
- end
- end
+ def EntityDecl.parse_source source, listener
+ md = source.match( PATTERN_RE, true )
+ thing = md[0].squeeze(" \t\n\r")
+ listener.send inspect.downcase, thing
+ end
+ end
+ end
end
diff --git a/lib/rexml/dtd/notationdecl.rb b/lib/rexml/dtd/notationdecl.rb
index a47ff8f24b..cfdf0b9b74 100644
--- a/lib/rexml/dtd/notationdecl.rb
+++ b/lib/rexml/dtd/notationdecl.rb
@@ -1,39 +1,40 @@
+# frozen_string_literal: false
require "rexml/child"
module REXML
- module DTD
- class NotationDecl < Child
- START = "<!NOTATION"
- START_RE = /^\s*#{START}/um
- PUBLIC = /^\s*#{START}\s+(\w[\w-]*)\s+(PUBLIC)\s+((["']).*?\4)\s*>/um
- SYSTEM = /^\s*#{START}\s+(\w[\w-]*)\s+(SYSTEM)\s+((["']).*?\4)\s*>/um
- def initialize src
- super()
- if src.match( PUBLIC )
- md = src.match( PUBLIC, true )
- elsif src.match( SYSTEM )
- md = src.match( SYSTEM, true )
- else
- raise ParseException.new( "error parsing notation: no matching pattern", src )
- end
- @name = md[1]
- @middle = md[2]
- @rest = md[3]
- end
+ module DTD
+ class NotationDecl < Child
+ START = "<!NOTATION"
+ START_RE = /^\s*#{START}/um
+ PUBLIC = /^\s*#{START}\s+(\w[\w-]*)\s+(PUBLIC)\s+((["']).*?\4)\s*>/um
+ SYSTEM = /^\s*#{START}\s+(\w[\w-]*)\s+(SYSTEM)\s+((["']).*?\4)\s*>/um
+ def initialize src
+ super()
+ if src.match( PUBLIC )
+ md = src.match( PUBLIC, true )
+ elsif src.match( SYSTEM )
+ md = src.match( SYSTEM, true )
+ else
+ raise ParseException.new( "error parsing notation: no matching pattern", src )
+ end
+ @name = md[1]
+ @middle = md[2]
+ @rest = md[3]
+ end
- def to_s
- "<!NOTATION #@name #@middle #@rest>"
- end
+ def to_s
+ "<!NOTATION #@name #@middle #@rest>"
+ end
- def write( output, indent )
+ def write( output, indent )
indent( output, indent )
- output << to_s
- end
+ output << to_s
+ end
- def NotationDecl.parse_source source, listener
- md = source.match( PATTERN_RE, true )
- thing = md[0].squeeze(" \t\n\r")
- listener.send inspect.downcase, thing
- end
- end
- end
+ def NotationDecl.parse_source source, listener
+ md = source.match( PATTERN_RE, true )
+ thing = md[0].squeeze(" \t\n\r")
+ listener.send inspect.downcase, thing
+ end
+ end
+ end
end
diff --git a/lib/rexml/element.rb b/lib/rexml/element.rb
index 3db87c6126..ac9b10872c 100644
--- a/lib/rexml/element.rb
+++ b/lib/rexml/element.rb
@@ -1,3 +1,4 @@
+# frozen_string_literal: false
require "rexml/parent"
require "rexml/namespace"
require "rexml/attribute"
@@ -20,7 +21,7 @@ module REXML
class Element < Parent
include Namespace
- UNDEFINED = "UNDEFINED"; # The default name
+ UNDEFINED = "UNDEFINED"; # The default name
# Mechanisms for accessing attributes and child elements of this
# element.
@@ -30,18 +31,18 @@ module REXML
attr_accessor :context
# Constructor
- # arg::
- # if not supplied, will be set to the default value.
- # If a String, the name of this object will be set to the argument.
- # If an Element, the object will be shallowly cloned; name,
- # attributes, and namespaces will be copied. Children will +not+ be
- # copied.
- # parent::
- # if supplied, must be a Parent, and will be used as
- # the parent of this object.
+ # arg::
+ # if not supplied, will be set to the default value.
+ # If a String, the name of this object will be set to the argument.
+ # If an Element, the object will be shallowly cloned; name,
+ # attributes, and namespaces will be copied. Children will +not+ be
+ # copied.
+ # parent::
+ # if supplied, must be a Parent, and will be used as
+ # the parent of this object.
# context::
- # If supplied, must be a hash containing context items. Context items
- # include:
+ # If supplied, must be a hash containing context items. Context items
+ # include:
# * <tt>:respect_whitespace</tt> the value of this is :+all+ or an array of
# strings being the names of the elements to respect
# whitespace for. Defaults to :+all+.
@@ -97,7 +98,7 @@ module REXML
self.class.new self
end
- # Evaluates to the root node of the document that this element
+ # Evaluates to the root node of the document that this element
# belongs to. If this element doesn't belong to a document, but does
# belong to another Element, the parent's root will be returned, until the
# earliest ancestor is found.
@@ -137,8 +138,8 @@ module REXML
# is the case if:
# 1. Neither :+respect_whitespace+ nor :+compress_whitespace+ has any value
# 2. The context has :+respect_whitespace+ set to :+all+ or
- # an array containing the name of this element, and
- # :+compress_whitespace+ isn't set to :+all+ or an array containing the
+ # an array containing the name of this element, and
+ # :+compress_whitespace+ isn't set to :+all+ or an array containing the
# name of this element.
# The evaluation is tested against +expanded_name+, and so is namespace
# sensitive.
@@ -162,7 +163,7 @@ module REXML
@ignore_whitespace_nodes = false
if @context
if @context[:ignore_whitespace_nodes]
- @ignore_whitespace_nodes =
+ @ignore_whitespace_nodes =
(@context[:ignore_whitespace_nodes] == :all or
@context[:ignore_whitespace_nodes].include? expanded_name)
end
@@ -206,13 +207,13 @@ module REXML
return namespaces
end
- # Evalutas to the URI for a prefix, or the empty string if no such
+ # Evaluates to the URI for a prefix, or the empty string if no such
# namespace is declared for this element. Evaluates recursively for
# ancestors. Returns the default namespace, if there is one.
- # prefix::
+ # prefix::
# the prefix to search for. If not supplied, returns the default
# namespace if one exists
- # Returns::
+ # Returns::
# the namespace URI as a String, or nil if no such namespace
# exists. If the namespace is undefined, returns an empty string
# doc = Document.new("<a xmlns='1' xmlns:y='2'><b/><c xmlns:z='3'/></a>")
@@ -235,10 +236,10 @@ module REXML
end
# Adds a namespace to this element.
- # prefix::
+ # prefix::
# the prefix string, or the namespace URI if +uri+ is not
# supplied
- # uri::
+ # uri::
# the namespace URI. May be nil, in which +prefix+ is used as
# the URI
# Evaluates to: this Element
@@ -280,12 +281,12 @@ module REXML
# Adds a child to this element, optionally setting attributes in
# the element.
- # element::
+ # element::
# optional. If Element, the element is added.
# Otherwise, a new Element is constructed with the argument (see
# Element.initialize).
- # attrs::
- # If supplied, must be a Hash containing String name,value
+ # attrs::
+ # If supplied, must be a Hash containing String name,value
# pairs, which will be used to set the attributes of the new Element.
# Returns:: the Element that was added
# el = doc.add_element 'my-tag'
@@ -296,15 +297,15 @@ module REXML
raise "First argument must be either an element name, or an Element object" if element.nil?
el = @elements.add(element)
attrs.each do |key, value|
- el.attributes[key]=Attribute.new(key,value,self)
- end if attrs.kind_of? Hash
+ el.attributes[key]=value
+ end if attrs.kind_of? Hash
el
end
# Deletes a child element.
- # element::
- # Must be an +Element+, +String+, or +Integer+. If Element,
- # the element is removed. If String, the element is found (via XPath)
+ # element::
+ # Must be an +Element+, +String+, or +Integer+. If Element,
+ # the element is removed. If String, the element is found (via XPath)
# and removed. <em>This means that any parent can remove any
# descendant.<em> If Integer, the Element indexed by that number will be
# removed.
@@ -327,14 +328,14 @@ module REXML
# Iterates through the child elements, yielding for each Element that
# has a particular attribute set.
- # key::
+ # key::
# the name of the attribute to search for
- # value::
+ # value::
# the value of the attribute
- # max::
- # (optional) causes this method to return after yielding
+ # max::
+ # (optional) causes this method to return after yielding
# for this number of matching children
- # name::
+ # name::
# (optional) if supplied, this is an XPath that filters
# the children to check.
#
@@ -348,7 +349,7 @@ module REXML
# # Yields d
# doc.root.each_element_with_attribute( 'id', '1', 0, 'd' ) {|e| p e}
def each_element_with_attribute( key, value=nil, max=0, name=nil, &block ) # :yields: Element
- each_with_something( proc {|child|
+ each_with_something( proc {|child|
if value.nil?
child.attributes[key] != nil
else
@@ -359,13 +360,13 @@ module REXML
# Iterates through the children, yielding for each Element that
# has a particular text set.
- # text::
+ # text::
# the text to search for. If nil, or not supplied, will iterate
# over all +Element+ children that contain at least one +Text+ node.
- # max::
+ # max::
# (optional) causes this method to return after yielding
# for this number of matching children
- # name::
+ # name::
# (optional) if supplied, this is an XPath that filters
# the children to check.
#
@@ -379,7 +380,7 @@ module REXML
# # Yields d
# doc.each_element_with_text(nil, 0, 'd'){|e|p e}
def each_element_with_text( text=nil, max=0, name=nil, &block ) # :yields: Element
- each_with_something( proc {|child|
+ each_with_something( proc {|child|
if text.nil?
child.has_text?
else
@@ -408,7 +409,7 @@ module REXML
# doc.root.elements['c'].next_element #-> nil
def next_element
element = next_sibling
- element = element.next_sibling until element.nil? or element.kind_of? Element
+ element = element.next_sibling until element.nil? or element.kind_of? Element
return element
end
@@ -477,7 +478,7 @@ module REXML
# this method with a nil argument. In this case, the next Text
# child becomes the first Text child. In no case is the order of
# any siblings disturbed.
- # text::
+ # text::
# If a String, a new Text child is created and added to
# this Element as the first Text child. If Text, the text is set
# as the first Child element. If nil, then any existing first Text
@@ -492,7 +493,7 @@ module REXML
def text=( text )
if text.kind_of? String
text = Text.new( text, whitespace(), nil, raw() )
- elsif text and !text.kind_of? Text
+ elsif !text.nil? and !text.kind_of? Text
text = Text.new( text.to_s, whitespace(), nil, raw() )
end
old_text = get_text
@@ -520,7 +521,7 @@ module REXML
# Note that at the end of this example, the branch has <b>3</b> nodes; the 'e'
# element and <b>2</b> Text node children.
def add_text( text )
- if text.kind_of? String
+ if text.kind_of? String
if @children[-1].kind_of? Text
@children[-1] << text
return
@@ -550,11 +551,51 @@ module REXML
# Attributes #
#################################################
+ # Fetches an attribute value or a child.
+ #
+ # If String or Symbol is specified, it's treated as attribute
+ # name. Attribute value as String or +nil+ is returned. This case
+ # is a shortcut for +attributes[name]+.
+ #
+ # If Integer is specified, it's treated as the index of
+ # child. It returns Nth child.
+ #
+ # doc = REXML::Document.new("<a attr='1'><b/><c/></a>")
+ # doc.root["attr"] # => "1"
+ # doc.root.attributes["attr"] # => "1"
+ # doc.root[1] # => <c/>
+ def [](name_or_index)
+ case name_or_index
+ when String
+ attributes[name_or_index]
+ when Symbol
+ attributes[name_or_index.to_s]
+ else
+ super
+ end
+ end
+
def attribute( name, namespace=nil )
prefix = nil
- prefix = namespaces.index(namespace) if namespace
+ if namespaces.respond_to? :key
+ prefix = namespaces.key(namespace) if namespace
+ else
+ prefix = namespaces.index(namespace) if namespace
+ end
prefix = nil if prefix == 'xmlns'
- attributes.get_attribute( "#{prefix ? prefix + ':' : ''}#{name}" )
+
+ ret_val =
+ attributes.get_attribute( "#{prefix ? prefix + ':' : ''}#{name}" )
+
+ return ret_val unless ret_val.nil?
+ return nil if prefix.nil?
+
+ # Fall back to the unprefixed attribute only when the prefix's
+ # namespace is the same as the default namespace.
+ return nil unless ( namespaces[ prefix ] == namespaces[ 'xmlns' ] )
+
+ attributes.get_attribute( name )
+
end
# Evaluates to +true+ if this element has any attributes set, false
@@ -570,7 +611,7 @@ module REXML
# the attribute is added to the list of Element attributes. If String,
# the argument is used as the name of the new attribute, and the value
# parameter must be supplied.
- # value::
+ # value::
# Required if +key+ is a String, and ignored if the first argument is
# an Attribute. This is a String, and is used as the value
# of the new Attribute. This should be the unnormalized value of the
@@ -605,7 +646,7 @@ module REXML
# either an Attribute or a String. In either case, the
# attribute is found by matching the attribute name to the argument,
# and then removed. If no attribute is found, no action is taken.
- # Returns::
+ # Returns::
# the attribute removed, or nil if this Element did not contain
# a matching attribute
# e = Element.new('E')
@@ -622,7 +663,7 @@ module REXML
# Other Utilities #
#################################################
- # Get an array of all CData children.
+ # Get an array of all CData children.
# IMMUTABLE
def cdatas
find_all { |child| child.kind_of? CData }.freeze
@@ -651,7 +692,7 @@ module REXML
#
# Writes out this element, and recursively, all children.
# output::
- # output an object which supports '<< string'; this is where the
+ # output an object which supports '<< string'; this is where the
# document will be written.
# indent::
# An integer. If -1, no indenting will be used; otherwise, the
@@ -662,19 +703,17 @@ module REXML
# pretty-printed in such a way that the added whitespace does not affect
# the parse tree of the document
# ie_hack::
- # Internet Explorer is the worst piece of crap to have ever been
- # written, with the possible exception of Windows itself. Since IE is
- # unable to parse proper XML, we have to provide a hack to generate XML
- # that IE's limited abilities can handle. This hack inserts a space
- # before the /> on empty tags. Defaults to false
+ # This hack inserts a space before the /> on empty tags to address
+ # a limitation of Internet Explorer. Defaults to false
#
# out = ''
# doc.write( out ) #-> doc is written to the string 'out'
# doc.write( $stdout ) #-> doc written to the console
- def write(writer=$stdout, indent=-1, transitive=false, ie_hack=false)
- Kernel.warn("#{self.class.name}.write is deprecated. See REXML::Formatters")
+ def write(output=$stdout, indent=-1, transitive=false, ie_hack=false)
+ Kernel.warn("#{self.class.name}.write is deprecated. See REXML::Formatters", uplevel: 1)
formatter = if indent > -1
if transitive
+ require "rexml/formatters/transitive"
REXML::Formatters::Transitive.new( indent, ie_hack )
else
REXML::Formatters::Pretty.new( indent, ie_hack )
@@ -690,8 +729,8 @@ module REXML
def __to_xpath_helper node
rv = node.expanded_name.clone
if node.parent
- results = node.parent.find_all {|n|
- n.kind_of?(REXML::Element) and n.expanded_name == node.expanded_name
+ results = node.parent.find_all {|n|
+ n.kind_of?(REXML::Element) and n.expanded_name == node.expanded_name
}
if results.length > 1
idx = results.index( node )
@@ -704,7 +743,6 @@ module REXML
# A private helper method
def each_with_something( test, max=0, name=nil )
num = 0
- child=nil
@elements.each( name ){ |child|
yield child if test.call(child) and num += 1
return if max>0 and num == max
@@ -718,7 +756,7 @@ module REXML
# A class which provides filtering of children for Elements, and
# XPath search support. You are expected to only encounter this class as
- # the <tt>element.elements</tt> object. Therefore, you are
+ # the <tt>element.elements</tt> object. Therefore, you are
# _not_ expected to instantiate this yourself.
class Elements
include Enumerable
@@ -730,7 +768,7 @@ module REXML
# Fetches a child element. Filters only Element children, regardless of
# the XPath match.
- # index::
+ # index::
# the search parameter. This is either an Integer, which
# will be used to find the index'th child Element, or an XPath,
# which will be used to search for the Element. <em>Because
@@ -740,7 +778,7 @@ module REXML
# child element is at index 1, not 0, and the +n+th element is at index
# +n+, not <tt>n-1</tt>. This is because XPath indexes element children
# starting from 1, not 0, and the indexes should be the same.
- # name::
+ # name::
# optional, and only used in the first argument is an
# Integer. In that case, the index'th child Element that has the
# supplied name will be returned. Note again that the indexes start at 1.
@@ -754,16 +792,15 @@ module REXML
raise "index (#{index}) must be >= 1" if index < 1
name = literalize(name) if name
num = 0
- child = nil
@element.find { |child|
child.kind_of? Element and
- (name.nil? ? true : child.has_name?( name )) and
+ (name.nil? ? true : child.has_name?( name )) and
(num += 1) == index
}
else
return XPath::first( @element, index )
- #{ |element|
- # return element if element.kind_of? Element
+ #{ |element|
+ # return element if element.kind_of? Element
#}
#return nil
end
@@ -772,7 +809,7 @@ module REXML
# Sets an element, replacing any previous matching element. If no
# existing element is found ,the element is added.
# index:: Used to find a matching element to replace. See []().
- # element::
+ # element::
# The element to replace the existing element with
# the previous element
# Returns:: nil if no previous element was found.
@@ -797,12 +834,12 @@ module REXML
@element.find{ |child| child.kind_of? Element}.nil?
end
- # Returns the index of the supplied child (starting at 1), or -1 if
+ # Returns the index of the supplied child (starting at 1), or -1 if
# the element is not a child
# element:: an +Element+ child
def index element
rv = 0
- found = @element.find do |child|
+ found = @element.find do |child|
child.kind_of? Element and
(rv += 1) and
child == element
@@ -812,7 +849,7 @@ module REXML
end
# Deletes a child Element
- # element::
+ # element::
# Either an Element, which is removed directly; an
# xpath, where the first matching child is removed; or an Integer,
# where the n'th Element is removed.
@@ -839,7 +876,7 @@ module REXML
# deleted = doc.elements.delete_all 'a/c' #-> [<c/>, <c/>, <c/>, <c/>]
def delete_all( xpath )
rv = []
- XPath::each( @element, xpath) {|element|
+ XPath::each( @element, xpath) {|element|
rv << element if element.kind_of? Element
}
rv.each do |element|
@@ -850,7 +887,7 @@ module REXML
end
# Adds an element
- # element::
+ # element::
# if supplied, is either an Element, String, or
# Source (see Element.initialize). If not supplied or nil, a
# new, default Element will be constructed
@@ -859,7 +896,6 @@ module REXML
# a.elements.add(Element.new('b')) #-> <a><b/></a>
# a.elements.add('c') #-> <a><b/><c/></a>
def add element=nil
- rv = nil
if element.nil?
Element.new("", self, @element.context)
elsif not element.kind_of?(Element)
@@ -875,31 +911,31 @@ module REXML
# Iterates through all of the child Elements, optionally filtering
# them by a given XPath
- # xpath::
- # optional. If supplied, this is a String XPath, and is used to
+ # xpath::
+ # optional. If supplied, this is a String XPath, and is used to
# filter the children, so that only matching children are yielded. Note
# that XPaths are automatically filtered for Elements, so that
# non-Element children will not be yielded
# doc = Document.new '<a><b/><c/><d/>sean<b/><c/><d/></a>'
- # doc.root.each {|e|p e} #-> Yields b, c, d, b, c, d elements
- # doc.root.each('b') {|e|p e} #-> Yields b, b elements
- # doc.root.each('child::node()') {|e|p e}
+ # doc.root.elements.each {|e|p e} #-> Yields b, c, d, b, c, d elements
+ # doc.root.elements.each('b') {|e|p e} #-> Yields b, b elements
+ # doc.root.elements.each('child::node()') {|e|p e}
# #-> Yields <b/>, <c/>, <d/>, <b/>, <c/>, <d/>
# XPath.each(doc.root, 'child::node()', &block)
# #-> Yields <b/>, <c/>, <d/>, sean, <b/>, <c/>, <d/>
- def each( xpath=nil, &block)
+ def each( xpath=nil )
XPath::each( @element, xpath ) {|e| yield e if e.kind_of? Element }
end
- def collect( xpath=nil, &block )
+ def collect( xpath=nil )
collection = []
- XPath::each( @element, xpath ) {|e|
- collection << yield(e) if e.kind_of?(Element)
+ XPath::each( @element, xpath ) {|e|
+ collection << yield(e) if e.kind_of?(Element)
}
collection
end
- def inject( xpath=nil, initial=nil, &block )
+ def inject( xpath=nil, initial=nil )
first = true
XPath::each( @element, xpath ) {|e|
if (e.kind_of? Element)
@@ -929,7 +965,7 @@ module REXML
# supplied XPath matches non-Element children.
# doc = Document.new '<a>sean<b/>elliott<c/></a>'
# doc.root.elements.to_a #-> [ <b/>, <c/> ]
- # doc.root.elements.to_a("child::node()") #-> [ <b/>, <c/> ]
+ # doc.root.elements.to_a("child::node()") #-> [ <b/>, <c/> ]
# XPath.match(doc.root, "child::node()") #-> [ sean, <b/>, elliott, <c/> ]
def to_a( xpath=nil )
rv = XPath.match( @element, xpath )
@@ -949,7 +985,7 @@ module REXML
# ATTRIBUTES #
########################################################################
- # A class that defines the set of Attributes of an Element and provides
+ # A class that defines the set of Attributes of an Element and provides
# operations for accessing elements in that set.
class Attributes < Hash
# Constructor
@@ -961,11 +997,11 @@ module REXML
# Fetches an attribute value. If you want to get the Attribute itself,
# use get_attribute()
# name:: an XPath attribute name. Namespaces are relevant here.
- # Returns::
+ # Returns::
# the String value of the matching attribute, or +nil+ if no
# matching attribute was found. This is the unnormalized value
# (with entities expanded).
- #
+ #
# doc = Document.new "<a foo:att='1' bar:att='2' att='&lt;'/>"
# doc.root.attributes['att'] #-> '<'
# doc.root.attributes['bar:att'] #-> '2'
@@ -976,7 +1012,7 @@ module REXML
end
def to_a
- values.flatten
+ enum_for(:each_attribute).to_a
end
# Returns the number of attributes the owning Element contains.
@@ -991,7 +1027,7 @@ module REXML
# Iterates over the attributes of an Element. Yields actual Attribute
# nodes, not String values.
- #
+ #
# doc = Document.new '<a x="1" y="2"/>'
# doc.root.attributes.each_attribute {|attr|
# p attr.expanded_name+" => "+attr.value
@@ -1013,12 +1049,12 @@ module REXML
# doc.root.attributes.each {|name, value| p name+" => "+value }
def each
each_attribute do |attr|
- yield attr.expanded_name, attr.value
+ yield [attr.expanded_name, attr.value]
end
end
# Fetches an attribute
- # name::
+ # name::
# the name by which to search for the attribute. Can be a
# <tt>prefix:name</tt> namespace name.
# Returns:: The first matching attribute, or nil if there was none. This
@@ -1062,22 +1098,22 @@ module REXML
# Sets an attribute, overwriting any existing attribute value by the
# same name. Namespace is significant.
# name:: the name of the attribute
- # value::
+ # value::
# (optional) If supplied, the value of the attribute. If
# nil, any existing matching attribute is deleted.
- # Returns::
+ # Returns::
# Owning element
# doc = Document.new "<a x:foo='1' foo='3'/>"
# doc.root.attributes['y:foo'] = '2'
# doc.root.attributes['foo'] = '4'
# doc.root.attributes['x:foo'] = nil
def []=( name, value )
- if value.nil? # Delete the named attribute
+ if value.nil? # Delete the named attribute
attr = get_attribute(name)
delete attr
return
end
- element_document = @element.document
+
unless value.kind_of? Attribute
if @element.document and @element.document.doctype
value = Text::normalize( value, @element.document.doctype )
@@ -1094,23 +1130,23 @@ module REXML
old_attr[value.prefix] = value
elsif old_attr.prefix != value.prefix
# Check for conflicting namespaces
- raise ParseException.new(
+ raise ParseException.new(
"Namespace conflict in adding attribute \"#{value.name}\": "+
"Prefix \"#{old_attr.prefix}\" = "+
"\"#{@element.namespace(old_attr.prefix)}\" and prefix "+
- "\"#{value.prefix}\" = \"#{@element.namespace(value.prefix)}\"") if
+ "\"#{value.prefix}\" = \"#{@element.namespace(value.prefix)}\"") if
value.prefix != "xmlns" and old_attr.prefix != "xmlns" and
- @element.namespace( old_attr.prefix ) ==
+ @element.namespace( old_attr.prefix ) ==
@element.namespace( value.prefix )
- store value.name, { old_attr.prefix => old_attr,
- value.prefix => value }
+ store value.name, { old_attr.prefix => old_attr,
+ value.prefix => value }
else
store value.name, value
end
return @element
end
- # Returns an array of Strings containing all of the prefixes declared
+ # Returns an array of Strings containing all of the prefixes declared
# by this set of # attributes. The array does not include the default
# namespace declaration, if one exists.
# doc = Document.new("<a xmlns='foo' xmlns:x='bar' xmlns:y='twee' "+
@@ -1149,7 +1185,7 @@ module REXML
end
# Removes an attribute
- # attribute::
+ # attribute::
# either a String, which is the name of the attribute to remove --
# namespaces are significant here -- or the attribute to remove.
# Returns:: the owning element
@@ -1170,9 +1206,8 @@ module REXML
prefix = '' unless prefix
end
old = fetch(name, nil)
- attr = nil
if old.kind_of? Hash # the supplied attribute is one of many
- attr = old.delete(prefix)
+ old.delete(prefix)
if old.size == 1
repl = nil
old.each_value{|v| repl = v}
@@ -1181,8 +1216,7 @@ module REXML
elsif old.nil?
return @element
else # the supplied attribute is a top-level one
- attr = old
- res = super(name)
+ super(name)
end
@element
end
@@ -1197,12 +1231,12 @@ module REXML
alias :<< :add
# Deletes all attributes matching a name. Namespaces are significant.
- # name::
+ # name::
# A String; all attributes that match this path will be removed
# Returns:: an Array of the Attributes that were removed
def delete_all( name )
rv = []
- each_attribute { |attribute|
+ each_attribute { |attribute|
rv << attribute if attribute.expanded_name == name
}
rv.each{ |attr| attr.remove }
@@ -1212,16 +1246,20 @@ module REXML
# The +get_attribute_ns+ method retrieves a method by its namespace
# and name. Thus it is possible to reliably identify an attribute
# even if an XML processor has changed the prefix.
- #
+ #
# Method contributed by Henrik Martensson
def get_attribute_ns(namespace, name)
+ result = nil
each_attribute() { |attribute|
if name == attribute.name &&
- namespace == attribute.namespace()
- return attribute
+ namespace == attribute.namespace() &&
+ ( !namespace.empty? || !attribute.fully_expanded_name.index(':') )
+ # foo will match xmlns:foo, but only if foo isn't also an attribute
+ result = attribute if !result or !namespace.empty? or
+ !attribute.fully_expanded_name.index(':')
end
}
- nil
+ result
end
end
end
diff --git a/lib/rexml/encoding.rb b/lib/rexml/encoding.rb
index a01763be99..da2d70d6c9 100644
--- a/lib/rexml/encoding.rb
+++ b/lib/rexml/encoding.rb
@@ -1,71 +1,51 @@
-# -*- mode: ruby; ruby-indent-level: 2; indent-tabs-mode: t; tab-width: 2 -*- vim: sw=2 ts=2
+# coding: US-ASCII
+# frozen_string_literal: false
module REXML
module Encoding
- @encoding_methods = {}
- def self.register(enc, &block)
- @encoding_methods[enc] = block
- end
- def self.apply(obj, enc)
- @encoding_methods[enc][obj]
- end
- def self.encoding_method(enc)
- @encoding_methods[enc]
- end
-
- # Native, default format is UTF-8, so it is declared here rather than in
- # an encodings/ definition.
- UTF_8 = 'UTF-8'
- UTF_16 = 'UTF-16'
- UNILE = 'UNILE'
-
# ID ---> Encoding name
attr_reader :encoding
- def encoding=( enc )
- old_verbosity = $VERBOSE
- begin
- $VERBOSE = false
- enc = enc.nil? ? nil : enc.upcase
- return false if defined? @encoding and enc == @encoding
- if enc and enc != UTF_8
- @encoding = enc
- raise ArgumentError, "Bad encoding name #@encoding" unless @encoding =~ /^[\w-]+$/
- @encoding.untaint
- begin
- require 'rexml/encodings/ICONV.rb'
- Encoding.apply(self, "ICONV")
- rescue LoadError, Exception
- begin
- enc_file = File.join( "rexml", "encodings", "#@encoding.rb" )
- require enc_file
- Encoding.apply(self, @encoding)
- rescue LoadError => err
- puts err.message
- raise ArgumentError, "No decoder found for encoding #@encoding. Please install iconv."
- end
- end
- else
- @encoding = UTF_8
- require 'rexml/encodings/UTF-8.rb'
- Encoding.apply(self, @encoding)
+ def encoding=(encoding)
+ encoding = encoding.name if encoding.is_a?(Encoding)
+ if encoding.is_a?(String)
+ original_encoding = encoding
+ encoding = find_encoding(encoding)
+ unless encoding
+ raise ArgumentError, "Bad encoding name #{original_encoding}"
end
- ensure
- $VERBOSE = old_verbosity
+ end
+ return false if defined?(@encoding) and encoding == @encoding
+ if encoding
+ @encoding = encoding.upcase
+ else
+ @encoding = 'UTF-8'
end
true
end
- def check_encoding str
- # We have to recognize UTF-16, LSB UTF-16, and UTF-8
- if str[0] == 0xfe && str[1] == 0xff
- str[0,2] = ""
- return UTF_16
- elsif str[0] == 0xff && str[1] == 0xfe
- str[0,2] = ""
- return UNILE
+ def encode(string)
+ string.encode(@encoding)
+ end
+
+ def decode(string)
+ string.encode(::Encoding::UTF_8, @encoding)
+ end
+
+ private
+ def find_encoding(name)
+ case name
+ when /\Ashift-jis\z/i
+ return "SHIFT_JIS"
+ when /\ACP-(\d+)\z/
+ name = "CP#{$1}"
+ when /\AUTF-8\z/i
+ return name
+ end
+ begin
+ ::Encoding::Converter.search_convpath(name, 'UTF-8')
+ rescue ::Encoding::ConverterNotFoundError
+ return nil
end
- str =~ /^\s*<\?xml\s+version\s*=\s*(['"]).*?\1\s+encoding\s*=\s*(["'])(.*?)\2/um
- return $3.upcase if $3
- return UTF_8
+ name
end
end
end
diff --git a/lib/rexml/encodings/CP-1252.rb b/lib/rexml/encodings/CP-1252.rb
deleted file mode 100644
index 8675f9ff98..0000000000
--- a/lib/rexml/encodings/CP-1252.rb
+++ /dev/null
@@ -1,103 +0,0 @@
-#
-# This class was contributed by Mikko Tiihonen mikko DOT tiihonen AT hut DOT fi
-#
-module REXML
- module Encoding
- register( "CP-1252" ) do |o|
- class << o
- alias encode encode_cp1252
- alias decode decode_cp1252
- end
- end
-
- # Convert from UTF-8
- def encode_cp1252(content)
- array_utf8 = content.unpack('U*')
- array_enc = []
- array_utf8.each do |num|
- case num
- # shortcut first bunch basic characters
- when 0..0xFF; array_enc << num
- # characters added compared to iso-8859-1
- when 0x20AC; array_enc << 0x80 # 0xe2 0x82 0xac
- when 0x201A; array_enc << 0x82 # 0xe2 0x82 0x9a
- when 0x0192; array_enc << 0x83 # 0xc6 0x92
- when 0x201E; array_enc << 0x84 # 0xe2 0x82 0x9e
- when 0x2026; array_enc << 0x85 # 0xe2 0x80 0xa6
- when 0x2020; array_enc << 0x86 # 0xe2 0x80 0xa0
- when 0x2021; array_enc << 0x87 # 0xe2 0x80 0xa1
- when 0x02C6; array_enc << 0x88 # 0xcb 0x86
- when 0x2030; array_enc << 0x89 # 0xe2 0x80 0xb0
- when 0x0160; array_enc << 0x8A # 0xc5 0xa0
- when 0x2039; array_enc << 0x8B # 0xe2 0x80 0xb9
- when 0x0152; array_enc << 0x8C # 0xc5 0x92
- when 0x017D; array_enc << 0x8E # 0xc5 0xbd
- when 0x2018; array_enc << 0x91 # 0xe2 0x80 0x98
- when 0x2019; array_enc << 0x92 # 0xe2 0x80 0x99
- when 0x201C; array_enc << 0x93 # 0xe2 0x80 0x9c
- when 0x201D; array_enc << 0x94 # 0xe2 0x80 0x9d
- when 0x2022; array_enc << 0x95 # 0xe2 0x80 0xa2
- when 0x2013; array_enc << 0x96 # 0xe2 0x80 0x93
- when 0x2014; array_enc << 0x97 # 0xe2 0x80 0x94
- when 0x02DC; array_enc << 0x98 # 0xcb 0x9c
- when 0x2122; array_enc << 0x99 # 0xe2 0x84 0xa2
- when 0x0161; array_enc << 0x9A # 0xc5 0xa1
- when 0x203A; array_enc << 0x9B # 0xe2 0x80 0xba
- when 0x0152; array_enc << 0x9C # 0xc5 0x93
- when 0x017E; array_enc << 0x9E # 0xc5 0xbe
- when 0x0178; array_enc << 0x9F # 0xc5 0xb8
- else
- # all remaining basic characters can be used directly
- if num <= 0xFF
- array_enc << num
- else
- # Numeric entity (&#nnnn;); shard by Stefan Scholl
- array_enc.concat "&\##{num};".unpack('C*')
- end
- end
- end
- array_enc.pack('C*')
- end
-
- # Convert to UTF-8
- def decode_cp1252(str)
- array_latin9 = str.unpack('C*')
- array_enc = []
- array_latin9.each do |num|
- case num
- # characters that added compared to iso-8859-1
- when 0x80; array_enc << 0x20AC # 0xe2 0x82 0xac
- when 0x82; array_enc << 0x201A # 0xe2 0x82 0x9a
- when 0x83; array_enc << 0x0192 # 0xc6 0x92
- when 0x84; array_enc << 0x201E # 0xe2 0x82 0x9e
- when 0x85; array_enc << 0x2026 # 0xe2 0x80 0xa6
- when 0x86; array_enc << 0x2020 # 0xe2 0x80 0xa0
- when 0x87; array_enc << 0x2021 # 0xe2 0x80 0xa1
- when 0x88; array_enc << 0x02C6 # 0xcb 0x86
- when 0x89; array_enc << 0x2030 # 0xe2 0x80 0xb0
- when 0x8A; array_enc << 0x0160 # 0xc5 0xa0
- when 0x8B; array_enc << 0x2039 # 0xe2 0x80 0xb9
- when 0x8C; array_enc << 0x0152 # 0xc5 0x92
- when 0x8E; array_enc << 0x017D # 0xc5 0xbd
- when 0x91; array_enc << 0x2018 # 0xe2 0x80 0x98
- when 0x92; array_enc << 0x2019 # 0xe2 0x80 0x99
- when 0x93; array_enc << 0x201C # 0xe2 0x80 0x9c
- when 0x94; array_enc << 0x201D # 0xe2 0x80 0x9d
- when 0x95; array_enc << 0x2022 # 0xe2 0x80 0xa2
- when 0x96; array_enc << 0x2013 # 0xe2 0x80 0x93
- when 0x97; array_enc << 0x2014 # 0xe2 0x80 0x94
- when 0x98; array_enc << 0x02DC # 0xcb 0x9c
- when 0x99; array_enc << 0x2122 # 0xe2 0x84 0xa2
- when 0x9A; array_enc << 0x0161 # 0xc5 0xa1
- when 0x9B; array_enc << 0x203A # 0xe2 0x80 0xba
- when 0x9C; array_enc << 0x0152 # 0xc5 0x93
- when 0x9E; array_enc << 0x017E # 0xc5 0xbe
- when 0x9F; array_enc << 0x0178 # 0xc5 0xb8
- else
- array_enc << num
- end
- end
- array_enc.pack('U*')
- end
- end
-end
diff --git a/lib/rexml/encodings/EUC-JP.rb b/lib/rexml/encodings/EUC-JP.rb
deleted file mode 100644
index db37b6bf0d..0000000000
--- a/lib/rexml/encodings/EUC-JP.rb
+++ /dev/null
@@ -1,35 +0,0 @@
-module REXML
- module Encoding
- begin
- require 'uconv'
-
- def decode_eucjp(str)
- Uconv::euctou8(str)
- end
-
- def encode_eucjp content
- Uconv::u8toeuc(content)
- end
- rescue LoadError
- require 'nkf'
-
- EUCTOU8 = '-Ewm0'
- U8TOEUC = '-Wem0'
-
- def decode_eucjp(str)
- NKF.nkf(EUCTOU8, str)
- end
-
- def encode_eucjp content
- NKF.nkf(U8TOEUC, content)
- end
- end
-
- register("EUC-JP") do |obj|
- class << obj
- alias decode decode_eucjp
- alias encode encode_eucjp
- end
- end
- end
-end
diff --git a/lib/rexml/encodings/ICONV.rb b/lib/rexml/encodings/ICONV.rb
deleted file mode 100644
index 172fba7cd1..0000000000
--- a/lib/rexml/encodings/ICONV.rb
+++ /dev/null
@@ -1,22 +0,0 @@
-require "iconv"
-raise LoadError unless defined? Iconv
-
-module REXML
- module Encoding
- def decode_iconv(str)
- Iconv.conv(UTF_8, @encoding, str)
- end
-
- def encode_iconv(content)
- Iconv.conv(@encoding, UTF_8, content)
- end
-
- register("ICONV") do |obj|
- Iconv.conv(UTF_8, obj.encoding, nil)
- class << obj
- alias decode decode_iconv
- alias encode encode_iconv
- end
- end
- end
-end
diff --git a/lib/rexml/encodings/ISO-8859-1.rb b/lib/rexml/encodings/ISO-8859-1.rb
deleted file mode 100644
index 2873d13bf0..0000000000
--- a/lib/rexml/encodings/ISO-8859-1.rb
+++ /dev/null
@@ -1,7 +0,0 @@
-require 'rexml/encodings/US-ASCII'
-
-module REXML
- module Encoding
- register("ISO-8859-1", &encoding_method("US-ASCII"))
- end
-end
diff --git a/lib/rexml/encodings/ISO-8859-15.rb b/lib/rexml/encodings/ISO-8859-15.rb
deleted file mode 100644
index 8dea0d38a4..0000000000
--- a/lib/rexml/encodings/ISO-8859-15.rb
+++ /dev/null
@@ -1,72 +0,0 @@
-#
-# This class was contributed by Mikko Tiihonen mikko DOT tiihonen AT hut DOT fi
-#
-module REXML
- module Encoding
- register("ISO-8859-15") do |o|
- alias encode to_iso_8859_15
- alias decode from_iso_8859_15
- end
-
- # Convert from UTF-8
- def to_iso_8859_15(content)
- array_utf8 = content.unpack('U*')
- array_enc = []
- array_utf8.each do |num|
- case num
- # shortcut first bunch basic characters
- when 0..0xA3; array_enc << num
- # characters removed compared to iso-8859-1
- when 0xA4; array_enc << '&#164;'
- when 0xA6; array_enc << '&#166;'
- when 0xA8; array_enc << '&#168;'
- when 0xB4; array_enc << '&#180;'
- when 0xB8; array_enc << '&#184;'
- when 0xBC; array_enc << '&#188;'
- when 0xBD; array_enc << '&#189;'
- when 0xBE; array_enc << '&#190;'
- # characters added compared to iso-8859-1
- when 0x20AC; array_enc << 0xA4 # 0xe2 0x82 0xac
- when 0x0160; array_enc << 0xA6 # 0xc5 0xa0
- when 0x0161; array_enc << 0xA8 # 0xc5 0xa1
- when 0x017D; array_enc << 0xB4 # 0xc5 0xbd
- when 0x017E; array_enc << 0xB8 # 0xc5 0xbe
- when 0x0152; array_enc << 0xBC # 0xc5 0x92
- when 0x0153; array_enc << 0xBD # 0xc5 0x93
- when 0x0178; array_enc << 0xBE # 0xc5 0xb8
- else
- # all remaining basic characters can be used directly
- if num <= 0xFF
- array_enc << num
- else
- # Numeric entity (&#nnnn;); shard by Stefan Scholl
- array_enc.concat "&\##{num};".unpack('C*')
- end
- end
- end
- array_enc.pack('C*')
- end
-
- # Convert to UTF-8
- def from_iso_8859_15(str)
- array_latin9 = str.unpack('C*')
- array_enc = []
- array_latin9.each do |num|
- case num
- # characters that differ compared to iso-8859-1
- when 0xA4; array_enc << 0x20AC
- when 0xA6; array_enc << 0x0160
- when 0xA8; array_enc << 0x0161
- when 0xB4; array_enc << 0x017D
- when 0xB8; array_enc << 0x017E
- when 0xBC; array_enc << 0x0152
- when 0xBD; array_enc << 0x0153
- when 0xBE; array_enc << 0x0178
- else
- array_enc << num
- end
- end
- array_enc.pack('U*')
- end
- end
-end
diff --git a/lib/rexml/encodings/SHIFT-JIS.rb b/lib/rexml/encodings/SHIFT-JIS.rb
deleted file mode 100644
index 9e0f4af20e..0000000000
--- a/lib/rexml/encodings/SHIFT-JIS.rb
+++ /dev/null
@@ -1,37 +0,0 @@
-module REXML
- module Encoding
- begin
- require 'uconv'
-
- def decode_sjis content
- Uconv::sjistou8(content)
- end
-
- def encode_sjis(str)
- Uconv::u8tosjis(str)
- end
- rescue LoadError
- require 'nkf'
-
- SJISTOU8 = '-Swm0x'
- U8TOSJIS = '-Wsm0x'
-
- def decode_sjis(str)
- NKF.nkf(SJISTOU8, str)
- end
-
- def encode_sjis content
- NKF.nkf(U8TOSJIS, content)
- end
- end
-
- b = proc do |obj|
- class << obj
- alias decode decode_sjis
- alias encode encode_sjis
- end
- end
- register("SHIFT-JIS", &b)
- register("SHIFT_JIS", &b)
- end
-end
diff --git a/lib/rexml/encodings/SHIFT_JIS.rb b/lib/rexml/encodings/SHIFT_JIS.rb
deleted file mode 100644
index e355704a7c..0000000000
--- a/lib/rexml/encodings/SHIFT_JIS.rb
+++ /dev/null
@@ -1 +0,0 @@
-require 'rexml/encodings/SHIFT-JIS'
diff --git a/lib/rexml/encodings/UNILE.rb b/lib/rexml/encodings/UNILE.rb
deleted file mode 100644
index d054140c40..0000000000
--- a/lib/rexml/encodings/UNILE.rb
+++ /dev/null
@@ -1,34 +0,0 @@
-module REXML
- module Encoding
- def encode_unile content
- array_utf8 = content.unpack("U*")
- array_enc = []
- array_utf8.each do |num|
- if ((num>>16) > 0)
- array_enc << ??
- array_enc << 0
- else
- array_enc << (num & 0xFF)
- array_enc << (num >> 8)
- end
- end
- array_enc.pack('C*')
- end
-
- def decode_unile(str)
- array_enc=str.unpack('C*')
- array_utf8 = []
- 0.step(array_enc.size-1, 2){|i|
- array_utf8 << (array_enc.at(i) + array_enc.at(i+1)*0x100)
- }
- array_utf8.pack('U*')
- end
-
- register(UNILE) do |obj|
- class << obj
- alias decode decode_unile
- alias encode encode_unile
- end
- end
- end
-end
diff --git a/lib/rexml/encodings/US-ASCII.rb b/lib/rexml/encodings/US-ASCII.rb
deleted file mode 100644
index fb4c217074..0000000000
--- a/lib/rexml/encodings/US-ASCII.rb
+++ /dev/null
@@ -1,30 +0,0 @@
-module REXML
- module Encoding
- # Convert from UTF-8
- def encode_ascii content
- array_utf8 = content.unpack('U*')
- array_enc = []
- array_utf8.each do |num|
- if num <= 0x7F
- array_enc << num
- else
- # Numeric entity (&#nnnn;); shard by Stefan Scholl
- array_enc.concat "&\##{num};".unpack('C*')
- end
- end
- array_enc.pack('C*')
- end
-
- # Convert to UTF-8
- def decode_ascii(str)
- str.unpack('C*').pack('U*')
- end
-
- register("US-ASCII") do |obj|
- class << obj
- alias decode decode_ascii
- alias encode encode_ascii
- end
- end
- end
-end
diff --git a/lib/rexml/encodings/UTF-16.rb b/lib/rexml/encodings/UTF-16.rb
deleted file mode 100644
index 007c493d9c..0000000000
--- a/lib/rexml/encodings/UTF-16.rb
+++ /dev/null
@@ -1,35 +0,0 @@
-module REXML
- module Encoding
- def encode_utf16 content
- array_utf8 = content.unpack("U*")
- array_enc = []
- array_utf8.each do |num|
- if ((num>>16) > 0)
- array_enc << 0
- array_enc << ??
- else
- array_enc << (num >> 8)
- array_enc << (num & 0xFF)
- end
- end
- array_enc.pack('C*')
- end
-
- def decode_utf16(str)
- str = str[2..-1] if /^\376\377/n =~ str
- array_enc=str.unpack('C*')
- array_utf8 = []
- 0.step(array_enc.size-1, 2){|i|
- array_utf8 << (array_enc.at(i+1) + array_enc.at(i)*0x100)
- }
- array_utf8.pack('U*')
- end
-
- register(UTF_16) do |obj|
- class << obj
- alias decode decode_utf16
- alias encode encode_utf16
- end
- end
- end
-end
diff --git a/lib/rexml/encodings/UTF-8.rb b/lib/rexml/encodings/UTF-8.rb
deleted file mode 100644
index bb08f44100..0000000000
--- a/lib/rexml/encodings/UTF-8.rb
+++ /dev/null
@@ -1,18 +0,0 @@
-module REXML
- module Encoding
- def encode_utf8 content
- content
- end
-
- def decode_utf8(str)
- str
- end
-
- register(UTF_8) do |obj|
- class << obj
- alias decode decode_utf8
- alias encode encode_utf8
- end
- end
- end
-end
diff --git a/lib/rexml/entity.rb b/lib/rexml/entity.rb
index ff2d45f39b..97c7b6b42f 100644
--- a/lib/rexml/entity.rb
+++ b/lib/rexml/entity.rb
@@ -1,165 +1,171 @@
+# frozen_string_literal: false
require 'rexml/child'
require 'rexml/source'
require 'rexml/xmltokens'
module REXML
- # God, I hate DTDs. I really do. Why this idiot standard still
- # plagues us is beyond me.
- class Entity < Child
- include XMLTokens
- PUBIDCHAR = "\x20\x0D\x0Aa-zA-Z0-9\\-()+,./:=?;!*@$_%#"
- SYSTEMLITERAL = %Q{((?:"[^"]*")|(?:'[^']*'))}
- PUBIDLITERAL = %Q{("[#{PUBIDCHAR}']*"|'[#{PUBIDCHAR}]*')}
- EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))"
- NDATADECL = "\\s+NDATA\\s+#{NAME}"
- PEREFERENCE = "%#{NAME};"
- ENTITYVALUE = %Q{((?:"(?:[^%&"]|#{PEREFERENCE}|#{REFERENCE})*")|(?:'([^%&']|#{PEREFERENCE}|#{REFERENCE})*'))}
- PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})"
- ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
- PEDECL = "<!ENTITY\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
- GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
- ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um
+ class Entity < Child
+ include XMLTokens
+ PUBIDCHAR = "\x20\x0D\x0Aa-zA-Z0-9\\-()+,./:=?;!*@$_%#"
+ SYSTEMLITERAL = %Q{((?:"[^"]*")|(?:'[^']*'))}
+ PUBIDLITERAL = %Q{("[#{PUBIDCHAR}']*"|'[#{PUBIDCHAR}]*')}
+ EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))"
+ NDATADECL = "\\s+NDATA\\s+#{NAME}"
+ PEREFERENCE = "%#{NAME};"
+ ENTITYVALUE = %Q{((?:"(?:[^%&"]|#{PEREFERENCE}|#{REFERENCE})*")|(?:'([^%&']|#{PEREFERENCE}|#{REFERENCE})*'))}
+ PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})"
+ ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
+ PEDECL = "<!ENTITY\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
+ GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
+ ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um
- attr_reader :name, :external, :ref, :ndata, :pubid
+ attr_reader :name, :external, :ref, :ndata, :pubid
- # Create a new entity. Simple entities can be constructed by passing a
- # name, value to the constructor; this creates a generic, plain entity
- # reference. For anything more complicated, you have to pass a Source to
- # the constructor with the entity definiton, or use the accessor methods.
- # +WARNING+: There is no validation of entity state except when the entity
- # is read from a stream. If you start poking around with the accessors,
- # you can easily create a non-conformant Entity. The best thing to do is
- # dump the stupid DTDs and use XMLSchema instead.
- #
- # e = Entity.new( 'amp', '&' )
- def initialize stream, value=nil, parent=nil, reference=false
- super(parent)
- @ndata = @pubid = @value = @external = nil
- if stream.kind_of? Array
- @name = stream[1]
- if stream[-1] == '%'
- @reference = true
- stream.pop
- else
- @reference = false
- end
- if stream[2] =~ /SYSTEM|PUBLIC/
- @external = stream[2]
- if @external == 'SYSTEM'
- @ref = stream[3]
- @ndata = stream[4] if stream.size == 5
- else
- @pubid = stream[3]
- @ref = stream[4]
- end
- else
- @value = stream[2]
- end
- else
- @reference = reference
- @external = nil
- @name = stream
- @value = value
- end
- end
+ # Create a new entity. Simple entities can be constructed by passing a
+ # name, value to the constructor; this creates a generic, plain entity
+ # reference. For anything more complicated, you have to pass a Source to
+ # the constructor with the entity definition, or use the accessor methods.
+ # +WARNING+: There is no validation of entity state except when the entity
+ # is read from a stream. If you start poking around with the accessors,
+ # you can easily create a non-conformant Entity.
+ #
+ # e = Entity.new( 'amp', '&' )
+ def initialize stream, value=nil, parent=nil, reference=false
+ super(parent)
+ @ndata = @pubid = @value = @external = nil
+ if stream.kind_of? Array
+ @name = stream[1]
+ if stream[-1] == '%'
+ @reference = true
+ stream.pop
+ else
+ @reference = false
+ end
+ if stream[2] =~ /SYSTEM|PUBLIC/
+ @external = stream[2]
+ if @external == 'SYSTEM'
+ @ref = stream[3]
+ @ndata = stream[4] if stream.size == 5
+ else
+ @pubid = stream[3]
+ @ref = stream[4]
+ end
+ else
+ @value = stream[2]
+ end
+ else
+ @reference = reference
+ @external = nil
+ @name = stream
+ @value = value
+ end
+ end
- # Evaluates whether the given string matchs an entity definition,
- # returning true if so, and false otherwise.
- def Entity::matches? string
- (ENTITYDECL =~ string) == 0
- end
+ # Evaluates whether the given string matches an entity definition,
+ # returning true if so, and false otherwise.
+ def Entity::matches? string
+ (ENTITYDECL =~ string) == 0
+ end
- # Evaluates to the unnormalized value of this entity; that is, replacing
- # all entities -- both %ent; and &ent; entities. This differs from
- # +value()+ in that +value+ only replaces %ent; entities.
- def unnormalized
- v = value()
- return nil if v.nil?
- @unnormalized = Text::unnormalize(v, parent)
- @unnormalized
- end
+ # Evaluates to the unnormalized value of this entity; that is, replacing
+ # all entities -- both %ent; and &ent; entities. This differs from
+ # +value()+ in that +value+ only replaces %ent; entities.
+ def unnormalized
+ document.record_entity_expansion unless document.nil?
+ v = value()
+ return nil if v.nil?
+ @unnormalized = Text::unnormalize(v, parent)
+ @unnormalized
+ end
- #once :unnormalized
+ #once :unnormalized
- # Returns the value of this entity unprocessed -- raw. This is the
- # normalized value; that is, with all %ent; and &ent; entities intact
- def normalized
- @value
- end
+ # Returns the value of this entity unprocessed -- raw. This is the
+ # normalized value; that is, with all %ent; and &ent; entities intact
+ def normalized
+ @value
+ end
- # Write out a fully formed, correct entity definition (assuming the Entity
- # object itself is valid.)
+ # Write out a fully formed, correct entity definition (assuming the Entity
+ # object itself is valid.)
#
# out::
# An object implementing <TT>&lt;&lt;<TT> to which the entity will be
# output
# indent::
# *DEPRECATED* and ignored
- def write out, indent=-1
- out << '<!ENTITY '
- out << '% ' if @reference
- out << @name
- out << ' '
- if @external
- out << @external << ' '
- if @pubid
- q = @pubid.include?('"')?"'":'"'
- out << q << @pubid << q << ' '
- end
- q = @ref.include?('"')?"'":'"'
- out << q << @ref << q
- out << ' NDATA ' << @ndata if @ndata
- else
- q = @value.include?('"')?"'":'"'
- out << q << @value << q
- end
- out << '>'
- end
+ def write out, indent=-1
+ out << '<!ENTITY '
+ out << '% ' if @reference
+ out << @name
+ out << ' '
+ if @external
+ out << @external << ' '
+ if @pubid
+ q = @pubid.include?('"')?"'":'"'
+ out << q << @pubid << q << ' '
+ end
+ q = @ref.include?('"')?"'":'"'
+ out << q << @ref << q
+ out << ' NDATA ' << @ndata if @ndata
+ else
+ q = @value.include?('"')?"'":'"'
+ out << q << @value << q
+ end
+ out << '>'
+ end
- # Returns this entity as a string. See write().
- def to_s
- rv = ''
- write rv
- rv
- end
+ # Returns this entity as a string. See write().
+ def to_s
+ rv = ''
+ write rv
+ rv
+ end
- PEREFERENCE_RE = /#{PEREFERENCE}/um
- # Returns the value of this entity. At the moment, only internal entities
- # are processed. If the value contains internal references (IE,
- # %blah;), those are replaced with their values. IE, if the doctype
- # contains:
- # <!ENTITY % foo "bar">
- # <!ENTITY yada "nanoo %foo; nanoo>
- # then:
- # doctype.entity('yada').value #-> "nanoo bar nanoo"
- def value
- if @value
- matches = @value.scan(PEREFERENCE_RE)
- rv = @value.clone
- if @parent
- matches.each do |entity_reference|
- entity_value = @parent.entity( entity_reference[0] )
- rv.gsub!( /%#{entity_reference};/um, entity_value )
- end
- end
- return rv
- end
- nil
- end
- end
+ PEREFERENCE_RE = /#{PEREFERENCE}/um
+ # Returns the value of this entity. At the moment, only internal entities
+ # are processed. If the value contains internal references (IE,
+ # %blah;), those are replaced with their values. IE, if the doctype
+ # contains:
+ # <!ENTITY % foo "bar">
+ # <!ENTITY yada "nanoo %foo; nanoo">
+ # then:
+ # doctype.entity('yada').value #-> "nanoo bar nanoo"
+ def value
+ if @value
+ matches = @value.scan(PEREFERENCE_RE)
+ rv = @value.clone
+ if @parent
+ sum = 0
+ matches.each do |entity_reference|
+ entity_value = @parent.entity( entity_reference[0] )
+ if sum + entity_value.bytesize > Security.entity_expansion_text_limit
+ raise "entity expansion has grown too large"
+ else
+ sum += entity_value.bytesize
+ end
+ rv.gsub!( /%#{entity_reference.join};/um, entity_value )
+ end
+ end
+ return rv
+ end
+ nil
+ end
+ end
- # This is a set of entity constants -- the ones defined in the XML
- # specification. These are +gt+, +lt+, +amp+, +quot+ and +apos+.
- module EntityConst
- # +>+
- GT = Entity.new( 'gt', '>' )
- # +<+
- LT = Entity.new( 'lt', '<' )
- # +&+
- AMP = Entity.new( 'amp', '&' )
- # +"+
- QUOT = Entity.new( 'quot', '"' )
- # +'+
- APOS = Entity.new( 'apos', "'" )
- end
+ # This is a set of entity constants -- the ones defined in the XML
+ # specification. These are +gt+, +lt+, +amp+, +quot+ and +apos+.
+ # CAUTION: these entities do not have a parent or a document
+ module EntityConst
+ # +>+
+ GT = Entity.new( 'gt', '>' )
+ # +<+
+ LT = Entity.new( 'lt', '<' )
+ # +&+
+ AMP = Entity.new( 'amp', '&' )
+ # +"+
+ QUOT = Entity.new( 'quot', '"' )
+ # +'+
+ APOS = Entity.new( 'apos', "'" )
+ end
end
diff --git a/lib/rexml/formatters/default.rb b/lib/rexml/formatters/default.rb
index 77381bdf84..c375f1468b 100644
--- a/lib/rexml/formatters/default.rb
+++ b/lib/rexml/formatters/default.rb
@@ -1,7 +1,8 @@
+# frozen_string_literal: false
module REXML
module Formatters
class Default
- # Prints out the XML document with no formatting -- except if id_hack is
+ # Prints out the XML document with no formatting -- except if ie_hack is
# set.
#
# ie_hack::
@@ -21,8 +22,8 @@ module REXML
def write( node, output )
case node
- when Document
- if node.xml_decl.encoding != "UTF-8" && !output.kind_of?(Output)
+ when Document
+ if node.xml_decl.encoding != 'UTF-8' && !output.kind_of?(Output)
output = Output.new( output, node.xml_decl.encoding )
end
write_document( node, output )
@@ -63,14 +64,16 @@ module REXML
def write_element( node, output )
output << "<#{node.expanded_name}"
- node.attributes.each_attribute do |attr|
+ node.attributes.to_a.map { |a|
+ Hash === a ? a.values : a
+ }.flatten.sort_by {|attr| attr.name}.each do |attr|
output << " "
attr.write( output )
end unless node.attributes.empty?
if node.children.empty?
output << " " if @ie_hack
- output << "/"
+ output << "/"
else
output << ">"
node.children.each { |child|
diff --git a/lib/rexml/formatters/pretty.rb b/lib/rexml/formatters/pretty.rb
index d21175d34a..a80274bdad 100644
--- a/lib/rexml/formatters/pretty.rb
+++ b/lib/rexml/formatters/pretty.rb
@@ -1,3 +1,4 @@
+# frozen_string_literal: false
require 'rexml/formatters/default'
module REXML
@@ -24,13 +25,14 @@ module REXML
# is undefined. Defaults to 2.
# ie_hack::
# If true, the printer will insert whitespace before closing empty
- # tags, thereby allowing Internet Explorer's feeble XML parser to
+ # tags, thereby allowing Internet Explorer's XML parser to
# function. Defaults to false.
def initialize( indentation=2, ie_hack=false )
@indentation = indentation
@level = 0
@ie_hack = ie_hack
@width = 80
+ @compact = false
end
protected
@@ -47,7 +49,7 @@ module REXML
if @ie_hack
output << " "
end
- output << "/"
+ output << "/"
else
output << ">"
# If compact and all children are text, and if the formatted output
@@ -87,7 +89,7 @@ module REXML
s = node.to_s()
s.gsub!(/\s/,' ')
s.squeeze!(" ")
- s = wrap(s, 80-@level)
+ s = wrap(s, @width - @level)
s = indent_text(s, @level, " ", true)
output << (' '*@level + s)
end
@@ -125,10 +127,13 @@ module REXML
end
def wrap(string, width)
- # Recursively wrap string at width.
- return string if string.length <= width
- place = string.rindex(' ', width) # Position in string with last ' ' before cutoff
- return string[0,place] + "\n" + wrap(string[place+1..-1], width)
+ parts = []
+ while string.length > width and place = string.rindex(' ', width)
+ parts << string[0...place]
+ string = string[place+1..-1]
+ end
+ parts << string
+ parts.join("\n")
end
end
diff --git a/lib/rexml/formatters/transitive.rb b/lib/rexml/formatters/transitive.rb
index 1d80f21fbb..81e67f3274 100644
--- a/lib/rexml/formatters/transitive.rb
+++ b/lib/rexml/formatters/transitive.rb
@@ -1,3 +1,4 @@
+# frozen_string_literal: false
require 'rexml/formatters/pretty'
module REXML
@@ -12,9 +13,10 @@ module REXML
# formatted. Since this formatter does not alter whitespace nodes, the
# results of formatting already formatted XML will be odd.
class Transitive < Default
- def initialize( indentation=2 )
+ def initialize( indentation=2, ie_hack=false )
@indentation = indentation
@level = 0
+ @ie_hack = ie_hack
end
protected
@@ -29,13 +31,13 @@ module REXML
output << "\n"
output << ' '*@level
if node.children.empty?
- output << "/"
+ output << " " if @ie_hack
+ output << "/"
else
output << ">"
# If compact and all children are text, and if the formatted output
# is less than the specified width, then try to print everything on
# one line
- skip = false
@level += @indentation
node.children.each { |child|
write( child, output )
diff --git a/lib/rexml/functions.rb b/lib/rexml/functions.rb
index fefc9ef940..cd879fdd28 100644
--- a/lib/rexml/functions.rb
+++ b/lib/rexml/functions.rb
@@ -1,3 +1,4 @@
+# frozen_string_literal: false
module REXML
# If you add a method, keep in mind two things:
# (1) the first argument will always be a list of nodes from which to
@@ -7,10 +8,28 @@ module REXML
# Therefore, in XML, "local-name()" is identical (and actually becomes)
# "local_name()"
module Functions
+ @@available_functions = {}
@@context = nil
@@namespace_context = {}
@@variables = {}
+ INTERNAL_METHODS = [
+ :namespace_context,
+ :namespace_context=,
+ :variables,
+ :variables=,
+ :context=,
+ :get_namespace,
+ :send,
+ ]
+ class << self
+ def singleton_method_added(name)
+ unless INTERNAL_METHODS.include?(name)
+ @@available_functions[name] = true
+ end
+ end
+ end
+
def Functions::namespace_context=(x) ; @@namespace_context=x ; end
def Functions::variables=(x) ; @@variables=x ; end
def Functions::namespace_context ; @@namespace_context ; end
@@ -28,6 +47,7 @@ module REXML
end
end
+ # Returns the size of the current context (i.e. the position of its last node).
def Functions::last( )
@@context[:size]
end
@@ -36,6 +56,7 @@ module REXML
@@context[:index]
end
+ # Returns the size of the given list of nodes.
def Functions::count( node_set )
node_set.size
end
@@ -48,7 +69,7 @@ module REXML
# UNTESTED
def Functions::local_name( node_set=nil )
get_namespace( node_set ) do |node|
- return node.local_name
+ return node.local_name
end
end
@@ -57,7 +78,7 @@ module REXML
end
def Functions::name( node_set=nil )
- get_namespace( node_set ) do |node|
+ get_namespace( node_set ) do |node|
node.expanded_name
end
end
@@ -66,7 +87,7 @@ module REXML
def Functions::get_namespace( node_set = nil )
if node_set == nil
yield @@context[:node] if defined? @@context[:node].namespace
- else
+ else
if node_set.respond_to? :each
node_set.each { |node| yield node if defined? node.namespace }
elsif node_set.respond_to? :namespace
@@ -81,15 +102,15 @@ module REXML
#
# A number is converted to a string as follows
#
- # NaN is converted to the string NaN
+ # NaN is converted to the string NaN
#
- # positive zero is converted to the string 0
+ # positive zero is converted to the string 0
#
- # negative zero is converted to the string 0
+ # negative zero is converted to the string 0
#
- # positive infinity is converted to the string Infinity
+ # positive infinity is converted to the string Infinity
#
- # negative infinity is converted to the string -Infinity
+ # negative infinity is converted to the string -Infinity
#
# if the number is an integer, the number is represented in decimal form
# as a Number with no decimal point and no leading zeros, preceded by a
@@ -129,6 +150,11 @@ module REXML
end
end
+ # A node-set is converted to a string by
+ # returning the concatenation of the string-value
+ # of each of the children of the node in the
+ # node-set that is first in document order.
+ # If the node-set is empty, an empty string is returned.
def Functions::string_value( o )
rv = ""
o.children.each { |e|
@@ -156,7 +182,7 @@ module REXML
string(string).include?(string(test))
end
- # Kouhei fixed this
+ # Kouhei fixed this
def Functions::substring_before( string, test )
ruby_string = string(string)
ruby_index = ruby_string.index(string(test))
@@ -166,20 +192,19 @@ module REXML
ruby_string[ 0...ruby_index ]
end
end
-
+
# Kouhei fixed this too
def Functions::substring_after( string, test )
ruby_string = string(string)
- test_string = string(test)
return $1 if ruby_string =~ /#{test}(.*)/
""
end
- # Take equal portions of Mike Stok and Sean Russell; mix
+ # Take equal portions of Mike Stok and Sean Russell; mix
# vigorously, and pour into a tall, chilled glass. Serves 10,000.
def Functions::substring( string, start, length=nil )
ruby_string = string(string)
- ruby_length = if length.nil?
+ ruby_length = if length.nil?
ruby_string.length.to_f
else
number(length)
@@ -188,18 +213,18 @@ module REXML
# Handle the special cases
return '' if (
- ruby_length.nan? or
+ ruby_length.nan? or
ruby_start.nan? or
ruby_start.infinite?
)
infinite_length = ruby_length.infinite? == 1
ruby_length = ruby_string.length if infinite_length
-
- # Now, get the bounds. The XPath bounds are 1..length; the ruby bounds
+
+ # Now, get the bounds. The XPath bounds are 1..length; the ruby bounds
# are 0..length. Therefore, we have to offset the bounds by one.
- ruby_start = ruby_start.round - 1
- ruby_length = ruby_length.round
+ ruby_start = round(ruby_start) - 1
+ ruby_length = round(ruby_length)
if ruby_start < 0
ruby_length += ruby_start unless infinite_length
@@ -247,7 +272,7 @@ module REXML
0.upto(from.length - 1) { |pos|
from_char = from[pos]
unless map.has_key? from_char
- map[from_char] =
+ map[from_char] =
if pos < to.length
to[pos]
else
@@ -256,9 +281,15 @@ module REXML
end
}
- string(string).unpack('U*').collect { |c|
- if map.has_key? c then map[c] else c end
- }.compact.pack('U*')
+ if ''.respond_to? :chars
+ string(string).chars.collect { |c|
+ if map.has_key? c then map[c] else c end
+ }.compact.join
+ else
+ string(string).unpack('U*').collect { |c|
+ if map.has_key? c then map[c] else c end
+ }.compact.pack('U*')
+ end
end
# UNTESTED
@@ -351,9 +382,9 @@ module REXML
def Functions::sum( nodes )
nodes = [nodes] unless nodes.kind_of? Array
- nodes.inject(0) { |r,n| r += number(string(n)) }
+ nodes.inject(0) { |r,n| r + number(string(n)) }
end
-
+
def Functions::floor( number )
number(number).floor
end
@@ -363,10 +394,13 @@ module REXML
end
def Functions::round( number )
+ number = number(number)
begin
- number(number).round
+ neg = number.negative?
+ number = number.abs.round(half: :up)
+ neg ? -number : number
rescue FloatDomainError
- number(number)
+ number
end
end
@@ -374,9 +408,14 @@ module REXML
node.node_type == :processing_instruction
end
- def Functions::method_missing( id )
- puts "METHOD MISSING #{id.id2name}"
- XPath.match( @@context[:node], id.id2name )
+ def Functions::send(name, *args)
+ if @@available_functions[name.to_sym]
+ super
+ else
+ # TODO: Maybe, this is not XPath spec behavior.
+ # This behavior must be reconsidered.
+ XPath.match(@@context[:node], name.to_s)
+ end
end
end
end
diff --git a/lib/rexml/instruction.rb b/lib/rexml/instruction.rb
index c16b894b4a..c4f65eefc1 100644
--- a/lib/rexml/instruction.rb
+++ b/lib/rexml/instruction.rb
@@ -1,63 +1,64 @@
+# frozen_string_literal: false
require "rexml/child"
require "rexml/source"
module REXML
- # Represents an XML Instruction; IE, <? ... ?>
- # TODO: Add parent arg (3rd arg) to constructor
- class Instruction < Child
- START = '<\?'
- STOP = '\?>'
+ # Represents an XML Instruction; IE, <? ... ?>
+ # TODO: Add parent arg (3rd arg) to constructor
+ class Instruction < Child
+ START = '<\?'
+ STOP = '\?>'
- # target is the "name" of the Instruction; IE, the "tag" in <?tag ...?>
- # content is everything else.
- attr_accessor :target, :content
+ # target is the "name" of the Instruction; IE, the "tag" in <?tag ...?>
+ # content is everything else.
+ attr_accessor :target, :content
- # Constructs a new Instruction
- # @param target can be one of a number of things. If String, then
- # the target of this instruction is set to this. If an Instruction,
- # then the Instruction is shallowly cloned (target and content are
- # copied). If a Source, then the source is scanned and parsed for
- # an Instruction declaration.
- # @param content Must be either a String, or a Parent. Can only
- # be a Parent if the target argument is a Source. Otherwise, this
- # String is set as the content of this instruction.
- def initialize(target, content=nil)
- if target.kind_of? String
- super()
- @target = target
- @content = content
- elsif target.kind_of? Instruction
- super(content)
- @target = target.target
- @content = target.content
- end
- @content.strip! if @content
- end
+ # Constructs a new Instruction
+ # @param target can be one of a number of things. If String, then
+ # the target of this instruction is set to this. If an Instruction,
+ # then the Instruction is shallowly cloned (target and content are
+ # copied). If a Source, then the source is scanned and parsed for
+ # an Instruction declaration.
+ # @param content Must be either a String, or a Parent. Can only
+ # be a Parent if the target argument is a Source. Otherwise, this
+ # String is set as the content of this instruction.
+ def initialize(target, content=nil)
+ if target.kind_of? String
+ super()
+ @target = target
+ @content = content
+ elsif target.kind_of? Instruction
+ super(content)
+ @target = target.target
+ @content = target.content
+ end
+ @content.strip! if @content
+ end
+
+ def clone
+ Instruction.new self
+ end
- def clone
- Instruction.new self
- end
-
# == DEPRECATED
# See the rexml/formatters package
#
- def write writer, indent=-1, transitive=false, ie_hack=false
- Kernel.warn( "#{self.class.name}.write is deprecated" )
- indent(writer, indent)
- writer << START.sub(/\\/u, '')
- writer << @target
- writer << ' '
- writer << @content
- writer << STOP.sub(/\\/u, '')
- end
+ def write writer, indent=-1, transitive=false, ie_hack=false
+ Kernel.warn( "#{self.class.name}.write is deprecated", uplevel: 1)
+ indent(writer, indent)
+ writer << START.sub(/\\/u, '')
+ writer << @target
+ writer << ' '
+ writer << @content
+ writer << STOP.sub(/\\/u, '')
+ end
- # @return true if other is an Instruction, and the content and target
- # of the other matches the target and content of this object.
- def ==( other )
- other.kind_of? Instruction and
- other.target == @target and
- other.content == @content
- end
+ # @return true if other is an Instruction, and the content and target
+ # of the other matches the target and content of this object.
+ def ==( other )
+ other.kind_of? Instruction and
+ other.target == @target and
+ other.content == @content
+ end
def node_type
:processing_instruction
@@ -66,5 +67,5 @@ module REXML
def inspect
"<?p-i #{target} ...?>"
end
- end
+ end
end
diff --git a/lib/rexml/light/node.rb b/lib/rexml/light/node.rb
index 943ec5f1a0..d58119a3a4 100644
--- a/lib/rexml/light/node.rb
+++ b/lib/rexml/light/node.rb
@@ -1,196 +1,196 @@
+# frozen_string_literal: false
require 'rexml/xmltokens'
-require 'rexml/light/node'
# [ :element, parent, name, attributes, children* ]
- # a = Node.new
- # a << "B" # => <a>B</a>
- # a.b # => <a>B<b/></a>
- # a.b[1] # => <a>B<b/><b/><a>
- # a.b[1]["x"] = "y" # => <a>B<b/><b x="y"/></a>
- # a.b[0].c # => <a>B<b><c/></b><b x="y"/></a>
- # a.b.c << "D" # => <a>B<b><c>D</c></b><b x="y"/></a>
+ # a = Node.new
+ # a << "B" # => <a>B</a>
+ # a.b # => <a>B<b/></a>
+ # a.b[1] # => <a>B<b/><b/></a>
+ # a.b[1]["x"] = "y" # => <a>B<b/><b x="y"/></a>
+ # a.b[0].c # => <a>B<b><c/></b><b x="y"/></a>
+ # a.b.c << "D" # => <a>B<b><c>D</c></b><b x="y"/></a>
module REXML
- module Light
- # Represents a tagged XML element. Elements are characterized by
- # having children, attributes, and names, and can themselves be
- # children.
- class Node
- NAMESPLIT = /^(?:(#{XMLTokens::NCNAME_STR}):)?(#{XMLTokens::NCNAME_STR})/u
- PARENTS = [ :element, :document, :doctype ]
- # Create a new element.
- def initialize node=nil
- @node = node
- if node.kind_of? String
- node = [ :text, node ]
- elsif node.nil?
- node = [ :document, nil, nil ]
- elsif node[0] == :start_element
- node[0] = :element
- elsif node[0] == :start_doctype
- node[0] = :doctype
- elsif node[0] == :start_document
- node[0] = :document
- end
- end
-
- def size
- if PARENTS.include? @node[0]
- @node[-1].size
- else
- 0
- end
- end
-
- def each( &block )
- size.times { |x| yield( at(x+4) ) }
- end
-
- def name
- at(2)
- end
-
- def name=( name_str, ns=nil )
- pfx = ''
- pfx = "#{prefix(ns)}:" if ns
- _old_put(2, "#{pfx}#{name_str}")
- end
-
- def parent=( node )
- _old_put(1,node)
- end
-
- def local_name
- namesplit
- @name
- end
-
- def local_name=( name_str )
- _old_put( 1, "#@prefix:#{name_str}" )
- end
-
- def prefix( namespace=nil )
- prefix_of( self, namespace )
- end
-
- def namespace( prefix=prefix() )
- namespace_of( self, prefix )
- end
-
- def namespace=( namespace )
- @prefix = prefix( namespace )
- pfx = ''
- pfx = "#@prefix:" if @prefix.size > 0
- _old_put(1, "#{pfx}#@name")
- end
-
- def []( reference, ns=nil )
- if reference.kind_of? String
- pfx = ''
- pfx = "#{prefix(ns)}:" if ns
- at(3)["#{pfx}#{reference}"]
- elsif reference.kind_of? Range
- _old_get( Range.new(4+reference.begin, reference.end, reference.exclude_end?) )
- else
- _old_get( 4+reference )
- end
- end
-
- def =~( path )
- XPath.match( self, path )
- end
-
- # Doesn't handle namespaces yet
- def []=( reference, ns, value=nil )
- if reference.kind_of? String
- value = ns unless value
- at( 3 )[reference] = value
- elsif reference.kind_of? Range
- _old_put( Range.new(3+reference.begin, reference.end, reference.exclude_end?), ns )
- else
- if value
- _old_put( 4+reference, ns, value )
- else
- _old_put( 4+reference, ns )
- end
- end
- end
-
- # Append a child to this element, optionally under a provided namespace.
- # The namespace argument is ignored if the element argument is an Element
- # object. Otherwise, the element argument is a string, the namespace (if
- # provided) is the namespace the element is created in.
- def << element
- if node_type() == :text
- at(-1) << element
- else
- newnode = Node.new( element )
- newnode.parent = self
- self.push( newnode )
- end
- at(-1)
- end
-
- def node_type
- _old_get(0)
- end
-
- def text=( foo )
- replace = at(4).kind_of?(String)? 1 : 0
- self._old_put(4,replace, normalizefoo)
- end
-
- def root
- context = self
- context = context.at(1) while context.at(1)
- end
-
- def has_name?( name, namespace = '' )
- at(3) == name and namespace() == namespace
- end
-
- def children
- self
- end
-
- def parent
- at(1)
- end
-
- def to_s
-
- end
-
- private
-
- def namesplit
- return if @name.defined?
- at(2) =~ NAMESPLIT
- @prefix = '' || $1
- @name = $2
- end
-
- def namespace_of( node, prefix=nil )
- if not prefix
- name = at(2)
- name =~ NAMESPLIT
- prefix = $1
- end
- to_find = 'xmlns'
- to_find = "xmlns:#{prefix}" if not prefix.nil?
- ns = at(3)[ to_find ]
- ns ? ns : namespace_of( @node[0], prefix )
- end
-
- def prefix_of( node, namespace=nil )
- if not namespace
- name = node.name
- name =~ NAMESPLIT
- $1
- else
- ns = at(3).find { |k,v| v == namespace }
- ns ? ns : prefix_of( node.parent, namespace )
- end
- end
- end
- end
+ module Light
+ # Represents a tagged XML element. Elements are characterized by
+ # having children, attributes, and names, and can themselves be
+ # children.
+ class Node
+ NAMESPLIT = /^(?:(#{XMLTokens::NCNAME_STR}):)?(#{XMLTokens::NCNAME_STR})/u
+ PARENTS = [ :element, :document, :doctype ]
+ # Create a new element.
+ def initialize node=nil
+ @node = node
+ if node.kind_of? String
+ node = [ :text, node ]
+ elsif node.nil?
+ node = [ :document, nil, nil ]
+ elsif node[0] == :start_element
+ node[0] = :element
+ elsif node[0] == :start_doctype
+ node[0] = :doctype
+ elsif node[0] == :start_document
+ node[0] = :document
+ end
+ end
+
+ def size
+ if PARENTS.include? @node[0]
+ @node[-1].size
+ else
+ 0
+ end
+ end
+
+ def each
+ size.times { |x| yield( at(x+4) ) }
+ end
+
+ def name
+ at(2)
+ end
+
+ def name=( name_str, ns=nil )
+ pfx = ''
+ pfx = "#{prefix(ns)}:" if ns
+ _old_put(2, "#{pfx}#{name_str}")
+ end
+
+ def parent=( node )
+ _old_put(1,node)
+ end
+
+ def local_name
+ namesplit
+ @name
+ end
+
+ def local_name=( name_str )
+ _old_put( 1, "#@prefix:#{name_str}" )
+ end
+
+ def prefix( namespace=nil )
+ prefix_of( self, namespace )
+ end
+
+ def namespace( prefix=prefix() )
+ namespace_of( self, prefix )
+ end
+
+ def namespace=( namespace )
+ @prefix = prefix( namespace )
+ pfx = ''
+ pfx = "#@prefix:" if @prefix.size > 0
+ _old_put(1, "#{pfx}#@name")
+ end
+
+ def []( reference, ns=nil )
+ if reference.kind_of? String
+ pfx = ''
+ pfx = "#{prefix(ns)}:" if ns
+ at(3)["#{pfx}#{reference}"]
+ elsif reference.kind_of? Range
+ _old_get( Range.new(4+reference.begin, reference.end, reference.exclude_end?) )
+ else
+ _old_get( 4+reference )
+ end
+ end
+
+ def =~( path )
+ XPath.match( self, path )
+ end
+
+ # Doesn't handle namespaces yet
+ def []=( reference, ns, value=nil )
+ if reference.kind_of? String
+ value = ns unless value
+ at( 3 )[reference] = value
+ elsif reference.kind_of? Range
+ _old_put( Range.new(3+reference.begin, reference.end, reference.exclude_end?), ns )
+ else
+ if value
+ _old_put( 4+reference, ns, value )
+ else
+ _old_put( 4+reference, ns )
+ end
+ end
+ end
+
+ # Append a child to this element, optionally under a provided namespace.
+ # The namespace argument is ignored if the element argument is an Element
+ # object. Otherwise, the element argument is a string, the namespace (if
+ # provided) is the namespace the element is created in.
+ def << element
+ if node_type() == :text
+ at(-1) << element
+ else
+ newnode = Node.new( element )
+ newnode.parent = self
+ self.push( newnode )
+ end
+ at(-1)
+ end
+
+ def node_type
+ _old_get(0)
+ end
+
+ def text=( foo )
+ replace = at(4).kind_of?(String)? 1 : 0
+ self._old_put(4,replace, normalizefoo)
+ end
+
+ def root
+ context = self
+ context = context.at(1) while context.at(1)
+ end
+
+ def has_name?( name, namespace = '' )
+ at(3) == name and namespace() == namespace
+ end
+
+ def children
+ self
+ end
+
+ def parent
+ at(1)
+ end
+
+ def to_s
+
+ end
+
+ private
+
+ def namesplit
+ return if @name.defined?
+ at(2) =~ NAMESPLIT
+ @prefix = '' || $1
+ @name = $2
+ end
+
+ def namespace_of( node, prefix=nil )
+ if not prefix
+ name = at(2)
+ name =~ NAMESPLIT
+ prefix = $1
+ end
+ to_find = 'xmlns'
+ to_find = "xmlns:#{prefix}" if not prefix.nil?
+ ns = at(3)[ to_find ]
+ ns ? ns : namespace_of( @node[0], prefix )
+ end
+
+ def prefix_of( node, namespace=nil )
+ if not namespace
+ name = node.name
+ name =~ NAMESPLIT
+ $1
+ else
+ ns = at(3).find { |k,v| v == namespace }
+ ns ? ns : prefix_of( node.parent, namespace )
+ end
+ end
+ end
+ end
end
diff --git a/lib/rexml/namespace.rb b/lib/rexml/namespace.rb
index 3e8790580b..90ba7cc635 100644
--- a/lib/rexml/namespace.rb
+++ b/lib/rexml/namespace.rb
@@ -1,47 +1,48 @@
+# frozen_string_literal: false
require 'rexml/xmltokens'
module REXML
- # Adds named attributes to an object.
- module Namespace
- # The name of the object, valid if set
- attr_reader :name, :expanded_name
- # The expanded name of the object, valid if name is set
- attr_accessor :prefix
- include XMLTokens
- NAMESPLIT = /^(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})/u
+ # Adds named attributes to an object.
+ module Namespace
+ # The name of the object, valid if set
+ attr_reader :name, :expanded_name
+ # The expanded name of the object, valid if name is set
+ attr_accessor :prefix
+ include XMLTokens
+ NAMESPLIT = /^(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})/u
- # Sets the name and the expanded name
- def name=( name )
- @expanded_name = name
- name =~ NAMESPLIT
- if $1
- @prefix = $1
- else
- @prefix = ""
- @namespace = ""
- end
- @name = $2
- end
+ # Sets the name and the expanded name
+ def name=( name )
+ @expanded_name = name
+ name =~ NAMESPLIT
+ if $1
+ @prefix = $1
+ else
+ @prefix = ""
+ @namespace = ""
+ end
+ @name = $2
+ end
- # Compares names optionally WITH namespaces
- def has_name?( other, ns=nil )
- if ns
- return (namespace() == ns and name() == other)
- elsif other.include? ":"
- return fully_expanded_name == other
- else
- return name == other
- end
- end
+ # Compares names optionally WITH namespaces
+ def has_name?( other, ns=nil )
+ if ns
+ return (namespace() == ns and name() == other)
+ elsif other.include? ":"
+ return fully_expanded_name == other
+ else
+ return name == other
+ end
+ end
- alias :local_name :name
+ alias :local_name :name
- # Fully expand the name, even if the prefix wasn't specified in the
- # source file.
- def fully_expanded_name
- ns = prefix
- return "#{ns}:#@name" if ns.size > 0
- return @name
- end
- end
+ # Fully expand the name, even if the prefix wasn't specified in the
+ # source file.
+ def fully_expanded_name
+ ns = prefix
+ return "#{ns}:#@name" if ns.size > 0
+ return @name
+ end
+ end
end
diff --git a/lib/rexml/node.rb b/lib/rexml/node.rb
index d5e8456e53..52337ade44 100644
--- a/lib/rexml/node.rb
+++ b/lib/rexml/node.rb
@@ -1,31 +1,32 @@
+# frozen_string_literal: false
require "rexml/parseexception"
require "rexml/formatters/pretty"
require "rexml/formatters/default"
module REXML
- # Represents a node in the tree. Nodes are never encountered except as
- # superclasses of other objects. Nodes have siblings.
- module Node
- # @return the next sibling (nil if unset)
- def next_sibling_node
- return nil if @parent.nil?
- @parent[ @parent.index(self) + 1 ]
- end
+ # Represents a node in the tree. Nodes are never encountered except as
+ # superclasses of other objects. Nodes have siblings.
+ module Node
+ # @return the next sibling (nil if unset)
+ def next_sibling_node
+ return nil if @parent.nil?
+ @parent[ @parent.index(self) + 1 ]
+ end
- # @return the previous sibling (nil if unset)
- def previous_sibling_node
- return nil if @parent.nil?
- ind = @parent.index(self)
- return nil if ind == 0
- @parent[ ind - 1 ]
- end
+ # @return the previous sibling (nil if unset)
+ def previous_sibling_node
+ return nil if @parent.nil?
+ ind = @parent.index(self)
+ return nil if ind == 0
+ @parent[ ind - 1 ]
+ end
# indent::
# *DEPRECATED* This parameter is now ignored. See the formatters in the
# REXML::Formatters package for changing the output style.
- def to_s indent=nil
+ def to_s indent=nil
unless indent.nil?
- Kernel.warn( "#{self.class.name}.to_s(indent) parameter is deprecated" )
+ Kernel.warn( "#{self.class.name}.to_s(indent) parameter is deprecated", uplevel: 1)
f = REXML::Formatters::Pretty.new( indent )
f.write( self, rv = "" )
else
@@ -33,33 +34,33 @@ module REXML
f.write( self, rv = "" )
end
return rv
- end
+ end
- def indent to, ind
- if @parent and @parent.context and not @parent.context[:indentstyle].nil? then
- indentstyle = @parent.context[:indentstyle]
- else
- indentstyle = ' '
- end
- to << indentstyle*ind unless ind<1
- end
+ def indent to, ind
+ if @parent and @parent.context and not @parent.context[:indentstyle].nil? then
+ indentstyle = @parent.context[:indentstyle]
+ else
+ indentstyle = ' '
+ end
+ to << indentstyle*ind unless ind<1
+ end
- def parent?
- false;
- end
+ # Always false in this module.
+ def parent?
+ false;
+ end
- # Visit all subnodes of +self+ recursively
- def each_recursive(&block) # :yields: node
- self.elements.each {|node|
- block.call(node)
- node.each_recursive(&block)
- }
- end
+ # Visit all subnodes of +self+ recursively
+ def each_recursive(&block) # :yields: node
+ self.elements.each {|node|
+ block.call(node)
+ node.each_recursive(&block)
+ }
+ end
- # Find (and return) first subnode (recursively) for which the block
+ # Find (and return) first subnode (recursively) for which the block
# evaluates to true. Returns +nil+ if none was found.
- def find_first_recursive(&block) # :yields: node
+ def find_first_recursive(&block) # :yields: node
each_recursive {|node|
return node if block.call(node)
}
@@ -71,5 +72,5 @@ module REXML
# Returns the position of this node within its parent, counted from 1
# (the parent's zero-based index plus one).
def index_in_parent
parent.index(self)+1
end
- end
+ end
end
diff --git a/lib/rexml/output.rb b/lib/rexml/output.rb
index be4d23d42d..96dfea570e 100644
--- a/lib/rexml/output.rb
+++ b/lib/rexml/output.rb
@@ -1,24 +1,30 @@
+# frozen_string_literal: false
require 'rexml/encoding'
module REXML
- class Output
- include Encoding
-
+ class Output
+ include Encoding
+
attr_reader :encoding
- def initialize real_IO, encd="iso-8859-1"
- @output = real_IO
- self.encoding = encd
+ # Wraps +real_IO+ as the output target and selects the encoding +encd+
+ # (default "iso-8859-1") through the encoding= writer.
+ def initialize real_IO, encd="iso-8859-1"
+ @output = real_IO
+ self.encoding = encd
+
+ # Computed from the value the encoding reader reports after assignment,
+ # before the UTF-16 adjustment below.
+ @to_utf = encoding != 'UTF-8'
- @to_utf = encd == UTF_8 ? false : true
- end
+ if encoding == "UTF-16"
+ # Write a byte-order mark once, then continue as big-endian UTF-16.
+ @output << "\ufeff".encode("UTF-16BE")
+ self.encoding = "UTF-16BE"
+ end
+ end
- def <<( content )
- @output << (@to_utf ? self.encode(content) : content)
- end
+ # Appends +content+ to the wrapped output, passing it through encode first
+ # when @to_utf is set. Returns whatever the wrapped object's << returns.
+ def <<( content )
+ @output << (@to_utf ? self.encode(content) : content)
+ end
# Returns a short description of the form "Output[<encoding>]".
def to_s
"Output[#{encoding}]"
end
- end
+ end
end
diff --git a/lib/rexml/parent.rb b/lib/rexml/parent.rb
index a20aaaef6b..3bd0a96255 100644
--- a/lib/rexml/parent.rb
+++ b/lib/rexml/parent.rb
@@ -1,3 +1,4 @@
+# frozen_string_literal: false
require "rexml/child"
module REXML
@@ -6,62 +7,61 @@ module REXML
# object.
class Parent < Child
include Enumerable
-
+
# Constructor. Forwards +parent+ to the superclass constructor and starts
# with an empty child list.
# @param parent if supplied, will be set as the parent of this object
def initialize parent=nil
super(parent)
@children = []
end
-
+
# Makes this object the parent of +object+ and appends it to the end of the
# child list.
# @return +object+
def add( object )
- #puts "PARENT GOTS #{size} CHILDREN"
object.parent = self
@children << object
- #puts "PARENT NOW GOTS #{size} CHILDREN"
object
end
-
+
alias :push :add
alias :<< :push
-
+
def unshift( object )
  # Adopt the object first, then place it at the head of the child list.
  # Returns the child list itself.
  object.parent = self
  @children.insert( 0, object )
end
-
+
# Removes every entry of the child list that is the very same object as
# +object+ (compared with equal?) and, if any was removed, clears
# +object+'s parent.
# @return +object+ when something was removed, nil otherwise
def delete( object )
found = false
# `found = true` is an assignment that only runs when equal? succeeds; its
# truthy value is what makes delete_if drop the entry.
@children.delete_if {|c| c.equal?(object) and found = true }
object.parent = nil if found
+ found ? object : nil
end
-
+
# Passes each child to the block, in list order, by delegating to the child
# list's each.
def each(&block)
@children.each(&block)
end
-
+
# Removes the children for which the block is truthy, by delegating to the
# child list's delete_if.
# NOTE(review): unlike #delete, the removed children's parent is not
# cleared here — confirm that is intended.
def delete_if( &block )
@children.delete_if(&block)
end
-
+
# Removes the child at +index+ from the child list and returns what the
# list's delete_at returns.
# NOTE(review): unlike #delete, the removed child's parent is not cleared
# here — confirm that is intended.
def delete_at( index )
@children.delete_at index
end
-
+
# Passes each index of the child list to the block, by delegating to the
# child list's each_index.
def each_index( &block )
@children.each_index(&block)
end
-
+
# Fetches a child at a given index
# @param index the Integer index of the child to fetch
# @return whatever the underlying child list returns for +index+
def []( index )
@children[index]
end
-
+
alias :each_child :each
-
-
-
+
+
+
# Set an index entry. See Array.[]=
# @param index the index of the element to set
# @param opt either the object to set, or an Integer length
@@ -71,7 +71,7 @@ module REXML
args[-1].parent = self
@children[*args[0..-2]] = args[-1]
end
-
+
# Inserts a child before another child
# @param child1 this is either an xpath or an Element. If an Element,
# child2 will be inserted before child1 in the child list of the parent.
@@ -91,7 +91,7 @@ module REXML
end
self
end
-
+
# Inserts a child after another child
# @param child1 this is either an xpath or an Element. If an Element,
# child2 will be inserted after child1 in the child list of the parent.
@@ -111,11 +111,11 @@ module REXML
end
self
end
-
+
# Returns a shallow copy (dup) of the child list, so changes made to the
# returned array do not alter this parent's own list.
def to_a
@children.dup
end
-
+
# Fetches the index of a given child
# @param child the child to get the index of
# @return the index of the child, or nil if the object is not a child
@@ -125,24 +125,24 @@ module REXML
@children.find { |i| count += 1 ; i.hash == child.hash }
count
end
-
+
# @return the number of children of this parent, as reported by the child
#   list's size
def size
@children.size
end
-
+
alias :length :size
-
+
# Replaces one child with another, making sure the nodelist is correct
# @param to_replace the child to replace (must be a Child)
- # @param replacement the child to insert into the nodelist (must be a
+ # @param replacement the child to insert into the nodelist (must be a
# Child)
def replace_child( to_replace, replacement )
  # Swap every slot holding +to_replace+ (identity comparison) for
  # +replacement+, then update both parent links.
  @children.each_index do |slot|
    @children[slot] = replacement if @children[slot].equal?( to_replace )
  end
  to_replace.parent = nil
  replacement.parent = self
end
-
+
# Deeply clones this object. This creates a complete duplicate of this
# Parent, including all descendants.
def deep_clone
@@ -156,9 +156,9 @@ module REXML
end
cl
end
-
+
alias :children :to_a
-
+
# Always true for this class.
def parent?
true
end
diff --git a/lib/rexml/parseexception.rb b/lib/rexml/parseexception.rb
index feb7a7e638..7b16cd1a41 100644
--- a/lib/rexml/parseexception.rb
+++ b/lib/rexml/parseexception.rb
@@ -1,3 +1,4 @@
+# frozen_string_literal: false
module REXML
class ParseException < RuntimeError
attr_accessor :source, :parser, :continued_exception
@@ -28,9 +29,9 @@ module REXML
err << "\nLine: #{line}\n"
err << "Position: #{position}\n"
err << "Last 80 unconsumed characters:\n"
- err << @source.buffer[0..80].gsub(/\n/, ' ')
+ err << @source.buffer[0..80].force_encoding("ASCII-8BIT").gsub(/\n/, ' ')
end
-
+
err
end
@@ -40,12 +41,12 @@ module REXML
end
# Returns the element at index 2 of @source.current_line, or nil when there
# is no source, the source does not provide current_line, or current_line
# is falsy.
def line
- @source.current_line[2] if @source and defined? @source.current_line and
+ @source.current_line[2] if @source and defined? @source.current_line and
@source.current_line
end
# Returns @source.current_line.
# NOTE(review): unlike #line, there is no guard here for a missing @source.
def context
@source.current_line
end
- end
+ end
end
diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb
index fc2354a67f..e7ef695912 100644
--- a/lib/rexml/parsers/baseparser.rb
+++ b/lib/rexml/parsers/baseparser.rb
@@ -1,3 +1,7 @@
+# frozen_string_literal: false
+
+require "strscan"
+
require 'rexml/parseexception'
require 'rexml/undefinednamespaceexception'
require 'rexml/source'
@@ -25,41 +29,50 @@ module REXML
#
# Nat Price gave me some good ideas for the API.
class BaseParser
- NCNAME_STR= '[\w:][\-\w\d.]*'
- NAME_STR= "(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})"
+ LETTER = '[:alpha:]'
+ DIGIT = '[:digit:]'
+
+ COMBININGCHAR = '' # TODO
+ EXTENDER = '' # TODO
+
+ NCNAME_STR= "[#{LETTER}_][-[:alnum:]._#{COMBININGCHAR}#{EXTENDER}]*"
+ QNAME_STR= "(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})"
+ QNAME = /(#{QNAME_STR})/
+
+ # Just for backward compatibility. For example, kramdown uses this.
+ # It's not used in REXML.
UNAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}"
- NAMECHAR = '[\-\w\d\.:]'
+ NAMECHAR = '[\-\w\.:]'
NAME = "([\\w:]#{NAMECHAR}*)"
NMTOKEN = "(?:#{NAMECHAR})+"
NMTOKENS = "#{NMTOKEN}(\\s+#{NMTOKEN})*"
- REFERENCE = "(?:&#{NAME};|&#\\d+;|&#x[0-9a-fA-F]+;)"
+ REFERENCE = "&(?:#{NAME};|#\\d+;|#x[0-9a-fA-F]+;)"
REFERENCE_RE = /#{REFERENCE}/
DOCTYPE_START = /\A\s*<!DOCTYPE\s/um
- DOCTYPE_PATTERN = /\s*<!DOCTYPE\s+(.*?)(\[|>)/um
- ATTRIBUTE_PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\4/um
+ DOCTYPE_END = /\A\s*\]\s*>/um
+ ATTRIBUTE_PATTERN = /\s*(#{QNAME_STR})\s*=\s*(["'])(.*?)\4/um
COMMENT_START = /\A<!--/u
COMMENT_PATTERN = /<!--(.*?)-->/um
CDATA_START = /\A<!\[CDATA\[/u
- CDATA_END = /^\s*\]\s*>/um
+ CDATA_END = /\A\s*\]\s*>/um
CDATA_PATTERN = /<!\[CDATA\[(.*?)\]\]>/um
XMLDECL_START = /\A<\?xml\s/u;
XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>/um
INSTRUCTION_START = /\A<\?/u
INSTRUCTION_PATTERN = /<\?(.*?)(\s+.*?)?\?>/um
- TAG_MATCH = /^<((?>#{NAME_STR}))\s*((?>\s+#{UNAME_STR}\s*=\s*(["']).*?\5)*)\s*(\/)?>/um
- CLOSE_MATCH = /^\s*<\/(#{NAME_STR})\s*>/um
+ TAG_MATCH = /\A<((?>#{QNAME_STR}))/um
+ CLOSE_MATCH = /\A\s*<\/(#{QNAME_STR})\s*>/um
VERSION = /\bversion\s*=\s*["'](.*?)['"]/um
ENCODING = /\bencoding\s*=\s*["'](.*?)['"]/um
- STANDALONE = /\bstandalone\s*=\s["'](.*?)['"]/um
+ STANDALONE = /\bstandalone\s*=\s*["'](.*?)['"]/um
- ENTITY_START = /^\s*<!ENTITY/
- IDENTITY = /^([!\*\w\-]+)(\s+#{NCNAME_STR})?(\s+["'](.*?)['"])?(\s+['"](.*?)["'])?/u
- ELEMENTDECL_START = /^\s*<!ELEMENT/um
- ELEMENTDECL_PATTERN = /^\s*(<!ELEMENT.*?)>/um
- SYSTEMENTITY = /^\s*(%.*?;)\s*$/um
+ ENTITY_START = /\A\s*<!ENTITY/
+ ELEMENTDECL_START = /\A\s*<!ELEMENT/um
+ ELEMENTDECL_PATTERN = /\A\s*(<!ELEMENT.*?)>/um
+ SYSTEMENTITY = /\A\s*(%.*?;)\s*$/um
ENUMERATION = "\\(\\s*#{NMTOKEN}(?:\\s*\\|\\s*#{NMTOKEN})*\\s*\\)"
NOTATIONTYPE = "NOTATION\\s+\\(\\s*#{NAME}(?:\\s*\\|\\s*#{NAME})*\\s*\\)"
ENUMERATEDTYPE = "(?:(?:#{NOTATIONTYPE})|(?:#{ENUMERATION}))"
@@ -68,11 +81,8 @@ module REXML
DEFAULTDECL = "(#REQUIRED|#IMPLIED|(?:(#FIXED\\s+)?#{ATTVALUE}))"
ATTDEF = "\\s+#{NAME}\\s+#{ATTTYPE}\\s+#{DEFAULTDECL}"
ATTDEF_RE = /#{ATTDEF}/
- ATTLISTDECL_START = /^\s*<!ATTLIST/um
- ATTLISTDECL_PATTERN = /^\s*<!ATTLIST\s+#{NAME}(?:#{ATTDEF})*\s*>/um
- NOTATIONDECL_START = /^\s*<!NOTATION/um
- PUBLIC = /^\s*<!NOTATION\s+(\w[\-\w]*)\s+(PUBLIC)\s+(["'])(.*?)\3(?:\s+(["'])(.*?)\5)?\s*>/um
- SYSTEM = /^\s*<!NOTATION\s+(\w[\-\w]*)\s+(SYSTEM)\s+(["'])(.*?)\3\s*>/um
+ ATTLISTDECL_START = /\A\s*<!ATTLIST/um
+ ATTLISTDECL_PATTERN = /\A\s*<!ATTLIST\s+#{NAME}(?:#{ATTDEF})*\s*>/um
TEXT_PATTERN = /\A([^<]*)/um
@@ -90,13 +100,18 @@ module REXML
GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um
+ NOTATIONDECL_START = /\A\s*<!NOTATION/um
+ EXTERNAL_ID_PUBLIC = /\A\s*PUBLIC\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}\s*/um
+ EXTERNAL_ID_SYSTEM = /\A\s*SYSTEM\s+#{SYSTEMLITERAL}\s*/um
+ PUBLIC_ID = /\A\s*PUBLIC\s+#{PUBIDLITERAL}\s*/um
+
EREFERENCE = /&(?!#{NAME};)/
- DEFAULT_ENTITIES = {
- 'gt' => [/&gt;/, '&gt;', '>', />/],
- 'lt' => [/&lt;/, '&lt;', '<', /</],
- 'quot' => [/&quot;/, '&quot;', '"', /"/],
- "apos" => [/&apos;/, "&apos;", "'", /'/]
+ DEFAULT_ENTITIES = {
+ 'gt' => [/&gt;/, '&gt;', '>', />/],
+ 'lt' => [/&lt;/, '&lt;', '<', /</],
+ 'quot' => [/&quot;/, '&quot;', '"', /"/],
+ "apos" => [/&apos;/, "&apos;", "'", /'/]
}
@@ -104,26 +119,14 @@ module REXML
# These are patterns to identify common markup errors, to make the
# error messages more informative.
######################################################################
- MISSING_ATTRIBUTE_QUOTES = /^<#{NAME_STR}\s+#{NAME_STR}\s*=\s*[^"']/um
+ MISSING_ATTRIBUTE_QUOTES = /^<#{QNAME_STR}\s+#{QNAME_STR}\s*=\s*[^"']/um
# Creates a parser that reads from +source+ (assigned through stream=) and
# starts with an empty listener list.
def initialize( source )
self.stream = source
+ @listeners = []
end
# Registers +listener+ by appending it to the listener list; #pull hands
# every event to each registered listener's receive method.
def add_listener( listener )
- if !defined?(@listeners) or !@listeners
- @listeners = []
- instance_eval <<-EOL
- alias :_old_pull :pull
- def pull
- event = _old_pull
- @listeners.each do |listener|
- listener.receive event
- end
- event
- end
- EOL
- end
@listeners << listener
end
@@ -167,9 +170,9 @@ module REXML
# Peek at the +depth+ event in the stack. The first element on the stack
# is at depth 0. If +depth+ is -1, will parse to the end of the input
# stream and return the last event, which is always :end_document.
- # Be aware that this causes the stream to be parsed up to the +depth+
- # event, so you can effectively pre-parse the entire document (pull the
- # entire thing into memory) using this method.
+ # Be aware that this causes the stream to be parsed up to the +depth+
+ # event, so you can effectively pre-parse the entire document (pull the
+ # entire thing into memory) using this method.
def peek depth=0
raise %Q[Illegal argument "#{depth}"] if depth < -1
temp = []
@@ -186,6 +189,14 @@ module REXML
# Returns the next event. This is a +PullEvent+ object.
def pull
+ pull_event.tap do |event|
+ @listeners.each do |listener|
+ listener.receive event
+ end
+ end
+ end
+
+ def pull_event
if @closed
x, @closed = @closed, nil
return [ :end_element, x ]
@@ -193,11 +204,9 @@ module REXML
return [ :end_document ] if empty?
return @stack.shift if @stack.size > 0
#STDERR.puts @source.encoding
- @source.read if @source.buffer.size<2
#STDERR.puts "BUFFER = #{@source.buffer.inspect}"
if @document_status == nil
- #@source.consume( /^\s*/um )
- word = @source.match( /^((?:\s+)|(?:<[^>]*>))/um )
+ word = @source.match( /\A((?:\s+)|(?:<[^>]*>))/um )
word = word[1] unless word.nil?
#STDERR.puts "WORD = #{word.inspect}"
case word
@@ -210,44 +219,63 @@ module REXML
version = version[1] unless version.nil?
encoding = ENCODING.match(results)
encoding = encoding[1] unless encoding.nil?
- @source.encoding = encoding
+ if need_source_encoding_update?(encoding)
+ @source.encoding = encoding
+ end
+ if encoding.nil? and /\AUTF-16(?:BE|LE)\z/i =~ @source.encoding
+ encoding = "UTF-16"
+ end
standalone = STANDALONE.match(results)
standalone = standalone[1] unless standalone.nil?
return [ :xmldecl, version, encoding, standalone ]
when INSTRUCTION_START
return [ :processing_instruction, *@source.match(INSTRUCTION_PATTERN, true)[1,2] ]
when DOCTYPE_START
- md = @source.match( DOCTYPE_PATTERN, true )
+ base_error_message = "Malformed DOCTYPE"
+ @source.match(DOCTYPE_START, true)
@nsstack.unshift(curr_ns=Set.new)
- identity = md[1]
- close = md[2]
- identity =~ IDENTITY
- name = $1
- raise REXML::ParseException.new("DOCTYPE is missing a name") if name.nil?
- pub_sys = $2.nil? ? nil : $2.strip
- long_name = $4.nil? ? nil : $4.strip
- uri = $6.nil? ? nil : $6.strip
- args = [ :start_doctype, name, pub_sys, long_name, uri ]
- if close == ">"
+ name = parse_name(base_error_message)
+ if @source.match(/\A\s*\[/um, true)
+ id = [nil, nil, nil]
+ @document_status = :in_doctype
+ elsif @source.match(/\A\s*>/um, true)
+ id = [nil, nil, nil]
@document_status = :after_doctype
- @source.read if @source.buffer.size<2
- md = @source.match(/^\s*/um, true)
- @stack << [ :end_doctype ]
else
- @document_status = :in_doctype
+ id = parse_id(base_error_message,
+ accept_external_id: true,
+ accept_public_id: false)
+ if id[0] == "SYSTEM"
+ # For backward compatibility
+ id[1], id[2] = id[2], nil
+ end
+ if @source.match(/\A\s*\[/um, true)
+ @document_status = :in_doctype
+ elsif @source.match(/\A\s*>/um, true)
+ @document_status = :after_doctype
+ else
+ message = "#{base_error_message}: garbage after external ID"
+ raise REXML::ParseException.new(message, @source)
+ end
+ end
+ args = [:start_doctype, name, *id]
+ if @document_status == :after_doctype
+ @source.match(/\A\s*/um, true)
+ @stack << [ :end_doctype ]
end
return args
- when /^\s+/
+ when /\A\s+/
else
@document_status = :after_doctype
- @source.read if @source.buffer.size<2
- md = @source.match(/\s*/um, true)
+ if @source.encoding == "UTF-8"
+ @source.buffer.force_encoding(::Encoding::UTF_8)
+ end
end
end
if @document_status == :in_doctype
- md = @source.match(/\s*(.*?>)/um)
+ md = @source.match(/\A\s*(.*?>)/um)
case md[1]
- when SYSTEMENTITY
+ when SYSTEMENTITY
match = @source.match( SYSTEMENTITY, true )[1]
return [ :externalentity, match ]
@@ -272,7 +300,8 @@ module REXML
# External reference
match[3] = match[3][1..-2] # PUBID
match[4] = match[4][1..-2] # HREF
- # match is [ :entity, name, PUBLIC, pubid, href ]
+ match.delete_at(5) if match.size > 5 # Chop out NDATA decl
+ # match is [ :entity, name, PUBLIC, pubid, href(, ndata)? ]
else
match[2] = match[2][1..-2]
match.pop if match.size == 4
@@ -301,33 +330,50 @@ module REXML
end
return [ :attlistdecl, element, pairs, contents ]
when NOTATIONDECL_START
- md = nil
- if @source.match( PUBLIC )
- md = @source.match( PUBLIC, true )
- vals = [md[1],md[2],md[4],md[6]]
- elsif @source.match( SYSTEM )
- md = @source.match( SYSTEM, true )
- vals = [md[1],md[2],nil,md[4]]
- else
- raise REXML::ParseException.new( "error parsing notation: no matching pattern", @source )
+ base_error_message = "Malformed notation declaration"
+ unless @source.match(/\A\s*<!NOTATION\s+/um, true)
+ if @source.match(/\A\s*<!NOTATION\s*>/um)
+ message = "#{base_error_message}: name is missing"
+ else
+ message = "#{base_error_message}: invalid declaration name"
+ end
+ raise REXML::ParseException.new(message, @source)
end
- return [ :notationdecl, *vals ]
- when CDATA_END
+ name = parse_name(base_error_message)
+ id = parse_id(base_error_message,
+ accept_external_id: true,
+ accept_public_id: true)
+ unless @source.match(/\A\s*>/um, true)
+ message = "#{base_error_message}: garbage before end >"
+ raise REXML::ParseException.new(message, @source)
+ end
+ return [:notationdecl, name, *id]
+ when DOCTYPE_END
@document_status = :after_doctype
- @source.match( CDATA_END, true )
+ @source.match( DOCTYPE_END, true )
return [ :end_doctype ]
end
end
+ if @document_status == :after_doctype
+ @source.match(/\A\s*/um, true)
+ end
begin
+ @source.read if @source.buffer.size<2
if @source.buffer[0] == ?<
if @source.buffer[1] == ?/
@nsstack.shift
last_tag = @tags.pop
#md = @source.match_to_consume( '>', CLOSE_MATCH)
md = @source.match( CLOSE_MATCH, true )
- raise REXML::ParseException.new( "Missing end tag for "+
- "'#{last_tag}' (got \"#{md[1]}\")",
- @source) unless last_tag == md[1]
+ if md and !last_tag
+ message = "Unexpected top-level end tag (got '#{md[1]}')"
+ raise REXML::ParseException.new(message, @source)
+ end
+ if md.nil? or last_tag != md[1]
+ message = "Missing end tag for '#{last_tag}'"
+ message << " (got '#{md[1]}')" if md
+ raise REXML::ParseException.new(message, @source)
+ end
return [ :end_element, last_tag ]
elsif @source.buffer[1] == ?!
md = @source.match(/\A(\s*[^>]*>)/um)
@@ -335,6 +381,12 @@ module REXML
raise REXML::ParseException.new("Malformed node", @source) unless md
if md[0][2] == ?-
md = @source.match( COMMENT_PATTERN, true )
+
+ case md[1]
+ when /--/, /-\z/
+ raise REXML::ParseException.new("Malformed comment", @source)
+ end
+
return [ :comment, md[1] ] if md
else
md = @source.match( CDATA_PATTERN, true )
@@ -353,36 +405,13 @@ module REXML
unless md
# Check for missing attribute quotes
raise REXML::ParseException.new("missing attribute quote", @source) if @source.match(MISSING_ATTRIBUTE_QUOTES )
- raise REXML::ParseException.new("malformed XML: missing tag start", @source)
+ raise REXML::ParseException.new("malformed XML: missing tag start", @source)
end
- attributes = {}
+ @document_status = :in_element
prefixes = Set.new
prefixes << md[2] if md[2]
@nsstack.unshift(curr_ns=Set.new)
- if md[4].size > 0
- attrs = md[4].scan( ATTRIBUTE_PATTERN )
- raise REXML::ParseException.new( "error parsing attributes: [#{attrs.join ', '}], excess = \"#$'\"", @source) if $' and $'.strip.size > 0
- attrs.each { |a,b,c,d,e|
- if b == "xmlns"
- if c == "xml"
- if d != "http://www.w3.org/XML/1998/namespace"
- msg = "The 'xml' prefix must not be bound to any other namespace "+
- "(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
- raise REXML::ParseException.new( msg, @source, self )
- end
- elsif c == "xmlns"
- msg = "The 'xmlns' prefix must not be declared "+
- "(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
- raise REXML::ParseException.new( msg, @source, self)
- end
- curr_ns << c
- elsif b
- prefixes << b unless b == "xml"
- end
- attributes[a] = e
- }
- end
-
+ attributes, closed = parse_attributes(prefixes, curr_ns)
# Verify that all of the prefixes have been defined
for prefix in prefixes
unless @nsstack.find{|k| k.member?(prefix)}
@@ -390,7 +419,7 @@ module REXML
end
end
- if md[6]
+ if closed
@closed = md[1]
@nsstack.shift
else
@@ -419,6 +448,7 @@ module REXML
end
return [ :dummy ]
end
+ private :pull_event
def entity( reference, entities )
value = nil
@@ -436,7 +466,7 @@ module REXML
# Doing it like this rather than in a loop improves the speed
copy.gsub!( EREFERENCE, '&amp;' )
entities.each do |key, value|
- copy.gsub!( value, "&#{key};" ) unless entity_filter and
+ copy.gsub!( value, "&#{key};" ) unless entity_filter and
entity_filter.include?(entity)
end if entities
copy.gsub!( EREFERENCE, '&amp;' )
@@ -452,7 +482,7 @@ module REXML
rv.gsub!( /\r\n?/, "\n" )
matches = rv.scan( REFERENCE_RE )
return rv if matches.size == 0
- rv.gsub!( /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/ ) {|m|
+ rv.gsub!( /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/ ) {
m=$1
m = "0#{m}" if m[0] == ?x
[Integer(m)].pack('U*')
@@ -465,19 +495,186 @@ module REXML
if entity_value
re = /&#{entity_reference};/
rv.gsub!( re, entity_value )
+ else
+ er = DEFAULT_ENTITIES[entity_reference]
+ rv.gsub!( er[0], er[2] ) if er
end
end
end
- matches.each do |entity_reference|
- unless filter and filter.include?(entity_reference)
- er = DEFAULT_ENTITIES[entity_reference]
- rv.gsub!( er[0], er[2] ) if er
- end
- end
rv.gsub!( /&amp;/, '&' )
end
rv
end
+
+ private
+ def need_source_encoding_update?(xml_declaration_encoding)
+ return false if xml_declaration_encoding.nil?
+ return false if /\AUTF-16\z/i =~ xml_declaration_encoding
+ true
+ end
+
+ def parse_name(base_error_message)
+ md = @source.match(/\A\s*#{NAME}/um, true)
+ unless md
+ if @source.match(/\A\s*\S/um)
+ message = "#{base_error_message}: invalid name"
+ else
+ message = "#{base_error_message}: name is missing"
+ end
+ raise REXML::ParseException.new(message, @source)
+ end
+ md[1]
+ end
+
+ def parse_id(base_error_message,
+ accept_external_id:,
+ accept_public_id:)
+ if accept_external_id and (md = @source.match(EXTERNAL_ID_PUBLIC, true))
+ pubid = system = nil
+ pubid_literal = md[1]
+ pubid = pubid_literal[1..-2] if pubid_literal # Remove quote
+ system_literal = md[2]
+ system = system_literal[1..-2] if system_literal # Remove quote
+ ["PUBLIC", pubid, system]
+ elsif accept_public_id and (md = @source.match(PUBLIC_ID, true))
+ pubid = system = nil
+ pubid_literal = md[1]
+ pubid = pubid_literal[1..-2] if pubid_literal # Remove quote
+ ["PUBLIC", pubid, nil]
+ elsif accept_external_id and (md = @source.match(EXTERNAL_ID_SYSTEM, true))
+ system = nil
+ system_literal = md[1]
+ system = system_literal[1..-2] if system_literal # Remove quote
+ ["SYSTEM", nil, system]
+ else
+ details = parse_id_invalid_details(accept_external_id: accept_external_id,
+ accept_public_id: accept_public_id)
+ message = "#{base_error_message}: #{details}"
+ raise REXML::ParseException.new(message, @source)
+ end
+ end
+
+ # Returns the detail text used by parse_id when no ID form matched.
+ # The source is only probed here: match is called without the flag that
+ # the consuming calls elsewhere pass as second argument (presumably these
+ # probes leave the input untouched — confirm against the source class).
+ # The checks run from most to least specific, so their order matters.
+ def parse_id_invalid_details(accept_external_id:,
+ accept_public_id:)
+ public = /\A\s*PUBLIC/um
+ system = /\A\s*SYSTEM/um
+ # Input starts with PUBLIC and some PUBLIC form is allowed.
+ if (accept_external_id or accept_public_id) and @source.match(/#{public}/um)
+ if @source.match(/#{public}(?:\s+[^'"]|\s*[\[>])/um)
+ return "public ID literal is missing"
+ end
+ unless @source.match(/#{public}\s+#{PUBIDLITERAL}/um)
+ return "invalid public ID literal"
+ end
+ if accept_public_id
+ if @source.match(/#{public}\s+#{PUBIDLITERAL}\s+[^'"]/um)
+ return "system ID literal is missing"
+ end
+ unless @source.match(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um)
+ return "invalid system literal"
+ end
+ "garbage after system literal"
+ else
+ "garbage after public ID literal"
+ end
+ # Input starts with SYSTEM and external IDs are allowed.
+ elsif accept_external_id and @source.match(/#{system}/um)
+ if @source.match(/#{system}(?:\s+[^'"]|\s*[\[>])/um)
+ return "system literal is missing"
+ end
+ unless @source.match(/#{system}\s+#{SYSTEMLITERAL}/um)
+ return "invalid system literal"
+ end
+ "garbage after system literal"
+ else
+ # Neither of the branches above applied.
+ unless @source.match(/\A\s*(?:PUBLIC|SYSTEM)\s/um)
+ return "invalid ID type"
+ end
+ "ID type is missing"
+ end
+ end
+
+ # Reads the remainder of a start tag from the source and parses its
+ # attributes.
+ # +prefixes+ collects the namespace prefixes used by attribute names
+ # (except "xml"); +curr_ns+ collects the prefixes declared through
+ # xmlns:* attributes. Both are modified in place.
+ # Returns [attributes, closed]: a Hash from attribute name to raw value and
+ # a flag telling whether the tag ended with "/>".
+ # Raises REXML::ParseException for an unterminated tag, a malformed
+ # attribute, an illegal xml/xmlns declaration or a duplicate attribute.
+ def parse_attributes(prefixes, curr_ns)
+ attributes = {}
+ closed = false
+ # Consume everything up to the first ">", noting an optional "/" before it.
+ match_data = @source.match(/^(.*?)(\/)?>/um, true)
+ if match_data.nil?
+ message = "Start tag isn't ended"
+ raise REXML::ParseException.new(message, @source)
+ end
+
+ raw_attributes = match_data[1]
+ closed = !match_data[2].nil?
+ return attributes, closed if raw_attributes.nil?
+ return attributes, closed if raw_attributes.empty?
+
+ scanner = StringScanner.new(raw_attributes)
+ until scanner.eos?
+ if scanner.scan(/\s+/)
+ break if scanner.eos?
+ end
+
+ # Remember where this attribute starts so the scan can be retried after
+ # more input has been appended.
+ pos = scanner.pos
+ loop do
+ break if scanner.scan(ATTRIBUTE_PATTERN)
+ # The full pattern failed: scan piece by piece to report what is wrong.
+ unless scanner.scan(QNAME)
+ message = "Invalid attribute name: <#{scanner.rest}>"
+ raise REXML::ParseException.new(message, @source)
+ end
+ name = scanner[0]
+ unless scanner.scan(/\s*=\s*/um)
+ message = "Missing attribute equal: <#{name}>"
+ raise REXML::ParseException.new(message, @source)
+ end
+ quote = scanner.scan(/['"]/)
+ unless quote
+ message = "Missing attribute value start quote: <#{name}>"
+ raise REXML::ParseException.new(message, @source)
+ end
+ unless scanner.scan(/.*#{Regexp.escape(quote)}/um)
+ # No closing quote in the text read so far: the ">" that ended the
+ # first read was part of the value. Read up to the next ">", put the
+ # consumed "/" and ">" back in front of the new text and retry from
+ # the start of this attribute.
+ match_data = @source.match(/^(.*?)(\/)?>/um, true)
+ if match_data
+ scanner << "/" if closed
+ scanner << ">"
+ scanner << match_data[1]
+ scanner.pos = pos
+ closed = !match_data[2].nil?
+ next
+ end
+ message =
+ "Missing attribute value end quote: <#{name}>: <#{quote}>"
+ raise REXML::ParseException.new(message, @source)
+ end
+ end
+ # Capture groups of ATTRIBUTE_PATTERN: full name, prefix, local part,
+ # quote character, value.
+ name = scanner[1]
+ prefix = scanner[2]
+ local_part = scanner[3]
+ # quote = scanner[4]
+ value = scanner[5]
+ if prefix == "xmlns"
+ if local_part == "xml"
+ if value != "http://www.w3.org/XML/1998/namespace"
+ msg = "The 'xml' prefix must not be bound to any other namespace "+
+ "(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
+ raise REXML::ParseException.new( msg, @source, self )
+ end
+ elsif local_part == "xmlns"
+ msg = "The 'xmlns' prefix must not be declared "+
+ "(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
+ raise REXML::ParseException.new( msg, @source, self)
+ end
+ curr_ns << local_part
+ elsif prefix
+ prefixes << prefix unless prefix == "xml"
+ end
+
+ if attributes.has_key?(name)
+ msg = "Duplicate attribute #{name.inspect}"
+ raise REXML::ParseException.new(msg, @source, self)
+ end
+
+ attributes[name] = value
+ end
+ return attributes, closed
+ end
end
end
end
diff --git a/lib/rexml/parsers/lightparser.rb b/lib/rexml/parsers/lightparser.rb
index 0f35034993..f0601ae51b 100644
--- a/lib/rexml/parsers/lightparser.rb
+++ b/lib/rexml/parsers/lightparser.rb
@@ -1,14 +1,15 @@
+# frozen_string_literal: false
require 'rexml/parsers/streamparser'
require 'rexml/parsers/baseparser'
require 'rexml/light/node'
module REXML
- module Parsers
- class LightParser
- def initialize stream
- @stream = stream
- @parser = REXML::Parsers::BaseParser.new( stream )
- end
+ module Parsers
+ class LightParser
+ # Stores +stream+ and creates the BaseParser that reads from it.
+ def initialize stream
+ @stream = stream
+ @parser = REXML::Parsers::BaseParser.new( stream )
+ end
def add_listener( listener )
@parser.add_listener( listener )
@@ -19,42 +20,40 @@ module REXML
@parser.stream = @stream
end
- def parse
- root = context = [ :document ]
- while true
- event = @parser.pull
- case event[0]
- when :end_document
- break
- when :end_doctype
- context = context[1]
- when :start_element, :start_doctype
- new_node = event
- context << new_node
- new_node[1,0] = [context]
- context = new_node
- when :end_element, :end_doctype
- context = context[1]
- else
- new_node = event
- context << new_node
- new_node[1,0] = [context]
- end
- end
- root
- end
- end
+ def parse
+ root = context = [ :document ]
+ while true
+ event = @parser.pull
+ case event[0]
+ when :end_document
+ break
+ when :start_element, :start_doctype
+ new_node = event
+ context << new_node
+ new_node[1,0] = [context]
+ context = new_node
+ when :end_element, :end_doctype
+ context = context[1]
+ else
+ new_node = event
+ context << new_node
+ new_node[1,0] = [context]
+ end
+ end
+ root
+ end
+ end
- # An element is an array. The array contains:
- # 0 The parent element
- # 1 The tag name
- # 2 A hash of attributes
- # 3..-1 The child elements
- # An element is an array of size > 3
- # Text is a String
- # PIs are [ :processing_instruction, target, data ]
- # Comments are [ :comment, data ]
- # DocTypes are DocType structs
- # The root is an array with XMLDecls, Text, DocType, Array, Text
- end
+ # An element is an array. The array contains:
+ # 0 The parent element
+ # 1 The tag name
+ # 2 A hash of attributes
+ # 3..-1 The child elements
+ # An element is an array of size > 3
+ # Text is a String
+ # PIs are [ :processing_instruction, target, data ]
+ # Comments are [ :comment, data ]
+ # DocTypes are DocType structs
+ # The root is an array with XMLDecls, Text, DocType, Array, Text
+ end
end
diff --git a/lib/rexml/parsers/pullparser.rb b/lib/rexml/parsers/pullparser.rb
index 36dc7160c3..8c49217553 100644
--- a/lib/rexml/parsers/pullparser.rb
+++ b/lib/rexml/parsers/pullparser.rb
@@ -1,3 +1,4 @@
+# frozen_string_literal: false
require 'forwardable'
require 'rexml/parseexception'
@@ -68,7 +69,7 @@ module REXML
event = @parser.pull
case event[0]
when :entitydecl
- @entities[ event[1] ] =
+ @entities[ event[1] ] =
event[2] unless event[2] =~ /PUBLIC|SYSTEM/
when :text
unnormalized = @parser.unnormalize( event[1], @entities )
diff --git a/lib/rexml/parsers/sax2parser.rb b/lib/rexml/parsers/sax2parser.rb
index e402eb7747..1386f69c83 100644
--- a/lib/rexml/parsers/sax2parser.rb
+++ b/lib/rexml/parsers/sax2parser.rb
@@ -1,165 +1,175 @@
+# frozen_string_literal: false
require 'rexml/parsers/baseparser'
require 'rexml/parseexception'
require 'rexml/namespace'
require 'rexml/text'
module REXML
- module Parsers
+ module Parsers
# SAX2Parser
- class SAX2Parser
- def initialize source
- @parser = BaseParser.new(source)
- @listeners = []
- @procs = []
- @namespace_stack = []
- @has_listeners = false
- @tag_stack = []
+ class SAX2Parser
+ def initialize source
+ @parser = BaseParser.new(source)
+ @listeners = []
+ @procs = []
+ @namespace_stack = []
+ @has_listeners = false
+ @tag_stack = []
@entities = {}
- end
+ end
def source
@parser.source
end
-
+
def add_listener( listener )
@parser.add_listener( listener )
end
- # Listen arguments:
- #
- # Symbol, Array, Block
- # Listen to Symbol events on Array elements
- # Symbol, Block
- # Listen to Symbol events
- # Array, Listener
- # Listen to all events on Array elements
- # Array, Block
- # Listen to :start_element events on Array elements
- # Listener
- # Listen to All events
- #
- # Symbol can be one of: :start_element, :end_element,
- # :start_prefix_mapping, :end_prefix_mapping, :characters,
- # :processing_instruction, :doctype, :attlistdecl, :elementdecl,
- # :entitydecl, :notationdecl, :cdata, :xmldecl, :comment
+ # Listen arguments:
+ #
+ # Symbol, Array, Block
+ # Listen to Symbol events on Array elements
+ # Symbol, Block
+ # Listen to Symbol events
+ # Array, Listener
+ # Listen to all events on Array elements
+ # Array, Block
+ # Listen to :start_element events on Array elements
+ # Listener
+ # Listen to All events
+ #
+ # Symbol can be one of: :start_element, :end_element,
+ # :start_prefix_mapping, :end_prefix_mapping, :characters,
+ # :processing_instruction, :doctype, :attlistdecl, :elementdecl,
+ # :entitydecl, :notationdecl, :cdata, :xmldecl, :comment
#
# There is an additional symbol that can be listened for: :progress.
- # This will be called for every event generated, passing in the current
+ # This will be called for every event generated, passing in the current
# stream position.
- #
- # Array contains regular expressions or strings which will be matched
- # against fully qualified element names.
- #
- # Listener must implement the methods in SAX2Listener
- #
- # Block will be passed the same arguments as a SAX2Listener method would
- # be, where the method name is the same as the matched Symbol.
- # See the SAX2Listener for more information.
- def listen( *args, &blok )
- if args[0].kind_of? Symbol
- if args.size == 2
- args[1].each { |match| @procs << [args[0], match, blok] }
- else
- add( [args[0], nil, blok] )
- end
- elsif args[0].kind_of? Array
- if args.size == 2
- args[0].each { |match| add( [nil, match, args[1]] ) }
- else
- args[0].each { |match| add( [ :start_element, match, blok ] ) }
- end
- else
- add([nil, nil, args[0]])
- end
- end
-
- def deafen( listener=nil, &blok )
- if listener
- @listeners.delete_if {|item| item[-1] == listener }
- @has_listeners = false if @listeners.size == 0
- else
- @procs.delete_if {|item| item[-1] == blok }
- end
- end
-
- def parse
- @procs.each { |sym,match,block| block.call if sym == :start_document }
- @listeners.each { |sym,match,block|
- block.start_document if sym == :start_document or sym.nil?
- }
- root = context = []
- while true
- event = @parser.pull
- case event[0]
- when :end_document
- handle( :end_document )
- break
+ #
+ # Array contains regular expressions or strings which will be matched
+ # against fully qualified element names.
+ #
+ # Listener must implement the methods in SAX2Listener
+ #
+ # Block will be passed the same arguments as a SAX2Listener method would
+ # be, where the method name is the same as the matched Symbol.
+ # See the SAX2Listener for more information.
+ def listen( *args, &blok )
+ if args[0].kind_of? Symbol
+ if args.size == 2
+ args[1].each { |match| @procs << [args[0], match, blok] }
+ else
+ add( [args[0], nil, blok] )
+ end
+ elsif args[0].kind_of? Array
+ if args.size == 2
+ args[0].each { |match| add( [nil, match, args[1]] ) }
+ else
+ args[0].each { |match| add( [ :start_element, match, blok ] ) }
+ end
+ else
+ add([nil, nil, args[0]])
+ end
+ end
+
+ def deafen( listener=nil, &blok )
+ if listener
+ @listeners.delete_if {|item| item[-1] == listener }
+ @has_listeners = false if @listeners.size == 0
+ else
+ @procs.delete_if {|item| item[-1] == blok }
+ end
+ end
+
+ def parse
+ @procs.each { |sym,match,block| block.call if sym == :start_document }
+ @listeners.each { |sym,match,block|
+ block.start_document if sym == :start_document or sym.nil?
+ }
+ context = []
+ while true
+ event = @parser.pull
+ case event[0]
+ when :end_document
+ handle( :end_document )
+ break
when :start_doctype
handle( :doctype, *event[1..-1])
- when :end_doctype
- context = context[1]
- when :start_element
- @tag_stack.push(event[1])
- # find the observers for namespaces
- procs = get_procs( :start_prefix_mapping, event[1] )
- listeners = get_listeners( :start_prefix_mapping, event[1] )
- if procs or listeners
- # break out the namespace declarations
- # The attributes live in event[2]
- event[2].each {|n, v| event[2][n] = @parser.normalize(v)}
- nsdecl = event[2].find_all { |n, value| n =~ /^xmlns(:|$)/ }
- nsdecl.collect! { |n, value| [ n[6..-1], value ] }
- @namespace_stack.push({})
- nsdecl.each do |n,v|
- @namespace_stack[-1][n] = v
- # notify observers of namespaces
- procs.each { |ob| ob.call( n, v ) } if procs
- listeners.each { |ob| ob.start_prefix_mapping(n, v) } if listeners
- end
- end
- event[1] =~ Namespace::NAMESPLIT
- prefix = $1
- local = $2
- uri = get_namespace(prefix)
- # find the observers for start_element
- procs = get_procs( :start_element, event[1] )
- listeners = get_listeners( :start_element, event[1] )
- # notify observers
- procs.each { |ob| ob.call( uri, local, event[1], event[2] ) } if procs
- listeners.each { |ob|
- ob.start_element( uri, local, event[1], event[2] )
- } if listeners
- when :end_element
- @tag_stack.pop
- event[1] =~ Namespace::NAMESPLIT
- prefix = $1
- local = $2
- uri = get_namespace(prefix)
- # find the observers for start_element
- procs = get_procs( :end_element, event[1] )
- listeners = get_listeners( :end_element, event[1] )
- # notify observers
- procs.each { |ob| ob.call( uri, local, event[1] ) } if procs
- listeners.each { |ob|
- ob.end_element( uri, local, event[1] )
- } if listeners
+ when :end_doctype
+ context = context[1]
+ when :start_element
+ @tag_stack.push(event[1])
+ # find the observers for namespaces
+ procs = get_procs( :start_prefix_mapping, event[1] )
+ listeners = get_listeners( :start_prefix_mapping, event[1] )
+ if procs or listeners
+ # break out the namespace declarations
+ # The attributes live in event[2]
+ event[2].each {|n, v| event[2][n] = @parser.normalize(v)}
+ nsdecl = event[2].find_all { |n, value| n =~ /^xmlns(:|$)/ }
+ nsdecl.collect! { |n, value| [ n[6..-1], value ] }
+ @namespace_stack.push({})
+ nsdecl.each do |n,v|
+ @namespace_stack[-1][n] = v
+ # notify observers of namespaces
+ procs.each { |ob| ob.call( n, v ) } if procs
+ listeners.each { |ob| ob.start_prefix_mapping(n, v) } if listeners
+ end
+ end
+ event[1] =~ Namespace::NAMESPLIT
+ prefix = $1
+ local = $2
+ uri = get_namespace(prefix)
+ # find the observers for start_element
+ procs = get_procs( :start_element, event[1] )
+ listeners = get_listeners( :start_element, event[1] )
+ # notify observers
+ procs.each { |ob| ob.call( uri, local, event[1], event[2] ) } if procs
+ listeners.each { |ob|
+ ob.start_element( uri, local, event[1], event[2] )
+ } if listeners
+ when :end_element
+ @tag_stack.pop
+ event[1] =~ Namespace::NAMESPLIT
+ prefix = $1
+ local = $2
+ uri = get_namespace(prefix)
+ # find the observers for start_element
+ procs = get_procs( :end_element, event[1] )
+ listeners = get_listeners( :end_element, event[1] )
+ # notify observers
+ procs.each { |ob| ob.call( uri, local, event[1] ) } if procs
+ listeners.each { |ob|
+ ob.end_element( uri, local, event[1] )
+ } if listeners
- namespace_mapping = @namespace_stack.pop
- # find the observers for namespaces
- procs = get_procs( :end_prefix_mapping, event[1] )
- listeners = get_listeners( :end_prefix_mapping, event[1] )
- if procs or listeners
- namespace_mapping.each do |prefix, uri|
- # notify observers of namespaces
- procs.each { |ob| ob.call( prefix ) } if procs
- listeners.each { |ob| ob.end_prefix_mapping(prefix) } if listeners
- end
- end
- when :text
+ namespace_mapping = @namespace_stack.pop
+ # find the observers for namespaces
+ procs = get_procs( :end_prefix_mapping, event[1] )
+ listeners = get_listeners( :end_prefix_mapping, event[1] )
+ if procs or listeners
+ namespace_mapping.each do |ns_prefix, ns_uri|
+ # notify observers of namespaces
+ procs.each { |ob| ob.call( ns_prefix ) } if procs
+ listeners.each { |ob| ob.end_prefix_mapping(ns_prefix) } if listeners
+ end
+ end
+ when :text
#normalized = @parser.normalize( event[1] )
#handle( :characters, normalized )
copy = event[1].clone
- @entities.each { |key, value| copy = copy.gsub("&#{key};", value) }
+
+ esub = proc { |match|
+ if @entities.has_key?($1)
+ @entities[$1].gsub(Text::REFERENCE, &esub)
+ else
+ match
+ end
+ }
+
+ copy.gsub!( Text::REFERENCE, &esub )
copy.gsub!( Text::NUMERICENTITY ) {|m|
m=$1
m = "0#{m}" if m[0] == ?x
@@ -167,72 +177,97 @@ module REXML
}
handle( :characters, copy )
when :entitydecl
- @entities[ event[1] ] = event[2] if event.size == 3
- handle( *event )
- when :processing_instruction, :comment, :attlistdecl,
- :elementdecl, :cdata, :notationdecl, :xmldecl
- handle( *event )
- end
+ handle_entitydecl( event )
+ when :processing_instruction, :comment, :attlistdecl,
+ :elementdecl, :cdata, :notationdecl, :xmldecl
+ handle( *event )
+ end
handle( :progress, @parser.position )
- end
- end
+ end
+ end
- private
- def handle( symbol, *arguments )
- tag = @tag_stack[-1]
- procs = get_procs( symbol, tag )
- listeners = get_listeners( symbol, tag )
- # notify observers
- procs.each { |ob| ob.call( *arguments ) } if procs
- listeners.each { |l|
- l.send( symbol.to_s, *arguments )
- } if listeners
- end
+ private
+ def handle( symbol, *arguments )
+ tag = @tag_stack[-1]
+ procs = get_procs( symbol, tag )
+ listeners = get_listeners( symbol, tag )
+ # notify observers
+ procs.each { |ob| ob.call( *arguments ) } if procs
+ listeners.each { |l|
+ l.send( symbol.to_s, *arguments )
+ } if listeners
+ end
- # The following methods are duplicates, but it is faster than using
- # a helper
- def get_procs( symbol, name )
- return nil if @procs.size == 0
- @procs.find_all do |sym, match, block|
- #puts sym.inspect+"=="+symbol.inspect+ "\t"+match.inspect+"=="+name.inspect+ "\t"+( (sym.nil? or symbol == sym) and ((name.nil? and match.nil?) or match.nil? or ( (name == match) or (match.kind_of? Regexp and name =~ match)))).to_s
- (
- (sym.nil? or symbol == sym) and
- ((name.nil? and match.nil?) or match.nil? or (
- (name == match) or
- (match.kind_of? Regexp and name =~ match)
- )
- )
- )
- end.collect{|x| x[-1]}
- end
- def get_listeners( symbol, name )
- return nil if @listeners.size == 0
- @listeners.find_all do |sym, match, block|
- (
- (sym.nil? or symbol == sym) and
- ((name.nil? and match.nil?) or match.nil? or (
- (name == match) or
- (match.kind_of? Regexp and name =~ match)
- )
- )
- )
- end.collect{|x| x[-1]}
- end
+ def handle_entitydecl( event )
+ @entities[ event[1] ] = event[2] if event.size == 3
+ parameter_reference_p = false
+ case event[2]
+ when "SYSTEM"
+ if event.size == 5
+ if event.last == "%"
+ parameter_reference_p = true
+ else
+ event[4, 0] = "NDATA"
+ end
+ end
+ when "PUBLIC"
+ if event.size == 6
+ if event.last == "%"
+ parameter_reference_p = true
+ else
+ event[5, 0] = "NDATA"
+ end
+ end
+ else
+ parameter_reference_p = (event.size == 4)
+ end
+ event[1, 0] = event.pop if parameter_reference_p
+ handle( event[0], event[1..-1] )
+ end
- def add( pair )
- if pair[-1].respond_to? :call
- @procs << pair unless @procs.include? pair
- else
- @listeners << pair unless @listeners.include? pair
- @has_listeners = true
- end
- end
+ # The following methods are duplicates, but it is faster than using
+ # a helper
+ def get_procs( symbol, name )
+ return nil if @procs.size == 0
+ @procs.find_all do |sym, match, block|
+ (
+ (sym.nil? or symbol == sym) and
+ ((name.nil? and match.nil?) or match.nil? or (
+ (name == match) or
+ (match.kind_of? Regexp and name =~ match)
+ )
+ )
+ )
+ end.collect{|x| x[-1]}
+ end
+ def get_listeners( symbol, name )
+ return nil if @listeners.size == 0
+ @listeners.find_all do |sym, match, block|
+ (
+ (sym.nil? or symbol == sym) and
+ ((name.nil? and match.nil?) or match.nil? or (
+ (name == match) or
+ (match.kind_of? Regexp and name =~ match)
+ )
+ )
+ )
+ end.collect{|x| x[-1]}
+ end
- def get_namespace( prefix )
+ def add( pair )
+ if pair[-1].respond_to? :call
+ @procs << pair unless @procs.include? pair
+ else
+ @listeners << pair unless @listeners.include? pair
+ @has_listeners = true
+ end
+ end
+
+ def get_namespace( prefix )
uris = (@namespace_stack.find_all { |ns| not ns[prefix].nil? }) ||
- (@namespace_stack.find { |ns| not ns[nil].nil? })
- uris[-1][prefix] unless uris.nil? or 0 == uris.size
- end
- end
- end
+ (@namespace_stack.find { |ns| not ns[nil].nil? })
+ uris[-1][prefix] unless uris.nil? or 0 == uris.size
+ end
+ end
+ end
end
diff --git a/lib/rexml/parsers/streamparser.rb b/lib/rexml/parsers/streamparser.rb
index 256d0f611c..f6a8bfa802 100644
--- a/lib/rexml/parsers/streamparser.rb
+++ b/lib/rexml/parsers/streamparser.rb
@@ -1,29 +1,40 @@
+# frozen_string_literal: false
+require "rexml/parsers/baseparser"
+
module REXML
module Parsers
class StreamParser
def initialize source, listener
@listener = listener
@parser = BaseParser.new( source )
+ @tag_stack = []
end
-
+
def add_listener( listener )
@parser.add_listener( listener )
end
-
+
def parse
# entity string
while true
event = @parser.pull
case event[0]
when :end_document
+ unless @tag_stack.empty?
+ tag_path = "/" + @tag_stack.join("/")
+ raise ParseException.new("Missing end tag for '#{tag_path}'",
+ @parser.source)
+ end
return
when :start_element
+ @tag_stack << event[1]
attrs = event[2].each do |n, v|
event[2][n] = @parser.unnormalize( v )
end
@listener.tag_start( event[1], attrs )
when :end_element
@listener.tag_end( event[1] )
+ @tag_stack.pop
when :text
normalized = @parser.unnormalize( event[1] )
@listener.text( normalized )
@@ -38,6 +49,10 @@ module REXML
@listener.send( event[0].to_s, *event[1..-1] )
when :entitydecl, :notationdecl
@listener.send( event[0].to_s, event[1..-1] )
+ when :externalentity
+ entity_reference = event[1]
+ content = entity_reference.gsub(/\A%|;\z/, "")
+ @listener.entity(content)
end
end
end
diff --git a/lib/rexml/parsers/treeparser.rb b/lib/rexml/parsers/treeparser.rb
index 5c3e142ea7..fc0993c72a 100644
--- a/lib/rexml/parsers/treeparser.rb
+++ b/lib/rexml/parsers/treeparser.rb
@@ -1,3 +1,4 @@
+# frozen_string_literal: false
require 'rexml/validation/validationexception'
require 'rexml/undefinednamespaceexception'
@@ -24,13 +25,16 @@ module REXML
case event[0]
when :end_document
unless tag_stack.empty?
- #raise ParseException.new("No close tag for #{tag_stack.inspect}")
- raise ParseException.new("No close tag for #{@build_context.xpath}")
+ raise ParseException.new("No close tag for #{@build_context.xpath}",
+ @parser.source, @parser)
end
return
when :start_element
tag_stack.push(event[1])
- el = @build_context = @build_context.add_element( event[1], event[2] )
+ el = @build_context = @build_context.add_element( event[1] )
+ event[2].each do |key, value|
+ el.attributes[key]=Attribute.new(key,value,self)
+ end
when :end_element
tag_stack.pop
@build_context = @build_context.parent
@@ -39,8 +43,8 @@ module REXML
if @build_context[-1].instance_of? Text
@build_context[-1] << event[1]
else
- @build_context.add(
- Text.new(event[1], @build_context.whitespace, nil, true)
+ @build_context.add(
+ Text.new(event[1], @build_context.whitespace, nil, true)
) unless (
@build_context.ignore_whitespace_nodes and
event[1].strip.size==0
@@ -86,7 +90,7 @@ module REXML
end
rescue REXML::Validation::ValidationException
raise
- rescue REXML::UndefinedNamespaceException
+ rescue REXML::ParseException
raise
rescue
raise ParseException.new( $!.message, @parser.source, @parser, $! )
diff --git a/lib/rexml/parsers/ultralightparser.rb b/lib/rexml/parsers/ultralightparser.rb
index adc4af18e2..6571d119bd 100644
--- a/lib/rexml/parsers/ultralightparser.rb
+++ b/lib/rexml/parsers/ultralightparser.rb
@@ -1,13 +1,14 @@
+# frozen_string_literal: false
require 'rexml/parsers/streamparser'
require 'rexml/parsers/baseparser'
module REXML
- module Parsers
- class UltraLightParser
- def initialize stream
- @stream = stream
- @parser = REXML::Parsers::BaseParser.new( stream )
- end
+ module Parsers
+ class UltraLightParser
+ def initialize stream
+ @stream = stream
+ @parser = REXML::Parsers::BaseParser.new( stream )
+ end
def add_listener( listener )
@parser.add_listener( listener )
@@ -18,39 +19,39 @@ module REXML
@parser.stream = @stream
end
- def parse
- root = context = []
- while true
- event = @parser.pull
- case event[0]
- when :end_document
- break
- when :end_doctype
- context = context[1]
- when :start_element, :doctype
- context << event
- event[1,0] = [context]
- context = event
- when :end_element
- context = context[1]
- else
- context << event
- end
- end
- root
- end
- end
+ def parse
+ root = context = []
+ while true
+ event = @parser.pull
+ case event[0]
+ when :end_document
+ break
+ when :end_doctype
+ context = context[1]
+ when :start_element, :start_doctype
+ context << event
+ event[1,0] = [context]
+ context = event
+ when :end_element
+ context = context[1]
+ else
+ context << event
+ end
+ end
+ root
+ end
+ end
- # An element is an array. The array contains:
- # 0 The parent element
- # 1 The tag name
- # 2 A hash of attributes
- # 3..-1 The child elements
- # An element is an array of size > 3
- # Text is a String
- # PIs are [ :processing_instruction, target, data ]
- # Comments are [ :comment, data ]
- # DocTypes are DocType structs
- # The root is an array with XMLDecls, Text, DocType, Array, Text
- end
+ # An element is an array. The array contains:
+ # 0 The parent element
+ # 1 The tag name
+ # 2 A hash of attributes
+ # 3..-1 The child elements
+ # An element is an array of size > 3
+ # Text is a String
+ # PIs are [ :processing_instruction, target, data ]
+ # Comments are [ :comment, data ]
+ # DocTypes are DocType structs
+ # The root is an array with XMLDecls, Text, DocType, Array, Text
+ end
end
diff --git a/lib/rexml/parsers/xpathparser.rb b/lib/rexml/parsers/xpathparser.rb
index de2530e347..32b70bb798 100644
--- a/lib/rexml/parsers/xpathparser.rb
+++ b/lib/rexml/parsers/xpathparser.rb
@@ -1,3 +1,4 @@
+# frozen_string_literal: false
require 'rexml/namespace'
require 'rexml/xmltokens'
@@ -17,10 +18,11 @@ module REXML
end
def parse path
+ path = path.dup
path.gsub!(/([\(\[])\s+/, '\1') # Strip ignorable spaces
- path.gsub!( /\s+([\]\)])/, '\1' )
+ path.gsub!( /\s+([\]\)])/, '\1')
parsed = []
- path = OrExpr(path, parsed)
+ OrExpr(path, parsed)
parsed
end
@@ -39,10 +41,10 @@ module REXML
case op
when :node
when :attribute
- string << "/" if string.size > 0
- string << "@"
+ string << "/" if string.size > 0
+ string << "@"
when :child
- string << "/" if string.size > 0
+ string << "/" if string.size > 0
when :descendant_or_self
string << "/"
when :self
@@ -51,10 +53,10 @@ module REXML
string << ".."
when :any
string << "*"
- when :text
- string << "text()"
- when :following, :following_sibling,
- :ancestor, :ancestor_or_self, :descendant,
+ when :text
+ string << "text()"
+ when :following, :following_sibling,
+ :ancestor, :ancestor_or_self, :descendant,
:namespace, :preceding, :preceding_sibling
string << "/" unless string.size == 0
string << op.to_s.tr("_", "-")
@@ -70,13 +72,13 @@ module REXML
string << ']'
when :document
document = true
- when :function
- string << path.shift
- string << "( "
- string << predicate_to_string( path.shift[0] ) {|x| abbreviate( x )}
- string << " )"
- when :literal
- string << %Q{ "#{path.shift}" }
+ when :function
+ string << path.shift
+ string << "( "
+ string << predicate_to_string( path.shift[0] ) {|x| abbreviate( x )}
+ string << " )"
+ when :literal
+ string << %Q{ "#{path.shift}" }
else
string << "/" unless string.size == 0
string << "UNKNOWN("
@@ -84,7 +86,7 @@ module REXML
string << ")"
end
end
- string = "/"+string if document
+ string = "/"+string if document
return string
end
@@ -97,7 +99,7 @@ module REXML
case op
when :node
string << "node()"
- when :attribute, :child, :following, :following_sibling,
+ when :attribute, :child, :following, :following_sibling,
:ancestor, :ancestor_or_self, :descendant, :descendant_or_self,
:namespace, :preceding, :preceding_sibling, :self, :parent
string << "/" unless string.size == 0
@@ -183,7 +185,6 @@ module REXML
# | '/' RelativeLocationPath?
# | '//' RelativeLocationPath
def LocationPath path, parsed
- #puts "LocationPath '#{path}'"
path = path.strip
if path[0] == ?/
parsed << :document
@@ -195,7 +196,6 @@ module REXML
path = path[1..-1]
end
end
- #puts parsed.inspect
return RelativeLocationPath( path, parsed ) if path.size > 0
end
@@ -209,7 +209,6 @@ module REXML
# | RelativeLocationPath '//' Step
AXIS = /^(ancestor|ancestor-or-self|attribute|child|descendant|descendant-or-self|following|following-sibling|namespace|parent|preceding|preceding-sibling|self)::/
def RelativeLocationPath path, parsed
- #puts "RelativeLocationPath #{path}"
while path.size > 0
# (axis or @ or <child::>) nodetest predicate >
# OR > / Step
@@ -226,7 +225,6 @@ module REXML
end
else
if path[0] == ?@
- #puts "ATTRIBUTE"
parsed << :attribute
path = path[1..-1]
# Goto Nodetest
@@ -238,10 +236,8 @@ module REXML
parsed << :child
end
- #puts "NODETESTING '#{path}'"
n = []
path = NodeTest( path, n)
- #puts "NODETEST RETURNED '#{path}'"
if path[0] == ?[
path = Predicate( path, n )
@@ -249,7 +245,7 @@ module REXML
parsed.concat(n)
end
-
+
if path.size > 0
if path[0] == ?/
if path[1] == ?/
@@ -281,8 +277,6 @@ module REXML
NODE_TYPE = /^(comment|text|node)\(\s*\)/m
PI = /^processing-instruction\(/
def NodeTest path, parsed
- #puts "NodeTest with #{path}"
- res = nil
case path
when /^\*/
path = $'
@@ -304,13 +298,11 @@ module REXML
parsed << :processing_instruction
parsed << (literal || '')
when NCNAMETEST
- #puts "NCNAMETEST"
prefix = $1
path = $'
parsed << :namespace
parsed << prefix
when QNAME
- #puts "QNAME"
prefix = $1
name = $2
path = $'
@@ -324,22 +316,18 @@ module REXML
# Filters the supplied nodeset on the predicate(s)
def Predicate path, parsed
- #puts "PREDICATE with #{path}"
return nil unless path[0] == ?[
predicates = []
while path[0] == ?[
path, expr = get_group(path)
predicates << expr[1..-2] if expr
end
- #puts "PREDICATES = #{predicates.inspect}"
- predicates.each{ |expr|
- #puts "ORING #{expr}"
+ predicates.each{ |pred|
preds = []
parsed << :predicate
parsed << preds
- OrExpr(expr, preds)
+ OrExpr(pred, preds)
}
- #puts "PREDICATES = #{predicates.inspect}"
path
end
@@ -350,10 +338,8 @@ module REXML
#| OrExpr S 'or' S AndExpr
#| AndExpr
def OrExpr path, parsed
- #puts "OR >>> #{path}"
n = []
rest = AndExpr( path, n )
- #puts "OR <<< #{rest}"
if rest != path
while rest =~ /^\s*( or )/
n = [ :or, n, [] ]
@@ -371,16 +357,12 @@ module REXML
#| AndExpr S 'and' S EqualityExpr
#| EqualityExpr
def AndExpr path, parsed
- #puts "AND >>> #{path}"
n = []
rest = EqualityExpr( path, n )
- #puts "AND <<< #{rest}"
if rest != path
while rest =~ /^\s*( and )/
n = [ :and, n, [] ]
- #puts "AND >>> #{rest}"
rest = EqualityExpr( $', n[-1] )
- #puts "AND <<< #{rest}"
end
end
if parsed.size == 0 and n.size != 0
@@ -394,10 +376,8 @@ module REXML
#| EqualityExpr ('=' | '!=') RelationalExpr
#| RelationalExpr
def EqualityExpr path, parsed
- #puts "EQUALITY >>> #{path}"
n = []
rest = RelationalExpr( path, n )
- #puts "EQUALITY <<< #{rest}"
if rest != path
while rest =~ /^\s*(!?=)\s*/
if $1[0] == ?!
@@ -419,10 +399,8 @@ module REXML
#| RelationalExpr ('<' | '>' | '<=' | '>=') AdditiveExpr
#| AdditiveExpr
def RelationalExpr path, parsed
- #puts "RELATION >>> #{path}"
n = []
rest = AdditiveExpr( path, n )
- #puts "RELATION <<< #{rest}"
if rest != path
while rest =~ /^\s*([<>]=?)\s*/
if $1[0] == ?<
@@ -446,10 +424,8 @@ module REXML
#| AdditiveExpr ('+' | S '-') MultiplicativeExpr
#| MultiplicativeExpr
def AdditiveExpr path, parsed
- #puts "ADDITIVE >>> #{path}"
n = []
rest = MultiplicativeExpr( path, n )
- #puts "ADDITIVE <<< #{rest}"
if rest != path
while rest =~ /^\s*(\+| -)\s*/
if $1[0] == ?+
@@ -471,10 +447,8 @@ module REXML
#| MultiplicativeExpr ('*' | S ('div' | 'mod') S) UnaryExpr
#| UnaryExpr
def MultiplicativeExpr path, parsed
- #puts "MULT >>> #{path}"
n = []
rest = UnaryExpr( path, n )
- #puts "MULT <<< #{rest}"
if rest != path
while rest =~ /^\s*(\*| div | mod )\s*/
if $1[0] == ?*
@@ -507,10 +481,8 @@ module REXML
end
parsed << :neg if mult < 0
- #puts "UNARY >>> #{path}"
n = []
path = UnionExpr( path, n )
- #puts "UNARY <<< #{path}"
parsed.concat( n )
path
end
@@ -518,10 +490,8 @@ module REXML
#| UnionExpr '|' PathExpr
#| PathExpr
def UnionExpr path, parsed
- #puts "UNION >>> #{path}"
n = []
rest = PathExpr( path, n )
- #puts "UNION <<< #{rest}"
if rest != path
while rest =~ /^\s*(\|)\s*/
n = [ :union, n, [] ]
@@ -541,17 +511,14 @@ module REXML
def PathExpr path, parsed
path =~ /^\s*/
path = $'
- #puts "PATH >>> #{path}"
n = []
rest = FilterExpr( path, n )
- #puts "PATH <<< '#{rest}'"
if rest != path
if rest and rest[0] == ?/
return RelativeLocationPath(rest, n)
end
end
- #puts "BEFORE WITH '#{rest}'"
- rest = LocationPath(rest, n) if rest =~ /\A[\/\.\@\[\w_*]/
+ rest = LocationPath(rest, n) if rest =~ /\A[\/\.\@\[\w*]/
parsed.concat(n)
return rest
end
@@ -559,12 +526,9 @@ module REXML
#| FilterExpr Predicate
#| PrimaryExpr
def FilterExpr path, parsed
- #puts "FILTER >>> #{path}"
n = []
path = PrimaryExpr( path, n )
- #puts "FILTER <<< #{path}"
path = Predicate(path, n) if path and path[0] == ?[
- #puts "FILTER <<< #{path}"
parsed.concat(n)
path
end
@@ -578,7 +542,6 @@ module REXML
NUMBER = /^(\d*\.?\d+)/
NT = /^comment|text|processing-instruction|node$/
def PrimaryExpr path, parsed
- arry = []
case path
when VARIABLE_REFERENCE
varname = $1
@@ -587,26 +550,22 @@ module REXML
parsed << varname
#arry << @variables[ varname ]
when /^(\w[-\w]*)(?:\()/
- #puts "PrimaryExpr :: Function >>> #$1 -- '#$''"
fname = $1
tmp = $'
- #puts "#{fname} =~ #{NT.inspect}"
return path if fname =~ NT
path = tmp
parsed << :function
parsed << fname
path = FunctionCall(path, parsed)
when NUMBER
- #puts "LITERAL or NUMBER: #$1"
varname = $1.nil? ? $2 : $1
path = $'
- parsed << :literal
+ parsed << :literal
parsed << (varname.include?('.') ? varname.to_f : varname.to_i)
when LITERAL
- #puts "LITERAL or NUMBER: #$1"
varname = $1.nil? ? $2 : $1
path = $'
- parsed << :literal
+ parsed << :literal
parsed << varname
when /^\(/ #/
path, contents = get_group(path)
@@ -649,43 +608,43 @@ module REXML
return nil unless depth==0
[string[ind..-1], string[0..ind-1]]
end
-
+
def parse_args( string )
arguments = []
ind = 0
- inquot = false
- inapos = false
+ inquot = false
+ inapos = false
depth = 1
begin
case string[ind]
when ?"
- inquot = !inquot unless inapos
+ inquot = !inquot unless inapos
when ?'
- inapos = !inapos unless inquot
+ inapos = !inapos unless inquot
else
- unless inquot or inapos
- case string[ind]
- when ?(
- depth += 1
+ unless inquot or inapos
+ case string[ind]
+ when ?(
+ depth += 1
if depth == 1
- string = string[1..-1]
- ind -= 1
+ string = string[1..-1]
+ ind -= 1
+ end
+ when ?)
+ depth -= 1
+ if depth == 0
+ s = string[0,ind].strip
+ arguments << s unless s == ""
+ string = string[ind+1..-1]
end
- when ?)
- depth -= 1
- if depth == 0
- s = string[0,ind].strip
- arguments << s unless s == ""
- string = string[ind+1..-1]
- end
- when ?,
- if depth == 1
- s = string[0,ind].strip
- arguments << s unless s == ""
- string = string[ind+1..-1]
- ind = -1
- end
- end
+ when ?,
+ if depth == 1
+ s = string[0,ind].strip
+ arguments << s unless s == ""
+ string = string[ind+1..-1]
+ ind = -1
+ end
+ end
end
end
ind += 1
diff --git a/lib/rexml/quickpath.rb b/lib/rexml/quickpath.rb
index c099db8579..5d6c77ca38 100644
--- a/lib/rexml/quickpath.rb
+++ b/lib/rexml/quickpath.rb
@@ -1,266 +1,266 @@
+# frozen_string_literal: false
require 'rexml/functions'
require 'rexml/xmltokens'
module REXML
- class QuickPath
- include Functions
- include XMLTokens
+ class QuickPath
+ include Functions
+ include XMLTokens
- EMPTY_HASH = {}
+ # A base Hash object to be used when initializing a
+ # default empty namespaces set.
+ EMPTY_HASH = {}
- def QuickPath::first element, path, namespaces=EMPTY_HASH
- match(element, path, namespaces)[0]
- end
+ def QuickPath::first element, path, namespaces=EMPTY_HASH
+ match(element, path, namespaces)[0]
+ end
- def QuickPath::each element, path, namespaces=EMPTY_HASH, &block
- path = "*" unless path
- match(element, path, namespaces).each( &block )
- end
+ def QuickPath::each element, path, namespaces=EMPTY_HASH, &block
+ path = "*" unless path
+ match(element, path, namespaces).each( &block )
+ end
- def QuickPath::match element, path, namespaces=EMPTY_HASH
- raise "nil is not a valid xpath" unless path
- results = nil
- Functions::namespace_context = namespaces
- case path
- when /^\/([^\/]|$)/u
- # match on root
- path = path[1..-1]
- return [element.root.parent] if path == ''
- results = filter([element.root], path)
- when /^[-\w]*::/u
- results = filter([element], path)
- when /^\*/u
- results = filter(element.to_a, path)
- when /^[\[!\w:]/u
- # match on child
- matches = []
- children = element.to_a
- results = filter(children, path)
- else
- results = filter([element], path)
- end
- return results
- end
+ def QuickPath::match element, path, namespaces=EMPTY_HASH
+ raise "nil is not a valid xpath" unless path
+ results = nil
+ Functions::namespace_context = namespaces
+ case path
+ when /^\/([^\/]|$)/u
+ # match on root
+ path = path[1..-1]
+ return [element.root.parent] if path == ''
+ results = filter([element.root], path)
+ when /^[-\w]*::/u
+ results = filter([element], path)
+ when /^\*/u
+ results = filter(element.to_a, path)
+ when /^[\[!\w:]/u
+ # match on child
+ children = element.to_a
+ results = filter(children, path)
+ else
+ results = filter([element], path)
+ end
+ return results
+ end
- # Given an array of nodes it filters the array based on the path. The
- # result is that when this method returns, the array will contain elements
- # which match the path
- def QuickPath::filter elements, path
- return elements if path.nil? or path == '' or elements.size == 0
- case path
- when /^\/\//u # Descendant
- return axe( elements, "descendant-or-self", $' )
- when /^\/?\b(\w[-\w]*)\b::/u # Axe
- axe_name = $1
- rest = $'
- return axe( elements, $1, $' )
- when /^\/(?=\b([:!\w][-\.\w]*:)?[-!\*\.\w]*\b([^:(]|$)|\*)/u # Child
- rest = $'
- results = []
- elements.each do |element|
- results |= filter( element.to_a, rest )
- end
- return results
- when /^\/?(\w[-\w]*)\(/u # / Function
- return function( elements, $1, $' )
- when Namespace::NAMESPLIT # Element name
- name = $2
- ns = $1
- rest = $'
- elements.delete_if do |element|
- !(element.kind_of? Element and
- (element.expanded_name == name or
- (element.name == name and
- element.namespace == Functions.namespace_context[ns])))
- end
- return filter( elements, rest )
- when /^\/\[/u
- matches = []
- elements.each do |element|
- matches |= predicate( element.to_a, path[1..-1] ) if element.kind_of? Element
- end
- return matches
- when /^\[/u # Predicate
- return predicate( elements, path )
- when /^\/?\.\.\./u # Ancestor
- return axe( elements, "ancestor", $' )
- when /^\/?\.\./u # Parent
- return filter( elements.collect{|e|e.parent}, $' )
- when /^\/?\./u # Self
- return filter( elements, $' )
- when /^\*/u # Any
- results = []
- elements.each do |element|
- results |= filter( [element], $' ) if element.kind_of? Element
- #if element.kind_of? Element
- # children = element.to_a
- # children.delete_if { |child| !child.kind_of?(Element) }
- # results |= filter( children, $' )
- #end
- end
- return results
- end
- return []
- end
+ # Given an array of nodes it filters the array based on the path. The
+ # result is that when this method returns, the array will contain elements
+ # which match the path
+ def QuickPath::filter elements, path
+ return elements if path.nil? or path == '' or elements.size == 0
+ case path
+ when /^\/\//u # Descendant
+ return axe( elements, "descendant-or-self", $' )
+ when /^\/?\b(\w[-\w]*)\b::/u # Axe
+ return axe( elements, $1, $' )
+ when /^\/(?=\b([:!\w][-\.\w]*:)?[-!\*\.\w]*\b([^:(]|$)|\*)/u # Child
+ rest = $'
+ results = []
+ elements.each do |element|
+ results |= filter( element.to_a, rest )
+ end
+ return results
+ when /^\/?(\w[-\w]*)\(/u # / Function
+ return function( elements, $1, $' )
+ when Namespace::NAMESPLIT # Element name
+ name = $2
+ ns = $1
+ rest = $'
+ elements.delete_if do |element|
+ !(element.kind_of? Element and
+ (element.expanded_name == name or
+ (element.name == name and
+ element.namespace == Functions.namespace_context[ns])))
+ end
+ return filter( elements, rest )
+ when /^\/\[/u
+ matches = []
+ elements.each do |element|
+ matches |= predicate( element.to_a, path[1..-1] ) if element.kind_of? Element
+ end
+ return matches
+ when /^\[/u # Predicate
+ return predicate( elements, path )
+ when /^\/?\.\.\./u # Ancestor
+ return axe( elements, "ancestor", $' )
+ when /^\/?\.\./u # Parent
+ return filter( elements.collect{|e|e.parent}, $' )
+ when /^\/?\./u # Self
+ return filter( elements, $' )
+ when /^\*/u # Any
+ results = []
+ elements.each do |element|
+ results |= filter( [element], $' ) if element.kind_of? Element
+ #if element.kind_of? Element
+ # children = element.to_a
+ # children.delete_if { |child| !child.kind_of?(Element) }
+ # results |= filter( children, $' )
+ #end
+ end
+ return results
+ end
+ return []
+ end
- def QuickPath::axe( elements, axe_name, rest )
- matches = []
- matches = filter( elements.dup, rest ) if axe_name =~ /-or-self$/u
- case axe_name
- when /^descendant/u
- elements.each do |element|
- matches |= filter( element.to_a, "descendant-or-self::#{rest}" ) if element.kind_of? Element
- end
- when /^ancestor/u
- elements.each do |element|
- while element.parent
- matches << element.parent
- element = element.parent
- end
- end
- matches = filter( matches, rest )
- when "self"
- matches = filter( elements, rest )
- when "child"
- elements.each do |element|
- matches |= filter( element.to_a, rest ) if element.kind_of? Element
- end
- when "attribute"
- elements.each do |element|
- matches << element.attributes[ rest ] if element.kind_of? Element
- end
- when "parent"
- matches = filter(elements.collect{|element| element.parent}.uniq, rest)
- when "following-sibling"
- matches = filter(elements.collect{|element| element.next_sibling}.uniq,
- rest)
- when "previous-sibling"
- matches = filter(elements.collect{|element|
- element.previous_sibling}.uniq, rest )
- end
- return matches.uniq
- end
+ def QuickPath::axe( elements, axe_name, rest )
+ matches = []
+ matches = filter( elements.dup, rest ) if axe_name =~ /-or-self$/u
+ case axe_name
+ when /^descendant/u
+ elements.each do |element|
+ matches |= filter( element.to_a, "descendant-or-self::#{rest}" ) if element.kind_of? Element
+ end
+ when /^ancestor/u
+ elements.each do |element|
+ while element.parent
+ matches << element.parent
+ element = element.parent
+ end
+ end
+ matches = filter( matches, rest )
+ when "self"
+ matches = filter( elements, rest )
+ when "child"
+ elements.each do |element|
+ matches |= filter( element.to_a, rest ) if element.kind_of? Element
+ end
+ when "attribute"
+ elements.each do |element|
+ matches << element.attributes[ rest ] if element.kind_of? Element
+ end
+ when "parent"
+ matches = filter(elements.collect{|element| element.parent}.uniq, rest)
+ when "following-sibling"
+ matches = filter(elements.collect{|element| element.next_sibling}.uniq,
+ rest)
+ when "previous-sibling"
+ matches = filter(elements.collect{|element|
+ element.previous_sibling}.uniq, rest )
+ end
+ return matches.uniq
+ end
- # A predicate filters a node-set with respect to an axis to produce a
- # new node-set. For each node in the node-set to be filtered, the
- # PredicateExpr is evaluated with that node as the context node, with
- # the number of nodes in the node-set as the context size, and with the
- # proximity position of the node in the node-set with respect to the
- # axis as the context position; if PredicateExpr evaluates to true for
- # that node, the node is included in the new node-set; otherwise, it is
- # not included.
- #
- # A PredicateExpr is evaluated by evaluating the Expr and converting
- # the result to a boolean. If the result is a number, the result will
- # be converted to true if the number is equal to the context position
- # and will be converted to false otherwise; if the result is not a
- # number, then the result will be converted as if by a call to the
- # boolean function. Thus a location path para[3] is equivalent to
- # para[position()=3].
- def QuickPath::predicate( elements, path )
- ind = 1
- bcount = 1
- while bcount > 0
- bcount += 1 if path[ind] == ?[
- bcount -= 1 if path[ind] == ?]
- ind += 1
- end
- ind -= 1
- predicate = path[1..ind-1]
- rest = path[ind+1..-1]
+ OPERAND_ = '((?=(?:(?!and|or).)*[^\s<>=])[^\s<>=]+)'
+ # A predicate filters a node-set with respect to an axis to produce a
+ # new node-set. For each node in the node-set to be filtered, the
+ # PredicateExpr is evaluated with that node as the context node, with
+ # the number of nodes in the node-set as the context size, and with the
+ # proximity position of the node in the node-set with respect to the
+ # axis as the context position; if PredicateExpr evaluates to true for
+ # that node, the node is included in the new node-set; otherwise, it is
+ # not included.
+ #
+ # A PredicateExpr is evaluated by evaluating the Expr and converting
+ # the result to a boolean. If the result is a number, the result will
+ # be converted to true if the number is equal to the context position
+ # and will be converted to false otherwise; if the result is not a
+ # number, then the result will be converted as if by a call to the
+ # boolean function. Thus a location path para[3] is equivalent to
+ # para[position()=3].
+ def QuickPath::predicate( elements, path )
+ ind = 1
+ bcount = 1
+ while bcount > 0
+ bcount += 1 if path[ind] == ?[
+ bcount -= 1 if path[ind] == ?]
+ ind += 1
+ end
+ ind -= 1
+ predicate = path[1..ind-1]
+ rest = path[ind+1..-1]
- # have to change 'a [=<>] b [=<>] c' into 'a [=<>] b and b [=<>] c'
- predicate.gsub!( /([^\s(and)(or)<>=]+)\s*([<>=])\s*([^\s(and)(or)<>=]+)\s*([<>=])\s*([^\s(and)(or)<>=]+)/u ) {
- "#$1 #$2 #$3 and #$3 #$4 #$5"
- }
- # Let's do some Ruby trickery to avoid some work:
- predicate.gsub!( /&/u, "&&" )
- predicate.gsub!( /=/u, "==" )
- predicate.gsub!( /@(\w[-\w.]*)/u ) {
- "attribute(\"#$1\")"
- }
- predicate.gsub!( /\bmod\b/u, "%" )
- predicate.gsub!( /\b(\w[-\w.]*\()/u ) {
- fname = $1
- fname.gsub( /-/u, "_" )
- }
-
- Functions.pair = [ 0, elements.size ]
- results = []
- elements.each do |element|
- Functions.pair[0] += 1
- Functions.node = element
- res = eval( predicate )
- case res
- when true
- results << element
- when Fixnum
- results << element if Functions.pair[0] == res
- when String
- results << element
- end
- end
- return filter( results, rest )
- end
+ # have to change 'a [=<>] b [=<>] c' into 'a [=<>] b and b [=<>] c'
+ #
+ predicate.gsub!(
+ /#{OPERAND_}\s*([<>=])\s*#{OPERAND_}\s*([<>=])\s*#{OPERAND_}/u,
+ '\1 \2 \3 and \3 \4 \5' )
+ # Let's do some Ruby trickery to avoid some work:
+ predicate.gsub!( /&/u, "&&" )
+ predicate.gsub!( /=/u, "==" )
+ predicate.gsub!( /@(\w[-\w.]*)/u, 'attribute("\1")' )
+ predicate.gsub!( /\bmod\b/u, "%" )
+ predicate.gsub!( /\b(\w[-\w.]*\()/u ) {
+ fname = $1
+ fname.gsub( /-/u, "_" )
+ }
- def QuickPath::attribute( name )
- return Functions.node.attributes[name] if Functions.node.kind_of? Element
- end
+ Functions.pair = [ 0, elements.size ]
+ results = []
+ elements.each do |element|
+ Functions.pair[0] += 1
+ Functions.node = element
+ res = eval( predicate )
+ case res
+ when true
+ results << element
+ when Integer
+ results << element if Functions.pair[0] == res
+ when String
+ results << element
+ end
+ end
+ return filter( results, rest )
+ end
- def QuickPath::name()
- return Functions.node.name if Functions.node.kind_of? Element
- end
+ def QuickPath::attribute( name )
+ return Functions.node.attributes[name] if Functions.node.kind_of? Element
+ end
- def QuickPath::method_missing( id, *args )
- begin
- Functions.send( id.id2name, *args )
- rescue Exception
- raise "METHOD: #{id.id2name}(#{args.join ', '})\n#{$!.message}"
- end
- end
+ def QuickPath::name()
+ return Functions.node.name if Functions.node.kind_of? Element
+ end
- def QuickPath::function( elements, fname, rest )
- args = parse_args( elements, rest )
- Functions.pair = [0, elements.size]
- results = []
- elements.each do |element|
- Functions.pair[0] += 1
- Functions.node = element
- res = Functions.send( fname, *args )
- case res
- when true
- results << element
- when Fixnum
- results << element if Functions.pair[0] == res
- end
- end
- return results
- end
+ def QuickPath::method_missing( id, *args )
+ begin
+ Functions.send( id.id2name, *args )
+ rescue Exception
+ raise "METHOD: #{id.id2name}(#{args.join ', '})\n#{$!.message}"
+ end
+ end
- def QuickPath::parse_args( element, string )
- # /.*?(?:\)|,)/
- arguments = []
- buffer = ""
- while string and string != ""
- c = string[0]
- string.sub!(/^./u, "")
- case c
- when ?,
- # if depth = 1, then we start a new argument
- arguments << evaluate( buffer )
- #arguments << evaluate( string[0..count] )
- when ?(
- # start a new method call
- function( element, buffer, string )
- buffer = ""
- when ?)
- # close the method call and return arguments
- return arguments
- else
- buffer << c
- end
- end
- ""
- end
- end
+ def QuickPath::function( elements, fname, rest )
+ args = parse_args( elements, rest )
+ Functions.pair = [0, elements.size]
+ results = []
+ elements.each do |element|
+ Functions.pair[0] += 1
+ Functions.node = element
+ res = Functions.send( fname, *args )
+ case res
+ when true
+ results << element
+ when Integer
+ results << element if Functions.pair[0] == res
+ end
+ end
+ return results
+ end
+
+ def QuickPath::parse_args( element, string )
+ # /.*?(?:\)|,)/
+ arguments = []
+ buffer = ""
+ while string and string != ""
+ c = string[0]
+ string.sub!(/^./u, "")
+ case c
+ when ?,
+ # if depth = 1, then we start a new argument
+ arguments << evaluate( buffer )
+ #arguments << evaluate( string[0..count] )
+ when ?(
+ # start a new method call
+ function( element, buffer, string )
+ buffer = ""
+ when ?)
+ # close the method call and return arguments
+ return arguments
+ else
+ buffer << c
+ end
+ end
+ ""
+ end
+ end
end
diff --git a/lib/rexml/rexml.rb b/lib/rexml/rexml.rb
index 8af1697e51..652d6429af 100644
--- a/lib/rexml/rexml.rb
+++ b/lib/rexml/rexml.rb
@@ -1,31 +1,31 @@
# -*- encoding: utf-8 -*-
+# frozen_string_literal: false
# REXML is an XML toolkit for Ruby[http://www.ruby-lang.org], in Ruby.
#
# REXML is a _pure_ Ruby, XML 1.0 conforming,
# non-validating[http://www.w3.org/TR/2004/REC-xml-20040204/#sec-conformance]
# toolkit with an intuitive API. REXML passes 100% of the non-validating Oasis
# tests[http://www.oasis-open.org/committees/xml-conformance/xml-test-suite.shtml],
-# and provides tree, stream, SAX2, pull, and lightweight APIs. REXML also
-# includes a full XPath[http://www.w3c.org/tr/xpath] 1.0 implementation. Since
+# and provides tree, stream, SAX2, pull, and lightweight APIs. REXML also
+# includes a full XPath[http://www.w3c.org/tr/xpath] 1.0 implementation. Since
# Ruby 1.8, REXML is included in the standard Ruby distribution.
#
# Main page:: http://www.germane-software.com/software/rexml
# Author:: Sean Russell <serATgermaneHYPHENsoftwareDOTcom>
-# Version:: 3.1.7.2
-# Date:: 2007/275
-# Revision:: $Revision$
-#
+# Date:: 2008/019
+# Version:: 3.1.7.3
+#
# This API documentation can be downloaded from the REXML home page, or can
# be accessed online[http://www.germane-software.com/software/rexml_doc]
#
# A tutorial is available in the REXML distribution in docs/tutorial.html,
-# or can be accessed
+# or can be accessed
# online[http://www.germane-software.com/software/rexml/docs/tutorial.html]
module REXML
- COPYRIGHT = "Copyright \xC2\xA9 2001-2006 Sean Russell <ser@germane-software.com>"
- VERSION = "3.1.7.2"
- DATE = "2007/275"
- REVISION = "$Revision$".gsub(/\$Revision:|\$/,'').strip
+ COPYRIGHT = "Copyright © 2001-2008 Sean Russell <ser@germane-software.com>"
+ DATE = "2008/019"
+ VERSION = "3.1.7.4"
+ REVISION = %w$Revision$[1] || ''
Copyright = COPYRIGHT
Version = VERSION
diff --git a/lib/rexml/sax2listener.rb b/lib/rexml/sax2listener.rb
index 8db1389d06..5afdc80890 100644
--- a/lib/rexml/sax2listener.rb
+++ b/lib/rexml/sax2listener.rb
@@ -1,97 +1,98 @@
+# frozen_string_literal: false
module REXML
- # A template for stream parser listeners.
- # Note that the declarations (attlistdecl, elementdecl, etc) are trivially
- # processed; REXML doesn't yet handle doctype entity declarations, so you
- # have to parse them out yourself.
- # === Missing methods from SAX2
- # ignorable_whitespace
- # === Methods extending SAX2
- # +WARNING+
- # These methods are certainly going to change, until DTDs are fully
- # supported. Be aware of this.
- # start_document
- # end_document
- # doctype
- # elementdecl
- # attlistdecl
- # entitydecl
- # notationdecl
- # cdata
- # xmldecl
- # comment
- module SAX2Listener
- def start_document
- end
- def end_document
- end
- def start_prefix_mapping prefix, uri
- end
- def end_prefix_mapping prefix
- end
- def start_element uri, localname, qname, attributes
- end
- def end_element uri, localname, qname
- end
- def characters text
- end
- def processing_instruction target, data
- end
- # Handles a doctype declaration. Any attributes of the doctype which are
- # not supplied will be nil. # EG, <!DOCTYPE me PUBLIC "foo" "bar">
- # @p name the name of the doctype; EG, "me"
- # @p pub_sys "PUBLIC", "SYSTEM", or nil. EG, "PUBLIC"
- # @p long_name the supplied long name, or nil. EG, "foo"
- # @p uri the uri of the doctype, or nil. EG, "bar"
- def doctype name, pub_sys, long_name, uri
- end
- # If a doctype includes an ATTLIST declaration, it will cause this
- # method to be called. The content is the declaration itself, unparsed.
- # EG, <!ATTLIST el attr CDATA #REQUIRED> will come to this method as "el
- # attr CDATA #REQUIRED". This is the same for all of the .*decl
- # methods.
- def attlistdecl(element, pairs, contents)
- end
- # <!ELEMENT ...>
- def elementdecl content
- end
- # <!ENTITY ...>
- # The argument passed to this method is an array of the entity
- # declaration. It can be in a number of formats, but in general it
- # returns (example, result):
- # <!ENTITY % YN '"Yes"'>
- # ["%", "YN", "'\"Yes\"'", "\""]
- # <!ENTITY % YN 'Yes'>
- # ["%", "YN", "'Yes'", "s"]
- # <!ENTITY WhatHeSaid "He said %YN;">
- # ["WhatHeSaid", "\"He said %YN;\"", "YN"]
- # <!ENTITY open-hatch SYSTEM "http://www.textuality.com/boilerplate/OpenHatch.xml">
- # ["open-hatch", "SYSTEM", "\"http://www.textuality.com/boilerplate/OpenHatch.xml\""]
- # <!ENTITY open-hatch PUBLIC "-//Textuality//TEXT Standard open-hatch boilerplate//EN" "http://www.textuality.com/boilerplate/OpenHatch.xml">
- # ["open-hatch", "PUBLIC", "\"-//Textuality//TEXT Standard open-hatch boilerplate//EN\"", "\"http://www.textuality.com/boilerplate/OpenHatch.xml\""]
- # <!ENTITY hatch-pic SYSTEM "../grafix/OpenHatch.gif" NDATA gif>
- # ["hatch-pic", "SYSTEM", "\"../grafix/OpenHatch.gif\"", "\n\t\t\t\t\t\t\tNDATA gif", "gif"]
- def entitydecl name, decl
- end
- # <!NOTATION ...>
- def notationdecl content
- end
- # Called when <![CDATA[ ... ]]> is encountered in a document.
- # @p content "..."
- def cdata content
- end
- # Called when an XML PI is encountered in the document.
- # EG: <?xml version="1.0" encoding="utf"?>
- # @p version the version attribute value. EG, "1.0"
- # @p encoding the encoding attribute value, or nil. EG, "utf"
- # @p standalone the standalone attribute value, or nil. EG, nil
+ # A template for stream parser listeners.
+ # Note that the declarations (attlistdecl, elementdecl, etc) are trivially
+ # processed; REXML doesn't yet handle doctype entity declarations, so you
+ # have to parse them out yourself.
+ # === Missing methods from SAX2
+ # ignorable_whitespace
+ # === Methods extending SAX2
+ # +WARNING+
+ # These methods are certainly going to change, until DTDs are fully
+ # supported. Be aware of this.
+ # start_document
+ # end_document
+ # doctype
+ # elementdecl
+ # attlistdecl
+ # entitydecl
+ # notationdecl
+ # cdata
+ # xmldecl
+ # comment
+ module SAX2Listener
+ def start_document
+ end
+ def end_document
+ end
+ def start_prefix_mapping prefix, uri
+ end
+ def end_prefix_mapping prefix
+ end
+ def start_element uri, localname, qname, attributes
+ end
+ def end_element uri, localname, qname
+ end
+ def characters text
+ end
+ def processing_instruction target, data
+ end
+ # Handles a doctype declaration. Any attributes of the doctype which are
+ # not supplied will be nil. # EG, <!DOCTYPE me PUBLIC "foo" "bar">
+ # @p name the name of the doctype; EG, "me"
+ # @p pub_sys "PUBLIC", "SYSTEM", or nil. EG, "PUBLIC"
+ # @p long_name the supplied long name, or nil. EG, "foo"
+ # @p uri the uri of the doctype, or nil. EG, "bar"
+ def doctype name, pub_sys, long_name, uri
+ end
+ # If a doctype includes an ATTLIST declaration, it will cause this
+ # method to be called. The content is the declaration itself, unparsed.
+ # EG, <!ATTLIST el attr CDATA #REQUIRED> will come to this method as "el
+ # attr CDATA #REQUIRED". This is the same for all of the .*decl
+ # methods.
+ def attlistdecl(element, pairs, contents)
+ end
+ # <!ELEMENT ...>
+ def elementdecl content
+ end
+ # <!ENTITY ...>
+ # The argument passed to this method is an array of the entity
+ # declaration. It can be in a number of formats, but in general it
+ # returns (example, result):
+ # <!ENTITY % YN '"Yes"'>
+ # ["%", "YN", "\"Yes\""]
+ # <!ENTITY % YN 'Yes'>
+ # ["%", "YN", "Yes"]
+ # <!ENTITY WhatHeSaid "He said %YN;">
+ # ["WhatHeSaid", "He said %YN;"]
+ # <!ENTITY open-hatch SYSTEM "http://www.textuality.com/boilerplate/OpenHatch.xml">
+ # ["open-hatch", "SYSTEM", "http://www.textuality.com/boilerplate/OpenHatch.xml"]
+ # <!ENTITY open-hatch PUBLIC "-//Textuality//TEXT Standard open-hatch boilerplate//EN" "http://www.textuality.com/boilerplate/OpenHatch.xml">
+ # ["open-hatch", "PUBLIC", "-//Textuality//TEXT Standard open-hatch boilerplate//EN", "http://www.textuality.com/boilerplate/OpenHatch.xml"]
+ # <!ENTITY hatch-pic SYSTEM "../grafix/OpenHatch.gif" NDATA gif>
+ # ["hatch-pic", "SYSTEM", "../grafix/OpenHatch.gif", "NDATA", "gif"]
+ def entitydecl declaration
+ end
+ # <!NOTATION ...>
+ def notationdecl name, public_or_system, public_id, system_id
+ end
+ # Called when <![CDATA[ ... ]]> is encountered in a document.
+ # @p content "..."
+ def cdata content
+ end
+ # Called when an XML PI is encountered in the document.
+ # EG: <?xml version="1.0" encoding="utf"?>
+ # @p version the version attribute value. EG, "1.0"
+ # @p encoding the encoding attribute value, or nil. EG, "utf"
+ # @p standalone the standalone attribute value, or nil. EG, nil
# @p spaced the declaration is followed by a line break
- def xmldecl version, encoding, standalone
- end
- # Called when a comment is encountered.
- # @p comment The content of the comment
- def comment comment
- end
+ def xmldecl version, encoding, standalone
+ end
+ # Called when a comment is encountered.
+ # @p comment The content of the comment
+ def comment comment
+ end
def progress position
end
- end
+ end
end
diff --git a/lib/rexml/security.rb b/lib/rexml/security.rb
new file mode 100644
index 0000000000..99b7460772
--- /dev/null
+++ b/lib/rexml/security.rb
@@ -0,0 +1,28 @@
+# frozen_string_literal: false
+module REXML
+ module Security
+ @@entity_expansion_limit = 10_000
+
+ # Set the entity expansion limit. By default the limit is set to 10000.
+ def self.entity_expansion_limit=( val )
+ @@entity_expansion_limit = val
+ end
+
+ # Get the entity expansion limit. By default the limit is set to 10000.
+ def self.entity_expansion_limit
+ return @@entity_expansion_limit
+ end
+
+ @@entity_expansion_text_limit = 10_240
+
+ # Set the entity expansion limit. By default the limit is set to 10240.
+ def self.entity_expansion_text_limit=( val )
+ @@entity_expansion_text_limit = val
+ end
+
+ # Get the entity expansion limit. By default the limit is set to 10240.
+ def self.entity_expansion_text_limit
+ return @@entity_expansion_text_limit
+ end
+ end
+end
diff --git a/lib/rexml/source.rb b/lib/rexml/source.rb
index ce7a2c98b0..af65cf4751 100644
--- a/lib/rexml/source.rb
+++ b/lib/rexml/source.rb
@@ -1,3 +1,5 @@
+# coding: US-ASCII
+# frozen_string_literal: false
require 'rexml/encoding'
module REXML
@@ -7,13 +9,14 @@ module REXML
# @param arg Either a String, or an IO
# @return a Source, or nil if a bad argument was given
def SourceFactory::create_from(arg)
- if arg.kind_of? String
- Source.new(arg)
- elsif arg.respond_to? :read and
- arg.respond_to? :readline and
- arg.respond_to? :nil? and
- arg.respond_to? :eof?
+ if arg.respond_to? :read and
+ arg.respond_to? :readline and
+ arg.respond_to? :nil? and
+ arg.respond_to? :eof?
IOSource.new(arg)
+ elsif arg.respond_to? :to_str
+ require 'stringio'
+ IOSource.new(StringIO.new(arg))
elsif arg.kind_of? Source
arg
else
@@ -42,7 +45,7 @@ module REXML
if encoding
self.encoding = encoding
else
- self.encoding = check_encoding( @buffer )
+ detect_encoding
end
@line = 0
end
@@ -52,22 +55,16 @@ module REXML
# Overridden to support optimized en/decoding
def encoding=(enc)
return unless super
- @line_break = encode( '>' )
- if enc != UTF_8
- @buffer = decode(@buffer)
- @to_utf = true
- else
- @to_utf = false
- end
+ encoding_updated
end
# Scans the source for a given pattern. Note, that this is not your
# usual scan() method. For one thing, the pattern argument has some
# requirements; for another, the source can be consumed. You can easily
# confuse this method. Originally, the patterns were easier
- # to construct and this method more robust, because this method
- # generated search regexes on the fly; however, this was
- # computationally expensive and slowed down the entire REXML package
+ # to construct and this method more robust, because this method
+ # generated search regexps on the fly; however, this was
+ # computationally expensive and slowed down the entire REXML package
# considerably, since this is by far the most commonly called method.
# @param pattern must be a Regexp, and must be in the form of
# /^\s*(#{your pattern, with no groups})(.*)/. The first group
@@ -123,6 +120,38 @@ module REXML
res = res[-1] if res.kind_of? Array
lines.index( res ) if res
end
+
+ private
+ def detect_encoding
+ buffer_encoding = @buffer.encoding
+ detected_encoding = "UTF-8"
+ begin
+ @buffer.force_encoding("ASCII-8BIT")
+ if @buffer[0, 2] == "\xfe\xff"
+ @buffer[0, 2] = ""
+ detected_encoding = "UTF-16BE"
+ elsif @buffer[0, 2] == "\xff\xfe"
+ @buffer[0, 2] = ""
+ detected_encoding = "UTF-16LE"
+ elsif @buffer[0, 3] == "\xef\xbb\xbf"
+ @buffer[0, 3] = ""
+ detected_encoding = "UTF-8"
+ end
+ ensure
+ @buffer.force_encoding(buffer_encoding)
+ end
+ self.encoding = detected_encoding
+ end
+
+ def encoding_updated
+ if @encoding != 'UTF-8'
+ @buffer = decode(@buffer)
+ @to_utf = true
+ else
+ @to_utf = false
+ @buffer.force_encoding ::Encoding::UTF_8
+ end
+ end
end
# A Source that wraps an IO. See the Source class for method
@@ -134,30 +163,22 @@ module REXML
def initialize(arg, block_size=500, encoding=nil)
@er_source = @source = arg
@to_utf = false
+ @pending_buffer = nil
- # Determining the encoding is a deceptively difficult issue to resolve.
- # First, we check the first two bytes for UTF-16. Then we
- # assume that the encoding is at least ASCII enough for the '>', and
- # we read until we get one of those. This gives us the XML declaration,
- # if there is one. If there isn't one, the file MUST be UTF-8, as per
- # the XML spec. If there is one, we can determine the encoding from
- # it.
- @buffer = ""
- str = @source.read( 2 )
if encoding
- self.encoding = encoding
- elsif 0xfe == str[0] && 0xff == str[1]
- @line_break = "\000>"
- elsif 0xff == str[0] && 0xfe == str[1]
- @line_break = ">\000"
- elsif 0xef == str[0] && 0xbb == str[1]
- str += @source.read(1)
- str = '' if (0xbf == str[2])
- @line_break = ">"
+ super("", encoding)
+ else
+ super(@source.read(3) || "")
+ end
+
+ if !@to_utf and
+ @buffer.respond_to?(:force_encoding) and
+ @source.respond_to?(:external_encoding) and
+ @source.external_encoding != ::Encoding::UTF_8
+ @force_utf8 = true
else
- @line_break = ">"
+ @force_utf8 = false
end
- super str+@source.readline( @line_break )
end
def scan(pattern, cons=false)
@@ -165,16 +186,12 @@ module REXML
# You'll notice that this next section is very similar to the same
# section in match(), but just a liiittle different. This is
# because it is a touch faster to do it this way with scan()
- # than the way match() does it; enough faster to warrent duplicating
+ # than the way match() does it; enough faster to warrant duplicating
# some code
if rv.size == 0
until @buffer =~ pattern or @source.nil?
begin
- # READLINE OPT
- #str = @source.read(@block_size)
- str = @source.readline(@line_break)
- str = decode(str) if @to_utf and str
- @buffer << str
+ @buffer << readline
rescue Iconv::IllegalSequence
raise
rescue
@@ -189,9 +206,7 @@ module REXML
def read
begin
- str = @source.readline(@line_break)
- str = decode(str) if @to_utf and str
- @buffer << str
+ @buffer << readline
rescue Exception, NameError
@source = nil
end
@@ -206,9 +221,7 @@ module REXML
@buffer = $' if cons and rv
while !rv and @source
begin
- str = @source.readline(@line_break)
- str = decode(str) if @to_utf and str
- @buffer << str
+ @buffer << readline
rv = pattern.match(@buffer)
@buffer = $' if cons and rv
rescue
@@ -218,13 +231,13 @@ module REXML
rv.taint
rv
end
-
+
def empty?
super and ( @source.nil? || @source.eof? )
end
def position
- @er_source.stat.pipe? ? 0 : @er_source.pos
+ @er_source.pos rescue 0
end
# @return the current line in the source
@@ -247,5 +260,38 @@ module REXML
end
[pos, lineno, line]
end
+
+ private
+ def readline
+ str = @source.readline(@line_break)
+ if @pending_buffer
+ if str.nil?
+ str = @pending_buffer
+ else
+ str = @pending_buffer + str
+ end
+ @pending_buffer = nil
+ end
+ return nil if str.nil?
+
+ if @to_utf
+ decode(str)
+ else
+ str.force_encoding(::Encoding::UTF_8) if @force_utf8
+ str
+ end
+ end
+
+ def encoding_updated
+ case @encoding
+ when "UTF-16BE", "UTF-16LE"
+ @source.binmode
+ @source.set_encoding(@encoding, @encoding)
+ end
+ @line_break = encode(">")
+ @pending_buffer, @buffer = @buffer, ""
+ @pending_buffer.force_encoding(@encoding)
+ super
+ end
end
end
diff --git a/lib/rexml/streamlistener.rb b/lib/rexml/streamlistener.rb
index 6f401125b5..30c8945179 100644
--- a/lib/rexml/streamlistener.rb
+++ b/lib/rexml/streamlistener.rb
@@ -1,92 +1,93 @@
+# frozen_string_literal: false
module REXML
- # A template for stream parser listeners.
- # Note that the declarations (attlistdecl, elementdecl, etc) are trivially
- # processed; REXML doesn't yet handle doctype entity declarations, so you
- # have to parse them out yourself.
- module StreamListener
- # Called when a tag is encountered.
- # @p name the tag name
- # @p attrs an array of arrays of attribute/value pairs, suitable for
- # use with assoc or rassoc. IE, <tag attr1="value1" attr2="value2">
- # will result in
- # tag_start( "tag", # [["attr1","value1"],["attr2","value2"]])
- def tag_start name, attrs
- end
- # Called when the end tag is reached. In the case of <tag/>, tag_end
- # will be called immidiately after tag_start
- # @p the name of the tag
- def tag_end name
- end
- # Called when text is encountered in the document
- # @p text the text content.
- def text text
- end
- # Called when an instruction is encountered. EG: <?xsl sheet='foo'?>
- # @p name the instruction name; in the example, "xsl"
- # @p instruction the rest of the instruction. In the example,
- # "sheet='foo'"
- def instruction name, instruction
- end
- # Called when a comment is encountered.
- # @p comment The content of the comment
- def comment comment
- end
- # Handles a doctype declaration. Any attributes of the doctype which are
- # not supplied will be nil. # EG, <!DOCTYPE me PUBLIC "foo" "bar">
- # @p name the name of the doctype; EG, "me"
- # @p pub_sys "PUBLIC", "SYSTEM", or nil. EG, "PUBLIC"
- # @p long_name the supplied long name, or nil. EG, "foo"
- # @p uri the uri of the doctype, or nil. EG, "bar"
- def doctype name, pub_sys, long_name, uri
- end
- # Called when the doctype is done
- def doctype_end
- end
- # If a doctype includes an ATTLIST declaration, it will cause this
- # method to be called. The content is the declaration itself, unparsed.
- # EG, <!ATTLIST el attr CDATA #REQUIRED> will come to this method as "el
- # attr CDATA #REQUIRED". This is the same for all of the .*decl
- # methods.
- def attlistdecl element_name, attributes, raw_content
- end
- # <!ELEMENT ...>
- def elementdecl content
- end
- # <!ENTITY ...>
- # The argument passed to this method is an array of the entity
- # declaration. It can be in a number of formats, but in general it
- # returns (example, result):
- # <!ENTITY % YN '"Yes"'>
- # ["%", "YN", "'\"Yes\"'", "\""]
- # <!ENTITY % YN 'Yes'>
- # ["%", "YN", "'Yes'", "s"]
- # <!ENTITY WhatHeSaid "He said %YN;">
- # ["WhatHeSaid", "\"He said %YN;\"", "YN"]
- # <!ENTITY open-hatch SYSTEM "http://www.textuality.com/boilerplate/OpenHatch.xml">
- # ["open-hatch", "SYSTEM", "\"http://www.textuality.com/boilerplate/OpenHatch.xml\""]
- # <!ENTITY open-hatch PUBLIC "-//Textuality//TEXT Standard open-hatch boilerplate//EN" "http://www.textuality.com/boilerplate/OpenHatch.xml">
- # ["open-hatch", "PUBLIC", "\"-//Textuality//TEXT Standard open-hatch boilerplate//EN\"", "\"http://www.textuality.com/boilerplate/OpenHatch.xml\""]
- # <!ENTITY hatch-pic SYSTEM "../grafix/OpenHatch.gif" NDATA gif>
- # ["hatch-pic", "SYSTEM", "\"../grafix/OpenHatch.gif\"", "\n\t\t\t\t\t\t\tNDATA gif", "gif"]
- def entitydecl content
- end
- # <!NOTATION ...>
- def notationdecl content
- end
- # Called when %foo; is encountered in a doctype declaration.
- # @p content "foo"
- def entity content
- end
- # Called when <![CDATA[ ... ]]> is encountered in a document.
- # @p content "..."
- def cdata content
- end
- # Called when an XML PI is encountered in the document.
- # EG: <?xml version="1.0" encoding="utf"?>
- # @p version the version attribute value. EG, "1.0"
- # @p encoding the encoding attribute value, or nil. EG, "utf"
- # @p standalone the standalone attribute value, or nil. EG, nil
- def xmldecl version, encoding, standalone
- end
- end
+ # A template for stream parser listeners.
+ # Note that the declarations (attlistdecl, elementdecl, etc) are trivially
+ # processed; REXML doesn't yet handle doctype entity declarations, so you
+ # have to parse them out yourself.
+ module StreamListener
+ # Called when a tag is encountered.
+ # @p name the tag name
+ # @p attrs an array of arrays of attribute/value pairs, suitable for
+ # use with assoc or rassoc. IE, <tag attr1="value1" attr2="value2">
+ # will result in
+ # tag_start( "tag", [["attr1","value1"],["attr2","value2"]])
+ def tag_start name, attrs
+ end
+ # Called when the end tag is reached. In the case of <tag/>, tag_end
+ # will be called immediately after tag_start
+ # @p name the name of the tag
+ def tag_end name
+ end
+ # Called when text is encountered in the document
+ # @p text the text content.
+ def text text
+ end
+ # Called when an instruction is encountered. EG: <?xsl sheet='foo'?>
+ # @p name the instruction name; in the example, "xsl"
+ # @p instruction the rest of the instruction. In the example,
+ # "sheet='foo'"
+ def instruction name, instruction
+ end
+ # Called when a comment is encountered.
+ # @p comment The content of the comment
+ def comment comment
+ end
+ # Handles a doctype declaration. Any attributes of the doctype which are
+ # not supplied will be nil. EG, <!DOCTYPE me PUBLIC "foo" "bar">
+ # @p name the name of the doctype; EG, "me"
+ # @p pub_sys "PUBLIC", "SYSTEM", or nil. EG, "PUBLIC"
+ # @p long_name the supplied long name, or nil. EG, "foo"
+ # @p uri the uri of the doctype, or nil. EG, "bar"
+ def doctype name, pub_sys, long_name, uri
+ end
+ # Called when the doctype is done
+ def doctype_end
+ end
+ # If a doctype includes an ATTLIST declaration, it will cause this
+ # method to be called. The content is the declaration itself, unparsed.
+ # EG, <!ATTLIST el attr CDATA #REQUIRED> will come to this method as "el
+ # attr CDATA #REQUIRED". This is the same for all of the .*decl
+ # methods.
+ def attlistdecl element_name, attributes, raw_content
+ end
+ # <!ELEMENT ...>
+ def elementdecl content
+ end
+ # <!ENTITY ...>
+ # The argument passed to this method is an array of the entity
+ # declaration. It can be in a number of formats, but in general it
+ # returns (example, result):
+ # <!ENTITY % YN '"Yes"'>
+ # ["YN", "\"Yes\"", "%"]
+ # <!ENTITY % YN 'Yes'>
+ # ["YN", "Yes", "%"]
+ # <!ENTITY WhatHeSaid "He said %YN;">
+ # ["WhatHeSaid", "He said %YN;"]
+ # <!ENTITY open-hatch SYSTEM "http://www.textuality.com/boilerplate/OpenHatch.xml">
+ # ["open-hatch", "SYSTEM", "http://www.textuality.com/boilerplate/OpenHatch.xml"]
+ # <!ENTITY open-hatch PUBLIC "-//Textuality//TEXT Standard open-hatch boilerplate//EN" "http://www.textuality.com/boilerplate/OpenHatch.xml">
+ # ["open-hatch", "PUBLIC", "-//Textuality//TEXT Standard open-hatch boilerplate//EN", "http://www.textuality.com/boilerplate/OpenHatch.xml"]
+ # <!ENTITY hatch-pic SYSTEM "../grafix/OpenHatch.gif" NDATA gif>
+ # ["hatch-pic", "SYSTEM", "../grafix/OpenHatch.gif", "gif"]
+ def entitydecl content
+ end
+ # <!NOTATION ...>
+ def notationdecl content
+ end
+ # Called when %foo; is encountered in a doctype declaration.
+ # @p content "foo"
+ def entity content
+ end
+ # Called when <![CDATA[ ... ]]> is encountered in a document.
+ # @p content "..."
+ def cdata content
+ end
+ # Called when an XML PI is encountered in the document.
+ # EG: <?xml version="1.0" encoding="utf"?>
+ # @p version the version attribute value. EG, "1.0"
+ # @p encoding the encoding attribute value, or nil. EG, "utf"
+ # @p standalone the standalone attribute value, or nil. EG, nil
+ def xmldecl version, encoding, standalone
+ end
+ end
end
diff --git a/lib/rexml/syncenumerator.rb b/lib/rexml/syncenumerator.rb
index 955e006cb2..a9d2ad7f9c 100644
--- a/lib/rexml/syncenumerator.rb
+++ b/lib/rexml/syncenumerator.rb
@@ -1,3 +1,4 @@
+# frozen_string_literal: false
module REXML
class SyncEnumerator
include Enumerable
@@ -6,8 +7,7 @@ module REXML
# Enumerable objects.
def initialize(*enums)
@gens = enums
- @biggest = @gens[0]
- @gens.each {|x| @biggest = x if x.size > @biggest.size }
+ @length = @gens.collect {|x| x.size }.max
end
# Returns the number of enumerated Enumerable objects, i.e. the size
@@ -24,8 +24,8 @@ module REXML
# Enumerates rows of the Enumerable objects.
def each
- @biggest.zip( *@gens ) {|a|
- yield(*a[1..-1])
+ @length.times {|i|
+ yield @gens.collect {|x| x[i]}
}
self
end
diff --git a/lib/rexml/text.rb b/lib/rexml/text.rb
index 2bc00429b3..86269dea1e 100644
--- a/lib/rexml/text.rb
+++ b/lib/rexml/text.rb
@@ -1,3 +1,5 @@
+# frozen_string_literal: false
+require 'rexml/security'
require 'rexml/entity'
require 'rexml/doctype'
require 'rexml/child'
@@ -18,25 +20,57 @@ module REXML
# If +raw+ is true, then REXML leaves the value alone
attr_accessor :raw
- ILLEGAL = /(<|&(?!(#{Entity::NAME})|(#0*((?:\d+)|(?:x[a-fA-F0-9]+)));))/um
- NUMERICENTITY = /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/
+ NEEDS_A_SECOND_CHECK = /(<|&((#{Entity::NAME});|(#0*((?:\d+)|(?:x[a-fA-F0-9]+)));)?)/um
+ NUMERICENTITY = /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/
+ VALID_CHAR = [
+ 0x9, 0xA, 0xD,
+ (0x20..0xD7FF),
+ (0xE000..0xFFFD),
+ (0x10000..0x10FFFF)
+ ]
+
+ if String.method_defined? :encode
+ VALID_XML_CHARS = Regexp.new('^['+
+ VALID_CHAR.map { |item|
+ case item
+ when Integer
+ [item].pack('U').force_encoding('utf-8')
+ when Range
+ [item.first, '-'.ord, item.last].pack('UUU').force_encoding('utf-8')
+ end
+ }.join +
+ ']*$')
+ else
+ VALID_XML_CHARS = /^(
+ [\x09\x0A\x0D\x20-\x7E] # ASCII
+ | [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte
+ | \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs
+ | [\xE1-\xEC\xEE][\x80-\xBF]{2} # straight 3-byte
+ | \xEF[\x80-\xBE]{2} #
+ | \xEF\xBF[\x80-\xBD] # excluding U+fffe and U+ffff
+ | \xED[\x80-\x9F][\x80-\xBF] # excluding surrogates
+ | \xF0[\x90-\xBF][\x80-\xBF]{2} # planes 1-3
+ | [\xF1-\xF3][\x80-\xBF]{3} # planes 4-15
+ | \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16
+ )*$/nx;
+ end
# Constructor
# +arg+ if a String, the content is set to the String. If a Text,
- # the object is shallowly cloned.
+ # the object is shallowly cloned.
#
# +respect_whitespace+ (boolean, false) if true, whitespace is
# respected
#
# +parent+ (nil) if this is a Parent object, the parent
- # will be set to this.
+ # will be set to this.
#
# +raw+ (nil) This argument can be given three values.
- # If true, then the value of used to construct this object is expected to
- # contain no unescaped XML markup, and REXML will not change the text. If
+ # If true, then the value used to construct this object is expected to
+ # contain no unescaped XML markup, and REXML will not change the text. If
# this value is false, the string may contain any characters, and REXML will
# escape any and all defined entities whose values are contained in the
- # text. If this value is nil (the default), then the raw value of the
+ # text. If this value is nil (the default), then the raw value of the
# parent will be used as the raw value for this node. If there is no raw
# value for the parent, and no value is supplied, the default is false.
# Use this field if you have entities defined for some text, and you don't
@@ -56,25 +90,24 @@ module REXML
# Text.new( "sean russell", false, nil, true, ["s"] ) #-> "sean russell"
# In the last example, the +entity_filter+ argument is ignored.
#
- # +pattern+ INTERNAL USE ONLY
- def initialize(arg, respect_whitespace=false, parent=nil, raw=nil,
- entity_filter=nil, illegal=ILLEGAL )
+ # +illegal+ INTERNAL USE ONLY
+ def initialize(arg, respect_whitespace=false, parent=nil, raw=nil,
+ entity_filter=nil, illegal=NEEDS_A_SECOND_CHECK )
@raw = false
+ @parent = nil
if parent
super( parent )
- @raw = parent.raw
- else
- @parent = nil
+ @raw = parent.raw
end
@raw = raw unless raw.nil?
@entity_filter = entity_filter
- @normalized = @unnormalized = nil
+ clear_cache
if arg.kind_of? String
- @string = arg.clone
+ @string = arg.dup
@string.squeeze!(" \n\t") unless respect_whitespace
elsif arg.kind_of? Text
@string = arg.to_s
@@ -85,10 +118,55 @@ module REXML
@string.gsub!( /\r\n?/, "\n" )
- # check for illegal characters
- if @raw
- if @string =~ illegal
- raise "Illegal character '#{$1}' in raw string \"#{@string}\""
+ Text.check(@string, illegal, doctype) if @raw
+ end
+
+ def parent= parent
+ super(parent)
+ Text.check(@string, NEEDS_A_SECOND_CHECK, doctype) if @raw and @parent
+ end
+
+ # check for illegal characters
+ def Text.check string, pattern, doctype
+
+ # illegal anywhere
+ if string !~ VALID_XML_CHARS
+ if String.method_defined? :encode
+ string.chars.each do |c|
+ case c.ord
+ when *VALID_CHAR
+ else
+ raise "Illegal character #{c.inspect} in raw string \"#{string}\""
+ end
+ end
+ else
+ string.scan(/[\x00-\x7F]|[\x80-\xBF][\xC0-\xF0]*|[\xC0-\xF0]/n) do |c|
+ case c.unpack('U')
+ when *VALID_CHAR
+ else
+ raise "Illegal character #{c.inspect} in raw string \"#{string}\""
+ end
+ end
+ end
+ end
+
+ # context sensitive
+ string.scan(pattern) do
+ if $1[-1] != ?;
+ raise "Illegal character '#{$1}' in raw string \"#{string}\""
+ elsif $1[0] == ?&
+ if $5 and $5[0] == ?#
+ case ($5[1] == ?x ? $5[2..-1].to_i(16) : $5[1..-1].to_i)
+ when *VALID_CHAR
+ else
+ raise "Illegal character '#{$1}' in raw string \"#{string}\""
+ end
+ # FIXME: below can't work but this needs API change.
+ # elsif @parent and $3 and !SUBSTITUTES.include?($1)
+ # if !doctype or !doctype.entities.has_key?($3)
+ # raise "Undeclared entity '#{$1}' in raw string \"#{string}\""
+ # end
+ end
end
end
end
@@ -109,8 +187,13 @@ module REXML
# Appends text to this text node. The text is appended in the +raw+ mode
# of this text node.
+ #
+ # +returns+ the text itself to enable method chaining like
+ # 'text << "XXX" << "YYY"'.
def <<( to_append )
@string << to_append.gsub( /\r\n?/, "\n" )
+ clear_cache
+ self
end
@@ -120,17 +203,24 @@ module REXML
to_s() <=> other.to_s
end
+ def doctype
+ if @parent
+ doc = @parent.document
+ doc.doctype if doc
+ end
+ end
+
REFERENCE = /#{Entity::REFERENCE}/
# Returns the string value of this text node. This string is always
# escaped, meaning that it is a valid XML text node string, and all
# entities that can be escaped, have been inserted. This method respects
# the entity filter set in the constructor.
- #
- # # Assume that the entity "s" is defined to be "sean", and that the
+ #
+ # # Assume that the entity "s" is defined to be "sean", and that the
# # entity "r" is defined to be "russell"
- # t = Text.new( "< & sean russell", false, nil, false, ['s'] )
+ # t = Text.new( "< & sean russell", false, nil, false, ['s'] )
# t.to_s #-> "&lt; &amp; &s; russell"
- # t = Text.new( "< & &s; russell", false, nil, false )
+ # t = Text.new( "< & &s; russell", false, nil, false )
# t.to_s #-> "&lt; &amp; &s; russell"
# u = Text.new( "sean russell", false, nil, true )
# u.to_s #-> "sean russell"
@@ -138,12 +228,6 @@ module REXML
return @string if @raw
return @normalized if @normalized
- doctype = nil
- if @parent
- doc = @parent.document
- doctype = doc.doctype if doc
- end
-
@normalized = Text::normalize( @string, doctype, @entity_filter )
end
@@ -156,25 +240,20 @@ module REXML
# console. This ignores the 'raw' attribute setting, and any
# entity_filter.
#
- # # Assume that the entity "s" is defined to be "sean", and that the
+ # # Assume that the entity "s" is defined to be "sean", and that the
# # entity "r" is defined to be "russell"
- # t = Text.new( "< & sean russell", false, nil, false, ['s'] )
+ # t = Text.new( "< & sean russell", false, nil, false, ['s'] )
# t.value #-> "< & sean russell"
# t = Text.new( "< & &s; russell", false, nil, false )
# t.value #-> "< & sean russell"
# u = Text.new( "sean russell", false, nil, true )
# u.value #-> "sean russell"
def value
- @unnormalized if @unnormalized
- doctype = nil
- if @parent
- doc = @parent.document
- doctype = doc.doctype if doc
- end
+ return @unnormalized if @unnormalized
@unnormalized = Text::unnormalize( @string, doctype )
end
- # Sets the contents of this text node. This expects the text to be
+ # Sets the contents of this text node. This expects the text to be
# unnormalized. It returns self.
#
# e = Element.new( "a" )
@@ -183,11 +262,10 @@ module REXML
# e[0].value = "<a>" # <a>&lt;a&gt;</a>
def value=( val )
@string = val.gsub( /\r\n?/, "\n" )
- @unnormalized = nil
- @normalized = nil
+ clear_cache
@raw = false
end
-
+
def wrap(string, width, addnewline=false)
# Recursively wrap string at width.
return string if string.length <= width
@@ -202,7 +280,7 @@ module REXML
def indent_text(string, level=1, style="\t", indentfirstline=true)
return string if level < 0
new_string = ''
- string.each { |line|
+ string.each_line { |line|
indent_string = style * level
new_line = (indent_string + line).sub(/[\s]+$/,'')
new_string << new_line
@@ -210,12 +288,12 @@ module REXML
new_string.strip! unless indentfirstline
return new_string
end
-
+
# == DEPRECATED
# See REXML::Formatters
#
- def write( writer, indent=-1, transitive=false, ie_hack=false )
- Kernel.warn("#{self.class.name}.write is deprecated. See REXML::Formatters")
+ def write( writer, indent=-1, transitive=false, ie_hack=false )
+ Kernel.warn("#{self.class.name}.write is deprecated. See REXML::Formatters", uplevel: 1)
formatter = if indent > -1
REXML::Formatters::Pretty.new( indent )
else
@@ -258,6 +336,12 @@ module REXML
out << copy
end
+ private
+ def clear_cache
+ @normalized = nil
+ @unnormalized = nil
+ end
+
# Reads text, substituting entities
def Text::read_with_substitution( input, illegal=nil )
copy = input.clone
@@ -265,7 +349,7 @@ module REXML
if copy =~ illegal
raise ParseException.new( "malformed text: Illegal character #$& in \"#{copy}\"" )
end if illegal
-
+
copy.gsub!( /\r\n?/, "\n" )
if copy.include? ?&
copy.gsub!( SETUTITSBUS[0], SLAICEPS[0] )
@@ -273,7 +357,7 @@ module REXML
copy.gsub!( SETUTITSBUS[2], SLAICEPS[2] )
copy.gsub!( SETUTITSBUS[3], SLAICEPS[3] )
copy.gsub!( SETUTITSBUS[4], SLAICEPS[4] )
- copy.gsub!( /&#0*((?:\d+)|(?:x[a-f0-9]+));/ ) {|m|
+ copy.gsub!( /&#0*((?:\d+)|(?:x[a-f0-9]+));/ ) {
m=$1
#m='0' if m==''
m = "0#{m}" if m[0] == ?x
@@ -286,16 +370,16 @@ module REXML
EREFERENCE = /&(?!#{Entity::NAME};)/
# Escapes all possible entities
def Text::normalize( input, doctype=nil, entity_filter=nil )
- copy = input
+ copy = input.to_s
# Doing it like this rather than in a loop improves the speed
#copy = copy.gsub( EREFERENCE, '&amp;' )
copy = copy.gsub( "&", "&amp;" )
if doctype
# Replace all ampersands that aren't part of an entity
doctype.entities.each_value do |entity|
- copy = copy.gsub( entity.value,
- "&#{entity.name};" ) if entity.value and
- not( entity_filter and entity_filter.include?(entity) )
+ copy = copy.gsub( entity.value,
+ "&#{entity.name};" ) if entity.value and
+ not( entity_filter and entity_filter.include?(entity.name) )
end
else
# Replace all ampersands that aren't part of an entity
@@ -308,37 +392,35 @@ module REXML
# Unescapes all possible entities
def Text::unnormalize( string, doctype=nil, filter=nil, illegal=nil )
- rv = string.clone
- rv.gsub!( /\r\n?/, "\n" )
- matches = rv.scan( REFERENCE )
- return rv if matches.size == 0
- rv.gsub!( NUMERICENTITY ) {|m|
- m=$1
- m = "0#{m}" if m[0] == ?x
- [Integer(m)].pack('U*')
+ sum = 0
+ string.gsub( /\r\n?/, "\n" ).gsub( REFERENCE ) {
+ s = Text.expand($&, doctype, filter)
+ if sum + s.bytesize > Security.entity_expansion_text_limit
+ raise "entity expansion has grown too large"
+ else
+ sum += s.bytesize
+ end
+ s
}
- matches.collect!{|x|x[0]}.compact!
- if matches.size > 0
- if doctype
- matches.each do |entity_reference|
- unless filter and filter.include?(entity_reference)
- entity_value = doctype.entity( entity_reference )
- re = /&#{entity_reference};/
- rv.gsub!( re, entity_value ) if entity_value
- end
- end
+ end
+
+ def Text.expand(ref, doctype, filter)
+ if ref[1] == ?#
+ if ref[2] == ?x
+ [ref[3...-1].to_i(16)].pack('U*')
else
- matches.each do |entity_reference|
- unless filter and filter.include?(entity_reference)
- entity_value = DocType::DEFAULT_ENTITIES[ entity_reference ]
- re = /&#{entity_reference};/
- rv.gsub!( re, entity_value.value ) if entity_value
- end
- end
+ [ref[2...-1].to_i].pack('U*')
end
- rv.gsub!( /&amp;/, '&' )
+ elsif ref == '&amp;'
+ '&'
+ elsif filter and filter.include?( ref[1...-1] )
+ ref
+ elsif doctype
+ doctype.entity( ref[1...-1] ) or ref
+ else
+ entity_value = DocType::DEFAULT_ENTITIES[ ref[1...-1] ]
+ entity_value ? entity_value.value : ref
end
- rv
end
end
end
diff --git a/lib/rexml/undefinednamespaceexception.rb b/lib/rexml/undefinednamespaceexception.rb
index 8ebfdfd0a9..e522ed57ea 100644
--- a/lib/rexml/undefinednamespaceexception.rb
+++ b/lib/rexml/undefinednamespaceexception.rb
@@ -1,3 +1,4 @@
+# frozen_string_literal: false
require 'rexml/parseexception'
module REXML
class UndefinedNamespaceException < ParseException
diff --git a/lib/rexml/validation/relaxng.rb b/lib/rexml/validation/relaxng.rb
index 969f51bc95..fb52438290 100644
--- a/lib/rexml/validation/relaxng.rb
+++ b/lib/rexml/validation/relaxng.rb
@@ -1,3 +1,4 @@
+# frozen_string_literal: false
require "rexml/validation/validation"
require "rexml/parsers/baseparser"
@@ -79,7 +80,7 @@ module REXML
when "mixed"
states << Interleave.new( self )
states[-2] << states[-1]
- states[-1] << TEXT
+ states[-1] << TEXT
when "define"
states << [ event[2]["name"] ]
when "ref"
@@ -102,7 +103,7 @@ module REXML
case event[1]
when "element", "attribute"
states[-1] << event
- when "zeroOrMore", "oneOrMore", "choice", "optional",
+ when "zeroOrMore", "oneOrMore", "choice", "optional",
"interleave", "group", "mixed"
states.pop
when "define"
@@ -139,13 +140,12 @@ module REXML
@events.each {|s| s.reset if s.kind_of? State }
end
- def previous=( previous )
+ def previous=( previous )
@previous << previous
end
def next( event )
#print "In next with #{event.inspect}. "
- #puts "Next (#@current) is #{@events[@current]}"
#p @previous
return @previous.pop.next( event ) if @events[@current].nil?
expand_ref_in( @events, @current ) if @events[@current].class == Ref
@@ -154,19 +154,15 @@ module REXML
@events[@current-1].previous = self
return @events[@current-1].next( event )
end
- #puts "Current isn't a state"
if ( @events[@current].matches?(event) )
@current += 1
if @events[@current].nil?
- #puts "#{inspect[0,5]} 1RETURNING #{@previous.inspect[0,5]}"
return @previous.pop
elsif @events[@current].kind_of? State
@current += 1
- #puts "#{inspect[0,5]} 2RETURNING (#{@current-1}) #{@events[@current-1].inspect[0,5]}; on return, next is #{@events[@current]}"
@events[@current-1].previous = self
return @events[@current-1]
else
- #puts "#{inspect[0,5]} RETURNING self w/ next(#@current) = #{@events[@current]}"
return self
end
else
@@ -183,7 +179,7 @@ module REXML
end
def inspect
- "< #{to_s} #{@events.collect{|e|
+ "< #{to_s} #{@events.collect{|e|
pre = e == @events[@current] ? '#' : ''
pre + e.inspect unless self == e
}.join(', ')} >"
@@ -201,15 +197,15 @@ module REXML
protected
def expand_ref_in( arry, ind )
new_events = []
- @references[ arry[ind].to_s ].each{ |evt|
+ @references[ arry[ind].to_s ].each{ |evt|
add_event_to_arry(new_events,evt)
}
arry[ind,1] = new_events
end
- def add_event_to_arry( arry, evt )
+ def add_event_to_arry( arry, evt )
evt = generate_event( evt )
- if evt.kind_of? String
+ if evt.kind_of? String
arry[-1].event_arg = evt if arry[-1].kind_of? Event and @value
@value = false
else
@@ -272,7 +268,7 @@ module REXML
end
def matches?(event)
- @events[@current].matches?(event) ||
+ @events[@current].matches?(event) ||
(@current == 0 and @previous[-1].matches?(event))
end
@@ -319,7 +315,7 @@ module REXML
end
def reset
- super
+ super
@ord = 0
end
@@ -345,7 +341,7 @@ module REXML
end
def matches?( event )
- @events[@current].matches?(event) ||
+ @events[@current].matches?(event) ||
(@current == 0 and @ord > 0 and @previous[-1].matches?(event))
end
@@ -393,13 +389,10 @@ module REXML
# Remove the references
# Find the events
end
- #puts "In next with #{event.inspect}."
- #puts "events is #{@events.inspect}"
unless @events
@events = []
return nil
end
- #puts "current = #@current"
super
end
@@ -409,10 +402,8 @@ module REXML
end
def expected
- #puts "IN CHOICE EXPECTED"
- #puts "EVENTS = #{@events.inspect}"
return [@events[@current]] if @events.size > 0
- return @choices.collect do |x|
+ return @choices.collect do |x|
if x[0].kind_of? State
x[0].expected
else
@@ -426,17 +417,17 @@ module REXML
end
protected
- def add_event_to_arry( arry, evt )
+ def add_event_to_arry( arry, evt )
if evt.kind_of? State or evt.class == Ref
arry << [evt]
- elsif evt[0] == :text
+ elsif evt[0] == :text
if arry[-1] and
- arry[-1][-1].kind_of?( Event ) and
+ arry[-1][-1].kind_of?( Event ) and
arry[-1][-1].event_type == :text and @value
arry[-1][-1].event_arg = evt[1]
@value = false
- end
+ end
else
arry << [] if evt[0] == :start_element
arry[-1] << generate_event( evt )
@@ -478,9 +469,7 @@ module REXML
@choices[idx] = old
@choice += 1
end
-
- #puts "In next with #{event.inspect}."
- #puts "events is #{@events.inspect}"
+
@events = [] unless @events
end
@@ -490,30 +479,23 @@ module REXML
next_current(event) unless @events[@current]
return nil unless @events[@current]
- expand_ref_in( @events, @current ) if @events[@current].class == Ref
- #puts "In next with #{event.inspect}."
- #puts "Next (#@current) is #{@events[@current]}"
+ expand_ref_in( @events, @current ) if @events[@current].class == Ref
if ( @events[@current].kind_of? State )
@current += 1
@events[@current-1].previous = self
return @events[@current-1].next( event )
end
- #puts "Current isn't a state"
return @previous.pop.next( event ) if @events[@current].nil?
if ( @events[@current].matches?(event) )
@current += 1
if @events[@current].nil?
- #puts "#{inspect[0,5]} 1RETURNING self" unless @choices[@choice].nil?
return self unless @choices[@choice].nil?
- #puts "#{inspect[0,5]} 1RETURNING #{@previous[-1].inspect[0,5]}"
return @previous.pop
elsif @events[@current].kind_of? State
@current += 1
- #puts "#{inspect[0,5]} 2RETURNING (#{@current-1}) #{@events[@current-1].inspect[0,5]}; on return, next is #{@events[@current]}"
@events[@current-1].previous = self
return @events[@current-1]
else
- #puts "#{inspect[0,5]} RETURNING self w/ next(#@current) = #{@events[@current]}"
return self
end
else
@@ -527,10 +509,8 @@ module REXML
end
def expected
- #puts "IN CHOICE EXPECTED"
- #puts "EVENTS = #{@events.inspect}"
return [@events[@current]] if @events[@current]
- return @choices[@choice..-1].collect do |x|
+ return @choices[@choice..-1].collect do |x|
if x[0].kind_of? State
x[0].expected
else
diff --git a/lib/rexml/validation/validation.rb b/lib/rexml/validation/validation.rb
index 160ea96b31..f0c76f976c 100644
--- a/lib/rexml/validation/validation.rb
+++ b/lib/rexml/validation/validation.rb
@@ -1,3 +1,4 @@
+# frozen_string_literal: false
require 'rexml/validation/validationexception'
module REXML
@@ -14,9 +15,7 @@ module REXML
def dump
puts @root.inspect
end
- def validate( event )
- #puts "Current: #@current"
- #puts "Event: #{event.inspect}"
+ def validate( event )
@attr_stack = [] unless defined? @attr_stack
match = @current.next(event)
raise ValidationException.new( "Validation error. Expected: "+
@@ -27,30 +26,21 @@ module REXML
# Check for attributes
case event[0]
when :start_element
- #puts "Checking attributes"
@attr_stack << event[2]
begin
sattr = [:start_attribute, nil]
eattr = [:end_attribute]
text = [:text, nil]
- k,v = event[2].find { |k,v|
- sattr[1] = k
- #puts "Looking for #{sattr.inspect}"
+ k, = event[2].find { |key,value|
+ sattr[1] = key
m = @current.next( sattr )
- #puts "Got #{m.inspect}"
- if m
+ if m
# If the state has text children...
- #puts "Looking for #{eattr.inspect}"
- #puts "Expect #{m.expected}"
if m.matches?( eattr )
- #puts "Got end"
@current = m
else
- #puts "Didn't get end"
- text[1] = v
- #puts "Looking for #{text.inspect}"
+ text[1] = value
m = m.next( text )
- #puts "Got #{m.inspect}"
text[1] = nil
return false unless m
@current = m if m
@@ -94,7 +84,6 @@ module REXML
end
def matches?( event )
- #puts "#@event_type =? #{event[0]} && #@event_arg =? #{event[1]} "
return false unless event[0] == @event_type
case event[0]
when nil
diff --git a/lib/rexml/validation/validationexception.rb b/lib/rexml/validation/validationexception.rb
index 4723d9e4d3..78cd63fd04 100644
--- a/lib/rexml/validation/validationexception.rb
+++ b/lib/rexml/validation/validationexception.rb
@@ -1,3 +1,4 @@
+# frozen_string_literal: false
module REXML
module Validation
class ValidationException < RuntimeError
diff --git a/lib/rexml/xmldecl.rb b/lib/rexml/xmldecl.rb
index 427eb78cf8..a37e9f3ddc 100644
--- a/lib/rexml/xmldecl.rb
+++ b/lib/rexml/xmldecl.rb
@@ -1,41 +1,42 @@
+# frozen_string_literal: false
require 'rexml/encoding'
require 'rexml/source'
module REXML
- # NEEDS DOCUMENTATION
- class XMLDecl < Child
- include Encoding
+ # NEEDS DOCUMENTATION
+ class XMLDecl < Child
+ include Encoding
- DEFAULT_VERSION = "1.0";
- DEFAULT_ENCODING = "UTF-8";
- DEFAULT_STANDALONE = "no";
- START = '<\?xml';
- STOP = '\?>';
+ DEFAULT_VERSION = "1.0";
+ DEFAULT_ENCODING = "UTF-8";
+ DEFAULT_STANDALONE = "no";
+ START = '<\?xml';
+ STOP = '\?>';
- attr_accessor :version, :standalone
+ attr_accessor :version, :standalone
attr_reader :writeencoding, :writethis
- def initialize(version=DEFAULT_VERSION, encoding=nil, standalone=nil)
+ def initialize(version=DEFAULT_VERSION, encoding=nil, standalone=nil)
@writethis = true
@writeencoding = !encoding.nil?
- if version.kind_of? XMLDecl
- super()
- @version = version.version
- self.encoding = version.encoding
+ if version.kind_of? XMLDecl
+ super()
+ @version = version.version
+ self.encoding = version.encoding
@writeencoding = version.writeencoding
- @standalone = version.standalone
- else
- super()
- @version = version
- self.encoding = encoding
- @standalone = standalone
- end
- @version = DEFAULT_VERSION if @version.nil?
- end
-
- def clone
- XMLDecl.new(self)
- end
+ @standalone = version.standalone
+ else
+ super()
+ @version = version
+ self.encoding = encoding
+ @standalone = standalone
+ end
+ @version = DEFAULT_VERSION if @version.nil?
+ end
+
+ def clone
+ XMLDecl.new(self)
+ end
# indent::
# Ignored. There must be no whitespace before an XML declaration
@@ -43,35 +44,31 @@ module REXML
# Ignored
# ie_hack::
# Ignored
- def write(writer, indent=-1, transitive=false, ie_hack=false)
+ def write(writer, indent=-1, transitive=false, ie_hack=false)
return nil unless @writethis or writer.kind_of? Output
- writer << START.sub(/\\/u, '')
- if writer.kind_of? Output
- writer << " #{content writer.encoding}"
- else
- writer << " #{content encoding}"
- end
- writer << STOP.sub(/\\/u, '')
- end
-
- def ==( other )
- other.kind_of?(XMLDecl) and
- other.version == @version and
- other.encoding == self.encoding and
- other.standalone == @standalone
- end
-
- def xmldecl version, encoding, standalone
- @version = version
- self.encoding = encoding
- @standalone = standalone
- end
-
- def node_type
- :xmldecl
- end
-
- alias :stand_alone? :standalone
+ writer << START.sub(/\\/u, '')
+ writer << " #{content encoding}"
+ writer << STOP.sub(/\\/u, '')
+ end
+
+ def ==( other )
+ other.kind_of?(XMLDecl) and
+ other.version == @version and
+ other.encoding == self.encoding and
+ other.standalone == @standalone
+ end
+
+ def xmldecl version, encoding, standalone
+ @version = version
+ self.encoding = encoding
+ @standalone = standalone
+ end
+
+ def node_type
+ :xmldecl
+ end
+
+ alias :stand_alone? :standalone
alias :old_enc= :encoding=
def encoding=( enc )
@@ -108,12 +105,12 @@ module REXML
START.sub(/\\/u, '') + " ... " + STOP.sub(/\\/u, '')
end
- private
- def content(enc)
- rv = "version='#@version'"
- rv << " encoding='#{enc}'" if @writeencoding || enc !~ /utf-8/i
- rv << " standalone='#@standalone'" if @standalone
- rv
- end
- end
+ private
+ def content(enc)
+ rv = "version='#@version'"
+ rv << " encoding='#{enc}'" if @writeencoding || enc !~ /\Autf-8\z/i
+ rv << " standalone='#@standalone'" if @standalone
+ rv
+ end
+ end
end
diff --git a/lib/rexml/xmltokens.rb b/lib/rexml/xmltokens.rb
index 6bbe5b07d5..392b47b1d3 100644
--- a/lib/rexml/xmltokens.rb
+++ b/lib/rexml/xmltokens.rb
@@ -1,18 +1,85 @@
+# frozen_string_literal: false
module REXML
- # Defines a number of tokens used for parsing XML. Not for general
- # consumption.
- module XMLTokens
- NCNAME_STR= '[\w:][\-\w\d.]*'
- NAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}"
+ # Defines a number of tokens used for parsing XML. Not for general
+ # consumption.
+ module XMLTokens
+ # From http://www.w3.org/TR/REC-xml/#sec-common-syn
+ #
+ # [4] NameStartChar ::=
+ # ":" |
+ # [A-Z] |
+ # "_" |
+ # [a-z] |
+ # [#xC0-#xD6] |
+ # [#xD8-#xF6] |
+ # [#xF8-#x2FF] |
+ # [#x370-#x37D] |
+ # [#x37F-#x1FFF] |
+ # [#x200C-#x200D] |
+ # [#x2070-#x218F] |
+ # [#x2C00-#x2FEF] |
+ # [#x3001-#xD7FF] |
+ # [#xF900-#xFDCF] |
+ # [#xFDF0-#xFFFD] |
+ # [#x10000-#xEFFFF]
+ name_start_chars = [
+ ":",
+ "A-Z",
+ "_",
+ "a-z",
+ "\\u00C0-\\u00D6",
+ "\\u00D8-\\u00F6",
+ "\\u00F8-\\u02FF",
+ "\\u0370-\\u037D",
+ "\\u037F-\\u1FFF",
+ "\\u200C-\\u200D",
+ "\\u2070-\\u218F",
+ "\\u2C00-\\u2FEF",
+ "\\u3001-\\uD7FF",
+ "\\uF900-\\uFDCF",
+ "\\uFDF0-\\uFFFD",
+ "\\u{10000}-\\u{EFFFF}",
+ ]
+ # From http://www.w3.org/TR/REC-xml/#sec-common-syn
+ #
+ # [4a] NameChar ::=
+ # NameStartChar |
+ # "-" |
+ # "." |
+ # [0-9] |
+ # #xB7 |
+ # [#x0300-#x036F] |
+ # [#x203F-#x2040]
+ name_chars = name_start_chars + [
+ "\\-",
+ "\\.",
+ "0-9",
+ "\\u00B7",
+ "\\u0300-\\u036F",
+ "\\u203F-\\u2040",
+ ]
+ NAME_START_CHAR = "[#{name_start_chars.join('')}]"
+ NAME_CHAR = "[#{name_chars.join('')}]"
+ NAMECHAR = NAME_CHAR # deprecated. Use NAME_CHAR instead.
- NAMECHAR = '[\-\w\d\.:]'
- NAME = "([\\w:]#{NAMECHAR}*)"
- NMTOKEN = "(?:#{NAMECHAR})+"
- NMTOKENS = "#{NMTOKEN}(\\s+#{NMTOKEN})*"
- REFERENCE = "(?:&#{NAME};|&#\\d+;|&#x[0-9a-fA-F]+;)"
+ # From http://www.w3.org/TR/xml-names11/#NT-NCName
+ #
+ # [6] NCNameStartChar ::= NameStartChar - ':'
+ ncname_start_chars = name_start_chars - [":"]
+ # From http://www.w3.org/TR/xml-names11/#NT-NCName
+ #
+ # [5] NCNameChar ::= NameChar - ':'
+ ncname_chars = name_chars - [":"]
+ NCNAME_STR = "[#{ncname_start_chars.join('')}][#{ncname_chars.join('')}]*"
+ NAME_STR = "(?:#{NCNAME_STR}:)?#{NCNAME_STR}"
- #REFERENCE = "(?:#{ENTITYREF}|#{CHARREF})"
- #ENTITYREF = "&#{NAME};"
- #CHARREF = "&#\\d+;|&#x[0-9a-fA-F]+;"
- end
+ NAME = "(#{NAME_START_CHAR}#{NAME_CHAR}*)"
+ NMTOKEN = "(?:#{NAME_CHAR})+"
+ NMTOKENS = "#{NMTOKEN}(\\s+#{NMTOKEN})*"
+ REFERENCE = "(?:&#{NAME};|&#\\d+;|&#x[0-9a-fA-F]+;)"
+
+ #REFERENCE = "(?:#{ENTITYREF}|#{CHARREF})"
+ #ENTITYREF = "&#{NAME};"
+ #CHARREF = "&#\\d+;|&#x[0-9a-fA-F]+;"
+ end
end
diff --git a/lib/rexml/xpath.rb b/lib/rexml/xpath.rb
index e8813efd3e..f1cb99baea 100644
--- a/lib/rexml/xpath.rb
+++ b/lib/rexml/xpath.rb
@@ -1,66 +1,81 @@
+# frozen_string_literal: false
require 'rexml/functions'
require 'rexml/xpath_parser'
module REXML
- # Wrapper class. Use this class to access the XPath functions.
- class XPath
- include Functions
- EMPTY_HASH = {}
+ # Wrapper class. Use this class to access the XPath functions.
+ class XPath
+ include Functions
+ # A base Hash object, intended to be used when initializing a
+ # default empty namespaces set, but currently unused.
+ # TODO: either set the namespaces=EMPTY_HASH, or deprecate this.
+ EMPTY_HASH = {}
- # Finds and returns the first node that matches the supplied xpath.
- # element::
- # The context element
- # path::
- # The xpath to search for. If not supplied or nil, returns the first
- # node matching '*'.
- # namespaces::
- # If supplied, a Hash which defines a namespace mapping.
- #
- # XPath.first( node )
- # XPath.first( doc, "//b"} )
- # XPath.first( node, "a/x:b", { "x"=>"http://doofus" } )
+ # Finds and returns the first node that matches the supplied xpath.
+ # element::
+ # The context element
+ # path::
+ # The xpath to search for. If not supplied or nil, returns the first
+ # node matching '*'.
+ # namespaces::
+ # If supplied, a Hash which defines a namespace mapping.
+ # variables::
+ # If supplied, a Hash which maps $variables in the query
+ # to values. This can be used to avoid XPath injection attacks
+ # or to automatically handle escaping string values.
+ #
+ # XPath.first( node )
+ # XPath.first( doc, "//b" )
+ # XPath.first( node, "a/x:b", { "x"=>"http://doofus" } )
+ # XPath.first( node, '/book/publisher/text()=$publisher', {}, {"publisher"=>"O'Reilly"})
def XPath::first element, path=nil, namespaces=nil, variables={}
raise "The namespaces argument, if supplied, must be a hash object." unless namespaces.nil? or namespaces.kind_of?(Hash)
raise "The variables argument, if supplied, must be a hash object." unless variables.kind_of?(Hash)
- parser = XPathParser.new
- parser.namespaces = namespaces
- parser.variables = variables
- path = "*" unless path
- element = [element] unless element.kind_of? Array
- parser.parse(path, element).flatten[0]
- end
+ parser = XPathParser.new
+ parser.namespaces = namespaces
+ parser.variables = variables
+ path = "*" unless path
+ element = [element] unless element.kind_of? Array
+ parser.parse(path, element).flatten[0]
+ end
- # Iterates over nodes that match the given path, calling the supplied
- # block with the match.
- # element::
- # The context element
- # path::
- # The xpath to search for. If not supplied or nil, defaults to '*'
- # namespaces::
- # If supplied, a Hash which defines a namespace mapping
- #
- # XPath.each( node ) { |el| ... }
- # XPath.each( node, '/*[@attr='v']' ) { |el| ... }
- # XPath.each( node, 'ancestor::x' ) { |el| ... }
- def XPath::each element, path=nil, namespaces=nil, variables={}, &block
+ # Iterates over nodes that match the given path, calling the supplied
+ # block with the match.
+ # element::
+ # The context element
+ # path::
+ # The xpath to search for. If not supplied or nil, defaults to '*'
+ # namespaces::
+ # If supplied, a Hash which defines a namespace mapping
+ # variables::
+ # If supplied, a Hash which maps $variables in the query
+ # to values. This can be used to avoid XPath injection attacks
+ # or to automatically handle escaping string values.
+ #
+ # XPath.each( node ) { |el| ... }
+ # XPath.each( node, "/*[@attr='v']" ) { |el| ... }
+ # XPath.each( node, 'ancestor::x' ) { |el| ... }
+ # XPath.each( node, '/book/publisher/text()=$publisher', {}, {"publisher"=>"O'Reilly"}) \
+ # {|el| ... }
+ def XPath::each element, path=nil, namespaces=nil, variables={}, &block
raise "The namespaces argument, if supplied, must be a hash object." unless namespaces.nil? or namespaces.kind_of?(Hash)
raise "The variables argument, if supplied, must be a hash object." unless variables.kind_of?(Hash)
- parser = XPathParser.new
- parser.namespaces = namespaces
- parser.variables = variables
- path = "*" unless path
- element = [element] unless element.kind_of? Array
- parser.parse(path, element).each( &block )
- end
+ parser = XPathParser.new
+ parser.namespaces = namespaces
+ parser.variables = variables
+ path = "*" unless path
+ element = [element] unless element.kind_of? Array
+ parser.parse(path, element).each( &block )
+ end
- # Returns an array of nodes matching a given XPath.
- def XPath::match element, path=nil, namespaces=nil, variables={}
- parser = XPathParser.new
- parser.namespaces = namespaces
- parser.variables = variables
- path = "*" unless path
- element = [element] unless element.kind_of? Array
- parser.parse(path,element)
- end
- end
+ # Returns an array of nodes matching a given XPath.
+ def XPath::match element, path=nil, namespaces=nil, variables={}
+ parser = XPathParser.new
+ parser.namespaces = namespaces
+ parser.variables = variables
+ path = "*" unless path
+ element = [element] unless element.kind_of? Array
+ parser.parse(path,element)
+ end
+ end
end
diff --git a/lib/rexml/xpath_parser.rb b/lib/rexml/xpath_parser.rb
index eb608fdb34..181b2b6e85 100644
--- a/lib/rexml/xpath_parser.rb
+++ b/lib/rexml/xpath_parser.rb
@@ -1,3 +1,4 @@
+# frozen_string_literal: false
require 'rexml/namespace'
require 'rexml/xmltokens'
require 'rexml/attribute'
@@ -5,20 +6,30 @@ require 'rexml/syncenumerator'
require 'rexml/parsers/xpathparser'
class Object
+ # provides a unified +clone+ operation, for REXML::XPathParser
+ # to use across multiple Object types
def dclone
clone
end
end
class Symbol
+ # provides a unified +clone+ operation, for REXML::XPathParser
+ # to use across multiple Object types
def dclone ; self ; end
end
-class Fixnum
+class Integer
+ # provides a unified +clone+ operation, for REXML::XPathParser
+ # to use across multiple Object types
def dclone ; self ; end
end
class Float
+ # provides a unified +clone+ operation, for REXML::XPathParser
+ # to use across multiple Object types
def dclone ; self ; end
end
class Array
+ # provides a unified +clone+ operation, for REXML::XPathParser
+ # to use across multiple Object types
def dclone
klone = self.clone
klone.clear
@@ -53,19 +64,13 @@ module REXML
end
def parse path, nodeset
- #puts "#"*40
- path_stack = @parser.parse( path )
- #puts "PARSE: #{path} => #{path_stack.inspect}"
- #puts "PARSE: nodeset = #{nodeset.inspect}"
- match( path_stack, nodeset )
+ path_stack = @parser.parse( path )
+ match( path_stack, nodeset )
end
def get_first path, nodeset
- #puts "#"*40
- path_stack = @parser.parse( path )
- #puts "PARSE: #{path} => #{path_stack.inspect}"
- #puts "PARSE: nodeset = #{nodeset.inspect}"
- first( path_stack, nodeset )
+ path_stack = @parser.parse( path )
+ first( path_stack, nodeset )
end
def predicate path, nodeset
@@ -83,25 +88,20 @@ module REXML
#
# FIXME: This method is incomplete!
def first( path_stack, node )
- #puts "#{depth}) Entering match( #{path.inspect}, #{tree.inspect} )"
return nil if path.size == 0
case path[0]
when :document
- # do nothing
+ # do nothing
return first( path[1..-1], node )
when :child
for c in node.children
- #puts "#{depth}) CHILD checking #{name(c)}"
r = first( path[1..-1], c )
- #puts "#{depth}) RETURNING #{r.inspect}" if r
return r if r
end
when :qname
name = path[2]
- #puts "#{depth}) QNAME #{name(tree)} == #{name} (path => #{path.size})"
if node.name == name
- #puts "#{depth}) RETURNING #{tree.inspect}" if path.size == 3
return node if path.size == 3
return first( path[3..-1], node )
else
@@ -123,11 +123,8 @@ module REXML
end
- def match( path_stack, nodeset )
- #puts "MATCH: path_stack = #{path_stack.inspect}"
- #puts "MATCH: nodeset = #{nodeset.inspect}"
+ def match( path_stack, nodeset )
r = expr( path_stack, nodeset )
- #puts "MAIN EXPR => #{r.inspect}"
r
end
@@ -136,7 +133,7 @@ module REXML
# Returns a String namespace for a node, given a prefix
# The rules are:
- #
+ #
# 1. Use the supplied namespace mapping first.
# 2. If no mapping was supplied, use the context node to look up the namespace
def get_namespace( node, prefix )
@@ -154,15 +151,9 @@ module REXML
ALL = [ :attribute, :element, :text, :processing_instruction, :comment ]
ELEMENTS = [ :element ]
def expr( path_stack, nodeset, context=nil )
- #puts "#"*15
- #puts "In expr with #{path_stack.inspect}"
- #puts "Returning" if path_stack.length == 0 || nodeset.length == 0
node_types = ELEMENTS
return nodeset if path_stack.length == 0 || nodeset.length == 0
while path_stack.length > 0
- #puts "#"*5
- #puts "Path stack = #{path_stack.inspect}"
- #puts "Nodeset is #{nodeset.inspect}"
if nodeset.length == 0
path_stack.clear
return []
@@ -170,34 +161,25 @@ module REXML
case (op = path_stack.shift)
when :document
nodeset = [ nodeset[0].root_node ]
- #puts ":document, nodeset = #{nodeset.inspect}"
when :qname
- #puts "IN QNAME"
prefix = path_stack.shift
name = path_stack.shift
nodeset.delete_if do |node|
# FIXME: This DOUBLES the time XPath searches take
ns = get_namespace( node, prefix )
- #puts "NS = #{ns.inspect}"
- #puts "node.node_type == :element => #{node.node_type == :element}"
if node.node_type == :element
- #puts "node.name == #{name} => #{node.name == name}"
if node.name == name
- #puts "node.namespace == #{ns.inspect} => #{node.namespace == ns}"
end
end
- !(node.node_type == :element and
- node.name == name and
+ !(node.node_type == :element and
+ node.name == name and
node.namespace == ns )
end
node_types = ELEMENTS
when :any
- #puts "ANY 1: nodeset = #{nodeset.inspect}"
- #puts "ANY 1: node_types = #{node_types.inspect}"
nodeset.delete_if { |node| !node_types.include?(node.node_type) }
- #puts "ANY 2: nodeset = #{nodeset.inspect}"
when :self
# This space left intentionally blank
@@ -205,7 +187,7 @@ module REXML
when :processing_instruction
target = path_stack.shift
nodeset.delete_if do |node|
- (node.node_type != :processing_instruction) or
+ (node.node_type != :processing_instruction) or
( target!='' and ( node.target != target ) )
end
@@ -222,7 +204,7 @@ module REXML
when :child
new_nodeset = []
nt = nil
- for node in nodeset
+ nodeset.each do |node|
nt = node.node_type
new_nodeset += node.children if nt == :element or nt == :document
end
@@ -231,7 +213,7 @@ module REXML
when :literal
return path_stack.shift
-
+
when :attribute
new_nodeset = []
case path_stack.shift
@@ -240,15 +222,11 @@ module REXML
name = path_stack.shift
for element in nodeset
if element.node_type == :element
- #puts "Element name = #{element.name}"
- #puts "get_namespace( #{element.inspect}, #{prefix} ) = #{get_namespace(element, prefix)}"
attrib = element.attribute( name, get_namespace(element, prefix) )
- #puts "attrib = #{attrib.inspect}"
new_nodeset << attrib if attrib
end
end
when :any
- #puts "ANY"
for element in nodeset
if element.node_type == :element
new_nodeset += element.attributes.to_a
@@ -258,15 +236,13 @@ module REXML
nodeset = new_nodeset
when :parent
- #puts "PARENT 1: nodeset = #{nodeset}"
nodeset = nodeset.collect{|n| n.parent}.compact
#nodeset = expr(path_stack.dclone, nodeset.collect{|n| n.parent}.compact)
- #puts "PARENT 2: nodeset = #{nodeset.inspect}"
node_types = ELEMENTS
when :ancestor
new_nodeset = []
- for node in nodeset
+ nodeset.each do |node|
while node.parent
node = node.parent
new_nodeset << node unless new_nodeset.include? node
@@ -277,7 +253,7 @@ module REXML
when :ancestor_or_self
new_nodeset = []
- for node in nodeset
+ nodeset.each do |node|
if node.node_type == :element
new_nodeset << node
while ( node.parent )
@@ -295,41 +271,30 @@ module REXML
pred = path_stack.shift
nodeset.each_with_index { |node, index|
subcontext[ :node ] = node
- #puts "PREDICATE SETTING CONTEXT INDEX TO #{index+1}"
subcontext[ :index ] = index+1
pc = pred.dclone
- #puts "#{node.hash}) Recursing with #{pred.inspect} and [#{node.inspect}]"
result = expr( pc, [node], subcontext )
result = result[0] if result.kind_of? Array and result.length == 1
- #puts "#{node.hash}) Result = #{result.inspect} (#{result.class.name})"
if result.kind_of? Numeric
- #puts "Adding node #{node.inspect}" if result == (index+1)
new_nodeset << node if result == (index+1)
elsif result.instance_of? Array
if result.size > 0 and result.inject(false) {|k,s| s or k}
- #puts "Adding node #{node.inspect}" if result.size > 0
new_nodeset << node if result.size > 0
end
else
- #puts "Adding node #{node.inspect}" if result
new_nodeset << node if result
end
}
- #puts "New nodeset = #{new_nodeset.inspect}"
- #puts "Path_stack = #{path_stack.inspect}"
nodeset = new_nodeset
=begin
predicate = path_stack.shift
ns = nodeset.clone
result = expr( predicate, ns )
- #puts "Result = #{result.inspect} (#{result.class.name})"
- #puts "nodeset = #{nodeset.inspect}"
if result.kind_of? Array
nodeset = result.zip(ns).collect{|m,n| n if m}.compact
else
nodeset = result ? nodeset : []
end
- #puts "Outgoing NS = #{nodeset.inspect}"
=end
when :descendant_or_self
@@ -341,7 +306,7 @@ module REXML
when :descendant
results = []
nt = nil
- for node in nodeset
+ nodeset.each do |node|
nt = node.node_type
results += expr( path_stack.dclone.unshift( :descendant_or_self ),
node.children ) if nt == :element or nt == :document
@@ -350,7 +315,6 @@ module REXML
node_types = ELEMENTS
when :following_sibling
- #puts "FOLLOWING_SIBLING 1: nodeset = #{nodeset}"
results = []
nodeset.each do |node|
next if node.parent.nil?
@@ -359,7 +323,6 @@ module REXML
following_siblings = all_siblings[ current_index+1 .. -1 ]
results += expr( path_stack.dclone, following_siblings )
end
- #puts "FOLLOWING_SIBLING 2: nodeset = #{nodeset}"
nodeset = results
when :preceding_sibling
@@ -376,26 +339,24 @@ module REXML
when :preceding
new_nodeset = []
- for node in nodeset
+ nodeset.each do |node|
new_nodeset += preceding( node )
end
- #puts "NEW NODESET => #{new_nodeset.inspect}"
nodeset = new_nodeset
node_types = ELEMENTS
when :following
new_nodeset = []
- for node in nodeset
+ nodeset.each do |node|
new_nodeset += following( node )
end
nodeset = new_nodeset
node_types = ELEMENTS
when :namespace
- #puts "In :namespace"
new_nodeset = []
prefix = path_stack.shift
- for node in nodeset
+ nodeset.each do |node|
if (node.node_type == :element or node.node_type == :attribute)
if @namespaces
namespaces = @namespaces
@@ -404,9 +365,6 @@ module REXML
else
namespaces = node.element.namesapces
end
- #puts "Namespaces = #{namespaces.inspect}"
- #puts "Prefix = #{prefix.inspect}"
- #puts "Node.namespace = #{node.namespace}"
if (node.namespace == namespaces[prefix])
new_nodeset << node
end
@@ -419,28 +377,23 @@ module REXML
return @variables[ var_name ]
# :and, :or, :eq, :neq, :lt, :lteq, :gt, :gteq
- # TODO: Special case for :or and :and -- not evaluate the right
- # operand if the left alone determines result (i.e. is true for
- # :or and false for :and).
- when :eq, :neq, :lt, :lteq, :gt, :gteq, :and, :or
+ # TODO: Special case for :or and :and -- not evaluate the right
+ # operand if the left alone determines result (i.e. is true for
+ # :or and false for :and).
+ when :eq, :neq, :lt, :lteq, :gt, :gteq, :or
left = expr( path_stack.shift, nodeset.dup, context )
- #puts "LEFT => #{left.inspect} (#{left.class.name})"
right = expr( path_stack.shift, nodeset.dup, context )
- #puts "RIGHT => #{right.inspect} (#{right.class.name})"
res = equality_relational_compare( left, op, right )
- #puts "RES => #{res.inspect}"
return res
when :and
left = expr( path_stack.shift, nodeset.dup, context )
- #puts "LEFT => #{left.inspect} (#{left.class.name})"
- if left == false || left.nil? || !left.inject(false) {|a,b| a | b}
+ return [] unless left
+ if left.respond_to?(:inject) and !left.inject(false) {|a,b| a | b}
return []
end
right = expr( path_stack.shift, nodeset.dup, context )
- #puts "RIGHT => #{right.inspect} (#{right.class.name})"
res = equality_relational_compare( left, op, right )
- #puts "RES => #{res.inspect}"
return res
when :div
@@ -481,32 +434,27 @@ module REXML
when :function
func_name = path_stack.shift.tr('-','_')
arguments = path_stack.shift
- #puts "FUNCTION 0: #{func_name}(#{arguments.collect{|a|a.inspect}.join(', ')})"
subcontext = context ? nil : { :size => nodeset.size }
res = []
cont = context
- nodeset.each_with_index { |n, i|
+ nodeset.each_with_index { |n, i|
if subcontext
subcontext[:node] = n
subcontext[:index] = i
cont = subcontext
end
arg_clone = arguments.dclone
- args = arg_clone.collect { |arg|
- #puts "FUNCTION 1: Calling expr( #{arg.inspect}, [#{n.inspect}] )"
- expr( arg, [n], cont )
+ args = arg_clone.collect { |arg|
+ expr( arg, [n], cont )
}
- #puts "FUNCTION 2: #{func_name}(#{args.collect{|a|a.inspect}.join(', ')})"
Functions.context = cont
res << Functions.send( func_name, *args )
- #puts "FUNCTION 3: #{res[-1].inspect}"
}
return res
end
end # while
- #puts "EXPR returning #{nodeset.inspect}"
return nodeset
end
@@ -515,27 +463,21 @@ module REXML
# FIXME
# The next two methods are BAD MOJO!
# This is my achilles heel. If anybody thinks of a better
- # way of doing this, be my guest. This really sucks, but
+ # way of doing this, be my guest. This really sucks, but
# it is a wonder it works at all.
# ########################################################
-
+
def descendant_or_self( path_stack, nodeset )
rs = []
- #puts "#"*80
- #puts "PATH_STACK = #{path_stack.inspect}"
- #puts "NODESET = #{nodeset.collect{|n|n.inspect}.inspect}"
d_o_s( path_stack, nodeset, rs )
- #puts "RS = #{rs.collect{|n|n.inspect}.inspect}"
document_order(rs.flatten.compact)
#rs.flatten.compact
end
def d_o_s( p, ns, r )
- #puts "IN DOS with #{ns.inspect}; ALREADY HAVE #{r.inspect}"
nt = nil
ns.each_index do |i|
n = ns[i]
- #puts "P => #{p.inspect}"
x = expr( p.dclone, [ n ] )
nt = n.node_type
d_o_s( p, n.children, x ) if nt == :element or nt == :document and n.children.size > 0
@@ -547,7 +489,7 @@ module REXML
# Reorders an array of nodes so that they are in document order
# It tries to do this efficiently.
#
- # FIXME: I need to get rid of this, but the issue is that most of the XPath
+ # FIXME: I need to get rid of this, but the issue is that most of the XPath
# interpreter functions as a filter, which means that we lose context going
# in and out of function calls. If I knew what the index of the nodes was,
# I wouldn't have to do this. Maybe add a document IDX for each node?
@@ -555,7 +497,7 @@ module REXML
def document_order( array_of_nodes )
new_arry = []
array_of_nodes.each { |node|
- node_idx = []
+ node_idx = []
np = node.node_type == :attribute ? node.element : node
while np.parent and np.parent.node_type == :element
node_idx << np.parent.index( np )
@@ -563,7 +505,6 @@ module REXML
end
new_arry << [ node_idx.reverse, node ]
}
- #puts "new_arry = #{new_arry.inspect}"
new_arry.sort{ |s1, s2| s1[0] <=> s2[0] }.collect{ |s| s[1] }
end
@@ -579,10 +520,9 @@ module REXML
# Builds a nodeset of all of the preceding nodes of the supplied node,
# in reverse document order
- # preceding:: includes every element in the document that precedes this node,
+ # preceding:: includes every element in the document that precedes this node,
# except for ancestors
def preceding( node )
- #puts "IN PRECEDING"
ancestors = []
p = node.parent
while p
@@ -592,7 +532,6 @@ module REXML
acc = []
p = preceding_node_of( node )
- #puts "P = #{p.inspect}"
while p
if ancestors.include? p
ancestors.delete(p)
@@ -600,18 +539,14 @@ module REXML
acc << p
end
p = preceding_node_of( p )
- #puts "P = #{p.inspect}"
end
acc
end
def preceding_node_of( node )
- #puts "NODE: #{node.inspect}"
- #puts "PREVIOUS NODE: #{node.previous_sibling_node.inspect}"
- #puts "PARENT NODE: #{node.parent}"
- psn = node.previous_sibling_node
+ psn = node.previous_sibling_node
if psn.nil?
- if node.parent.nil? or node.parent.class == Document
+ if node.parent.nil? or node.parent.class == Document
return nil
end
return node.parent
@@ -624,22 +559,16 @@ module REXML
end
def following( node )
- #puts "IN PRECEDING"
acc = []
p = next_sibling_node( node )
- #puts "P = #{p.inspect}"
while p
acc << p
p = following_node_of( p )
- #puts "P = #{p.inspect}"
end
acc
end
def following_node_of( node )
- #puts "NODE: #{node.inspect}"
- #puts "PREVIOUS NODE: #{node.previous_sibling_node.inspect}"
- #puts "PARENT NODE: #{node.parent}"
if node.kind_of? Element and node.children.size > 0
return node.children[0]
end
@@ -647,14 +576,13 @@ module REXML
end
def next_sibling_node(node)
- psn = node.next_sibling_node
+ psn = node.next_sibling_node
while psn.nil?
- if node.parent.nil? or node.parent.class == Document
+ if node.parent.nil? or node.parent.class == Document
return nil
end
node = node.parent
psn = node.next_sibling_node
- #puts "psn = #{psn.inspect}"
end
return psn
end
@@ -673,22 +601,17 @@ module REXML
end
def equality_relational_compare( set1, op, set2 )
- #puts "EQ_REL_COMP(#{set1.inspect} #{op.inspect} #{set2.inspect})"
if set1.kind_of? Array and set2.kind_of? Array
- #puts "#{set1.size} & #{set2.size}"
if set1.size == 1 and set2.size == 1
set1 = set1[0]
set2 = set2[0]
elsif set1.size == 0 or set2.size == 0
nd = set1.size==0 ? set2 : set1
rv = nd.collect { |il| compare( il, op, nil ) }
- #puts "RV = #{rv.inspect}"
return rv
else
res = []
- enum = SyncEnumerator.new( set1, set2 ).each { |i1, i2|
- #puts "i1 = #{i1.inspect} (#{i1.class.name})"
- #puts "i2 = #{i2.inspect} (#{i2.class.name})"
+ SyncEnumerator.new( set1, set2 ).each { |i1, i2|
i1 = norm( i1 )
i2 = norm( i2 )
res << compare( i1, op, i2 )
@@ -696,8 +619,6 @@ module REXML
return res
end
end
- #puts "EQ_REL_COMP: #{set1.inspect} (#{set1.class.name}), #{op}, #{set2.inspect} (#{set2.class.name})"
- #puts "COMPARING VALUES"
# If one is nodeset and other is number, compare number to each item
# in nodeset s.t. number op number(string(item))
# If one is nodeset and other is string, compare string to each item
@@ -705,7 +626,6 @@ module REXML
# If one is nodeset and other is boolean, compare boolean to each item
# in nodeset s.t. boolean op boolean(item)
if set1.kind_of? Array or set2.kind_of? Array
- #puts "ISA ARRAY"
if set1.kind_of? Array
a = set1
b = set2
@@ -721,10 +641,8 @@ module REXML
return a.collect {|v| compare( Functions::number(v), op, b )}
when /^\d+(\.\d+)?$/
b = Functions::number( b )
- #puts "B = #{b.inspect}"
return a.collect {|v| compare( Functions::number(v), op, b )}
else
- #puts "Functions::string( #{b}(#{b.class.name}) ) = #{Functions::string(b)}"
b = Functions::string( b )
return a.collect { |v| compare( Functions::string(v), op, b ) }
end
@@ -738,10 +656,7 @@ module REXML
# Convert both to numbers and compare
s1 = set1.to_s
s2 = set2.to_s
- #puts "EQ_REL_COMP: #{set1}=>#{s1}, #{set2}=>#{s2}"
if s1 == 'true' or s1 == 'false' or s2 == 'true' or s2 == 'false'
- #puts "Functions::boolean(#{set1})=>#{Functions::boolean(set1)}"
- #puts "Functions::boolean(#{set2})=>#{Functions::boolean(set2)}"
set1 = Functions::boolean( set1 )
set2 = Functions::boolean( set2 )
else
@@ -758,15 +673,12 @@ module REXML
set2 = Functions::number( set2 )
end
end
- #puts "EQ_REL_COMP: #{set1} #{op} #{set2}"
- #puts ">>> #{compare( set1, op, set2 )}"
return compare( set1, op, set2 )
end
return false
end
def compare a, op, b
- #puts "COMPARE #{a.inspect}(#{a.class.name}) #{op} #{b.inspect}(#{b.class.name})"
case op
when :eq
a == b