diff options
-rw-r--r-- | lib/rexml/attribute.rb | 36 | ||||
-rw-r--r-- | lib/rexml/cdata.rb | 21 | ||||
-rw-r--r-- | lib/rexml/comment.rb | 4 | ||||
-rw-r--r-- | lib/rexml/doctype.rb | 27 | ||||
-rw-r--r-- | lib/rexml/document.rb | 90 | ||||
-rw-r--r-- | lib/rexml/dtd/dtd.rb | 2 | ||||
-rw-r--r-- | lib/rexml/element.rb | 2269 | ||||
-rw-r--r-- | lib/rexml/encoding.rb | 4 | ||||
-rw-r--r-- | lib/rexml/encodings/CP-1252.rb | 13 | ||||
-rw-r--r-- | lib/rexml/encodings/ISO-8859-15.rb | 9 | ||||
-rw-r--r-- | lib/rexml/encodings/UTF-16.rb | 2 | ||||
-rw-r--r-- | lib/rexml/entity.rb | 6 | ||||
-rw-r--r-- | lib/rexml/formatters/default.rb | 109 | ||||
-rw-r--r-- | lib/rexml/formatters/pretty.rb | 134 | ||||
-rw-r--r-- | lib/rexml/formatters/transitive.rb | 56 | ||||
-rw-r--r-- | lib/rexml/functions.rb | 1 | ||||
-rw-r--r-- | lib/rexml/instruction.rb | 4 | ||||
-rw-r--r-- | lib/rexml/node.rb | 17 | ||||
-rw-r--r-- | lib/rexml/parsers/baseparser.rb | 8 | ||||
-rw-r--r-- | lib/rexml/parsers/sax2parser.rb | 4 | ||||
-rw-r--r-- | lib/rexml/parsers/xpathparser.rb | 2 | ||||
-rw-r--r-- | lib/rexml/rexml.rb | 11 | ||||
-rw-r--r-- | lib/rexml/source.rb | 349 | ||||
-rw-r--r-- | lib/rexml/text.rb | 17 | ||||
-rw-r--r-- | lib/rexml/xmldecl.rb | 11 | ||||
-rw-r--r-- | lib/rexml/xpath_parser.rb | 21 |
26 files changed, 1783 insertions, 1444 deletions
diff --git a/lib/rexml/attribute.rb b/lib/rexml/attribute.rb index a169148f32..029035d675 100644 --- a/lib/rexml/attribute.rb +++ b/lib/rexml/attribute.rb @@ -18,16 +18,32 @@ module REXML PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\2/um # Constructor. + # FIXME: The parser doesn't catch illegal characters in attributes + # + # first:: + # Either: an Attribute, which this new attribute will become a + # clone of; or a String, which is the name of this attribute + # second:: + # If +first+ is an Attribute, then this may be an Element, or nil. + # If nil, then the Element parent of this attribute is the parent + # of the +first+ Attribute. If the first argument is a String, + # then this must also be a String, and is the content of the attribute. + # If this is the content, it must be fully normalized (contain no + # illegal characters). + # parent:: + # Ignored unless +first+ is a String; otherwise, may be the Element + # parent of this attribute, or nil. + # # # Attribute.new( attribute_to_clone ) - # Attribute.new( source ) + # Attribute.new( attribute_to_clone, parent_element ) # Attribute.new( "attr", "attr_value" ) # Attribute.new( "attr", "attr_value", parent_element ) def initialize( first, second=nil, parent=nil ) @normalized = @unnormalized = @element = nil if first.kind_of? Attribute self.name = first.expanded_name - @value = first.value + @unnormalized = first.value if second.kind_of? Element @element = second else @@ -36,7 +52,7 @@ module REXML elsif first.kind_of? String @element = parent if parent.kind_of? Element self.name = first - @value = second.to_s + @normalized = second.to_s else raise "illegal argument #{first.class.name} to Attribute constructor" end @@ -72,7 +88,7 @@ module REXML # Returns true if other is an Attribute and has the same name and value, # false otherwise. 
def ==( other ) - other.kind_of?(Attribute) and other.name==name and other.value==@value + other.kind_of?(Attribute) and other.name==name and other.value==value end # Creates (and returns) a hash from both the name and value @@ -87,7 +103,11 @@ module REXML # b = Attribute.new( "ns:x", "y" ) # b.to_string # -> "ns:x='y'" def to_string - "#@expanded_name='#{to_s().gsub(/'/, '&apos;')}'" + if @element and @element.context and @element.context[:attribute_quote] == :quote + %Q^#@expanded_name="#{to_s().gsub(/"/, '&quot;')}"^ + else + "#@expanded_name='#{to_s().gsub(/'/, '&apos;')}'" + end end # Returns the attribute value, with entities replaced @@ -100,8 +120,9 @@ module REXML doctype = doc.doctype if doc end + @normalized = Text::normalize( @unnormalized, doctype ) @unnormalized = nil - @normalized = Text::normalize( @value, doctype ) + @normalized end # Returns the UNNORMALIZED value of this attribute. That is, entities @@ -113,8 +134,9 @@ module REXML doc = @element.document doctype = doc.doctype if doc end + @unnormalized = Text::unnormalize( @normalized, doctype ) @normalized = nil - @unnormalized = Text::unnormalize( @value, doctype ) + @unnormalized end # Returns a copy of this attribute diff --git a/lib/rexml/cdata.rb b/lib/rexml/cdata.rb index 046012ba61..efcb71160a 100644 --- a/lib/rexml/cdata.rb +++ b/lib/rexml/cdata.rb @@ -39,31 +39,26 @@ module REXML @string end + # == DEPRECATED + # See the rexml/formatters package + # # Generates XML output of this object # # output:: # Where to write the string. Defaults to $stdout # indent:: - # An integer. If -1, no indenting will be used; otherwise, the - # indentation will be this number of spaces, and children will be - # indented an additional amount. Defaults to -1. 
+ # The amount to indent this node by # transitive:: - # If transitive is true and indent is >= 0, then the output will be - # pretty-printed in such a way that the added whitespace does not affect - # the absolute *value* of the document -- that is, it leaves the value - # and number of Text nodes in the document unchanged. + # Ignored # ie_hack:: - # Internet Explorer is the worst piece of crap to have ever been - # written, with the possible exception of Windows itself. Since IE is - # unable to parse proper XML, we have to provide a hack to generate XML - # that IE's limited abilities can handle. This hack inserts a space - # before the /> on empty tags. + # Ignored # # _Examples_ # c = CData.new( " Some text " ) # c.write( $stdout ) #-> <![CDATA[ Some text ]]> def write( output=$stdout, indent=-1, transitive=false, ie_hack=false ) - #indent( output, indent ) unless transitive + Kernel.warn( "#{self.class.name}.write is deprecated" ) + indent( output, indent ) output << START output << @string output << STOP diff --git a/lib/rexml/comment.rb b/lib/rexml/comment.rb index a4fcb58c8d..2b9b4b89c9 100644 --- a/lib/rexml/comment.rb +++ b/lib/rexml/comment.rb @@ -34,6 +34,9 @@ module REXML Comment.new self end + # == DEPRECATED + # See REXML::Formatters + # # output:: # Where to write the string # indent:: @@ -45,6 +48,7 @@ module REXML # ie_hack:: # Needed for conformity to the child API, but not used by this class. def write( output, indent=-1, transitive=false, ie_hack=false ) + Kernel.warn("Comment.write is deprecated. See REXML::Formatters") indent( output, indent ) output << START output << @string diff --git a/lib/rexml/doctype.rb b/lib/rexml/doctype.rb index 4a1ffb4336..05cd4ab331 100644 --- a/lib/rexml/doctype.rb +++ b/lib/rexml/doctype.rb @@ -98,38 +98,30 @@ module REXML # output:: # Where to write the string # indent:: - # An integer. If -1, no indenting will be used; otherwise, the + # An integer. 
If -1, no indentation will be used; otherwise, the # indentation will be this number of spaces, and children will be # indented an additional amount. # transitive:: - # If transitive is true and indent is >= 0, then the output will be - # pretty-printed in such a way that the added whitespace does not affect - # the absolute *value* of the document -- that is, it leaves the value - # and number of Text nodes in the document unchanged. + # Ignored # ie_hack:: - # Internet Explorer is the worst piece of crap to have ever been - # written, with the possible exception of Windows itself. Since IE is - # unable to parse proper XML, we have to provide a hack to generate XML - # that IE's limited abilities can handle. This hack inserts a space - # before the /> on empty tags. - # + # Ignored def write( output, indent=0, transitive=false, ie_hack=false ) + f = REXML::Formatters::Default.new indent( output, indent ) output << START output << ' ' output << @name output << " #@external_id" if @external_id - output << " #@long_name" if @long_name - output << " #@uri" if @uri + output << " #{@long_name.inspect}" if @long_name + output << " #{@uri.inspect}" if @uri unless @children.empty? next_indent = indent + 1 output << ' [' child = nil # speed @children.each { |child| output << "\n" - child.write( output, next_indent ) + f.write( child, output ) } - #output << ' '*next_indent output << "\n]" end output << STOP @@ -219,8 +211,10 @@ module REXML @string+'>' end + # == DEPRECATED + # See REXML::Formatters + # def write( output, indent ) - output << (' '*indent) if indent > 0 output << to_s end end @@ -264,7 +258,6 @@ module REXML end def write( output, indent=-1 ) - output << (' '*indent) if indent > 0 output << to_s end diff --git a/lib/rexml/document.rb b/lib/rexml/document.rb index 619a844257..558b5d731a 100644 --- a/lib/rexml/document.rb +++ b/lib/rexml/document.rb @@ -31,9 +31,6 @@ module REXML # to be sources of valid XML documents. 
# @param context if supplied, contains the context of the document; # this should be a Hash. - # NOTE that I'm not sure what the context is for; I cloned it out of - # the Electric XML API (in which it also seems to do nothing), and it - # is now legacy. It may do something, someday... it may disappear. def initialize( source = nil, context = {} ) super() @context = context @@ -142,42 +139,59 @@ module REXML xml_decl().stand_alone? end - # Write the XML tree out, optionally with indent. This writes out the - # entire XML document, including XML declarations, doctype declarations, - # and processing instructions (if any are given). - # A controversial point is whether Document should always write the XML - # declaration (<?xml version='1.0'?>) whether or not one is given by the - # user (or source document). REXML does not write one if one was not - # specified, because it adds unneccessary bandwidth to applications such - # as XML-RPC. - # - # - # output:: - # output an object which supports '<< string'; this is where the - # document will be written. - # indent:: - # An integer. If -1, no indenting will be used; otherwise, the - # indentation will be this number of spaces, and children will be - # indented an additional amount. Defaults to -1 - # transitive:: - # If transitive is true and indent is >= 0, then the output will be - # pretty-printed in such a way that the added whitespace does not affect - # the absolute *value* of the document -- that is, it leaves the value - # and number of Text nodes in the document unchanged. - # ie_hack:: - # Internet Explorer is the worst piece of crap to have ever been - # written, with the possible exception of Windows itself. Since IE is - # unable to parse proper XML, we have to provide a hack to generate XML - # that IE's limited abilities can handle. This hack inserts a space - # before the /> on empty tags. 
Defaults to false - def write( output=$stdout, indent=-1, transitive=false, ie_hack=false ) - output = Output.new( output, xml_decl.encoding ) if xml_decl.encoding != "UTF-8" && !output.kind_of?(Output) - @children.each { |node| - indent( output, indent ) if node.node_type == :element - if node.write( output, indent, transitive, ie_hack ) - output << "\n" unless indent<0 or node == @children[-1] + # Write the XML tree out, optionally with indent. This writes out the + # entire XML document, including XML declarations, doctype declarations, + # and processing instructions (if any are given). + # + # A controversial point is whether Document should always write the XML + # declaration (<?xml version='1.0'?>) whether or not one is given by the + # user (or source document). REXML does not write one if one was not + # specified, because it adds unneccessary bandwidth to applications such + # as XML-RPC. + # + # See also the classes in the rexml/formatters package for the proper way + # to change the default formatting of XML output + # + # _Examples_ + # Document.new("<a><b/></a>").serialize + # + # output_string = "" + # tr = Transitive.new( output_string ) + # Document.new("<a><b/></a>").serialize( tr ) + # + # output:: + # output an object which supports '<< string'; this is where the + # document will be written. + # indent:: + # An integer. If -1, no indenting will be used; otherwise, the + # indentation will be twice this number of spaces, and children will be + # indented an additional amount. For a value of 3, every item will be + # indented 3 more levels, or 6 more spaces (2 * 3). Defaults to -1 + # trans:: + # If transitive is true and indent is >= 0, then the output will be + # pretty-printed in such a way that the added whitespace does not affect + # the absolute *value* of the document -- that is, it leaves the value + # and number of Text nodes in the document unchanged. 
+ # ie_hack:: + # Internet Explorer is the worst piece of crap to have ever been + # written, with the possible exception of Windows itself. Since IE is + # unable to parse proper XML, we have to provide a hack to generate XML + # that IE's limited abilities can handle. This hack inserts a space + # before the /> on empty tags. Defaults to false + def write( output=$stdout, indent=-1, trans=false, ie_hack=false ) + if xml_decl.encoding != "UTF-8" && !output.kind_of?(Output) + output = Output.new( output, xml_decl.encoding ) + end + formatter = if indent > -1 + if transitive + REXML::Formatters::Transitive.new( indent, ie_hack ) + else + REXML::Formatters::Pretty.new( indent, ie_hack ) + end + else + REXML::Formatters::Default.new( ie_hack ) end - } + formatter.write( self, output ) end diff --git a/lib/rexml/dtd/dtd.rb b/lib/rexml/dtd/dtd.rb index 81119cfa9b..4f735d4812 100644 --- a/lib/rexml/dtd/dtd.rb +++ b/lib/rexml/dtd/dtd.rb @@ -25,7 +25,7 @@ module REXML when ElementDecl.PATTERN_RE match = $& source = $' - contents << EleemntDecl.new( match ) + contents << ElementDecl.new( match ) when AttlistDecl.PATTERN_RE matchdata = $~ source = $' diff --git a/lib/rexml/element.rb b/lib/rexml/element.rb index 11e2039609..f757cfffa8 100644 --- a/lib/rexml/element.rb +++ b/lib/rexml/element.rb @@ -14,64 +14,64 @@ module REXML # context node and convert it back when we write it. @@namespaces = {} - # Represents a tagged XML element. Elements are characterized by - # having children, attributes, and names, and can themselves be - # children. - class Element < Parent - include Namespace - - UNDEFINED = "UNDEFINED"; # The default name - - # Mechanisms for accessing attributes and child elements of this - # element. - attr_reader :attributes, :elements - # The context holds information about the processing environment, such as - # whitespace handling. - attr_accessor :context - - # Constructor - # arg:: - # if not supplied, will be set to the default value. 
- # If a String, the name of this object will be set to the argument. - # If an Element, the object will be shallowly cloned; name, - # attributes, and namespaces will be copied. Children will +not+ be - # copied. - # parent:: - # if supplied, must be a Parent, and will be used as - # the parent of this object. - # context:: - # If supplied, must be a hash containing context items. Context items - # include: - # * <tt>:respect_whitespace</tt> the value of this is :+all+ or an array of - # strings being the names of the elements to respect - # whitespace for. Defaults to :+all+. - # * <tt>:compress_whitespace</tt> the value can be :+all+ or an array of - # strings being the names of the elements to ignore whitespace on. - # Overrides :+respect_whitespace+. - # * <tt>:ignore_whitespace_nodes</tt> the value can be :+all+ or an array - # of strings being the names of the elements in which to ignore - # whitespace-only nodes. If this is set, Text nodes which contain only - # whitespace will not be added to the document tree. - # * <tt>:raw</tt> can be :+all+, or an array of strings being the names of - # the elements to process in raw mode. In raw mode, special - # characters in text is not converted to or from entities. - def initialize( arg = UNDEFINED, parent=nil, context=nil ) - super(parent) - - @elements = Elements.new(self) - @attributes = Attributes.new(self) - @context = context - - if arg.kind_of? String - self.name = arg - elsif arg.kind_of? Element - self.name = arg.expanded_name - arg.attributes.each_attribute{ |attribute| - @attributes << Attribute.new( attribute ) - } - @context = arg.context - end - end + # Represents a tagged XML element. Elements are characterized by + # having children, attributes, and names, and can themselves be + # children. + class Element < Parent + include Namespace + + UNDEFINED = "UNDEFINED"; # The default name + + # Mechanisms for accessing attributes and child elements of this + # element. 
+ attr_reader :attributes, :elements + # The context holds information about the processing environment, such as + # whitespace handling. + attr_accessor :context + + # Constructor + # arg:: + # if not supplied, will be set to the default value. + # If a String, the name of this object will be set to the argument. + # If an Element, the object will be shallowly cloned; name, + # attributes, and namespaces will be copied. Children will +not+ be + # copied. + # parent:: + # if supplied, must be a Parent, and will be used as + # the parent of this object. + # context:: + # If supplied, must be a hash containing context items. Context items + # include: + # * <tt>:respect_whitespace</tt> the value of this is :+all+ or an array of + # strings being the names of the elements to respect + # whitespace for. Defaults to :+all+. + # * <tt>:compress_whitespace</tt> the value can be :+all+ or an array of + # strings being the names of the elements to ignore whitespace on. + # Overrides :+respect_whitespace+. + # * <tt>:ignore_whitespace_nodes</tt> the value can be :+all+ or an array + # of strings being the names of the elements in which to ignore + # whitespace-only nodes. If this is set, Text nodes which contain only + # whitespace will not be added to the document tree. + # * <tt>:raw</tt> can be :+all+, or an array of strings being the names of + # the elements to process in raw mode. In raw mode, special + # characters in text is not converted to or from entities. + def initialize( arg = UNDEFINED, parent=nil, context=nil ) + super(parent) + + @elements = Elements.new(self) + @attributes = Attributes.new(self) + @context = context + + if arg.kind_of? String + self.name = arg + elsif arg.kind_of? Element + self.name = arg.expanded_name + arg.attributes.each_attribute{ |attribute| + @attributes << Attribute.new( attribute ) + } + @context = arg.context + end + end def inspect rv = "<#@expanded_name" @@ -89,18 +89,18 @@ module REXML end - # Creates a shallow copy of self. 
- # d = Document.new "<a><b/><b/><c><d/></c></a>" - # new_a = d.root.clone - # puts new_a # => "<a/>" - def clone - self.class.new self - end + # Creates a shallow copy of self. + # d = Document.new "<a><b/><b/><c><d/></c></a>" + # new_a = d.root.clone + # puts new_a # => "<a/>" + def clone + self.class.new self + end - # Evaluates to the root node of the document that this element - # belongs to. If this element doesn't belong to a document, but does - # belong to another Element, the parent's root will be returned, until the - # earliest ancestor is found. + # Evaluates to the root node of the document that this element + # belongs to. If this element doesn't belong to a document, but does + # belong to another Element, the parent's root will be returned, until the + # earliest ancestor is found. # # Note that this is not the same as the document element. # In the following example, <a> is the document element, and the root @@ -111,14 +111,14 @@ module REXML # The only time this isn't true is when an Element is created that is # not part of any Document. In this case, the ancestor that has no # parent acts as the root node. - # d = Document.new '<a><b><c/></b></a>' - # a = d[1] ; c = a[1][1] - # d.root_node == d # TRUE - # a.root_node # namely, d - # c.root_node # again, d - def root_node - parent.nil? ? self : parent.root_node - end + # d = Document.new '<a><b><c/></b></a>' + # a = d[1] ; c = a[1][1] + # d.root_node == d # TRUE + # a.root_node # namely, d + # c.root_node # again, d + def root_node + parent.nil? ? self : parent.root_node + end def root return elements[1] if self.kind_of? Document @@ -126,415 +126,410 @@ module REXML return parent.root end - # Evaluates to the document to which this element belongs, or nil if this - # element doesn't belong to a document. - def document + # Evaluates to the document to which this element belongs, or nil if this + # element doesn't belong to a document. 
+ def document rt = root - rt.parent if rt - end - - # Evaluates to +true+ if whitespace is respected for this element. This - # is the case if: - # 1. Neither :+respect_whitespace+ nor :+compress_whitespace+ has any value - # 2. The context has :+respect_whitespace+ set to :+all+ or - # an array containing the name of this element, and + rt.parent if rt + end + + # Evaluates to +true+ if whitespace is respected for this element. This + # is the case if: + # 1. Neither :+respect_whitespace+ nor :+compress_whitespace+ has any value + # 2. The context has :+respect_whitespace+ set to :+all+ or + # an array containing the name of this element, and # :+compress_whitespace+ isn't set to :+all+ or an array containing the # name of this element. - # The evaluation is tested against +expanded_name+, and so is namespace - # sensitive. - def whitespace - @whitespace = nil - if @context - if @context[:respect_whitespace] - @whitespace = (@context[:respect_whitespace] == :all or - @context[:respect_whitespace].include? expanded_name) - end - @whitespace = false if (@context[:compress_whitespace] and - (@context[:compress_whitespace] == :all or - @context[:compress_whitespace].include? expanded_name) - ) - end - @whitespace = true unless @whitespace == false - @whitespace - end - - def ignore_whitespace_nodes - @ignore_whitespace_nodes = false - if @context - if @context[:ignore_whitespace_nodes] - @ignore_whitespace_nodes = - (@context[:ignore_whitespace_nodes] == :all or - @context[:ignore_whitespace_nodes].include? expanded_name) - end - end - end - - # Evaluates to +true+ if raw mode is set for this element. This - # is the case if the context has :+raw+ set to :+all+ or - # an array containing the name of this element. - # - # The evaluation is tested against +expanded_name+, and so is namespace - # sensitive. - def raw - @raw = (@context and @context[:raw] and - (@context[:raw] == :all or - @context[:raw].include? 
expanded_name)) - @raw - end - - #once :whitespace, :raw, :ignore_whitespace_nodes - - ################################################# - # Namespaces # - ################################################# - - # Evaluates to an +Array+ containing the prefixes (names) of all defined - # namespaces at this context node. - # doc = Document.new("<a xmlns:x='1' xmlns:y='2'><b/><c xmlns:z='3'/></a>") - # doc.elements['//b'].prefixes # -> ['x', 'y'] - def prefixes - prefixes = [] - prefixes = parent.prefixes if parent - prefixes |= attributes.prefixes - return prefixes - end - - def namespaces - namespaces = {} - namespaces = parent.namespaces if parent - namespaces = namespaces.merge( attributes.namespaces ) - return namespaces - end - - # Evalutas to the URI for a prefix, or the empty string if no such - # namespace is declared for this element. Evaluates recursively for - # ancestors. Returns the default namespace, if there is one. - # prefix:: - # the prefix to search for. If not supplied, returns the default - # namespace if one exists - # Returns:: - # the namespace URI as a String, or nil if no such namespace - # exists. If the namespace is undefined, returns an empty string - # doc = Document.new("<a xmlns='1' xmlns:y='2'><b/><c xmlns:z='3'/></a>") - # b = doc.elements['//b'] - # b.namespace # -> '1' - # b.namespace("y") # -> '2' - def namespace(prefix=nil) - if prefix.nil? - prefix = prefix() - end - if prefix == '' - prefix = "xmlns" - else - prefix = "xmlns:#{prefix}" unless prefix[0,5] == 'xmlns' - end - ns = attributes[ prefix ] - ns = parent.namespace(prefix) if ns.nil? and parent - ns = '' if ns.nil? and prefix == 'xmlns' - return ns - end - - # Adds a namespace to this element. - # prefix:: - # the prefix string, or the namespace URI if +uri+ is not - # supplied - # uri:: - # the namespace URI. 
May be nil, in which +prefix+ is used as - # the URI - # Evaluates to: this Element - # a = Element.new("a") - # a.add_namespace("xmlns:foo", "bar" ) - # a.add_namespace("foo", "bar") # shorthand for previous line - # a.add_namespace("twiddle") - # puts a #-> <a xmlns:foo='bar' xmlns='twiddle'/> - def add_namespace( prefix, uri=nil ) - unless uri - @attributes["xmlns"] = prefix - else - prefix = "xmlns:#{prefix}" unless prefix =~ /^xmlns:/ - @attributes[ prefix ] = uri - end - self - end - - # Removes a namespace from this node. This only works if the namespace is - # actually declared in this node. If no argument is passed, deletes the - # default namespace. - # - # Evaluates to: this element - # doc = Document.new "<a xmlns:foo='bar' xmlns='twiddle'/>" - # doc.root.delete_namespace - # puts doc # -> <a xmlns:foo='bar'/> - # doc.root.delete_namespace 'foo' - # puts doc # -> <a/> - def delete_namespace namespace="xmlns" - namespace = "xmlns:#{namespace}" unless namespace == 'xmlns' - attribute = attributes.get_attribute(namespace) - attribute.remove unless attribute.nil? - self - end - - ################################################# - # Elements # - ################################################# - - # Adds a child to this element, optionally setting attributes in - # the element. - # element:: - # optional. If Element, the element is added. - # Otherwise, a new Element is constructed with the argument (see - # Element.initialize). - # attrs:: - # If supplied, must be a Hash containing String name,value - # pairs, which will be used to set the attributes of the new Element. - # Returns:: the Element that was added - # el = doc.add_element 'my-tag' - # el = doc.add_element 'my-tag', {'attr1'=>'val1', 'attr2'=>'val2'} - # el = Element.new 'my-tag' - # doc.add_element el - def add_element element, attrs=nil + # The evaluation is tested against +expanded_name+, and so is namespace + # sensitive. 
+ def whitespace + @whitespace = nil + if @context + if @context[:respect_whitespace] + @whitespace = (@context[:respect_whitespace] == :all or + @context[:respect_whitespace].include? expanded_name) + end + @whitespace = false if (@context[:compress_whitespace] and + (@context[:compress_whitespace] == :all or + @context[:compress_whitespace].include? expanded_name) + ) + end + @whitespace = true unless @whitespace == false + @whitespace + end + + def ignore_whitespace_nodes + @ignore_whitespace_nodes = false + if @context + if @context[:ignore_whitespace_nodes] + @ignore_whitespace_nodes = + (@context[:ignore_whitespace_nodes] == :all or + @context[:ignore_whitespace_nodes].include? expanded_name) + end + end + end + + # Evaluates to +true+ if raw mode is set for this element. This + # is the case if the context has :+raw+ set to :+all+ or + # an array containing the name of this element. + # + # The evaluation is tested against +expanded_name+, and so is namespace + # sensitive. + def raw + @raw = (@context and @context[:raw] and + (@context[:raw] == :all or + @context[:raw].include? expanded_name)) + @raw + end + + #once :whitespace, :raw, :ignore_whitespace_nodes + + ################################################# + # Namespaces # + ################################################# + + # Evaluates to an +Array+ containing the prefixes (names) of all defined + # namespaces at this context node. + # doc = Document.new("<a xmlns:x='1' xmlns:y='2'><b/><c xmlns:z='3'/></a>") + # doc.elements['//b'].prefixes # -> ['x', 'y'] + def prefixes + prefixes = [] + prefixes = parent.prefixes if parent + prefixes |= attributes.prefixes + return prefixes + end + + def namespaces + namespaces = {} + namespaces = parent.namespaces if parent + namespaces = namespaces.merge( attributes.namespaces ) + return namespaces + end + + # Evalutas to the URI for a prefix, or the empty string if no such + # namespace is declared for this element. Evaluates recursively for + # ancestors. 
Returns the default namespace, if there is one. + # prefix:: + # the prefix to search for. If not supplied, returns the default + # namespace if one exists + # Returns:: + # the namespace URI as a String, or nil if no such namespace + # exists. If the namespace is undefined, returns an empty string + # doc = Document.new("<a xmlns='1' xmlns:y='2'><b/><c xmlns:z='3'/></a>") + # b = doc.elements['//b'] + # b.namespace # -> '1' + # b.namespace("y") # -> '2' + def namespace(prefix=nil) + if prefix.nil? + prefix = prefix() + end + if prefix == '' + prefix = "xmlns" + else + prefix = "xmlns:#{prefix}" unless prefix[0,5] == 'xmlns' + end + ns = attributes[ prefix ] + ns = parent.namespace(prefix) if ns.nil? and parent + ns = '' if ns.nil? and prefix == 'xmlns' + return ns + end + + # Adds a namespace to this element. + # prefix:: + # the prefix string, or the namespace URI if +uri+ is not + # supplied + # uri:: + # the namespace URI. May be nil, in which +prefix+ is used as + # the URI + # Evaluates to: this Element + # a = Element.new("a") + # a.add_namespace("xmlns:foo", "bar" ) + # a.add_namespace("foo", "bar") # shorthand for previous line + # a.add_namespace("twiddle") + # puts a #-> <a xmlns:foo='bar' xmlns='twiddle'/> + def add_namespace( prefix, uri=nil ) + unless uri + @attributes["xmlns"] = prefix + else + prefix = "xmlns:#{prefix}" unless prefix =~ /^xmlns:/ + @attributes[ prefix ] = uri + end + self + end + + # Removes a namespace from this node. This only works if the namespace is + # actually declared in this node. If no argument is passed, deletes the + # default namespace. 
+ # + # Evaluates to: this element + # doc = Document.new "<a xmlns:foo='bar' xmlns='twiddle'/>" + # doc.root.delete_namespace + # puts doc # -> <a xmlns:foo='bar'/> + # doc.root.delete_namespace 'foo' + # puts doc # -> <a/> + def delete_namespace namespace="xmlns" + namespace = "xmlns:#{namespace}" unless namespace == 'xmlns' + attribute = attributes.get_attribute(namespace) + attribute.remove unless attribute.nil? + self + end + + ################################################# + # Elements # + ################################################# + + # Adds a child to this element, optionally setting attributes in + # the element. + # element:: + # optional. If Element, the element is added. + # Otherwise, a new Element is constructed with the argument (see + # Element.initialize). + # attrs:: + # If supplied, must be a Hash containing String name,value + # pairs, which will be used to set the attributes of the new Element. + # Returns:: the Element that was added + # el = doc.add_element 'my-tag' + # el = doc.add_element 'my-tag', {'attr1'=>'val1', 'attr2'=>'val2'} + # el = Element.new 'my-tag' + # doc.add_element el + def add_element element, attrs=nil raise "First argument must be either an element name, or an Element object" if element.nil? - el = @elements.add(element) - if attrs.kind_of? Hash - attrs.each do |key, value| - el.attributes[key]=value if key =~ /^xmlns:/ - end - attrs.each do |key, value| - el.attributes[key]=value if key !~ /^xmlns:/ - end - end - el - end - - # Deletes a child element. - # element:: - # Must be an +Element+, +String+, or +Integer+. If Element, - # the element is removed. If String, the element is found (via XPath) - # and removed. <em>This means that any parent can remove any - # descendant.<em> If Integer, the Element indexed by that number will be - # removed. - # Returns:: the element that was removed. 
- # doc.delete_element "/a/b/c[@id='4']" - # doc.delete_element doc.elements["//k"] - # doc.delete_element 1 - def delete_element element - @elements.delete element - end - - # Evaluates to +true+ if this element has at least one child Element - # doc = Document.new "<a><b/><c>Text</c></a>" - # doc.root.has_elements # -> true - # doc.elements["/a/b"].has_elements # -> false - # doc.elements["/a/c"].has_elements # -> false - def has_elements? - !@elements.empty? - end - - # Iterates through the child elements, yielding for each Element that - # has a particular attribute set. - # key:: - # the name of the attribute to search for - # value:: - # the value of the attribute - # max:: - # (optional) causes this method to return after yielding - # for this number of matching children - # name:: - # (optional) if supplied, this is an XPath that filters - # the children to check. - # - # doc = Document.new "<a><b @id='1'/><c @id='2'/><d @id='1'/><e/></a>" - # # Yields b, c, d - # doc.root.each_element_with_attribute( 'id' ) {|e| p e} - # # Yields b, d - # doc.root.each_element_with_attribute( 'id', '1' ) {|e| p e} - # # Yields b - # doc.root.each_element_with_attribute( 'id', '1', 1 ) {|e| p e} - # # Yields d - # doc.root.each_element_with_attribute( 'id', '1', 0, 'd' ) {|e| p e} - def each_element_with_attribute( key, value=nil, max=0, name=nil, &block ) # :yields: Element - each_with_something( proc {|child| - if value.nil? - child.attributes[key] != nil - else - child.attributes[key]==value - end - }, max, name, &block ) - end - - # Iterates through the children, yielding for each Element that - # has a particular text set. - # text:: - # the text to search for. If nil, or not supplied, will itterate - # over all +Element+ children that contain at least one +Text+ node. 
- # max:: - # (optional) causes this method to return after yielding - # for this number of matching children - # name:: - # (optional) if supplied, this is an XPath that filters - # the children to check. - # - # doc = Document.new '<a><b>b</b><c>b</c><d>d</d><e/></a>' - # # Yields b, c, d - # doc.each_element_with_text {|e|p e} - # # Yields b, c - # doc.each_element_with_text('b'){|e|p e} - # # Yields b - # doc.each_element_with_text('b', 1){|e|p e} - # # Yields d - # doc.each_element_with_text(nil, 0, 'd'){|e|p e} - def each_element_with_text( text=nil, max=0, name=nil, &block ) # :yields: Element - each_with_something( proc {|child| - if text.nil? - child.has_text? - else - child.text == text - end - }, max, name, &block ) - end - - # Synonym for Element.elements.each - def each_element( xpath=nil, &block ) # :yields: Element - @elements.each( xpath, &block ) - end - - # Synonym for Element.to_a - # This is a little slower than calling elements.each directly. - # xpath:: any XPath by which to search for elements in the tree - # Returns:: an array of Elements that match the supplied path - def get_elements( xpath ) - @elements.to_a( xpath ) - end - - # Returns the next sibling that is an element, or nil if there is - # no Element sibling after this one - # doc = Document.new '<a><b/>text<c/></a>' - # doc.root.elements['b'].next_element #-> <c/> - # doc.root.elements['c'].next_element #-> nil - def next_element - element = next_sibling - element = element.next_sibling until element.nil? or element.kind_of? Element - return element - end - - # Returns the previous sibling that is an element, or nil if there is - # no Element sibling prior to this one - # doc = Document.new '<a><b/>text<c/></a>' - # doc.root.elements['c'].previous_element #-> <b/> - # doc.root.elements['b'].previous_element #-> nil - def previous_element - element = previous_sibling - element = element.previous_sibling until element.nil? or element.kind_of? 
Element - return element - end - - - ################################################# - # Text # - ################################################# - - # Evaluates to +true+ if this element has at least one Text child - def has_text? - not text().nil? - end - - # A convenience method which returns the String value of the _first_ - # child text element, if one exists, and +nil+ otherwise. - # - # <em>Note that an element may have multiple Text elements, perhaps - # separated by other children</em>. Be aware that this method only returns - # the first Text node. - # - # This method returns the +value+ of the first text child node, which - # ignores the +raw+ setting, so always returns normalized text. See - # the Text::value documentation. - # - # doc = Document.new "<p>some text <b>this is bold!</b> more text</p>" - # # The element 'p' has two text elements, "some text " and " more text". - # doc.root.text #-> "some text " - def text( path = nil ) - rv = get_text(path) - return rv.value unless rv.nil? - nil - end - - # Returns the first child Text node, if any, or +nil+ otherwise. - # This method returns the actual +Text+ node, rather than the String content. - # doc = Document.new "<p>some text <b>this is bold!</b> more text</p>" - # # The element 'p' has two text elements, "some text " and " more text". - # doc.root.get_text.value #-> "some text " - def get_text path = nil - rv = nil - if path - element = @elements[ path ] - rv = element.get_text unless element.nil? - else - rv = @children.find { |node| node.kind_of? Text } - end - return rv - end - - # Sets the first Text child of this object. See text() for a - # discussion about Text children. - # - # If a Text child already exists, the child is replaced by this - # content. This means that Text content can be deleted by calling - # this method with a nil argument. In this case, the next Text - # child becomes the first Text child. In no case is the order of - # any siblings disturbed. 
- # text:: - # If a String, a new Text child is created and added to - # this Element as the first Text child. If Text, the text is set - # as the first Child element. If nil, then any existing first Text - # child is removed. - # Returns:: this Element. - # doc = Document.new '<a><b/></a>' - # doc.root.text = 'Sean' #-> '<a><b/>Sean</a>' - # doc.root.text = 'Elliott' #-> '<a><b/>Elliott</a>' - # doc.root.add_element 'c' #-> '<a><b/>Elliott<c/></a>' - # doc.root.text = 'Russell' #-> '<a><b/>Russell<c/></a>' - # doc.root.text = nil #-> '<a><b/><c/></a>' + el = @elements.add(element) + attrs.each do |key, value| + el.attributes[key]=Attribute.new(key,value,self) + end if attrs.kind_of? Hash + el + end + + # Deletes a child element. + # element:: + # Must be an +Element+, +String+, or +Integer+. If Element, + # the element is removed. If String, the element is found (via XPath) + # and removed. <em>This means that any parent can remove any + # descendant.<em> If Integer, the Element indexed by that number will be + # removed. + # Returns:: the element that was removed. + # doc.delete_element "/a/b/c[@id='4']" + # doc.delete_element doc.elements["//k"] + # doc.delete_element 1 + def delete_element element + @elements.delete element + end + + # Evaluates to +true+ if this element has at least one child Element + # doc = Document.new "<a><b/><c>Text</c></a>" + # doc.root.has_elements # -> true + # doc.elements["/a/b"].has_elements # -> false + # doc.elements["/a/c"].has_elements # -> false + def has_elements? + !@elements.empty? + end + + # Iterates through the child elements, yielding for each Element that + # has a particular attribute set. + # key:: + # the name of the attribute to search for + # value:: + # the value of the attribute + # max:: + # (optional) causes this method to return after yielding + # for this number of matching children + # name:: + # (optional) if supplied, this is an XPath that filters + # the children to check. 
+ # + # doc = Document.new "<a><b @id='1'/><c @id='2'/><d @id='1'/><e/></a>" + # # Yields b, c, d + # doc.root.each_element_with_attribute( 'id' ) {|e| p e} + # # Yields b, d + # doc.root.each_element_with_attribute( 'id', '1' ) {|e| p e} + # # Yields b + # doc.root.each_element_with_attribute( 'id', '1', 1 ) {|e| p e} + # # Yields d + # doc.root.each_element_with_attribute( 'id', '1', 0, 'd' ) {|e| p e} + def each_element_with_attribute( key, value=nil, max=0, name=nil, &block ) # :yields: Element + each_with_something( proc {|child| + if value.nil? + child.attributes[key] != nil + else + child.attributes[key]==value + end + }, max, name, &block ) + end + + # Iterates through the children, yielding for each Element that + # has a particular text set. + # text:: + # the text to search for. If nil, or not supplied, will itterate + # over all +Element+ children that contain at least one +Text+ node. + # max:: + # (optional) causes this method to return after yielding + # for this number of matching children + # name:: + # (optional) if supplied, this is an XPath that filters + # the children to check. + # + # doc = Document.new '<a><b>b</b><c>b</c><d>d</d><e/></a>' + # # Yields b, c, d + # doc.each_element_with_text {|e|p e} + # # Yields b, c + # doc.each_element_with_text('b'){|e|p e} + # # Yields b + # doc.each_element_with_text('b', 1){|e|p e} + # # Yields d + # doc.each_element_with_text(nil, 0, 'd'){|e|p e} + def each_element_with_text( text=nil, max=0, name=nil, &block ) # :yields: Element + each_with_something( proc {|child| + if text.nil? + child.has_text? + else + child.text == text + end + }, max, name, &block ) + end + + # Synonym for Element.elements.each + def each_element( xpath=nil, &block ) # :yields: Element + @elements.each( xpath, &block ) + end + + # Synonym for Element.to_a + # This is a little slower than calling elements.each directly. 
+ # xpath:: any XPath by which to search for elements in the tree + # Returns:: an array of Elements that match the supplied path + def get_elements( xpath ) + @elements.to_a( xpath ) + end + + # Returns the next sibling that is an element, or nil if there is + # no Element sibling after this one + # doc = Document.new '<a><b/>text<c/></a>' + # doc.root.elements['b'].next_element #-> <c/> + # doc.root.elements['c'].next_element #-> nil + def next_element + element = next_sibling + element = element.next_sibling until element.nil? or element.kind_of? Element + return element + end + + # Returns the previous sibling that is an element, or nil if there is + # no Element sibling prior to this one + # doc = Document.new '<a><b/>text<c/></a>' + # doc.root.elements['c'].previous_element #-> <b/> + # doc.root.elements['b'].previous_element #-> nil + def previous_element + element = previous_sibling + element = element.previous_sibling until element.nil? or element.kind_of? Element + return element + end + + + ################################################# + # Text # + ################################################# + + # Evaluates to +true+ if this element has at least one Text child + def has_text? + not text().nil? + end + + # A convenience method which returns the String value of the _first_ + # child text element, if one exists, and +nil+ otherwise. + # + # <em>Note that an element may have multiple Text elements, perhaps + # separated by other children</em>. Be aware that this method only returns + # the first Text node. + # + # This method returns the +value+ of the first text child node, which + # ignores the +raw+ setting, so always returns normalized text. See + # the Text::value documentation. + # + # doc = Document.new "<p>some text <b>this is bold!</b> more text</p>" + # # The element 'p' has two text elements, "some text " and " more text". + # doc.root.text #-> "some text " + def text( path = nil ) + rv = get_text(path) + return rv.value unless rv.nil? 
+ nil + end + + # Returns the first child Text node, if any, or +nil+ otherwise. + # This method returns the actual +Text+ node, rather than the String content. + # doc = Document.new "<p>some text <b>this is bold!</b> more text</p>" + # # The element 'p' has two text elements, "some text " and " more text". + # doc.root.get_text.value #-> "some text " + def get_text path = nil + rv = nil + if path + element = @elements[ path ] + rv = element.get_text unless element.nil? + else + rv = @children.find { |node| node.kind_of? Text } + end + return rv + end + + # Sets the first Text child of this object. See text() for a + # discussion about Text children. + # + # If a Text child already exists, the child is replaced by this + # content. This means that Text content can be deleted by calling + # this method with a nil argument. In this case, the next Text + # child becomes the first Text child. In no case is the order of + # any siblings disturbed. + # text:: + # If a String, a new Text child is created and added to + # this Element as the first Text child. If Text, the text is set + # as the first Child element. If nil, then any existing first Text + # child is removed. + # Returns:: this Element. + # doc = Document.new '<a><b/></a>' + # doc.root.text = 'Sean' #-> '<a><b/>Sean</a>' + # doc.root.text = 'Elliott' #-> '<a><b/>Elliott</a>' + # doc.root.add_element 'c' #-> '<a><b/>Elliott<c/></a>' + # doc.root.text = 'Russell' #-> '<a><b/>Russell<c/></a>' + # doc.root.text = nil #-> '<a><b/><c/></a>' def text=( text ) if text.kind_of? String text = Text.new( text, whitespace(), nil, raw() ) elsif text and !text.kind_of? Text text = Text.new( text.to_s, whitespace(), nil, raw() ) end - old_text = get_text - if text.nil? - old_text.remove unless old_text.nil? - else - if old_text.nil? - self << text - else - old_text.replace_with( text ) - end - end - return self - end - - # A helper method to add a Text child. 
Actual Text instances can - # be added with regular Parent methods, such as add() and <<() - # text:: - # if a String, a new Text instance is created and added - # to the parent. If Text, the object is added directly. - # Returns:: this Element - # e = Element.new('a') #-> <e/> - # e.add_text 'foo' #-> <e>foo</e> - # e.add_text Text.new(' bar') #-> <e>foo bar</e> - # Note that at the end of this example, the branch has <b>3</b> nodes; the 'e' - # element and <b>2</b> Text node children. - def add_text( text ) - if text.kind_of? String - if @children[-1].kind_of? Text - @children[-1] << text - return - end - text = Text.new( text, whitespace(), nil, raw() ) - end - self << text unless text.nil? - return self - end + old_text = get_text + if text.nil? + old_text.remove unless old_text.nil? + else + if old_text.nil? + self << text + else + old_text.replace_with( text ) + end + end + return self + end + + # A helper method to add a Text child. Actual Text instances can + # be added with regular Parent methods, such as add() and <<() + # text:: + # if a String, a new Text instance is created and added + # to the parent. If Text, the object is added directly. + # Returns:: this Element + # e = Element.new('a') #-> <e/> + # e.add_text 'foo' #-> <e>foo</e> + # e.add_text Text.new(' bar') #-> <e>foo bar</e> + # Note that at the end of this example, the branch has <b>3</b> nodes; the 'e' + # element and <b>2</b> Text node children. + def add_text( text ) + if text.kind_of? String + if @children[-1].kind_of? Text + @children[-1] << text + return + end + text = Text.new( text, whitespace(), nil, raw() ) + end + self << text unless text.nil? 
+ return self + end def node_type :element @@ -551,162 +546,146 @@ module REXML return path_elements.reverse.join( "/" ) end - ################################################# - # Attributes # - ################################################# + ################################################# + # Attributes # + ################################################# - def attribute( name, namespace=nil ) - prefix = nil + def attribute( name, namespace=nil ) + prefix = nil prefix = namespaces.index(namespace) if namespace - attributes.get_attribute( "#{prefix ? prefix + ':' : ''}#{name}" ) - end - - # Evaluates to +true+ if this element has any attributes set, false - # otherwise. - def has_attributes? - return !@attributes.empty? - end - - # Adds an attribute to this element, overwriting any existing attribute - # by the same name. - # key:: - # can be either an Attribute or a String. If an Attribute, - # the attribute is added to the list of Element attributes. If String, - # the argument is used as the name of the new attribute, and the value - # parameter must be supplied. - # value:: - # Required if +key+ is a String, and ignored if the first argument is - # an Attribute. This is a String, and is used as the value - # of the new Attribute. - # Returns:: the Attribute added - # e = Element.new 'e' - # e.add_attribute( 'a', 'b' ) #-> <e a='b'/> - # e.add_attribute( 'x:a', 'c' ) #-> <e a='b' x:a='c'/> - # e.add_attribute Attribute.new('b', 'd') #-> <e a='b' x:a='c' b='d'/> - def add_attribute( key, value=nil ) - if key.kind_of? Attribute - @attributes << key - else - @attributes[key] = value - end - end - - # Add multiple attributes to this element. - # hash:: is either a hash, or array of arrays - # el.add_attributes( {"name1"=>"value1", "name2"=>"value2"} ) - # el.add_attributes( [ ["name1","value1"], ["name2"=>"value2"] ] ) - def add_attributes hash - if hash.kind_of? Hash - hash.each_pair {|key, value| @attributes[key] = value } - elsif hash.kind_of? 
Array - hash.each { |value| @attributes[ value[0] ] = value[1] } - end - end - - # Removes an attribute - # key:: - # either an Attribute or a String. In either case, the - # attribute is found by matching the attribute name to the argument, - # and then removed. If no attribute is found, no action is taken. - # Returns:: - # the attribute removed, or nil if this Element did not contain - # a matching attribute - # e = Element.new('E') - # e.add_attribute( 'name', 'Sean' ) #-> <E name='Sean'/> - # r = e.add_attribute( 'sur:name', 'Russell' ) #-> <E name='Sean' sur:name='Russell'/> - # e.delete_attribute( 'name' ) #-> <E sur:name='Russell'/> - # e.delete_attribute( r ) #-> <E/> - def delete_attribute(key) - attr = @attributes.get_attribute(key) - attr.remove unless attr.nil? - end - - ################################################# - # Other Utilities # - ################################################# - - # Get an array of all CData children. - # IMMUTABLE - def cdatas - find_all { |child| child.kind_of? CData }.freeze - end - - # Get an array of all Comment children. - # IMMUTABLE - def comments - find_all { |child| child.kind_of? Comment }.freeze - end - - # Get an array of all Instruction children. - # IMMUTABLE - def instructions - find_all { |child| child.kind_of? Instruction }.freeze - end - - # Get an array of all Text children. - # IMMUTABLE - def texts - find_all { |child| child.kind_of? Text }.freeze - end - - # Writes out this element, and recursively, all children. - # output:: - # output an object which supports '<< string'; this is where the - # document will be written. - # indent:: - # An integer. If -1, no indenting will be used; otherwise, the - # indentation will be this number of spaces, and children will be - # indented an additional amount. 
Defaults to -1 - # transitive:: - # If transitive is true and indent is >= 0, then the output will be - # pretty-printed in such a way that the added whitespace does not affect - # the parse tree of the document - # ie_hack:: - # Internet Explorer is the worst piece of crap to have ever been - # written, with the possible exception of Windows itself. Since IE is - # unable to parse proper XML, we have to provide a hack to generate XML - # that IE's limited abilities can handle. This hack inserts a space - # before the /> on empty tags. Defaults to false - # - # out = '' - # doc.write( out ) #-> doc is written to the string 'out' - # doc.write( $stdout ) #-> doc written to the console - def write(writer=$stdout, indent=-1, transitive=false, ie_hack=false) - #print "ID:#{indent}" - writer << "<#@expanded_name" - - @attributes.each_attribute do |attr| - writer << " " - attr.write( writer, indent ) - end unless @attributes.empty? - - if @children.empty? - if transitive and indent>-1 - writer << "\n" - indent( writer, indent ) - elsif ie_hack - writer << " " + attributes.get_attribute( "#{prefix ? prefix + ':' : ''}#{name}" ) + end + + # Evaluates to +true+ if this element has any attributes set, false + # otherwise. + def has_attributes? + return !@attributes.empty? + end + + # Adds an attribute to this element, overwriting any existing attribute + # by the same name. + # key:: + # can be either an Attribute or a String. If an Attribute, + # the attribute is added to the list of Element attributes. If String, + # the argument is used as the name of the new attribute, and the value + # parameter must be supplied. + # value:: + # Required if +key+ is a String, and ignored if the first argument is + # an Attribute. This is a String, and is used as the value + # of the new Attribute. This should be the unnormalized value of the + # attribute (without entities). 
+ # Returns:: the Attribute added + # e = Element.new 'e' + # e.add_attribute( 'a', 'b' ) #-> <e a='b'/> + # e.add_attribute( 'x:a', 'c' ) #-> <e a='b' x:a='c'/> + # e.add_attribute Attribute.new('b', 'd') #-> <e a='b' x:a='c' b='d'/> + def add_attribute( key, value=nil ) + if key.kind_of? Attribute + @attributes << key + else + @attributes[key] = value + end + end + + # Add multiple attributes to this element. + # hash:: is either a hash, or array of arrays + # el.add_attributes( {"name1"=>"value1", "name2"=>"value2"} ) + # el.add_attributes( [ ["name1","value1"], ["name2"=>"value2"] ] ) + def add_attributes hash + if hash.kind_of? Hash + hash.each_pair {|key, value| @attributes[key] = value } + elsif hash.kind_of? Array + hash.each { |value| @attributes[ value[0] ] = value[1] } + end + end + + # Removes an attribute + # key:: + # either an Attribute or a String. In either case, the + # attribute is found by matching the attribute name to the argument, + # and then removed. If no attribute is found, no action is taken. + # Returns:: + # the attribute removed, or nil if this Element did not contain + # a matching attribute + # e = Element.new('E') + # e.add_attribute( 'name', 'Sean' ) #-> <E name='Sean'/> + # r = e.add_attribute( 'sur:name', 'Russell' ) #-> <E name='Sean' sur:name='Russell'/> + # e.delete_attribute( 'name' ) #-> <E sur:name='Russell'/> + # e.delete_attribute( r ) #-> <E/> + def delete_attribute(key) + attr = @attributes.get_attribute(key) + attr.remove unless attr.nil? + end + + ################################################# + # Other Utilities # + ################################################# + + # Get an array of all CData children. + # IMMUTABLE + def cdatas + find_all { |child| child.kind_of? CData }.freeze + end + + # Get an array of all Comment children. + # IMMUTABLE + def comments + find_all { |child| child.kind_of? Comment }.freeze + end + + # Get an array of all Instruction children. 
+ # IMMUTABLE + def instructions + find_all { |child| child.kind_of? Instruction }.freeze + end + + # Get an array of all Text children. + # IMMUTABLE + def texts + find_all { |child| child.kind_of? Text }.freeze + end + + # == DEPRECATED + # See REXML::Formatters + # + # Writes out this element, and recursively, all children. + # output:: + # output an object which supports '<< string'; this is where the + # document will be written. + # indent:: + # An integer. If -1, no indenting will be used; otherwise, the + # indentation will be this number of spaces, and children will be + # indented an additional amount. Defaults to -1 + # transitive:: + # If transitive is true and indent is >= 0, then the output will be + # pretty-printed in such a way that the added whitespace does not affect + # the parse tree of the document + # ie_hack:: + # Internet Explorer is the worst piece of crap to have ever been + # written, with the possible exception of Windows itself. Since IE is + # unable to parse proper XML, we have to provide a hack to generate XML + # that IE's limited abilities can handle. This hack inserts a space + # before the /> on empty tags. Defaults to false + # + # out = '' + # doc.write( out ) #-> doc is written to the string 'out' + # doc.write( $stdout ) #-> doc written to the console + def write(writer=$stdout, indent=-1, transitive=false, ie_hack=false) + Kernel.warn("#{self.class.name}.write is deprecated. See REXML::Formatters") + formatter = if indent > -1 + if transitive + REXML::Formatters::Transitive.new( indent, ie_hack ) + else + REXML::Formatters::Pretty.new( indent, ie_hack ) + end + else + REXML::Formatters::Default.new( ie_hack ) end - writer << "/" - else - if transitive and indent>-1 and !@children[0].kind_of? Text - writer << "\n" - indent writer, indent+1 - end - writer << ">" - write_children( writer, indent, transitive, ie_hack ) - writer << "</#{expanded_name}" - end - if transitive and indent>-1 and !@children.empty? 
- writer << "\n" - indent -= 1 if next_sibling.nil? - indent(writer, indent) - end - writer << ">" - end - - - private + formatter.write( self, output ) + end + + + private def __to_xpath_helper node rv = node.expanded_name.clone if node.parent @@ -721,528 +700,514 @@ module REXML rv end - # A private helper method - def each_with_something( test, max=0, name=nil ) - num = 0 - child=nil - @elements.each( name ){ |child| - yield child if test.call(child) and num += 1 - return if max>0 and num == max - } - end - - # A private helper method - def write_children( writer, indent, transitive, ie_hack ) - cr = (indent < 0) ? '' : "\n" - if indent == -1 - each { |child| child.write( writer, indent, transitive, ie_hack ) } - else - next_indent = indent+1 - last_child=nil - each { |child| - unless child.kind_of? Text or last_child.kind_of? Text or transitive - writer << cr - indent(writer, next_indent) - end - child.write( writer, next_indent, transitive, ie_hack ) - last_child = child - } - unless last_child.kind_of? Text or transitive - writer << cr - indent( writer, indent ) - end - end - end - end - - ######################################################################## - # ELEMENTS # - ######################################################################## - - # A class which provides filtering of children for Elements, and - # XPath search support. You are expected to only encounter this class as - # the <tt>element.elements</tt> object. Therefore, you are - # _not_ expected to instantiate this yourself. - class Elements - include Enumerable - # Constructor - # parent:: the parent Element - def initialize parent - @element = parent - end - - # Fetches a child element. Filters only Element children, regardless of - # the XPath match. - # index:: - # the search parameter. This is either an Integer, which - # will be used to find the index'th child Element, or an XPath, - # which will be used to search for the Element. 
<em>Because - # of the nature of XPath searches, any element in the connected XML - # document can be fetched through any other element.</em> <b>The - # Integer index is 1-based, not 0-based.</b> This means that the first - # child element is at index 1, not 0, and the +n+th element is at index - # +n+, not <tt>n-1</tt>. This is because XPath indexes element children - # starting from 1, not 0, and the indexes should be the same. - # name:: - # optional, and only used in the first argument is an - # Integer. In that case, the index'th child Element that has the - # supplied name will be returned. Note again that the indexes start at 1. - # Returns:: the first matching Element, or nil if no child matched - # doc = Document.new '<a><b/><c id="1"/><c id="2"/><d/></a>' - # doc.root.elements[1] #-> <b/> - # doc.root.elements['c'] #-> <c id="1"/> - # doc.root.elements[2,'c'] #-> <c id="2"/> - def []( index, name=nil) - if index.kind_of? Integer - raise "index (#{index}) must be >= 1" if index < 1 - name = literalize(name) if name - num = 0 - child = nil - @element.find { |child| - child.kind_of? Element and - (name.nil? ? true : child.has_name?( name )) and - (num += 1) == index - } - else - return XPath::first( @element, index ) - #{ |element| - # return element if element.kind_of? Element - #} - #return nil - end - end - - # Sets an element, replacing any previous matching element. If no - # existing element is found ,the element is added. - # index:: Used to find a matching element to replace. See [](). - # element:: - # The element to replace the existing element with - # the previous element - # Returns:: nil if no previous element was found. - # - # doc = Document.new '<a/>' - # doc.root.elements[10] = Element.new('b') #-> <a><b/></a> - # doc.root.elements[1] #-> <b/> - # doc.root.elements[1] = Element.new('c') #-> <a><c/></a> - # doc.root.elements['c'] = Element.new('d') #-> <a><d/></a> - def []=( index, element ) - previous = self[index] - if previous.nil? 
- @element.add element - else - previous.replace_with element - end - return previous - end - - # Returns +true+ if there are no +Element+ children, +false+ otherwise - def empty? - @element.find{ |child| child.kind_of? Element}.nil? - end - - # Returns the index of the supplied child (starting at 1), or -1 if - # the element is not a child - # element:: an +Element+ child - def index element - rv = 0 - found = @element.find do |child| - child.kind_of? Element and - (rv += 1) and - child == element - end - return rv if found == element - return -1 - end - - # Deletes a child Element - # element:: - # Either an Element, which is removed directly; an - # xpath, where the first matching child is removed; or an Integer, - # where the n'th Element is removed. - # Returns:: the removed child - # doc = Document.new '<a><b/><c/><c id="1"/></a>' - # b = doc.root.elements[1] - # doc.root.elements.delete b #-> <a><c/><c id="1"/></a> - # doc.elements.delete("a/c[@id='1']") #-> <a><c/></a> - # doc.root.elements.delete 1 #-> <a/> - def delete element - if element.kind_of? Element - @element.delete element - else - el = self[element] - el.remove if el - end - end - - # Removes multiple elements. Filters for Element children, regardless of - # XPath matching. - # xpath:: all elements matching this String path are removed. - # Returns:: an Array of Elements that have been removed - # doc = Document.new '<a><c/><c/><c/><c/></a>' - # deleted = doc.elements.delete_all 'a/c' #-> [<c/>, <c/>, <c/>, <c/>] - def delete_all( xpath ) - rv = [] - XPath::each( @element, xpath) {|element| - rv << element if element.kind_of? Element - } - rv.each do |element| - @element.delete element - element.remove - end - return rv - end - - # Adds an element - # element:: - # if supplied, is either an Element, String, or - # Source (see Element.initialize). 
If not supplied or nil, a - # new, default Element will be constructed - # Returns:: the added Element - # a = Element.new 'a' - # a.elements.add Element.new 'b' #-> <a><b/></a> - # a.elements.add 'c' #-> <a><b/><c/></a> - def add element=nil - rv = nil - if element.nil? - Element.new "", self, @element.context - elsif not element.kind_of?(Element) - Element.new element, self, @element.context - else - @element << element - element.context = @element.context - element - end - end - - alias :<< :add - - # Iterates through all of the child Elements, optionally filtering - # them by a given XPath - # xpath:: - # optional. If supplied, this is a String XPath, and is used to - # filter the children, so that only matching children are yielded. Note - # that XPaths are automatically filtered for Elements, so that - # non-Element children will not be yielded - # doc = Document.new '<a><b/><c/><d/>sean<b/><c/><d/></a>' - # doc.root.each {|e|p e} #-> Yields b, c, d, b, c, d elements - # doc.root.each('b') {|e|p e} #-> Yields b, b elements - # doc.root.each('child::node()') {|e|p e} - # #-> Yields <b/>, <c/>, <d/>, <b/>, <c/>, <d/> - # XPath.each(doc.root, 'child::node()', &block) - # #-> Yields <b/>, <c/>, <d/>, sean, <b/>, <c/>, <d/> - def each( xpath=nil, &block) - XPath::each( @element, xpath ) {|e| yield e if e.kind_of? Element } - end - - def collect( xpath=nil, &block ) - collection = [] - XPath::each( @element, xpath ) {|e| - collection << yield(e) if e.kind_of?(Element) - } - collection - end - - def inject( xpath=nil, initial=nil, &block ) - first = true - XPath::each( @element, xpath ) {|e| - if (e.kind_of? Element) - if (first and initial == nil) - initial = e - first = false - else - initial = yield( initial, e ) if e.kind_of? Element - end - end - } - initial - end - - # Returns the number of +Element+ children of the parent object. 
- # doc = Document.new '<a>sean<b/>elliott<b/>russell<b/></a>' - # doc.root.size #-> 6, 3 element and 3 text nodes - # doc.root.elements.size #-> 3 - def size - count = 0 - @element.each {|child| count+=1 if child.kind_of? Element } - count - end - - # Returns an Array of Element children. An XPath may be supplied to - # filter the children. Only Element children are returned, even if the - # supplied XPath matches non-Element children. - # doc = Document.new '<a>sean<b/>elliott<c/></a>' - # doc.root.elements.to_a #-> [ <b/>, <c/> ] - # doc.root.elements.to_a("child::node()") #-> [ <b/>, <c/> ] - # XPath.match(doc.root, "child::node()") #-> [ sean, <b/>, elliott, <c/> ] - def to_a( xpath=nil ) - rv = XPath.match( @element, xpath ) - return rv.find_all{|e| e.kind_of? Element} if xpath - rv - end - - private - # Private helper class. Removes quotes from quoted strings - def literalize name - name = name[1..-2] if name[0] == ?' or name[0] == ?" #' - name - end - end - - ######################################################################## - # ATTRIBUTES # - ######################################################################## - - # A class that defines the set of Attributes of an Element and provides - # operations for accessing elements in that set. - class Attributes < Hash - # Constructor - # element:: the Element of which this is an Attribute - def initialize element - @element = element - end - - # Fetches an attribute value. If you want to get the Attribute itself, - # use get_attribute() - # name:: an XPath attribute name. Namespaces are relevant here. - # Returns:: - # the String value of the matching attribute, or +nil+ if no - # matching attribute was found. - # - # doc = Document.new "<a foo:att='1' bar:att='2' att='3'/>" - # doc.root.attributes['att'] #-> '3' - # doc.root.attributes['bar:att'] #-> '2' - def [](name) - attr = get_attribute(name) - return attr.value unless attr.nil? 
- return nil - end - - def to_a - values.flatten - end - - # Returns the number of attributes the owning Element contains. - # doc = Document "<a x='1' y='2' foo:x='3'/>" - # doc.root.attributes.length #-> 3 - def length - c = 0 - each_attribute { c+=1 } - c - end - alias :size :length - - # Itterates over the attributes of an Element. Yields actual Attribute - # nodes, not String values. - # - # doc = Document.new '<a x="1" y="2"/>' - # doc.root.attributes.each_attribute {|attr| - # p attr.expanded_name+" => "+attr.value - # } - def each_attribute # :yields: attribute - each_value do |val| - if val.kind_of? Attribute - yield val - else - val.each_value { |atr| yield atr } - end - end - end - - # Itterates over each attribute of an Element, yielding the expanded name - # and value as a pair of Strings. - # - # doc = Document.new '<a x="1" y="2"/>' - # doc.root.attributes.each {|name, value| p name+" => "+value } - def each - each_attribute do |attr| - yield attr.expanded_name, attr.value - end - end - - # Fetches an attribute - # name:: - # the name by which to search for the attribute. Can be a - # <tt>prefix:name</tt> namespace name. - # Returns:: The first matching attribute, or nil if there was none. This - # value is an Attribute node, not the String value of the attribute. - # doc = Document.new '<a x:foo="1" foo="2" bar="3"/>' - # doc.root.attributes.get_attribute("foo").value #-> "2" - # doc.root.attributes.get_attribute("x:foo").value #-> "1" - def get_attribute( name ) - attr = fetch( name, nil ) - if attr.nil? - return nil if name.nil? - # Look for prefix - name =~ Namespace::NAMESPLIT - prefix, n = $1, $2 - if prefix - attr = fetch( n, nil ) - # check prefix - if attr == nil - elsif attr.kind_of? 
Attribute - return attr if prefix == attr.prefix - else - attr = attr[ prefix ] - return attr - end - end + # A private helper method + def each_with_something( test, max=0, name=nil ) + num = 0 + child=nil + @elements.each( name ){ |child| + yield child if test.call(child) and num += 1 + return if max>0 and num == max + } + end + end + + ######################################################################## + # ELEMENTS # + ######################################################################## + + # A class which provides filtering of children for Elements, and + # XPath search support. You are expected to only encounter this class as + # the <tt>element.elements</tt> object. Therefore, you are + # _not_ expected to instantiate this yourself. + class Elements + include Enumerable + # Constructor + # parent:: the parent Element + def initialize parent + @element = parent + end + + # Fetches a child element. Filters only Element children, regardless of + # the XPath match. + # index:: + # the search parameter. This is either an Integer, which + # will be used to find the index'th child Element, or an XPath, + # which will be used to search for the Element. <em>Because + # of the nature of XPath searches, any element in the connected XML + # document can be fetched through any other element.</em> <b>The + # Integer index is 1-based, not 0-based.</b> This means that the first + # child element is at index 1, not 0, and the +n+th element is at index + # +n+, not <tt>n-1</tt>. This is because XPath indexes element children + # starting from 1, not 0, and the indexes should be the same. + # name:: + # optional, and only used in the first argument is an + # Integer. In that case, the index'th child Element that has the + # supplied name will be returned. Note again that the indexes start at 1. 
+ # Returns:: the first matching Element, or nil if no child matched + # doc = Document.new '<a><b/><c id="1"/><c id="2"/><d/></a>' + # doc.root.elements[1] #-> <b/> + # doc.root.elements['c'] #-> <c id="1"/> + # doc.root.elements[2,'c'] #-> <c id="2"/> + def []( index, name=nil) + if index.kind_of? Integer + raise "index (#{index}) must be >= 1" if index < 1 + name = literalize(name) if name + num = 0 + child = nil + @element.find { |child| + child.kind_of? Element and + (name.nil? ? true : child.has_name?( name )) and + (num += 1) == index + } + else + return XPath::first( @element, index ) + #{ |element| + # return element if element.kind_of? Element + #} + #return nil + end + end + + # Sets an element, replacing any previous matching element. If no + # existing element is found ,the element is added. + # index:: Used to find a matching element to replace. See [](). + # element:: + # The element to replace the existing element with + # the previous element + # Returns:: nil if no previous element was found. + # + # doc = Document.new '<a/>' + # doc.root.elements[10] = Element.new('b') #-> <a><b/></a> + # doc.root.elements[1] #-> <b/> + # doc.root.elements[1] = Element.new('c') #-> <a><c/></a> + # doc.root.elements['c'] = Element.new('d') #-> <a><d/></a> + def []=( index, element ) + previous = self[index] + if previous.nil? + @element.add element + else + previous.replace_with element + end + return previous + end + + # Returns +true+ if there are no +Element+ children, +false+ otherwise + def empty? + @element.find{ |child| child.kind_of? Element}.nil? + end + + # Returns the index of the supplied child (starting at 1), or -1 if + # the element is not a child + # element:: an +Element+ child + def index element + rv = 0 + found = @element.find do |child| + child.kind_of? 
Element and + (rv += 1) and + child == element + end + return rv if found == element + return -1 + end + + # Deletes a child Element + # element:: + # Either an Element, which is removed directly; an + # xpath, where the first matching child is removed; or an Integer, + # where the n'th Element is removed. + # Returns:: the removed child + # doc = Document.new '<a><b/><c/><c id="1"/></a>' + # b = doc.root.elements[1] + # doc.root.elements.delete b #-> <a><c/><c id="1"/></a> + # doc.elements.delete("a/c[@id='1']") #-> <a><c/></a> + # doc.root.elements.delete 1 #-> <a/> + def delete element + if element.kind_of? Element + @element.delete element + else + el = self[element] + el.remove if el + end + end + + # Removes multiple elements. Filters for Element children, regardless of + # XPath matching. + # xpath:: all elements matching this String path are removed. + # Returns:: an Array of Elements that have been removed + # doc = Document.new '<a><c/><c/><c/><c/></a>' + # deleted = doc.elements.delete_all 'a/c' #-> [<c/>, <c/>, <c/>, <c/>] + def delete_all( xpath ) + rv = [] + XPath::each( @element, xpath) {|element| + rv << element if element.kind_of? Element + } + rv.each do |element| + @element.delete element + element.remove + end + return rv + end + + # Adds an element + # element:: + # if supplied, is either an Element, String, or + # Source (see Element.initialize). If not supplied or nil, a + # new, default Element will be constructed + # Returns:: the added Element + # a = Element.new 'a' + # a.elements.add Element.new 'b' #-> <a><b/></a> + # a.elements.add 'c' #-> <a><b/><c/></a> + def add element=nil + rv = nil + if element.nil? 
+ Element.new "", self, @element.context + elsif not element.kind_of?(Element) + Element.new element, self, @element.context + else + @element << element + element.context = @element.context + element + end + end + + alias :<< :add + + # Iterates through all of the child Elements, optionally filtering + # them by a given XPath + # xpath:: + # optional. If supplied, this is a String XPath, and is used to + # filter the children, so that only matching children are yielded. Note + # that XPaths are automatically filtered for Elements, so that + # non-Element children will not be yielded + # doc = Document.new '<a><b/><c/><d/>sean<b/><c/><d/></a>' + # doc.root.each {|e|p e} #-> Yields b, c, d, b, c, d elements + # doc.root.each('b') {|e|p e} #-> Yields b, b elements + # doc.root.each('child::node()') {|e|p e} + # #-> Yields <b/>, <c/>, <d/>, <b/>, <c/>, <d/> + # XPath.each(doc.root, 'child::node()', &block) + # #-> Yields <b/>, <c/>, <d/>, sean, <b/>, <c/>, <d/> + def each( xpath=nil, &block) + XPath::each( @element, xpath ) {|e| yield e if e.kind_of? Element } + end + + def collect( xpath=nil, &block ) + collection = [] + XPath::each( @element, xpath ) {|e| + collection << yield(e) if e.kind_of?(Element) + } + collection + end + + def inject( xpath=nil, initial=nil, &block ) + first = true + XPath::each( @element, xpath ) {|e| + if (e.kind_of? Element) + if (first and initial == nil) + initial = e + first = false + else + initial = yield( initial, e ) if e.kind_of? Element + end + end + } + initial + end + + # Returns the number of +Element+ children of the parent object. + # doc = Document.new '<a>sean<b/>elliott<b/>russell<b/></a>' + # doc.root.size #-> 6, 3 element and 3 text nodes + # doc.root.elements.size #-> 3 + def size + count = 0 + @element.each {|child| count+=1 if child.kind_of? Element } + count + end + + # Returns an Array of Element children. An XPath may be supplied to + # filter the children. 
Only Element children are returned, even if the + # supplied XPath matches non-Element children. + # doc = Document.new '<a>sean<b/>elliott<c/></a>' + # doc.root.elements.to_a #-> [ <b/>, <c/> ] + # doc.root.elements.to_a("child::node()") #-> [ <b/>, <c/> ] + # XPath.match(doc.root, "child::node()") #-> [ sean, <b/>, elliott, <c/> ] + def to_a( xpath=nil ) + rv = XPath.match( @element, xpath ) + return rv.find_all{|e| e.kind_of? Element} if xpath + rv + end + + private + # Private helper class. Removes quotes from quoted strings + def literalize name + name = name[1..-2] if name[0] == ?' or name[0] == ?" #' + name + end + end + + ######################################################################## + # ATTRIBUTES # + ######################################################################## + + # A class that defines the set of Attributes of an Element and provides + # operations for accessing elements in that set. + class Attributes < Hash + # Constructor + # element:: the Element of which this is an Attribute + def initialize element + @element = element + end + + # Fetches an attribute value. If you want to get the Attribute itself, + # use get_attribute() + # name:: an XPath attribute name. Namespaces are relevant here. + # Returns:: + # the String value of the matching attribute, or +nil+ if no + # matching attribute was found. This is the unnormalized value + # (with entities expanded). + # + # doc = Document.new "<a foo:att='1' bar:att='2' att='<'/>" + # doc.root.attributes['att'] #-> '<' + # doc.root.attributes['bar:att'] #-> '2' + def [](name) + attr = get_attribute(name) + return attr.value unless attr.nil? + return nil + end + + def to_a + values.flatten + end + + # Returns the number of attributes the owning Element contains. + # doc = Document "<a x='1' y='2' foo:x='3'/>" + # doc.root.attributes.length #-> 3 + def length + c = 0 + each_attribute { c+=1 } + c + end + alias :size :length + + # Itterates over the attributes of an Element. 
Yields actual Attribute + # nodes, not String values. + # + # doc = Document.new '<a x="1" y="2"/>' + # doc.root.attributes.each_attribute {|attr| + # p attr.expanded_name+" => "+attr.value + # } + def each_attribute # :yields: attribute + each_value do |val| + if val.kind_of? Attribute + yield val + else + val.each_value { |atr| yield atr } + end + end + end + + # Itterates over each attribute of an Element, yielding the expanded name + # and value as a pair of Strings. + # + # doc = Document.new '<a x="1" y="2"/>' + # doc.root.attributes.each {|name, value| p name+" => "+value } + def each + each_attribute do |attr| + yield attr.expanded_name, attr.value + end + end + + # Fetches an attribute + # name:: + # the name by which to search for the attribute. Can be a + # <tt>prefix:name</tt> namespace name. + # Returns:: The first matching attribute, or nil if there was none. This + # value is an Attribute node, not the String value of the attribute. + # doc = Document.new '<a x:foo="1" foo="2" bar="3"/>' + # doc.root.attributes.get_attribute("foo").value #-> "2" + # doc.root.attributes.get_attribute("x:foo").value #-> "1" + def get_attribute( name ) + attr = fetch( name, nil ) + if attr.nil? + return nil if name.nil? + # Look for prefix + name =~ Namespace::NAMESPLIT + prefix, n = $1, $2 + if prefix + attr = fetch( n, nil ) + # check prefix + if attr == nil + elsif attr.kind_of? Attribute + return attr if prefix == attr.prefix + else + attr = attr[ prefix ] + return attr + end + end element_document = @element.document - if element_document and element_document.doctype - expn = @element.expanded_name - expn = element_document.doctype.name if expn.size == 0 - attr_val = element_document.doctype.attribute_of(expn, name) - return Attribute.new( name, attr_val ) if attr_val - end - return nil - end - if attr.kind_of? Hash - attr = attr[ @element.prefix ] - end - return attr - end - - # Sets an attribute, overwriting any existing attribute value by the - # same name. 
Namespace is significant. - # name:: the name of the attribute - # value:: - # (optional) If supplied, the value of the attribute. If - # nil, any existing matching attribute is deleted. - # Returns:: - # Owning element - # doc = Document.new "<a x:foo='1' foo='3'/>" - # doc.root.attributes['y:foo'] = '2' - # doc.root.attributes['foo'] = '4' - # doc.root.attributes['x:foo'] = nil - def []=( name, value ) - if value.nil? # Delete the named attribute - attr = get_attribute(name) - delete attr - return - end - value = Attribute.new(name, value) unless value.kind_of? Attribute - value.element = @element - old_attr = fetch(value.name, nil) - if old_attr.nil? - store(value.name, value) - elsif old_attr.kind_of? Hash - old_attr[value.prefix] = value - elsif old_attr.prefix != value.prefix - # Check for conflicting namespaces - raise ParseException.new( - "Namespace conflict in adding attribute \"#{value.name}\": "+ - "Prefix \"#{old_attr.prefix}\" = "+ - "\"#{@element.namespace(old_attr.prefix)}\" and prefix "+ - "\"#{value.prefix}\" = \"#{@element.namespace(value.prefix)}\"") if - value.prefix != "xmlns" and old_attr.prefix != "xmlns" and - @element.namespace( old_attr.prefix ) == - @element.namespace( value.prefix ) - store value.name, { old_attr.prefix => old_attr, - value.prefix => value } - else - store value.name, value - end - return @element - end - - # Returns an array of Strings containing all of the prefixes declared - # by this set of # attributes. The array does not include the default - # namespace declaration, if one exists. 
- # doc = Document.new("<a xmlns='foo' xmlns:x='bar' xmlns:y='twee' "+ - # "z='glorp' p:k='gru'/>") - # prefixes = doc.root.attributes.prefixes #-> ['x', 'y'] - def prefixes - ns = [] - each_attribute do |attribute| - ns << attribute.name if attribute.prefix == 'xmlns' - end - if @element.document and @element.document.doctype - expn = @element.expanded_name - expn = @element.document.doctype.name if expn.size == 0 - @element.document.doctype.attributes_of(expn).each { - |attribute| - ns << attribute.name if attribute.prefix == 'xmlns' - } - end - ns - end - - def namespaces - namespaces = {} - each_attribute do |attribute| - namespaces[attribute.name] = attribute.value if attribute.prefix == 'xmlns' or attribute.name == 'xmlns' - end - if @element.document and @element.document.doctype - expn = @element.expanded_name - expn = @element.document.doctype.name if expn.size == 0 - @element.document.doctype.attributes_of(expn).each { - |attribute| - namespaces[attribute.name] = attribute.value if attribute.prefix == 'xmlns' or attribute.name == 'xmlns' - } - end - namespaces - end - - # Removes an attribute - # attribute:: - # either a String, which is the name of the attribute to remove -- - # namespaces are significant here -- or the attribute to remove. - # Returns:: the owning element - # doc = Document.new "<a y:foo='0' x:foo='1' foo='3' z:foo='4'/>" - # doc.root.attributes.delete 'foo' #-> <a y:foo='0' x:foo='1' z:foo='4'/>" - # doc.root.attributes.delete 'x:foo' #-> <a y:foo='0' z:foo='4'/>" - # attr = doc.root.attributes.get_attribute('y:foo') - # doc.root.attributes.delete attr #-> <a z:foo='4'/>" - def delete( attribute ) - name = nil - prefix = nil - if attribute.kind_of? Attribute - name = attribute.name - prefix = attribute.prefix - else - attribute =~ Namespace::NAMESPLIT - prefix, name = $1, $2 - prefix = '' unless prefix - end - old = fetch(name, nil) - attr = nil - if old.kind_of? 
Hash # the supplied attribute is one of many - attr = old.delete(prefix) - if old.size == 1 - repl = nil - old.each_value{|v| repl = v} - store name, repl - end - elsif old.nil? - return @element - else # the supplied attribute is a top-level one - attr = old - res = super(name) - end - @element - end - - # Adds an attribute, overriding any existing attribute by the - # same name. Namespaces are significant. - # attribute:: An Attribute - def add( attribute ) - self[attribute.name] = attribute - end - - alias :<< :add - - # Deletes all attributes matching a name. Namespaces are significant. - # name:: - # A String; all attributes that match this path will be removed - # Returns:: an Array of the Attributes that were removed - def delete_all( name ) - rv = [] - each_attribute { |attribute| - rv << attribute if attribute.expanded_name == name - } - rv.each{ |attr| attr.remove } - return rv - end - + if element_document and element_document.doctype + expn = @element.expanded_name + expn = element_document.doctype.name if expn.size == 0 + attr_val = element_document.doctype.attribute_of(expn, name) + return Attribute.new( name, attr_val ) if attr_val + end + return nil + end + if attr.kind_of? Hash + attr = attr[ @element.prefix ] + end + return attr + end + + # Sets an attribute, overwriting any existing attribute value by the + # same name. Namespace is significant. + # name:: the name of the attribute + # value:: + # (optional) If supplied, the value of the attribute. If + # nil, any existing matching attribute is deleted. + # Returns:: + # Owning element + # doc = Document.new "<a x:foo='1' foo='3'/>" + # doc.root.attributes['y:foo'] = '2' + # doc.root.attributes['foo'] = '4' + # doc.root.attributes['x:foo'] = nil + def []=( name, value ) + if value.nil? # Delete the named attribute + attr = get_attribute(name) + delete attr + return + end + element_document = @element.document + unless value.kind_of? 
Attribute + if @element.document and @element.document.doctype + value = Text::normalize( value, @element.document.doctype ) + else + value = Text::normalize( value, nil ) + end + value = Attribute.new(name, value) + end + value.element = @element + old_attr = fetch(value.name, nil) + if old_attr.nil? + store(value.name, value) + elsif old_attr.kind_of? Hash + old_attr[value.prefix] = value + elsif old_attr.prefix != value.prefix + # Check for conflicting namespaces + raise ParseException.new( + "Namespace conflict in adding attribute \"#{value.name}\": "+ + "Prefix \"#{old_attr.prefix}\" = "+ + "\"#{@element.namespace(old_attr.prefix)}\" and prefix "+ + "\"#{value.prefix}\" = \"#{@element.namespace(value.prefix)}\"") if + value.prefix != "xmlns" and old_attr.prefix != "xmlns" and + @element.namespace( old_attr.prefix ) == + @element.namespace( value.prefix ) + store value.name, { old_attr.prefix => old_attr, + value.prefix => value } + else + store value.name, value + end + return @element + end + + # Returns an array of Strings containing all of the prefixes declared + # by this set of # attributes. The array does not include the default + # namespace declaration, if one exists. 
+ # doc = Document.new("<a xmlns='foo' xmlns:x='bar' xmlns:y='twee' "+ + # "z='glorp' p:k='gru'/>") + # prefixes = doc.root.attributes.prefixes #-> ['x', 'y'] + def prefixes + ns = [] + each_attribute do |attribute| + ns << attribute.name if attribute.prefix == 'xmlns' + end + if @element.document and @element.document.doctype + expn = @element.expanded_name + expn = @element.document.doctype.name if expn.size == 0 + @element.document.doctype.attributes_of(expn).each { + |attribute| + ns << attribute.name if attribute.prefix == 'xmlns' + } + end + ns + end + + def namespaces + namespaces = {} + each_attribute do |attribute| + namespaces[attribute.name] = attribute.value if attribute.prefix == 'xmlns' or attribute.name == 'xmlns' + end + if @element.document and @element.document.doctype + expn = @element.expanded_name + expn = @element.document.doctype.name if expn.size == 0 + @element.document.doctype.attributes_of(expn).each { + |attribute| + namespaces[attribute.name] = attribute.value if attribute.prefix == 'xmlns' or attribute.name == 'xmlns' + } + end + namespaces + end + + # Removes an attribute + # attribute:: + # either a String, which is the name of the attribute to remove -- + # namespaces are significant here -- or the attribute to remove. + # Returns:: the owning element + # doc = Document.new "<a y:foo='0' x:foo='1' foo='3' z:foo='4'/>" + # doc.root.attributes.delete 'foo' #-> <a y:foo='0' x:foo='1' z:foo='4'/>" + # doc.root.attributes.delete 'x:foo' #-> <a y:foo='0' z:foo='4'/>" + # attr = doc.root.attributes.get_attribute('y:foo') + # doc.root.attributes.delete attr #-> <a z:foo='4'/>" + def delete( attribute ) + name = nil + prefix = nil + if attribute.kind_of? Attribute + name = attribute.name + prefix = attribute.prefix + else + attribute =~ Namespace::NAMESPLIT + prefix, name = $1, $2 + prefix = '' unless prefix + end + old = fetch(name, nil) + attr = nil + if old.kind_of? 
Hash # the supplied attribute is one of many + attr = old.delete(prefix) + if old.size == 1 + repl = nil + old.each_value{|v| repl = v} + store name, repl + end + elsif old.nil? + return @element + else # the supplied attribute is a top-level one + attr = old + res = super(name) + end + @element + end + + # Adds an attribute, overriding any existing attribute by the + # same name. Namespaces are significant. + # attribute:: An Attribute + def add( attribute ) + self[attribute.name] = attribute + end + + alias :<< :add + + # Deletes all attributes matching a name. Namespaces are significant. + # name:: + # A String; all attributes that match this path will be removed + # Returns:: an Array of the Attributes that were removed + def delete_all( name ) + rv = [] + each_attribute { |attribute| + rv << attribute if attribute.expanded_name == name + } + rv.each{ |attr| attr.remove } + return rv + end + # The +get_attribute_ns+ method retrieves a method by its namespace # and name. Thus it is possible to reliably identify an attribute # even if an XML processor has changed the prefix. 
@@ -1251,11 +1216,11 @@ module REXML def get_attribute_ns(namespace, name) each_attribute() { |attribute| if name == attribute.name && - namespace == attribute.namespace() + namespace == attribute.namespace() return attribute end } nil end - end + end end diff --git a/lib/rexml/encoding.rb b/lib/rexml/encoding.rb index e35c3acf7c..6cae6b644d 100644 --- a/lib/rexml/encoding.rb +++ b/lib/rexml/encoding.rb @@ -58,8 +58,8 @@ module REXML # We have to recognize UTF-16, LSB UTF-16, and UTF-8 return UTF_16 if /\A\xfe\xff/n =~ str return UNILE if /\A\xff\xfe/n =~ str - str =~ /^\s*<?xml\s*version\s*=\s*(['"]).*?\2\s*encoding\s*=\s*(["'])(.*?)\2/um - return $1.upcase if $1 + str =~ /^\s*<\?xml\s+version\s*=\s*(['"]).*?\1\s+encoding\s*=\s*(["'])(.*?)\2/um + return $3.upcase if $3 return UTF_8 end end diff --git a/lib/rexml/encodings/CP-1252.rb b/lib/rexml/encodings/CP-1252.rb index 51179f119f..29f94d3f8c 100644 --- a/lib/rexml/encodings/CP-1252.rb +++ b/lib/rexml/encodings/CP-1252.rb @@ -3,9 +3,15 @@ # module REXML module Encoding - @@__REXML_encoding_methods = %q~ + register( "CP-1252" ) do |o| + class << o + alias encode encode_cp1252 + alias decode decode_cp1252 + end + end + # Convert from UTF-8 - def encode content + def encode_cp1252(content) array_utf8 = content.unpack('U*') array_enc = [] array_utf8.each do |num| @@ -54,7 +60,7 @@ module REXML end # Convert to UTF-8 - def decode(str) + def decode_cp1252(str) array_latin9 = str.unpack('C*') array_enc = [] array_latin9.each do |num| @@ -93,6 +99,5 @@ module REXML end array_enc.pack('U*') end - ~ end end diff --git a/lib/rexml/encodings/ISO-8859-15.rb b/lib/rexml/encodings/ISO-8859-15.rb index ce565e7dd5..e0c6a51ed2 100644 --- a/lib/rexml/encodings/ISO-8859-15.rb +++ b/lib/rexml/encodings/ISO-8859-15.rb @@ -3,9 +3,13 @@ # module REXML module Encoding - @@__REXML_encoding_methods = %q~ + register("ISO-8859-15") do |o| + alias encode to_iso_8859_15 + alias decode from_iso_8859_15 + end + # Convert from UTF-8 - def 
to_iso_8859_15 content + def to_iso_8859_15(content) array_utf8 = content.unpack('U*') array_enc = [] array_utf8.each do |num| @@ -64,6 +68,5 @@ module REXML end array_enc.pack('U*') end - ~ end end diff --git a/lib/rexml/encodings/UTF-16.rb b/lib/rexml/encodings/UTF-16.rb index 792adfd44d..007c493d9c 100644 --- a/lib/rexml/encodings/UTF-16.rb +++ b/lib/rexml/encodings/UTF-16.rb @@ -16,7 +16,7 @@ module REXML end def decode_utf16(str) - str = str[2..-1] if /^\376\377/ =~ str + str = str[2..-1] if /^\376\377/n =~ str array_enc=str.unpack('C*') array_utf8 = [] 0.step(array_enc.size-1, 2){|i| diff --git a/lib/rexml/entity.rb b/lib/rexml/entity.rb index 4b88a3c553..ff2d45f39b 100644 --- a/lib/rexml/entity.rb +++ b/lib/rexml/entity.rb @@ -89,6 +89,12 @@ module REXML # Write out a fully formed, correct entity definition (assuming the Entity # object itself is valid.) + # + # out:: + # An object implementing <TT><<<TT> to which the entity will be + # output + # indent:: + # *DEPRECATED* and ignored def write out, indent=-1 out << '<!ENTITY ' out << '% ' if @reference diff --git a/lib/rexml/formatters/default.rb b/lib/rexml/formatters/default.rb new file mode 100644 index 0000000000..77381bdf84 --- /dev/null +++ b/lib/rexml/formatters/default.rb @@ -0,0 +1,109 @@ +module REXML + module Formatters + class Default + # Prints out the XML document with no formatting -- except if id_hack is + # set. + # + # ie_hack:: + # If set to true, then inserts whitespace before the close of an empty + # tag, so that IE's bad XML parser doesn't choke. + def initialize( ie_hack=false ) + @ie_hack = ie_hack + end + + # Writes the node to some output. + # + # node:: + # The node to write + # output:: + # A class implementing <TT><<</TT>. Pass in an Output object to + # change the output encoding. 
+ def write( node, output ) + case node + + when Document + if node.xml_decl.encoding != "UTF-8" && !output.kind_of?(Output) + output = Output.new( output, node.xml_decl.encoding ) + end + write_document( node, output ) + + when Element + write_element( node, output ) + + when Declaration, ElementDecl, NotationDecl, ExternalEntity, Entity, + Attribute, AttlistDecl + node.write( output,-1 ) + + when Instruction + write_instruction( node, output ) + + when DocType, XMLDecl + node.write( output ) + + when Comment + write_comment( node, output ) + + when CData + write_cdata( node, output ) + + when Text + write_text( node, output ) + + else + raise Exception.new("XML FORMATTING ERROR") + + end + end + + protected + def write_document( node, output ) + node.children.each { |child| write( child, output ) } + end + + def write_element( node, output ) + output << "<#{node.expanded_name}" + + node.attributes.each_attribute do |attr| + output << " " + attr.write( output ) + end unless node.attributes.empty? + + if node.children.empty? 
+ output << " " if @ie_hack + output << "/" + else + output << ">" + node.children.each { |child| + write( child, output ) + } + output << "</#{node.expanded_name}" + end + output << ">" + end + + def write_text( node, output ) + output << node.to_s() + end + + def write_comment( node, output ) + output << Comment::START + output << node.to_s + output << Comment::STOP + end + + def write_cdata( node, output ) + output << CData::START + output << node.to_s + output << CData::STOP + end + + def write_instruction( node, output ) + output << Instruction::START.sub(/\\/u, '') + output << node.target + output << ' ' + output << node.content + output << Instruction::STOP.sub(/\\/u, '') + end + end + end +end diff --git a/lib/rexml/formatters/pretty.rb b/lib/rexml/formatters/pretty.rb new file mode 100644 index 0000000000..78eab27402 --- /dev/null +++ b/lib/rexml/formatters/pretty.rb @@ -0,0 +1,134 @@ +require 'rexml/formatters/default' + +module REXML + module Formatters + # Pretty-prints an XML document. This destroys whitespace in text nodes + # and will insert carriage returns and indentations. + # + # TODO: Add an option to print attributes on new lines + class Pretty < Default + + # If compact is set to true, then the formatter will attempt to use as + # little space as possible + attr_accessor :compact + # The width of a page. Used for formatting text + attr_accessor :width + + # Create a new pretty printer. + # + # output:: + # An object implementing '<<(String)', to which the output will be written. + # indentation:: + # An integer greater than 0. The indentation of each level will be + # this number of spaces. If this is < 1, the behavior of this object + # is undefined. Defaults to 2. + # ie_hack:: + # If true, the printer will insert whitespace before closing empty + # tags, thereby allowing Internet Explorer's feeble XML parser to + # function. Defaults to false. 
+ def initialize( indentation=2, ie_hack=false ) + @indentation = indentation + @level = 0 + @ie_hack = ie_hack + @width = 80 + end + + protected + def write_element(node, output) + output << ' '*@level + output << "<#{node.expanded_name}" + + node.attributes.each_attribute do |attr| + output << " " + attr.write( output ) + end unless node.attributes.empty? + + if node.children.empty? + if @ie_hack + output << " " + end + output << "/" + else + output << ">" + # If compact and all children are text, and if the formatted output + # is less than the specified width, then try to print everything on + # one line + skip = false + if compact + if node.children.inject(true) {|s,c| s & c.kind_of?(Text)} + string = "" + node.children.each { |child| write( child, string, 0 ) } + if string.length + @level < @width + output << string + skip = true + end + end + end + unless skip + output << "\n" + @level += @indentation + node.children.each { |child| + next if child.kind_of?(Text) and child.to_s.strip.length == 0 + write( child, output ) + output << "\n" + } + @level -= @indentation + output << ' '*@level + end + output << "</#{node.expanded_name}" + end + output << ">" + end + + def write_text( node, output ) + s = node.to_s() + s.gsub!(/\s/,' ') + s.squeeze!(" ") + s = wrap(s, 80-@level) + s = indent_text(s, @level, " ", true) + output << (' '*@level + s) + end + + def write_comment( node, output) + output << ' ' * @level + super + end + + def write_cdata( node, output) + output << ' ' * @level + super + end + + def write_document( node, output ) + # Ok, this is a bit odd. All XML documents have an XML declaration, + # but it may not write itself if the user didn't specifically add it, + # either through the API or in the input document. If it doesn't write + # itself, then we don't need a carriage return... which makes this + # logic more complex. 
+ node.children.each { |child| + next if child == node.children[-1] and child.instance_of?(Text) + unless child == node.children[0] or child.instance_of?(Text) or + (child == node.children[1] and !node.children[0].writethis) + output << "\n" + end + write( child, output ) + } + end + + private + def indent_text(string, level=1, style="\t", indentfirstline=true) + return string if level < 0 + string.gsub(/\n/, "\n#{style*level}") + end + + def wrap(string, width) + # Recursivly wrap string at width. + return string if string.length <= width + place = string.rindex(' ', width) # Position in string with last ' ' before cutoff + return string[0,place] + "\n" + wrap(string[place+1..-1], width) + end + + end + end +end + diff --git a/lib/rexml/formatters/transitive.rb b/lib/rexml/formatters/transitive.rb new file mode 100644 index 0000000000..1d80f21fbb --- /dev/null +++ b/lib/rexml/formatters/transitive.rb @@ -0,0 +1,56 @@ +require 'rexml/formatters/pretty' + +module REXML + module Formatters + # The Transitive formatter writes an XML document that parses to an + # identical document as the source document. This means that no extra + # whitespace nodes are inserted, and whitespace within text nodes is + # preserved. Within these constraints, the document is pretty-printed, + # with whitespace inserted into the metadata to introduce formatting. + # + # Note that this is only useful if the original XML is not already + # formatted. Since this formatter does not alter whitespace nodes, the + # results of formatting already formatted XML will be odd. + class Transitive < Default + def initialize( indentation=2 ) + @indentation = indentation + @level = 0 + end + + protected + def write_element( node, output ) + output << "<#{node.expanded_name}" + + node.attributes.each_attribute do |attr| + output << " " + attr.write( output ) + end unless node.attributes.empty? + + output << "\n" + output << ' '*@level + if node.children.empty? 
+ output << "/" + else + output << ">" + # If compact and all children are text, and if the formatted output + # is less than the specified width, then try to print everything on + # one line + skip = false + @level += @indentation + node.children.each { |child| + write( child, output ) + } + @level -= @indentation + output << "</#{node.expanded_name}" + output << "\n" + output << ' '*@level + end + output << ">" + end + + def write_text( node, output ) + output << node.to_s() + end + end + end +end diff --git a/lib/rexml/functions.rb b/lib/rexml/functions.rb index cad4f6a8c9..8293e9c5ac 100644 --- a/lib/rexml/functions.rb +++ b/lib/rexml/functions.rb @@ -339,7 +339,6 @@ module REXML object.to_f else str = string( object ) - #puts "STRING OF #{object.inspect} = #{str}" # If XPath ever gets scientific notation... #if str =~ /^\s*-?(\d*\.?\d+|\d+\.)([Ee]\d*)?\s*$/ if str =~ /^\s*-?(\d*\.?\d+|\d+\.)\s*$/ diff --git a/lib/rexml/instruction.rb b/lib/rexml/instruction.rb index f24f7786f7..c16b894b4a 100644 --- a/lib/rexml/instruction.rb +++ b/lib/rexml/instruction.rb @@ -38,7 +38,11 @@ module REXML Instruction.new self end + # == DEPRECATED + # See the rexml/formatters package + # def write writer, indent=-1, transitive=false, ie_hack=false + Kernel.warn( "#{self.class.name}.write is deprecated" ) indent(writer, indent) writer << START.sub(/\\/u, '') writer << @target diff --git a/lib/rexml/node.rb b/lib/rexml/node.rb index 7226e5be6c..ebdbd44ff2 100644 --- a/lib/rexml/node.rb +++ b/lib/rexml/node.rb @@ -18,10 +18,19 @@ module REXML @parent[ ind - 1 ] end - def to_s indent=-1 - rv = "" - write rv,indent - rv + # indent:: + # *DEPRECATED* This parameter is now ignored. See the formatters in the + # REXML::Formatters package for changing the output style. + def to_s indent=nil + unless indent.nil? 
+ Kernel.warn( "#{self.class.name}.to_s(indent) parameter is deprecated" ) + f = REXML::Formatters::Pretty.new( indent ) + f.write( self, rv, indent ) + else + f = REXML::Formatters::Default.new + f.write( self, rv = "" ) + end + return rv end def indent to, ind diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb index fecd801d6f..3782d61b2c 100644 --- a/lib/rexml/parsers/baseparser.rb +++ b/lib/rexml/parsers/baseparser.rb @@ -53,7 +53,7 @@ module REXML STANDALONE = /\bstandalone\s*=\s["'](.*?)['"]/um ENTITY_START = /^\s*<!ENTITY/ - IDENTITY = /^([!\*\w\-]+)(\s+#{NCNAME_STR})?(\s+["'].*?['"])?(\s+['"].*?["'])?/u + IDENTITY = /^([!\*\w\-]+)(\s+#{NCNAME_STR})?(\s+["'](.*?)['"])?(\s+['"](.*?)["'])?/u ELEMENTDECL_START = /^\s*<!ELEMENT/um ELEMENTDECL_PATTERN = /^\s*(<!ELEMENT.*?)>/um SYSTEMENTITY = /^\s*(%.*?;)\s*$/um @@ -217,10 +217,10 @@ module REXML close = md[2] identity =~ IDENTITY name = $1 - raise REXML::ParseException("DOCTYPE is missing a name") if name.nil? + raise REXML::ParseException.new("DOCTYPE is missing a name") if name.nil? pub_sys = $2.nil? ? nil : $2.strip - long_name = $3.nil? ? nil : $3.strip - uri = $4.nil? ? nil : $4.strip + long_name = $4.nil? ? nil : $4.strip + uri = $6.nil? ? 
nil : $6.strip args = [ :start_doctype, name, pub_sys, long_name, uri ] if close == ">" @document_status = :after_doctype diff --git a/lib/rexml/parsers/sax2parser.rb b/lib/rexml/parsers/sax2parser.rb index 6c7fbe000a..e402eb7747 100644 --- a/lib/rexml/parsers/sax2parser.rb +++ b/lib/rexml/parsers/sax2parser.rb @@ -94,6 +94,8 @@ module REXML when :end_document handle( :end_document ) break + when :start_doctype + handle( :doctype, *event[1..-1]) when :end_doctype context = context[1] when :start_element @@ -167,7 +169,7 @@ module REXML when :entitydecl @entities[ event[1] ] = event[2] if event.size == 3 handle( *event ) - when :processing_instruction, :comment, :doctype, :attlistdecl, + when :processing_instruction, :comment, :attlistdecl, :elementdecl, :cdata, :notationdecl, :xmldecl handle( *event ) end diff --git a/lib/rexml/parsers/xpathparser.rb b/lib/rexml/parsers/xpathparser.rb index 6f5b21cd93..de2530e347 100644 --- a/lib/rexml/parsers/xpathparser.rb +++ b/lib/rexml/parsers/xpathparser.rb @@ -551,7 +551,7 @@ module REXML end end #puts "BEFORE WITH '#{rest}'" - rest = LocationPath(rest, n) if rest =~ /^[\/\.\@\[\w_*]/ + rest = LocationPath(rest, n) if rest =~ /\A[\/\.\@\[\w_*]/ parsed.concat(n) return rest end diff --git a/lib/rexml/rexml.rb b/lib/rexml/rexml.rb index bff1cd9815..132261ed95 100644 --- a/lib/rexml/rexml.rb +++ b/lib/rexml/rexml.rb @@ -10,8 +10,8 @@ # # Main page:: http://www.germane-software.com/software/rexml # Author:: Sean Russell <serATgermaneHYPHENsoftwareDOTcom> -# Version:: 3.1.6 -# Date:: 2006/335 +# Version:: 3.1.7 +# Date:: 2007/206 # # This API documentation can be downloaded from the REXML home page, or can # be accessed online[http://www.germane-software.com/software/rexml_doc] @@ -20,9 +20,10 @@ # or can be accessed # online[http://www.germane-software.com/software/rexml/docs/tutorial.html] module REXML - COPYRIGHT = "Copyright © 2001-2006 Sean Russell <ser@germane-software.com>" - DATE = "2006/335" - VERSION = "3.1.6" + 
COPYRIGHT = "Copyright © 2001-2007 Sean Russell <ser@germane-software.com>" + DATE = "2007/206" + VERSION = "3.1.7" + REVISION = "$Revision$".gsub(/\$Revision:|\$/,'').strip Copyright = COPYRIGHT Version = VERSION diff --git a/lib/rexml/source.rb b/lib/rexml/source.rb index 2fee99c0e9..ada876cde5 100644 --- a/lib/rexml/source.rb +++ b/lib/rexml/source.rb @@ -1,139 +1,139 @@ require 'rexml/encoding' module REXML - # Generates Source-s. USE THIS CLASS. - class SourceFactory - # Generates a Source object - # @param arg Either a String, or an IO - # @return a Source, or nil if a bad argument was given - def SourceFactory::create_from(arg) + # Generates Source-s. USE THIS CLASS. + class SourceFactory + # Generates a Source object + # @param arg Either a String, or an IO + # @return a Source, or nil if a bad argument was given + def SourceFactory::create_from(arg) if arg.kind_of? String - Source.new(arg) + Source.new(arg) elsif arg.respond_to? :read and arg.respond_to? :readline and arg.respond_to? :nil? and arg.respond_to? :eof? - IOSource.new(arg) + IOSource.new(arg) elsif arg.kind_of? Source arg else raise "#{source.class} is not a valid input stream. It must walk \n"+ "like either a String, IO, or Source." 
end - end - end - - # A Source can be searched for patterns, and wraps buffers and other - # objects and provides consumption of text - class Source - include Encoding - # The current buffer (what we're going to read next) - attr_reader :buffer - # The line number of the last consumed text - attr_reader :line - attr_reader :encoding - - # Constructor - # @param arg must be a String, and should be a valid XML document + end + end + + # A Source can be searched for patterns, and wraps buffers and other + # objects and provides consumption of text + class Source + include Encoding + # The current buffer (what we're going to read next) + attr_reader :buffer + # The line number of the last consumed text + attr_reader :line + attr_reader :encoding + + # Constructor + # @param arg must be a String, and should be a valid XML document # @param encoding if non-null, sets the encoding of the source to this # value, overriding all encoding detection - def initialize(arg, encoding=nil) - @orig = @buffer = arg + def initialize(arg, encoding=nil) + @orig = @buffer = arg if encoding self.encoding = encoding else self.encoding = check_encoding( @buffer ) end - @line = 0 - end - - - # Inherited from Encoding - # Overridden to support optimized en/decoding - def encoding=(enc) - return unless super - @line_break = encode( '>' ) - if enc != UTF_8 - @buffer = decode(@buffer) - @to_utf = true - else - @to_utf = false - end - end - - # Scans the source for a given pattern. Note, that this is not your - # usual scan() method. For one thing, the pattern argument has some - # requirements; for another, the source can be consumed. You can easily - # confuse this method. Originally, the patterns were easier - # to construct and this method more robust, because this method - # generated search regexes on the fly; however, this was - # computationally expensive and slowed down the entire REXML package - # considerably, since this is by far the most commonly called method. 
- # @param pattern must be a Regexp, and must be in the form of - # /^\s*(#{your pattern, with no groups})(.*)/. The first group - # will be returned; the second group is used if the consume flag is - # set. - # @param consume if true, the pattern returned will be consumed, leaving - # everything after it in the Source. - # @return the pattern, if found, or nil if the Source is empty or the - # pattern is not found. - def scan(pattern, cons=false) - return nil if @buffer.nil? - rv = @buffer.scan(pattern) - @buffer = $' if cons and rv.size>0 - rv - end - - def read - end - - def consume( pattern ) - @buffer = $' if pattern.match( @buffer ) - end - - def match_to( char, pattern ) - return pattern.match(@buffer) - end - - def match_to_consume( char, pattern ) - md = pattern.match(@buffer) - @buffer = $' - return md - end - - def match(pattern, cons=false) - md = pattern.match(@buffer) - @buffer = $' if cons and md - return md - end - - # @return true if the Source is exhausted - def empty? - @buffer == "" - end + @line = 0 + end + + + # Inherited from Encoding + # Overridden to support optimized en/decoding + def encoding=(enc) + return unless super + @line_break = encode( '>' ) + if enc != UTF_8 + @buffer = decode(@buffer) + @to_utf = true + else + @to_utf = false + end + end + + # Scans the source for a given pattern. Note, that this is not your + # usual scan() method. For one thing, the pattern argument has some + # requirements; for another, the source can be consumed. You can easily + # confuse this method. Originally, the patterns were easier + # to construct and this method more robust, because this method + # generated search regexes on the fly; however, this was + # computationally expensive and slowed down the entire REXML package + # considerably, since this is by far the most commonly called method. + # @param pattern must be a Regexp, and must be in the form of + # /^\s*(#{your pattern, with no groups})(.*)/. 
The first group + # will be returned; the second group is used if the consume flag is + # set. + # @param consume if true, the pattern returned will be consumed, leaving + # everything after it in the Source. + # @return the pattern, if found, or nil if the Source is empty or the + # pattern is not found. + def scan(pattern, cons=false) + return nil if @buffer.nil? + rv = @buffer.scan(pattern) + @buffer = $' if cons and rv.size>0 + rv + end + + def read + end + + def consume( pattern ) + @buffer = $' if pattern.match( @buffer ) + end + + def match_to( char, pattern ) + return pattern.match(@buffer) + end + + def match_to_consume( char, pattern ) + md = pattern.match(@buffer) + @buffer = $' + return md + end + + def match(pattern, cons=false) + md = pattern.match(@buffer) + @buffer = $' if cons and md + return md + end + + # @return true if the Source is exhausted + def empty? + @buffer == "" + end def position @orig.index( @buffer ) end - # @return the current line in the source - def current_line - lines = @orig.split - res = lines.grep @buffer[0..30] - res = res[-1] if res.kind_of? Array - lines.index( res ) if res - end - end + # @return the current line in the source + def current_line + lines = @orig.split + res = lines.grep @buffer[0..30] + res = res[-1] if res.kind_of? Array + lines.index( res ) if res + end + end - # A Source that wraps an IO. See the Source class for method - # documentation - class IOSource < Source - #attr_reader :block_size + # A Source that wraps an IO. See the Source class for method + # documentation + class IOSource < Source + #attr_reader :block_size # block_size has been deprecated - def initialize(arg, block_size=500, encoding=nil) - @er_source = @source = arg - @to_utf = false + def initialize(arg, block_size=500, encoding=nil) + @er_source = @source = arg + @to_utf = false # Determining the encoding is a deceptively difficult issue to resolve. # First, we check the first two bytes for UTF-16. 
Then we # assume that the encoding is at least ASCII enough for the '>', and @@ -147,86 +147,89 @@ module REXML self.encoding = encoding elsif /\A(?:\xfe\xff|\xff\xfe)/n =~ str self.encoding = check_encoding( str ) + elsif (0xef == str[0] && 0xbb == str[1]) + str += @source.read(1) + str = '' if (0xbf == str[2]) else @line_break = '>' end super str+@source.readline( @line_break ) end - def scan(pattern, cons=false) - rv = super - # You'll notice that this next section is very similar to the same - # section in match(), but just a liiittle different. This is - # because it is a touch faster to do it this way with scan() - # than the way match() does it; enough faster to warrent duplicating - # some code - if rv.size == 0 - until @buffer =~ pattern or @source.nil? - begin - # READLINE OPT - #str = @source.read(@block_size) - str = @source.readline(@line_break) - str = decode(str) if @to_utf and str - @buffer << str + def scan(pattern, cons=false) + rv = super + # You'll notice that this next section is very similar to the same + # section in match(), but just a liiittle different. This is + # because it is a touch faster to do it this way with scan() + # than the way match() does it; enough faster to warrent duplicating + # some code + if rv.size == 0 + until @buffer =~ pattern or @source.nil? 
+ begin + # READLINE OPT + #str = @source.read(@block_size) + str = @source.readline(@line_break) + str = decode(str) if @to_utf and str + @buffer << str rescue Iconv::IllegalSequence raise - rescue - @source = nil - end - end - rv = super - end - rv.taint - rv - end - - def read - begin + rescue + @source = nil + end + end + rv = super + end + rv.taint + rv + end + + def read + begin str = @source.readline(@line_break) - str = decode(str) if @to_utf and str - @buffer << str - rescue Exception, NameError - @source = nil - end - end - - def consume( pattern ) - match( pattern, true ) - end - - def match( pattern, cons=false ) - rv = pattern.match(@buffer) - @buffer = $' if cons and rv - while !rv and @source - begin + str = decode(str) if @to_utf and str + @buffer << str + rescue Exception, NameError + @source = nil + end + end + + def consume( pattern ) + match( pattern, true ) + end + + def match( pattern, cons=false ) + rv = pattern.match(@buffer) + @buffer = $' if cons and rv + while !rv and @source + begin str = @source.readline(@line_break) - str = decode(str) if @to_utf and str - @buffer << str - rv = pattern.match(@buffer) - @buffer = $' if cons and rv - rescue - @source = nil - end - end - rv.taint - rv - end - - def empty? - super and ( @source.nil? || @source.eof? ) - end + str = decode(str) if @to_utf and str + @buffer << str + rv = pattern.match(@buffer) + @buffer = $' if cons and rv + rescue + @source = nil + end + end + rv.taint + rv + end + + def empty? + super and ( @source.nil? || @source.eof? ) + end def position @er_source.stat.pipe? ? 
0 : @er_source.pos end - # @return the current line in the source - def current_line + # @return the current line in the source + def current_line begin - pos = @er_source.pos # The byte position in the source - lineno = @er_source.lineno # The XML < position in the source + pos = @er_source.pos # The byte position in the source + lineno = @er_source.lineno # The XML < position in the source @er_source.rewind - line = 0 # The \r\n position in the source + line = 0 # The \r\n position in the source begin while @er_source.pos < pos @er_source.readline @@ -238,7 +241,7 @@ module REXML pos = -1 line = -1 end - [pos, lineno, line] - end - end + [pos, lineno, line] + end + end end diff --git a/lib/rexml/text.rb b/lib/rexml/text.rb index 3de9170623..9804aa710b 100644 --- a/lib/rexml/text.rb +++ b/lib/rexml/text.rb @@ -211,16 +211,17 @@ module REXML return new_string end + # == DEPRECATED + # See REXML::Formatters + # def write( writer, indent=-1, transitive=false, ie_hack=false ) - s = to_s() - if not (@parent and @parent.whitespace) then - s = wrap(s, 60, false) if @parent and @parent.context[:wordwrap] == :all - if @parent and not @parent.context[:indentstyle].nil? and indent > 0 and s.count("\n") > 0 - s = indent_text(s, indent, @parent.context[:indentstyle], false) + Kernel.warn("#{self.class.name}.write is deprecated. 
See REXML::Formatters") + formatter = if indent > -1 + REXML::Formatters::Pretty.new( indent ) + else + REXML::Formatters::Default.new end - s.squeeze!(" \n\t") if @parent and !@parent.whitespace - end - writer << s + formatter.write( self, writer ) end # FIXME diff --git a/lib/rexml/xmldecl.rb b/lib/rexml/xmldecl.rb index b65604b762..427eb78cf8 100644 --- a/lib/rexml/xmldecl.rb +++ b/lib/rexml/xmldecl.rb @@ -13,7 +13,7 @@ module REXML STOP = '\?>'; attr_accessor :version, :standalone - attr_reader :writeencoding + attr_reader :writeencoding, :writethis def initialize(version=DEFAULT_VERSION, encoding=nil, standalone=nil) @writethis = true @@ -37,9 +37,14 @@ module REXML XMLDecl.new(self) end - def write writer, indent=-1, transitive=false, ie_hack=false + # indent:: + # Ignored. There must be no whitespace before an XML declaration + # transitive:: + # Ignored + # ie_hack:: + # Ignored + def write(writer, indent=-1, transitive=false, ie_hack=false) return nil unless @writethis or writer.kind_of? Output - indent( writer, indent ) writer << START.sub(/\\/u, '') if writer.kind_of? Output writer << " #{content writer.encoding}" diff --git a/lib/rexml/xpath_parser.rb b/lib/rexml/xpath_parser.rb index 3393113d6a..eb608fdb34 100644 --- a/lib/rexml/xpath_parser.rb +++ b/lib/rexml/xpath_parser.rb @@ -160,6 +160,7 @@ module REXML node_types = ELEMENTS return nodeset if path_stack.length == 0 || nodeset.length == 0 while path_stack.length > 0 + #puts "#"*5 #puts "Path stack = #{path_stack.inspect}" #puts "Nodeset is #{nodeset.inspect}" if nodeset.length == 0 @@ -351,7 +352,8 @@ module REXML when :following_sibling #puts "FOLLOWING_SIBLING 1: nodeset = #{nodeset}" results = [] - for node in nodeset + nodeset.each do |node| + next if node.parent.nil? all_siblings = node.parent.children current_index = all_siblings.index( node ) following_siblings = all_siblings[ current_index+1 .. 
-1 ] @@ -362,13 +364,14 @@ module REXML when :preceding_sibling results = [] - for node in nodeset + nodeset.each do |node| + next if node.parent.nil? all_siblings = node.parent.children current_index = all_siblings.index( node ) - preceding_siblings = all_siblings[ 0 .. current_index-1 ].reverse - #results += expr( path_stack.dclone, preceding_siblings ) + preceding_siblings = all_siblings[ 0, current_index ].reverse + results += preceding_siblings end - nodeset = preceding_siblings || [] + nodeset = results node_types = ELEMENTS when :preceding @@ -389,15 +392,21 @@ module REXML node_types = ELEMENTS when :namespace + #puts "In :namespace" new_nodeset = [] prefix = path_stack.shift for node in nodeset if (node.node_type == :element or node.node_type == :attribute) - if (node.node_type == :element) + if @namespaces + namespaces = @namespaces + elsif (node.node_type == :element) namespaces = node.namespaces else namespaces = node.element.namesapces end + #puts "Namespaces = #{namespaces.inspect}" + #puts "Prefix = #{prefix.inspect}" + #puts "Node.namespace = #{node.namespace}" if (node.namespace == namespaces[prefix]) new_nodeset << node end |