diff options
-rw-r--r-- | lib/rexml/cdata.rb | 4 | ||||
-rw-r--r-- | lib/rexml/doctype.rb | 433 | ||||
-rw-r--r-- | lib/rexml/document.rb | 22 | ||||
-rw-r--r-- | lib/rexml/element.rb | 15 | ||||
-rw-r--r-- | lib/rexml/encoding.rb | 110 | ||||
-rw-r--r-- | lib/rexml/functions.rb | 7 | ||||
-rw-r--r-- | lib/rexml/parent.rb | 312 | ||||
-rw-r--r-- | lib/rexml/parsers/baseparser.rb | 46 | ||||
-rw-r--r-- | lib/rexml/parsers/streamparser.rb | 74 | ||||
-rw-r--r-- | lib/rexml/parsers/treeparser.rb | 10 | ||||
-rw-r--r-- | lib/rexml/rexml.rb | 13 | ||||
-rw-r--r-- | lib/rexml/sax2listener.rb | 1 | ||||
-rw-r--r-- | lib/rexml/source.rb | 12 | ||||
-rw-r--r-- | lib/rexml/streamlistener.rb | 3 | ||||
-rw-r--r-- | lib/rexml/text.rb | 4 | ||||
-rw-r--r-- | lib/rexml/xmldecl.rb | 5 | ||||
-rw-r--r-- | lib/rexml/xpath_parser.rb | 12 |
17 files changed, 605 insertions, 478 deletions
diff --git a/lib/rexml/cdata.rb b/lib/rexml/cdata.rb index ffedac1b53..046012ba61 100644 --- a/lib/rexml/cdata.rb +++ b/lib/rexml/cdata.rb @@ -35,6 +35,10 @@ module REXML @string end + def value + @string + end + # Generates XML output of this object # # output:: diff --git a/lib/rexml/doctype.rb b/lib/rexml/doctype.rb index 652a04fce2..4a1ffb4336 100644 --- a/lib/rexml/doctype.rb +++ b/lib/rexml/doctype.rb @@ -6,55 +6,55 @@ require 'rexml/attlistdecl' require 'rexml/xmltokens' module REXML - # Represents an XML DOCTYPE declaration; that is, the contents of <!DOCTYPE - # ... >. DOCTYPES can be used to declare the DTD of a document, as well as - # being used to declare entities used in the document. - class DocType < Parent - include XMLTokens - START = "<!DOCTYPE" - STOP = ">" - SYSTEM = "SYSTEM" - PUBLIC = "PUBLIC" - DEFAULT_ENTITIES = { - 'gt'=>EntityConst::GT, - 'lt'=>EntityConst::LT, - 'quot'=>EntityConst::QUOT, - "apos"=>EntityConst::APOS - } - - # name is the name of the doctype - # external_id is the referenced DTD, if given - attr_reader :name, :external_id, :entities, :namespaces - - # Constructor - # - # dt = DocType.new( 'foo', '-//I/Hate/External/IDs' ) - # # <!DOCTYPE foo '-//I/Hate/External/IDs'> - # dt = DocType.new( doctype_to_clone ) - # # Incomplete. Shallow clone of doctype + # Represents an XML DOCTYPE declaration; that is, the contents of <!DOCTYPE + # ... >. DOCTYPES can be used to declare the DTD of a document, as well as + # being used to declare entities used in the document. + class DocType < Parent + include XMLTokens + START = "<!DOCTYPE" + STOP = ">" + SYSTEM = "SYSTEM" + PUBLIC = "PUBLIC" + DEFAULT_ENTITIES = { + 'gt'=>EntityConst::GT, + 'lt'=>EntityConst::LT, + 'quot'=>EntityConst::QUOT, + "apos"=>EntityConst::APOS + } + + # name is the name of the doctype + # external_id is the referenced DTD, if given + attr_reader :name, :external_id, :entities, :namespaces + + # Constructor + # + # dt = DocType.new( 'foo', '-//I/Hate/External/IDs' ) + # # <!DOCTYPE foo '-//I/Hate/External/IDs'> + # dt = DocType.new( doctype_to_clone ) + # # Incomplete. Shallow clone of doctype # # +Note+ that the constructor: # # Doctype.new( Source.new( "<!DOCTYPE foo 'bar'>" ) ) # # is _deprecated_. Do not use it. It will probably disappear. - def initialize( first, parent=nil ) - @entities = DEFAULT_ENTITIES - @long_name = @uri = nil - if first.kind_of? String - super() - @name = first - @external_id = parent - elsif first.kind_of? DocType - super( parent ) - @name = first.name - @external_id = first.external_id - elsif first.kind_of? Array - super( parent ) - @name = first[0] - @external_id = first[1] - @long_name = first[2] - @uri = first[3] + def initialize( first, parent=nil ) + @entities = DEFAULT_ENTITIES + @long_name = @uri = nil + if first.kind_of? String + super() + @name = first + @external_id = parent + elsif first.kind_of? DocType + super( parent ) + @name = first.name + @external_id = first.external_id + elsif first.kind_of? Array + super( parent ) + @name = first[0] + @external_id = first[1] + @long_name = first[2] + @uri = first[3] elsif first.kind_of? Source super( parent ) parser = Parsers::BaseParser.new( first ) @@ -64,150 +64,215 @@ module REXML end else super() - end - end - - def node_type - :doctype - end - - def attributes_of element - rv = [] - each do |child| - child.each do |key,val| - rv << Attribute.new(key,val) - end if child.kind_of? AttlistDecl and child.element_name == element - end - rv - end - - def attribute_of element, attribute - att_decl = find do |child| - child.kind_of? AttlistDecl and - child.element_name == element and - child.include? attribute - end - return nil unless att_decl - att_decl[attribute] - end - - def clone - DocType.new self - end - - # output:: - # Where to write the string - # indent:: - # An integer. If -1, no indenting will be used; otherwise, the - # indentation will be this number of spaces, and children will be - # indented an additional amount. - # transitive:: - # If transitive is true and indent is >= 0, then the output will be - # pretty-printed in such a way that the added whitespace does not affect - # the absolute *value* of the document -- that is, it leaves the value - # and number of Text nodes in the document unchanged. - # ie_hack:: - # Internet Explorer is the worst piece of crap to have ever been - # written, with the possible exception of Windows itself. Since IE is - # unable to parse proper XML, we have to provide a hack to generate XML - # that IE's limited abilities can handle. This hack inserts a space - # before the /> on empty tags. - # - def write( output, indent=0, transitive=false, ie_hack=false ) - indent( output, indent ) - output << START - output << ' ' - output << @name - output << " #@external_id" if @external_id - output << " #@long_name" if @long_name - output << " #@uri" if @uri - unless @children.empty? - next_indent = indent + 1 - output << ' [' - child = nil # speed - @children.each { |child| - output << "\n" - child.write( output, next_indent ) - } - output << "\n" - #output << ' '*next_indent - output << "]" - end - output << STOP - end + end + end + + def node_type + :doctype + end + + def attributes_of element + rv = [] + each do |child| + child.each do |key,val| + rv << Attribute.new(key,val) + end if child.kind_of? AttlistDecl and child.element_name == element + end + rv + end + + def attribute_of element, attribute + att_decl = find do |child| + child.kind_of? AttlistDecl and + child.element_name == element and + child.include? attribute + end + return nil unless att_decl + att_decl[attribute] + end + + def clone + DocType.new self + end + + # output:: + # Where to write the string + # indent:: + # An integer. If -1, no indenting will be used; otherwise, the + # indentation will be this number of spaces, and children will be + # indented an additional amount. + # transitive:: + # If transitive is true and indent is >= 0, then the output will be + # pretty-printed in such a way that the added whitespace does not affect + # the absolute *value* of the document -- that is, it leaves the value + # and number of Text nodes in the document unchanged. + # ie_hack:: + # Internet Explorer is the worst piece of crap to have ever been + # written, with the possible exception of Windows itself. Since IE is + # unable to parse proper XML, we have to provide a hack to generate XML + # that IE's limited abilities can handle. This hack inserts a space + # before the /> on empty tags. + # + def write( output, indent=0, transitive=false, ie_hack=false ) + indent( output, indent ) + output << START + output << ' ' + output << @name + output << " #@external_id" if @external_id + output << " #@long_name" if @long_name + output << " #@uri" if @uri + unless @children.empty? + next_indent = indent + 1 + output << ' [' + child = nil # speed + @children.each { |child| + output << "\n" + child.write( output, next_indent ) + } + #output << ' '*next_indent + output << "\n]" + end + output << STOP + end def context @parent.context end - def entity( name ) - @entities[name].unnormalized if @entities[name] - end - - def add child - super(child) - @entities = DEFAULT_ENTITIES.clone if @entities == DEFAULT_ENTITIES - @entities[ child.name ] = child if child.kind_of? Entity - end - end - - # We don't really handle any of these since we're not a validating - # parser, so we can be pretty dumb about them. All we need to be able - # to do is spew them back out on a write() - - # This is an abstract class. You never use this directly; it serves as a - # parent class for the specific declarations. - class Declaration < Child - def initialize src - super() - @string = src - end - - def to_s - @string+'>' - end - - def write( output, indent ) - output << (' '*indent) if indent > 0 - output << to_s - end - end - - public - class ElementDecl < Declaration - def initialize( src ) - super - end - end - - class ExternalEntity < Child - def initialize( src ) - super() - @entity = src - end - def to_s - @entity - end - def write( output, indent ) - output << @entity - output << "\n" - end - end - - class NotationDecl < Child - def initialize name, middle, rest - @name = name - @middle = middle - @rest = rest - end - - def to_s - "<!NOTATION #@name '#@middle #@rest'>" - end - - def write( output, indent=-1 ) - output << (' '*indent) if indent > 0 - output << to_s - end - end + def entity( name ) + @entities[name].unnormalized if @entities[name] + end + + def add child + super(child) + @entities = DEFAULT_ENTITIES.clone if @entities == DEFAULT_ENTITIES + @entities[ child.name ] = child if child.kind_of? Entity + end + + # This method retrieves the public identifier identifying the document's + # DTD. + # + # Method contributed by Henrik Martensson + def public + case @external_id + when "SYSTEM" + nil + when "PUBLIC" + strip_quotes(@long_name) + end + end + + # This method retrieves the system identifier identifying the document's DTD + # + # Method contributed by Henrik Martensson + def system + case @external_id + when "SYSTEM" + strip_quotes(@long_name) + when "PUBLIC" + @uri.kind_of?(String) ? strip_quotes(@uri) : nil + end + end + + # This method returns a list of notations that have been declared in the + # _internal_ DTD subset. Notations in the external DTD subset are not + # listed. + # + # Method contributed by Henrik Martensson + def notations + children().select {|node| node.kind_of?(REXML::NotationDecl)} + end + + # Retrieves a named notation. Only notations declared in the internal + # DTD subset can be retrieved. + # + # Method contributed by Henrik Martensson + def notation(name) + notations.find { |notation_decl| + notation_decl.name == name + } + end + + private + + # Method contributed by Henrik Martensson + def strip_quotes(quoted_string) + quoted_string =~ /^[\'\"].*[\´\"]$/ ? + quoted_string[1, quoted_string.length-2] : + quoted_string + end + end + + # We don't really handle any of these since we're not a validating + # parser, so we can be pretty dumb about them. All we need to be able + # to do is spew them back out on a write() + + # This is an abstract class. You never use this directly; it serves as a + # parent class for the specific declarations. + class Declaration < Child + def initialize src + super() + @string = src + end + + def to_s + @string+'>' + end + + def write( output, indent ) + output << (' '*indent) if indent > 0 + output << to_s + end + end + + public + class ElementDecl < Declaration + def initialize( src ) + super + end + end + + class ExternalEntity < Child + def initialize( src ) + super() + @entity = src + end + def to_s + @entity + end + def write( output, indent ) + output << @entity + end + end + + class NotationDecl < Child + attr_accessor :public, :system + def initialize name, middle, pub, sys + super(nil) + @name = name + @middle = middle + @public = pub + @system = sys + end + + def to_s + "<!NOTATION #@name #@middle#{ + @public ? ' ' + public.inspect : '' + }#{ + @system ? ' ' +@system.inspect : '' + }>" + end + + def write( output, indent=-1 ) + output << (' '*indent) if indent > 0 + output << to_s + end + + # This method retrieves the name of the notation. + # + # Method contributed by Henrik Martensson + def name + @name + end + end end diff --git a/lib/rexml/document.rb b/lib/rexml/document.rb index 1a63501905..619a844257 100644 --- a/lib/rexml/document.rb +++ b/lib/rexml/document.rb @@ -70,11 +70,23 @@ module REXML if child.kind_of? XMLDecl @children.unshift child elsif child.kind_of? DocType - if @children[0].kind_of? XMLDecl - @children[1,0] = child - else - @children.unshift child - end + # Find first Element or DocType node and insert the decl right + # before it. If there is no such node, just insert the child at the + # end. If there is a child and it is an DocType, then replace it. + insert_before_index = 0 + @children.find { |x| + insert_before_index += 1 + x.kind_of?(Element) || x.kind_of?(DocType) + } + if @children[ insert_before_index ] # Not null = not end of list + if @children[ insert_before_index ].kind_of DocType + @children[ insert_before_index ] = child + else + @children[ index_before_index-1, 0 ] = child + end + else # Insert at end of list + @children[insert_before_index] = child + end child.parent = self else rv = super diff --git a/lib/rexml/element.rb b/lib/rexml/element.rb index 0b025da475..80463d95b7 100644 --- a/lib/rexml/element.rb +++ b/lib/rexml/element.rb @@ -1224,5 +1224,20 @@ module REXML rv.each{ |attr| attr.remove } return rv end + + # The +get_attribute_ns+ method retrieves a method by its namespace + # and name. Thus it is possible to reliably identify an attribute + # even if an XML processor has changed the prefix. + # + # Method contributed by Henrik Martensson + def get_attribute_ns(namespace, name) + each_attribute() { |attribute| + if name == attribute.name && + namespace == attribute.namespace() + return attribute + end + } + nil + end end end diff --git a/lib/rexml/encoding.rb b/lib/rexml/encoding.rb index 541e152de0..644957439e 100644 --- a/lib/rexml/encoding.rb +++ b/lib/rexml/encoding.rb @@ -1,58 +1,64 @@ # -*- mode: ruby; ruby-indent-level: 2; indent-tabs-mode: t; tab-width: 2 -*- vim: sw=2 ts=2 module REXML - module Encoding - @encoding_methods = {} - def self.register(enc, &block) - @encoding_methods[enc] = block - end - def self.apply(obj, enc) - @encoding_methods[enc][obj] - end - def self.encoding_method(enc) - @encoding_methods[enc] - end + module Encoding + @encoding_methods = {} + def self.register(enc, &block) + @encoding_methods[enc] = block + end + def self.apply(obj, enc) + @encoding_methods[enc][obj] + end + def self.encoding_method(enc) + @encoding_methods[enc] + end - # Native, default format is UTF-8, so it is declared here rather than in - # an encodings/ definition. - UTF_8 = 'UTF-8' - UTF_16 = 'UTF-16' - UNILE = 'UNILE' + # Native, default format is UTF-8, so it is declared here rather than in + # an encodings/ definition. + UTF_8 = 'UTF-8' + UTF_16 = 'UTF-16' + UNILE = 'UNILE' - # ID ---> Encoding name - attr_reader :encoding - def encoding=( enc ) - old_verbosity = $VERBOSE - begin - $VERBOSE = false - return if defined? @encoding and enc == @encoding - if enc - raise ArgumentError, "Bad encoding name #{enc}" unless /\A[\w-]+\z/n =~ enc - @encoding = enc.upcase.untaint - else - @encoding = UTF_8 - end - err = nil - [@encoding, "ICONV"].each do |enc| - begin - require File.join("rexml", "encodings", "#{enc}.rb") - return Encoding.apply(self, enc) - rescue LoadError, Exception => err - end - end - puts err.message - raise ArgumentError, "No decoder found for encoding #@encoding. Please install iconv." - ensure - $VERBOSE = old_verbosity - end - end + # ID ---> Encoding name + attr_reader :encoding + def encoding=( enc ) + old_verbosity = $VERBOSE + begin + $VERBOSE = false + return if defined? @encoding and enc == @encoding + if enc and enc != UTF_8 + @encoding = enc.upcase + begin + require 'rexml/encodings/ICONV.rb' + Encoding.apply(self, "ICONV") + rescue LoadError, Exception => err + raise ArgumentError, "Bad encoding name #@encoding" unless @encoding =~ /^[\w-]+$/ + @encoding.untaint + enc_file = File.join( "rexml", "encodings", "#@encoding.rb" ) + begin + require enc_file + Encoding.apply(self, @encoding) + rescue LoadError + puts $!.message + raise ArgumentError, "No decoder found for encoding #@encoding. Please install iconv." + end + end + else + @encoding = UTF_8 + require 'rexml/encodings/UTF-8.rb' + Encoding.apply(self, @encoding) + end + ensure + $VERBOSE = old_verbosity + end + end - def check_encoding str - # We have to recognize UTF-16, LSB UTF-16, and UTF-8 - return UTF_16 if str[0] == 254 && str[1] == 255 - return UNILE if str[0] == 255 && str[1] == 254 - str =~ /^\s*<?xml\s*version=(['"]).*?\2\s*encoding=(["'])(.*?)\2/um - return $1.upcase if $1 - return UTF_8 - end - end + def check_encoding str + # We have to recognize UTF-16, LSB UTF-16, and UTF-8 + return UTF_16 if str[0] == 254 && str[1] == 255 + return UNILE if str[0] == 255 && str[1] == 254 + str =~ /^\s*<?xml\s*version=(['"]).*?\2\s*encoding=(["'])(.*?)\2/um + return $1.upcase if $1 + return UTF_8 + end + end end diff --git a/lib/rexml/functions.rb b/lib/rexml/functions.rb index 010926611e..c09ffdeae7 100644 --- a/lib/rexml/functions.rb +++ b/lib/rexml/functions.rb @@ -67,11 +67,10 @@ module REXML if node_set == nil yield @@context[:node] if defined? @@context[:node].namespace else - if node_set.namespace - yield node_set - else - return unless node_set.kind_of? Enumerable + if node_set.respond_to? :each node_set.each { |node| yield node if defined? node.namespace } + elsif node_set.respond_to? :namespace + yield node_set end end end diff --git a/lib/rexml/parent.rb b/lib/rexml/parent.rb index cc74a40666..a20aaaef6b 100644 --- a/lib/rexml/parent.rb +++ b/lib/rexml/parent.rb @@ -1,168 +1,166 @@ require "rexml/child" module REXML - # A parent has children, and has methods for accessing them. The Parent + # A parent has children, and has methods for accessing them. The Parent # class is never encountered except as the superclass for some other # object. - class Parent < Child - include Enumerable - - # Constructor - # @param parent if supplied, will be set as the parent of this object - def initialize parent=nil - super(parent) - @children = [] - end - - def add( object ) - #puts "PARENT GOTS #{size} CHILDREN" - object.parent = self - @children << object - #puts "PARENT NOW GOTS #{size} CHILDREN" - object - end - - alias :push :add - alias :<< :push - - def unshift( object ) - object.parent = self - @children.unshift object - end - - def delete( object ) + class Parent < Child + include Enumerable + + # Constructor + # @param parent if supplied, will be set as the parent of this object + def initialize parent=nil + super(parent) + @children = [] + end + + def add( object ) + #puts "PARENT GOTS #{size} CHILDREN" + object.parent = self + @children << object + #puts "PARENT NOW GOTS #{size} CHILDREN" + object + end + + alias :push :add + alias :<< :push + + def unshift( object ) + object.parent = self + @children.unshift object + end + + def delete( object ) found = false - @children.delete_if {|c| - c.equal?(object) and found = true - } - object.parent = nil if found - end - - def each(&block) - @children.each(&block) - end - - def delete_if( &block ) - @children.delete_if(&block) - end - - def delete_at( index ) - @children.delete_at index - end - - def each_index( &block ) - @children.each_index(&block) - end - - # Fetches a child at a given index - # @param index the Integer index of the child to fetch - def []( index ) - @children[index] - end - - alias :each_child :each - - - - # Set an index entry. See Array.[]= - # @param index the index of the element to set - # @param opt either the object to set, or an Integer length - # @param child if opt is an Integer, this is the child to set - # @return the parent (self) - def []=( *args ) - args[-1].parent = self - @children[*args[0..-2]] = args[-1] - end - - # Inserts an child before another child - # @param child1 this is either an xpath or an Element. If an Element, - # child2 will be inserted before child1 in the child list of the parent. - # If an xpath, child2 will be inserted before the first child to match - # the xpath. - # @param child2 the child to insert - # @return the parent (self) - def insert_before( child1, child2 ) - if child1.kind_of? String - child1 = XPath.first( self, child1 ) - child1.parent.insert_before child1, child2 - else - ind = index(child1) - child2.parent.delete(child2) if child2.parent - @children[ind,0] = child2 - child2.parent = self - end - self - end - - # Inserts an child after another child - # @param child1 this is either an xpath or an Element. If an Element, - # child2 will be inserted after child1 in the child list of the parent. - # If an xpath, child2 will be inserted after the first child to match - # the xpath. - # @param child2 the child to insert - # @return the parent (self) - def insert_after( child1, child2 ) - if child1.kind_of? String - child1 = XPath.first( self, child1 ) - child1.parent.insert_after child1, child2 - else - ind = index(child1)+1 - child2.parent.delete(child2) if child2.parent - @children[ind,0] = child2 - child2.parent = self - end - self - end - - def to_a - @children.dup - end - - # Fetches the index of a given child - # @param child the child to get the index of - # @return the index of the child, or nil if the object is not a child - # of this parent. - def index( child ) - count = -1 - @children.find { |i| count += 1 ; i.hash == child.hash } - count - end - - # @return the number of children of this parent - def size - @children.size - end - + @children.delete_if {|c| c.equal?(object) and found = true } + object.parent = nil if found + end + + def each(&block) + @children.each(&block) + end + + def delete_if( &block ) + @children.delete_if(&block) + end + + def delete_at( index ) + @children.delete_at index + end + + def each_index( &block ) + @children.each_index(&block) + end + + # Fetches a child at a given index + # @param index the Integer index of the child to fetch + def []( index ) + @children[index] + end + + alias :each_child :each + + + + # Set an index entry. See Array.[]= + # @param index the index of the element to set + # @param opt either the object to set, or an Integer length + # @param child if opt is an Integer, this is the child to set + # @return the parent (self) + def []=( *args ) + args[-1].parent = self + @children[*args[0..-2]] = args[-1] + end + + # Inserts an child before another child + # @param child1 this is either an xpath or an Element. If an Element, + # child2 will be inserted before child1 in the child list of the parent. + # If an xpath, child2 will be inserted before the first child to match + # the xpath. + # @param child2 the child to insert + # @return the parent (self) + def insert_before( child1, child2 ) + if child1.kind_of? String + child1 = XPath.first( self, child1 ) + child1.parent.insert_before child1, child2 + else + ind = index(child1) + child2.parent.delete(child2) if child2.parent + @children[ind,0] = child2 + child2.parent = self + end + self + end + + # Inserts an child after another child + # @param child1 this is either an xpath or an Element. If an Element, + # child2 will be inserted after child1 in the child list of the parent. + # If an xpath, child2 will be inserted after the first child to match + # the xpath. + # @param child2 the child to insert + # @return the parent (self) + def insert_after( child1, child2 ) + if child1.kind_of? String + child1 = XPath.first( self, child1 ) + child1.parent.insert_after child1, child2 + else + ind = index(child1)+1 + child2.parent.delete(child2) if child2.parent + @children[ind,0] = child2 + child2.parent = self + end + self + end + + def to_a + @children.dup + end + + # Fetches the index of a given child + # @param child the child to get the index of + # @return the index of the child, or nil if the object is not a child + # of this parent. + def index( child ) + count = -1 + @children.find { |i| count += 1 ; i.hash == child.hash } + count + end + + # @return the number of children of this parent + def size + @children.size + end + alias :length :size - - # Replaces one child with another, making sure the nodelist is correct - # @param to_replace the child to replace (must be a Child) - # @param replacement the child to insert into the nodelist (must be a - # Child) - def replace_child( to_replace, replacement ) + + # Replaces one child with another, making sure the nodelist is correct + # @param to_replace the child to replace (must be a Child) + # @param replacement the child to insert into the nodelist (must be a + # Child) + def replace_child( to_replace, replacement ) @children.map! {|c| c.equal?( to_replace ) ? replacement : c } to_replace.parent = nil replacement.parent = self - end - - # Deeply clones this object. This creates a complete duplicate of this - # Parent, including all descendants. - def deep_clone - cl = clone() - each do |child| - if child.kind_of? Parent - cl << child.deep_clone - else - cl << child.clone - end - end - cl - end - - alias :children :to_a - - def parent? - true - end - end + end + + # Deeply clones this object. This creates a complete duplicate of this + # Parent, including all descendants. + def deep_clone + cl = clone() + each do |child| + if child.kind_of? Parent + cl << child.deep_clone + else + cl << child.clone + end + end + cl + end + + alias :children :to_a + + def parent? + true + end + end end diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb index c898ba0b60..bce4ba4c20 100644 --- a/lib/rexml/parsers/baseparser.rb +++ b/lib/rexml/parsers/baseparser.rb @@ -42,7 +42,7 @@ module REXML CDATA_END = /^\s*\]\s*>/um CDATA_PATTERN = /<!\[CDATA\[(.*?)\]\]>/um XMLDECL_START = /\A<\?xml\s/u; - XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>*/um + XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>/um INSTRUCTION_START = /\A<\?/u INSTRUCTION_PATTERN = /<\?(.*?)(\s+.*?)?\?>/um TAG_MATCH = /^<((?>#{NAME_STR}))\s*((?>\s+#{NAME_STR}\s*=\s*(["']).*?\3)*)\s*(\/)?>/um @@ -68,8 +68,8 @@ module REXML ATTLISTDECL_START = /^\s*<!ATTLIST/um ATTLISTDECL_PATTERN = /^\s*<!ATTLIST\s+#{NAME}(?:#{ATTDEF})*\s*>/um NOTATIONDECL_START = /^\s*<!NOTATION/um - PUBLIC = /^\s*<!NOTATION\s+(\w[\-\w]*)\s+(PUBLIC)\s+((["']).*?\4)\s*>/um - SYSTEM = /^\s*<!NOTATION\s+(\w[\-\w]*)\s+(SYSTEM)\s+((["']).*?\4)\s*>/um + PUBLIC = /^\s*<!NOTATION\s+(\w[\-\w]*)\s+(PUBLIC)\s+(["'])(.*?)\3(?:\s+(["'])(.*?)\5)?\s*>/um + SYSTEM = /^\s*<!NOTATION\s+(\w[\-\w]*)\s+(SYSTEM)\s+(["'])(.*?)\3\s*>/um TEXT_PATTERN = /\A([^<]*)/um @@ -120,20 +120,7 @@ module REXML attr_reader :source def stream=( source ) - if source.kind_of? String - @source = Source.new(source) - elsif source.kind_of? IO - @source = IOSource.new(source) - elsif source.kind_of? Source - @source = source - elsif defined? StringIO and source.kind_of? StringIO - @source = IOSource.new(source) - elsif defined? Tempfile and source.kind_of? Tempfile - @source = IOSource.new(source) - else - raise "#{source.class} is not a valid input stream. It must be \n"+ - "either a String, IO, StringIO or Source." - end + @source = SourceFactory.create_from( source ) @closed = nil @document_status = nil @tags = [] @@ -152,8 +139,8 @@ module REXML # Returns true if there are no more events def empty? - #puts "@source.empty? = #{@source.empty?}" - #puts "@stack.empty? = #{@stack.empty?}" + #STDERR.puts "@source.empty? = #{@source.empty?}" + #STDERR.puts "@stack.empty? = #{@stack.empty?}" return (@source.empty? and @stack.empty?) end @@ -197,14 +184,17 @@ module REXML return [ :end_document ] if empty? return @stack.shift if @stack.size > 0 @source.read if @source.buffer.size<2 + #STDERR.puts "BUFFER = #{@source.buffer.inspect}" if @document_status == nil - @source.consume( /^\s*/um ) - word = @source.match( /(<[^>]*)>/um ) + #@source.consume( /^\s*/um ) + word = @source.match( /^((?:\s+)|(?:<[^>]*>))/um ) word = word[1] unless word.nil? + #STDERR.puts "WORD = #{word.inspect}" case word when COMMENT_START return [ :comment, @source.match( COMMENT_PATTERN, true )[1] ] when XMLDECL_START + #STDERR.puts "XMLDECL" results = @source.match( XMLDECL_PATTERN, true )[1] version = VERSION.match( results ) version = version[1] unless version.nil? @@ -213,7 +203,7 @@ module REXML @source.encoding = encoding standalone = STANDALONE.match(results) standalone = standalone[1] unless standalone.nil? - return [ :xmldecl, version, encoding, standalone] + return [ :xmldecl, version, encoding, standalone ] when INSTRUCTION_START return [ :processing_instruction, *@source.match(INSTRUCTION_PATTERN, true)[1,2] ] when DOCTYPE_START @@ -236,6 +226,7 @@ module REXML @document_status = :in_doctype end return args + when /^\s+/ else @document_status = :after_doctype @source.read if @source.buffer.size<2 @@ -299,12 +290,14 @@ module REXML md = nil if @source.match( PUBLIC ) md = @source.match( PUBLIC, true ) + vals = [md[1],md[2],md[4],md[6]] elsif @source.match( SYSTEM ) md = @source.match( SYSTEM, true ) + vals = [md[1],md[2],nil,md[4]] else raise REXML::ParseException.new( "error parsing notation: no matching pattern", @source ) end - return [ :notationdecl, md[1], md[2], md[3] ] + return [ :notationdecl, *vals ] when CDATA_END @document_status = :after_doctype @source.match( CDATA_END, true ) @@ -323,7 +316,7 @@ module REXML return [ :end_element, last_tag ] elsif @source.buffer[1] == ?! md = @source.match(/\A(\s*[^>]*>)/um) - #puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}" + #STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}" raise REXML::ParseException.new("Malformed node", @source) unless md if md[0][2] == ?- md = @source.match( COMMENT_PATTERN, true ) @@ -361,10 +354,11 @@ module REXML else md = @source.match( TEXT_PATTERN, true ) if md[0].length == 0 - #puts "EMPTY = #{empty?}" - #puts "BUFFER = \"#{@source.buffer}\"" + puts "EMPTY = #{empty?}" + puts "BUFFER = \"#{@source.buffer}\"" @source.match( /(\s+)/, true ) end + #STDERR.puts "GOT #{md[1].inspect}" unless md[0].length == 0 #return [ :text, "" ] if md[0].length == 0 # unnormalized = Text::unnormalize( md[1], self ) # return PullEvent.new( :text, md[1], unnormalized ) diff --git a/lib/rexml/parsers/streamparser.rb b/lib/rexml/parsers/streamparser.rb index 996d613e15..256d0f611c 100644 --- a/lib/rexml/parsers/streamparser.rb +++ b/lib/rexml/parsers/streamparser.rb @@ -1,42 +1,46 @@ module REXML - module Parsers - class StreamParser - def initialize source, listener - @listener = listener - @parser = BaseParser.new( source ) - end - + module Parsers + class StreamParser + def initialize source, listener + @listener = listener + @parser = BaseParser.new( source ) + end + def add_listener( listener ) @parser.add_listener( listener ) end - - def parse - # entity string - while true - event = @parser.pull - case event[0] - when :end_document - return - when :start_element - attrs = event[2].each do |n, v| - event[2][n] = @parser.unnormalize( v ) - end - @listener.tag_start( event[1], attrs ) - when :end_element - @listener.tag_end( event[1] ) - when :text - normalized = @parser.unnormalize( event[1] ) - @listener.text( normalized ) - when :processing_instruction - @listener.instruction( *event[1,2] ) + + def parse + # entity string + while true + event = @parser.pull + case event[0] + when :end_document + return + when :start_element + attrs = event[2].each do |n, v| + event[2][n] = @parser.unnormalize( v ) + end + @listener.tag_start( event[1], attrs ) + when :end_element + @listener.tag_end( event[1] ) + when :text + normalized = @parser.unnormalize( event[1] ) + @listener.text( normalized ) + when :processing_instruction + @listener.instruction( *event[1,2] ) when :start_doctype @listener.doctype( *event[1..-1] ) - when :comment, :attlistdecl, :notationdecl, :elementdecl, - :entitydecl, :cdata, :xmldecl, :attlistdecl - @listener.send( event[0].to_s, *event[1..-1] ) - end - end - end - end - end + when :end_doctype + # FIXME: remove this condition for milestone:3.2 + @listener.doctype_end if @listener.respond_to? :doctype_end + when :comment, :attlistdecl, :cdata, :xmldecl, :elementdecl + @listener.send( event[0].to_s, *event[1..-1] ) + when :entitydecl, :notationdecl + @listener.send( event[0].to_s, event[1..-1] ) + end + end + end + end + end end diff --git a/lib/rexml/parsers/treeparser.rb b/lib/rexml/parsers/treeparser.rb index 57d11f7e23..500a53f426 100644 --- a/lib/rexml/parsers/treeparser.rb +++ b/lib/rexml/parsers/treeparser.rb @@ -19,8 +19,12 @@ module REXML begin while true event = @parser.pull + #STDERR.puts "TREEPARSER GOT #{event.inspect}" case event[0] when :end_document + unless tag_stack.empty? + raise ParseException.new("No close tag for #{tag_stack.inspect}") + end return when :start_element tag_stack.push(event[1]) @@ -35,10 +39,10 @@ module REXML @build_context[-1] << event[1] else @build_context.add( - Text.new( event[1], @build_context.whitespace, nil, true ) + Text.new(event[1], @build_context.whitespace, nil, true) ) unless ( - event[1].strip.size==0 and - @build_context.ignore_whitespace_nodes + @build_context.ignore_whitespace_nodes and + event[1].strip.size==0 ) end end diff --git a/lib/rexml/rexml.rb b/lib/rexml/rexml.rb index 285d50cea5..ca154443b5 100644 --- a/lib/rexml/rexml.rb +++ b/lib/rexml/rexml.rb @@ -10,8 +10,8 @@ # # Main page:: http://www.germane-software.com/software/rexml # Author:: Sean Russell <serATgermaneHYPHENsoftwareDOTcom> -# Version:: 3.1.3.1 -# Date:: 2005/364 +# Version:: 3.1.4 +# Date:: 2006/104 # # This API documentation can be downloaded from the REXML home page, or can # be accessed online[http://www.germane-software.com/software/rexml_doc] @@ -20,7 +20,10 @@ # or can be accessed # online[http://www.germane-software.com/software/rexml/docs/tutorial.html] module REXML - Copyright = "Copyright © 2001-2005 Sean Russell <ser@germane-software.com>" - Date = "2005/364" - Version = "3.1.3.1" + COPYRIGHT = "Copyright © 2001-2006 Sean Russell <ser@germane-software.com>" + DATE = "2006/104" + VERSION = "3.1.4" + + Copyright = COPYRIGHT + Version = VERSION end diff --git a/lib/rexml/sax2listener.rb b/lib/rexml/sax2listener.rb index 339ae64121..9a992917e6 100644 --- a/lib/rexml/sax2listener.rb +++ b/lib/rexml/sax2listener.rb @@ -84,6 +84,7 @@ module REXML # @p version the version attribute value. EG, "1.0" # @p encoding the encoding attribute value, or nil. EG, "utf" # @p standalone the standalone attribute value, or nil. EG, nil + # @p spaced the declaration is followed by a line break def xmldecl version, encoding, standalone end # Called when a comment is encountered. diff --git a/lib/rexml/source.rb b/lib/rexml/source.rb index ddade5de0e..cacab221db 100644 --- a/lib/rexml/source.rb +++ b/lib/rexml/source.rb @@ -7,13 +7,19 @@ module REXML # @param arg Either a String, or an IO # @return a Source, or nil if a bad argument was given def SourceFactory::create_from arg#, slurp=true - if arg.kind_of? String + if arg.kind_of? String Source.new(arg) - elsif arg.kind_of? IO + elsif arg.respond_to? :read and + arg.respond_to? :readline and + arg.respond_to? :nil? and + arg.respond_to? :eof? IOSource.new(arg) elsif arg.kind_of? Source arg - end + else + raise "#{source.class} is not a valid input stream. It must walk \n"+ + "like either a String, IO, or Source." + end end end diff --git a/lib/rexml/streamlistener.rb b/lib/rexml/streamlistener.rb index 3c3c5e3684..6f401125b5 100644 --- a/lib/rexml/streamlistener.rb +++ b/lib/rexml/streamlistener.rb @@ -39,6 +39,9 @@ module REXML # @p uri the uri of the doctype, or nil. EG, "bar" def doctype name, pub_sys, long_name, uri end + # Called when the doctype is done + def doctype_end + end # If a doctype includes an ATTLIST declaration, it will cause this # method to be called. The content is the declaration itself, unparsed. # EG, <!ATTLIST el attr CDATA #REQUIRED> will come to this method as "el diff --git a/lib/rexml/text.rb b/lib/rexml/text.rb index 5d200deac6..55bc9f50f8 100644 --- a/lib/rexml/text.rb +++ b/lib/rexml/text.rb @@ -284,9 +284,10 @@ module REXML EREFERENCE = /&(?!#{Entity::NAME};)/ # Escapes all possible entities def Text::normalize( input, doctype=nil, entity_filter=nil ) - copy = input.clone + copy = input # Doing it like this rather than in a loop improves the speed if doctype + # Replace all ampersands that aren't part of an entity copy = copy.gsub( EREFERENCE, '&' ) doctype.entities.each_value do |entity| copy = copy.gsub( entity.value, @@ -294,6 +295,7 @@ module REXML not( entity_filter and entity_filter.include?(entity) ) end else + # Replace all ampersands that aren't part of an entity copy = copy.gsub( EREFERENCE, '&' ) DocType::DEFAULT_ENTITIES.each_value do |entity| copy = copy.gsub(entity.value, "&#{entity.name};" ) diff --git a/lib/rexml/xmldecl.rb b/lib/rexml/xmldecl.rb index 1b7ef813d1..b65604b762 100644 --- a/lib/rexml/xmldecl.rb +++ b/lib/rexml/xmldecl.rb @@ -80,6 +80,11 @@ module REXML self.dowrite end + # Only use this if you do not want the XML declaration to be written; + # this object is ignored by the XML writer. Otherwise, instantiate your + # own XMLDecl and add it to the document. + # + # Note that XML 1.1 documents *must* include an XML declaration def XMLDecl.default rv = XMLDecl.new( "1.0" ) rv.nowrite diff --git a/lib/rexml/xpath_parser.rb b/lib/rexml/xpath_parser.rb index fbfe608746..98ed70cc10 100644 --- a/lib/rexml/xpath_parser.rb +++ b/lib/rexml/xpath_parser.rb @@ -380,10 +380,13 @@ module REXML return @variables[ var_name ] # :and, :or, :eq, :neq, :lt, :lteq, :gt, :gteq + # TODO: Special case for :or and :and -- not evaluate the right + # operand if the left alone determines result (i.e. is true for + # :or and false for :and). when :eq, :neq, :lt, :lteq, :gt, :gteq, :and, :or - left = expr( path_stack.shift, nodeset, context ) + left = expr( path_stack.shift, nodeset.dup, context ) #puts "LEFT => #{left.inspect} (#{left.class.name})" - right = expr( path_stack.shift, nodeset, context ) + right = expr( path_stack.shift, nodeset.dup, context ) #puts "RIGHT => #{right.inspect} (#{right.class.name})" res = equality_relational_compare( left, op, right ) #puts "RES => #{res.inspect}" @@ -467,8 +470,11 @@ module REXML def descendant_or_self( path_stack, nodeset ) rs = [] + #puts "#"*80 + #puts "PATH_STACK = #{path_stack.inspect}" + #puts "NODESET = #{nodeset.collect{|n|n.inspect}.inspect}" d_o_s( path_stack, nodeset, rs ) - #puts "RS = #{rs.collect{|n|n.to_s}.inspect}" + #puts "RS = #{rs.collect{|n|n.inspect}.inspect}" document_order(rs.flatten.compact) #rs.flatten.compact end |