summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/rexml/cdata.rb4
-rw-r--r--lib/rexml/doctype.rb433
-rw-r--r--lib/rexml/document.rb22
-rw-r--r--lib/rexml/element.rb15
-rw-r--r--lib/rexml/encoding.rb110
-rw-r--r--lib/rexml/functions.rb7
-rw-r--r--lib/rexml/parent.rb312
-rw-r--r--lib/rexml/parsers/baseparser.rb46
-rw-r--r--lib/rexml/parsers/streamparser.rb74
-rw-r--r--lib/rexml/parsers/treeparser.rb10
-rw-r--r--lib/rexml/rexml.rb13
-rw-r--r--lib/rexml/sax2listener.rb1
-rw-r--r--lib/rexml/source.rb12
-rw-r--r--lib/rexml/streamlistener.rb3
-rw-r--r--lib/rexml/text.rb4
-rw-r--r--lib/rexml/xmldecl.rb5
-rw-r--r--lib/rexml/xpath_parser.rb12
17 files changed, 605 insertions, 478 deletions
diff --git a/lib/rexml/cdata.rb b/lib/rexml/cdata.rb
index ffedac1b53..046012ba61 100644
--- a/lib/rexml/cdata.rb
+++ b/lib/rexml/cdata.rb
@@ -35,6 +35,10 @@ module REXML
@string
end
+ # Returns the string held in @string.
+ def value
+ @string
+ end
+
# Generates XML output of this object
#
# output::
diff --git a/lib/rexml/doctype.rb b/lib/rexml/doctype.rb
index 652a04fce2..4a1ffb4336 100644
--- a/lib/rexml/doctype.rb
+++ b/lib/rexml/doctype.rb
@@ -6,55 +6,55 @@ require 'rexml/attlistdecl'
require 'rexml/xmltokens'
module REXML
- # Represents an XML DOCTYPE declaration; that is, the contents of <!DOCTYPE
- # ... >. DOCTYPES can be used to declare the DTD of a document, as well as
- # being used to declare entities used in the document.
- class DocType < Parent
- include XMLTokens
- START = "<!DOCTYPE"
- STOP = ">"
- SYSTEM = "SYSTEM"
- PUBLIC = "PUBLIC"
- DEFAULT_ENTITIES = {
- 'gt'=>EntityConst::GT,
- 'lt'=>EntityConst::LT,
- 'quot'=>EntityConst::QUOT,
- "apos"=>EntityConst::APOS
- }
-
- # name is the name of the doctype
- # external_id is the referenced DTD, if given
- attr_reader :name, :external_id, :entities, :namespaces
-
- # Constructor
- #
- # dt = DocType.new( 'foo', '-//I/Hate/External/IDs' )
- # # <!DOCTYPE foo '-//I/Hate/External/IDs'>
- # dt = DocType.new( doctype_to_clone )
- # # Incomplete. Shallow clone of doctype
+ # Represents an XML DOCTYPE declaration; that is, the contents of <!DOCTYPE
+ # ... >. DOCTYPES can be used to declare the DTD of a document, as well as
+ # being used to declare entities used in the document.
+ class DocType < Parent
+ include XMLTokens
+ START = "<!DOCTYPE"
+ STOP = ">"
+ SYSTEM = "SYSTEM"
+ PUBLIC = "PUBLIC"
+ DEFAULT_ENTITIES = {
+ 'gt'=>EntityConst::GT,
+ 'lt'=>EntityConst::LT,
+ 'quot'=>EntityConst::QUOT,
+ "apos"=>EntityConst::APOS
+ }
+
+ # name is the name of the doctype
+ # external_id is the referenced DTD, if given
+ attr_reader :name, :external_id, :entities, :namespaces
+
+ # Constructor
+ #
+ # dt = DocType.new( 'foo', '-//I/Hate/External/IDs' )
+ # # <!DOCTYPE foo '-//I/Hate/External/IDs'>
+ # dt = DocType.new( doctype_to_clone )
+ # # Incomplete. Shallow clone of doctype
#
# +Note+ that the constructor:
#
# Doctype.new( Source.new( "<!DOCTYPE foo 'bar'>" ) )
#
# is _deprecated_. Do not use it. It will probably disappear.
- def initialize( first, parent=nil )
- @entities = DEFAULT_ENTITIES
- @long_name = @uri = nil
- if first.kind_of? String
- super()
- @name = first
- @external_id = parent
- elsif first.kind_of? DocType
- super( parent )
- @name = first.name
- @external_id = first.external_id
- elsif first.kind_of? Array
- super( parent )
- @name = first[0]
- @external_id = first[1]
- @long_name = first[2]
- @uri = first[3]
+ def initialize( first, parent=nil )
+ @entities = DEFAULT_ENTITIES
+ @long_name = @uri = nil
+ if first.kind_of? String
+ super()
+ @name = first
+ @external_id = parent
+ elsif first.kind_of? DocType
+ super( parent )
+ @name = first.name
+ @external_id = first.external_id
+ elsif first.kind_of? Array
+ super( parent )
+ @name = first[0]
+ @external_id = first[1]
+ @long_name = first[2]
+ @uri = first[3]
elsif first.kind_of? Source
super( parent )
parser = Parsers::BaseParser.new( first )
@@ -64,150 +64,215 @@ module REXML
end
else
super()
- end
- end
-
- def node_type
- :doctype
- end
-
- def attributes_of element
- rv = []
- each do |child|
- child.each do |key,val|
- rv << Attribute.new(key,val)
- end if child.kind_of? AttlistDecl and child.element_name == element
- end
- rv
- end
-
- def attribute_of element, attribute
- att_decl = find do |child|
- child.kind_of? AttlistDecl and
- child.element_name == element and
- child.include? attribute
- end
- return nil unless att_decl
- att_decl[attribute]
- end
-
- def clone
- DocType.new self
- end
-
- # output::
- # Where to write the string
- # indent::
- # An integer. If -1, no indenting will be used; otherwise, the
- # indentation will be this number of spaces, and children will be
- # indented an additional amount.
- # transitive::
- # If transitive is true and indent is >= 0, then the output will be
- # pretty-printed in such a way that the added whitespace does not affect
- # the absolute *value* of the document -- that is, it leaves the value
- # and number of Text nodes in the document unchanged.
- # ie_hack::
- # Internet Explorer is the worst piece of crap to have ever been
- # written, with the possible exception of Windows itself. Since IE is
- # unable to parse proper XML, we have to provide a hack to generate XML
- # that IE's limited abilities can handle. This hack inserts a space
- # before the /> on empty tags.
- #
- def write( output, indent=0, transitive=false, ie_hack=false )
- indent( output, indent )
- output << START
- output << ' '
- output << @name
- output << " #@external_id" if @external_id
- output << " #@long_name" if @long_name
- output << " #@uri" if @uri
- unless @children.empty?
- next_indent = indent + 1
- output << ' ['
- child = nil # speed
- @children.each { |child|
- output << "\n"
- child.write( output, next_indent )
- }
- output << "\n"
- #output << ' '*next_indent
- output << "]"
- end
- output << STOP
- end
+ end
+ end
+
+ def node_type
+ :doctype
+ end
+
+ def attributes_of element
+ rv = []
+ each do |child|
+ child.each do |key,val|
+ rv << Attribute.new(key,val)
+ end if child.kind_of? AttlistDecl and child.element_name == element
+ end
+ rv
+ end
+
+ def attribute_of element, attribute
+ att_decl = find do |child|
+ child.kind_of? AttlistDecl and
+ child.element_name == element and
+ child.include? attribute
+ end
+ return nil unless att_decl
+ att_decl[attribute]
+ end
+
+ def clone
+ DocType.new self
+ end
+
+ # output::
+ # Where to write the string
+ # indent::
+ # An integer. If -1, no indenting will be used; otherwise, the
+ # indentation will be this number of spaces, and children will be
+ # indented an additional amount.
+ # transitive::
+ # If transitive is true and indent is >= 0, then the output will be
+ # pretty-printed in such a way that the added whitespace does not affect
+ # the absolute *value* of the document -- that is, it leaves the value
+ # and number of Text nodes in the document unchanged.
+ # ie_hack::
+ # Internet Explorer is the worst piece of crap to have ever been
+ # written, with the possible exception of Windows itself. Since IE is
+ # unable to parse proper XML, we have to provide a hack to generate XML
+ # that IE's limited abilities can handle. This hack inserts a space
+ # before the /> on empty tags.
+ #
+ def write( output, indent=0, transitive=false, ie_hack=false )
+ indent( output, indent )
+ output << START
+ output << ' '
+ output << @name
+ output << " #@external_id" if @external_id
+ output << " #@long_name" if @long_name
+ output << " #@uri" if @uri
+ unless @children.empty?
+ next_indent = indent + 1
+ output << ' ['
+ child = nil # speed
+ @children.each { |child|
+ output << "\n"
+ child.write( output, next_indent )
+ }
+ #output << ' '*next_indent
+ output << "\n]"
+ end
+ output << STOP
+ end
def context
@parent.context
end
- def entity( name )
- @entities[name].unnormalized if @entities[name]
- end
-
- def add child
- super(child)
- @entities = DEFAULT_ENTITIES.clone if @entities == DEFAULT_ENTITIES
- @entities[ child.name ] = child if child.kind_of? Entity
- end
- end
-
- # We don't really handle any of these since we're not a validating
- # parser, so we can be pretty dumb about them. All we need to be able
- # to do is spew them back out on a write()
-
- # This is an abstract class. You never use this directly; it serves as a
- # parent class for the specific declarations.
- class Declaration < Child
- def initialize src
- super()
- @string = src
- end
-
- def to_s
- @string+'>'
- end
-
- def write( output, indent )
- output << (' '*indent) if indent > 0
- output << to_s
- end
- end
-
- public
- class ElementDecl < Declaration
- def initialize( src )
- super
- end
- end
-
- class ExternalEntity < Child
- def initialize( src )
- super()
- @entity = src
- end
- def to_s
- @entity
- end
- def write( output, indent )
- output << @entity
- output << "\n"
- end
- end
-
- class NotationDecl < Child
- def initialize name, middle, rest
- @name = name
- @middle = middle
- @rest = rest
- end
-
- def to_s
- "<!NOTATION #@name '#@middle #@rest'>"
- end
-
- def write( output, indent=-1 )
- output << (' '*indent) if indent > 0
- output << to_s
- end
- end
+ def entity( name )
+ @entities[name].unnormalized if @entities[name]
+ end
+
+ def add child
+ super(child)
+ @entities = DEFAULT_ENTITIES.clone if @entities == DEFAULT_ENTITIES
+ @entities[ child.name ] = child if child.kind_of? Entity
+ end
+
+ # This method retrieves the public identifier identifying the document's
+ # DTD.
+ #
+ # Returns @long_name passed through strip_quotes when @external_id is
+ # "PUBLIC". Returns nil when @external_id is "SYSTEM" or any other value
+ # (the case statement has no else branch).
+ #
+ # Method contributed by Henrik Martensson
+ def public
+ case @external_id
+ when "SYSTEM"
+ nil
+ when "PUBLIC"
+ strip_quotes(@long_name)
+ end
+ end
+
+ # This method retrieves the system identifier identifying the document's DTD
+ #
+ # When @external_id is "SYSTEM" the identifier is taken from @long_name;
+ # when it is "PUBLIC" it is taken from @uri, and nil is returned if @uri
+ # is not a String. Any other @external_id yields nil (no else branch).
+ # The value is passed through strip_quotes before being returned.
+ #
+ # Method contributed by Henrik Martensson
+ def system
+ case @external_id
+ when "SYSTEM"
+ strip_quotes(@long_name)
+ when "PUBLIC"
+ @uri.kind_of?(String) ? strip_quotes(@uri) : nil
+ end
+ end
+
+ # This method returns a list of notations that have been declared in the
+ # _internal_ DTD subset. Notations in the external DTD subset are not
+ # listed.
+ #
+ # Method contributed by Henrik Martensson
+ def notations
+ children().select {|node| node.kind_of?(REXML::NotationDecl)}
+ end
+
+ # Retrieves a named notation. Only notations declared in the internal
+ # DTD subset can be retrieved.
+ #
+ # Method contributed by Henrik Martensson
+ def notation(name)
+ notations.find { |notation_decl|
+ notation_decl.name == name
+ }
+ end
+
+ private
+
+ # Removes one leading and one trailing quote character (' or ") from
+ # +quoted_string+ when it both starts and ends with one. Any other value
+ # is returned unchanged.
+ #
+ # Method contributed by Henrik Martensson
+ def strip_quotes(quoted_string)
+   # The closing class must accept the ASCII apostrophe; it previously
+   # listed an acute accent, so 'single quoted' ids were never stripped.
+   quoted_string =~ /^['"].*['"]$/ ?
+     quoted_string[1, quoted_string.length-2] :
+     quoted_string
+ end
+ end
+
+ # We don't really handle any of these since we're not a validating
+ # parser, so we can be pretty dumb about them. All we need to be able
+ # to do is spew them back out on a write()
+
+ # This is an abstract class. You never use this directly; it serves as a
+ # parent class for the specific declarations.
+ class Declaration < Child
+ def initialize src
+ super()
+ @string = src
+ end
+
+ def to_s
+ @string+'>'
+ end
+
+ def write( output, indent )
+ output << (' '*indent) if indent > 0
+ output << to_s
+ end
+ end
+
+ public
+ class ElementDecl < Declaration
+ def initialize( src )
+ super
+ end
+ end
+
+ class ExternalEntity < Child
+ def initialize( src )
+ super()
+ @entity = src
+ end
+ def to_s
+ @entity
+ end
+ def write( output, indent )
+ output << @entity
+ end
+ end
+
+ # A <!NOTATION ...> declaration. Holds the notation name, the keyword that
+ # follows it, and the optional public and system identifiers, and writes
+ # them back out as a declaration.
+ class NotationDecl < Child
+   attr_accessor :public, :system
+
+   # name::   the notation name
+   # middle:: the text written between the name and the identifiers
+   # pub::    the public identifier, or nil
+   # sys::    the system identifier, or nil
+   def initialize name, middle, pub, sys
+     super(nil)
+     @name = name
+     @middle = middle
+     @public = pub
+     @system = sys
+   end
+
+   # Returns the declaration as a string. Identifiers that are nil (or
+   # false) are omitted.
+   def to_s
+     "<!NOTATION #@name #@middle#{
+       @public ? ' ' + quote_literal(@public) : ''
+     }#{
+       @system ? ' ' + quote_literal(@system) : ''
+     }>"
+   end
+
+   def write( output, indent=-1 )
+     output << (' '*indent) if indent > 0
+     output << to_s
+   end
+
+   # This method retrieves the name of the notation.
+   #
+   # Method contributed by Henrik Martensson
+   def name
+     @name
+   end
+
+   private
+
+   # Wraps +literal+ in quotes without altering its characters.
+   # String#inspect is not used because it adds Ruby backslash escapes,
+   # which are not part of XML literal syntax. Double quotes are used
+   # unless the literal itself contains one, in which case single quotes
+   # are used.
+   def quote_literal( literal )
+     text = literal.to_s
+     text.include?('"') ? "'#{text}'" : "\"#{text}\""
+   end
+ end
end
diff --git a/lib/rexml/document.rb b/lib/rexml/document.rb
index 1a63501905..619a844257 100644
--- a/lib/rexml/document.rb
+++ b/lib/rexml/document.rb
@@ -70,11 +70,23 @@ module REXML
if child.kind_of? XMLDecl
@children.unshift child
elsif child.kind_of? DocType
- if @children[0].kind_of? XMLDecl
- @children[1,0] = child
- else
- @children.unshift child
- end
+ # Find the first Element or DocType node. If it is a DocType, replace
+ # it with the new one; otherwise insert the new DocType right before
+ # it. If there is no such node, append the child at the end.
+ insert_before_index = nil
+ @children.each_with_index { |x, i|
+   if x.kind_of?(Element) || x.kind_of?(DocType)
+     insert_before_index = i
+     break
+   end
+ }
+ if insert_before_index.nil? # No Element or DocType yet: end of list
+   @children << child
+ elsif @children[ insert_before_index ].kind_of? DocType
+   @children[ insert_before_index ] = child
+ else
+   # A zero-length slice assignment inserts without overwriting.
+   @children[ insert_before_index, 0 ] = child
+ end
child.parent = self
else
rv = super
diff --git a/lib/rexml/element.rb b/lib/rexml/element.rb
index 0b025da475..80463d95b7 100644
--- a/lib/rexml/element.rb
+++ b/lib/rexml/element.rb
@@ -1224,5 +1224,20 @@ module REXML
rv.each{ |attr| attr.remove }
return rv
end
+
+ # The +get_attribute_ns+ method retrieves an attribute by its namespace
+ # and name. Thus it is possible to reliably identify an attribute
+ # even if an XML processor has changed the prefix.
+ #
+ # namespace:: compared with == against each attribute's +namespace+
+ # name:: compared with == against each attribute's +name+
+ #
+ # Returns the first attribute yielded by +each_attribute+ for which both
+ # comparisons hold, or nil when none does.
+ #
+ # Method contributed by Henrik Martensson
+ def get_attribute_ns(namespace, name)
+ each_attribute() { |attribute|
+ if name == attribute.name &&
+ namespace == attribute.namespace()
+ return attribute
+ end
+ }
+ nil
+ end
end
end
diff --git a/lib/rexml/encoding.rb b/lib/rexml/encoding.rb
index 541e152de0..644957439e 100644
--- a/lib/rexml/encoding.rb
+++ b/lib/rexml/encoding.rb
@@ -1,58 +1,64 @@
# -*- mode: ruby; ruby-indent-level: 2; indent-tabs-mode: t; tab-width: 2 -*- vim: sw=2 ts=2
module REXML
- module Encoding
- @encoding_methods = {}
- def self.register(enc, &block)
- @encoding_methods[enc] = block
- end
- def self.apply(obj, enc)
- @encoding_methods[enc][obj]
- end
- def self.encoding_method(enc)
- @encoding_methods[enc]
- end
+ module Encoding
+ @encoding_methods = {}
+ def self.register(enc, &block)
+ @encoding_methods[enc] = block
+ end
+ def self.apply(obj, enc)
+ @encoding_methods[enc][obj]
+ end
+ def self.encoding_method(enc)
+ @encoding_methods[enc]
+ end
- # Native, default format is UTF-8, so it is declared here rather than in
- # an encodings/ definition.
- UTF_8 = 'UTF-8'
- UTF_16 = 'UTF-16'
- UNILE = 'UNILE'
+ # Native, default format is UTF-8, so it is declared here rather than in
+ # an encodings/ definition.
+ UTF_8 = 'UTF-8'
+ UTF_16 = 'UTF-16'
+ UNILE = 'UNILE'
- # ID ---> Encoding name
- attr_reader :encoding
- def encoding=( enc )
- old_verbosity = $VERBOSE
- begin
- $VERBOSE = false
- return if defined? @encoding and enc == @encoding
- if enc
- raise ArgumentError, "Bad encoding name #{enc}" unless /\A[\w-]+\z/n =~ enc
- @encoding = enc.upcase.untaint
- else
- @encoding = UTF_8
- end
- err = nil
- [@encoding, "ICONV"].each do |enc|
- begin
- require File.join("rexml", "encodings", "#{enc}.rb")
- return Encoding.apply(self, enc)
- rescue LoadError, Exception => err
- end
- end
- puts err.message
- raise ArgumentError, "No decoder found for encoding #@encoding. Please install iconv."
- ensure
- $VERBOSE = old_verbosity
- end
- end
+ # ID ---> Encoding name
+ attr_reader :encoding
+ # Sets @encoding and installs the matching handler on this object through
+ # Encoding.apply.
+ #
+ # enc:: an encoding name, or nil. nil and 'UTF-8' select the UTF-8
+ #       handler; any other name is upcased and stored in @encoding.
+ #
+ # For a non-UTF-8 name the ICONV handler is tried first. If that raises,
+ # the name is validated and rexml/encodings/<NAME>.rb is required instead.
+ #
+ # Raises ArgumentError when the fallback is reached and the name contains
+ # anything other than word characters and '-', or when no handler file
+ # can be loaded. $VERBOSE is silenced for the duration and restored in
+ # the ensure clause.
+ def encoding=( enc )
+   old_verbosity = $VERBOSE
+   begin
+     $VERBOSE = false
+     return if defined? @encoding and enc == @encoding
+     if enc and enc != UTF_8
+       @encoding = enc.upcase
+       begin
+         require 'rexml/encodings/ICONV.rb'
+         Encoding.apply(self, "ICONV")
+       rescue LoadError, Exception
+         # The name becomes part of a require path below, so the whole
+         # string must match: \A and \z anchor the string, whereas ^ and $
+         # only anchor a line and would let a multi-line name through.
+         raise ArgumentError, "Bad encoding name #@encoding" unless @encoding =~ /\A[\w-]+\z/
+         @encoding.untaint
+         enc_file = File.join( "rexml", "encodings", "#@encoding.rb" )
+         begin
+           require enc_file
+           Encoding.apply(self, @encoding)
+         rescue LoadError
+           puts $!.message
+           raise ArgumentError, "No decoder found for encoding #@encoding. Please install iconv."
+         end
+       end
+     else
+       @encoding = UTF_8
+       require 'rexml/encodings/UTF-8.rb'
+       Encoding.apply(self, @encoding)
+     end
+   ensure
+     $VERBOSE = old_verbosity
+   end
+ end
- def check_encoding str
- # We have to recognize UTF-16, LSB UTF-16, and UTF-8
- return UTF_16 if str[0] == 254 && str[1] == 255
- return UNILE if str[0] == 255 && str[1] == 254
- str =~ /^\s*<?xml\s*version=(['"]).*?\2\s*encoding=(["'])(.*?)\2/um
- return $1.upcase if $1
- return UTF_8
- end
- end
+ # Guesses the encoding of +str+ from its first two bytes or, failing
+ # that, from the encoding attribute of a leading XML declaration.
+ #
+ # Returns UTF_16 or UNILE when the matching two-byte prefix is present,
+ # the upcased declared encoding name when a declaration is found, and
+ # UTF_8 otherwise.
+ def check_encoding str
+   # We have to recognize UTF-16, LSB UTF-16, and UTF-8
+   return UTF_16 if str[0] == 254 && str[1] == 255
+   return UNILE if str[0] == 255 && str[1] == 254
+   # Group 1 is the quote around the version, group 2 the quote around
+   # the encoding, and group 3 the encoding name. Each back-reference
+   # closes its own quote, and the "<?" is matched literally.
+   str =~ /^\s*<\?xml\s*version=(['"]).*?\1\s*encoding=(["'])(.*?)\2/um
+   return $3.upcase if $3
+   return UTF_8
+ end
+ end
end
diff --git a/lib/rexml/functions.rb b/lib/rexml/functions.rb
index 010926611e..c09ffdeae7 100644
--- a/lib/rexml/functions.rb
+++ b/lib/rexml/functions.rb
@@ -67,11 +67,10 @@ module REXML
if node_set == nil
yield @@context[:node] if defined? @@context[:node].namespace
else
- if node_set.namespace
- yield node_set
- else
- return unless node_set.kind_of? Enumerable
+ if node_set.respond_to? :each
node_set.each { |node| yield node if defined? node.namespace }
+ elsif node_set.respond_to? :namespace
+ yield node_set
end
end
end
diff --git a/lib/rexml/parent.rb b/lib/rexml/parent.rb
index cc74a40666..a20aaaef6b 100644
--- a/lib/rexml/parent.rb
+++ b/lib/rexml/parent.rb
@@ -1,168 +1,166 @@
require "rexml/child"
module REXML
- # A parent has children, and has methods for accessing them. The Parent
+ # A parent has children, and has methods for accessing them. The Parent
# class is never encountered except as the superclass for some other
# object.
- class Parent < Child
- include Enumerable
-
- # Constructor
- # @param parent if supplied, will be set as the parent of this object
- def initialize parent=nil
- super(parent)
- @children = []
- end
-
- def add( object )
- #puts "PARENT GOTS #{size} CHILDREN"
- object.parent = self
- @children << object
- #puts "PARENT NOW GOTS #{size} CHILDREN"
- object
- end
-
- alias :push :add
- alias :<< :push
-
- def unshift( object )
- object.parent = self
- @children.unshift object
- end
-
- def delete( object )
+ class Parent < Child
+ include Enumerable
+
+ # Constructor
+ # @param parent if supplied, will be set as the parent of this object
+ def initialize parent=nil
+ super(parent)
+ @children = []
+ end
+
+ def add( object )
+ #puts "PARENT GOTS #{size} CHILDREN"
+ object.parent = self
+ @children << object
+ #puts "PARENT NOW GOTS #{size} CHILDREN"
+ object
+ end
+
+ alias :push :add
+ alias :<< :push
+
+ def unshift( object )
+ object.parent = self
+ @children.unshift object
+ end
+
+ def delete( object )
found = false
- @children.delete_if {|c|
- c.equal?(object) and found = true
- }
- object.parent = nil if found
- end
-
- def each(&block)
- @children.each(&block)
- end
-
- def delete_if( &block )
- @children.delete_if(&block)
- end
-
- def delete_at( index )
- @children.delete_at index
- end
-
- def each_index( &block )
- @children.each_index(&block)
- end
-
- # Fetches a child at a given index
- # @param index the Integer index of the child to fetch
- def []( index )
- @children[index]
- end
-
- alias :each_child :each
-
-
-
- # Set an index entry. See Array.[]=
- # @param index the index of the element to set
- # @param opt either the object to set, or an Integer length
- # @param child if opt is an Integer, this is the child to set
- # @return the parent (self)
- def []=( *args )
- args[-1].parent = self
- @children[*args[0..-2]] = args[-1]
- end
-
- # Inserts an child before another child
- # @param child1 this is either an xpath or an Element. If an Element,
- # child2 will be inserted before child1 in the child list of the parent.
- # If an xpath, child2 will be inserted before the first child to match
- # the xpath.
- # @param child2 the child to insert
- # @return the parent (self)
- def insert_before( child1, child2 )
- if child1.kind_of? String
- child1 = XPath.first( self, child1 )
- child1.parent.insert_before child1, child2
- else
- ind = index(child1)
- child2.parent.delete(child2) if child2.parent
- @children[ind,0] = child2
- child2.parent = self
- end
- self
- end
-
- # Inserts an child after another child
- # @param child1 this is either an xpath or an Element. If an Element,
- # child2 will be inserted after child1 in the child list of the parent.
- # If an xpath, child2 will be inserted after the first child to match
- # the xpath.
- # @param child2 the child to insert
- # @return the parent (self)
- def insert_after( child1, child2 )
- if child1.kind_of? String
- child1 = XPath.first( self, child1 )
- child1.parent.insert_after child1, child2
- else
- ind = index(child1)+1
- child2.parent.delete(child2) if child2.parent
- @children[ind,0] = child2
- child2.parent = self
- end
- self
- end
-
- def to_a
- @children.dup
- end
-
- # Fetches the index of a given child
- # @param child the child to get the index of
- # @return the index of the child, or nil if the object is not a child
- # of this parent.
- def index( child )
- count = -1
- @children.find { |i| count += 1 ; i.hash == child.hash }
- count
- end
-
- # @return the number of children of this parent
- def size
- @children.size
- end
-
+ @children.delete_if {|c| c.equal?(object) and found = true }
+ object.parent = nil if found
+ end
+
+ def each(&block)
+ @children.each(&block)
+ end
+
+ def delete_if( &block )
+ @children.delete_if(&block)
+ end
+
+ def delete_at( index )
+ @children.delete_at index
+ end
+
+ def each_index( &block )
+ @children.each_index(&block)
+ end
+
+ # Fetches a child at a given index
+ # @param index the Integer index of the child to fetch
+ def []( index )
+ @children[index]
+ end
+
+ alias :each_child :each
+
+
+
+ # Set an index entry. See Array.[]=
+ # @param index the index of the element to set
+ # @param opt either the object to set, or an Integer length
+ # @param child if opt is an Integer, this is the child to set
+ # @return the parent (self)
+ def []=( *args )
+ args[-1].parent = self
+ @children[*args[0..-2]] = args[-1]
+ end
+
+ # Inserts a child before another child
+ # @param child1 this is either an xpath or an Element. If an Element,
+ # child2 will be inserted before child1 in the child list of the parent.
+ # If an xpath, child2 will be inserted before the first child to match
+ # the xpath.
+ # @param child2 the child to insert
+ # @return the parent (self)
+ def insert_before( child1, child2 )
+ if child1.kind_of? String
+ child1 = XPath.first( self, child1 )
+ child1.parent.insert_before child1, child2
+ else
+ ind = index(child1)
+ child2.parent.delete(child2) if child2.parent
+ @children[ind,0] = child2
+ child2.parent = self
+ end
+ self
+ end
+
+ # Inserts a child after another child
+ # @param child1 this is either an xpath or an Element. If an Element,
+ # child2 will be inserted after child1 in the child list of the parent.
+ # If an xpath, child2 will be inserted after the first child to match
+ # the xpath.
+ # @param child2 the child to insert
+ # @return the parent (self)
+ def insert_after( child1, child2 )
+ if child1.kind_of? String
+ child1 = XPath.first( self, child1 )
+ child1.parent.insert_after child1, child2
+ else
+ ind = index(child1)+1
+ child2.parent.delete(child2) if child2.parent
+ @children[ind,0] = child2
+ child2.parent = self
+ end
+ self
+ end
+
+ def to_a
+ @children.dup
+ end
+
+ # Fetches the index of a given child
+ # @param child the child to get the index of
+ # @return the index of the first child whose #hash equals child.hash.
+ # NOTE(review): when no child matches, the running counter is still
+ # returned, so the result is the last index (-1 for an empty list) and
+ # not nil.
+ def index( child )
+ count = -1
+ @children.find { |i| count += 1 ; i.hash == child.hash }
+ count
+ end
+
+ # @return the number of children of this parent
+ def size
+ @children.size
+ end
+
alias :length :size
-
- # Replaces one child with another, making sure the nodelist is correct
- # @param to_replace the child to replace (must be a Child)
- # @param replacement the child to insert into the nodelist (must be a
- # Child)
- def replace_child( to_replace, replacement )
+
+ # Replaces one child with another, making sure the nodelist is correct
+ # @param to_replace the child to replace (must be a Child)
+ # @param replacement the child to insert into the nodelist (must be a
+ # Child)
+ def replace_child( to_replace, replacement )
@children.map! {|c| c.equal?( to_replace ) ? replacement : c }
to_replace.parent = nil
replacement.parent = self
- end
-
- # Deeply clones this object. This creates a complete duplicate of this
- # Parent, including all descendants.
- def deep_clone
- cl = clone()
- each do |child|
- if child.kind_of? Parent
- cl << child.deep_clone
- else
- cl << child.clone
- end
- end
- cl
- end
-
- alias :children :to_a
-
- def parent?
- true
- end
- end
+ end
+
+ # Deeply clones this object. This creates a complete duplicate of this
+ # Parent, including all descendants.
+ def deep_clone
+ cl = clone()
+ each do |child|
+ if child.kind_of? Parent
+ cl << child.deep_clone
+ else
+ cl << child.clone
+ end
+ end
+ cl
+ end
+
+ alias :children :to_a
+
+ def parent?
+ true
+ end
+ end
end
diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb
index c898ba0b60..bce4ba4c20 100644
--- a/lib/rexml/parsers/baseparser.rb
+++ b/lib/rexml/parsers/baseparser.rb
@@ -42,7 +42,7 @@ module REXML
CDATA_END = /^\s*\]\s*>/um
CDATA_PATTERN = /<!\[CDATA\[(.*?)\]\]>/um
XMLDECL_START = /\A<\?xml\s/u;
- XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>*/um
+ XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>/um
INSTRUCTION_START = /\A<\?/u
INSTRUCTION_PATTERN = /<\?(.*?)(\s+.*?)?\?>/um
TAG_MATCH = /^<((?>#{NAME_STR}))\s*((?>\s+#{NAME_STR}\s*=\s*(["']).*?\3)*)\s*(\/)?>/um
@@ -68,8 +68,8 @@ module REXML
ATTLISTDECL_START = /^\s*<!ATTLIST/um
ATTLISTDECL_PATTERN = /^\s*<!ATTLIST\s+#{NAME}(?:#{ATTDEF})*\s*>/um
NOTATIONDECL_START = /^\s*<!NOTATION/um
- PUBLIC = /^\s*<!NOTATION\s+(\w[\-\w]*)\s+(PUBLIC)\s+((["']).*?\4)\s*>/um
- SYSTEM = /^\s*<!NOTATION\s+(\w[\-\w]*)\s+(SYSTEM)\s+((["']).*?\4)\s*>/um
+ PUBLIC = /^\s*<!NOTATION\s+(\w[\-\w]*)\s+(PUBLIC)\s+(["'])(.*?)\3(?:\s+(["'])(.*?)\5)?\s*>/um
+ SYSTEM = /^\s*<!NOTATION\s+(\w[\-\w]*)\s+(SYSTEM)\s+(["'])(.*?)\3\s*>/um
TEXT_PATTERN = /\A([^<]*)/um
@@ -120,20 +120,7 @@ module REXML
attr_reader :source
def stream=( source )
- if source.kind_of? String
- @source = Source.new(source)
- elsif source.kind_of? IO
- @source = IOSource.new(source)
- elsif source.kind_of? Source
- @source = source
- elsif defined? StringIO and source.kind_of? StringIO
- @source = IOSource.new(source)
- elsif defined? Tempfile and source.kind_of? Tempfile
- @source = IOSource.new(source)
- else
- raise "#{source.class} is not a valid input stream. It must be \n"+
- "either a String, IO, StringIO or Source."
- end
+ @source = SourceFactory.create_from( source )
@closed = nil
@document_status = nil
@tags = []
@@ -152,8 +139,8 @@ module REXML
# Returns true if there are no more events
def empty?
- #puts "@source.empty? = #{@source.empty?}"
- #puts "@stack.empty? = #{@stack.empty?}"
+ #STDERR.puts "@source.empty? = #{@source.empty?}"
+ #STDERR.puts "@stack.empty? = #{@stack.empty?}"
return (@source.empty? and @stack.empty?)
end
@@ -197,14 +184,17 @@ module REXML
return [ :end_document ] if empty?
return @stack.shift if @stack.size > 0
@source.read if @source.buffer.size<2
+ #STDERR.puts "BUFFER = #{@source.buffer.inspect}"
if @document_status == nil
- @source.consume( /^\s*/um )
- word = @source.match( /(<[^>]*)>/um )
+ #@source.consume( /^\s*/um )
+ word = @source.match( /^((?:\s+)|(?:<[^>]*>))/um )
word = word[1] unless word.nil?
+ #STDERR.puts "WORD = #{word.inspect}"
case word
when COMMENT_START
return [ :comment, @source.match( COMMENT_PATTERN, true )[1] ]
when XMLDECL_START
+ #STDERR.puts "XMLDECL"
results = @source.match( XMLDECL_PATTERN, true )[1]
version = VERSION.match( results )
version = version[1] unless version.nil?
@@ -213,7 +203,7 @@ module REXML
@source.encoding = encoding
standalone = STANDALONE.match(results)
standalone = standalone[1] unless standalone.nil?
- return [ :xmldecl, version, encoding, standalone]
+ return [ :xmldecl, version, encoding, standalone ]
when INSTRUCTION_START
return [ :processing_instruction, *@source.match(INSTRUCTION_PATTERN, true)[1,2] ]
when DOCTYPE_START
@@ -236,6 +226,7 @@ module REXML
@document_status = :in_doctype
end
return args
+ when /^\s+/
else
@document_status = :after_doctype
@source.read if @source.buffer.size<2
@@ -299,12 +290,14 @@ module REXML
md = nil
if @source.match( PUBLIC )
md = @source.match( PUBLIC, true )
+ vals = [md[1],md[2],md[4],md[6]]
elsif @source.match( SYSTEM )
md = @source.match( SYSTEM, true )
+ vals = [md[1],md[2],nil,md[4]]
else
raise REXML::ParseException.new( "error parsing notation: no matching pattern", @source )
end
- return [ :notationdecl, md[1], md[2], md[3] ]
+ return [ :notationdecl, *vals ]
when CDATA_END
@document_status = :after_doctype
@source.match( CDATA_END, true )
@@ -323,7 +316,7 @@ module REXML
return [ :end_element, last_tag ]
elsif @source.buffer[1] == ?!
md = @source.match(/\A(\s*[^>]*>)/um)
- #puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
+ #STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
raise REXML::ParseException.new("Malformed node", @source) unless md
if md[0][2] == ?-
md = @source.match( COMMENT_PATTERN, true )
@@ -361,10 +354,11 @@ module REXML
else
md = @source.match( TEXT_PATTERN, true )
if md[0].length == 0
- #puts "EMPTY = #{empty?}"
- #puts "BUFFER = \"#{@source.buffer}\""
+ #STDERR.puts "EMPTY = #{empty?}"
+ #STDERR.puts "BUFFER = \"#{@source.buffer}\""
@source.match( /(\s+)/, true )
end
+ #STDERR.puts "GOT #{md[1].inspect}" unless md[0].length == 0
#return [ :text, "" ] if md[0].length == 0
# unnormalized = Text::unnormalize( md[1], self )
# return PullEvent.new( :text, md[1], unnormalized )
diff --git a/lib/rexml/parsers/streamparser.rb b/lib/rexml/parsers/streamparser.rb
index 996d613e15..256d0f611c 100644
--- a/lib/rexml/parsers/streamparser.rb
+++ b/lib/rexml/parsers/streamparser.rb
@@ -1,42 +1,46 @@
module REXML
- module Parsers
- class StreamParser
- def initialize source, listener
- @listener = listener
- @parser = BaseParser.new( source )
- end
-
+ module Parsers
+ class StreamParser
+ def initialize source, listener
+ @listener = listener
+ @parser = BaseParser.new( source )
+ end
+
def add_listener( listener )
@parser.add_listener( listener )
end
-
- def parse
- # entity string
- while true
- event = @parser.pull
- case event[0]
- when :end_document
- return
- when :start_element
- attrs = event[2].each do |n, v|
- event[2][n] = @parser.unnormalize( v )
- end
- @listener.tag_start( event[1], attrs )
- when :end_element
- @listener.tag_end( event[1] )
- when :text
- normalized = @parser.unnormalize( event[1] )
- @listener.text( normalized )
- when :processing_instruction
- @listener.instruction( *event[1,2] )
+
+ def parse
+ # entity string
+ while true
+ event = @parser.pull
+ case event[0]
+ when :end_document
+ return
+ when :start_element
+ attrs = event[2].each do |n, v|
+ event[2][n] = @parser.unnormalize( v )
+ end
+ @listener.tag_start( event[1], attrs )
+ when :end_element
+ @listener.tag_end( event[1] )
+ when :text
+ normalized = @parser.unnormalize( event[1] )
+ @listener.text( normalized )
+ when :processing_instruction
+ @listener.instruction( *event[1,2] )
when :start_doctype
@listener.doctype( *event[1..-1] )
- when :comment, :attlistdecl, :notationdecl, :elementdecl,
- :entitydecl, :cdata, :xmldecl, :attlistdecl
- @listener.send( event[0].to_s, *event[1..-1] )
- end
- end
- end
- end
- end
+ when :end_doctype
+ # FIXME: remove this condition for milestone:3.2
+ @listener.doctype_end if @listener.respond_to? :doctype_end
+ when :comment, :attlistdecl, :cdata, :xmldecl, :elementdecl
+ @listener.send( event[0].to_s, *event[1..-1] )
+ when :entitydecl, :notationdecl
+ @listener.send( event[0].to_s, event[1..-1] )
+ end
+ end
+ end
+ end
+ end
end
diff --git a/lib/rexml/parsers/treeparser.rb b/lib/rexml/parsers/treeparser.rb
index 57d11f7e23..500a53f426 100644
--- a/lib/rexml/parsers/treeparser.rb
+++ b/lib/rexml/parsers/treeparser.rb
@@ -19,8 +19,12 @@ module REXML
begin
while true
event = @parser.pull
+ #STDERR.puts "TREEPARSER GOT #{event.inspect}"
case event[0]
when :end_document
+ unless tag_stack.empty?
+ raise ParseException.new("No close tag for #{tag_stack.inspect}")
+ end
return
when :start_element
tag_stack.push(event[1])
@@ -35,10 +39,10 @@ module REXML
@build_context[-1] << event[1]
else
@build_context.add(
- Text.new( event[1], @build_context.whitespace, nil, true )
+ Text.new(event[1], @build_context.whitespace, nil, true)
) unless (
- event[1].strip.size==0 and
- @build_context.ignore_whitespace_nodes
+ @build_context.ignore_whitespace_nodes and
+ event[1].strip.size==0
)
end
end
diff --git a/lib/rexml/rexml.rb b/lib/rexml/rexml.rb
index 285d50cea5..ca154443b5 100644
--- a/lib/rexml/rexml.rb
+++ b/lib/rexml/rexml.rb
@@ -10,8 +10,8 @@
#
# Main page:: http://www.germane-software.com/software/rexml
# Author:: Sean Russell <serATgermaneHYPHENsoftwareDOTcom>
-# Version:: 3.1.3.1
-# Date:: 2005/364
+# Version:: 3.1.4
+# Date:: 2006/104
#
# This API documentation can be downloaded from the REXML home page, or can
# be accessed online[http://www.germane-software.com/software/rexml_doc]
@@ -20,7 +20,10 @@
# or can be accessed
# online[http://www.germane-software.com/software/rexml/docs/tutorial.html]
module REXML
- Copyright = "Copyright © 2001-2005 Sean Russell <ser@germane-software.com>"
- Date = "2005/364"
- Version = "3.1.3.1"
+ COPYRIGHT = "Copyright © 2001-2006 Sean Russell <ser@germane-software.com>"
+ DATE = "2006/104"
+ VERSION = "3.1.4"
+
+ Copyright = COPYRIGHT
+ Version = VERSION
end
diff --git a/lib/rexml/sax2listener.rb b/lib/rexml/sax2listener.rb
index 339ae64121..9a992917e6 100644
--- a/lib/rexml/sax2listener.rb
+++ b/lib/rexml/sax2listener.rb
@@ -84,6 +84,7 @@ module REXML
# @p version the version attribute value. EG, "1.0"
# @p encoding the encoding attribute value, or nil. EG, "utf"
# @p standalone the standalone attribute value, or nil. EG, nil
+ # @p spaced the declaration is followed by a line break
def xmldecl version, encoding, standalone
end
# Called when a comment is encountered.
diff --git a/lib/rexml/source.rb b/lib/rexml/source.rb
index ddade5de0e..cacab221db 100644
--- a/lib/rexml/source.rb
+++ b/lib/rexml/source.rb
@@ -7,13 +7,19 @@ module REXML
# @param arg Either a String, or an IO
# @return a Source, or nil if a bad argument was given
def SourceFactory::create_from arg#, slurp=true
- if arg.kind_of? String
+ if arg.kind_of? String
Source.new(arg)
- elsif arg.kind_of? IO
+ elsif arg.respond_to? :read and
+ arg.respond_to? :readline and
+ arg.respond_to? :nil? and
+ arg.respond_to? :eof?
IOSource.new(arg)
elsif arg.kind_of? Source
arg
- end
+ else
+ raise "#{arg.class} is not a valid input stream. It must walk \n"+
+ "like either a String, IO, or Source."
+ end
end
end
diff --git a/lib/rexml/streamlistener.rb b/lib/rexml/streamlistener.rb
index 3c3c5e3684..6f401125b5 100644
--- a/lib/rexml/streamlistener.rb
+++ b/lib/rexml/streamlistener.rb
@@ -39,6 +39,9 @@ module REXML
# @p uri the uri of the doctype, or nil. EG, "bar"
def doctype name, pub_sys, long_name, uri
end
+ # Called when the doctype is done
+ def doctype_end
+ end
# If a doctype includes an ATTLIST declaration, it will cause this
# method to be called. The content is the declaration itself, unparsed.
# EG, <!ATTLIST el attr CDATA #REQUIRED> will come to this method as "el
diff --git a/lib/rexml/text.rb b/lib/rexml/text.rb
index 5d200deac6..55bc9f50f8 100644
--- a/lib/rexml/text.rb
+++ b/lib/rexml/text.rb
@@ -284,9 +284,10 @@ module REXML
EREFERENCE = /&(?!#{Entity::NAME};)/
# Escapes all possible entities
def Text::normalize( input, doctype=nil, entity_filter=nil )
- copy = input.clone
+ copy = input
# Doing it like this rather than in a loop improves the speed
if doctype
+ # Replace all ampersands that aren't part of an entity
copy = copy.gsub( EREFERENCE, '&amp;' )
doctype.entities.each_value do |entity|
copy = copy.gsub( entity.value,
@@ -294,6 +295,7 @@ module REXML
not( entity_filter and entity_filter.include?(entity) )
end
else
+ # Replace all ampersands that aren't part of an entity
copy = copy.gsub( EREFERENCE, '&amp;' )
DocType::DEFAULT_ENTITIES.each_value do |entity|
copy = copy.gsub(entity.value, "&#{entity.name};" )
diff --git a/lib/rexml/xmldecl.rb b/lib/rexml/xmldecl.rb
index 1b7ef813d1..b65604b762 100644
--- a/lib/rexml/xmldecl.rb
+++ b/lib/rexml/xmldecl.rb
@@ -80,6 +80,11 @@ module REXML
self.dowrite
end
+ # Only use this if you do not want the XML declaration to be written;
+ # this object is ignored by the XML writer. Otherwise, instantiate your
+ # own XMLDecl and add it to the document.
+ #
+ # Note that XML 1.1 documents *must* include an XML declaration
def XMLDecl.default
rv = XMLDecl.new( "1.0" )
rv.nowrite
diff --git a/lib/rexml/xpath_parser.rb b/lib/rexml/xpath_parser.rb
index fbfe608746..98ed70cc10 100644
--- a/lib/rexml/xpath_parser.rb
+++ b/lib/rexml/xpath_parser.rb
@@ -380,10 +380,13 @@ module REXML
return @variables[ var_name ]
# :and, :or, :eq, :neq, :lt, :lteq, :gt, :gteq
+ # TODO: Special case for :or and :and -- not evaluate the right
+ # operand if the left alone determines result (i.e. is true for
+ # :or and false for :and).
when :eq, :neq, :lt, :lteq, :gt, :gteq, :and, :or
- left = expr( path_stack.shift, nodeset, context )
+ left = expr( path_stack.shift, nodeset.dup, context )
#puts "LEFT => #{left.inspect} (#{left.class.name})"
- right = expr( path_stack.shift, nodeset, context )
+ right = expr( path_stack.shift, nodeset.dup, context )
#puts "RIGHT => #{right.inspect} (#{right.class.name})"
res = equality_relational_compare( left, op, right )
#puts "RES => #{res.inspect}"
@@ -467,8 +470,11 @@ module REXML
def descendant_or_self( path_stack, nodeset )
rs = []
+ #puts "#"*80
+ #puts "PATH_STACK = #{path_stack.inspect}"
+ #puts "NODESET = #{nodeset.collect{|n|n.inspect}.inspect}"
d_o_s( path_stack, nodeset, rs )
- #puts "RS = #{rs.collect{|n|n.to_s}.inspect}"
+ #puts "RS = #{rs.collect{|n|n.inspect}.inspect}"
document_order(rs.flatten.compact)
#rs.flatten.compact
end