summaryrefslogtreecommitdiff
path: root/trunk/lib/rexml
diff options
context:
space:
mode:
Diffstat (limited to 'trunk/lib/rexml')
-rw-r--r--trunk/lib/rexml/attlistdecl.rb62
-rw-r--r--trunk/lib/rexml/attribute.rb188
-rw-r--r--trunk/lib/rexml/cdata.rb67
-rw-r--r--trunk/lib/rexml/child.rb96
-rw-r--r--trunk/lib/rexml/comment.rb80
-rw-r--r--trunk/lib/rexml/doctype.rb270
-rw-r--r--trunk/lib/rexml/document.rb208
-rw-r--r--trunk/lib/rexml/dtd/attlistdecl.rb10
-rw-r--r--trunk/lib/rexml/dtd/dtd.rb51
-rw-r--r--trunk/lib/rexml/dtd/elementdecl.rb17
-rw-r--r--trunk/lib/rexml/dtd/entitydecl.rb56
-rw-r--r--trunk/lib/rexml/dtd/notationdecl.rb39
-rw-r--r--trunk/lib/rexml/element.rb1245
-rw-r--r--trunk/lib/rexml/encoding.rb71
-rw-r--r--trunk/lib/rexml/encodings/CP-1252.rb103
-rw-r--r--trunk/lib/rexml/encodings/EUC-JP.rb35
-rw-r--r--trunk/lib/rexml/encodings/ICONV.rb22
-rw-r--r--trunk/lib/rexml/encodings/ISO-8859-1.rb7
-rw-r--r--trunk/lib/rexml/encodings/ISO-8859-15.rb72
-rw-r--r--trunk/lib/rexml/encodings/SHIFT-JIS.rb37
-rw-r--r--trunk/lib/rexml/encodings/SHIFT_JIS.rb1
-rw-r--r--trunk/lib/rexml/encodings/UNILE.rb34
-rw-r--r--trunk/lib/rexml/encodings/US-ASCII.rb30
-rw-r--r--trunk/lib/rexml/encodings/UTF-16.rb35
-rw-r--r--trunk/lib/rexml/encodings/UTF-8.rb18
-rw-r--r--trunk/lib/rexml/entity.rb165
-rw-r--r--trunk/lib/rexml/formatters/default.rb109
-rw-r--r--trunk/lib/rexml/formatters/pretty.rb138
-rw-r--r--trunk/lib/rexml/formatters/transitive.rb56
-rw-r--r--trunk/lib/rexml/functions.rb388
-rw-r--r--trunk/lib/rexml/instruction.rb70
-rw-r--r--trunk/lib/rexml/light/node.rb196
-rw-r--r--trunk/lib/rexml/namespace.rb47
-rw-r--r--trunk/lib/rexml/node.rb75
-rw-r--r--trunk/lib/rexml/output.rb24
-rw-r--r--trunk/lib/rexml/parent.rb166
-rw-r--r--trunk/lib/rexml/parseexception.rb51
-rw-r--r--trunk/lib/rexml/parsers/baseparser.rb530
-rw-r--r--trunk/lib/rexml/parsers/lightparser.rb60
-rw-r--r--trunk/lib/rexml/parsers/pullparser.rb196
-rw-r--r--trunk/lib/rexml/parsers/sax2parser.rb247
-rw-r--r--trunk/lib/rexml/parsers/streamparser.rb46
-rw-r--r--trunk/lib/rexml/parsers/treeparser.rb100
-rw-r--r--trunk/lib/rexml/parsers/ultralightparser.rb56
-rw-r--r--trunk/lib/rexml/parsers/xpathparser.rb698
-rw-r--r--trunk/lib/rexml/quickpath.rb263
-rw-r--r--trunk/lib/rexml/rexml.rb31
-rw-r--r--trunk/lib/rexml/sax2listener.rb97
-rw-r--r--trunk/lib/rexml/source.rb258
-rw-r--r--trunk/lib/rexml/streamlistener.rb92
-rw-r--r--trunk/lib/rexml/syncenumerator.rb32
-rw-r--r--trunk/lib/rexml/text.rb404
-rw-r--r--trunk/lib/rexml/undefinednamespaceexception.rb8
-rw-r--r--trunk/lib/rexml/validation/relaxng.rb559
-rw-r--r--trunk/lib/rexml/validation/validation.rb155
-rw-r--r--trunk/lib/rexml/validation/validationexception.rb9
-rw-r--r--trunk/lib/rexml/xmldecl.rb119
-rw-r--r--trunk/lib/rexml/xmltokens.rb18
-rw-r--r--trunk/lib/rexml/xpath.rb66
-rw-r--r--trunk/lib/rexml/xpath_parser.rb792
60 files changed, 0 insertions, 9175 deletions
diff --git a/trunk/lib/rexml/attlistdecl.rb b/trunk/lib/rexml/attlistdecl.rb
deleted file mode 100644
index ef4721b5ce..0000000000
--- a/trunk/lib/rexml/attlistdecl.rb
+++ /dev/null
@@ -1,62 +0,0 @@
-#vim:ts=2 sw=2 noexpandtab:
-require 'rexml/child'
-require 'rexml/source'
-
-module REXML
- # This class needs:
- # * Documentation
- # * Work! Not all types of attlists are intelligently parsed, so we just
- # spew back out what we get in. This works, but it would be better if
- # we formatted the output ourselves.
- #
- # AttlistDecls provide *just* enough support to allow namespace
- # declarations. If you need some sort of generalized support, or have an
- # interesting idea about how to map the hideous, terrible design of DTD
- # AttlistDecls onto an intuitive Ruby interface, let me know. I'm desperate
- # for anything to make DTDs more palateable.
- class AttlistDecl < Child
- include Enumerable
-
- # What is this? Got me.
- attr_reader :element_name
-
- # Create an AttlistDecl, pulling the information from a Source. Notice
- # that this isn't very convenient; to create an AttlistDecl, you basically
- # have to format it yourself, and then have the initializer parse it.
- # Sorry, but for the forseeable future, DTD support in REXML is pretty
- # weak on convenience. Have I mentioned how much I hate DTDs?
- def initialize(source)
- super()
- if (source.kind_of? Array)
- @element_name, @pairs, @contents = *source
- end
- end
-
- # Access the attlist attribute/value pairs.
- # value = attlist_decl[ attribute_name ]
- def [](key)
- @pairs[key]
- end
-
- # Whether an attlist declaration includes the given attribute definition
- # if attlist_decl.include? "xmlns:foobar"
- def include?(key)
- @pairs.keys.include? key
- end
-
- # Iterate over the key/value pairs:
- # attlist_decl.each { |attribute_name, attribute_value| ... }
- def each(&block)
- @pairs.each(&block)
- end
-
- # Write out exactly what we got in.
- def write out, indent=-1
- out << @contents
- end
-
- def node_type
- :attlistdecl
- end
- end
-end
diff --git a/trunk/lib/rexml/attribute.rb b/trunk/lib/rexml/attribute.rb
deleted file mode 100644
index 17ced44c45..0000000000
--- a/trunk/lib/rexml/attribute.rb
+++ /dev/null
@@ -1,188 +0,0 @@
-require "rexml/namespace"
-require 'rexml/text'
-
-module REXML
- # Defines an Element Attribute; IE, a attribute=value pair, as in:
- # <element attribute="value"/>. Attributes can be in their own
- # namespaces. General users of REXML will not interact with the
- # Attribute class much.
- class Attribute
- include Node
- include Namespace
-
- # The element to which this attribute belongs
- attr_reader :element
- # The normalized value of this attribute. That is, the attribute with
- # entities intact.
- attr_writer :normalized
- PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\2/um
-
- NEEDS_A_SECOND_CHECK = /(<|&((#{Entity::NAME});|(#0*((?:\d+)|(?:x[a-fA-F0-9]+)));)?)/um
-
- # Constructor.
- # FIXME: The parser doesn't catch illegal characters in attributes
- #
- # first::
- # Either: an Attribute, which this new attribute will become a
- # clone of; or a String, which is the name of this attribute
- # second::
- # If +first+ is an Attribute, then this may be an Element, or nil.
- # If nil, then the Element parent of this attribute is the parent
- # of the +first+ Attribute. If the first argument is a String,
- # then this must also be a String, and is the content of the attribute.
- # If this is the content, it must be fully normalized (contain no
- # illegal characters).
- # parent::
- # Ignored unless +first+ is a String; otherwise, may be the Element
- # parent of this attribute, or nil.
- #
- #
- # Attribute.new( attribute_to_clone )
- # Attribute.new( attribute_to_clone, parent_element )
- # Attribute.new( "attr", "attr_value" )
- # Attribute.new( "attr", "attr_value", parent_element )
- def initialize( first, second=nil, parent=nil )
- @normalized = @unnormalized = @element = nil
- if first.kind_of? Attribute
- self.name = first.expanded_name
- @unnormalized = first.value
- if second.kind_of? Element
- @element = second
- else
- @element = first.element
- end
- elsif first.kind_of? String
- @element = parent
- self.name = first
- @normalized = second.to_s
- else
- raise "illegal argument #{first.class.name} to Attribute constructor"
- end
- end
-
- # Returns the namespace of the attribute.
- #
- # e = Element.new( "elns:myelement" )
- # e.add_attribute( "nsa:a", "aval" )
- # e.add_attribute( "b", "bval" )
- # e.attributes.get_attribute( "a" ).prefix # -> "nsa"
- # e.attributes.get_attribute( "b" ).prefix # -> "elns"
- # a = Attribute.new( "x", "y" )
- # a.prefix # -> ""
- def prefix
- pf = super
- if pf == ""
- pf = @element.prefix if @element
- end
- pf
- end
-
- # Returns the namespace URL, if defined, or nil otherwise
- #
- # e = Element.new("el")
- # e.add_attributes({"xmlns:ns", "http://url"})
- # e.namespace( "ns" ) # -> "http://url"
- def namespace arg=nil
- arg = prefix if arg.nil?
- @element.namespace arg
- end
-
- # Returns true if other is an Attribute and has the same name and value,
- # false otherwise.
- def ==( other )
- other.kind_of?(Attribute) and other.name==name and other.value==value
- end
-
- # Creates (and returns) a hash from both the name and value
- def hash
- name.hash + value.hash
- end
-
- # Returns this attribute out as XML source, expanding the name
- #
- # a = Attribute.new( "x", "y" )
- # a.to_string # -> "x='y'"
- # b = Attribute.new( "ns:x", "y" )
- # b.to_string # -> "ns:x='y'"
- def to_string
- if @element and @element.context and @element.context[:attribute_quote] == :quote
- %Q^#@expanded_name="#{to_s().gsub(/"/, '&quote;')}"^
- else
- "#@expanded_name='#{to_s().gsub(/'/, '&apos;')}'"
- end
- end
-
- def doctype
- if @element
- doc = @element.document
- doctype = doc.doctype if doc
- end
- end
-
- # Returns the attribute value, with entities replaced
- def to_s
- return @normalized if @normalized
-
- @normalized = Text::normalize( @unnormalized, doctype )
- @unnormalized = nil
- @normalized
- end
-
- # Returns the UNNORMALIZED value of this attribute. That is, entities
- # have been expanded to their values
- def value
- return @unnormalized if @unnormalized
- @unnormalized = Text::unnormalize( @normalized, doctype )
- @normalized = nil
- @unnormalized
- end
-
- # Returns a copy of this attribute
- def clone
- Attribute.new self
- end
-
- # Sets the element of which this object is an attribute. Normally, this
- # is not directly called.
- #
- # Returns this attribute
- def element=( element )
- @element = element
-
- if @normalized
- Text.check( @normalized, NEEDS_A_SECOND_CHECK, doctype )
- end
-
- self
- end
-
- # Removes this Attribute from the tree, and returns true if successfull
- #
- # This method is usually not called directly.
- def remove
- @element.attributes.delete self.name unless @element.nil?
- end
-
- # Writes this attribute (EG, puts 'key="value"' to the output)
- def write( output, indent=-1 )
- output << to_string
- end
-
- def node_type
- :attribute
- end
-
- def inspect
- rv = ""
- write( rv )
- rv
- end
-
- def xpath
- path = @element.xpath
- path += "/@#{self.expanded_name}"
- return path
- end
- end
-end
-#vim:ts=2 sw=2 noexpandtab:
diff --git a/trunk/lib/rexml/cdata.rb b/trunk/lib/rexml/cdata.rb
deleted file mode 100644
index 856b9ef8b2..0000000000
--- a/trunk/lib/rexml/cdata.rb
+++ /dev/null
@@ -1,67 +0,0 @@
-require "rexml/text"
-
-module REXML
- class CData < Text
- START = '<![CDATA['
- STOP = ']]>'
- ILLEGAL = /(\]\]>)/
-
- # Constructor. CData is data between <![CDATA[ ... ]]>
- #
- # _Examples_
- # CData.new( source )
- # CData.new( "Here is some CDATA" )
- # CData.new( "Some unprocessed data", respect_whitespace_TF, parent_element )
- def initialize( first, whitespace=true, parent=nil )
- super( first, whitespace, parent, false, true, ILLEGAL )
- end
-
- # Make a copy of this object
- #
- # _Examples_
- # c = CData.new( "Some text" )
- # d = c.clone
- # d.to_s # -> "Some text"
- def clone
- CData.new self
- end
-
- # Returns the content of this CData object
- #
- # _Examples_
- # c = CData.new( "Some text" )
- # c.to_s # -> "Some text"
- def to_s
- @string
- end
-
- def value
- @string
- end
-
- # == DEPRECATED
- # See the rexml/formatters package
- #
- # Generates XML output of this object
- #
- # output::
- # Where to write the string. Defaults to $stdout
- # indent::
- # The amount to indent this node by
- # transitive::
- # Ignored
- # ie_hack::
- # Ignored
- #
- # _Examples_
- # c = CData.new( " Some text " )
- # c.write( $stdout ) #-> <![CDATA[ Some text ]]>
- def write( output=$stdout, indent=-1, transitive=false, ie_hack=false )
- Kernel.warn( "#{self.class.name}.write is deprecated" )
- indent( output, indent )
- output << START
- output << @string
- output << STOP
- end
- end
-end
diff --git a/trunk/lib/rexml/child.rb b/trunk/lib/rexml/child.rb
deleted file mode 100644
index 6d3c9df5e6..0000000000
--- a/trunk/lib/rexml/child.rb
+++ /dev/null
@@ -1,96 +0,0 @@
-require "rexml/node"
-
-module REXML
- ##
- # A Child object is something contained by a parent, and this class
- # contains methods to support that. Most user code will not use this
- # class directly.
- class Child
- include Node
- attr_reader :parent # The Parent of this object
-
- # Constructor. Any inheritors of this class should call super to make
- # sure this method is called.
- # parent::
- # if supplied, the parent of this child will be set to the
- # supplied value, and self will be added to the parent
- def initialize( parent = nil )
- @parent = nil
- # Declare @parent, but don't define it. The next line sets the
- # parent.
- parent.add( self ) if parent
- end
-
- # Replaces this object with another object. Basically, calls
- # Parent.replace_child
- #
- # Returns:: self
- def replace_with( child )
- @parent.replace_child( self, child )
- self
- end
-
- # Removes this child from the parent.
- #
- # Returns:: self
- def remove
- unless @parent.nil?
- @parent.delete self
- end
- self
- end
-
- # Sets the parent of this child to the supplied argument.
- #
- # other::
- # Must be a Parent object. If this object is the same object as the
- # existing parent of this child, no action is taken. Otherwise, this
- # child is removed from the current parent (if one exists), and is added
- # to the new parent.
- # Returns:: The parent added
- def parent=( other )
- return @parent if @parent == other
- @parent.delete self if defined? @parent and @parent
- @parent = other
- end
-
- alias :next_sibling :next_sibling_node
- alias :previous_sibling :previous_sibling_node
-
- # Sets the next sibling of this child. This can be used to insert a child
- # after some other child.
- # a = Element.new("a")
- # b = a.add_element("b")
- # c = Element.new("c")
- # b.next_sibling = c
- # # => <a><b/><c/></a>
- def next_sibling=( other )
- parent.insert_after self, other
- end
-
- # Sets the previous sibling of this child. This can be used to insert a
- # child before some other child.
- # a = Element.new("a")
- # b = a.add_element("b")
- # c = Element.new("c")
- # b.previous_sibling = c
- # # => <a><b/><c/></a>
- def previous_sibling=(other)
- parent.insert_before self, other
- end
-
- # Returns:: the document this child belongs to, or nil if this child
- # belongs to no document
- def document
- return parent.document unless parent.nil?
- nil
- end
-
- # This doesn't yet handle encodings
- def bytes
- encoding = document.encoding
-
- to_s
- end
- end
-end
diff --git a/trunk/lib/rexml/comment.rb b/trunk/lib/rexml/comment.rb
deleted file mode 100644
index 2b9b4b89c9..0000000000
--- a/trunk/lib/rexml/comment.rb
+++ /dev/null
@@ -1,80 +0,0 @@
-require "rexml/child"
-
-module REXML
- ##
- # Represents an XML comment; that is, text between \<!-- ... -->
- class Comment < Child
- include Comparable
- START = "<!--"
- STOP = "-->"
-
- # The content text
-
- attr_accessor :string
-
- ##
- # Constructor. The first argument can be one of three types:
- # @param first If String, the contents of this comment are set to the
- # argument. If Comment, the argument is duplicated. If
- # Source, the argument is scanned for a comment.
- # @param second If the first argument is a Source, this argument
- # should be nil, not supplied, or a Parent to be set as the parent
- # of this object
- def initialize( first, second = nil )
- #puts "IN COMMENT CONSTRUCTOR; SECOND IS #{second.type}"
- super(second)
- if first.kind_of? String
- @string = first
- elsif first.kind_of? Comment
- @string = first.string
- end
- end
-
- def clone
- Comment.new self
- end
-
- # == DEPRECATED
- # See REXML::Formatters
- #
- # output::
- # Where to write the string
- # indent::
- # An integer. If -1, no indenting will be used; otherwise, the
- # indentation will be this number of spaces, and children will be
- # indented an additional amount.
- # transitive::
- # Ignored by this class. The contents of comments are never modified.
- # ie_hack::
- # Needed for conformity to the child API, but not used by this class.
- def write( output, indent=-1, transitive=false, ie_hack=false )
- Kernel.warn("Comment.write is deprecated. See REXML::Formatters")
- indent( output, indent )
- output << START
- output << @string
- output << STOP
- end
-
- alias :to_s :string
-
- ##
- # Compares this Comment to another; the contents of the comment are used
- # in the comparison.
- def <=>(other)
- other.to_s <=> @string
- end
-
- ##
- # Compares this Comment to another; the contents of the comment are used
- # in the comparison.
- def ==( other )
- other.kind_of? Comment and
- (other <=> self) == 0
- end
-
- def node_type
- :comment
- end
- end
-end
-#vim:ts=2 sw=2 noexpandtab:
diff --git a/trunk/lib/rexml/doctype.rb b/trunk/lib/rexml/doctype.rb
deleted file mode 100644
index 35beabc566..0000000000
--- a/trunk/lib/rexml/doctype.rb
+++ /dev/null
@@ -1,270 +0,0 @@
-require "rexml/parent"
-require "rexml/parseexception"
-require "rexml/namespace"
-require 'rexml/entity'
-require 'rexml/attlistdecl'
-require 'rexml/xmltokens'
-
-module REXML
- # Represents an XML DOCTYPE declaration; that is, the contents of <!DOCTYPE
- # ... >. DOCTYPES can be used to declare the DTD of a document, as well as
- # being used to declare entities used in the document.
- class DocType < Parent
- include XMLTokens
- START = "<!DOCTYPE"
- STOP = ">"
- SYSTEM = "SYSTEM"
- PUBLIC = "PUBLIC"
- DEFAULT_ENTITIES = {
- 'gt'=>EntityConst::GT,
- 'lt'=>EntityConst::LT,
- 'quot'=>EntityConst::QUOT,
- "apos"=>EntityConst::APOS
- }
-
- # name is the name of the doctype
- # external_id is the referenced DTD, if given
- attr_reader :name, :external_id, :entities, :namespaces
-
- # Constructor
- #
- # dt = DocType.new( 'foo', '-//I/Hate/External/IDs' )
- # # <!DOCTYPE foo '-//I/Hate/External/IDs'>
- # dt = DocType.new( doctype_to_clone )
- # # Incomplete. Shallow clone of doctype
- #
- # +Note+ that the constructor:
- #
- # Doctype.new( Source.new( "<!DOCTYPE foo 'bar'>" ) )
- #
- # is _deprecated_. Do not use it. It will probably disappear.
- def initialize( first, parent=nil )
- @entities = DEFAULT_ENTITIES
- @long_name = @uri = nil
- if first.kind_of? String
- super()
- @name = first
- @external_id = parent
- elsif first.kind_of? DocType
- super( parent )
- @name = first.name
- @external_id = first.external_id
- elsif first.kind_of? Array
- super( parent )
- @name = first[0]
- @external_id = first[1]
- @long_name = first[2]
- @uri = first[3]
- elsif first.kind_of? Source
- super( parent )
- parser = Parsers::BaseParser.new( first )
- event = parser.pull
- if event[0] == :start_doctype
- @name, @external_id, @long_name, @uri, = event[1..-1]
- end
- else
- super()
- end
- end
-
- def node_type
- :doctype
- end
-
- def attributes_of element
- rv = []
- each do |child|
- child.each do |key,val|
- rv << Attribute.new(key,val)
- end if child.kind_of? AttlistDecl and child.element_name == element
- end
- rv
- end
-
- def attribute_of element, attribute
- att_decl = find do |child|
- child.kind_of? AttlistDecl and
- child.element_name == element and
- child.include? attribute
- end
- return nil unless att_decl
- att_decl[attribute]
- end
-
- def clone
- DocType.new self
- end
-
- # output::
- # Where to write the string
- # indent::
- # An integer. If -1, no indentation will be used; otherwise, the
- # indentation will be this number of spaces, and children will be
- # indented an additional amount.
- # transitive::
- # Ignored
- # ie_hack::
- # Ignored
- def write( output, indent=0, transitive=false, ie_hack=false )
- f = REXML::Formatters::Default.new
- indent( output, indent )
- output << START
- output << ' '
- output << @name
- output << " #@external_id" if @external_id
- output << " #{@long_name.inspect}" if @long_name
- output << " #{@uri.inspect}" if @uri
- unless @children.empty?
- next_indent = indent + 1
- output << ' ['
- @children.each { |child|
- output << "\n"
- f.write( child, output )
- }
- output << "\n]"
- end
- output << STOP
- end
-
- def context
- @parent.context
- end
-
- def entity( name )
- @entities[name].unnormalized if @entities[name]
- end
-
- def add child
- super(child)
- @entities = DEFAULT_ENTITIES.clone if @entities == DEFAULT_ENTITIES
- @entities[ child.name ] = child if child.kind_of? Entity
- end
-
- # This method retrieves the public identifier identifying the document's
- # DTD.
- #
- # Method contributed by Henrik Martensson
- def public
- case @external_id
- when "SYSTEM"
- nil
- when "PUBLIC"
- strip_quotes(@long_name)
- end
- end
-
- # This method retrieves the system identifier identifying the document's DTD
- #
- # Method contributed by Henrik Martensson
- def system
- case @external_id
- when "SYSTEM"
- strip_quotes(@long_name)
- when "PUBLIC"
- @uri.kind_of?(String) ? strip_quotes(@uri) : nil
- end
- end
-
- # This method returns a list of notations that have been declared in the
- # _internal_ DTD subset. Notations in the external DTD subset are not
- # listed.
- #
- # Method contributed by Henrik Martensson
- def notations
- children().select {|node| node.kind_of?(REXML::NotationDecl)}
- end
-
- # Retrieves a named notation. Only notations declared in the internal
- # DTD subset can be retrieved.
- #
- # Method contributed by Henrik Martensson
- def notation(name)
- notations.find { |notation_decl|
- notation_decl.name == name
- }
- end
-
- private
-
- # Method contributed by Henrik Martensson
- def strip_quotes(quoted_string)
- quoted_string =~ /^[\'\"].*[\'\"]$/ ?
- quoted_string[1, quoted_string.length-2] :
- quoted_string
- end
- end
-
- # We don't really handle any of these since we're not a validating
- # parser, so we can be pretty dumb about them. All we need to be able
- # to do is spew them back out on a write()
-
- # This is an abstract class. You never use this directly; it serves as a
- # parent class for the specific declarations.
- class Declaration < Child
- def initialize src
- super()
- @string = src
- end
-
- def to_s
- @string+'>'
- end
-
- # == DEPRECATED
- # See REXML::Formatters
- #
- def write( output, indent )
- output << to_s
- end
- end
-
- public
- class ElementDecl < Declaration
- def initialize( src )
- super
- end
- end
-
- class ExternalEntity < Child
- def initialize( src )
- super()
- @entity = src
- end
- def to_s
- @entity
- end
- def write( output, indent )
- output << @entity
- end
- end
-
- class NotationDecl < Child
- attr_accessor :public, :system
- def initialize name, middle, pub, sys
- super(nil)
- @name = name
- @middle = middle
- @public = pub
- @system = sys
- end
-
- def to_s
- "<!NOTATION #@name #@middle#{
- @public ? ' ' + public.inspect : ''
- }#{
- @system ? ' ' +@system.inspect : ''
- }>"
- end
-
- def write( output, indent=-1 )
- output << to_s
- end
-
- # This method retrieves the name of the notation.
- #
- # Method contributed by Henrik Martensson
- def name
- @name
- end
- end
-end
diff --git a/trunk/lib/rexml/document.rb b/trunk/lib/rexml/document.rb
deleted file mode 100644
index 42d70bb6da..0000000000
--- a/trunk/lib/rexml/document.rb
+++ /dev/null
@@ -1,208 +0,0 @@
-require "rexml/element"
-require "rexml/xmldecl"
-require "rexml/source"
-require "rexml/comment"
-require "rexml/doctype"
-require "rexml/instruction"
-require "rexml/rexml"
-require "rexml/parseexception"
-require "rexml/output"
-require "rexml/parsers/baseparser"
-require "rexml/parsers/streamparser"
-require "rexml/parsers/treeparser"
-
-module REXML
- # Represents a full XML document, including PIs, a doctype, etc. A
- # Document has a single child that can be accessed by root().
- # Note that if you want to have an XML declaration written for a document
- # you create, you must add one; REXML documents do not write a default
- # declaration for you. See |DECLARATION| and |write|.
- class Document < Element
- # A convenient default XML declaration. If you want an XML declaration,
- # the easiest way to add one is mydoc << Document::DECLARATION
- # +DEPRECATED+
- # Use: mydoc << XMLDecl.default
- DECLARATION = XMLDecl.default
-
- # Constructor
- # @param source if supplied, must be a Document, String, or IO.
- # Documents have their context and Element attributes cloned.
- # Strings are expected to be valid XML documents. IOs are expected
- # to be sources of valid XML documents.
- # @param context if supplied, contains the context of the document;
- # this should be a Hash.
- def initialize( source = nil, context = {} )
- super()
- @context = context
- return if source.nil?
- if source.kind_of? Document
- @context = source.context
- super source
- else
- build( source )
- end
- end
-
- def node_type
- :document
- end
-
- # Should be obvious
- def clone
- Document.new self
- end
-
- # According to the XML spec, a root node has no expanded name
- def expanded_name
- ''
- #d = doc_type
- #d ? d.name : "UNDEFINED"
- end
-
- alias :name :expanded_name
-
- # We override this, because XMLDecls and DocTypes must go at the start
- # of the document
- def add( child )
- if child.kind_of? XMLDecl
- @children.unshift child
- child.parent = self
- elsif child.kind_of? DocType
- # Find first Element or DocType node and insert the decl right
- # before it. If there is no such node, just insert the child at the
- # end. If there is a child and it is an DocType, then replace it.
- insert_before_index = 0
- @children.find { |x|
- insert_before_index += 1
- x.kind_of?(Element) || x.kind_of?(DocType)
- }
- if @children[ insert_before_index ] # Not null = not end of list
- if @children[ insert_before_index ].kind_of DocType
- @children[ insert_before_index ] = child
- else
- @children[ index_before_index-1, 0 ] = child
- end
- else # Insert at end of list
- @children[insert_before_index] = child
- end
- child.parent = self
- else
- rv = super
- raise "attempted adding second root element to document" if @elements.size > 1
- rv
- end
- end
- alias :<< :add
-
- def add_element(arg=nil, arg2=nil)
- rv = super
- raise "attempted adding second root element to document" if @elements.size > 1
- rv
- end
-
- # @return the root Element of the document, or nil if this document
- # has no children.
- def root
- elements[1]
- #self
- #@children.find { |item| item.kind_of? Element }
- end
-
- # @return the DocType child of the document, if one exists,
- # and nil otherwise.
- def doctype
- @children.find { |item| item.kind_of? DocType }
- end
-
- # @return the XMLDecl of this document; if no XMLDecl has been
- # set, the default declaration is returned.
- def xml_decl
- rv = @children[0]
- return rv if rv.kind_of? XMLDecl
- rv = @children.unshift(XMLDecl.default)[0]
- end
-
- # @return the XMLDecl version of this document as a String.
- # If no XMLDecl has been set, returns the default version.
- def version
- xml_decl().version
- end
-
- # @return the XMLDecl encoding of this document as a String.
- # If no XMLDecl has been set, returns the default encoding.
- def encoding
- xml_decl().encoding
- end
-
- # @return the XMLDecl standalone value of this document as a String.
- # If no XMLDecl has been set, returns the default setting.
- def stand_alone?
- xml_decl().stand_alone?
- end
-
- # Write the XML tree out, optionally with indent. This writes out the
- # entire XML document, including XML declarations, doctype declarations,
- # and processing instructions (if any are given).
- #
- # A controversial point is whether Document should always write the XML
- # declaration (<?xml version='1.0'?>) whether or not one is given by the
- # user (or source document). REXML does not write one if one was not
- # specified, because it adds unnecessary bandwidth to applications such
- # as XML-RPC.
- #
- # See also the classes in the rexml/formatters package for the proper way
- # to change the default formatting of XML output
- #
- # _Examples_
- # Document.new("<a><b/></a>").serialize
- #
- # output_string = ""
- # tr = Transitive.new( output_string )
- # Document.new("<a><b/></a>").serialize( tr )
- #
- # output::
- # output an object which supports '<< string'; this is where the
- # document will be written.
- # indent::
- # An integer. If -1, no indenting will be used; otherwise, the
- # indentation will be twice this number of spaces, and children will be
- # indented an additional amount. For a value of 3, every item will be
- # indented 3 more levels, or 6 more spaces (2 * 3). Defaults to -1
- # transitive::
- # If transitive is true and indent is >= 0, then the output will be
- # pretty-printed in such a way that the added whitespace does not affect
- # the absolute *value* of the document -- that is, it leaves the value
- # and number of Text nodes in the document unchanged.
- # ie_hack::
- # Internet Explorer is the worst piece of crap to have ever been
- # written, with the possible exception of Windows itself. Since IE is
- # unable to parse proper XML, we have to provide a hack to generate XML
- # that IE's limited abilities can handle. This hack inserts a space
- # before the /> on empty tags. Defaults to false
- def write( output=$stdout, indent=-1, transitive=false, ie_hack=false )
- if xml_decl.encoding != "UTF-8" && !output.kind_of?(Output)
- output = Output.new( output, xml_decl.encoding )
- end
- formatter = if indent > -1
- if transitive
- REXML::Formatters::Transitive.new( indent, ie_hack )
- else
- REXML::Formatters::Pretty.new( indent, ie_hack )
- end
- else
- REXML::Formatters::Default.new( ie_hack )
- end
- formatter.write( self, output )
- end
-
-
- def Document::parse_stream( source, listener )
- Parsers::StreamParser.new( source, listener ).parse
- end
-
- private
- def build( source )
- Parsers::TreeParser.new( source, self ).parse
- end
- end
-end
diff --git a/trunk/lib/rexml/dtd/attlistdecl.rb b/trunk/lib/rexml/dtd/attlistdecl.rb
deleted file mode 100644
index e176bb0749..0000000000
--- a/trunk/lib/rexml/dtd/attlistdecl.rb
+++ /dev/null
@@ -1,10 +0,0 @@
-require "rexml/child"
-module REXML
- module DTD
- class AttlistDecl < Child
- START = "<!ATTLIST"
- START_RE = /^\s*#{START}/um
- PATTERN_RE = /\s*(#{START}.*?>)/um
- end
- end
-end
diff --git a/trunk/lib/rexml/dtd/dtd.rb b/trunk/lib/rexml/dtd/dtd.rb
deleted file mode 100644
index 4f735d4812..0000000000
--- a/trunk/lib/rexml/dtd/dtd.rb
+++ /dev/null
@@ -1,51 +0,0 @@
-require "rexml/dtd/elementdecl"
-require "rexml/dtd/entitydecl"
-require "rexml/comment"
-require "rexml/dtd/notationdecl"
-require "rexml/dtd/attlistdecl"
-require "rexml/parent"
-
-module REXML
- module DTD
- class Parser
- def Parser.parse( input )
- case input
- when String
- parse_helper input
- when File
- parse_helper input.read
- end
- end
-
- # Takes a String and parses it out
- def Parser.parse_helper( input )
- contents = Parent.new
- while input.size > 0
- case input
- when ElementDecl.PATTERN_RE
- match = $&
- source = $'
- contents << ElementDecl.new( match )
- when AttlistDecl.PATTERN_RE
- matchdata = $~
- source = $'
- contents << AttlistDecl.new( matchdata )
- when EntityDecl.PATTERN_RE
- matchdata = $~
- source = $'
- contents << EntityDecl.new( matchdata )
- when Comment.PATTERN_RE
- matchdata = $~
- source = $'
- contents << Comment.new( matchdata )
- when NotationDecl.PATTERN_RE
- matchdata = $~
- source = $'
- contents << NotationDecl.new( matchdata )
- end
- end
- contents
- end
- end
- end
-end
diff --git a/trunk/lib/rexml/dtd/elementdecl.rb b/trunk/lib/rexml/dtd/elementdecl.rb
deleted file mode 100644
index c4e620f389..0000000000
--- a/trunk/lib/rexml/dtd/elementdecl.rb
+++ /dev/null
@@ -1,17 +0,0 @@
-require "rexml/child"
-module REXML
- module DTD
- class ElementDecl < Child
- START = "<!ELEMENT"
- START_RE = /^\s*#{START}/um
- PATTERN_RE = /^\s*(#{START}.*?)>/um
- PATTERN_RE = /^\s*#{START}\s+((?:[:\w_][-\.\w_]*:)?[-!\*\.\w_]*)(.*?)>/
- #\s*((((["']).*?\5)|[^\/'">]*)*?)(\/)?>/um, true)
-
- def initialize match
- @name = match[1]
- @rest = match[2]
- end
- end
- end
-end
diff --git a/trunk/lib/rexml/dtd/entitydecl.rb b/trunk/lib/rexml/dtd/entitydecl.rb
deleted file mode 100644
index a5f1520f2b..0000000000
--- a/trunk/lib/rexml/dtd/entitydecl.rb
+++ /dev/null
@@ -1,56 +0,0 @@
-require "rexml/child"
-module REXML
- module DTD
- class EntityDecl < Child
- START = "<!ENTITY"
- START_RE = /^\s*#{START}/um
- PUBLIC = /^\s*#{START}\s+(?:%\s+)?(\w+)\s+PUBLIC\s+((["']).*?\3)\s+((["']).*?\5)\s*>/um
- SYSTEM = /^\s*#{START}\s+(?:%\s+)?(\w+)\s+SYSTEM\s+((["']).*?\3)(?:\s+NDATA\s+\w+)?\s*>/um
- PLAIN = /^\s*#{START}\s+(\w+)\s+((["']).*?\3)\s*>/um
- PERCENT = /^\s*#{START}\s+%\s+(\w+)\s+((["']).*?\3)\s*>/um
- # <!ENTITY name SYSTEM "...">
- # <!ENTITY name "...">
- def initialize src
- super()
- md = nil
- if src.match( PUBLIC )
- md = src.match( PUBLIC, true )
- @middle = "PUBLIC"
- @content = "#{md[2]} #{md[4]}"
- elsif src.match( SYSTEM )
- md = src.match( SYSTEM, true )
- @middle = "SYSTEM"
- @content = md[2]
- elsif src.match( PLAIN )
- md = src.match( PLAIN, true )
- @middle = ""
- @content = md[2]
- elsif src.match( PERCENT )
- md = src.match( PERCENT, true )
- @middle = ""
- @content = md[2]
- end
- raise ParseException.new("failed Entity match", src) if md.nil?
- @name = md[1]
- end
-
- def to_s
- rv = "<!ENTITY #@name "
- rv << "#@middle " if @middle.size > 0
- rv << @content
- rv
- end
-
- def write( output, indent )
- indent( output, indent )
- output << to_s
- end
-
- def EntityDecl.parse_source source, listener
- md = source.match( PATTERN_RE, true )
- thing = md[0].squeeze(" \t\n\r")
- listener.send inspect.downcase, thing
- end
- end
- end
-end
diff --git a/trunk/lib/rexml/dtd/notationdecl.rb b/trunk/lib/rexml/dtd/notationdecl.rb
deleted file mode 100644
index a47ff8f24b..0000000000
--- a/trunk/lib/rexml/dtd/notationdecl.rb
+++ /dev/null
@@ -1,39 +0,0 @@
-require "rexml/child"
-module REXML
- module DTD
- class NotationDecl < Child
- START = "<!NOTATION"
- START_RE = /^\s*#{START}/um
- PUBLIC = /^\s*#{START}\s+(\w[\w-]*)\s+(PUBLIC)\s+((["']).*?\4)\s*>/um
- SYSTEM = /^\s*#{START}\s+(\w[\w-]*)\s+(SYSTEM)\s+((["']).*?\4)\s*>/um
- def initialize src
- super()
- if src.match( PUBLIC )
- md = src.match( PUBLIC, true )
- elsif src.match( SYSTEM )
- md = src.match( SYSTEM, true )
- else
- raise ParseException.new( "error parsing notation: no matching pattern", src )
- end
- @name = md[1]
- @middle = md[2]
- @rest = md[3]
- end
-
- def to_s
- "<!NOTATION #@name #@middle #@rest>"
- end
-
- def write( output, indent )
- indent( output, indent )
- output << to_s
- end
-
- def NotationDecl.parse_source source, listener
- md = source.match( PATTERN_RE, true )
- thing = md[0].squeeze(" \t\n\r")
- listener.send inspect.downcase, thing
- end
- end
- end
-end
diff --git a/trunk/lib/rexml/element.rb b/trunk/lib/rexml/element.rb
deleted file mode 100644
index a1c01e7de7..0000000000
--- a/trunk/lib/rexml/element.rb
+++ /dev/null
@@ -1,1245 +0,0 @@
-require "rexml/parent"
-require "rexml/namespace"
-require "rexml/attribute"
-require "rexml/cdata"
-require "rexml/xpath"
-require "rexml/parseexception"
-
-module REXML
- # An implementation note about namespaces:
- # As we parse, when we find namespaces we put them in a hash and assign
- # them a unique ID. We then convert the namespace prefix for the node
- # to the unique ID. This makes namespace lookup much faster for the
- # cost of extra memory use. We save the namespace prefix for the
- # context node and convert it back when we write it.
- @@namespaces = {}
-
- # Represents a tagged XML element. Elements are characterized by
- # having children, attributes, and names, and can themselves be
- # children.
- class Element < Parent
- include Namespace
-
- UNDEFINED = "UNDEFINED"; # The default name
-
- # Mechanisms for accessing attributes and child elements of this
- # element.
- attr_reader :attributes, :elements
- # The context holds information about the processing environment, such as
- # whitespace handling.
- attr_accessor :context
-
- # Constructor
- # arg::
- # if not supplied, will be set to the default value.
- # If a String, the name of this object will be set to the argument.
- # If an Element, the object will be shallowly cloned; name,
- # attributes, and namespaces will be copied. Children will +not+ be
- # copied.
- # parent::
- # if supplied, must be a Parent, and will be used as
- # the parent of this object.
- # context::
- # If supplied, must be a hash containing context items. Context items
- # include:
- # * <tt>:respect_whitespace</tt> the value of this is :+all+ or an array of
- # strings being the names of the elements to respect
- # whitespace for. Defaults to :+all+.
- # * <tt>:compress_whitespace</tt> the value can be :+all+ or an array of
- # strings being the names of the elements to ignore whitespace on.
- # Overrides :+respect_whitespace+.
- # * <tt>:ignore_whitespace_nodes</tt> the value can be :+all+ or an array
- # of strings being the names of the elements in which to ignore
- # whitespace-only nodes. If this is set, Text nodes which contain only
- # whitespace will not be added to the document tree.
- # * <tt>:raw</tt> can be :+all+, or an array of strings being the names of
- # the elements to process in raw mode. In raw mode, special
- # characters in text is not converted to or from entities.
- def initialize( arg = UNDEFINED, parent=nil, context=nil )
- super(parent)
-
- @elements = Elements.new(self)
- @attributes = Attributes.new(self)
- @context = context
-
- if arg.kind_of? String
- self.name = arg
- elsif arg.kind_of? Element
- self.name = arg.expanded_name
- arg.attributes.each_attribute{ |attribute|
- @attributes << Attribute.new( attribute )
- }
- @context = arg.context
- end
- end
-
- def inspect
- rv = "<#@expanded_name"
-
- @attributes.each_attribute do |attr|
- rv << " "
- attr.write( rv, 0 )
- end
-
- if children.size > 0
- rv << "> ... </>"
- else
- rv << "/>"
- end
- end
-
-
- # Creates a shallow copy of self.
- # d = Document.new "<a><b/><b/><c><d/></c></a>"
- # new_a = d.root.clone
- # puts new_a # => "<a/>"
- def clone
- self.class.new self
- end
-
- # Evaluates to the root node of the document that this element
- # belongs to. If this element doesn't belong to a document, but does
- # belong to another Element, the parent's root will be returned, until the
- # earliest ancestor is found.
- #
- # Note that this is not the same as the document element.
- # In the following example, <a> is the document element, and the root
- # node is the parent node of the document element. You may ask yourself
- # why the root node is useful: consider the doctype and XML declaration,
- # and any processing instructions before the document element... they
- # are children of the root node, or siblings of the document element.
- # The only time this isn't true is when an Element is created that is
- # not part of any Document. In this case, the ancestor that has no
- # parent acts as the root node.
- # d = Document.new '<a><b><c/></b></a>'
- # a = d[1] ; c = a[1][1]
- # d.root_node == d # TRUE
- # a.root_node # namely, d
- # c.root_node # again, d
- def root_node
- parent.nil? ? self : parent.root_node
- end
-
- def root
- return elements[1] if self.kind_of? Document
- return self if parent.kind_of? Document or parent.nil?
- return parent.root
- end
-
- # Evaluates to the document to which this element belongs, or nil if this
- # element doesn't belong to a document.
- def document
- rt = root
- rt.parent if rt
- end
-
- # Evaluates to +true+ if whitespace is respected for this element. This
- # is the case if:
- # 1. Neither :+respect_whitespace+ nor :+compress_whitespace+ has any value
- # 2. The context has :+respect_whitespace+ set to :+all+ or
- # an array containing the name of this element, and
- # :+compress_whitespace+ isn't set to :+all+ or an array containing the
- # name of this element.
- # The evaluation is tested against +expanded_name+, and so is namespace
- # sensitive.
- def whitespace
- @whitespace = nil
- if @context
- if @context[:respect_whitespace]
- @whitespace = (@context[:respect_whitespace] == :all or
- @context[:respect_whitespace].include? expanded_name)
- end
- @whitespace = false if (@context[:compress_whitespace] and
- (@context[:compress_whitespace] == :all or
- @context[:compress_whitespace].include? expanded_name)
- )
- end
- @whitespace = true unless @whitespace == false
- @whitespace
- end
-
- def ignore_whitespace_nodes
- @ignore_whitespace_nodes = false
- if @context
- if @context[:ignore_whitespace_nodes]
- @ignore_whitespace_nodes =
- (@context[:ignore_whitespace_nodes] == :all or
- @context[:ignore_whitespace_nodes].include? expanded_name)
- end
- end
- end
-
- # Evaluates to +true+ if raw mode is set for this element. This
- # is the case if the context has :+raw+ set to :+all+ or
- # an array containing the name of this element.
- #
- # The evaluation is tested against +expanded_name+, and so is namespace
- # sensitive.
- def raw
- @raw = (@context and @context[:raw] and
- (@context[:raw] == :all or
- @context[:raw].include? expanded_name))
- @raw
- end
-
- #once :whitespace, :raw, :ignore_whitespace_nodes
-
- #################################################
- # Namespaces #
- #################################################
-
- # Evaluates to an +Array+ containing the prefixes (names) of all defined
- # namespaces at this context node.
- # doc = Document.new("<a xmlns:x='1' xmlns:y='2'><b/><c xmlns:z='3'/></a>")
- # doc.elements['//b'].prefixes # -> ['x', 'y']
- def prefixes
- prefixes = []
- prefixes = parent.prefixes if parent
- prefixes |= attributes.prefixes
- return prefixes
- end
-
- def namespaces
- namespaces = {}
- namespaces = parent.namespaces if parent
- namespaces = namespaces.merge( attributes.namespaces )
- return namespaces
- end
-
- # Evalutas to the URI for a prefix, or the empty string if no such
- # namespace is declared for this element. Evaluates recursively for
- # ancestors. Returns the default namespace, if there is one.
- # prefix::
- # the prefix to search for. If not supplied, returns the default
- # namespace if one exists
- # Returns::
- # the namespace URI as a String, or nil if no such namespace
- # exists. If the namespace is undefined, returns an empty string
- # doc = Document.new("<a xmlns='1' xmlns:y='2'><b/><c xmlns:z='3'/></a>")
- # b = doc.elements['//b']
- # b.namespace # -> '1'
- # b.namespace("y") # -> '2'
- def namespace(prefix=nil)
- if prefix.nil?
- prefix = prefix()
- end
- if prefix == ''
- prefix = "xmlns"
- else
- prefix = "xmlns:#{prefix}" unless prefix[0,5] == 'xmlns'
- end
- ns = attributes[ prefix ]
- ns = parent.namespace(prefix) if ns.nil? and parent
- ns = '' if ns.nil? and prefix == 'xmlns'
- return ns
- end
-
- # Adds a namespace to this element.
- # prefix::
- # the prefix string, or the namespace URI if +uri+ is not
- # supplied
- # uri::
- # the namespace URI. May be nil, in which +prefix+ is used as
- # the URI
- # Evaluates to: this Element
- # a = Element.new("a")
- # a.add_namespace("xmlns:foo", "bar" )
- # a.add_namespace("foo", "bar") # shorthand for previous line
- # a.add_namespace("twiddle")
- # puts a #-> <a xmlns:foo='bar' xmlns='twiddle'/>
- def add_namespace( prefix, uri=nil )
- unless uri
- @attributes["xmlns"] = prefix
- else
- prefix = "xmlns:#{prefix}" unless prefix =~ /^xmlns:/
- @attributes[ prefix ] = uri
- end
- self
- end
-
- # Removes a namespace from this node. This only works if the namespace is
- # actually declared in this node. If no argument is passed, deletes the
- # default namespace.
- #
- # Evaluates to: this element
- # doc = Document.new "<a xmlns:foo='bar' xmlns='twiddle'/>"
- # doc.root.delete_namespace
- # puts doc # -> <a xmlns:foo='bar'/>
- # doc.root.delete_namespace 'foo'
- # puts doc # -> <a/>
- def delete_namespace namespace="xmlns"
- namespace = "xmlns:#{namespace}" unless namespace == 'xmlns'
- attribute = attributes.get_attribute(namespace)
- attribute.remove unless attribute.nil?
- self
- end
-
- #################################################
- # Elements #
- #################################################
-
- # Adds a child to this element, optionally setting attributes in
- # the element.
- # element::
- # optional. If Element, the element is added.
- # Otherwise, a new Element is constructed with the argument (see
- # Element.initialize).
- # attrs::
- # If supplied, must be a Hash containing String name,value
- # pairs, which will be used to set the attributes of the new Element.
- # Returns:: the Element that was added
- # el = doc.add_element 'my-tag'
- # el = doc.add_element 'my-tag', {'attr1'=>'val1', 'attr2'=>'val2'}
- # el = Element.new 'my-tag'
- # doc.add_element el
- def add_element element, attrs=nil
- raise "First argument must be either an element name, or an Element object" if element.nil?
- el = @elements.add(element)
- attrs.each do |key, value|
- el.attributes[key]=value
- end if attrs.kind_of? Hash
- el
- end
-
- # Deletes a child element.
- # element::
- # Must be an +Element+, +String+, or +Integer+. If Element,
- # the element is removed. If String, the element is found (via XPath)
- # and removed. <em>This means that any parent can remove any
- # descendant.<em> If Integer, the Element indexed by that number will be
- # removed.
- # Returns:: the element that was removed.
- # doc.delete_element "/a/b/c[@id='4']"
- # doc.delete_element doc.elements["//k"]
- # doc.delete_element 1
- def delete_element element
- @elements.delete element
- end
-
- # Evaluates to +true+ if this element has at least one child Element
- # doc = Document.new "<a><b/><c>Text</c></a>"
- # doc.root.has_elements # -> true
- # doc.elements["/a/b"].has_elements # -> false
- # doc.elements["/a/c"].has_elements # -> false
- def has_elements?
- !@elements.empty?
- end
-
- # Iterates through the child elements, yielding for each Element that
- # has a particular attribute set.
- # key::
- # the name of the attribute to search for
- # value::
- # the value of the attribute
- # max::
- # (optional) causes this method to return after yielding
- # for this number of matching children
- # name::
- # (optional) if supplied, this is an XPath that filters
- # the children to check.
- #
- # doc = Document.new "<a><b @id='1'/><c @id='2'/><d @id='1'/><e/></a>"
- # # Yields b, c, d
- # doc.root.each_element_with_attribute( 'id' ) {|e| p e}
- # # Yields b, d
- # doc.root.each_element_with_attribute( 'id', '1' ) {|e| p e}
- # # Yields b
- # doc.root.each_element_with_attribute( 'id', '1', 1 ) {|e| p e}
- # # Yields d
- # doc.root.each_element_with_attribute( 'id', '1', 0, 'd' ) {|e| p e}
- def each_element_with_attribute( key, value=nil, max=0, name=nil, &block ) # :yields: Element
- each_with_something( proc {|child|
- if value.nil?
- child.attributes[key] != nil
- else
- child.attributes[key]==value
- end
- }, max, name, &block )
- end
-
- # Iterates through the children, yielding for each Element that
- # has a particular text set.
- # text::
- # the text to search for. If nil, or not supplied, will iterate
- # over all +Element+ children that contain at least one +Text+ node.
- # max::
- # (optional) causes this method to return after yielding
- # for this number of matching children
- # name::
- # (optional) if supplied, this is an XPath that filters
- # the children to check.
- #
- # doc = Document.new '<a><b>b</b><c>b</c><d>d</d><e/></a>'
- # # Yields b, c, d
- # doc.each_element_with_text {|e|p e}
- # # Yields b, c
- # doc.each_element_with_text('b'){|e|p e}
- # # Yields b
- # doc.each_element_with_text('b', 1){|e|p e}
- # # Yields d
- # doc.each_element_with_text(nil, 0, 'd'){|e|p e}
- def each_element_with_text( text=nil, max=0, name=nil, &block ) # :yields: Element
- each_with_something( proc {|child|
- if text.nil?
- child.has_text?
- else
- child.text == text
- end
- }, max, name, &block )
- end
-
- # Synonym for Element.elements.each
- def each_element( xpath=nil, &block ) # :yields: Element
- @elements.each( xpath, &block )
- end
-
- # Synonym for Element.to_a
- # This is a little slower than calling elements.each directly.
- # xpath:: any XPath by which to search for elements in the tree
- # Returns:: an array of Elements that match the supplied path
- def get_elements( xpath )
- @elements.to_a( xpath )
- end
-
- # Returns the next sibling that is an element, or nil if there is
- # no Element sibling after this one
- # doc = Document.new '<a><b/>text<c/></a>'
- # doc.root.elements['b'].next_element #-> <c/>
- # doc.root.elements['c'].next_element #-> nil
- def next_element
- element = next_sibling
- element = element.next_sibling until element.nil? or element.kind_of? Element
- return element
- end
-
- # Returns the previous sibling that is an element, or nil if there is
- # no Element sibling prior to this one
- # doc = Document.new '<a><b/>text<c/></a>'
- # doc.root.elements['c'].previous_element #-> <b/>
- # doc.root.elements['b'].previous_element #-> nil
- def previous_element
- element = previous_sibling
- element = element.previous_sibling until element.nil? or element.kind_of? Element
- return element
- end
-
-
- #################################################
- # Text #
- #################################################
-
- # Evaluates to +true+ if this element has at least one Text child
- def has_text?
- not text().nil?
- end
-
- # A convenience method which returns the String value of the _first_
- # child text element, if one exists, and +nil+ otherwise.
- #
- # <em>Note that an element may have multiple Text elements, perhaps
- # separated by other children</em>. Be aware that this method only returns
- # the first Text node.
- #
- # This method returns the +value+ of the first text child node, which
- # ignores the +raw+ setting, so always returns normalized text. See
- # the Text::value documentation.
- #
- # doc = Document.new "<p>some text <b>this is bold!</b> more text</p>"
- # # The element 'p' has two text elements, "some text " and " more text".
- # doc.root.text #-> "some text "
- def text( path = nil )
- rv = get_text(path)
- return rv.value unless rv.nil?
- nil
- end
-
- # Returns the first child Text node, if any, or +nil+ otherwise.
- # This method returns the actual +Text+ node, rather than the String content.
- # doc = Document.new "<p>some text <b>this is bold!</b> more text</p>"
- # # The element 'p' has two text elements, "some text " and " more text".
- # doc.root.get_text.value #-> "some text "
- def get_text path = nil
- rv = nil
- if path
- element = @elements[ path ]
- rv = element.get_text unless element.nil?
- else
- rv = @children.find { |node| node.kind_of? Text }
- end
- return rv
- end
-
- # Sets the first Text child of this object. See text() for a
- # discussion about Text children.
- #
- # If a Text child already exists, the child is replaced by this
- # content. This means that Text content can be deleted by calling
- # this method with a nil argument. In this case, the next Text
- # child becomes the first Text child. In no case is the order of
- # any siblings disturbed.
- # text::
- # If a String, a new Text child is created and added to
- # this Element as the first Text child. If Text, the text is set
- # as the first Child element. If nil, then any existing first Text
- # child is removed.
- # Returns:: this Element.
- # doc = Document.new '<a><b/></a>'
- # doc.root.text = 'Sean' #-> '<a><b/>Sean</a>'
- # doc.root.text = 'Elliott' #-> '<a><b/>Elliott</a>'
- # doc.root.add_element 'c' #-> '<a><b/>Elliott<c/></a>'
- # doc.root.text = 'Russell' #-> '<a><b/>Russell<c/></a>'
- # doc.root.text = nil #-> '<a><b/><c/></a>'
- def text=( text )
- if text.kind_of? String
- text = Text.new( text, whitespace(), nil, raw() )
- elsif text and !text.kind_of? Text
- text = Text.new( text.to_s, whitespace(), nil, raw() )
- end
- old_text = get_text
- if text.nil?
- old_text.remove unless old_text.nil?
- else
- if old_text.nil?
- self << text
- else
- old_text.replace_with( text )
- end
- end
- return self
- end
-
- # A helper method to add a Text child. Actual Text instances can
- # be added with regular Parent methods, such as add() and <<()
- # text::
- # if a String, a new Text instance is created and added
- # to the parent. If Text, the object is added directly.
- # Returns:: this Element
- # e = Element.new('a') #-> <e/>
- # e.add_text 'foo' #-> <e>foo</e>
- # e.add_text Text.new(' bar') #-> <e>foo bar</e>
- # Note that at the end of this example, the branch has <b>3</b> nodes; the 'e'
- # element and <b>2</b> Text node children.
- def add_text( text )
- if text.kind_of? String
- if @children[-1].kind_of? Text
- @children[-1] << text
- return
- end
- text = Text.new( text, whitespace(), nil, raw() )
- end
- self << text unless text.nil?
- return self
- end
-
- def node_type
- :element
- end
-
- def xpath
- path_elements = []
- cur = self
- path_elements << __to_xpath_helper( self )
- while cur.parent
- cur = cur.parent
- path_elements << __to_xpath_helper( cur )
- end
- return path_elements.reverse.join( "/" )
- end
-
- #################################################
- # Attributes #
- #################################################
-
- def attribute( name, namespace=nil )
- prefix = nil
- if namespaces.respond_to? :key
- prefix = namespaces.key(namespace) if namespace
- else
- prefix = namespaces.index(namespace) if namespace
- end
- prefix = nil if prefix == 'xmlns'
-
- ret_val =
- attributes.get_attribute( "#{prefix ? prefix + ':' : ''}#{name}" )
-
- return ret_val unless ret_val.nil?
- return nil if prefix.nil?
-
- # now check that prefix'es namespace is not the same as the
- # default namespace
- return nil unless ( namespaces[ prefix ] == namespaces[ 'xmlns' ] )
-
- attributes.get_attribute( name )
-
- end
-
- # Evaluates to +true+ if this element has any attributes set, false
- # otherwise.
- def has_attributes?
- return !@attributes.empty?
- end
-
- # Adds an attribute to this element, overwriting any existing attribute
- # by the same name.
- # key::
- # can be either an Attribute or a String. If an Attribute,
- # the attribute is added to the list of Element attributes. If String,
- # the argument is used as the name of the new attribute, and the value
- # parameter must be supplied.
- # value::
- # Required if +key+ is a String, and ignored if the first argument is
- # an Attribute. This is a String, and is used as the value
- # of the new Attribute. This should be the unnormalized value of the
- # attribute (without entities).
- # Returns:: the Attribute added
- # e = Element.new 'e'
- # e.add_attribute( 'a', 'b' ) #-> <e a='b'/>
- # e.add_attribute( 'x:a', 'c' ) #-> <e a='b' x:a='c'/>
- # e.add_attribute Attribute.new('b', 'd') #-> <e a='b' x:a='c' b='d'/>
- def add_attribute( key, value=nil )
- if key.kind_of? Attribute
- @attributes << key
- else
- @attributes[key] = value
- end
- end
-
- # Add multiple attributes to this element.
- # hash:: is either a hash, or array of arrays
- # el.add_attributes( {"name1"=>"value1", "name2"=>"value2"} )
- # el.add_attributes( [ ["name1","value1"], ["name2"=>"value2"] ] )
- def add_attributes hash
- if hash.kind_of? Hash
- hash.each_pair {|key, value| @attributes[key] = value }
- elsif hash.kind_of? Array
- hash.each { |value| @attributes[ value[0] ] = value[1] }
- end
- end
-
- # Removes an attribute
- # key::
- # either an Attribute or a String. In either case, the
- # attribute is found by matching the attribute name to the argument,
- # and then removed. If no attribute is found, no action is taken.
- # Returns::
- # the attribute removed, or nil if this Element did not contain
- # a matching attribute
- # e = Element.new('E')
- # e.add_attribute( 'name', 'Sean' ) #-> <E name='Sean'/>
- # r = e.add_attribute( 'sur:name', 'Russell' ) #-> <E name='Sean' sur:name='Russell'/>
- # e.delete_attribute( 'name' ) #-> <E sur:name='Russell'/>
- # e.delete_attribute( r ) #-> <E/>
- def delete_attribute(key)
- attr = @attributes.get_attribute(key)
- attr.remove unless attr.nil?
- end
-
- #################################################
- # Other Utilities #
- #################################################
-
- # Get an array of all CData children.
- # IMMUTABLE
- def cdatas
- find_all { |child| child.kind_of? CData }.freeze
- end
-
- # Get an array of all Comment children.
- # IMMUTABLE
- def comments
- find_all { |child| child.kind_of? Comment }.freeze
- end
-
- # Get an array of all Instruction children.
- # IMMUTABLE
- def instructions
- find_all { |child| child.kind_of? Instruction }.freeze
- end
-
- # Get an array of all Text children.
- # IMMUTABLE
- def texts
- find_all { |child| child.kind_of? Text }.freeze
- end
-
- # == DEPRECATED
- # See REXML::Formatters
- #
- # Writes out this element, and recursively, all children.
- # output::
- # output an object which supports '<< string'; this is where the
- # document will be written.
- # indent::
- # An integer. If -1, no indenting will be used; otherwise, the
- # indentation will be this number of spaces, and children will be
- # indented an additional amount. Defaults to -1
- # transitive::
- # If transitive is true and indent is >= 0, then the output will be
- # pretty-printed in such a way that the added whitespace does not affect
- # the parse tree of the document
- # ie_hack::
- # Internet Explorer is the worst piece of crap to have ever been
- # written, with the possible exception of Windows itself. Since IE is
- # unable to parse proper XML, we have to provide a hack to generate XML
- # that IE's limited abilities can handle. This hack inserts a space
- # before the /> on empty tags. Defaults to false
- #
- # out = ''
- # doc.write( out ) #-> doc is written to the string 'out'
- # doc.write( $stdout ) #-> doc written to the console
- def write(output=$stdout, indent=-1, transitive=false, ie_hack=false)
- Kernel.warn("#{self.class.name}.write is deprecated. See REXML::Formatters")
- formatter = if indent > -1
- if transitive
- REXML::Formatters::Transitive.new( indent, ie_hack )
- else
- REXML::Formatters::Pretty.new( indent, ie_hack )
- end
- else
- REXML::Formatters::Default.new( ie_hack )
- end
- formatter.write( self, output )
- end
-
-
- private
- def __to_xpath_helper node
- rv = node.expanded_name.clone
- if node.parent
- results = node.parent.find_all {|n|
- n.kind_of?(REXML::Element) and n.expanded_name == node.expanded_name
- }
- if results.length > 1
- idx = results.index( node )
- rv << "[#{idx+1}]"
- end
- end
- rv
- end
-
- # A private helper method
- def each_with_something( test, max=0, name=nil )
- num = 0
- @elements.each( name ){ |child|
- yield child if test.call(child) and num += 1
- return if max>0 and num == max
- }
- end
- end
-
- ########################################################################
- # ELEMENTS #
- ########################################################################
-
- # A class which provides filtering of children for Elements, and
- # XPath search support. You are expected to only encounter this class as
- # the <tt>element.elements</tt> object. Therefore, you are
- # _not_ expected to instantiate this yourself.
- class Elements
- include Enumerable
- # Constructor
- # parent:: the parent Element
- def initialize parent
- @element = parent
- end
-
- # Fetches a child element. Filters only Element children, regardless of
- # the XPath match.
- # index::
- # the search parameter. This is either an Integer, which
- # will be used to find the index'th child Element, or an XPath,
- # which will be used to search for the Element. <em>Because
- # of the nature of XPath searches, any element in the connected XML
- # document can be fetched through any other element.</em> <b>The
- # Integer index is 1-based, not 0-based.</b> This means that the first
- # child element is at index 1, not 0, and the +n+th element is at index
- # +n+, not <tt>n-1</tt>. This is because XPath indexes element children
- # starting from 1, not 0, and the indexes should be the same.
- # name::
- # optional, and only used in the first argument is an
- # Integer. In that case, the index'th child Element that has the
- # supplied name will be returned. Note again that the indexes start at 1.
- # Returns:: the first matching Element, or nil if no child matched
- # doc = Document.new '<a><b/><c id="1"/><c id="2"/><d/></a>'
- # doc.root.elements[1] #-> <b/>
- # doc.root.elements['c'] #-> <c id="1"/>
- # doc.root.elements[2,'c'] #-> <c id="2"/>
- def []( index, name=nil)
- if index.kind_of? Integer
- raise "index (#{index}) must be >= 1" if index < 1
- name = literalize(name) if name
- num = 0
- @element.find { |child|
- child.kind_of? Element and
- (name.nil? ? true : child.has_name?( name )) and
- (num += 1) == index
- }
- else
- return XPath::first( @element, index )
- #{ |element|
- # return element if element.kind_of? Element
- #}
- #return nil
- end
- end
-
- # Sets an element, replacing any previous matching element. If no
- # existing element is found ,the element is added.
- # index:: Used to find a matching element to replace. See []().
- # element::
- # The element to replace the existing element with
- # the previous element
- # Returns:: nil if no previous element was found.
- #
- # doc = Document.new '<a/>'
- # doc.root.elements[10] = Element.new('b') #-> <a><b/></a>
- # doc.root.elements[1] #-> <b/>
- # doc.root.elements[1] = Element.new('c') #-> <a><c/></a>
- # doc.root.elements['c'] = Element.new('d') #-> <a><d/></a>
- def []=( index, element )
- previous = self[index]
- if previous.nil?
- @element.add element
- else
- previous.replace_with element
- end
- return previous
- end
-
- # Returns +true+ if there are no +Element+ children, +false+ otherwise
- def empty?
- @element.find{ |child| child.kind_of? Element}.nil?
- end
-
- # Returns the index of the supplied child (starting at 1), or -1 if
- # the element is not a child
- # element:: an +Element+ child
- def index element
- rv = 0
- found = @element.find do |child|
- child.kind_of? Element and
- (rv += 1) and
- child == element
- end
- return rv if found == element
- return -1
- end
-
- # Deletes a child Element
- # element::
- # Either an Element, which is removed directly; an
- # xpath, where the first matching child is removed; or an Integer,
- # where the n'th Element is removed.
- # Returns:: the removed child
- # doc = Document.new '<a><b/><c/><c id="1"/></a>'
- # b = doc.root.elements[1]
- # doc.root.elements.delete b #-> <a><c/><c id="1"/></a>
- # doc.elements.delete("a/c[@id='1']") #-> <a><c/></a>
- # doc.root.elements.delete 1 #-> <a/>
- def delete element
- if element.kind_of? Element
- @element.delete element
- else
- el = self[element]
- el.remove if el
- end
- end
-
- # Removes multiple elements. Filters for Element children, regardless of
- # XPath matching.
- # xpath:: all elements matching this String path are removed.
- # Returns:: an Array of Elements that have been removed
- # doc = Document.new '<a><c/><c/><c/><c/></a>'
- # deleted = doc.elements.delete_all 'a/c' #-> [<c/>, <c/>, <c/>, <c/>]
- def delete_all( xpath )
- rv = []
- XPath::each( @element, xpath) {|element|
- rv << element if element.kind_of? Element
- }
- rv.each do |element|
- @element.delete element
- element.remove
- end
- return rv
- end
-
- # Adds an element
- # element::
- # if supplied, is either an Element, String, or
- # Source (see Element.initialize). If not supplied or nil, a
- # new, default Element will be constructed
- # Returns:: the added Element
- # a = Element.new('a')
- # a.elements.add(Element.new('b')) #-> <a><b/></a>
- # a.elements.add('c') #-> <a><b/><c/></a>
- def add element=nil
- rv = nil
- if element.nil?
- Element.new("", self, @element.context)
- elsif not element.kind_of?(Element)
- Element.new(element, self, @element.context)
- else
- @element << element
- element.context = @element.context
- element
- end
- end
-
- alias :<< :add
-
- # Iterates through all of the child Elements, optionally filtering
- # them by a given XPath
- # xpath::
- # optional. If supplied, this is a String XPath, and is used to
- # filter the children, so that only matching children are yielded. Note
- # that XPaths are automatically filtered for Elements, so that
- # non-Element children will not be yielded
- # doc = Document.new '<a><b/><c/><d/>sean<b/><c/><d/></a>'
- # doc.root.each {|e|p e} #-> Yields b, c, d, b, c, d elements
- # doc.root.each('b') {|e|p e} #-> Yields b, b elements
- # doc.root.each('child::node()') {|e|p e}
- # #-> Yields <b/>, <c/>, <d/>, <b/>, <c/>, <d/>
- # XPath.each(doc.root, 'child::node()', &block)
- # #-> Yields <b/>, <c/>, <d/>, sean, <b/>, <c/>, <d/>
- def each( xpath=nil, &block)
- XPath::each( @element, xpath ) {|e| yield e if e.kind_of? Element }
- end
-
- def collect( xpath=nil, &block )
- collection = []
- XPath::each( @element, xpath ) {|e|
- collection << yield(e) if e.kind_of?(Element)
- }
- collection
- end
-
- def inject( xpath=nil, initial=nil, &block )
- first = true
- XPath::each( @element, xpath ) {|e|
- if (e.kind_of? Element)
- if (first and initial == nil)
- initial = e
- first = false
- else
- initial = yield( initial, e ) if e.kind_of? Element
- end
- end
- }
- initial
- end
-
- # Returns the number of +Element+ children of the parent object.
- # doc = Document.new '<a>sean<b/>elliott<b/>russell<b/></a>'
- # doc.root.size #-> 6, 3 element and 3 text nodes
- # doc.root.elements.size #-> 3
- def size
- count = 0
- @element.each {|child| count+=1 if child.kind_of? Element }
- count
- end
-
- # Returns an Array of Element children. An XPath may be supplied to
- # filter the children. Only Element children are returned, even if the
- # supplied XPath matches non-Element children.
- # doc = Document.new '<a>sean<b/>elliott<c/></a>'
- # doc.root.elements.to_a #-> [ <b/>, <c/> ]
- # doc.root.elements.to_a("child::node()") #-> [ <b/>, <c/> ]
- # XPath.match(doc.root, "child::node()") #-> [ sean, <b/>, elliott, <c/> ]
- def to_a( xpath=nil )
- rv = XPath.match( @element, xpath )
- return rv.find_all{|e| e.kind_of? Element} if xpath
- rv
- end
-
- private
- # Private helper class. Removes quotes from quoted strings
- def literalize name
- name = name[1..-2] if name[0] == ?' or name[0] == ?" #'
- name
- end
- end
-
- ########################################################################
- # ATTRIBUTES #
- ########################################################################
-
- # A class that defines the set of Attributes of an Element and provides
- # operations for accessing elements in that set.
- class Attributes < Hash
- # Constructor
- # element:: the Element of which this is an Attribute
- def initialize element
- @element = element
- end
-
- # Fetches an attribute value. If you want to get the Attribute itself,
- # use get_attribute()
- # name:: an XPath attribute name. Namespaces are relevant here.
- # Returns::
- # the String value of the matching attribute, or +nil+ if no
- # matching attribute was found. This is the unnormalized value
- # (with entities expanded).
- #
- # doc = Document.new "<a foo:att='1' bar:att='2' att='&lt;'/>"
- # doc.root.attributes['att'] #-> '<'
- # doc.root.attributes['bar:att'] #-> '2'
- def [](name)
- attr = get_attribute(name)
- return attr.value unless attr.nil?
- return nil
- end
-
- def to_a
- values.flatten
- end
-
- # Returns the number of attributes the owning Element contains.
- # doc = Document "<a x='1' y='2' foo:x='3'/>"
- # doc.root.attributes.length #-> 3
- def length
- c = 0
- each_attribute { c+=1 }
- c
- end
- alias :size :length
-
- # Iterates over the attributes of an Element. Yields actual Attribute
- # nodes, not String values.
- #
- # doc = Document.new '<a x="1" y="2"/>'
- # doc.root.attributes.each_attribute {|attr|
- # p attr.expanded_name+" => "+attr.value
- # }
- def each_attribute # :yields: attribute
- each_value do |val|
- if val.kind_of? Attribute
- yield val
- else
- val.each_value { |atr| yield atr }
- end
- end
- end
-
- # Iterates over each attribute of an Element, yielding the expanded name
- # and value as a pair of Strings.
- #
- # doc = Document.new '<a x="1" y="2"/>'
- # doc.root.attributes.each {|name, value| p name+" => "+value }
- def each
- each_attribute do |attr|
- yield [attr.expanded_name, attr.value]
- end
- end
-
- # Fetches an attribute
- # name::
- # the name by which to search for the attribute. Can be a
- # <tt>prefix:name</tt> namespace name.
- # Returns:: The first matching attribute, or nil if there was none. This
- # value is an Attribute node, not the String value of the attribute.
- # doc = Document.new '<a x:foo="1" foo="2" bar="3"/>'
- # doc.root.attributes.get_attribute("foo").value #-> "2"
- # doc.root.attributes.get_attribute("x:foo").value #-> "1"
- def get_attribute( name )
- attr = fetch( name, nil )
- if attr.nil?
- return nil if name.nil?
- # Look for prefix
- name =~ Namespace::NAMESPLIT
- prefix, n = $1, $2
- if prefix
- attr = fetch( n, nil )
- # check prefix
- if attr == nil
- elsif attr.kind_of? Attribute
- return attr if prefix == attr.prefix
- else
- attr = attr[ prefix ]
- return attr
- end
- end
- element_document = @element.document
- if element_document and element_document.doctype
- expn = @element.expanded_name
- expn = element_document.doctype.name if expn.size == 0
- attr_val = element_document.doctype.attribute_of(expn, name)
- return Attribute.new( name, attr_val ) if attr_val
- end
- return nil
- end
- if attr.kind_of? Hash
- attr = attr[ @element.prefix ]
- end
- return attr
- end
-
- # Sets an attribute, overwriting any existing attribute value by the
- # same name. Namespace is significant.
- # name:: the name of the attribute
- # value::
- # (optional) If supplied, the value of the attribute. If
- # nil, any existing matching attribute is deleted.
- # Returns::
- # Owning element
- # doc = Document.new "<a x:foo='1' foo='3'/>"
- # doc.root.attributes['y:foo'] = '2'
- # doc.root.attributes['foo'] = '4'
- # doc.root.attributes['x:foo'] = nil
- def []=( name, value )
- if value.nil? # Delete the named attribute
- attr = get_attribute(name)
- delete attr
- return
- end
- element_document = @element.document
- unless value.kind_of? Attribute
- if @element.document and @element.document.doctype
- value = Text::normalize( value, @element.document.doctype )
- else
- value = Text::normalize( value, nil )
- end
- value = Attribute.new(name, value)
- end
- value.element = @element
- old_attr = fetch(value.name, nil)
- if old_attr.nil?
- store(value.name, value)
- elsif old_attr.kind_of? Hash
- old_attr[value.prefix] = value
- elsif old_attr.prefix != value.prefix
- # Check for conflicting namespaces
- raise ParseException.new(
- "Namespace conflict in adding attribute \"#{value.name}\": "+
- "Prefix \"#{old_attr.prefix}\" = "+
- "\"#{@element.namespace(old_attr.prefix)}\" and prefix "+
- "\"#{value.prefix}\" = \"#{@element.namespace(value.prefix)}\"") if
- value.prefix != "xmlns" and old_attr.prefix != "xmlns" and
- @element.namespace( old_attr.prefix ) ==
- @element.namespace( value.prefix )
- store value.name, { old_attr.prefix => old_attr,
- value.prefix => value }
- else
- store value.name, value
- end
- return @element
- end
-
- # Returns an array of Strings containing all of the prefixes declared
- # by this set of # attributes. The array does not include the default
- # namespace declaration, if one exists.
- # doc = Document.new("<a xmlns='foo' xmlns:x='bar' xmlns:y='twee' "+
- # "z='glorp' p:k='gru'/>")
- # prefixes = doc.root.attributes.prefixes #-> ['x', 'y']
- def prefixes
- ns = []
- each_attribute do |attribute|
- ns << attribute.name if attribute.prefix == 'xmlns'
- end
- if @element.document and @element.document.doctype
- expn = @element.expanded_name
- expn = @element.document.doctype.name if expn.size == 0
- @element.document.doctype.attributes_of(expn).each {
- |attribute|
- ns << attribute.name if attribute.prefix == 'xmlns'
- }
- end
- ns
- end
-
- def namespaces
- namespaces = {}
- each_attribute do |attribute|
- namespaces[attribute.name] = attribute.value if attribute.prefix == 'xmlns' or attribute.name == 'xmlns'
- end
- if @element.document and @element.document.doctype
- expn = @element.expanded_name
- expn = @element.document.doctype.name if expn.size == 0
- @element.document.doctype.attributes_of(expn).each {
- |attribute|
- namespaces[attribute.name] = attribute.value if attribute.prefix == 'xmlns' or attribute.name == 'xmlns'
- }
- end
- namespaces
- end
-
- # Removes an attribute
- # attribute::
- # either a String, which is the name of the attribute to remove --
- # namespaces are significant here -- or the attribute to remove.
- # Returns:: the owning element
- # doc = Document.new "<a y:foo='0' x:foo='1' foo='3' z:foo='4'/>"
- # doc.root.attributes.delete 'foo' #-> <a y:foo='0' x:foo='1' z:foo='4'/>"
- # doc.root.attributes.delete 'x:foo' #-> <a y:foo='0' z:foo='4'/>"
- # attr = doc.root.attributes.get_attribute('y:foo')
- # doc.root.attributes.delete attr #-> <a z:foo='4'/>"
- def delete( attribute )
- name = nil
- prefix = nil
- if attribute.kind_of? Attribute
- name = attribute.name
- prefix = attribute.prefix
- else
- attribute =~ Namespace::NAMESPLIT
- prefix, name = $1, $2
- prefix = '' unless prefix
- end
- old = fetch(name, nil)
- attr = nil
- if old.kind_of? Hash # the supplied attribute is one of many
- attr = old.delete(prefix)
- if old.size == 1
- repl = nil
- old.each_value{|v| repl = v}
- store name, repl
- end
- elsif old.nil?
- return @element
- else # the supplied attribute is a top-level one
- attr = old
- res = super(name)
- end
- @element
- end
-
- # Adds an attribute, overriding any existing attribute by the
- # same name. Namespaces are significant.
- # attribute:: An Attribute
- def add( attribute )
- self[attribute.name] = attribute
- end
-
- alias :<< :add
-
- # Deletes all attributes matching a name. Namespaces are significant.
- # name::
- # A String; all attributes that match this path will be removed
- # Returns:: an Array of the Attributes that were removed
- def delete_all( name )
- rv = []
- each_attribute { |attribute|
- rv << attribute if attribute.expanded_name == name
- }
- rv.each{ |attr| attr.remove }
- return rv
- end
-
- # The +get_attribute_ns+ method retrieves a method by its namespace
- # and name. Thus it is possible to reliably identify an attribute
- # even if an XML processor has changed the prefix.
- #
- # Method contributed by Henrik Martensson
- def get_attribute_ns(namespace, name)
- result = nil
- each_attribute() { |attribute|
- if name == attribute.name &&
- namespace == attribute.namespace() &&
- ( !namespace.empty? || !attribute.fully_expanded_name.index(':') )
- # foo will match xmlns:foo, but only if foo isn't also an attribute
- result = attribute if !result or !namespace.empty? or
- !attribute.fully_expanded_name.index(':')
- end
- }
- result
- end
- end
-end
diff --git a/trunk/lib/rexml/encoding.rb b/trunk/lib/rexml/encoding.rb
deleted file mode 100644
index 608c69cd65..0000000000
--- a/trunk/lib/rexml/encoding.rb
+++ /dev/null
@@ -1,71 +0,0 @@
-# -*- mode: ruby; ruby-indent-level: 2; indent-tabs-mode: t; tab-width: 2 -*- vim: sw=2 ts=2
-module REXML
- module Encoding
- @encoding_methods = {}
- def self.register(enc, &block)
- @encoding_methods[enc] = block
- end
- def self.apply(obj, enc)
- @encoding_methods[enc][obj]
- end
- def self.encoding_method(enc)
- @encoding_methods[enc]
- end
-
- # Native, default format is UTF-8, so it is declared here rather than in
- # an encodings/ definition.
- UTF_8 = 'UTF-8'
- UTF_16 = 'UTF-16'
- UNILE = 'UNILE'
-
- # ID ---> Encoding name
- attr_reader :encoding
- def encoding=( enc )
- old_verbosity = $VERBOSE
- begin
- $VERBOSE = false
- enc = enc.nil? ? nil : enc.upcase
- return false if defined? @encoding and enc == @encoding
- if enc and enc != UTF_8
- @encoding = enc
- raise ArgumentError, "Bad encoding name #@encoding" unless @encoding =~ /^[\w-]+$/
- @encoding.untaint
- begin
- require 'rexml/encodings/ICONV.rb'
- Encoding.apply(self, "ICONV")
- rescue LoadError, Exception
- begin
- enc_file = File.join( "rexml", "encodings", "#@encoding.rb" )
- require enc_file
- Encoding.apply(self, @encoding)
- rescue LoadError => err
- puts err.message
- raise ArgumentError, "No decoder found for encoding #@encoding. Please install iconv."
- end
- end
- else
- @encoding = UTF_8
- require 'rexml/encodings/UTF-8.rb'
- Encoding.apply(self, @encoding)
- end
- ensure
- $VERBOSE = old_verbosity
- end
- true
- end
-
- def check_encoding str
- # We have to recognize UTF-16, LSB UTF-16, and UTF-8
- if str[0,2] == "\xfe\xff"
- str[0,2] = ""
- return UTF_16
- elsif str[0,2] == "\xff\xfe"
- str[0,2] = ""
- return UNILE
- end
- str =~ /^\s*<\?xml\s+version\s*=\s*(['"]).*?\1\s+encoding\s*=\s*(["'])(.*?)\2/m
- return $3.upcase if $3
- return UTF_8
- end
- end
-end
diff --git a/trunk/lib/rexml/encodings/CP-1252.rb b/trunk/lib/rexml/encodings/CP-1252.rb
deleted file mode 100644
index 8675f9ff98..0000000000
--- a/trunk/lib/rexml/encodings/CP-1252.rb
+++ /dev/null
@@ -1,103 +0,0 @@
-#
-# This class was contributed by Mikko Tiihonen mikko DOT tiihonen AT hut DOT fi
-#
-module REXML
- module Encoding
- register( "CP-1252" ) do |o|
- class << o
- alias encode encode_cp1252
- alias decode decode_cp1252
- end
- end
-
- # Convert from UTF-8
- def encode_cp1252(content)
- array_utf8 = content.unpack('U*')
- array_enc = []
- array_utf8.each do |num|
- case num
- # shortcut first bunch basic characters
- when 0..0xFF; array_enc << num
- # characters added compared to iso-8859-1
- when 0x20AC; array_enc << 0x80 # 0xe2 0x82 0xac
- when 0x201A; array_enc << 0x82 # 0xe2 0x82 0x9a
- when 0x0192; array_enc << 0x83 # 0xc6 0x92
- when 0x201E; array_enc << 0x84 # 0xe2 0x82 0x9e
- when 0x2026; array_enc << 0x85 # 0xe2 0x80 0xa6
- when 0x2020; array_enc << 0x86 # 0xe2 0x80 0xa0
- when 0x2021; array_enc << 0x87 # 0xe2 0x80 0xa1
- when 0x02C6; array_enc << 0x88 # 0xcb 0x86
- when 0x2030; array_enc << 0x89 # 0xe2 0x80 0xb0
- when 0x0160; array_enc << 0x8A # 0xc5 0xa0
- when 0x2039; array_enc << 0x8B # 0xe2 0x80 0xb9
- when 0x0152; array_enc << 0x8C # 0xc5 0x92
- when 0x017D; array_enc << 0x8E # 0xc5 0xbd
- when 0x2018; array_enc << 0x91 # 0xe2 0x80 0x98
- when 0x2019; array_enc << 0x92 # 0xe2 0x80 0x99
- when 0x201C; array_enc << 0x93 # 0xe2 0x80 0x9c
- when 0x201D; array_enc << 0x94 # 0xe2 0x80 0x9d
- when 0x2022; array_enc << 0x95 # 0xe2 0x80 0xa2
- when 0x2013; array_enc << 0x96 # 0xe2 0x80 0x93
- when 0x2014; array_enc << 0x97 # 0xe2 0x80 0x94
- when 0x02DC; array_enc << 0x98 # 0xcb 0x9c
- when 0x2122; array_enc << 0x99 # 0xe2 0x84 0xa2
- when 0x0161; array_enc << 0x9A # 0xc5 0xa1
- when 0x203A; array_enc << 0x9B # 0xe2 0x80 0xba
- when 0x0152; array_enc << 0x9C # 0xc5 0x93
- when 0x017E; array_enc << 0x9E # 0xc5 0xbe
- when 0x0178; array_enc << 0x9F # 0xc5 0xb8
- else
- # all remaining basic characters can be used directly
- if num <= 0xFF
- array_enc << num
- else
- # Numeric entity (&#nnnn;); shard by Stefan Scholl
- array_enc.concat "&\##{num};".unpack('C*')
- end
- end
- end
- array_enc.pack('C*')
- end
-
- # Convert to UTF-8
- def decode_cp1252(str)
- array_latin9 = str.unpack('C*')
- array_enc = []
- array_latin9.each do |num|
- case num
- # characters that added compared to iso-8859-1
- when 0x80; array_enc << 0x20AC # 0xe2 0x82 0xac
- when 0x82; array_enc << 0x201A # 0xe2 0x82 0x9a
- when 0x83; array_enc << 0x0192 # 0xc6 0x92
- when 0x84; array_enc << 0x201E # 0xe2 0x82 0x9e
- when 0x85; array_enc << 0x2026 # 0xe2 0x80 0xa6
- when 0x86; array_enc << 0x2020 # 0xe2 0x80 0xa0
- when 0x87; array_enc << 0x2021 # 0xe2 0x80 0xa1
- when 0x88; array_enc << 0x02C6 # 0xcb 0x86
- when 0x89; array_enc << 0x2030 # 0xe2 0x80 0xb0
- when 0x8A; array_enc << 0x0160 # 0xc5 0xa0
- when 0x8B; array_enc << 0x2039 # 0xe2 0x80 0xb9
- when 0x8C; array_enc << 0x0152 # 0xc5 0x92
- when 0x8E; array_enc << 0x017D # 0xc5 0xbd
- when 0x91; array_enc << 0x2018 # 0xe2 0x80 0x98
- when 0x92; array_enc << 0x2019 # 0xe2 0x80 0x99
- when 0x93; array_enc << 0x201C # 0xe2 0x80 0x9c
- when 0x94; array_enc << 0x201D # 0xe2 0x80 0x9d
- when 0x95; array_enc << 0x2022 # 0xe2 0x80 0xa2
- when 0x96; array_enc << 0x2013 # 0xe2 0x80 0x93
- when 0x97; array_enc << 0x2014 # 0xe2 0x80 0x94
- when 0x98; array_enc << 0x02DC # 0xcb 0x9c
- when 0x99; array_enc << 0x2122 # 0xe2 0x84 0xa2
- when 0x9A; array_enc << 0x0161 # 0xc5 0xa1
- when 0x9B; array_enc << 0x203A # 0xe2 0x80 0xba
- when 0x9C; array_enc << 0x0152 # 0xc5 0x93
- when 0x9E; array_enc << 0x017E # 0xc5 0xbe
- when 0x9F; array_enc << 0x0178 # 0xc5 0xb8
- else
- array_enc << num
- end
- end
- array_enc.pack('U*')
- end
- end
-end
diff --git a/trunk/lib/rexml/encodings/EUC-JP.rb b/trunk/lib/rexml/encodings/EUC-JP.rb
deleted file mode 100644
index db37b6bf0d..0000000000
--- a/trunk/lib/rexml/encodings/EUC-JP.rb
+++ /dev/null
@@ -1,35 +0,0 @@
-module REXML
- module Encoding
- begin
- require 'uconv'
-
- def decode_eucjp(str)
- Uconv::euctou8(str)
- end
-
- def encode_eucjp content
- Uconv::u8toeuc(content)
- end
- rescue LoadError
- require 'nkf'
-
- EUCTOU8 = '-Ewm0'
- U8TOEUC = '-Wem0'
-
- def decode_eucjp(str)
- NKF.nkf(EUCTOU8, str)
- end
-
- def encode_eucjp content
- NKF.nkf(U8TOEUC, content)
- end
- end
-
- register("EUC-JP") do |obj|
- class << obj
- alias decode decode_eucjp
- alias encode encode_eucjp
- end
- end
- end
-end
diff --git a/trunk/lib/rexml/encodings/ICONV.rb b/trunk/lib/rexml/encodings/ICONV.rb
deleted file mode 100644
index 172fba7cd1..0000000000
--- a/trunk/lib/rexml/encodings/ICONV.rb
+++ /dev/null
@@ -1,22 +0,0 @@
-require "iconv"
-raise LoadError unless defined? Iconv
-
-module REXML
- module Encoding
- def decode_iconv(str)
- Iconv.conv(UTF_8, @encoding, str)
- end
-
- def encode_iconv(content)
- Iconv.conv(@encoding, UTF_8, content)
- end
-
- register("ICONV") do |obj|
- Iconv.conv(UTF_8, obj.encoding, nil)
- class << obj
- alias decode decode_iconv
- alias encode encode_iconv
- end
- end
- end
-end
diff --git a/trunk/lib/rexml/encodings/ISO-8859-1.rb b/trunk/lib/rexml/encodings/ISO-8859-1.rb
deleted file mode 100644
index 2873d13bf0..0000000000
--- a/trunk/lib/rexml/encodings/ISO-8859-1.rb
+++ /dev/null
@@ -1,7 +0,0 @@
-require 'rexml/encodings/US-ASCII'
-
-module REXML
- module Encoding
- register("ISO-8859-1", &encoding_method("US-ASCII"))
- end
-end
diff --git a/trunk/lib/rexml/encodings/ISO-8859-15.rb b/trunk/lib/rexml/encodings/ISO-8859-15.rb
deleted file mode 100644
index 8dea0d38a4..0000000000
--- a/trunk/lib/rexml/encodings/ISO-8859-15.rb
+++ /dev/null
@@ -1,72 +0,0 @@
-#
-# This class was contributed by Mikko Tiihonen mikko DOT tiihonen AT hut DOT fi
-#
-module REXML
- module Encoding
- register("ISO-8859-15") do |o|
- alias encode to_iso_8859_15
- alias decode from_iso_8859_15
- end
-
- # Convert from UTF-8
- def to_iso_8859_15(content)
- array_utf8 = content.unpack('U*')
- array_enc = []
- array_utf8.each do |num|
- case num
- # shortcut first bunch basic characters
- when 0..0xA3; array_enc << num
- # characters removed compared to iso-8859-1
- when 0xA4; array_enc << '&#164;'
- when 0xA6; array_enc << '&#166;'
- when 0xA8; array_enc << '&#168;'
- when 0xB4; array_enc << '&#180;'
- when 0xB8; array_enc << '&#184;'
- when 0xBC; array_enc << '&#188;'
- when 0xBD; array_enc << '&#189;'
- when 0xBE; array_enc << '&#190;'
- # characters added compared to iso-8859-1
- when 0x20AC; array_enc << 0xA4 # 0xe2 0x82 0xac
- when 0x0160; array_enc << 0xA6 # 0xc5 0xa0
- when 0x0161; array_enc << 0xA8 # 0xc5 0xa1
- when 0x017D; array_enc << 0xB4 # 0xc5 0xbd
- when 0x017E; array_enc << 0xB8 # 0xc5 0xbe
- when 0x0152; array_enc << 0xBC # 0xc5 0x92
- when 0x0153; array_enc << 0xBD # 0xc5 0x93
- when 0x0178; array_enc << 0xBE # 0xc5 0xb8
- else
- # all remaining basic characters can be used directly
- if num <= 0xFF
- array_enc << num
- else
- # Numeric entity (&#nnnn;); shard by Stefan Scholl
- array_enc.concat "&\##{num};".unpack('C*')
- end
- end
- end
- array_enc.pack('C*')
- end
-
- # Convert to UTF-8
- def from_iso_8859_15(str)
- array_latin9 = str.unpack('C*')
- array_enc = []
- array_latin9.each do |num|
- case num
- # characters that differ compared to iso-8859-1
- when 0xA4; array_enc << 0x20AC
- when 0xA6; array_enc << 0x0160
- when 0xA8; array_enc << 0x0161
- when 0xB4; array_enc << 0x017D
- when 0xB8; array_enc << 0x017E
- when 0xBC; array_enc << 0x0152
- when 0xBD; array_enc << 0x0153
- when 0xBE; array_enc << 0x0178
- else
- array_enc << num
- end
- end
- array_enc.pack('U*')
- end
- end
-end
diff --git a/trunk/lib/rexml/encodings/SHIFT-JIS.rb b/trunk/lib/rexml/encodings/SHIFT-JIS.rb
deleted file mode 100644
index 9e0f4af20e..0000000000
--- a/trunk/lib/rexml/encodings/SHIFT-JIS.rb
+++ /dev/null
@@ -1,37 +0,0 @@
-module REXML
- module Encoding
- begin
- require 'uconv'
-
- def decode_sjis content
- Uconv::sjistou8(content)
- end
-
- def encode_sjis(str)
- Uconv::u8tosjis(str)
- end
- rescue LoadError
- require 'nkf'
-
- SJISTOU8 = '-Swm0x'
- U8TOSJIS = '-Wsm0x'
-
- def decode_sjis(str)
- NKF.nkf(SJISTOU8, str)
- end
-
- def encode_sjis content
- NKF.nkf(U8TOSJIS, content)
- end
- end
-
- b = proc do |obj|
- class << obj
- alias decode decode_sjis
- alias encode encode_sjis
- end
- end
- register("SHIFT-JIS", &b)
- register("SHIFT_JIS", &b)
- end
-end
diff --git a/trunk/lib/rexml/encodings/SHIFT_JIS.rb b/trunk/lib/rexml/encodings/SHIFT_JIS.rb
deleted file mode 100644
index e355704a7c..0000000000
--- a/trunk/lib/rexml/encodings/SHIFT_JIS.rb
+++ /dev/null
@@ -1 +0,0 @@
-require 'rexml/encodings/SHIFT-JIS'
diff --git a/trunk/lib/rexml/encodings/UNILE.rb b/trunk/lib/rexml/encodings/UNILE.rb
deleted file mode 100644
index d054140c40..0000000000
--- a/trunk/lib/rexml/encodings/UNILE.rb
+++ /dev/null
@@ -1,34 +0,0 @@
-module REXML
- module Encoding
- def encode_unile content
- array_utf8 = content.unpack("U*")
- array_enc = []
- array_utf8.each do |num|
- if ((num>>16) > 0)
- array_enc << ??
- array_enc << 0
- else
- array_enc << (num & 0xFF)
- array_enc << (num >> 8)
- end
- end
- array_enc.pack('C*')
- end
-
- def decode_unile(str)
- array_enc=str.unpack('C*')
- array_utf8 = []
- 0.step(array_enc.size-1, 2){|i|
- array_utf8 << (array_enc.at(i) + array_enc.at(i+1)*0x100)
- }
- array_utf8.pack('U*')
- end
-
- register(UNILE) do |obj|
- class << obj
- alias decode decode_unile
- alias encode encode_unile
- end
- end
- end
-end
diff --git a/trunk/lib/rexml/encodings/US-ASCII.rb b/trunk/lib/rexml/encodings/US-ASCII.rb
deleted file mode 100644
index fb4c217074..0000000000
--- a/trunk/lib/rexml/encodings/US-ASCII.rb
+++ /dev/null
@@ -1,30 +0,0 @@
-module REXML
- module Encoding
- # Convert from UTF-8
- def encode_ascii content
- array_utf8 = content.unpack('U*')
- array_enc = []
- array_utf8.each do |num|
- if num <= 0x7F
- array_enc << num
- else
- # Numeric entity (&#nnnn;); shard by Stefan Scholl
- array_enc.concat "&\##{num};".unpack('C*')
- end
- end
- array_enc.pack('C*')
- end
-
- # Convert to UTF-8
- def decode_ascii(str)
- str.unpack('C*').pack('U*')
- end
-
- register("US-ASCII") do |obj|
- class << obj
- alias decode decode_ascii
- alias encode encode_ascii
- end
- end
- end
-end
diff --git a/trunk/lib/rexml/encodings/UTF-16.rb b/trunk/lib/rexml/encodings/UTF-16.rb
deleted file mode 100644
index 007c493d9c..0000000000
--- a/trunk/lib/rexml/encodings/UTF-16.rb
+++ /dev/null
@@ -1,35 +0,0 @@
-module REXML
- module Encoding
- def encode_utf16 content
- array_utf8 = content.unpack("U*")
- array_enc = []
- array_utf8.each do |num|
- if ((num>>16) > 0)
- array_enc << 0
- array_enc << ??
- else
- array_enc << (num >> 8)
- array_enc << (num & 0xFF)
- end
- end
- array_enc.pack('C*')
- end
-
- def decode_utf16(str)
- str = str[2..-1] if /^\376\377/n =~ str
- array_enc=str.unpack('C*')
- array_utf8 = []
- 0.step(array_enc.size-1, 2){|i|
- array_utf8 << (array_enc.at(i+1) + array_enc.at(i)*0x100)
- }
- array_utf8.pack('U*')
- end
-
- register(UTF_16) do |obj|
- class << obj
- alias decode decode_utf16
- alias encode encode_utf16
- end
- end
- end
-end
diff --git a/trunk/lib/rexml/encodings/UTF-8.rb b/trunk/lib/rexml/encodings/UTF-8.rb
deleted file mode 100644
index bb08f44100..0000000000
--- a/trunk/lib/rexml/encodings/UTF-8.rb
+++ /dev/null
@@ -1,18 +0,0 @@
-module REXML
- module Encoding
- def encode_utf8 content
- content
- end
-
- def decode_utf8(str)
- str
- end
-
- register(UTF_8) do |obj|
- class << obj
- alias decode decode_utf8
- alias encode encode_utf8
- end
- end
- end
-end
diff --git a/trunk/lib/rexml/entity.rb b/trunk/lib/rexml/entity.rb
deleted file mode 100644
index 1c6a25c410..0000000000
--- a/trunk/lib/rexml/entity.rb
+++ /dev/null
@@ -1,165 +0,0 @@
-require 'rexml/child'
-require 'rexml/source'
-require 'rexml/xmltokens'
-
-module REXML
- # God, I hate DTDs. I really do. Why this idiot standard still
- # plagues us is beyond me.
- class Entity < Child
- include XMLTokens
- PUBIDCHAR = "\x20\x0D\x0Aa-zA-Z0-9\\-()+,./:=?;!*@$_%#"
- SYSTEMLITERAL = %Q{((?:"[^"]*")|(?:'[^']*'))}
- PUBIDLITERAL = %Q{("[#{PUBIDCHAR}']*"|'[#{PUBIDCHAR}]*')}
- EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))"
- NDATADECL = "\\s+NDATA\\s+#{NAME}"
- PEREFERENCE = "%#{NAME};"
- ENTITYVALUE = %Q{((?:"(?:[^%&"]|#{PEREFERENCE}|#{REFERENCE})*")|(?:'([^%&']|#{PEREFERENCE}|#{REFERENCE})*'))}
- PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})"
- ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
- PEDECL = "<!ENTITY\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
- GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
- ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um
-
- attr_reader :name, :external, :ref, :ndata, :pubid
-
- # Create a new entity. Simple entities can be constructed by passing a
- # name, value to the constructor; this creates a generic, plain entity
- # reference. For anything more complicated, you have to pass a Source to
- # the constructor with the entity definiton, or use the accessor methods.
- # +WARNING+: There is no validation of entity state except when the entity
- # is read from a stream. If you start poking around with the accessors,
- # you can easily create a non-conformant Entity. The best thing to do is
- # dump the stupid DTDs and use XMLSchema instead.
- #
- # e = Entity.new( 'amp', '&' )
- def initialize stream, value=nil, parent=nil, reference=false
- super(parent)
- @ndata = @pubid = @value = @external = nil
- if stream.kind_of? Array
- @name = stream[1]
- if stream[-1] == '%'
- @reference = true
- stream.pop
- else
- @reference = false
- end
- if stream[2] =~ /SYSTEM|PUBLIC/
- @external = stream[2]
- if @external == 'SYSTEM'
- @ref = stream[3]
- @ndata = stream[4] if stream.size == 5
- else
- @pubid = stream[3]
- @ref = stream[4]
- end
- else
- @value = stream[2]
- end
- else
- @reference = reference
- @external = nil
- @name = stream
- @value = value
- end
- end
-
- # Evaluates whether the given string matchs an entity definition,
- # returning true if so, and false otherwise.
- def Entity::matches? string
- (ENTITYDECL =~ string) == 0
- end
-
- # Evaluates to the unnormalized value of this entity; that is, replacing
- # all entities -- both %ent; and &ent; entities. This differs from
- # +value()+ in that +value+ only replaces %ent; entities.
- def unnormalized
- v = value()
- return nil if v.nil?
- @unnormalized = Text::unnormalize(v, parent)
- @unnormalized
- end
-
- #once :unnormalized
-
- # Returns the value of this entity unprocessed -- raw. This is the
- # normalized value; that is, with all %ent; and &ent; entities intact
- def normalized
- @value
- end
-
- # Write out a fully formed, correct entity definition (assuming the Entity
- # object itself is valid.)
- #
- # out::
- # An object implementing <TT>&lt;&lt;<TT> to which the entity will be
- # output
- # indent::
- # *DEPRECATED* and ignored
- def write out, indent=-1
- out << '<!ENTITY '
- out << '% ' if @reference
- out << @name
- out << ' '
- if @external
- out << @external << ' '
- if @pubid
- q = @pubid.include?('"')?"'":'"'
- out << q << @pubid << q << ' '
- end
- q = @ref.include?('"')?"'":'"'
- out << q << @ref << q
- out << ' NDATA ' << @ndata if @ndata
- else
- q = @value.include?('"')?"'":'"'
- out << q << @value << q
- end
- out << '>'
- end
-
- # Returns this entity as a string. See write().
- def to_s
- rv = ''
- write rv
- rv
- end
-
- PEREFERENCE_RE = /#{PEREFERENCE}/um
- # Returns the value of this entity. At the moment, only internal entities
- # are processed. If the value contains internal references (IE,
- # %blah;), those are replaced with their values. IE, if the doctype
- # contains:
- # <!ENTITY % foo "bar">
- # <!ENTITY yada "nanoo %foo; nanoo>
- # then:
- # doctype.entity('yada').value #-> "nanoo bar nanoo"
- def value
- if @value
- matches = @value.scan(PEREFERENCE_RE)
- rv = @value.clone
- if @parent
- matches.each do |entity_reference|
- entity_value = @parent.entity( entity_reference[0] )
- rv.gsub!( /%#{entity_reference.join};/um, entity_value )
- end
- end
- return rv
- end
- nil
- end
- end
-
- # This is a set of entity constants -- the ones defined in the XML
- # specification. These are +gt+, +lt+, +amp+, +quot+ and +apos+.
- module EntityConst
- # +>+
- GT = Entity.new( 'gt', '>' )
- # +<+
- LT = Entity.new( 'lt', '<' )
- # +&+
- AMP = Entity.new( 'amp', '&' )
- # +"+
- QUOT = Entity.new( 'quot', '"' )
- # +'+
- APOS = Entity.new( 'apos', "'" )
- end
-end
diff --git a/trunk/lib/rexml/formatters/default.rb b/trunk/lib/rexml/formatters/default.rb
deleted file mode 100644
index b4d63bc5b5..0000000000
--- a/trunk/lib/rexml/formatters/default.rb
+++ /dev/null
@@ -1,109 +0,0 @@
-module REXML
- module Formatters
- class Default
- # Prints out the XML document with no formatting -- except if id_hack is
- # set.
- #
- # ie_hack::
- # If set to true, then inserts whitespace before the close of an empty
- # tag, so that IE's bad XML parser doesn't choke.
- def initialize( ie_hack=false )
- @ie_hack = ie_hack
- end
-
- # Writes the node to some output.
- #
- # node::
- # The node to write
- # output::
- # A class implementing <TT>&lt;&lt;</TT>. Pass in an Output object to
- # change the output encoding.
- def write( node, output )
- case node
-
- when Document
- if node.xml_decl.encoding != "UTF-8" && !output.kind_of?(Output)
- output = Output.new( output, node.xml_decl.encoding )
- end
- write_document( node, output )
-
- when Element
- write_element( node, output )
-
- when Declaration, ElementDecl, NotationDecl, ExternalEntity, Entity,
- Attribute, AttlistDecl
- node.write( output,-1 )
-
- when Instruction
- write_instruction( node, output )
-
- when DocType, XMLDecl
- node.write( output )
-
- when Comment
- write_comment( node, output )
-
- when CData
- write_cdata( node, output )
-
- when Text
- write_text( node, output )
-
- else
- raise Exception.new("XML FORMATTING ERROR")
-
- end
- end
-
- protected
- def write_document( node, output )
- node.children.each { |child| write( child, output ) }
- end
-
- def write_element( node, output )
- output << "<#{node.expanded_name}"
-
- node.attributes.to_a.sort_by {|attr| attr.name}.each do |attr|
- output << " "
- attr.write( output )
- end unless node.attributes.empty?
-
- if node.children.empty?
- output << " " if @ie_hack
- output << "/"
- else
- output << ">"
- node.children.each { |child|
- write( child, output )
- }
- output << "</#{node.expanded_name}"
- end
- output << ">"
- end
-
- def write_text( node, output )
- output << node.to_s()
- end
-
- def write_comment( node, output )
- output << Comment::START
- output << node.to_s
- output << Comment::STOP
- end
-
- def write_cdata( node, output )
- output << CData::START
- output << node.to_s
- output << CData::STOP
- end
-
- def write_instruction( node, output )
- output << Instruction::START.sub(/\\/u, '')
- output << node.target
- output << ' '
- output << node.content
- output << Instruction::STOP.sub(/\\/u, '')
- end
- end
- end
-end
diff --git a/trunk/lib/rexml/formatters/pretty.rb b/trunk/lib/rexml/formatters/pretty.rb
deleted file mode 100644
index 296e607687..0000000000
--- a/trunk/lib/rexml/formatters/pretty.rb
+++ /dev/null
@@ -1,138 +0,0 @@
-require 'rexml/formatters/default'
-
-module REXML
- module Formatters
- # Pretty-prints an XML document. This destroys whitespace in text nodes
- # and will insert carriage returns and indentations.
- #
- # TODO: Add an option to print attributes on new lines
- class Pretty < Default
-
- # If compact is set to true, then the formatter will attempt to use as
- # little space as possible
- attr_accessor :compact
- # The width of a page. Used for formatting text
- attr_accessor :width
-
- # Create a new pretty printer.
- #
- # output::
- # An object implementing '<<(String)', to which the output will be written.
- # indentation::
- # An integer greater than 0. The indentation of each level will be
- # this number of spaces. If this is < 1, the behavior of this object
- # is undefined. Defaults to 2.
- # ie_hack::
- # If true, the printer will insert whitespace before closing empty
- # tags, thereby allowing Internet Explorer's feeble XML parser to
- # function. Defaults to false.
- def initialize( indentation=2, ie_hack=false )
- @indentation = indentation
- @level = 0
- @ie_hack = ie_hack
- @width = 80
- @compact = false
- end
-
- protected
- def write_element(node, output)
- output << ' '*@level
- output << "<#{node.expanded_name}"
-
- node.attributes.each_attribute do |attr|
- output << " "
- attr.write( output )
- end unless node.attributes.empty?
-
- if node.children.empty?
- if @ie_hack
- output << " "
- end
- output << "/"
- else
- output << ">"
- # If compact and all children are text, and if the formatted output
- # is less than the specified width, then try to print everything on
- # one line
- skip = false
- if compact
- if node.children.inject(true) {|s,c| s & c.kind_of?(Text)}
- string = ""
- old_level = @level
- @level = 0
- node.children.each { |child| write( child, string ) }
- @level = old_level
- if string.length < @width
- output << string
- skip = true
- end
- end
- end
- unless skip
- output << "\n"
- @level += @indentation
- node.children.each { |child|
- next if child.kind_of?(Text) and child.to_s.strip.length == 0
- write( child, output )
- output << "\n"
- }
- @level -= @indentation
- output << ' '*@level
- end
- output << "</#{node.expanded_name}"
- end
- output << ">"
- end
-
- def write_text( node, output )
- s = node.to_s()
- s.gsub!(/\s/,' ')
- s.squeeze!(" ")
- s = wrap(s, 80-@level)
- s = indent_text(s, @level, " ", true)
- output << (' '*@level + s)
- end
-
- def write_comment( node, output)
- output << ' ' * @level
- super
- end
-
- def write_cdata( node, output)
- output << ' ' * @level
- super
- end
-
- def write_document( node, output )
- # Ok, this is a bit odd. All XML documents have an XML declaration,
- # but it may not write itself if the user didn't specifically add it,
- # either through the API or in the input document. If it doesn't write
- # itself, then we don't need a carriage return... which makes this
- # logic more complex.
- node.children.each { |child|
- next if child == node.children[-1] and child.instance_of?(Text)
- unless child == node.children[0] or child.instance_of?(Text) or
- (child == node.children[1] and !node.children[0].writethis)
- output << "\n"
- end
- write( child, output )
- }
- end
-
- private
- def indent_text(string, level=1, style="\t", indentfirstline=true)
- return string if level < 0
- string.gsub(/\n/, "\n#{style*level}")
- end
-
- def wrap(string, width)
- # Recursively wrap string at width.
- return string if string.length <= width
- place = string.rindex(' ', width) # Position in string with last ' ' before cutoff
- return string[0,place] + "\n" + wrap(string[place+1..-1], width)
- end
-
- end
- end
-end
-
diff --git a/trunk/lib/rexml/formatters/transitive.rb b/trunk/lib/rexml/formatters/transitive.rb
deleted file mode 100644
index 1d80f21fbb..0000000000
--- a/trunk/lib/rexml/formatters/transitive.rb
+++ /dev/null
@@ -1,56 +0,0 @@
-require 'rexml/formatters/pretty'
-
-module REXML
- module Formatters
- # The Transitive formatter writes an XML document that parses to an
- # identical document as the source document. This means that no extra
- # whitespace nodes are inserted, and whitespace within text nodes is
- # preserved. Within these constraints, the document is pretty-printed,
- # with whitespace inserted into the metadata to introduce formatting.
- #
- # Note that this is only useful if the original XML is not already
- # formatted. Since this formatter does not alter whitespace nodes, the
- # results of formatting already formatted XML will be odd.
- class Transitive < Default
- def initialize( indentation=2 )
- @indentation = indentation
- @level = 0
- end
-
- protected
- def write_element( node, output )
- output << "<#{node.expanded_name}"
-
- node.attributes.each_attribute do |attr|
- output << " "
- attr.write( output )
- end unless node.attributes.empty?
-
- output << "\n"
- output << ' '*@level
- if node.children.empty?
- output << "/"
- else
- output << ">"
- # If compact and all children are text, and if the formatted output
- # is less than the specified width, then try to print everything on
- # one line
- skip = false
- @level += @indentation
- node.children.each { |child|
- write( child, output )
- }
- @level -= @indentation
- output << "</#{node.expanded_name}"
- output << "\n"
- output << ' '*@level
- end
- output << ">"
- end
-
- def write_text( node, output )
- output << node.to_s()
- end
- end
- end
-end
diff --git a/trunk/lib/rexml/functions.rb b/trunk/lib/rexml/functions.rb
deleted file mode 100644
index fc9c4701c4..0000000000
--- a/trunk/lib/rexml/functions.rb
+++ /dev/null
@@ -1,388 +0,0 @@
-module REXML
- # If you add a method, keep in mind two things:
- # (1) the first argument will always be a list of nodes from which to
- # filter. In the case of context methods (such as position), the function
- # should return an array with a value for each child in the array.
- # (2) all method calls from XML will have "-" replaced with "_".
- # Therefore, in XML, "local-name()" is identical (and actually becomes)
- # "local_name()"
- module Functions
- @@context = nil
- @@namespace_context = {}
- @@variables = {}
-
- def Functions::namespace_context=(x) ; @@namespace_context=x ; end
- def Functions::variables=(x) ; @@variables=x ; end
- def Functions::namespace_context ; @@namespace_context ; end
- def Functions::variables ; @@variables ; end
-
- def Functions::context=(value); @@context = value; end
-
- def Functions::text( )
- if @@context[:node].node_type == :element
- return @@context[:node].find_all{|n| n.node_type == :text}.collect{|n| n.value}
- elsif @@context[:node].node_type == :text
- return @@context[:node].value
- else
- return false
- end
- end
-
- def Functions::last( )
- @@context[:size]
- end
-
- def Functions::position( )
- @@context[:index]
- end
-
- def Functions::count( node_set )
- node_set.size
- end
-
- # Since REXML is non-validating, this method is not implemented as it
- # requires a DTD
- def Functions::id( object )
- end
-
- # UNTESTED
- def Functions::local_name( node_set=nil )
- get_namespace( node_set ) do |node|
- return node.local_name
- end
- end
-
- def Functions::namespace_uri( node_set=nil )
- get_namespace( node_set ) {|node| node.namespace}
- end
-
- def Functions::name( node_set=nil )
- get_namespace( node_set ) do |node|
- node.expanded_name
- end
- end
-
- # Helper method.
- def Functions::get_namespace( node_set = nil )
- if node_set == nil
- yield @@context[:node] if defined? @@context[:node].namespace
- else
- if node_set.respond_to? :each
- node_set.each { |node| yield node if defined? node.namespace }
- elsif node_set.respond_to? :namespace
- yield node_set
- end
- end
- end
-
- # A node-set is converted to a string by returning the string-value of the
- # node in the node-set that is first in document order. If the node-set is
- # empty, an empty string is returned.
- #
- # A number is converted to a string as follows
- #
- # NaN is converted to the string NaN
- #
- # positive zero is converted to the string 0
- #
- # negative zero is converted to the string 0
- #
- # positive infinity is converted to the string Infinity
- #
- # negative infinity is converted to the string -Infinity
- #
- # if the number is an integer, the number is represented in decimal form
- # as a Number with no decimal point and no leading zeros, preceded by a
- # minus sign (-) if the number is negative
- #
- # otherwise, the number is represented in decimal form as a Number
- # including a decimal point with at least one digit before the decimal
- # point and at least one digit after the decimal point, preceded by a
- # minus sign (-) if the number is negative; there must be no leading zeros
- # before the decimal point apart possibly from the one required digit
- # immediately before the decimal point; beyond the one required digit
- # after the decimal point there must be as many, but only as many, more
- # digits as are needed to uniquely distinguish the number from all other
- # IEEE 754 numeric values.
- #
- # The boolean false value is converted to the string false. The boolean
- # true value is converted to the string true.
- #
- # An object of a type other than the four basic types is converted to a
- # string in a way that is dependent on that type.
- def Functions::string( object=nil )
- #object = @context unless object
- if object.instance_of? Array
- string( object[0] )
- elsif defined? object.node_type
- if object.node_type == :attribute
- object.value
- elsif object.node_type == :element || object.node_type == :document
- string_value(object)
- else
- object.to_s
- end
- elsif object.nil?
- return ""
- else
- object.to_s
- end
- end
-
- def Functions::string_value( o )
- rv = ""
- o.children.each { |e|
- if e.node_type == :text
- rv << e.to_s
- elsif e.node_type == :element
- rv << string_value( e )
- end
- }
- rv
- end
-
- # UNTESTED
- def Functions::concat( *objects )
- objects.join
- end
-
- # Fixed by Mike Stok
- def Functions::starts_with( string, test )
- string(string).index(string(test)) == 0
- end
-
- # Fixed by Mike Stok
- def Functions::contains( string, test )
- string(string).include?(string(test))
- end
-
- # Kouhei fixed this
- def Functions::substring_before( string, test )
- ruby_string = string(string)
- ruby_index = ruby_string.index(string(test))
- if ruby_index.nil?
- ""
- else
- ruby_string[ 0...ruby_index ]
- end
- end
-
- # Kouhei fixed this too
- def Functions::substring_after( string, test )
- ruby_string = string(string)
- test_string = string(test)
- return $1 if ruby_string =~ /#{test}(.*)/
- ""
- end
-
- # Take equal portions of Mike Stok and Sean Russell; mix
- # vigorously, and pour into a tall, chilled glass. Serves 10,000.
- def Functions::substring( string, start, length=nil )
- ruby_string = string(string)
- ruby_length = if length.nil?
- ruby_string.length.to_f
- else
- number(length)
- end
- ruby_start = number(start)
-
- # Handle the special cases
- return '' if (
- ruby_length.nan? or
- ruby_start.nan? or
- ruby_start.infinite?
- )
-
- infinite_length = ruby_length.infinite? == 1
- ruby_length = ruby_string.length if infinite_length
-
- # Now, get the bounds. The XPath bounds are 1..length; the ruby bounds
- # are 0..length. Therefore, we have to offset the bounds by one.
- ruby_start = ruby_start.round - 1
- ruby_length = ruby_length.round
-
- if ruby_start < 0
- ruby_length += ruby_start unless infinite_length
- ruby_start = 0
- end
- return '' if ruby_length <= 0
- ruby_string[ruby_start,ruby_length]
- end
-
- # UNTESTED
- def Functions::string_length( string )
- string(string).length
- end
-
- # UNTESTED
- def Functions::normalize_space( string=nil )
- string = string(@@context[:node]) if string.nil?
- if string.kind_of? Array
- string.collect{|x| string.to_s.strip.gsub(/\s+/um, ' ') if string}
- else
- string.to_s.strip.gsub(/\s+/um, ' ')
- end
- end
-
- # This is entirely Mike Stok's beast
- def Functions::translate( string, tr1, tr2 )
- from = string(tr1)
- to = string(tr2)
-
- # the map is our translation table.
- #
- # if a character occurs more than once in the
- # from string then we ignore the second &
- # subsequent mappings
- #
- # if a character maps to nil then we delete it
- # in the output. This happens if the from
- # string is longer than the to string
- #
- # there's nothing about - or ^ being special in
- # http://www.w3.org/TR/xpath#function-translate
- # so we don't build ranges or negated classes
-
- map = Hash.new
- 0.upto(from.length - 1) { |pos|
- from_char = from[pos]
- unless map.has_key? from_char
- map[from_char] =
- if pos < to.length
- to[pos]
- else
- nil
- end
- end
- }
-
- if ''.respond_to? :chars
- string(string).chars.collect { |c|
- if map.has_key? c then map[c] else c end
- }.compact.join
- else
- string(string).unpack('U*').collect { |c|
- if map.has_key? c then map[c] else c end
- }.compact.pack('U*')
- end
- end
-
- # UNTESTED
- def Functions::boolean( object=nil )
- if object.kind_of? String
- if object =~ /\d+/u
- return object.to_f != 0
- else
- return object.size > 0
- end
- elsif object.kind_of? Array
- object = object.find{|x| x and true}
- end
- return object ? true : false
- end
-
- # UNTESTED
- def Functions::not( object )
- not boolean( object )
- end
-
- # UNTESTED
- def Functions::true( )
- true
- end
-
- # UNTESTED
- def Functions::false( )
- false
- end
-
- # UNTESTED
- def Functions::lang( language )
- lang = false
- node = @@context[:node]
- attr = nil
- until node.nil?
- if node.node_type == :element
- attr = node.attributes["xml:lang"]
- unless attr.nil?
- lang = compare_language(string(language), attr)
- break
- else
- end
- end
- node = node.parent
- end
- lang
- end
-
- def Functions::compare_language lang1, lang2
- lang2.downcase.index(lang1.downcase) == 0
- end
-
- # a string that consists of optional whitespace followed by an optional
- # minus sign followed by a Number followed by whitespace is converted to
- # the IEEE 754 number that is nearest (according to the IEEE 754
- # round-to-nearest rule) to the mathematical value represented by the
- # string; any other string is converted to NaN
- #
- # boolean true is converted to 1; boolean false is converted to 0
- #
- # a node-set is first converted to a string as if by a call to the string
- # function and then converted in the same way as a string argument
- #
- # an object of a type other than the four basic types is converted to a
- # number in a way that is dependent on that type
- def Functions::number( object=nil )
- object = @@context[:node] unless object
- case object
- when true
- Float(1)
- when false
- Float(0)
- when Array
- number(string( object ))
- when Numeric
- object.to_f
- else
- str = string( object )
- # If XPath ever gets scientific notation...
- #if str =~ /^\s*-?(\d*\.?\d+|\d+\.)([Ee]\d*)?\s*$/
- if str =~ /^\s*-?(\d*\.?\d+|\d+\.)\s*$/
- str.to_f
- else
- (0.0 / 0.0)
- end
- end
- end
-
- def Functions::sum( nodes )
- nodes = [nodes] unless nodes.kind_of? Array
- nodes.inject(0) { |r,n| r += number(string(n)) }
- end
-
- def Functions::floor( number )
- number(number).floor
- end
-
- def Functions::ceiling( number )
- number(number).ceil
- end
-
- def Functions::round( number )
- begin
- number(number).round
- rescue FloatDomainError
- number(number)
- end
- end
-
- def Functions::processing_instruction( node )
- node.node_type == :processing_instruction
- end
-
- def Functions::method_missing( id )
- puts "METHOD MISSING #{id.id2name}"
- XPath.match( @@context[:node], id.id2name )
- end
- end
-end
diff --git a/trunk/lib/rexml/instruction.rb b/trunk/lib/rexml/instruction.rb
deleted file mode 100644
index c16b894b4a..0000000000
--- a/trunk/lib/rexml/instruction.rb
+++ /dev/null
@@ -1,70 +0,0 @@
-require "rexml/child"
-require "rexml/source"
-
-module REXML
- # Represents an XML Instruction; IE, <? ... ?>
- # TODO: Add parent arg (3rd arg) to constructor
- class Instruction < Child
- START = '<\?'
- STOP = '\?>'
-
- # target is the "name" of the Instruction; IE, the "tag" in <?tag ...?>
- # content is everything else.
- attr_accessor :target, :content
-
- # Constructs a new Instruction
- # @param target can be one of a number of things. If String, then
- # the target of this instruction is set to this. If an Instruction,
- # then the Instruction is shallowly cloned (target and content are
- # copied). If a Source, then the source is scanned and parsed for
- # an Instruction declaration.
- # @param content Must be either a String, or a Parent. Can only
- # be a Parent if the target argument is a Source. Otherwise, this
- # String is set as the content of this instruction.
- def initialize(target, content=nil)
- if target.kind_of? String
- super()
- @target = target
- @content = content
- elsif target.kind_of? Instruction
- super(content)
- @target = target.target
- @content = target.content
- end
- @content.strip! if @content
- end
-
- def clone
- Instruction.new self
- end
-
- # == DEPRECATED
- # See the rexml/formatters package
- #
- def write writer, indent=-1, transitive=false, ie_hack=false
- Kernel.warn( "#{self.class.name}.write is deprecated" )
- indent(writer, indent)
- writer << START.sub(/\\/u, '')
- writer << @target
- writer << ' '
- writer << @content
- writer << STOP.sub(/\\/u, '')
- end
-
- # @return true if other is an Instruction, and the content and target
- # of the other matches the target and content of this object.
- def ==( other )
- other.kind_of? Instruction and
- other.target == @target and
- other.content == @content
- end
-
- def node_type
- :processing_instruction
- end
-
- def inspect
- "<?p-i #{target} ...?>"
- end
- end
-end
diff --git a/trunk/lib/rexml/light/node.rb b/trunk/lib/rexml/light/node.rb
deleted file mode 100644
index 943ec5f1a0..0000000000
--- a/trunk/lib/rexml/light/node.rb
+++ /dev/null
@@ -1,196 +0,0 @@
-require 'rexml/xmltokens'
-require 'rexml/light/node'
-
-# [ :element, parent, name, attributes, children* ]
- # a = Node.new
- # a << "B" # => <a>B</a>
- # a.b # => <a>B<b/></a>
- # a.b[1] # => <a>B<b/><b/><a>
- # a.b[1]["x"] = "y" # => <a>B<b/><b x="y"/></a>
- # a.b[0].c # => <a>B<b><c/></b><b x="y"/></a>
- # a.b.c << "D" # => <a>B<b><c>D</c></b><b x="y"/></a>
-module REXML
- module Light
- # Represents a tagged XML element. Elements are characterized by
- # having children, attributes, and names, and can themselves be
- # children.
- class Node
- NAMESPLIT = /^(?:(#{XMLTokens::NCNAME_STR}):)?(#{XMLTokens::NCNAME_STR})/u
- PARENTS = [ :element, :document, :doctype ]
- # Create a new element.
- def initialize node=nil
- @node = node
- if node.kind_of? String
- node = [ :text, node ]
- elsif node.nil?
- node = [ :document, nil, nil ]
- elsif node[0] == :start_element
- node[0] = :element
- elsif node[0] == :start_doctype
- node[0] = :doctype
- elsif node[0] == :start_document
- node[0] = :document
- end
- end
-
- def size
- if PARENTS.include? @node[0]
- @node[-1].size
- else
- 0
- end
- end
-
- def each( &block )
- size.times { |x| yield( at(x+4) ) }
- end
-
- def name
- at(2)
- end
-
- def name=( name_str, ns=nil )
- pfx = ''
- pfx = "#{prefix(ns)}:" if ns
- _old_put(2, "#{pfx}#{name_str}")
- end
-
- def parent=( node )
- _old_put(1,node)
- end
-
- def local_name
- namesplit
- @name
- end
-
- def local_name=( name_str )
- _old_put( 1, "#@prefix:#{name_str}" )
- end
-
- def prefix( namespace=nil )
- prefix_of( self, namespace )
- end
-
- def namespace( prefix=prefix() )
- namespace_of( self, prefix )
- end
-
- def namespace=( namespace )
- @prefix = prefix( namespace )
- pfx = ''
- pfx = "#@prefix:" if @prefix.size > 0
- _old_put(1, "#{pfx}#@name")
- end
-
- def []( reference, ns=nil )
- if reference.kind_of? String
- pfx = ''
- pfx = "#{prefix(ns)}:" if ns
- at(3)["#{pfx}#{reference}"]
- elsif reference.kind_of? Range
- _old_get( Range.new(4+reference.begin, reference.end, reference.exclude_end?) )
- else
- _old_get( 4+reference )
- end
- end
-
- def =~( path )
- XPath.match( self, path )
- end
-
- # Doesn't handle namespaces yet
- def []=( reference, ns, value=nil )
- if reference.kind_of? String
- value = ns unless value
- at( 3 )[reference] = value
- elsif reference.kind_of? Range
- _old_put( Range.new(3+reference.begin, reference.end, reference.exclude_end?), ns )
- else
- if value
- _old_put( 4+reference, ns, value )
- else
- _old_put( 4+reference, ns )
- end
- end
- end
-
- # Append a child to this element, optionally under a provided namespace.
- # The namespace argument is ignored if the element argument is an Element
- # object. Otherwise, the element argument is a string, the namespace (if
- # provided) is the namespace the element is created in.
- def << element
- if node_type() == :text
- at(-1) << element
- else
- newnode = Node.new( element )
- newnode.parent = self
- self.push( newnode )
- end
- at(-1)
- end
-
- def node_type
- _old_get(0)
- end
-
- def text=( foo )
- replace = at(4).kind_of?(String)? 1 : 0
- self._old_put(4,replace, normalizefoo)
- end
-
- def root
- context = self
- context = context.at(1) while context.at(1)
- end
-
- def has_name?( name, namespace = '' )
- at(3) == name and namespace() == namespace
- end
-
- def children
- self
- end
-
- def parent
- at(1)
- end
-
- def to_s
-
- end
-
- private
-
- def namesplit
- return if @name.defined?
- at(2) =~ NAMESPLIT
- @prefix = '' || $1
- @name = $2
- end
-
- def namespace_of( node, prefix=nil )
- if not prefix
- name = at(2)
- name =~ NAMESPLIT
- prefix = $1
- end
- to_find = 'xmlns'
- to_find = "xmlns:#{prefix}" if not prefix.nil?
- ns = at(3)[ to_find ]
- ns ? ns : namespace_of( @node[0], prefix )
- end
-
- def prefix_of( node, namespace=nil )
- if not namespace
- name = node.name
- name =~ NAMESPLIT
- $1
- else
- ns = at(3).find { |k,v| v == namespace }
- ns ? ns : prefix_of( node.parent, namespace )
- end
- end
- end
- end
-end
diff --git a/trunk/lib/rexml/namespace.rb b/trunk/lib/rexml/namespace.rb
deleted file mode 100644
index 3e8790580b..0000000000
--- a/trunk/lib/rexml/namespace.rb
+++ /dev/null
@@ -1,47 +0,0 @@
-require 'rexml/xmltokens'
-
-module REXML
- # Adds named attributes to an object.
- module Namespace
- # The name of the object, valid if set
- attr_reader :name, :expanded_name
- # The expanded name of the object, valid if name is set
- attr_accessor :prefix
- include XMLTokens
- NAMESPLIT = /^(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})/u
-
- # Sets the name and the expanded name
- def name=( name )
- @expanded_name = name
- name =~ NAMESPLIT
- if $1
- @prefix = $1
- else
- @prefix = ""
- @namespace = ""
- end
- @name = $2
- end
-
- # Compares names optionally WITH namespaces
- def has_name?( other, ns=nil )
- if ns
- return (namespace() == ns and name() == other)
- elsif other.include? ":"
- return fully_expanded_name == other
- else
- return name == other
- end
- end
-
- alias :local_name :name
-
- # Fully expand the name, even if the prefix wasn't specified in the
- # source file.
- def fully_expanded_name
- ns = prefix
- return "#{ns}:#@name" if ns.size > 0
- return @name
- end
- end
-end
diff --git a/trunk/lib/rexml/node.rb b/trunk/lib/rexml/node.rb
deleted file mode 100644
index d5e8456e53..0000000000
--- a/trunk/lib/rexml/node.rb
+++ /dev/null
@@ -1,75 +0,0 @@
-require "rexml/parseexception"
-require "rexml/formatters/pretty"
-require "rexml/formatters/default"
-
-module REXML
- # Represents a node in the tree. Nodes are never encountered except as
- # superclasses of other objects. Nodes have siblings.
- module Node
- # @return the next sibling (nil if unset)
- def next_sibling_node
- return nil if @parent.nil?
- @parent[ @parent.index(self) + 1 ]
- end
-
- # @return the previous sibling (nil if unset)
- def previous_sibling_node
- return nil if @parent.nil?
- ind = @parent.index(self)
- return nil if ind == 0
- @parent[ ind - 1 ]
- end
-
- # indent::
- # *DEPRECATED* This parameter is now ignored. See the formatters in the
- # REXML::Formatters package for changing the output style.
- def to_s indent=nil
- unless indent.nil?
- Kernel.warn( "#{self.class.name}.to_s(indent) parameter is deprecated" )
- f = REXML::Formatters::Pretty.new( indent )
- f.write( self, rv = "" )
- else
- f = REXML::Formatters::Default.new
- f.write( self, rv = "" )
- end
- return rv
- end
-
- def indent to, ind
- if @parent and @parent.context and not @parent.context[:indentstyle].nil? then
- indentstyle = @parent.context[:indentstyle]
- else
- indentstyle = ' '
- end
- to << indentstyle*ind unless ind<1
- end
-
- def parent?
- false;
- end
-
-
- # Visit all subnodes of +self+ recursively
- def each_recursive(&block) # :yields: node
- self.elements.each {|node|
- block.call(node)
- node.each_recursive(&block)
- }
- end
-
- # Find (and return) first subnode (recursively) for which the block
- # evaluates to true. Returns +nil+ if none was found.
- def find_first_recursive(&block) # :yields: node
- each_recursive {|node|
- return node if block.call(node)
- }
- return nil
- end
-
- # Returns the position that +self+ holds in its parent's array, indexed
- # from 1.
- def index_in_parent
- parent.index(self)+1
- end
- end
-end
diff --git a/trunk/lib/rexml/output.rb b/trunk/lib/rexml/output.rb
deleted file mode 100644
index be4d23d42d..0000000000
--- a/trunk/lib/rexml/output.rb
+++ /dev/null
@@ -1,24 +0,0 @@
-require 'rexml/encoding'
-
-module REXML
- class Output
- include Encoding
-
- attr_reader :encoding
-
- def initialize real_IO, encd="iso-8859-1"
- @output = real_IO
- self.encoding = encd
-
- @to_utf = encd == UTF_8 ? false : true
- end
-
- def <<( content )
- @output << (@to_utf ? self.encode(content) : content)
- end
-
- def to_s
- "Output[#{encoding}]"
- end
- end
-end
diff --git a/trunk/lib/rexml/parent.rb b/trunk/lib/rexml/parent.rb
deleted file mode 100644
index a20aaaef6b..0000000000
--- a/trunk/lib/rexml/parent.rb
+++ /dev/null
@@ -1,166 +0,0 @@
-require "rexml/child"
-
-module REXML
- # A parent has children, and has methods for accessing them. The Parent
- # class is never encountered except as the superclass for some other
- # object.
- class Parent < Child
- include Enumerable
-
- # Constructor
- # @param parent if supplied, will be set as the parent of this object
- def initialize parent=nil
- super(parent)
- @children = []
- end
-
- def add( object )
- #puts "PARENT GOTS #{size} CHILDREN"
- object.parent = self
- @children << object
- #puts "PARENT NOW GOTS #{size} CHILDREN"
- object
- end
-
- alias :push :add
- alias :<< :push
-
- def unshift( object )
- object.parent = self
- @children.unshift object
- end
-
- def delete( object )
- found = false
- @children.delete_if {|c| c.equal?(object) and found = true }
- object.parent = nil if found
- end
-
- def each(&block)
- @children.each(&block)
- end
-
- def delete_if( &block )
- @children.delete_if(&block)
- end
-
- def delete_at( index )
- @children.delete_at index
- end
-
- def each_index( &block )
- @children.each_index(&block)
- end
-
- # Fetches a child at a given index
- # @param index the Integer index of the child to fetch
- def []( index )
- @children[index]
- end
-
- alias :each_child :each
-
-
-
- # Set an index entry. See Array.[]=
- # @param index the index of the element to set
- # @param opt either the object to set, or an Integer length
- # @param child if opt is an Integer, this is the child to set
- # @return the parent (self)
- def []=( *args )
- args[-1].parent = self
- @children[*args[0..-2]] = args[-1]
- end
-
- # Inserts an child before another child
- # @param child1 this is either an xpath or an Element. If an Element,
- # child2 will be inserted before child1 in the child list of the parent.
- # If an xpath, child2 will be inserted before the first child to match
- # the xpath.
- # @param child2 the child to insert
- # @return the parent (self)
- def insert_before( child1, child2 )
- if child1.kind_of? String
- child1 = XPath.first( self, child1 )
- child1.parent.insert_before child1, child2
- else
- ind = index(child1)
- child2.parent.delete(child2) if child2.parent
- @children[ind,0] = child2
- child2.parent = self
- end
- self
- end
-
- # Inserts an child after another child
- # @param child1 this is either an xpath or an Element. If an Element,
- # child2 will be inserted after child1 in the child list of the parent.
- # If an xpath, child2 will be inserted after the first child to match
- # the xpath.
- # @param child2 the child to insert
- # @return the parent (self)
- def insert_after( child1, child2 )
- if child1.kind_of? String
- child1 = XPath.first( self, child1 )
- child1.parent.insert_after child1, child2
- else
- ind = index(child1)+1
- child2.parent.delete(child2) if child2.parent
- @children[ind,0] = child2
- child2.parent = self
- end
- self
- end
-
- def to_a
- @children.dup
- end
-
- # Fetches the index of a given child
- # @param child the child to get the index of
- # @return the index of the child, or nil if the object is not a child
- # of this parent.
- def index( child )
- count = -1
- @children.find { |i| count += 1 ; i.hash == child.hash }
- count
- end
-
- # @return the number of children of this parent
- def size
- @children.size
- end
-
- alias :length :size
-
- # Replaces one child with another, making sure the nodelist is correct
- # @param to_replace the child to replace (must be a Child)
- # @param replacement the child to insert into the nodelist (must be a
- # Child)
- def replace_child( to_replace, replacement )
- @children.map! {|c| c.equal?( to_replace ) ? replacement : c }
- to_replace.parent = nil
- replacement.parent = self
- end
-
- # Deeply clones this object. This creates a complete duplicate of this
- # Parent, including all descendants.
- def deep_clone
- cl = clone()
- each do |child|
- if child.kind_of? Parent
- cl << child.deep_clone
- else
- cl << child.clone
- end
- end
- cl
- end
-
- alias :children :to_a
-
- def parent?
- true
- end
- end
-end
diff --git a/trunk/lib/rexml/parseexception.rb b/trunk/lib/rexml/parseexception.rb
deleted file mode 100644
index feb7a7e638..0000000000
--- a/trunk/lib/rexml/parseexception.rb
+++ /dev/null
@@ -1,51 +0,0 @@
-module REXML
- class ParseException < RuntimeError
- attr_accessor :source, :parser, :continued_exception
-
- def initialize( message, source=nil, parser=nil, exception=nil )
- super(message)
- @source = source
- @parser = parser
- @continued_exception = exception
- end
-
- def to_s
- # Quote the original exception, if there was one
- if @continued_exception
- err = @continued_exception.inspect
- err << "\n"
- err << @continued_exception.backtrace.join("\n")
- err << "\n...\n"
- else
- err = ""
- end
-
- # Get the stack trace and error message
- err << super
-
- # Add contextual information
- if @source
- err << "\nLine: #{line}\n"
- err << "Position: #{position}\n"
- err << "Last 80 unconsumed characters:\n"
- err << @source.buffer[0..80].gsub(/\n/, ' ')
- end
-
- err
- end
-
- def position
- @source.current_line[0] if @source and defined? @source.current_line and
- @source.current_line
- end
-
- def line
- @source.current_line[2] if @source and defined? @source.current_line and
- @source.current_line
- end
-
- def context
- @source.current_line
- end
- end
-end
diff --git a/trunk/lib/rexml/parsers/baseparser.rb b/trunk/lib/rexml/parsers/baseparser.rb
deleted file mode 100644
index 162d029a62..0000000000
--- a/trunk/lib/rexml/parsers/baseparser.rb
+++ /dev/null
@@ -1,530 +0,0 @@
-require 'rexml/parseexception'
-require 'rexml/undefinednamespaceexception'
-require 'rexml/source'
-require 'set'
-
-module REXML
- module Parsers
- # = Using the Pull Parser
- # <em>This API is experimental, and subject to change.</em>
- # parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
- # while parser.has_next?
- # res = parser.next
- # puts res[1]['att'] if res.start_tag? and res[0] == 'b'
- # end
- # See the PullEvent class for information on the content of the results.
- # The data is identical to the arguments passed for the various events to
- # the StreamListener API.
- #
- # Notice that:
- # parser = PullParser.new( "<a>BAD DOCUMENT" )
- # while parser.has_next?
- # res = parser.next
- # raise res[1] if res.error?
- # end
- #
- # Nat Price gave me some good ideas for the API.
- class BaseParser
- if String.method_defined? :encode
- # Oniguruma / POSIX [understands unicode]
- LETTER = '[[:alpha:]]'
- DIGIT = '[[:digit:]]'
- else
- # Ruby < 1.9 [doesn't understand unicode]
- LETTER = 'a-zA-Z'
- DIGIT = '\d'
- end
-
- COMBININGCHAR = '' # TODO
- EXTENDER = '' # TODO
-
- NCNAME_STR= "[#{LETTER}_:][-#{LETTER}#{DIGIT}._:#{COMBININGCHAR}#{EXTENDER}]*"
- NAME_STR= "(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})"
- UNAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}"
-
- NAMECHAR = '[\-\w\d\.:]'
- NAME = "([\\w:]#{NAMECHAR}*)"
- NMTOKEN = "(?:#{NAMECHAR})+"
- NMTOKENS = "#{NMTOKEN}(\\s+#{NMTOKEN})*"
- REFERENCE = "&(?:#{NAME};|#\\d+;|#x[0-9a-fA-F]+;)"
- REFERENCE_RE = /#{REFERENCE}/
-
- DOCTYPE_START = /\A\s*<!DOCTYPE\s/um
- DOCTYPE_PATTERN = /\s*<!DOCTYPE\s+(.*?)(\[|>)/um
- ATTRIBUTE_PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\4/um
- COMMENT_START = /\A<!--/u
- COMMENT_PATTERN = /<!--(.*?)-->/um
- CDATA_START = /\A<!\[CDATA\[/u
- CDATA_END = /^\s*\]\s*>/um
- CDATA_PATTERN = /<!\[CDATA\[(.*?)\]\]>/um
- XMLDECL_START = /\A<\?xml\s/u;
- XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>/um
- INSTRUCTION_START = /\A<\?/u
- INSTRUCTION_PATTERN = /<\?(.*?)(\s+.*?)?\?>/um
- TAG_MATCH = /^<((?>#{NAME_STR}))\s*((?>\s+#{UNAME_STR}\s*=\s*(["']).*?\5)*)\s*(\/)?>/um
- CLOSE_MATCH = /^\s*<\/(#{NAME_STR})\s*>/um
-
- VERSION = /\bversion\s*=\s*["'](.*?)['"]/um
- ENCODING = /\bencoding\s*=\s*["'](.*?)['"]/um
- STANDALONE = /\bstandalone\s*=\s["'](.*?)['"]/um
-
- ENTITY_START = /^\s*<!ENTITY/
- IDENTITY = /^([!\*\w\-]+)(\s+#{NCNAME_STR})?(\s+["'](.*?)['"])?(\s+['"](.*?)["'])?/u
- ELEMENTDECL_START = /^\s*<!ELEMENT/um
- ELEMENTDECL_PATTERN = /^\s*(<!ELEMENT.*?)>/um
- SYSTEMENTITY = /^\s*(%.*?;)\s*$/um
- ENUMERATION = "\\(\\s*#{NMTOKEN}(?:\\s*\\|\\s*#{NMTOKEN})*\\s*\\)"
- NOTATIONTYPE = "NOTATION\\s+\\(\\s*#{NAME}(?:\\s*\\|\\s*#{NAME})*\\s*\\)"
- ENUMERATEDTYPE = "(?:(?:#{NOTATIONTYPE})|(?:#{ENUMERATION}))"
- ATTTYPE = "(CDATA|ID|IDREF|IDREFS|ENTITY|ENTITIES|NMTOKEN|NMTOKENS|#{ENUMERATEDTYPE})"
- ATTVALUE = "(?:\"((?:[^<&\"]|#{REFERENCE})*)\")|(?:'((?:[^<&']|#{REFERENCE})*)')"
- DEFAULTDECL = "(#REQUIRED|#IMPLIED|(?:(#FIXED\\s+)?#{ATTVALUE}))"
- ATTDEF = "\\s+#{NAME}\\s+#{ATTTYPE}\\s+#{DEFAULTDECL}"
- ATTDEF_RE = /#{ATTDEF}/
- ATTLISTDECL_START = /^\s*<!ATTLIST/um
- ATTLISTDECL_PATTERN = /^\s*<!ATTLIST\s+#{NAME}(?:#{ATTDEF})*\s*>/um
- NOTATIONDECL_START = /^\s*<!NOTATION/um
- PUBLIC = /^\s*<!NOTATION\s+(\w[\-\w]*)\s+(PUBLIC)\s+(["'])(.*?)\3(?:\s+(["'])(.*?)\5)?\s*>/um
- SYSTEM = /^\s*<!NOTATION\s+(\w[\-\w]*)\s+(SYSTEM)\s+(["'])(.*?)\3\s*>/um
-
- TEXT_PATTERN = /\A([^<]*)/um
-
- # Entity constants
- PUBIDCHAR = "\x20\x0D\x0Aa-zA-Z0-9\\-()+,./:=?;!*@$_%#"
- SYSTEMLITERAL = %Q{((?:"[^"]*")|(?:'[^']*'))}
- PUBIDLITERAL = %Q{("[#{PUBIDCHAR}']*"|'[#{PUBIDCHAR}]*')}
- EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))"
- NDATADECL = "\\s+NDATA\\s+#{NAME}"
- PEREFERENCE = "%#{NAME};"
- ENTITYVALUE = %Q{((?:"(?:[^%&"]|#{PEREFERENCE}|#{REFERENCE})*")|(?:'([^%&']|#{PEREFERENCE}|#{REFERENCE})*'))}
- PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})"
- ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
- PEDECL = "<!ENTITY\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
- GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
- ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um
-
- EREFERENCE = /&(?!#{NAME};)/
-
- DEFAULT_ENTITIES = {
- 'gt' => [/&gt;/, '&gt;', '>', />/],
- 'lt' => [/&lt;/, '&lt;', '<', /</],
- 'quot' => [/&quot;/, '&quot;', '"', /"/],
- "apos" => [/&apos;/, "&apos;", "'", /'/]
- }
-
-
- ######################################################################
- # These are patterns to identify common markup errors, to make the
- # error messages more informative.
- ######################################################################
- MISSING_ATTRIBUTE_QUOTES = /^<#{NAME_STR}\s+#{NAME_STR}\s*=\s*[^"']/um
-
- def initialize( source )
- self.stream = source
- end
-
- def add_listener( listener )
- if !defined?(@listeners) or !@listeners
- @listeners = []
- instance_eval <<-EOL
- alias :_old_pull :pull
- def pull
- event = _old_pull
- @listeners.each do |listener|
- listener.receive event
- end
- event
- end
- EOL
- end
- @listeners << listener
- end
-
- attr_reader :source
-
- def stream=( source )
- @source = SourceFactory.create_from( source )
- @closed = nil
- @document_status = nil
- @tags = []
- @stack = []
- @entities = []
- @nsstack = []
- end
-
- def position
- if @source.respond_to? :position
- @source.position
- else
- # FIXME
- 0
- end
- end
-
- # Returns true if there are no more events
- def empty?
- return (@source.empty? and @stack.empty?)
- end
-
- # Returns true if there are more events. Synonymous with !empty?
- def has_next?
- return !(@source.empty? and @stack.empty?)
- end
-
- # Push an event back on the head of the stream. This method
- # has (theoretically) infinite depth.
- def unshift token
- @stack.unshift(token)
- end
-
- # Peek at the +depth+ event in the stack. The first element on the stack
- # is at depth 0. If +depth+ is -1, will parse to the end of the input
- # stream and return the last event, which is always :end_document.
- # Be aware that this causes the stream to be parsed up to the +depth+
- # event, so you can effectively pre-parse the entire document (pull the
- # entire thing into memory) using this method.
- def peek depth=0
- raise %Q[Illegal argument "#{depth}"] if depth < -1
- temp = []
- if depth == -1
- temp.push(pull()) until empty?
- else
- while @stack.size+temp.size < depth+1
- temp.push(pull())
- end
- end
- @stack += temp if temp.size > 0
- @stack[depth]
- end
-
- # Returns the next event. This is a +PullEvent+ object.
- def pull
- if @closed
- x, @closed = @closed, nil
- return [ :end_element, x ]
- end
- return [ :end_document ] if empty?
- return @stack.shift if @stack.size > 0
- #STDERR.puts @source.encoding
- @source.read if @source.buffer.size<2
- #STDERR.puts "BUFFER = #{@source.buffer.inspect}"
- if @document_status == nil
- #@source.consume( /^\s*/um )
- word = @source.match( /^((?:\s+)|(?:<[^>]*>))/um )
- word = word[1] unless word.nil?
- #STDERR.puts "WORD = #{word.inspect}"
- case word
- when COMMENT_START
- return [ :comment, @source.match( COMMENT_PATTERN, true )[1] ]
- when XMLDECL_START
- #STDERR.puts "XMLDECL"
- results = @source.match( XMLDECL_PATTERN, true )[1]
- version = VERSION.match( results )
- version = version[1] unless version.nil?
- encoding = ENCODING.match(results)
- encoding = encoding[1] unless encoding.nil?
- @source.encoding = encoding
- standalone = STANDALONE.match(results)
- standalone = standalone[1] unless standalone.nil?
- return [ :xmldecl, version, encoding, standalone ]
- when INSTRUCTION_START
- return [ :processing_instruction, *@source.match(INSTRUCTION_PATTERN, true)[1,2] ]
- when DOCTYPE_START
- md = @source.match( DOCTYPE_PATTERN, true )
- @nsstack.unshift(curr_ns=Set.new)
- identity = md[1]
- close = md[2]
- identity =~ IDENTITY
- name = $1
- raise REXML::ParseException.new("DOCTYPE is missing a name") if name.nil?
- pub_sys = $2.nil? ? nil : $2.strip
- long_name = $4.nil? ? nil : $4.strip
- uri = $6.nil? ? nil : $6.strip
- args = [ :start_doctype, name, pub_sys, long_name, uri ]
- if close == ">"
- @document_status = :after_doctype
- @source.read if @source.buffer.size<2
- md = @source.match(/^\s*/um, true)
- @stack << [ :end_doctype ]
- else
- @document_status = :in_doctype
- end
- return args
- when /^\s+/
- else
- @document_status = :after_doctype
- @source.read if @source.buffer.size<2
- md = @source.match(/\s*/um, true)
- if @source.encoding == "UTF-8"
- if @source.buffer.respond_to? :force_encoding
- @source.buffer.force_encoding(Encoding::UTF_8)
- end
- end
- end
- end
- if @document_status == :in_doctype
- md = @source.match(/\s*(.*?>)/um)
- case md[1]
- when SYSTEMENTITY
- match = @source.match( SYSTEMENTITY, true )[1]
- return [ :externalentity, match ]
-
- when ELEMENTDECL_START
- return [ :elementdecl, @source.match( ELEMENTDECL_PATTERN, true )[1] ]
-
- when ENTITY_START
- match = @source.match( ENTITYDECL, true ).to_a.compact
- match[0] = :entitydecl
- ref = false
- if match[1] == '%'
- ref = true
- match.delete_at 1
- end
- # Now we have to sort out what kind of entity reference this is
- if match[2] == 'SYSTEM'
- # External reference
- match[3] = match[3][1..-2] # PUBID
- match.delete_at(4) if match.size > 4 # Chop out NDATA decl
- # match is [ :entity, name, SYSTEM, pubid(, ndata)? ]
- elsif match[2] == 'PUBLIC'
- # External reference
- match[3] = match[3][1..-2] # PUBID
- match[4] = match[4][1..-2] # HREF
- # match is [ :entity, name, PUBLIC, pubid, href ]
- else
- match[2] = match[2][1..-2]
- match.pop if match.size == 4
- # match is [ :entity, name, value ]
- end
- match << '%' if ref
- return match
- when ATTLISTDECL_START
- md = @source.match( ATTLISTDECL_PATTERN, true )
- raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
- element = md[1]
- contents = md[0]
-
- pairs = {}
- values = md[0].scan( ATTDEF_RE )
- values.each do |attdef|
- unless attdef[3] == "#IMPLIED"
- attdef.compact!
- val = attdef[3]
- val = attdef[4] if val == "#FIXED "
- pairs[attdef[0]] = val
- if attdef[0] =~ /^xmlns:(.*)/
- @nsstack[0] << $1
- end
- end
- end
- return [ :attlistdecl, element, pairs, contents ]
- when NOTATIONDECL_START
- md = nil
- if @source.match( PUBLIC )
- md = @source.match( PUBLIC, true )
- vals = [md[1],md[2],md[4],md[6]]
- elsif @source.match( SYSTEM )
- md = @source.match( SYSTEM, true )
- vals = [md[1],md[2],nil,md[4]]
- else
- raise REXML::ParseException.new( "error parsing notation: no matching pattern", @source )
- end
- return [ :notationdecl, *vals ]
- when CDATA_END
- @document_status = :after_doctype
- @source.match( CDATA_END, true )
- return [ :end_doctype ]
- end
- end
- begin
- if @source.buffer[0] == ?<
- if @source.buffer[1] == ?/
- @nsstack.shift
- last_tag = @tags.pop
- #md = @source.match_to_consume( '>', CLOSE_MATCH)
- md = @source.match( CLOSE_MATCH, true )
- raise REXML::ParseException.new( "Missing end tag for "+
- "'#{last_tag}' (got \"#{md[1]}\")",
- @source) unless last_tag == md[1]
- return [ :end_element, last_tag ]
- elsif @source.buffer[1] == ?!
- md = @source.match(/\A(\s*[^>]*>)/um)
- #STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
- raise REXML::ParseException.new("Malformed node", @source) unless md
- if md[0][2] == ?-
- md = @source.match( COMMENT_PATTERN, true )
-
- case md[1]
- when /--/, /-$/
- raise REXML::ParseException.new("Malformed comment", @source)
- end
-
- return [ :comment, md[1] ] if md
- else
- md = @source.match( CDATA_PATTERN, true )
- return [ :cdata, md[1] ] if md
- end
- raise REXML::ParseException.new( "Declarations can only occur "+
- "in the doctype declaration.", @source)
- elsif @source.buffer[1] == ??
- md = @source.match( INSTRUCTION_PATTERN, true )
- return [ :processing_instruction, md[1], md[2] ] if md
- raise REXML::ParseException.new( "Bad instruction declaration",
- @source)
- else
- # Get the next tag
- md = @source.match(TAG_MATCH, true)
- unless md
- # Check for missing attribute quotes
- raise REXML::ParseException.new("missing attribute quote", @source) if @source.match(MISSING_ATTRIBUTE_QUOTES )
- raise REXML::ParseException.new("malformed XML: missing tag start", @source)
- end
- attributes = {}
- prefixes = Set.new
- prefixes << md[2] if md[2]
- @nsstack.unshift(curr_ns=Set.new)
- if md[4].size > 0
- attrs = md[4].scan( ATTRIBUTE_PATTERN )
- raise REXML::ParseException.new( "error parsing attributes: [#{attrs.join ', '}], excess = \"#$'\"", @source) if $' and $'.strip.size > 0
- attrs.each { |a,b,c,d,e|
- if b == "xmlns"
- if c == "xml"
- if d != "http://www.w3.org/XML/1998/namespace"
- msg = "The 'xml' prefix must not be bound to any other namespace "+
- "(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
- raise REXML::ParseException.new( msg, @source, self )
- end
- elsif c == "xmlns"
- msg = "The 'xmlns' prefix must not be declared "+
- "(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
- raise REXML::ParseException.new( msg, @source, self)
- end
- curr_ns << c
- elsif b
- prefixes << b unless b == "xml"
- end
-
- if attributes.has_key? a
- msg = "Duplicate attribute #{a.inspect}"
- raise REXML::ParseException.new( msg, @source, self)
- end
-
- attributes[a] = e
- }
- end
-
- # Verify that all of the prefixes have been defined
- for prefix in prefixes
- unless @nsstack.find{|k| k.member?(prefix)}
- raise UndefinedNamespaceException.new(prefix,@source,self)
- end
- end
-
- if md[6]
- @closed = md[1]
- @nsstack.shift
- else
- @tags.push( md[1] )
- end
- return [ :start_element, md[1], attributes ]
- end
- else
- md = @source.match( TEXT_PATTERN, true )
- if md[0].length == 0
- @source.match( /(\s+)/, true )
- end
- #STDERR.puts "GOT #{md[1].inspect}" unless md[0].length == 0
- #return [ :text, "" ] if md[0].length == 0
- # unnormalized = Text::unnormalize( md[1], self )
- # return PullEvent.new( :text, md[1], unnormalized )
- return [ :text, md[1] ]
- end
- rescue REXML::UndefinedNamespaceException
- raise
- rescue REXML::ParseException
- raise
- rescue Exception, NameError => error
- raise REXML::ParseException.new( "Exception parsing",
- @source, self, (error ? error : $!) )
- end
- return [ :dummy ]
- end
-
- def entity( reference, entities )
- value = nil
- value = entities[ reference ] if entities
- if not value
- value = DEFAULT_ENTITIES[ reference ]
- value = value[2] if value
- end
- unnormalize( value, entities ) if value
- end
-
- # Escapes all possible entities
- def normalize( input, entities=nil, entity_filter=nil )
- copy = input.clone
- # Doing it like this rather than in a loop improves the speed
- copy.gsub!( EREFERENCE, '&amp;' )
- entities.each do |key, value|
- copy.gsub!( value, "&#{key};" ) unless entity_filter and
- entity_filter.include?(entity)
- end if entities
- copy.gsub!( EREFERENCE, '&amp;' )
- DEFAULT_ENTITIES.each do |key, value|
- copy.gsub!( value[3], value[1] )
- end
- copy
- end
-
- # Unescapes all possible entities
- def unnormalize( string, entities=nil, filter=nil )
- rv = string.clone
- rv.gsub!( /\r\n?/, "\n" )
- matches = rv.scan( REFERENCE_RE )
- return rv if matches.size == 0
- rv.gsub!( /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/ ) {
- m=$1
- m = "0#{m}" if m[0] == ?x
- [Integer(m)].pack('U*')
- }
- matches.collect!{|x|x[0]}.compact!
- if matches.size > 0
- matches.each do |entity_reference|
- unless filter and filter.include?(entity_reference)
- entity_value = entity( entity_reference, entities )
- if entity_value
- re = /&#{entity_reference};/
- rv.gsub!( re, entity_value )
- else
- er = DEFAULT_ENTITIES[entity_reference]
- rv.gsub!( er[0], er[2] ) if er
- end
- end
- end
- rv.gsub!( /&amp;/, '&' )
- end
- rv
- end
- end
- end
-end
-
-=begin
- case event[0]
- when :start_element
- when :text
- when :end_element
- when :processing_instruction
- when :cdata
- when :comment
- when :xmldecl
- when :start_doctype
- when :end_doctype
- when :externalentity
- when :elementdecl
- when :entity
- when :attlistdecl
- when :notationdecl
- when :end_doctype
- end
-=end
diff --git a/trunk/lib/rexml/parsers/lightparser.rb b/trunk/lib/rexml/parsers/lightparser.rb
deleted file mode 100644
index 0f35034993..0000000000
--- a/trunk/lib/rexml/parsers/lightparser.rb
+++ /dev/null
@@ -1,60 +0,0 @@
-require 'rexml/parsers/streamparser'
-require 'rexml/parsers/baseparser'
-require 'rexml/light/node'
-
-module REXML
- module Parsers
- class LightParser
- def initialize stream
- @stream = stream
- @parser = REXML::Parsers::BaseParser.new( stream )
- end
-
- def add_listener( listener )
- @parser.add_listener( listener )
- end
-
- def rewind
- @stream.rewind
- @parser.stream = @stream
- end
-
- def parse
- root = context = [ :document ]
- while true
- event = @parser.pull
- case event[0]
- when :end_document
- break
- when :end_doctype
- context = context[1]
- when :start_element, :start_doctype
- new_node = event
- context << new_node
- new_node[1,0] = [context]
- context = new_node
- when :end_element, :end_doctype
- context = context[1]
- else
- new_node = event
- context << new_node
- new_node[1,0] = [context]
- end
- end
- root
- end
- end
-
- # An element is an array. The array contains:
- # 0 The parent element
- # 1 The tag name
- # 2 A hash of attributes
- # 3..-1 The child elements
- # An element is an array of size > 3
- # Text is a String
- # PIs are [ :processing_instruction, target, data ]
- # Comments are [ :comment, data ]
- # DocTypes are DocType structs
- # The root is an array with XMLDecls, Text, DocType, Array, Text
- end
-end
diff --git a/trunk/lib/rexml/parsers/pullparser.rb b/trunk/lib/rexml/parsers/pullparser.rb
deleted file mode 100644
index 36dc7160c3..0000000000
--- a/trunk/lib/rexml/parsers/pullparser.rb
+++ /dev/null
@@ -1,196 +0,0 @@
-require 'forwardable'
-
-require 'rexml/parseexception'
-require 'rexml/parsers/baseparser'
-require 'rexml/xmltokens'
-
-module REXML
- module Parsers
- # = Using the Pull Parser
- # <em>This API is experimental, and subject to change.</em>
- # parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
- # while parser.has_next?
- # res = parser.next
- # puts res[1]['att'] if res.start_tag? and res[0] == 'b'
- # end
- # See the PullEvent class for information on the content of the results.
- # The data is identical to the arguments passed for the various events to
- # the StreamListener API.
- #
- # Notice that:
- # parser = PullParser.new( "<a>BAD DOCUMENT" )
- # while parser.has_next?
- # res = parser.next
- # raise res[1] if res.error?
- # end
- #
- # Nat Price gave me some good ideas for the API.
- class PullParser
- include XMLTokens
- extend Forwardable
-
- def_delegators( :@parser, :has_next? )
- def_delegators( :@parser, :entity )
- def_delegators( :@parser, :empty? )
- def_delegators( :@parser, :source )
-
- def initialize stream
- @entities = {}
- @listeners = nil
- @parser = BaseParser.new( stream )
- @my_stack = []
- end
-
- def add_listener( listener )
- @listeners = [] unless @listeners
- @listeners << listener
- end
-
- def each
- while has_next?
- yield self.pull
- end
- end
-
- def peek depth=0
- if @my_stack.length <= depth
- (depth - @my_stack.length + 1).times {
- e = PullEvent.new(@parser.pull)
- @my_stack.push(e)
- }
- end
- @my_stack[depth]
- end
-
- def pull
- return @my_stack.shift if @my_stack.length > 0
-
- event = @parser.pull
- case event[0]
- when :entitydecl
- @entities[ event[1] ] =
- event[2] unless event[2] =~ /PUBLIC|SYSTEM/
- when :text
- unnormalized = @parser.unnormalize( event[1], @entities )
- event << unnormalized
- end
- PullEvent.new( event )
- end
-
- def unshift token
- @my_stack.unshift token
- end
- end
-
- # A parsing event. The contents of the event are accessed as an +Array?,
- # and the type is given either by the ...? methods, or by accessing the
- # +type+ accessor. The contents of this object vary from event to event,
- # but are identical to the arguments passed to +StreamListener+s for each
- # event.
- class PullEvent
- # The type of this event. Will be one of :tag_start, :tag_end, :text,
- # :processing_instruction, :comment, :doctype, :attlistdecl, :entitydecl,
- # :notationdecl, :entity, :cdata, :xmldecl, or :error.
- def initialize(arg)
- @contents = arg
- end
-
- def []( start, endd=nil)
- if start.kind_of? Range
- @contents.slice( start.begin+1 .. start.end )
- elsif start.kind_of? Numeric
- if endd.nil?
- @contents.slice( start+1 )
- else
- @contents.slice( start+1, endd )
- end
- else
- raise "Illegal argument #{start.inspect} (#{start.class})"
- end
- end
-
- def event_type
- @contents[0]
- end
-
- # Content: [ String tag_name, Hash attributes ]
- def start_element?
- @contents[0] == :start_element
- end
-
- # Content: [ String tag_name ]
- def end_element?
- @contents[0] == :end_element
- end
-
- # Content: [ String raw_text, String unnormalized_text ]
- def text?
- @contents[0] == :text
- end
-
- # Content: [ String text ]
- def instruction?
- @contents[0] == :processing_instruction
- end
-
- # Content: [ String text ]
- def comment?
- @contents[0] == :comment
- end
-
- # Content: [ String name, String pub_sys, String long_name, String uri ]
- def doctype?
- @contents[0] == :start_doctype
- end
-
- # Content: [ String text ]
- def attlistdecl?
- @contents[0] == :attlistdecl
- end
-
- # Content: [ String text ]
- def elementdecl?
- @contents[0] == :elementdecl
- end
-
- # Due to the wonders of DTDs, an entity declaration can be just about
- # anything. There's no way to normalize it; you'll have to interpret the
- # content yourself. However, the following is true:
- #
- # * If the entity declaration is an internal entity:
- # [ String name, String value ]
- # Content: [ String text ]
- def entitydecl?
- @contents[0] == :entitydecl
- end
-
- # Content: [ String text ]
- def notationdecl?
- @contents[0] == :notationdecl
- end
-
- # Content: [ String text ]
- def entity?
- @contents[0] == :entity
- end
-
- # Content: [ String text ]
- def cdata?
- @contents[0] == :cdata
- end
-
- # Content: [ String version, String encoding, String standalone ]
- def xmldecl?
- @contents[0] == :xmldecl
- end
-
- def error?
- @contents[0] == :error
- end
-
- def inspect
- @contents[0].to_s + ": " + @contents[1..-1].inspect
- end
- end
- end
-end
diff --git a/trunk/lib/rexml/parsers/sax2parser.rb b/trunk/lib/rexml/parsers/sax2parser.rb
deleted file mode 100644
index cafc39375d..0000000000
--- a/trunk/lib/rexml/parsers/sax2parser.rb
+++ /dev/null
@@ -1,247 +0,0 @@
-require 'rexml/parsers/baseparser'
-require 'rexml/parseexception'
-require 'rexml/namespace'
-require 'rexml/text'
-
-module REXML
- module Parsers
- # SAX2Parser
- class SAX2Parser
- def initialize source
- @parser = BaseParser.new(source)
- @listeners = []
- @procs = []
- @namespace_stack = []
- @has_listeners = false
- @tag_stack = []
- @entities = {}
- end
-
- def source
- @parser.source
- end
-
- def add_listener( listener )
- @parser.add_listener( listener )
- end
-
- # Listen arguments:
- #
- # Symbol, Array, Block
- # Listen to Symbol events on Array elements
- # Symbol, Block
- # Listen to Symbol events
- # Array, Listener
- # Listen to all events on Array elements
- # Array, Block
- # Listen to :start_element events on Array elements
- # Listener
- # Listen to All events
- #
- # Symbol can be one of: :start_element, :end_element,
- # :start_prefix_mapping, :end_prefix_mapping, :characters,
- # :processing_instruction, :doctype, :attlistdecl, :elementdecl,
- # :entitydecl, :notationdecl, :cdata, :xmldecl, :comment
- #
- # There is an additional symbol that can be listened for: :progress.
- # This will be called for every event generated, passing in the current
- # stream position.
- #
- # Array contains regular expressions or strings which will be matched
- # against fully qualified element names.
- #
- # Listener must implement the methods in SAX2Listener
- #
- # Block will be passed the same arguments as a SAX2Listener method would
- # be, where the method name is the same as the matched Symbol.
- # See the SAX2Listener for more information.
- def listen( *args, &blok )
- if args[0].kind_of? Symbol
- if args.size == 2
- args[1].each { |match| @procs << [args[0], match, blok] }
- else
- add( [args[0], nil, blok] )
- end
- elsif args[0].kind_of? Array
- if args.size == 2
- args[0].each { |match| add( [nil, match, args[1]] ) }
- else
- args[0].each { |match| add( [ :start_element, match, blok ] ) }
- end
- else
- add([nil, nil, args[0]])
- end
- end
-
- def deafen( listener=nil, &blok )
- if listener
- @listeners.delete_if {|item| item[-1] == listener }
- @has_listeners = false if @listeners.size == 0
- else
- @procs.delete_if {|item| item[-1] == blok }
- end
- end
-
- def parse
- @procs.each { |sym,match,block| block.call if sym == :start_document }
- @listeners.each { |sym,match,block|
- block.start_document if sym == :start_document or sym.nil?
- }
- root = context = []
- while true
- event = @parser.pull
- case event[0]
- when :end_document
- handle( :end_document )
- break
- when :start_doctype
- handle( :doctype, *event[1..-1])
- when :end_doctype
- context = context[1]
- when :start_element
- @tag_stack.push(event[1])
- # find the observers for namespaces
- procs = get_procs( :start_prefix_mapping, event[1] )
- listeners = get_listeners( :start_prefix_mapping, event[1] )
- if procs or listeners
- # break out the namespace declarations
- # The attributes live in event[2]
- event[2].each {|n, v| event[2][n] = @parser.normalize(v)}
- nsdecl = event[2].find_all { |n, value| n =~ /^xmlns(:|$)/ }
- nsdecl.collect! { |n, value| [ n[6..-1], value ] }
- @namespace_stack.push({})
- nsdecl.each do |n,v|
- @namespace_stack[-1][n] = v
- # notify observers of namespaces
- procs.each { |ob| ob.call( n, v ) } if procs
- listeners.each { |ob| ob.start_prefix_mapping(n, v) } if listeners
- end
- end
- event[1] =~ Namespace::NAMESPLIT
- prefix = $1
- local = $2
- uri = get_namespace(prefix)
- # find the observers for start_element
- procs = get_procs( :start_element, event[1] )
- listeners = get_listeners( :start_element, event[1] )
- # notify observers
- procs.each { |ob| ob.call( uri, local, event[1], event[2] ) } if procs
- listeners.each { |ob|
- ob.start_element( uri, local, event[1], event[2] )
- } if listeners
- when :end_element
- @tag_stack.pop
- event[1] =~ Namespace::NAMESPLIT
- prefix = $1
- local = $2
- uri = get_namespace(prefix)
- # find the observers for start_element
- procs = get_procs( :end_element, event[1] )
- listeners = get_listeners( :end_element, event[1] )
- # notify observers
- procs.each { |ob| ob.call( uri, local, event[1] ) } if procs
- listeners.each { |ob|
- ob.end_element( uri, local, event[1] )
- } if listeners
-
- namespace_mapping = @namespace_stack.pop
- # find the observers for namespaces
- procs = get_procs( :end_prefix_mapping, event[1] )
- listeners = get_listeners( :end_prefix_mapping, event[1] )
- if procs or listeners
- namespace_mapping.each do |ns_prefix, ns_uri|
- # notify observers of namespaces
- procs.each { |ob| ob.call( ns_prefix ) } if procs
- listeners.each { |ob| ob.end_prefix_mapping(ns_prefix) } if listeners
- end
- end
- when :text
- #normalized = @parser.normalize( event[1] )
- #handle( :characters, normalized )
- copy = event[1].clone
-
- esub = proc { |match|
- if @entities.has_key?($1)
- @entities[$1].gsub(Text::REFERENCE, &esub)
- else
- match
- end
- }
-
- copy.gsub!( Text::REFERENCE, &esub )
- copy.gsub!( Text::NUMERICENTITY ) {|m|
- m=$1
- m = "0#{m}" if m[0] == ?x
- [Integer(m)].pack('U*')
- }
- handle( :characters, copy )
- when :entitydecl
- @entities[ event[1] ] = event[2] if event.size == 3
- handle( *event )
- when :processing_instruction, :comment, :attlistdecl,
- :elementdecl, :cdata, :notationdecl, :xmldecl
- handle( *event )
- end
- handle( :progress, @parser.position )
- end
- end
-
- private
- def handle( symbol, *arguments )
- tag = @tag_stack[-1]
- procs = get_procs( symbol, tag )
- listeners = get_listeners( symbol, tag )
- # notify observers
- procs.each { |ob| ob.call( *arguments ) } if procs
- listeners.each { |l|
- l.send( symbol.to_s, *arguments )
- } if listeners
- end
-
- # The following methods are duplicates, but it is faster than using
- # a helper
- def get_procs( symbol, name )
- return nil if @procs.size == 0
- @procs.find_all do |sym, match, block|
- #puts sym.inspect+"=="+symbol.inspect+ "\t"+match.inspect+"=="+name.inspect+ "\t"+( (sym.nil? or symbol == sym) and ((name.nil? and match.nil?) or match.nil? or ( (name == match) or (match.kind_of? Regexp and name =~ match)))).to_s
- (
- (sym.nil? or symbol == sym) and
- ((name.nil? and match.nil?) or match.nil? or (
- (name == match) or
- (match.kind_of? Regexp and name =~ match)
- )
- )
- )
- end.collect{|x| x[-1]}
- end
- def get_listeners( symbol, name )
- return nil if @listeners.size == 0
- @listeners.find_all do |sym, match, block|
- (
- (sym.nil? or symbol == sym) and
- ((name.nil? and match.nil?) or match.nil? or (
- (name == match) or
- (match.kind_of? Regexp and name =~ match)
- )
- )
- )
- end.collect{|x| x[-1]}
- end
-
- def add( pair )
- if pair[-1].respond_to? :call
- @procs << pair unless @procs.include? pair
- else
- @listeners << pair unless @listeners.include? pair
- @has_listeners = true
- end
- end
-
- def get_namespace( prefix )
- uris = (@namespace_stack.find_all { |ns| not ns[prefix].nil? }) ||
- (@namespace_stack.find { |ns| not ns[nil].nil? })
- uris[-1][prefix] unless uris.nil? or 0 == uris.size
- end
- end
- end
-end
diff --git a/trunk/lib/rexml/parsers/streamparser.rb b/trunk/lib/rexml/parsers/streamparser.rb
deleted file mode 100644
index 256d0f611c..0000000000
--- a/trunk/lib/rexml/parsers/streamparser.rb
+++ /dev/null
@@ -1,46 +0,0 @@
-module REXML
- module Parsers
- class StreamParser
- def initialize source, listener
- @listener = listener
- @parser = BaseParser.new( source )
- end
-
- def add_listener( listener )
- @parser.add_listener( listener )
- end
-
- def parse
- # entity string
- while true
- event = @parser.pull
- case event[0]
- when :end_document
- return
- when :start_element
- attrs = event[2].each do |n, v|
- event[2][n] = @parser.unnormalize( v )
- end
- @listener.tag_start( event[1], attrs )
- when :end_element
- @listener.tag_end( event[1] )
- when :text
- normalized = @parser.unnormalize( event[1] )
- @listener.text( normalized )
- when :processing_instruction
- @listener.instruction( *event[1,2] )
- when :start_doctype
- @listener.doctype( *event[1..-1] )
- when :end_doctype
- # FIXME: remove this condition for milestone:3.2
- @listener.doctype_end if @listener.respond_to? :doctype_end
- when :comment, :attlistdecl, :cdata, :xmldecl, :elementdecl
- @listener.send( event[0].to_s, *event[1..-1] )
- when :entitydecl, :notationdecl
- @listener.send( event[0].to_s, event[1..-1] )
- end
- end
- end
- end
- end
-end
diff --git a/trunk/lib/rexml/parsers/treeparser.rb b/trunk/lib/rexml/parsers/treeparser.rb
deleted file mode 100644
index 30327d0dfd..0000000000
--- a/trunk/lib/rexml/parsers/treeparser.rb
+++ /dev/null
@@ -1,100 +0,0 @@
-require 'rexml/validation/validationexception'
-require 'rexml/undefinednamespaceexception'
-
-module REXML
- module Parsers
- class TreeParser
- def initialize( source, build_context = Document.new )
- @build_context = build_context
- @parser = Parsers::BaseParser.new( source )
- end
-
- def add_listener( listener )
- @parser.add_listener( listener )
- end
-
- def parse
- tag_stack = []
- in_doctype = false
- entities = nil
- begin
- while true
- event = @parser.pull
- #STDERR.puts "TREEPARSER GOT #{event.inspect}"
- case event[0]
- when :end_document
- unless tag_stack.empty?
- #raise ParseException.new("No close tag for #{tag_stack.inspect}")
- raise ParseException.new("No close tag for #{@build_context.xpath}")
- end
- return
- when :start_element
- tag_stack.push(event[1])
- el = @build_context = @build_context.add_element( event[1] )
- event[2].each do |key, value|
- el.attributes[key]=Attribute.new(key,value,self)
- end
- when :end_element
- tag_stack.pop
- @build_context = @build_context.parent
- when :text
- if not in_doctype
- if @build_context[-1].instance_of? Text
- @build_context[-1] << event[1]
- else
- @build_context.add(
- Text.new(event[1], @build_context.whitespace, nil, true)
- ) unless (
- @build_context.ignore_whitespace_nodes and
- event[1].strip.size==0
- )
- end
- end
- when :comment
- c = Comment.new( event[1] )
- @build_context.add( c )
- when :cdata
- c = CData.new( event[1] )
- @build_context.add( c )
- when :processing_instruction
- @build_context.add( Instruction.new( event[1], event[2] ) )
- when :end_doctype
- in_doctype = false
- entities.each { |k,v| entities[k] = @build_context.entities[k].value }
- @build_context = @build_context.parent
- when :start_doctype
- doctype = DocType.new( event[1..-1], @build_context )
- @build_context = doctype
- entities = {}
- in_doctype = true
- when :attlistdecl
- n = AttlistDecl.new( event[1..-1] )
- @build_context.add( n )
- when :externalentity
- n = ExternalEntity.new( event[1] )
- @build_context.add( n )
- when :elementdecl
- n = ElementDecl.new( event[1] )
- @build_context.add(n)
- when :entitydecl
- entities[ event[1] ] = event[2] unless event[2] =~ /PUBLIC|SYSTEM/
- @build_context.add(Entity.new(event))
- when :notationdecl
- n = NotationDecl.new( *event[1..-1] )
- @build_context.add( n )
- when :xmldecl
- x = XMLDecl.new( event[1], event[2], event[3] )
- @build_context.add( x )
- end
- end
- rescue REXML::Validation::ValidationException
- raise
- rescue REXML::UndefinedNamespaceException
- raise
- rescue
- raise ParseException.new( $!.message, @parser.source, @parser, $! )
- end
- end
- end
- end
-end
diff --git a/trunk/lib/rexml/parsers/ultralightparser.rb b/trunk/lib/rexml/parsers/ultralightparser.rb
deleted file mode 100644
index adc4af18e2..0000000000
--- a/trunk/lib/rexml/parsers/ultralightparser.rb
+++ /dev/null
@@ -1,56 +0,0 @@
-require 'rexml/parsers/streamparser'
-require 'rexml/parsers/baseparser'
-
-module REXML
- module Parsers
- class UltraLightParser
- def initialize stream
- @stream = stream
- @parser = REXML::Parsers::BaseParser.new( stream )
- end
-
- def add_listener( listener )
- @parser.add_listener( listener )
- end
-
- def rewind
- @stream.rewind
- @parser.stream = @stream
- end
-
- def parse
- root = context = []
- while true
- event = @parser.pull
- case event[0]
- when :end_document
- break
- when :end_doctype
- context = context[1]
- when :start_element, :doctype
- context << event
- event[1,0] = [context]
- context = event
- when :end_element
- context = context[1]
- else
- context << event
- end
- end
- root
- end
- end
-
- # An element is an array. The array contains:
- # 0 The parent element
- # 1 The tag name
- # 2 A hash of attributes
- # 3..-1 The child elements
- # An element is an array of size > 3
- # Text is a String
- # PIs are [ :processing_instruction, target, data ]
- # Comments are [ :comment, data ]
- # DocTypes are DocType structs
- # The root is an array with XMLDecls, Text, DocType, Array, Text
- end
-end
diff --git a/trunk/lib/rexml/parsers/xpathparser.rb b/trunk/lib/rexml/parsers/xpathparser.rb
deleted file mode 100644
index 152198856d..0000000000
--- a/trunk/lib/rexml/parsers/xpathparser.rb
+++ /dev/null
@@ -1,698 +0,0 @@
-require 'rexml/namespace'
-require 'rexml/xmltokens'
-
-module REXML
- module Parsers
- # You don't want to use this class. Really. Use XPath, which is a wrapper
- # for this class. Believe me. You don't want to poke around in here.
- # There is strange, dark magic at work in this code. Beware. Go back! Go
- # back while you still can!
- class XPathParser
- include XMLTokens
- LITERAL = /^'([^']*)'|^"([^"]*)"/u
-
- def namespaces=( namespaces )
- Functions::namespace_context = namespaces
- @namespaces = namespaces
- end
-
- def parse path
- path.gsub!(/([\(\[])\s+/, '\1') # Strip ignorable spaces
- path.gsub!( /\s+([\]\)])/, '\1' )
- parsed = []
- path = OrExpr(path, parsed)
- parsed
- end
-
- def predicate path
- parsed = []
- Predicate( "[#{path}]", parsed )
- parsed
- end
-
- def abbreviate( path )
- path = path.kind_of?(String) ? parse( path ) : path
- string = ""
- document = false
- while path.size > 0
- op = path.shift
- case op
- when :node
- when :attribute
- string << "/" if string.size > 0
- string << "@"
- when :child
- string << "/" if string.size > 0
- when :descendant_or_self
- string << "/"
- when :self
- string << "."
- when :parent
- string << ".."
- when :any
- string << "*"
- when :text
- string << "text()"
- when :following, :following_sibling,
- :ancestor, :ancestor_or_self, :descendant,
- :namespace, :preceding, :preceding_sibling
- string << "/" unless string.size == 0
- string << op.to_s.tr("_", "-")
- string << "::"
- when :qname
- prefix = path.shift
- name = path.shift
- string << prefix+":" if prefix.size > 0
- string << name
- when :predicate
- string << '['
- string << predicate_to_string( path.shift ) {|x| abbreviate( x ) }
- string << ']'
- when :document
- document = true
- when :function
- string << path.shift
- string << "( "
- string << predicate_to_string( path.shift[0] ) {|x| abbreviate( x )}
- string << " )"
- when :literal
- string << %Q{ "#{path.shift}" }
- else
- string << "/" unless string.size == 0
- string << "UNKNOWN("
- string << op.inspect
- string << ")"
- end
- end
- string = "/"+string if document
- return string
- end
-
- def expand( path )
- path = path.kind_of?(String) ? parse( path ) : path
- string = ""
- document = false
- while path.size > 0
- op = path.shift
- case op
- when :node
- string << "node()"
- when :attribute, :child, :following, :following_sibling,
- :ancestor, :ancestor_or_self, :descendant, :descendant_or_self,
- :namespace, :preceding, :preceding_sibling, :self, :parent
- string << "/" unless string.size == 0
- string << op.to_s.tr("_", "-")
- string << "::"
- when :any
- string << "*"
- when :qname
- prefix = path.shift
- name = path.shift
- string << prefix+":" if prefix.size > 0
- string << name
- when :predicate
- string << '['
- string << predicate_to_string( path.shift ) { |x| expand(x) }
- string << ']'
- when :document
- document = true
- else
- string << "/" unless string.size == 0
- string << "UNKNOWN("
- string << op.inspect
- string << ")"
- end
- end
- string = "/"+string if document
- return string
- end
-
- def predicate_to_string( path, &block )
- string = ""
- case path[0]
- when :and, :or, :mult, :plus, :minus, :neq, :eq, :lt, :gt, :lteq, :gteq, :div, :mod, :union
- op = path.shift
- case op
- when :eq
- op = "="
- when :lt
- op = "<"
- when :gt
- op = ">"
- when :lteq
- op = "<="
- when :gteq
- op = ">="
- when :neq
- op = "!="
- when :union
- op = "|"
- end
- left = predicate_to_string( path.shift, &block )
- right = predicate_to_string( path.shift, &block )
- string << " "
- string << left
- string << " "
- string << op.to_s
- string << " "
- string << right
- string << " "
- when :function
- path.shift
- name = path.shift
- string << name
- string << "( "
- string << predicate_to_string( path.shift, &block )
- string << " )"
- when :literal
- path.shift
- string << " "
- string << path.shift.inspect
- string << " "
- else
- string << " "
- string << yield( path )
- string << " "
- end
- return string.squeeze(" ")
- end
-
- private
- #LocationPath
- # | RelativeLocationPath
- # | '/' RelativeLocationPath?
- # | '//' RelativeLocationPath
- def LocationPath path, parsed
- #puts "LocationPath '#{path}'"
- path = path.strip
- if path[0] == ?/
- parsed << :document
- if path[1] == ?/
- parsed << :descendant_or_self
- parsed << :node
- path = path[2..-1]
- else
- path = path[1..-1]
- end
- end
- #puts parsed.inspect
- return RelativeLocationPath( path, parsed ) if path.size > 0
- end
-
- #RelativeLocationPath
- # | Step
- # | (AXIS_NAME '::' | '@' | '') AxisSpecifier
- # NodeTest
- # Predicate
- # | '.' | '..' AbbreviatedStep
- # | RelativeLocationPath '/' Step
- # | RelativeLocationPath '//' Step
- AXIS = /^(ancestor|ancestor-or-self|attribute|child|descendant|descendant-or-self|following|following-sibling|namespace|parent|preceding|preceding-sibling|self)::/
- def RelativeLocationPath path, parsed
- #puts "RelativeLocationPath #{path}"
- while path.size > 0
- # (axis or @ or <child::>) nodetest predicate >
- # OR > / Step
- # (. or ..) >
- if path[0] == ?.
- if path[1] == ?.
- parsed << :parent
- parsed << :node
- path = path[2..-1]
- else
- parsed << :self
- parsed << :node
- path = path[1..-1]
- end
- else
- if path[0] == ?@
- #puts "ATTRIBUTE"
- parsed << :attribute
- path = path[1..-1]
- # Goto Nodetest
- elsif path =~ AXIS
- parsed << $1.tr('-','_').intern
- path = $'
- # Goto Nodetest
- else
- parsed << :child
- end
-
- #puts "NODETESTING '#{path}'"
- n = []
- path = NodeTest( path, n)
- #puts "NODETEST RETURNED '#{path}'"
-
- if path[0] == ?[
- path = Predicate( path, n )
- end
-
- parsed.concat(n)
- end
-
- if path.size > 0
- if path[0] == ?/
- if path[1] == ?/
- parsed << :descendant_or_self
- parsed << :node
- path = path[2..-1]
- else
- path = path[1..-1]
- end
- else
- return path
- end
- end
- end
- return path
- end
-
- # Returns a 1-1 map of the nodeset
- # The contents of the resulting array are either:
- # true/false, if a positive match
- # String, if a name match
- #NodeTest
- # | ('*' | NCNAME ':' '*' | QNAME) NameTest
- # | NODE_TYPE '(' ')' NodeType
- # | PI '(' LITERAL ')' PI
- # | '[' expr ']' Predicate
- NCNAMETEST= /^(#{NCNAME_STR}):\*/u
- QNAME = Namespace::NAMESPLIT
- NODE_TYPE = /^(comment|text|node)\(\s*\)/m
- PI = /^processing-instruction\(/
- def NodeTest path, parsed
- #puts "NodeTest with #{path}"
- res = nil
- case path
- when /^\*/
- path = $'
- parsed << :any
- when NODE_TYPE
- type = $1
- path = $'
- parsed << type.tr('-', '_').intern
- when PI
- path = $'
- literal = nil
- if path !~ /^\s*\)/
- path =~ LITERAL
- literal = $1
- path = $'
- raise ParseException.new("Missing ')' after processing instruction") if path[0] != ?)
- path = path[1..-1]
- end
- parsed << :processing_instruction
- parsed << (literal || '')
- when NCNAMETEST
- #puts "NCNAMETEST"
- prefix = $1
- path = $'
- parsed << :namespace
- parsed << prefix
- when QNAME
- #puts "QNAME"
- prefix = $1
- name = $2
- path = $'
- prefix = "" unless prefix
- parsed << :qname
- parsed << prefix
- parsed << name
- end
- return path
- end
-
- # Filters the supplied nodeset on the predicate(s)
- def Predicate path, parsed
- #puts "PREDICATE with #{path}"
- return nil unless path[0] == ?[
- predicates = []
- while path[0] == ?[
- path, expr = get_group(path)
- predicates << expr[1..-2] if expr
- end
- #puts "PREDICATES = #{predicates.inspect}"
- predicates.each{ |pred|
- #puts "ORING #{pred}"
- preds = []
- parsed << :predicate
- parsed << preds
- OrExpr(pred, preds)
- }
- #puts "PREDICATES = #{predicates.inspect}"
- path
- end
-
- # The following return arrays of true/false, a 1-1 mapping of the
- # supplied nodeset, except for axe(), which returns a filtered
- # nodeset
-
- #| OrExpr S 'or' S AndExpr
- #| AndExpr
- def OrExpr path, parsed
- #puts "OR >>> #{path}"
- n = []
- rest = AndExpr( path, n )
- #puts "OR <<< #{rest}"
- if rest != path
- while rest =~ /^\s*( or )/
- n = [ :or, n, [] ]
- rest = AndExpr( $', n[-1] )
- end
- end
- if parsed.size == 0 and n.size != 0
- parsed.replace(n)
- elsif n.size > 0
- parsed << n
- end
- rest
- end
-
- #| AndExpr S 'and' S EqualityExpr
- #| EqualityExpr
- def AndExpr path, parsed
- #puts "AND >>> #{path}"
- n = []
- rest = EqualityExpr( path, n )
- #puts "AND <<< #{rest}"
- if rest != path
- while rest =~ /^\s*( and )/
- n = [ :and, n, [] ]
- #puts "AND >>> #{rest}"
- rest = EqualityExpr( $', n[-1] )
- #puts "AND <<< #{rest}"
- end
- end
- if parsed.size == 0 and n.size != 0
- parsed.replace(n)
- elsif n.size > 0
- parsed << n
- end
- rest
- end
-
- #| EqualityExpr ('=' | '!=') RelationalExpr
- #| RelationalExpr
- def EqualityExpr path, parsed
- #puts "EQUALITY >>> #{path}"
- n = []
- rest = RelationalExpr( path, n )
- #puts "EQUALITY <<< #{rest}"
- if rest != path
- while rest =~ /^\s*(!?=)\s*/
- if $1[0] == ?!
- n = [ :neq, n, [] ]
- else
- n = [ :eq, n, [] ]
- end
- rest = RelationalExpr( $', n[-1] )
- end
- end
- if parsed.size == 0 and n.size != 0
- parsed.replace(n)
- elsif n.size > 0
- parsed << n
- end
- rest
- end
-
- #| RelationalExpr ('<' | '>' | '<=' | '>=') AdditiveExpr
- #| AdditiveExpr
- def RelationalExpr path, parsed
- #puts "RELATION >>> #{path}"
- n = []
- rest = AdditiveExpr( path, n )
- #puts "RELATION <<< #{rest}"
- if rest != path
- while rest =~ /^\s*([<>]=?)\s*/
- if $1[0] == ?<
- sym = "lt"
- else
- sym = "gt"
- end
- sym << "eq" if $1[-1] == ?=
- n = [ sym.intern, n, [] ]
- rest = AdditiveExpr( $', n[-1] )
- end
- end
- if parsed.size == 0 and n.size != 0
- parsed.replace(n)
- elsif n.size > 0
- parsed << n
- end
- rest
- end
-
- #| AdditiveExpr ('+' | S '-') MultiplicativeExpr
- #| MultiplicativeExpr
- def AdditiveExpr path, parsed
- #puts "ADDITIVE >>> #{path}"
- n = []
- rest = MultiplicativeExpr( path, n )
- #puts "ADDITIVE <<< #{rest}"
- if rest != path
- while rest =~ /^\s*(\+| -)\s*/
- if $1[0] == ?+
- n = [ :plus, n, [] ]
- else
- n = [ :minus, n, [] ]
- end
- rest = MultiplicativeExpr( $', n[-1] )
- end
- end
- if parsed.size == 0 and n.size != 0
- parsed.replace(n)
- elsif n.size > 0
- parsed << n
- end
- rest
- end
-
- #| MultiplicativeExpr ('*' | S ('div' | 'mod') S) UnaryExpr
- #| UnaryExpr
- def MultiplicativeExpr path, parsed
- #puts "MULT >>> #{path}"
- n = []
- rest = UnaryExpr( path, n )
- #puts "MULT <<< #{rest}"
- if rest != path
- while rest =~ /^\s*(\*| div | mod )\s*/
- if $1[0] == ?*
- n = [ :mult, n, [] ]
- elsif $1.include?( "div" )
- n = [ :div, n, [] ]
- else
- n = [ :mod, n, [] ]
- end
- rest = UnaryExpr( $', n[-1] )
- end
- end
- if parsed.size == 0 and n.size != 0
- parsed.replace(n)
- elsif n.size > 0
- parsed << n
- end
- rest
- end
-
- #| '-' UnaryExpr
- #| UnionExpr
- def UnaryExpr path, parsed
- path =~ /^(\-*)/
- path = $'
- if $1 and (($1.size % 2) != 0)
- mult = -1
- else
- mult = 1
- end
- parsed << :neg if mult < 0
-
- #puts "UNARY >>> #{path}"
- n = []
- path = UnionExpr( path, n )
- #puts "UNARY <<< #{path}"
- parsed.concat( n )
- path
- end
-
- #| UnionExpr '|' PathExpr
- #| PathExpr
- def UnionExpr path, parsed
- #puts "UNION >>> #{path}"
- n = []
- rest = PathExpr( path, n )
- #puts "UNION <<< #{rest}"
- if rest != path
- while rest =~ /^\s*(\|)\s*/
- n = [ :union, n, [] ]
- rest = PathExpr( $', n[-1] )
- end
- end
- if parsed.size == 0 and n.size != 0
- parsed.replace( n )
- elsif n.size > 0
- parsed << n
- end
- rest
- end
-
- #| LocationPath
- #| FilterExpr ('/' | '//') RelativeLocationPath
- def PathExpr path, parsed
- path =~ /^\s*/
- path = $'
- #puts "PATH >>> #{path}"
- n = []
- rest = FilterExpr( path, n )
- #puts "PATH <<< '#{rest}'"
- if rest != path
- if rest and rest[0] == ?/
- return RelativeLocationPath(rest, n)
- end
- end
- #puts "BEFORE WITH '#{rest}'"
- rest = LocationPath(rest, n) if rest =~ /\A[\/\.\@\[\w_*]/
- parsed.concat(n)
- return rest
- end
-
- #| FilterExpr Predicate
- #| PrimaryExpr
- def FilterExpr path, parsed
- #puts "FILTER >>> #{path}"
- n = []
- path = PrimaryExpr( path, n )
- #puts "FILTER <<< #{path}"
- path = Predicate(path, n) if path and path[0] == ?[
- #puts "FILTER <<< #{path}"
- parsed.concat(n)
- path
- end
-
- #| VARIABLE_REFERENCE
- #| '(' expr ')'
- #| LITERAL
- #| NUMBER
- #| FunctionCall
- VARIABLE_REFERENCE = /^\$(#{NAME_STR})/u
- NUMBER = /^(\d*\.?\d+)/
- NT = /^comment|text|processing-instruction|node$/
- def PrimaryExpr path, parsed
- arry = []
- case path
- when VARIABLE_REFERENCE
- varname = $1
- path = $'
- parsed << :variable
- parsed << varname
- #arry << @variables[ varname ]
- when /^(\w[-\w]*)(?:\()/
- #puts "PrimaryExpr :: Function >>> #$1 -- '#$''"
- fname = $1
- tmp = $'
- #puts "#{fname} =~ #{NT.inspect}"
- return path if fname =~ NT
- path = tmp
- parsed << :function
- parsed << fname
- path = FunctionCall(path, parsed)
- when NUMBER
- #puts "LITERAL or NUMBER: #$1"
- varname = $1.nil? ? $2 : $1
- path = $'
- parsed << :literal
- parsed << (varname.include?('.') ? varname.to_f : varname.to_i)
- when LITERAL
- #puts "LITERAL or NUMBER: #$1"
- varname = $1.nil? ? $2 : $1
- path = $'
- parsed << :literal
- parsed << varname
- when /^\(/ #/
- path, contents = get_group(path)
- contents = contents[1..-2]
- n = []
- OrExpr( contents, n )
- parsed.concat(n)
- end
- path
- end
-
- #| FUNCTION_NAME '(' ( expr ( ',' expr )* )? ')'
- def FunctionCall rest, parsed
- path, arguments = parse_args(rest)
- argset = []
- for argument in arguments
- args = []
- OrExpr( argument, args )
- argset << args
- end
- parsed << argset
- path
- end
-
- # get_group( '[foo]bar' ) -> ['bar', '[foo]']
- def get_group string
- ind = 0
- depth = 0
- st = string[0,1]
- en = (st == "(" ? ")" : "]")
- begin
- case string[ind,1]
- when st
- depth += 1
- when en
- depth -= 1
- end
- ind += 1
- end while depth > 0 and ind < string.length
- return nil unless depth==0
- [string[ind..-1], string[0..ind-1]]
- end
-
- def parse_args( string )
- arguments = []
- ind = 0
- inquot = false
- inapos = false
- depth = 1
- begin
- case string[ind]
- when ?"
- inquot = !inquot unless inapos
- when ?'
- inapos = !inapos unless inquot
- else
- unless inquot or inapos
- case string[ind]
- when ?(
- depth += 1
- if depth == 1
- string = string[1..-1]
- ind -= 1
- end
- when ?)
- depth -= 1
- if depth == 0
- s = string[0,ind].strip
- arguments << s unless s == ""
- string = string[ind+1..-1]
- end
- when ?,
- if depth == 1
- s = string[0,ind].strip
- arguments << s unless s == ""
- string = string[ind+1..-1]
- ind = -1
- end
- end
- end
- end
- ind += 1
- end while depth > 0 and ind < string.length
- return nil unless depth==0
- [string,arguments]
- end
- end
- end
-end
diff --git a/trunk/lib/rexml/quickpath.rb b/trunk/lib/rexml/quickpath.rb
deleted file mode 100644
index cdf4c53017..0000000000
--- a/trunk/lib/rexml/quickpath.rb
+++ /dev/null
@@ -1,263 +0,0 @@
-require 'rexml/functions'
-require 'rexml/xmltokens'
-
-module REXML
- class QuickPath
- include Functions
- include XMLTokens
-
- EMPTY_HASH = {}
-
- def QuickPath::first element, path, namespaces=EMPTY_HASH
- match(element, path, namespaces)[0]
- end
-
- def QuickPath::each element, path, namespaces=EMPTY_HASH, &block
- path = "*" unless path
- match(element, path, namespaces).each( &block )
- end
-
- def QuickPath::match element, path, namespaces=EMPTY_HASH
- raise "nil is not a valid xpath" unless path
- results = nil
- Functions::namespace_context = namespaces
- case path
- when /^\/([^\/]|$)/u
- # match on root
- path = path[1..-1]
- return [element.root.parent] if path == ''
- results = filter([element.root], path)
- when /^[-\w]*::/u
- results = filter([element], path)
- when /^\*/u
- results = filter(element.to_a, path)
- when /^[\[!\w:]/u
- # match on child
- matches = []
- children = element.to_a
- results = filter(children, path)
- else
- results = filter([element], path)
- end
- return results
- end
-
- # Given an array of nodes it filters the array based on the path. The
- # result is that when this method returns, the array will contain elements
- # which match the path
- def QuickPath::filter elements, path
- return elements if path.nil? or path == '' or elements.size == 0
- case path
- when /^\/\//u # Descendant
- return axe( elements, "descendant-or-self", $' )
- when /^\/?\b(\w[-\w]*)\b::/u # Axe
- axe_name = $1
- rest = $'
- return axe( elements, $1, $' )
- when /^\/(?=\b([:!\w][-\.\w]*:)?[-!\*\.\w]*\b([^:(]|$)|\*)/u # Child
- rest = $'
- results = []
- elements.each do |element|
- results |= filter( element.to_a, rest )
- end
- return results
- when /^\/?(\w[-\w]*)\(/u # / Function
- return function( elements, $1, $' )
- when Namespace::NAMESPLIT # Element name
- name = $2
- ns = $1
- rest = $'
- elements.delete_if do |element|
- !(element.kind_of? Element and
- (element.expanded_name == name or
- (element.name == name and
- element.namespace == Functions.namespace_context[ns])))
- end
- return filter( elements, rest )
- when /^\/\[/u
- matches = []
- elements.each do |element|
- matches |= predicate( element.to_a, path[1..-1] ) if element.kind_of? Element
- end
- return matches
- when /^\[/u # Predicate
- return predicate( elements, path )
- when /^\/?\.\.\./u # Ancestor
- return axe( elements, "ancestor", $' )
- when /^\/?\.\./u # Parent
- return filter( elements.collect{|e|e.parent}, $' )
- when /^\/?\./u # Self
- return filter( elements, $' )
- when /^\*/u # Any
- results = []
- elements.each do |element|
- results |= filter( [element], $' ) if element.kind_of? Element
- #if element.kind_of? Element
- # children = element.to_a
- # children.delete_if { |child| !child.kind_of?(Element) }
- # results |= filter( children, $' )
- #end
- end
- return results
- end
- return []
- end
-
- def QuickPath::axe( elements, axe_name, rest )
- matches = []
- matches = filter( elements.dup, rest ) if axe_name =~ /-or-self$/u
- case axe_name
- when /^descendant/u
- elements.each do |element|
- matches |= filter( element.to_a, "descendant-or-self::#{rest}" ) if element.kind_of? Element
- end
- when /^ancestor/u
- elements.each do |element|
- while element.parent
- matches << element.parent
- element = element.parent
- end
- end
- matches = filter( matches, rest )
- when "self"
- matches = filter( elements, rest )
- when "child"
- elements.each do |element|
- matches |= filter( element.to_a, rest ) if element.kind_of? Element
- end
- when "attribute"
- elements.each do |element|
- matches << element.attributes[ rest ] if element.kind_of? Element
- end
- when "parent"
- matches = filter(elements.collect{|element| element.parent}.uniq, rest)
- when "following-sibling"
- matches = filter(elements.collect{|element| element.next_sibling}.uniq,
- rest)
- when "previous-sibling"
- matches = filter(elements.collect{|element|
- element.previous_sibling}.uniq, rest )
- end
- return matches.uniq
- end
-
- # A predicate filters a node-set with respect to an axis to produce a
- # new node-set. For each node in the node-set to be filtered, the
- # PredicateExpr is evaluated with that node as the context node, with
- # the number of nodes in the node-set as the context size, and with the
- # proximity position of the node in the node-set with respect to the
- # axis as the context position; if PredicateExpr evaluates to true for
- # that node, the node is included in the new node-set; otherwise, it is
- # not included.
- #
- # A PredicateExpr is evaluated by evaluating the Expr and converting
- # the result to a boolean. If the result is a number, the result will
- # be converted to true if the number is equal to the context position
- # and will be converted to false otherwise; if the result is not a
- # number, then the result will be converted as if by a call to the
- # boolean function. Thus a location path para[3] is equivalent to
- # para[position()=3].
- def QuickPath::predicate( elements, path )
- ind = 1
- bcount = 1
- while bcount > 0
- bcount += 1 if path[ind] == ?[
- bcount -= 1 if path[ind] == ?]
- ind += 1
- end
- ind -= 1
- predicate = path[1..ind-1]
- rest = path[ind+1..-1]
-
- # have to change 'a [=<>] b [=<>] c' into 'a [=<>] b and b [=<>] c'
- predicate.gsub!( /([^\s(and)(or)<>=]+)\s*([<>=])\s*([^\s(and)(or)<>=]+)\s*([<>=])\s*([^\s(and)(or)<>=]+)/u,
- '\1 \2 \3 and \3 \4 \5' )
- # Let's do some Ruby trickery to avoid some work:
- predicate.gsub!( /&/u, "&&" )
- predicate.gsub!( /=/u, "==" )
- predicate.gsub!( /@(\w[-\w.]*)/u, 'attribute("\1")' )
- predicate.gsub!( /\bmod\b/u, "%" )
- predicate.gsub!( /\b(\w[-\w.]*\()/u ) {
- fname = $1
- fname.gsub( /-/u, "_" )
- }
-
- Functions.pair = [ 0, elements.size ]
- results = []
- elements.each do |element|
- Functions.pair[0] += 1
- Functions.node = element
- res = eval( predicate )
- case res
- when true
- results << element
- when Fixnum
- results << element if Functions.pair[0] == res
- when String
- results << element
- end
- end
- return filter( results, rest )
- end
-
- def QuickPath::attribute( name )
- return Functions.node.attributes[name] if Functions.node.kind_of? Element
- end
-
- def QuickPath::name()
- return Functions.node.name if Functions.node.kind_of? Element
- end
-
- def QuickPath::method_missing( id, *args )
- begin
- Functions.send( id.id2name, *args )
- rescue Exception
- raise "METHOD: #{id.id2name}(#{args.join ', '})\n#{$!.message}"
- end
- end
-
- def QuickPath::function( elements, fname, rest )
- args = parse_args( elements, rest )
- Functions.pair = [0, elements.size]
- results = []
- elements.each do |element|
- Functions.pair[0] += 1
- Functions.node = element
- res = Functions.send( fname, *args )
- case res
- when true
- results << element
- when Fixnum
- results << element if Functions.pair[0] == res
- end
- end
- return results
- end
-
- def QuickPath::parse_args( element, string )
- # /.*?(?:\)|,)/
- arguments = []
- buffer = ""
- while string and string != ""
- c = string[0]
- string.sub!(/^./u, "")
- case c
- when ?,
- # if depth = 1, then we start a new argument
- arguments << evaluate( buffer )
- #arguments << evaluate( string[0..count] )
- when ?(
- # start a new method call
- function( element, buffer, string )
- buffer = ""
- when ?)
- # close the method call and return arguments
- return arguments
- else
- buffer << c
- end
- end
- ""
- end
- end
-end
diff --git a/trunk/lib/rexml/rexml.rb b/trunk/lib/rexml/rexml.rb
deleted file mode 100644
index 810af31356..0000000000
--- a/trunk/lib/rexml/rexml.rb
+++ /dev/null
@@ -1,31 +0,0 @@
-# -*- encoding: utf-8 -*-
-# REXML is an XML toolkit for Ruby[http://www.ruby-lang.org], in Ruby.
-#
-# REXML is a _pure_ Ruby, XML 1.0 conforming,
-# non-validating[http://www.w3.org/TR/2004/REC-xml-20040204/#sec-conformance]
-# toolkit with an intuitive API. REXML passes 100% of the non-validating Oasis
-# tests[http://www.oasis-open.org/committees/xml-conformance/xml-test-suite.shtml],
-# and provides tree, stream, SAX2, pull, and lightweight APIs. REXML also
-# includes a full XPath[http://www.w3c.org/tr/xpath] 1.0 implementation. Since
-# Ruby 1.8, REXML is included in the standard Ruby distribution.
-#
-# Main page:: http://www.germane-software.com/software/rexml
-# Author:: Sean Russell <serATgermaneHYPHENsoftwareDOTcom>
-# Date:: 2008/019
-# Version:: 3.1.7.3
-#
-# This API documentation can be downloaded from the REXML home page, or can
-# be accessed online[http://www.germane-software.com/software/rexml_doc]
-#
-# A tutorial is available in the REXML distribution in docs/tutorial.html,
-# or can be accessed
-# online[http://www.germane-software.com/software/rexml/docs/tutorial.html]
-module REXML
- COPYRIGHT = "Copyright © 2001-2008 Sean Russell <ser@germane-software.com>"
- DATE = "2008/019"
- VERSION = "3.1.7.3"
- REVISION = "$Revision$".gsub(/\$Revision:|\$/,'').strip
-
- Copyright = COPYRIGHT
- Version = VERSION
-end
diff --git a/trunk/lib/rexml/sax2listener.rb b/trunk/lib/rexml/sax2listener.rb
deleted file mode 100644
index 8db1389d06..0000000000
--- a/trunk/lib/rexml/sax2listener.rb
+++ /dev/null
@@ -1,97 +0,0 @@
-module REXML
- # A template for stream parser listeners.
- # Note that the declarations (attlistdecl, elementdecl, etc) are trivially
- # processed; REXML doesn't yet handle doctype entity declarations, so you
- # have to parse them out yourself.
- # === Missing methods from SAX2
- # ignorable_whitespace
- # === Methods extending SAX2
- # +WARNING+
- # These methods are certainly going to change, until DTDs are fully
- # supported. Be aware of this.
- # start_document
- # end_document
- # doctype
- # elementdecl
- # attlistdecl
- # entitydecl
- # notationdecl
- # cdata
- # xmldecl
- # comment
- module SAX2Listener
- def start_document
- end
- def end_document
- end
- def start_prefix_mapping prefix, uri
- end
- def end_prefix_mapping prefix
- end
- def start_element uri, localname, qname, attributes
- end
- def end_element uri, localname, qname
- end
- def characters text
- end
- def processing_instruction target, data
- end
- # Handles a doctype declaration. Any attributes of the doctype which are
- # not supplied will be nil. # EG, <!DOCTYPE me PUBLIC "foo" "bar">
- # @p name the name of the doctype; EG, "me"
- # @p pub_sys "PUBLIC", "SYSTEM", or nil. EG, "PUBLIC"
- # @p long_name the supplied long name, or nil. EG, "foo"
- # @p uri the uri of the doctype, or nil. EG, "bar"
- def doctype name, pub_sys, long_name, uri
- end
- # If a doctype includes an ATTLIST declaration, it will cause this
- # method to be called. The content is the declaration itself, unparsed.
- # EG, <!ATTLIST el attr CDATA #REQUIRED> will come to this method as "el
- # attr CDATA #REQUIRED". This is the same for all of the .*decl
- # methods.
- def attlistdecl(element, pairs, contents)
- end
- # <!ELEMENT ...>
- def elementdecl content
- end
- # <!ENTITY ...>
- # The argument passed to this method is an array of the entity
- # declaration. It can be in a number of formats, but in general it
- # returns (example, result):
- # <!ENTITY % YN '"Yes"'>
- # ["%", "YN", "'\"Yes\"'", "\""]
- # <!ENTITY % YN 'Yes'>
- # ["%", "YN", "'Yes'", "s"]
- # <!ENTITY WhatHeSaid "He said %YN;">
- # ["WhatHeSaid", "\"He said %YN;\"", "YN"]
- # <!ENTITY open-hatch SYSTEM "http://www.textuality.com/boilerplate/OpenHatch.xml">
- # ["open-hatch", "SYSTEM", "\"http://www.textuality.com/boilerplate/OpenHatch.xml\""]
- # <!ENTITY open-hatch PUBLIC "-//Textuality//TEXT Standard open-hatch boilerplate//EN" "http://www.textuality.com/boilerplate/OpenHatch.xml">
- # ["open-hatch", "PUBLIC", "\"-//Textuality//TEXT Standard open-hatch boilerplate//EN\"", "\"http://www.textuality.com/boilerplate/OpenHatch.xml\""]
- # <!ENTITY hatch-pic SYSTEM "../grafix/OpenHatch.gif" NDATA gif>
- # ["hatch-pic", "SYSTEM", "\"../grafix/OpenHatch.gif\"", "\n\t\t\t\t\t\t\tNDATA gif", "gif"]
- def entitydecl name, decl
- end
- # <!NOTATION ...>
- def notationdecl content
- end
- # Called when <![CDATA[ ... ]]> is encountered in a document.
- # @p content "..."
- def cdata content
- end
- # Called when an XML PI is encountered in the document.
- # EG: <?xml version="1.0" encoding="utf"?>
- # @p version the version attribute value. EG, "1.0"
- # @p encoding the encoding attribute value, or nil. EG, "utf"
- # @p standalone the standalone attribute value, or nil. EG, nil
- # @p spaced the declaration is followed by a line break
- def xmldecl version, encoding, standalone
- end
- # Called when a comment is encountered.
- # @p comment The content of the comment
- def comment comment
- end
- def progress position
- end
- end
-end
diff --git a/trunk/lib/rexml/source.rb b/trunk/lib/rexml/source.rb
deleted file mode 100644
index d4335138a1..0000000000
--- a/trunk/lib/rexml/source.rb
+++ /dev/null
@@ -1,258 +0,0 @@
-require 'rexml/encoding'
-
-module REXML
- # Generates Source-s. USE THIS CLASS.
- class SourceFactory
- # Generates a Source object
- # @param arg Either a String, or an IO
- # @return a Source, or nil if a bad argument was given
- def SourceFactory::create_from(arg)
- if arg.respond_to? :read and
- arg.respond_to? :readline and
- arg.respond_to? :nil? and
- arg.respond_to? :eof?
- IOSource.new(arg)
- elsif arg.respond_to? :to_str
- require 'stringio'
- IOSource.new(StringIO.new(arg))
- elsif arg.kind_of? Source
- arg
- else
- raise "#{arg.class} is not a valid input stream. It must walk \n"+
- "like either a String, an IO, or a Source."
- end
- end
- end
-
- # A Source can be searched for patterns, and wraps buffers and other
- # objects and provides consumption of text
- class Source
- include Encoding
- # The current buffer (what we're going to read next)
- attr_reader :buffer
- # The line number of the last consumed text
- attr_reader :line
- attr_reader :encoding
-
- # Constructor
- # @param arg must be a String, and should be a valid XML document
- # @param encoding if non-null, sets the encoding of the source to this
- # value, overriding all encoding detection
- def initialize(arg, encoding=nil)
- @orig = @buffer = arg
- if encoding
- self.encoding = encoding
- else
- self.encoding = check_encoding( @buffer )
- end
- @line = 0
- end
-
-
- # Inherited from Encoding
- # Overridden to support optimized en/decoding
- def encoding=(enc)
- return unless super
- @line_break = encode( '>' )
- if enc != UTF_8
- @buffer = decode(@buffer)
- @to_utf = true
- else
- @to_utf = false
- if @buffer.respond_to? :force_encoding
- @buffer.force_encoding Encoding::UTF_8
- end
- end
- end
-
- # Scans the source for a given pattern. Note, that this is not your
- # usual scan() method. For one thing, the pattern argument has some
- # requirements; for another, the source can be consumed. You can easily
- # confuse this method. Originally, the patterns were easier
- # to construct and this method more robust, because this method
- # generated search regexes on the fly; however, this was
- # computationally expensive and slowed down the entire REXML package
- # considerably, since this is by far the most commonly called method.
- # @param pattern must be a Regexp, and must be in the form of
- # /^\s*(#{your pattern, with no groups})(.*)/. The first group
- # will be returned; the second group is used if the consume flag is
- # set.
- # @param consume if true, the pattern returned will be consumed, leaving
- # everything after it in the Source.
- # @return the pattern, if found, or nil if the Source is empty or the
- # pattern is not found.
- def scan(pattern, cons=false)
- return nil if @buffer.nil?
- rv = @buffer.scan(pattern)
- @buffer = $' if cons and rv.size>0
- rv
- end
-
- def read
- end
-
- def consume( pattern )
- @buffer = $' if pattern.match( @buffer )
- end
-
- def match_to( char, pattern )
- return pattern.match(@buffer)
- end
-
- def match_to_consume( char, pattern )
- md = pattern.match(@buffer)
- @buffer = $'
- return md
- end
-
- def match(pattern, cons=false)
- md = pattern.match(@buffer)
- @buffer = $' if cons and md
- return md
- end
-
- # @return true if the Source is exhausted
- def empty?
- @buffer == ""
- end
-
- def position
- @orig.index( @buffer )
- end
-
- # @return the current line in the source
- def current_line
- lines = @orig.split
- res = lines.grep @buffer[0..30]
- res = res[-1] if res.kind_of? Array
- lines.index( res ) if res
- end
- end
-
- # A Source that wraps an IO. See the Source class for method
- # documentation
- class IOSource < Source
- #attr_reader :block_size
-
- # block_size has been deprecated
- def initialize(arg, block_size=500, encoding=nil)
- @er_source = @source = arg
- @to_utf = false
-
- # Determining the encoding is a deceptively difficult issue to resolve.
- # First, we check the first two bytes for UTF-16. Then we
- # assume that the encoding is at least ASCII enough for the '>', and
- # we read until we get one of those. This gives us the XML declaration,
- # if there is one. If there isn't one, the file MUST be UTF-8, as per
- # the XML spec. If there is one, we can determine the encoding from
- # it.
- @buffer = ""
- str = @source.read( 2 ) || ''
- if encoding
- self.encoding = encoding
- elsif str[0,2] == "\xfe\xff"
- @line_break = "\000>"
- elsif str[0,2] == "\xff\xfe"
- @line_break = ">\000"
- elsif str[0,2] == "\xef\xbb"
- str += @source.read(1)
- str = '' if (str[2,1] == "\xBF")
- @line_break = ">"
- else
- @line_break = ">"
- end
- super( @source.eof? ? str : str+@source.readline( @line_break ) )
- end
-
- def scan(pattern, cons=false)
- rv = super
- # You'll notice that this next section is very similar to the same
- # section in match(), but just a liiittle different. This is
- # because it is a touch faster to do it this way with scan()
- # than the way match() does it; enough faster to warrent duplicating
- # some code
- if rv.size == 0
- until @buffer =~ pattern or @source.nil?
- begin
- # READLINE OPT
- #str = @source.read(@block_size)
- str = @source.readline(@line_break)
- str = decode(str) if @to_utf and str
- @buffer << str
- rescue Iconv::IllegalSequence
- raise
- rescue
- @source = nil
- end
- end
- rv = super
- end
- rv.taint
- rv
- end
-
- def read
- begin
- str = @source.readline(@line_break)
- str = decode(str) if @to_utf and str
- @buffer << str
- if not @to_utf and @buffer.respond_to? :force_encoding
- @buffer.force_encoding Encoding::UTF_8
- end
- rescue Exception, NameError
- @source = nil
- end
- end
-
- def consume( pattern )
- match( pattern, true )
- end
-
- def match( pattern, cons=false )
- rv = pattern.match(@buffer)
- @buffer = $' if cons and rv
- while !rv and @source
- begin
- str = @source.readline(@line_break)
- str = decode(str) if @to_utf and str
- @buffer << str
- rv = pattern.match(@buffer)
- @buffer = $' if cons and rv
- rescue
- @source = nil
- end
- end
- rv.taint
- rv
- end
-
- def empty?
- super and ( @source.nil? || @source.eof? )
- end
-
- def position
- @er_source.pos rescue 0
- end
-
- # @return the current line in the source
- def current_line
- begin
- pos = @er_source.pos # The byte position in the source
- lineno = @er_source.lineno # The XML < position in the source
- @er_source.rewind
- line = 0 # The \r\n position in the source
- begin
- while @er_source.pos < pos
- @er_source.readline
- line += 1
- end
- rescue
- end
- rescue IOError
- pos = -1
- line = -1
- end
- [pos, lineno, line]
- end
- end
-end
diff --git a/trunk/lib/rexml/streamlistener.rb b/trunk/lib/rexml/streamlistener.rb
deleted file mode 100644
index 6f401125b5..0000000000
--- a/trunk/lib/rexml/streamlistener.rb
+++ /dev/null
@@ -1,92 +0,0 @@
-module REXML
- # A template for stream parser listeners.
- # Note that the declarations (attlistdecl, elementdecl, etc) are trivially
- # processed; REXML doesn't yet handle doctype entity declarations, so you
- # have to parse them out yourself.
- module StreamListener
- # Called when a tag is encountered.
- # @p name the tag name
- # @p attrs an array of arrays of attribute/value pairs, suitable for
- # use with assoc or rassoc. IE, <tag attr1="value1" attr2="value2">
- # will result in
- # tag_start( "tag", # [["attr1","value1"],["attr2","value2"]])
- def tag_start name, attrs
- end
- # Called when the end tag is reached. In the case of <tag/>, tag_end
- # will be called immidiately after tag_start
- # @p the name of the tag
- def tag_end name
- end
- # Called when text is encountered in the document
- # @p text the text content.
- def text text
- end
- # Called when an instruction is encountered. EG: <?xsl sheet='foo'?>
- # @p name the instruction name; in the example, "xsl"
- # @p instruction the rest of the instruction. In the example,
- # "sheet='foo'"
- def instruction name, instruction
- end
- # Called when a comment is encountered.
- # @p comment The content of the comment
- def comment comment
- end
- # Handles a doctype declaration. Any attributes of the doctype which are
- # not supplied will be nil. # EG, <!DOCTYPE me PUBLIC "foo" "bar">
- # @p name the name of the doctype; EG, "me"
- # @p pub_sys "PUBLIC", "SYSTEM", or nil. EG, "PUBLIC"
- # @p long_name the supplied long name, or nil. EG, "foo"
- # @p uri the uri of the doctype, or nil. EG, "bar"
- def doctype name, pub_sys, long_name, uri
- end
- # Called when the doctype is done
- def doctype_end
- end
- # If a doctype includes an ATTLIST declaration, it will cause this
- # method to be called. The content is the declaration itself, unparsed.
- # EG, <!ATTLIST el attr CDATA #REQUIRED> will come to this method as "el
- # attr CDATA #REQUIRED". This is the same for all of the .*decl
- # methods.
- def attlistdecl element_name, attributes, raw_content
- end
- # <!ELEMENT ...>
- def elementdecl content
- end
- # <!ENTITY ...>
- # The argument passed to this method is an array of the entity
- # declaration. It can be in a number of formats, but in general it
- # returns (example, result):
- # <!ENTITY % YN '"Yes"'>
- # ["%", "YN", "'\"Yes\"'", "\""]
- # <!ENTITY % YN 'Yes'>
- # ["%", "YN", "'Yes'", "s"]
- # <!ENTITY WhatHeSaid "He said %YN;">
- # ["WhatHeSaid", "\"He said %YN;\"", "YN"]
- # <!ENTITY open-hatch SYSTEM "http://www.textuality.com/boilerplate/OpenHatch.xml">
- # ["open-hatch", "SYSTEM", "\"http://www.textuality.com/boilerplate/OpenHatch.xml\""]
- # <!ENTITY open-hatch PUBLIC "-//Textuality//TEXT Standard open-hatch boilerplate//EN" "http://www.textuality.com/boilerplate/OpenHatch.xml">
- # ["open-hatch", "PUBLIC", "\"-//Textuality//TEXT Standard open-hatch boilerplate//EN\"", "\"http://www.textuality.com/boilerplate/OpenHatch.xml\""]
- # <!ENTITY hatch-pic SYSTEM "../grafix/OpenHatch.gif" NDATA gif>
- # ["hatch-pic", "SYSTEM", "\"../grafix/OpenHatch.gif\"", "\n\t\t\t\t\t\t\tNDATA gif", "gif"]
- def entitydecl content
- end
- # <!NOTATION ...>
- def notationdecl content
- end
- # Called when %foo; is encountered in a doctype declaration.
- # @p content "foo"
- def entity content
- end
- # Called when <![CDATA[ ... ]]> is encountered in a document.
- # @p content "..."
- def cdata content
- end
- # Called when an XML PI is encountered in the document.
- # EG: <?xml version="1.0" encoding="utf"?>
- # @p version the version attribute value. EG, "1.0"
- # @p encoding the encoding attribute value, or nil. EG, "utf"
- # @p standalone the standalone attribute value, or nil. EG, nil
- def xmldecl version, encoding, standalone
- end
- end
-end
diff --git a/trunk/lib/rexml/syncenumerator.rb b/trunk/lib/rexml/syncenumerator.rb
deleted file mode 100644
index 11609bdf3d..0000000000
--- a/trunk/lib/rexml/syncenumerator.rb
+++ /dev/null
@@ -1,32 +0,0 @@
-module REXML
- class SyncEnumerator
- include Enumerable
-
- # Creates a new SyncEnumerator which enumerates rows of given
- # Enumerable objects.
- def initialize(*enums)
- @gens = enums
- @length = @gens.collect {|x| x.size }.max
- end
-
- # Returns the number of enumerated Enumerable objects, i.e. the size
- # of each row.
- def size
- @gens.size
- end
-
- # Returns the number of enumerated Enumerable objects, i.e. the size
- # of each row.
- def length
- @gens.length
- end
-
- # Enumerates rows of the Enumerable objects.
- def each
- @length.times {|i|
- yield @gens.collect {|x| x[i]}
- }
- self
- end
- end
-end
diff --git a/trunk/lib/rexml/text.rb b/trunk/lib/rexml/text.rb
deleted file mode 100644
index fac5ac3e41..0000000000
--- a/trunk/lib/rexml/text.rb
+++ /dev/null
@@ -1,404 +0,0 @@
-require 'rexml/entity'
-require 'rexml/doctype'
-require 'rexml/child'
-require 'rexml/doctype'
-require 'rexml/parseexception'
-
-module REXML
- # Represents text nodes in an XML document
- class Text < Child
- include Comparable
- # The order in which the substitutions occur
- SPECIALS = [ /&(?!#?[\w-]+;)/u, /</u, />/u, /"/u, /'/u, /\r/u ]
- SUBSTITUTES = ['&amp;', '&lt;', '&gt;', '&quot;', '&apos;', '&#13;']
- # Characters which are substituted in written strings
- SLAICEPS = [ '<', '>', '"', "'", '&' ]
- SETUTITSBUS = [ /&lt;/u, /&gt;/u, /&quot;/u, /&apos;/u, /&amp;/u ]
-
- # If +raw+ is true, then REXML leaves the value alone
- attr_accessor :raw
-
- NEEDS_A_SECOND_CHECK = /(<|&((#{Entity::NAME});|(#0*((?:\d+)|(?:x[a-fA-F0-9]+)));)?)/um
- NUMERICENTITY = /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/
- VALID_CHAR = [
- 0x9, 0xA, 0xD,
- (0x20..0xD7FF),
- (0xE000..0xFFFD),
- (0x10000..0x10FFFF)
- ]
-
- if String.method_defined? :encode
- VALID_XML_CHARS = Regexp.new('^['+
- VALID_CHAR.map { |item|
- case item
- when Fixnum
- [item].pack('U').force_encoding('utf-8')
- when Range
- [item.first, '-'.ord, item.last].pack('UUU').force_encoding('utf-8')
- end
- }.join +
- ']*$')
- else
- VALID_XML_CHARS = /^(
- [\x09\x0A\x0D\x20-\x7E] # ASCII
- | [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte
- | \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs
- | [\xE1-\xEC\xEE][\x80-\xBF]{2} # straight 3-byte
- | \xEF[\x80-\xBE]{2} #
- | \xEF\xBF[\x80-\xBD] # excluding U+fffe and U+ffff
- | \xED[\x80-\x9F][\x80-\xBF] # excluding surrogates
- | \xF0[\x90-\xBF][\x80-\xBF]{2} # planes 1-3
- | [\xF1-\xF3][\x80-\xBF]{3} # planes 4-15
- | \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16
- )*$/nx;
- end
-
- # Constructor
- # +arg+ if a String, the content is set to the String. If a Text,
- # the object is shallowly cloned.
- #
- # +respect_whitespace+ (boolean, false) if true, whitespace is
- # respected
- #
- # +parent+ (nil) if this is a Parent object, the parent
- # will be set to this.
- #
- # +raw+ (nil) This argument can be given three values.
- # If true, then the value of used to construct this object is expected to
- # contain no unescaped XML markup, and REXML will not change the text. If
- # this value is false, the string may contain any characters, and REXML will
- # escape any and all defined entities whose values are contained in the
- # text. If this value is nil (the default), then the raw value of the
- # parent will be used as the raw value for this node. If there is no raw
- # value for the parent, and no value is supplied, the default is false.
- # Use this field if you have entities defined for some text, and you don't
- # want REXML to escape that text in output.
- # Text.new( "<&", false, nil, false ) #-> "&lt;&amp;"
- # Text.new( "&lt;&amp;", false, nil, false ) #-> "&amp;lt;&amp;amp;"
- # Text.new( "<&", false, nil, true ) #-> Parse exception
- # Text.new( "&lt;&amp;", false, nil, true ) #-> "&lt;&amp;"
- # # Assume that the entity "s" is defined to be "sean"
- # # and that the entity "r" is defined to be "russell"
- # Text.new( "sean russell" ) #-> "&s; &r;"
- # Text.new( "sean russell", false, nil, true ) #-> "sean russell"
- #
- # +entity_filter+ (nil) This can be an array of entities to match in the
- # supplied text. This argument is only useful if +raw+ is set to false.
- # Text.new( "sean russell", false, nil, false, ["s"] ) #-> "&s; russell"
- # Text.new( "sean russell", false, nil, true, ["s"] ) #-> "sean russell"
- # In the last example, the +entity_filter+ argument is ignored.
- #
- # +pattern+ INTERNAL USE ONLY
- def initialize(arg, respect_whitespace=false, parent=nil, raw=nil,
- entity_filter=nil, illegal=NEEDS_A_SECOND_CHECK )
-
- @raw = false
-
- if parent
- super( parent )
- @raw = parent.raw
- else
- @parent = nil
- end
-
- @raw = raw unless raw.nil?
- @entity_filter = entity_filter
- @normalized = @unnormalized = nil
-
- if arg.kind_of? String
- @string = arg.clone
- @string.squeeze!(" \n\t") unless respect_whitespace
- elsif arg.kind_of? Text
- @string = arg.to_s
- @raw = arg.raw
- elsif
- raise "Illegal argument of type #{arg.type} for Text constructor (#{arg})"
- end
-
- @string.gsub!( /\r\n?/, "\n" )
-
- Text.check(@string, NEEDS_A_SECOND_CHECK, doctype) if @raw and @parent
- end
-
- def parent= parent
- super(parent)
- Text.check(@string, NEEDS_A_SECOND_CHECK, doctype) if @raw and @parent
- end
-
- # check for illegal characters
- def Text.check string, pattern, doctype
-
- # illegal anywhere
- if string !~ VALID_XML_CHARS
- if String.method_defined? :encode
- string.chars.each do |c|
- case c.ord
- when *VALID_CHAR
- else
- raise "Illegal character #{c.inspect} in raw string \"#{string}\""
- end
- end
- else
- string.scan(/[\x00-\x7F]|[\x80-\xBF][\xC0-\xF0]*|[\xC0-\xF0]/n) do |c|
- case c.unpack('U')
- when *VALID_CHAR
- else
- raise "Illegal character #{c.inspect} in raw string \"#{string}\""
- end
- end
- end
- end
-
- # context sensitive
- string.scan(pattern) do
- if $1[-1] != ?;
- raise "Illegal character '#{$1}' in raw string \"#{string}\""
- elsif $1[0] == ?&
- if $5 and $5[0] == ?#
- case ($5[1] == ?x ? $5[2..-1].to_i(16) : $5[1..-1].to_i)
- when *VALID_CHAR
- else
- raise "Illegal character '#{$1}' in raw string \"#{string}\""
- end
- elsif $3 and !SUBSTITUTES.include?($1)
- if !doctype or !doctype.entities.has_key?($3)
- raise "Undeclared entity '#{$1}' in raw string \"#{string}\""
- end
- end
- end
- end
- end
-
- def node_type
- :text
- end
-
- def empty?
- @string.size==0
- end
-
-
- def clone
- return Text.new(self)
- end
-
-
- # Appends text to this text node. The text is appended in the +raw+ mode
- # of this text node.
- def <<( to_append )
- @string << to_append.gsub( /\r\n?/, "\n" )
- end
-
-
- # +other+ a String or a Text
- # +returns+ the result of (to_s <=> arg.to_s)
- def <=>( other )
- to_s() <=> other.to_s
- end
-
- def doctype
- if @parent
- doc = @parent.document
- doc.doctype if doc
- end
- end
-
- REFERENCE = /#{Entity::REFERENCE}/
- # Returns the string value of this text node. This string is always
- # escaped, meaning that it is a valid XML text node string, and all
- # entities that can be escaped, have been inserted. This method respects
- # the entity filter set in the constructor.
- #
- # # Assume that the entity "s" is defined to be "sean", and that the
- # # entity "r" is defined to be "russell"
- # t = Text.new( "< & sean russell", false, nil, false, ['s'] )
- # t.to_s #-> "&lt; &amp; &s; russell"
- # t = Text.new( "< & &s; russell", false, nil, false )
- # t.to_s #-> "&lt; &amp; &s; russell"
- # u = Text.new( "sean russell", false, nil, true )
- # u.to_s #-> "sean russell"
- def to_s
- return @string if @raw
- return @normalized if @normalized
-
- @normalized = Text::normalize( @string, doctype, @entity_filter )
- end
-
- def inspect
- @string.inspect
- end
-
- # Returns the string value of this text. This is the text without
- # entities, as it might be used programmatically, or printed to the
- # console. This ignores the 'raw' attribute setting, and any
- # entity_filter.
- #
- # # Assume that the entity "s" is defined to be "sean", and that the
- # # entity "r" is defined to be "russell"
- # t = Text.new( "< & sean russell", false, nil, false, ['s'] )
- # t.value #-> "< & sean russell"
- # t = Text.new( "< & &s; russell", false, nil, false )
- # t.value #-> "< & sean russell"
- # u = Text.new( "sean russell", false, nil, true )
- # u.value #-> "sean russell"
- def value
- return @unnormalized if @unnormalized
- @unnormalized = Text::unnormalize( @string, doctype )
- end
-
- # Sets the contents of this text node. This expects the text to be
- # unnormalized. It returns self.
- #
- # e = Element.new( "a" )
- # e.add_text( "foo" ) # <a>foo</a>
- # e[0].value = "bar" # <a>bar</a>
- # e[0].value = "<a>" # <a>&lt;a&gt;</a>
- def value=( val )
- @string = val.gsub( /\r\n?/, "\n" )
- @unnormalized = nil
- @normalized = nil
- @raw = false
- end
-
- def wrap(string, width, addnewline=false)
- # Recursively wrap string at width.
- return string if string.length <= width
- place = string.rindex(' ', width) # Position in string with last ' ' before cutoff
- if addnewline then
- return "\n" + string[0,place] + "\n" + wrap(string[place+1..-1], width)
- else
- return string[0,place] + "\n" + wrap(string[place+1..-1], width)
- end
- end
-
- def indent_text(string, level=1, style="\t", indentfirstline=true)
- return string if level < 0
- new_string = ''
- string.each { |line|
- indent_string = style * level
- new_line = (indent_string + line).sub(/[\s]+$/,'')
- new_string << new_line
- }
- new_string.strip! unless indentfirstline
- return new_string
- end
-
- # == DEPRECATED
- # See REXML::Formatters
- #
- def write( writer, indent=-1, transitive=false, ie_hack=false )
- Kernel.warn("#{self.class.name}.write is deprecated. See REXML::Formatters")
- formatter = if indent > -1
- REXML::Formatters::Pretty.new( indent )
- else
- REXML::Formatters::Default.new
- end
- formatter.write( self, writer )
- end
-
- # FIXME
- # This probably won't work properly
- def xpath
- path = @parent.xpath
- path += "/text()"
- return path
- end
-
- # Writes out text, substituting special characters beforehand.
- # +out+ A String, IO, or any other object supporting <<( String )
- # +input+ the text to substitute and the write out
- #
- # z=utf8.unpack("U*")
- # ascOut=""
- # z.each{|r|
- # if r < 0x100
- # ascOut.concat(r.chr)
- # else
- # ascOut.concat(sprintf("&#x%x;", r))
- # end
- # }
- # puts ascOut
- def write_with_substitution out, input
- copy = input.clone
- # Doing it like this rather than in a loop improves the speed
- copy.gsub!( SPECIALS[0], SUBSTITUTES[0] )
- copy.gsub!( SPECIALS[1], SUBSTITUTES[1] )
- copy.gsub!( SPECIALS[2], SUBSTITUTES[2] )
- copy.gsub!( SPECIALS[3], SUBSTITUTES[3] )
- copy.gsub!( SPECIALS[4], SUBSTITUTES[4] )
- copy.gsub!( SPECIALS[5], SUBSTITUTES[5] )
- out << copy
- end
-
- # Reads text, substituting entities
- def Text::read_with_substitution( input, illegal=nil )
- copy = input.clone
-
- if copy =~ illegal
- raise ParseException.new( "malformed text: Illegal character #$& in \"#{copy}\"" )
- end if illegal
-
- copy.gsub!( /\r\n?/, "\n" )
- if copy.include? ?&
- copy.gsub!( SETUTITSBUS[0], SLAICEPS[0] )
- copy.gsub!( SETUTITSBUS[1], SLAICEPS[1] )
- copy.gsub!( SETUTITSBUS[2], SLAICEPS[2] )
- copy.gsub!( SETUTITSBUS[3], SLAICEPS[3] )
- copy.gsub!( SETUTITSBUS[4], SLAICEPS[4] )
- copy.gsub!( /&#0*((?:\d+)|(?:x[a-f0-9]+));/ ) {
- m=$1
- #m='0' if m==''
- m = "0#{m}" if m[0] == ?x
- [Integer(m)].pack('U*')
- }
- end
- copy
- end
-
- EREFERENCE = /&(?!#{Entity::NAME};)/
- # Escapes all possible entities
- def Text::normalize( input, doctype=nil, entity_filter=nil )
- copy = input.to_s
- # Doing it like this rather than in a loop improves the speed
- #copy = copy.gsub( EREFERENCE, '&amp;' )
- copy = copy.gsub( "&", "&amp;" )
- if doctype
- # Replace all ampersands that aren't part of an entity
- doctype.entities.each_value do |entity|
- copy = copy.gsub( entity.value,
- "&#{entity.name};" ) if entity.value and
- not( entity_filter and entity_filter.include?(entity) )
- end
- else
- # Replace all ampersands that aren't part of an entity
- DocType::DEFAULT_ENTITIES.each_value do |entity|
- copy = copy.gsub(entity.value, "&#{entity.name};" )
- end
- end
- copy
- end
-
- # Unescapes all possible entities
- def Text::unnormalize( string, doctype=nil, filter=nil, illegal=nil )
- string.gsub( /\r\n?/, "\n" ).gsub( REFERENCE ) {
- ref = $&
- if ref[1] == ?#
- if ref[2] == ?x
- [ref[3...-1].to_i(16)].pack('U*')
- else
- [ref[2...-1].to_i].pack('U*')
- end
- elsif ref == '&amp;'
- '&'
- elsif filter and filter.include?( ref[1...-1] )
- ref
- elsif doctype
- doctype.entity( ref[1...-1] ) or ref
- else
- entity_value = DocType::DEFAULT_ENTITIES[ ref[1...-1] ]
- entity_value ? entity_value.value : ref
- end
- }
- end
- end
-end
diff --git a/trunk/lib/rexml/undefinednamespaceexception.rb b/trunk/lib/rexml/undefinednamespaceexception.rb
deleted file mode 100644
index 8ebfdfd0a9..0000000000
--- a/trunk/lib/rexml/undefinednamespaceexception.rb
+++ /dev/null
@@ -1,8 +0,0 @@
-require 'rexml/parseexception'
-module REXML
- class UndefinedNamespaceException < ParseException
- def initialize( prefix, source, parser )
- super( "Undefined prefix #{prefix} found" )
- end
- end
-end
diff --git a/trunk/lib/rexml/validation/relaxng.rb b/trunk/lib/rexml/validation/relaxng.rb
deleted file mode 100644
index 969f51bc95..0000000000
--- a/trunk/lib/rexml/validation/relaxng.rb
+++ /dev/null
@@ -1,559 +0,0 @@
-require "rexml/validation/validation"
-require "rexml/parsers/baseparser"
-
-module REXML
- module Validation
- # Implemented:
- # * empty
- # * element
- # * attribute
- # * text
- # * optional
- # * choice
- # * oneOrMore
- # * zeroOrMore
- # * group
- # * value
- # * interleave
- # * mixed
- # * ref
- # * grammar
- # * start
- # * define
- #
- # Not implemented:
- # * data
- # * param
- # * include
- # * externalRef
- # * notAllowed
- # * anyName
- # * nsName
- # * except
- # * name
- class RelaxNG
- include Validator
-
- INFINITY = 1.0 / 0.0
- EMPTY = Event.new( nil )
- TEXT = [:start_element, "text"]
- attr_accessor :current
- attr_accessor :count
- attr_reader :references
-
- # FIXME: Namespaces
- def initialize source
- parser = REXML::Parsers::BaseParser.new( source )
-
- @count = 0
- @references = {}
- @root = @current = Sequence.new(self)
- @root.previous = true
- states = [ @current ]
- begin
- event = parser.pull
- case event[0]
- when :start_element
- case event[1]
- when "empty"
- when "element", "attribute", "text", "value"
- states[-1] << event
- when "optional"
- states << Optional.new( self )
- states[-2] << states[-1]
- when "choice"
- states << Choice.new( self )
- states[-2] << states[-1]
- when "oneOrMore"
- states << OneOrMore.new( self )
- states[-2] << states[-1]
- when "zeroOrMore"
- states << ZeroOrMore.new( self )
- states[-2] << states[-1]
- when "group"
- states << Sequence.new( self )
- states[-2] << states[-1]
- when "interleave"
- states << Interleave.new( self )
- states[-2] << states[-1]
- when "mixed"
- states << Interleave.new( self )
- states[-2] << states[-1]
- states[-1] << TEXT
- when "define"
- states << [ event[2]["name"] ]
- when "ref"
- states[-1] << Ref.new( event[2]["name"] )
- when "anyName"
- states << AnyName.new( self )
- states[-2] << states[-1]
- when "nsName"
- when "except"
- when "name"
- when "data"
- when "param"
- when "include"
- when "grammar"
- when "start"
- when "externalRef"
- when "notAllowed"
- end
- when :end_element
- case event[1]
- when "element", "attribute"
- states[-1] << event
- when "zeroOrMore", "oneOrMore", "choice", "optional",
- "interleave", "group", "mixed"
- states.pop
- when "define"
- ref = states.pop
- @references[ ref.shift ] = ref
- #when "empty"
- end
- when :end_document
- states[-1] << event
- when :text
- states[-1] << event
- end
- end while event[0] != :end_document
- end
-
- def receive event
- validate( event )
- end
- end
-
- class State
- def initialize( context )
- @previous = []
- @events = []
- @current = 0
- @count = context.count += 1
- @references = context.references
- @value = false
- end
-
- def reset
- return if @current == 0
- @current = 0
- @events.each {|s| s.reset if s.kind_of? State }
- end
-
- def previous=( previous )
- @previous << previous
- end
-
- def next( event )
- #print "In next with #{event.inspect}. "
- #puts "Next (#@current) is #{@events[@current]}"
- #p @previous
- return @previous.pop.next( event ) if @events[@current].nil?
- expand_ref_in( @events, @current ) if @events[@current].class == Ref
- if ( @events[@current].kind_of? State )
- @current += 1
- @events[@current-1].previous = self
- return @events[@current-1].next( event )
- end
- #puts "Current isn't a state"
- if ( @events[@current].matches?(event) )
- @current += 1
- if @events[@current].nil?
- #puts "#{inspect[0,5]} 1RETURNING #{@previous.inspect[0,5]}"
- return @previous.pop
- elsif @events[@current].kind_of? State
- @current += 1
- #puts "#{inspect[0,5]} 2RETURNING (#{@current-1}) #{@events[@current-1].inspect[0,5]}; on return, next is #{@events[@current]}"
- @events[@current-1].previous = self
- return @events[@current-1]
- else
- #puts "#{inspect[0,5]} RETURNING self w/ next(#@current) = #{@events[@current]}"
- return self
- end
- else
- return nil
- end
- end
-
- def to_s
- # Abbreviated:
- self.class.name =~ /(?:::)(\w)\w+$/
- # Full:
- #self.class.name =~ /(?:::)(\w+)$/
- "#$1.#@count"
- end
-
- def inspect
- "< #{to_s} #{@events.collect{|e|
- pre = e == @events[@current] ? '#' : ''
- pre + e.inspect unless self == e
- }.join(', ')} >"
- end
-
- def expected
- return [@events[@current]]
- end
-
- def <<( event )
- add_event_to_arry( @events, event )
- end
-
-
- protected
- def expand_ref_in( arry, ind )
- new_events = []
- @references[ arry[ind].to_s ].each{ |evt|
- add_event_to_arry(new_events,evt)
- }
- arry[ind,1] = new_events
- end
-
- def add_event_to_arry( arry, evt )
- evt = generate_event( evt )
- if evt.kind_of? String
- arry[-1].event_arg = evt if arry[-1].kind_of? Event and @value
- @value = false
- else
- arry << evt
- end
- end
-
- def generate_event( event )
- return event if event.kind_of? State or event.class == Ref
- evt = nil
- arg = nil
- case event[0]
- when :start_element
- case event[1]
- when "element"
- evt = :start_element
- arg = event[2]["name"]
- when "attribute"
- evt = :start_attribute
- arg = event[2]["name"]
- when "text"
- evt = :text
- when "value"
- evt = :text
- @value = true
- end
- when :text
- return event[1]
- when :end_document
- return Event.new( event[0] )
- else # then :end_element
- case event[1]
- when "element"
- evt = :end_element
- when "attribute"
- evt = :end_attribute
- end
- end
- return Event.new( evt, arg )
- end
- end
-
-
- class Sequence < State
- def matches?(event)
- @events[@current].matches?( event )
- end
- end
-
-
- class Optional < State
- def next( event )
- if @current == 0
- rv = super
- return rv if rv
- @prior = @previous.pop
- return @prior.next( event )
- end
- super
- end
-
- def matches?(event)
- @events[@current].matches?(event) ||
- (@current == 0 and @previous[-1].matches?(event))
- end
-
- def expected
- return [ @prior.expected, @events[0] ].flatten if @current == 0
- return [@events[@current]]
- end
- end
-
-
- class ZeroOrMore < Optional
- def next( event )
- expand_ref_in( @events, @current ) if @events[@current].class == Ref
- if ( @events[@current].matches?(event) )
- @current += 1
- if @events[@current].nil?
- @current = 0
- return self
- elsif @events[@current].kind_of? State
- @current += 1
- @events[@current-1].previous = self
- return @events[@current-1]
- else
- return self
- end
- else
- @prior = @previous.pop
- return @prior.next( event ) if @current == 0
- return nil
- end
- end
-
- def expected
- return [ @prior.expected, @events[0] ].flatten if @current == 0
- return [@events[@current]]
- end
- end
-
-
- class OneOrMore < State
- def initialize context
- super
- @ord = 0
- end
-
- def reset
- super
- @ord = 0
- end
-
- def next( event )
- expand_ref_in( @events, @current ) if @events[@current].class == Ref
- if ( @events[@current].matches?(event) )
- @current += 1
- @ord += 1
- if @events[@current].nil?
- @current = 0
- return self
- elsif @events[@current].kind_of? State
- @current += 1
- @events[@current-1].previous = self
- return @events[@current-1]
- else
- return self
- end
- else
- return @previous.pop.next( event ) if @current == 0 and @ord > 0
- return nil
- end
- end
-
- def matches?( event )
- @events[@current].matches?(event) ||
- (@current == 0 and @ord > 0 and @previous[-1].matches?(event))
- end
-
- def expected
- if @current == 0 and @ord > 0
- return [@previous[-1].expected, @events[0]].flatten
- else
- return [@events[@current]]
- end
- end
- end
-
-
- class Choice < State
- def initialize context
- super
- @choices = []
- end
-
- def reset
- super
- @events = []
- @choices.each { |c| c.each { |s| s.reset if s.kind_of? State } }
- end
-
- def <<( event )
- add_event_to_arry( @choices, event )
- end
-
- def next( event )
- # Make the choice if we haven't
- if @events.size == 0
- c = 0 ; max = @choices.size
- while c < max
- if @choices[c][0].class == Ref
- expand_ref_in( @choices[c], 0 )
- @choices += @choices[c]
- @choices.delete( @choices[c] )
- max -= 1
- else
- c += 1
- end
- end
- @events = @choices.find { |evt| evt[0].matches? event }
- # Remove the references
- # Find the events
- end
- #puts "In next with #{event.inspect}."
- #puts "events is #{@events.inspect}"
- unless @events
- @events = []
- return nil
- end
- #puts "current = #@current"
- super
- end
-
- def matches?( event )
- return @events[@current].matches?( event ) if @events.size > 0
- !@choices.find{|evt| evt[0].matches?(event)}.nil?
- end
-
- def expected
- #puts "IN CHOICE EXPECTED"
- #puts "EVENTS = #{@events.inspect}"
- return [@events[@current]] if @events.size > 0
- return @choices.collect do |x|
- if x[0].kind_of? State
- x[0].expected
- else
- x[0]
- end
- end.flatten
- end
-
- def inspect
- "< #{to_s} #{@choices.collect{|e| e.collect{|f|f.to_s}.join(', ')}.join(' or ')} >"
- end
-
- protected
- def add_event_to_arry( arry, evt )
- if evt.kind_of? State or evt.class == Ref
- arry << [evt]
- elsif evt[0] == :text
- if arry[-1] and
- arry[-1][-1].kind_of?( Event ) and
- arry[-1][-1].event_type == :text and @value
-
- arry[-1][-1].event_arg = evt[1]
- @value = false
- end
- else
- arry << [] if evt[0] == :start_element
- arry[-1] << generate_event( evt )
- end
- end
- end
-
-
- class Interleave < Choice
- def initialize context
- super
- @choice = 0
- end
-
- def reset
- @choice = 0
- end
-
- def next_current( event )
- # Expand references
- c = 0 ; max = @choices.size
- while c < max
- if @choices[c][0].class == Ref
- expand_ref_in( @choices[c], 0 )
- @choices += @choices[c]
- @choices.delete( @choices[c] )
- max -= 1
- else
- c += 1
- end
- end
- @events = @choices[@choice..-1].find { |evt| evt[0].matches? event }
- @current = 0
- if @events
- # reorder the choices
- old = @choices[@choice]
- idx = @choices.index( @events )
- @choices[@choice] = @events
- @choices[idx] = old
- @choice += 1
- end
-
- #puts "In next with #{event.inspect}."
- #puts "events is #{@events.inspect}"
- @events = [] unless @events
- end
-
-
- def next( event )
- # Find the next series
- next_current(event) unless @events[@current]
- return nil unless @events[@current]
-
- expand_ref_in( @events, @current ) if @events[@current].class == Ref
- #puts "In next with #{event.inspect}."
- #puts "Next (#@current) is #{@events[@current]}"
- if ( @events[@current].kind_of? State )
- @current += 1
- @events[@current-1].previous = self
- return @events[@current-1].next( event )
- end
- #puts "Current isn't a state"
- return @previous.pop.next( event ) if @events[@current].nil?
- if ( @events[@current].matches?(event) )
- @current += 1
- if @events[@current].nil?
- #puts "#{inspect[0,5]} 1RETURNING self" unless @choices[@choice].nil?
- return self unless @choices[@choice].nil?
- #puts "#{inspect[0,5]} 1RETURNING #{@previous[-1].inspect[0,5]}"
- return @previous.pop
- elsif @events[@current].kind_of? State
- @current += 1
- #puts "#{inspect[0,5]} 2RETURNING (#{@current-1}) #{@events[@current-1].inspect[0,5]}; on return, next is #{@events[@current]}"
- @events[@current-1].previous = self
- return @events[@current-1]
- else
- #puts "#{inspect[0,5]} RETURNING self w/ next(#@current) = #{@events[@current]}"
- return self
- end
- else
- return nil
- end
- end
-
- def matches?( event )
- return @events[@current].matches?( event ) if @events[@current]
- !@choices[@choice..-1].find{|evt| evt[0].matches?(event)}.nil?
- end
-
- def expected
- #puts "IN CHOICE EXPECTED"
- #puts "EVENTS = #{@events.inspect}"
- return [@events[@current]] if @events[@current]
- return @choices[@choice..-1].collect do |x|
- if x[0].kind_of? State
- x[0].expected
- else
- x[0]
- end
- end.flatten
- end
-
- def inspect
- "< #{to_s} #{@choices.collect{|e| e.collect{|f|f.to_s}.join(', ')}.join(' and ')} >"
- end
- end
-
- class Ref
- def initialize value
- @value = value
- end
- def to_s
- @value
- end
- def inspect
- "{#{to_s}}"
- end
- end
- end
-end
diff --git a/trunk/lib/rexml/validation/validation.rb b/trunk/lib/rexml/validation/validation.rb
deleted file mode 100644
index 93f5bfb329..0000000000
--- a/trunk/lib/rexml/validation/validation.rb
+++ /dev/null
@@ -1,155 +0,0 @@
-require 'rexml/validation/validationexception'
-
-module REXML
- module Validation
- module Validator
- NILEVENT = [ nil ]
- def reset
- @current = @root
- @root.reset
- @root.previous = true
- @attr_stack = []
- self
- end
- def dump
- puts @root.inspect
- end
- def validate( event )
- #puts "Current: #@current"
- #puts "Event: #{event.inspect}"
- @attr_stack = [] unless defined? @attr_stack
- match = @current.next(event)
- raise ValidationException.new( "Validation error. Expected: "+
- @current.expected.join( " or " )+" from #{@current.inspect} "+
- " but got #{Event.new( event[0], event[1] ).inspect}" ) unless match
- @current = match
-
- # Check for attributes
- case event[0]
- when :start_element
- #puts "Checking attributes"
- @attr_stack << event[2]
- begin
- sattr = [:start_attribute, nil]
- eattr = [:end_attribute]
- text = [:text, nil]
- k,v = event[2].find { |key,value|
- sattr[1] = key
- #puts "Looking for #{sattr.inspect}"
- m = @current.next( sattr )
- #puts "Got #{m.inspect}"
- if m
- # If the state has text children...
- #puts "Looking for #{eattr.inspect}"
- #puts "Expect #{m.expected}"
- if m.matches?( eattr )
- #puts "Got end"
- @current = m
- else
- #puts "Didn't get end"
- text[1] = value
- #puts "Looking for #{text.inspect}"
- m = m.next( text )
- #puts "Got #{m.inspect}"
- text[1] = nil
- return false unless m
- @current = m if m
- end
- m = @current.next( eattr )
- if m
- @current = m
- true
- else
- false
- end
- else
- false
- end
- }
- event[2].delete(k) if k
- end while k
- when :end_element
- attrs = @attr_stack.pop
- raise ValidationException.new( "Validation error. Illegal "+
- " attributes: #{attrs.inspect}") if attrs.length > 0
- end
- end
- end
-
- class Event
- def initialize(event_type, event_arg=nil )
- @event_type = event_type
- @event_arg = event_arg
- end
-
- attr_reader :event_type
- attr_accessor :event_arg
-
- def done?
- @done
- end
-
- def single?
- return (@event_type != :start_element and @event_type != :start_attribute)
- end
-
- def matches?( event )
- #puts "#@event_type =? #{event[0]} && #@event_arg =? #{event[1]} "
- return false unless event[0] == @event_type
- case event[0]
- when nil
- return true
- when :start_element
- return true if event[1] == @event_arg
- when :end_element
- return true
- when :start_attribute
- return true if event[1] == @event_arg
- when :end_attribute
- return true
- when :end_document
- return true
- when :text
- return (@event_arg.nil? or @event_arg == event[1])
-=begin
- when :processing_instruction
- false
- when :xmldecl
- false
- when :start_doctype
- false
- when :end_doctype
- false
- when :externalentity
- false
- when :elementdecl
- false
- when :entity
- false
- when :attlistdecl
- false
- when :notationdecl
- false
- when :end_doctype
- false
-=end
- else
- false
- end
- end
-
- def ==( other )
- return false unless other.kind_of? Event
- @event_type == other.event_type and @event_arg == other.event_arg
- end
-
- def to_s
- inspect
- end
-
- def inspect
- "#{@event_type.inspect}( #@event_arg )"
- end
- end
- end
-end
diff --git a/trunk/lib/rexml/validation/validationexception.rb b/trunk/lib/rexml/validation/validationexception.rb
deleted file mode 100644
index 4723d9e4d3..0000000000
--- a/trunk/lib/rexml/validation/validationexception.rb
+++ /dev/null
@@ -1,9 +0,0 @@
-module REXML
- module Validation
- class ValidationException < RuntimeError
- def initialize msg
- super
- end
- end
- end
-end
diff --git a/trunk/lib/rexml/xmldecl.rb b/trunk/lib/rexml/xmldecl.rb
deleted file mode 100644
index 427eb78cf8..0000000000
--- a/trunk/lib/rexml/xmldecl.rb
+++ /dev/null
@@ -1,119 +0,0 @@
-require 'rexml/encoding'
-require 'rexml/source'
-
-module REXML
- # NEEDS DOCUMENTATION
- class XMLDecl < Child
- include Encoding
-
- DEFAULT_VERSION = "1.0";
- DEFAULT_ENCODING = "UTF-8";
- DEFAULT_STANDALONE = "no";
- START = '<\?xml';
- STOP = '\?>';
-
- attr_accessor :version, :standalone
- attr_reader :writeencoding, :writethis
-
- def initialize(version=DEFAULT_VERSION, encoding=nil, standalone=nil)
- @writethis = true
- @writeencoding = !encoding.nil?
- if version.kind_of? XMLDecl
- super()
- @version = version.version
- self.encoding = version.encoding
- @writeencoding = version.writeencoding
- @standalone = version.standalone
- else
- super()
- @version = version
- self.encoding = encoding
- @standalone = standalone
- end
- @version = DEFAULT_VERSION if @version.nil?
- end
-
- def clone
- XMLDecl.new(self)
- end
-
- # indent::
- # Ignored. There must be no whitespace before an XML declaration
- # transitive::
- # Ignored
- # ie_hack::
- # Ignored
- def write(writer, indent=-1, transitive=false, ie_hack=false)
- return nil unless @writethis or writer.kind_of? Output
- writer << START.sub(/\\/u, '')
- if writer.kind_of? Output
- writer << " #{content writer.encoding}"
- else
- writer << " #{content encoding}"
- end
- writer << STOP.sub(/\\/u, '')
- end
-
- def ==( other )
- other.kind_of?(XMLDecl) and
- other.version == @version and
- other.encoding == self.encoding and
- other.standalone == @standalone
- end
-
- def xmldecl version, encoding, standalone
- @version = version
- self.encoding = encoding
- @standalone = standalone
- end
-
- def node_type
- :xmldecl
- end
-
- alias :stand_alone? :standalone
- alias :old_enc= :encoding=
-
- def encoding=( enc )
- if enc.nil?
- self.old_enc = "UTF-8"
- @writeencoding = false
- else
- self.old_enc = enc
- @writeencoding = true
- end
- self.dowrite
- end
-
- # Only use this if you do not want the XML declaration to be written;
- # this object is ignored by the XML writer. Otherwise, instantiate your
- # own XMLDecl and add it to the document.
- #
- # Note that XML 1.1 documents *must* include an XML declaration
- def XMLDecl.default
- rv = XMLDecl.new( "1.0" )
- rv.nowrite
- rv
- end
-
- def nowrite
- @writethis = false
- end
-
- def dowrite
- @writethis = true
- end
-
- def inspect
- START.sub(/\\/u, '') + " ... " + STOP.sub(/\\/u, '')
- end
-
- private
- def content(enc)
- rv = "version='#@version'"
- rv << " encoding='#{enc}'" if @writeencoding || enc !~ /utf-8/i
- rv << " standalone='#@standalone'" if @standalone
- rv
- end
- end
-end
diff --git a/trunk/lib/rexml/xmltokens.rb b/trunk/lib/rexml/xmltokens.rb
deleted file mode 100644
index 6bbe5b07d5..0000000000
--- a/trunk/lib/rexml/xmltokens.rb
+++ /dev/null
@@ -1,18 +0,0 @@
-module REXML
- # Defines a number of tokens used for parsing XML. Not for general
- # consumption.
- module XMLTokens
- NCNAME_STR= '[\w:][\-\w\d.]*'
- NAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}"
-
- NAMECHAR = '[\-\w\d\.:]'
- NAME = "([\\w:]#{NAMECHAR}*)"
- NMTOKEN = "(?:#{NAMECHAR})+"
- NMTOKENS = "#{NMTOKEN}(\\s+#{NMTOKEN})*"
- REFERENCE = "(?:&#{NAME};|&#\\d+;|&#x[0-9a-fA-F]+;)"
-
- #REFERENCE = "(?:#{ENTITYREF}|#{CHARREF})"
- #ENTITYREF = "&#{NAME};"
- #CHARREF = "&#\\d+;|&#x[0-9a-fA-F]+;"
- end
-end
diff --git a/trunk/lib/rexml/xpath.rb b/trunk/lib/rexml/xpath.rb
deleted file mode 100644
index e8813efd3e..0000000000
--- a/trunk/lib/rexml/xpath.rb
+++ /dev/null
@@ -1,66 +0,0 @@
-require 'rexml/functions'
-require 'rexml/xpath_parser'
-
-module REXML
- # Wrapper class. Use this class to access the XPath functions.
- class XPath
- include Functions
- EMPTY_HASH = {}
-
- # Finds and returns the first node that matches the supplied xpath.
- # element::
- # The context element
- # path::
- # The xpath to search for. If not supplied or nil, returns the first
- # node matching '*'.
- # namespaces::
- # If supplied, a Hash which defines a namespace mapping.
- #
- # XPath.first( node )
- # XPath.first( doc, "//b"} )
- # XPath.first( node, "a/x:b", { "x"=>"http://doofus" } )
- def XPath::first element, path=nil, namespaces=nil, variables={}
- raise "The namespaces argument, if supplied, must be a hash object." unless namespaces.nil? or namespaces.kind_of?(Hash)
- raise "The variables argument, if supplied, must be a hash object." unless variables.kind_of?(Hash)
- parser = XPathParser.new
- parser.namespaces = namespaces
- parser.variables = variables
- path = "*" unless path
- element = [element] unless element.kind_of? Array
- parser.parse(path, element).flatten[0]
- end
-
- # Iterates over nodes that match the given path, calling the supplied
- # block with the match.
- # element::
- # The context element
- # path::
- # The xpath to search for. If not supplied or nil, defaults to '*'
- # namespaces::
- # If supplied, a Hash which defines a namespace mapping
- #
- # XPath.each( node ) { |el| ... }
- # XPath.each( node, '/*[@attr='v']' ) { |el| ... }
- # XPath.each( node, 'ancestor::x' ) { |el| ... }
- def XPath::each element, path=nil, namespaces=nil, variables={}, &block
- raise "The namespaces argument, if supplied, must be a hash object." unless namespaces.nil? or namespaces.kind_of?(Hash)
- raise "The variables argument, if supplied, must be a hash object." unless variables.kind_of?(Hash)
- parser = XPathParser.new
- parser.namespaces = namespaces
- parser.variables = variables
- path = "*" unless path
- element = [element] unless element.kind_of? Array
- parser.parse(path, element).each( &block )
- end
-
- # Returns an array of nodes matching a given XPath.
- def XPath::match element, path=nil, namespaces=nil, variables={}
- parser = XPathParser.new
- parser.namespaces = namespaces
- parser.variables = variables
- path = "*" unless path
- element = [element] unless element.kind_of? Array
- parser.parse(path,element)
- end
- end
-end
diff --git a/trunk/lib/rexml/xpath_parser.rb b/trunk/lib/rexml/xpath_parser.rb
deleted file mode 100644
index 4275c2db0e..0000000000
--- a/trunk/lib/rexml/xpath_parser.rb
+++ /dev/null
@@ -1,792 +0,0 @@
-require 'rexml/namespace'
-require 'rexml/xmltokens'
-require 'rexml/attribute'
-require 'rexml/syncenumerator'
-require 'rexml/parsers/xpathparser'
-
-class Object
- def dclone
- clone
- end
-end
-class Symbol
- def dclone ; self ; end
-end
-class Fixnum
- def dclone ; self ; end
-end
-class Float
- def dclone ; self ; end
-end
-class Array
- def dclone
- klone = self.clone
- klone.clear
- self.each{|v| klone << v.dclone}
- klone
- end
-end
-
-module REXML
- # You don't want to use this class. Really. Use XPath, which is a wrapper
- # for this class. Believe me. You don't want to poke around in here.
- # There is strange, dark magic at work in this code. Beware. Go back! Go
- # back while you still can!
- class XPathParser
- include XMLTokens
- LITERAL = /^'([^']*)'|^"([^"]*)"/u
-
- def initialize( )
- @parser = REXML::Parsers::XPathParser.new
- @namespaces = nil
- @variables = {}
- end
-
- def namespaces=( namespaces={} )
- Functions::namespace_context = namespaces
- @namespaces = namespaces
- end
-
- def variables=( vars={} )
- Functions::variables = vars
- @variables = vars
- end
-
- def parse path, nodeset
- #puts "#"*40
- path_stack = @parser.parse( path )
- #puts "PARSE: #{path} => #{path_stack.inspect}"
- #puts "PARSE: nodeset = #{nodeset.inspect}"
- match( path_stack, nodeset )
- end
-
- def get_first path, nodeset
- #puts "#"*40
- path_stack = @parser.parse( path )
- #puts "PARSE: #{path} => #{path_stack.inspect}"
- #puts "PARSE: nodeset = #{nodeset.inspect}"
- first( path_stack, nodeset )
- end
-
- def predicate path, nodeset
- path_stack = @parser.parse( path )
- expr( path_stack, nodeset )
- end
-
- def []=( variable_name, value )
- @variables[ variable_name ] = value
- end
-
-
- # Performs a depth-first (document order) XPath search, and returns the
- # first match. This is the fastest, lightest way to return a single result.
- #
- # FIXME: This method is incomplete!
- def first( path_stack, node )
- #puts "#{depth}) Entering match( #{path.inspect}, #{tree.inspect} )"
- return nil if path.size == 0
-
- case path[0]
- when :document
- # do nothing
- return first( path[1..-1], node )
- when :child
- for c in node.children
- #puts "#{depth}) CHILD checking #{name(c)}"
- r = first( path[1..-1], c )
- #puts "#{depth}) RETURNING #{r.inspect}" if r
- return r if r
- end
- when :qname
- name = path[2]
- #puts "#{depth}) QNAME #{name(tree)} == #{name} (path => #{path.size})"
- if node.name == name
- #puts "#{depth}) RETURNING #{tree.inspect}" if path.size == 3
- return node if path.size == 3
- return first( path[3..-1], node )
- else
- return nil
- end
- when :descendant_or_self
- r = first( path[1..-1], node )
- return r if r
- for c in node.children
- r = first( path, c )
- return r if r
- end
- when :node
- return first( path[1..-1], node )
- when :any
- return first( path[1..-1], node )
- end
- return nil
- end
-
-
- def match( path_stack, nodeset )
- #puts "MATCH: path_stack = #{path_stack.inspect}"
- #puts "MATCH: nodeset = #{nodeset.inspect}"
- r = expr( path_stack, nodeset )
- #puts "MAIN EXPR => #{r.inspect}"
- r
- end
-
- private
-
-
- # Returns a String namespace for a node, given a prefix
- # The rules are:
- #
- # 1. Use the supplied namespace mapping first.
- # 2. If no mapping was supplied, use the context node to look up the namespace
- def get_namespace( node, prefix )
- if @namespaces
- return @namespaces[prefix] || ''
- else
- return node.namespace( prefix ) if node.node_type == :element
- return ''
- end
- end
-
-
- # Expr takes a stack of path elements and a set of nodes (either a Parent
- # or an Array and returns an Array of matching nodes
- ALL = [ :attribute, :element, :text, :processing_instruction, :comment ]
- ELEMENTS = [ :element ]
- def expr( path_stack, nodeset, context=nil )
- #puts "#"*15
- #puts "In expr with #{path_stack.inspect}"
- #puts "Returning" if path_stack.length == 0 || nodeset.length == 0
- node_types = ELEMENTS
- return nodeset if path_stack.length == 0 || nodeset.length == 0
- while path_stack.length > 0
- #puts "#"*5
- #puts "Path stack = #{path_stack.inspect}"
- #puts "Nodeset is #{nodeset.inspect}"
- if nodeset.length == 0
- path_stack.clear
- return []
- end
- case (op = path_stack.shift)
- when :document
- nodeset = [ nodeset[0].root_node ]
- #puts ":document, nodeset = #{nodeset.inspect}"
-
- when :qname
- #puts "IN QNAME"
- prefix = path_stack.shift
- name = path_stack.shift
- nodeset.delete_if do |node|
- # FIXME: This DOUBLES the time XPath searches take
- ns = get_namespace( node, prefix )
- #puts "NS = #{ns.inspect}"
- #puts "node.node_type == :element => #{node.node_type == :element}"
- if node.node_type == :element
- #puts "node.name == #{name} => #{node.name == name}"
- if node.name == name
- #puts "node.namespace == #{ns.inspect} => #{node.namespace == ns}"
- end
- end
- !(node.node_type == :element and
- node.name == name and
- node.namespace == ns )
- end
- node_types = ELEMENTS
-
- when :any
- #puts "ANY 1: nodeset = #{nodeset.inspect}"
- #puts "ANY 1: node_types = #{node_types.inspect}"
- nodeset.delete_if { |node| !node_types.include?(node.node_type) }
- #puts "ANY 2: nodeset = #{nodeset.inspect}"
-
- when :self
- # This space left intentionally blank
-
- when :processing_instruction
- target = path_stack.shift
- nodeset.delete_if do |node|
- (node.node_type != :processing_instruction) or
- ( target!='' and ( node.target != target ) )
- end
-
- when :text
- nodeset.delete_if { |node| node.node_type != :text }
-
- when :comment
- nodeset.delete_if { |node| node.node_type != :comment }
-
- when :node
- # This space left intentionally blank
- node_types = ALL
-
- when :child
- new_nodeset = []
- nt = nil
- nodeset.each do |node|
- nt = node.node_type
- new_nodeset += node.children if nt == :element or nt == :document
- end
- nodeset = new_nodeset
- node_types = ELEMENTS
-
- when :literal
- return path_stack.shift
-
- when :attribute
- new_nodeset = []
- case path_stack.shift
- when :qname
- prefix = path_stack.shift
- name = path_stack.shift
- for element in nodeset
- if element.node_type == :element
- #puts "Element name = #{element.name}"
- #puts "get_namespace( #{element.inspect}, #{prefix} ) = #{get_namespace(element, prefix)}"
- attrib = element.attribute( name, get_namespace(element, prefix) )
- #puts "attrib = #{attrib.inspect}"
- new_nodeset << attrib if attrib
- end
- end
- when :any
- #puts "ANY"
- for element in nodeset
- if element.node_type == :element
- new_nodeset += element.attributes.to_a
- end
- end
- end
- nodeset = new_nodeset
-
- when :parent
- #puts "PARENT 1: nodeset = #{nodeset}"
- nodeset = nodeset.collect{|n| n.parent}.compact
- #nodeset = expr(path_stack.dclone, nodeset.collect{|n| n.parent}.compact)
- #puts "PARENT 2: nodeset = #{nodeset.inspect}"
- node_types = ELEMENTS
-
- when :ancestor
- new_nodeset = []
- nodeset.each do |node|
- while node.parent
- node = node.parent
- new_nodeset << node unless new_nodeset.include? node
- end
- end
- nodeset = new_nodeset
- node_types = ELEMENTS
-
- when :ancestor_or_self
- new_nodeset = []
- nodeset.each do |node|
- if node.node_type == :element
- new_nodeset << node
- while ( node.parent )
- node = node.parent
- new_nodeset << node unless new_nodeset.include? node
- end
- end
- end
- nodeset = new_nodeset
- node_types = ELEMENTS
-
- when :predicate
- new_nodeset = []
- subcontext = { :size => nodeset.size }
- pred = path_stack.shift
- nodeset.each_with_index { |node, index|
- subcontext[ :node ] = node
- #puts "PREDICATE SETTING CONTEXT INDEX TO #{index+1}"
- subcontext[ :index ] = index+1
- pc = pred.dclone
- #puts "#{node.hash}) Recursing with #{pred.inspect} and [#{node.inspect}]"
- result = expr( pc, [node], subcontext )
- result = result[0] if result.kind_of? Array and result.length == 1
- #puts "#{node.hash}) Result = #{result.inspect} (#{result.class.name})"
- if result.kind_of? Numeric
- #puts "Adding node #{node.inspect}" if result == (index+1)
- new_nodeset << node if result == (index+1)
- elsif result.instance_of? Array
- if result.size > 0 and result.inject(false) {|k,s| s or k}
- #puts "Adding node #{node.inspect}" if result.size > 0
- new_nodeset << node if result.size > 0
- end
- else
- #puts "Adding node #{node.inspect}" if result
- new_nodeset << node if result
- end
- }
- #puts "New nodeset = #{new_nodeset.inspect}"
- #puts "Path_stack = #{path_stack.inspect}"
- nodeset = new_nodeset
-=begin
- predicate = path_stack.shift
- ns = nodeset.clone
- result = expr( predicate, ns )
- #puts "Result = #{result.inspect} (#{result.class.name})"
- #puts "nodeset = #{nodeset.inspect}"
- if result.kind_of? Array
- nodeset = result.zip(ns).collect{|m,n| n if m}.compact
- else
- nodeset = result ? nodeset : []
- end
- #puts "Outgoing NS = #{nodeset.inspect}"
-=end
-
- when :descendant_or_self
- rv = descendant_or_self( path_stack, nodeset )
- path_stack.clear
- nodeset = rv
- node_types = ELEMENTS
-
- when :descendant
- results = []
- nt = nil
- nodeset.each do |node|
- nt = node.node_type
- results += expr( path_stack.dclone.unshift( :descendant_or_self ),
- node.children ) if nt == :element or nt == :document
- end
- nodeset = results
- node_types = ELEMENTS
-
- when :following_sibling
- #puts "FOLLOWING_SIBLING 1: nodeset = #{nodeset}"
- results = []
- nodeset.each do |node|
- next if node.parent.nil?
- all_siblings = node.parent.children
- current_index = all_siblings.index( node )
- following_siblings = all_siblings[ current_index+1 .. -1 ]
- results += expr( path_stack.dclone, following_siblings )
- end
- #puts "FOLLOWING_SIBLING 2: nodeset = #{nodeset}"
- nodeset = results
-
- when :preceding_sibling
- results = []
- nodeset.each do |node|
- next if node.parent.nil?
- all_siblings = node.parent.children
- current_index = all_siblings.index( node )
- preceding_siblings = all_siblings[ 0, current_index ].reverse
- results += preceding_siblings
- end
- nodeset = results
- node_types = ELEMENTS
-
- when :preceding
- new_nodeset = []
- nodeset.each do |node|
- new_nodeset += preceding( node )
- end
- #puts "NEW NODESET => #{new_nodeset.inspect}"
- nodeset = new_nodeset
- node_types = ELEMENTS
-
- when :following
- new_nodeset = []
- nodeset.each do |node|
- new_nodeset += following( node )
- end
- nodeset = new_nodeset
- node_types = ELEMENTS
-
- when :namespace
- #puts "In :namespace"
- new_nodeset = []
- prefix = path_stack.shift
- nodeset.each do |node|
- if (node.node_type == :element or node.node_type == :attribute)
- if @namespaces
- namespaces = @namespaces
- elsif (node.node_type == :element)
- namespaces = node.namespaces
- else
- namespaces = node.element.namesapces
- end
- #puts "Namespaces = #{namespaces.inspect}"
- #puts "Prefix = #{prefix.inspect}"
- #puts "Node.namespace = #{node.namespace}"
- if (node.namespace == namespaces[prefix])
- new_nodeset << node
- end
- end
- end
- nodeset = new_nodeset
-
- when :variable
- var_name = path_stack.shift
- return @variables[ var_name ]
-
- # :and, :or, :eq, :neq, :lt, :lteq, :gt, :gteq
- # TODO: Special case for :or and :and -- not evaluate the right
- # operand if the left alone determines result (i.e. is true for
- # :or and false for :and).
- when :eq, :neq, :lt, :lteq, :gt, :gteq, :and, :or
- left = expr( path_stack.shift, nodeset.dup, context )
- #puts "LEFT => #{left.inspect} (#{left.class.name})"
- right = expr( path_stack.shift, nodeset.dup, context )
- #puts "RIGHT => #{right.inspect} (#{right.class.name})"
- res = equality_relational_compare( left, op, right )
- #puts "RES => #{res.inspect}"
- return res
-
- when :and
- left = expr( path_stack.shift, nodeset.dup, context )
- #puts "LEFT => #{left.inspect} (#{left.class.name})"
- if left == false || left.nil? || !left.inject(false) {|a,b| a | b}
- return []
- end
- right = expr( path_stack.shift, nodeset.dup, context )
- #puts "RIGHT => #{right.inspect} (#{right.class.name})"
- res = equality_relational_compare( left, op, right )
- #puts "RES => #{res.inspect}"
- return res
-
- when :div
- left = Functions::number(expr(path_stack.shift, nodeset, context)).to_f
- right = Functions::number(expr(path_stack.shift, nodeset, context)).to_f
- return (left / right)
-
- when :mod
- left = Functions::number(expr(path_stack.shift, nodeset, context )).to_f
- right = Functions::number(expr(path_stack.shift, nodeset, context )).to_f
- return (left % right)
-
- when :mult
- left = Functions::number(expr(path_stack.shift, nodeset, context )).to_f
- right = Functions::number(expr(path_stack.shift, nodeset, context )).to_f
- return (left * right)
-
- when :plus
- left = Functions::number(expr(path_stack.shift, nodeset, context )).to_f
- right = Functions::number(expr(path_stack.shift, nodeset, context )).to_f
- return (left + right)
-
- when :minus
- left = Functions::number(expr(path_stack.shift, nodeset, context )).to_f
- right = Functions::number(expr(path_stack.shift, nodeset, context )).to_f
- return (left - right)
-
- when :union
- left = expr( path_stack.shift, nodeset, context )
- right = expr( path_stack.shift, nodeset, context )
- return (left | right)
-
- when :neg
- res = expr( path_stack, nodeset, context )
- return -(res.to_f)
-
- when :not
- when :function
- func_name = path_stack.shift.tr('-','_')
- arguments = path_stack.shift
- #puts "FUNCTION 0: #{func_name}(#{arguments.collect{|a|a.inspect}.join(', ')})"
- subcontext = context ? nil : { :size => nodeset.size }
-
- res = []
- cont = context
- nodeset.each_with_index { |n, i|
- if subcontext
- subcontext[:node] = n
- subcontext[:index] = i
- cont = subcontext
- end
- arg_clone = arguments.dclone
- args = arg_clone.collect { |arg|
- #puts "FUNCTION 1: Calling expr( #{arg.inspect}, [#{n.inspect}] )"
- expr( arg, [n], cont )
- }
- #puts "FUNCTION 2: #{func_name}(#{args.collect{|a|a.inspect}.join(', ')})"
- Functions.context = cont
- res << Functions.send( func_name, *args )
- #puts "FUNCTION 3: #{res[-1].inspect}"
- }
- return res
-
- end
- end # while
- #puts "EXPR returning #{nodeset.inspect}"
- return nodeset
- end
-
-
- ##########################################################
- # FIXME
- # The next two methods are BAD MOJO!
- # This is my achilles heel. If anybody thinks of a better
- # way of doing this, be my guest. This really sucks, but
- # it is a wonder it works at all.
- # ########################################################
-
- def descendant_or_self( path_stack, nodeset )
- rs = []
- #puts "#"*80
- #puts "PATH_STACK = #{path_stack.inspect}"
- #puts "NODESET = #{nodeset.collect{|n|n.inspect}.inspect}"
- d_o_s( path_stack, nodeset, rs )
- #puts "RS = #{rs.collect{|n|n.inspect}.inspect}"
- document_order(rs.flatten.compact)
- #rs.flatten.compact
- end
-
- def d_o_s( p, ns, r )
- #puts "IN DOS with #{ns.inspect}; ALREADY HAVE #{r.inspect}"
- nt = nil
- ns.each_index do |i|
- n = ns[i]
- #puts "P => #{p.inspect}"
- x = expr( p.dclone, [ n ] )
- nt = n.node_type
- d_o_s( p, n.children, x ) if nt == :element or nt == :document and n.children.size > 0
- r.concat(x) if x.size > 0
- end
- end
-
-
- # Reorders an array of nodes so that they are in document order
- # It tries to do this efficiently.
- #
- # FIXME: I need to get rid of this, but the issue is that most of the XPath
- # interpreter functions as a filter, which means that we lose context going
- # in and out of function calls. If I knew what the index of the nodes was,
- # I wouldn't have to do this. Maybe add a document IDX for each node?
- # Problems with mutable documents. Or, rewrite everything.
- def document_order( array_of_nodes )
- new_arry = []
- array_of_nodes.each { |node|
- node_idx = []
- np = node.node_type == :attribute ? node.element : node
- while np.parent and np.parent.node_type == :element
- node_idx << np.parent.index( np )
- np = np.parent
- end
- new_arry << [ node_idx.reverse, node ]
- }
- #puts "new_arry = #{new_arry.inspect}"
- new_arry.sort{ |s1, s2| s1[0] <=> s2[0] }.collect{ |s| s[1] }
- end
-
-
- def recurse( nodeset, &block )
- for node in nodeset
- yield node
- recurse( node, &block ) if node.node_type == :element
- end
- end
-
-
-
- # Builds a nodeset of all of the preceding nodes of the supplied node,
- # in reverse document order
- # preceding:: includes every element in the document that precedes this node,
- # except for ancestors
- def preceding( node )
- #puts "IN PRECEDING"
- ancestors = []
- p = node.parent
- while p
- ancestors << p
- p = p.parent
- end
-
- acc = []
- p = preceding_node_of( node )
- #puts "P = #{p.inspect}"
- while p
- if ancestors.include? p
- ancestors.delete(p)
- else
- acc << p
- end
- p = preceding_node_of( p )
- #puts "P = #{p.inspect}"
- end
- acc
- end
-
- def preceding_node_of( node )
- #puts "NODE: #{node.inspect}"
- #puts "PREVIOUS NODE: #{node.previous_sibling_node.inspect}"
- #puts "PARENT NODE: #{node.parent}"
- psn = node.previous_sibling_node
- if psn.nil?
- if node.parent.nil? or node.parent.class == Document
- return nil
- end
- return node.parent
- #psn = preceding_node_of( node.parent )
- end
- while psn and psn.kind_of? Element and psn.children.size > 0
- psn = psn.children[-1]
- end
- psn
- end
-
- def following( node )
- #puts "IN PRECEDING"
- acc = []
- p = next_sibling_node( node )
- #puts "P = #{p.inspect}"
- while p
- acc << p
- p = following_node_of( p )
- #puts "P = #{p.inspect}"
- end
- acc
- end
-
- def following_node_of( node )
- #puts "NODE: #{node.inspect}"
- #puts "PREVIOUS NODE: #{node.previous_sibling_node.inspect}"
- #puts "PARENT NODE: #{node.parent}"
- if node.kind_of? Element and node.children.size > 0
- return node.children[0]
- end
- return next_sibling_node(node)
- end
-
- def next_sibling_node(node)
- psn = node.next_sibling_node
- while psn.nil?
- if node.parent.nil? or node.parent.class == Document
- return nil
- end
- node = node.parent
- psn = node.next_sibling_node
- #puts "psn = #{psn.inspect}"
- end
- return psn
- end
-
- def norm b
- case b
- when true, false
- return b
- when 'true', 'false'
- return Functions::boolean( b )
- when /^\d+(\.\d+)?$/
- return Functions::number( b )
- else
- return Functions::string( b )
- end
- end
-
- def equality_relational_compare( set1, op, set2 )
- #puts "EQ_REL_COMP(#{set1.inspect} #{op.inspect} #{set2.inspect})"
- if set1.kind_of? Array and set2.kind_of? Array
- #puts "#{set1.size} & #{set2.size}"
- if set1.size == 1 and set2.size == 1
- set1 = set1[0]
- set2 = set2[0]
- elsif set1.size == 0 or set2.size == 0
- nd = set1.size==0 ? set2 : set1
- rv = nd.collect { |il| compare( il, op, nil ) }
- #puts "RV = #{rv.inspect}"
- return rv
- else
- res = []
- enum = SyncEnumerator.new( set1, set2 ).each { |i1, i2|
- #puts "i1 = #{i1.inspect} (#{i1.class.name})"
- #puts "i2 = #{i2.inspect} (#{i2.class.name})"
- i1 = norm( i1 )
- i2 = norm( i2 )
- res << compare( i1, op, i2 )
- }
- return res
- end
- end
- #puts "EQ_REL_COMP: #{set1.inspect} (#{set1.class.name}), #{op}, #{set2.inspect} (#{set2.class.name})"
- #puts "COMPARING VALUES"
- # If one is nodeset and other is number, compare number to each item
- # in nodeset s.t. number op number(string(item))
- # If one is nodeset and other is string, compare string to each item
- # in nodeset s.t. string op string(item)
- # If one is nodeset and other is boolean, compare boolean to each item
- # in nodeset s.t. boolean op boolean(item)
- if set1.kind_of? Array or set2.kind_of? Array
- #puts "ISA ARRAY"
- if set1.kind_of? Array
- a = set1
- b = set2
- else
- a = set2
- b = set1
- end
-
- case b
- when true, false
- return a.collect {|v| compare( Functions::boolean(v), op, b ) }
- when Numeric
- return a.collect {|v| compare( Functions::number(v), op, b )}
- when /^\d+(\.\d+)?$/
- b = Functions::number( b )
- #puts "B = #{b.inspect}"
- return a.collect {|v| compare( Functions::number(v), op, b )}
- else
- #puts "Functions::string( #{b}(#{b.class.name}) ) = #{Functions::string(b)}"
- b = Functions::string( b )
- return a.collect { |v| compare( Functions::string(v), op, b ) }
- end
- else
- # If neither is nodeset,
- # If op is = or !=
- # If either boolean, convert to boolean
- # If either number, convert to number
- # Else, convert to string
- # Else
- # Convert both to numbers and compare
- s1 = set1.to_s
- s2 = set2.to_s
- #puts "EQ_REL_COMP: #{set1}=>#{s1}, #{set2}=>#{s2}"
- if s1 == 'true' or s1 == 'false' or s2 == 'true' or s2 == 'false'
- #puts "Functions::boolean(#{set1})=>#{Functions::boolean(set1)}"
- #puts "Functions::boolean(#{set2})=>#{Functions::boolean(set2)}"
- set1 = Functions::boolean( set1 )
- set2 = Functions::boolean( set2 )
- else
- if op == :eq or op == :neq
- if s1 =~ /^\d+(\.\d+)?$/ or s2 =~ /^\d+(\.\d+)?$/
- set1 = Functions::number( s1 )
- set2 = Functions::number( s2 )
- else
- set1 = Functions::string( set1 )
- set2 = Functions::string( set2 )
- end
- else
- set1 = Functions::number( set1 )
- set2 = Functions::number( set2 )
- end
- end
- #puts "EQ_REL_COMP: #{set1} #{op} #{set2}"
- #puts ">>> #{compare( set1, op, set2 )}"
- return compare( set1, op, set2 )
- end
- return false
- end
-
- def compare a, op, b
- #puts "COMPARE #{a.inspect}(#{a.class.name}) #{op} #{b.inspect}(#{b.class.name})"
- case op
- when :eq
- a == b
- when :neq
- a != b
- when :lt
- a < b
- when :lteq
- a <= b
- when :gt
- a > b
- when :gteq
- a >= b
- when :and
- a and b
- when :or
- a or b
- else
- false
- end
- end
- end
-end