require "rexml/element" require "rexml/xmldecl" require "rexml/source" require "rexml/comment" require "rexml/doctype" require "rexml/instruction" require "rexml/rexml" require "rexml/parseexception" require "rexml/output" require "rexml/parsers/baseparser" require "rexml/parsers/streamparser" require "rexml/parsers/treeparser" module REXML # Represents a full XML document, including PIs, a doctype, etc. A # Document has a single child that can be accessed by root(). # Note that if you want to have an XML declaration written for a document # you create, you must add one; REXML documents do not write a default # declaration for you. See |DECLARATION| and |write|. class Document < Element # A convenient default XML declaration. If you want an XML declaration, # the easiest way to add one is mydoc << Document::DECLARATION # +DEPRECATED+ # Use: mydoc << XMLDecl.default DECLARATION = XMLDecl.default # Constructor # @param source if supplied, must be a Document, String, or IO. # Documents have their context and Element attributes cloned. # Strings are expected to be valid XML documents. IOs are expected # to be sources of valid XML documents. # @param context if supplied, contains the context of the document; # this should be a Hash. def initialize( source = nil, context = {} ) @entity_expansion_count = 0 super() @context = context return if source.nil? if source.kind_of? Document @context = source.context super source else build( source ) end end def node_type :document end # Should be obvious def clone Document.new self end # According to the XML spec, a root node has no expanded name def expanded_name '' #d = doc_type #d ? d.name : "UNDEFINED" end alias :name :expanded_name # We override this, because XMLDecls and DocTypes must go at the start # of the document def add( child ) if child.kind_of? XMLDecl if @children[0].kind_of? XMLDecl @children[0] = child else @children.unshift child end child.parent = self elsif child.kind_of? DocType # Find first Element or DocType node and insert the decl right # before it. If there is no such node, just insert the child at the # end. If there is a child and it is an DocType, then replace it. insert_before_index = @children.find_index { |x| x.kind_of?(Element) || x.kind_of?(DocType) } if insert_before_index # Not null = not end of list if @children[ insert_before_index ].kind_of? DocType @children[ insert_before_index ] = child else @children[ insert_before_index-1, 0 ] = child end else # Insert at end of list @children << child end child.parent = self else rv = super raise "attempted adding second root element to document" if @elements.size > 1 rv end end alias :<< :add def add_element(arg=nil, arg2=nil) rv = super raise "attempted adding second root element to document" if @elements.size > 1 rv end # @return the root Element of the document, or nil if this document # has no children. def root elements[1] #self #@children.find { |item| item.kind_of? Element } end # @return the DocType child of the document, if one exists, # and nil otherwise. def doctype @children.find { |item| item.kind_of? DocType } end # @return the XMLDecl of this document; if no XMLDecl has been # set, the default declaration is returned. def xml_decl rv = @children[0] return rv if rv.kind_of? XMLDecl rv = @children.unshift(XMLDecl.default)[0] end # @return the XMLDecl version of this document as a String. # If no XMLDecl has been set, returns the default version. def version xml_decl().version end # @return the XMLDecl encoding of this document as an # Encoding object. # If no XMLDecl has been set, returns the default encoding. def encoding xml_decl().encoding end # @return the XMLDecl standalone value of this document as a String. # If no XMLDecl has been set, returns the default setting. def stand_alone? xml_decl().stand_alone? end # :call-seq: # doc.write(output=$stdout, indent=-1, transtive=false, ie_hack=false, encoding=nil) # doc.write(options={:output => $stdout, :indent => -1, :transtive => false, :ie_hack => false, :encoding => nil}) # # Write the XML tree out, optionally with indent. This writes out the # entire XML document, including XML declarations, doctype declarations, # and processing instructions (if any are given). # # A controversial point is whether Document should always write the XML # declaration () whether or not one is given by the # user (or source document). REXML does not write one if one was not # specified, because it adds unnecessary bandwidth to applications such # as XML-RPC. # # Accept Nth argument style and options Hash style as argument. # The recommended style is options Hash style for one or more # arguments case. # # _Examples_ # Document.new("").write # # output = "" # Document.new("").write(output) # # output = "" # Document.new("").write(:output => output, :indent => 2) # # See also the classes in the rexml/formatters package for the proper way # to change the default formatting of XML output. # # _Examples_ # # output = "" # tr = Transitive.new # tr.write(Document.new(""), output) # # output:: # output an object which supports '<< string'; this is where the # document will be written. # indent:: # An integer. If -1, no indenting will be used; otherwise, the # indentation will be twice this number of spaces, and children will be # indented an additional amount. For a value of 3, every item will be # indented 3 more levels, or 6 more spaces (2 * 3). Defaults to -1 # transitive:: # If transitive is true and indent is >= 0, then the output will be # pretty-printed in such a way that the added whitespace does not affect # the absolute *value* of the document -- that is, it leaves the value # and number of Text nodes in the document unchanged. # ie_hack:: # Internet Explorer is the worst piece of crap to have ever been # written, with the possible exception of Windows itself. Since IE is # unable to parse proper XML, we have to provide a hack to generate XML # that IE's limited abilities can handle. This hack inserts a space # before the /> on empty tags. Defaults to false # encoding:: # Encoding name as String. Change output encoding to specified encoding # instead of encoding in XML declaration. # Defaults to nil. It means encoding in XML declaration is used. def write(*arguments) if arguments.size == 1 and arguments[0].class == Hash options = arguments[0] output = options[:output] indent = options[:indent] transitive = options[:transitive] ie_hack = options[:ie_hack] encoding = options[:encoding] else output, indent, transitive, ie_hack, encoding, = *arguments end output ||= $stdout indent ||= -1 transitive = false if transitive.nil? ie_hack = false if ie_hack.nil? encoding ||= xml_decl.encoding if encoding != 'UTF-8' && !output.kind_of?(Output) output = Output.new( output, encoding ) end formatter = if indent > -1 if transitive require "rexml/formatters/transitive" REXML::Formatters::Transitive.new( indent, ie_hack ) else REXML::Formatters::Pretty.new( indent, ie_hack ) end else REXML::Formatters::Default.new( ie_hack ) end formatter.write( self, output ) end def Document::parse_stream( source, listener ) Parsers::StreamParser.new( source, listener ).parse end @@entity_expansion_limit = 10_000 # Set the entity expansion limit. By default the limit is set to 10000. def Document::entity_expansion_limit=( val ) @@entity_expansion_limit = val end # Get the entity expansion limit. By default the limit is set to 10000. def Document::entity_expansion_limit return @@entity_expansion_limit end attr_reader :entity_expansion_count def record_entity_expansion @entity_expansion_count += 1 if @entity_expansion_count > @@entity_expansion_limit raise "number of entity expansions exceeded, processing aborted." end end private def build( source ) Parsers::TreeParser.new( source, self ).parse end end end