diff options
-rw-r--r-- | lib/rexml/cdata.rb | 2 | ||||
-rw-r--r-- | lib/rexml/document.rb | 7 | ||||
-rw-r--r-- | lib/rexml/element.rb | 12 | ||||
-rw-r--r-- | lib/rexml/source.rb | 20 | ||||
-rw-r--r-- | lib/rexml/xmldecl.rb | 2 |
5 files changed, 28 insertions, 15 deletions
diff --git a/lib/rexml/cdata.rb b/lib/rexml/cdata.rb index 9e82376cd8..ffedac1b53 100644 --- a/lib/rexml/cdata.rb +++ b/lib/rexml/cdata.rb @@ -59,7 +59,7 @@ module REXML # c = CData.new( " Some text " ) # c.write( $stdout ) #-> <![CDATA[ Some text ]]> def write( output=$stdout, indent=-1, transitive=false, ie_hack=false ) - indent( output, indent ) unless transitive + #indent( output, indent ) unless transitive output << START output << @string output << STOP diff --git a/lib/rexml/document.rb b/lib/rexml/document.rb index 084ebd2a39..a7f056ad3f 100644 --- a/lib/rexml/document.rb +++ b/lib/rexml/document.rb @@ -159,9 +159,10 @@ module REXML def write( output=$stdout, indent=-1, transitive=false, ie_hack=false ) output = Output.new( output, xml_decl.encoding ) if xml_decl.encoding != "UTF-8" && !output.kind_of?(Output) @children.each { |node| - indent( output, indent) if node.node_type == :element - node.write( output, indent, transitive, ie_hack ) - output << "\n" unless indent<0 or node == @children[-1] + indent( output, indent ) if node.node_type == :element + if node.write( output, indent, transitive, ie_hack ) + output << "\n" unless indent<0 or node == @children[-1] + end } end diff --git a/lib/rexml/element.rb b/lib/rexml/element.rb index 25e530de41..b76c0179c7 100644 --- a/lib/rexml/element.rb +++ b/lib/rexml/element.rb @@ -107,7 +107,8 @@ module REXML # Evaluates to the document to which this element belongs, or nil if this # element doesn't belong to a document. def document - root.parent if root + rt = root + rt.parent if rt end # Evaluates to +true+ if whitespace is respected for this element. This @@ -670,7 +671,7 @@ module REXML end writer << "/" else - if transitive and indent>-1 and !@children[0].instance_of? Text + if transitive and indent>-1 and !@children[0].kind_of? Text writer << "\n" indent writer, indent+1 end @@ -1044,10 +1045,11 @@ module REXML return attr end end - if @element.document and @element.document.doctype + element_document = @element.document + if element_document and element_document.doctype expn = @element.expanded_name - expn = @element.document.doctype.name if expn.size == 0 - attr_val = @element.document.doctype.attribute_of(expn, name) + expn = element_document.doctype.name if expn.size == 0 + attr_val = element_document.doctype.attribute_of(expn, name) return Attribute.new( name, attr_val ) if attr_val end return nil diff --git a/lib/rexml/source.rb b/lib/rexml/source.rb index ce10d03a6c..7251666160 100644 --- a/lib/rexml/source.rb +++ b/lib/rexml/source.rb @@ -116,11 +116,21 @@ module REXML def initialize(arg, block_size=500) @er_source = @source = arg @to_utf = false - # FIXME - # This is broken. If the user puts in enough carriage returns, this can fail - # to calculate the correct encoding. - super @source.read( 100 ) - @line_break = encode( '>' ) + # Determining the encoding is a deceptively difficult issue to resolve. + # First, we check the first two bytes for UTF-16. Then we + # assume that the encoding is at least ASCII enough for the '>', and + # we read until we get one of those. This gives us the XML declaration, + # if there is one. If there isn't one, the file MUST be UTF-8, as per + # the XML spec. If there is one, we can determine the encoding from + # it. + str = @source.read( 2 ) + if (str[0] == 254 && str[1] == 255) || (str[0] == 255 && str[1] == 254) + @encoding = check_encoding( str ) + @line_break = encode( '>' ) + else + @line_break = '>' + end + super str+@source.readline( @line_break ) end def scan(pattern, cons=false) diff --git a/lib/rexml/xmldecl.rb b/lib/rexml/xmldecl.rb index 1c0bde8f4d..3f90e0160f 100644 --- a/lib/rexml/xmldecl.rb +++ b/lib/rexml/xmldecl.rb @@ -38,7 +38,7 @@ module REXML end def write writer, indent=-1, transitive=false, ie_hack=false - return "" unless @writethis or writer.kind_of? Output + return nil unless @writethis or writer.kind_of? Output indent( writer, indent ) writer << START.sub(/\\/u, '') if writer.kind_of? Output |