summaryrefslogtreecommitdiff
path: root/lib/rexml
diff options
context:
space:
mode:
author ser <ser@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2004-04-02 03:26:19 +0000
committer ser <ser@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2004-04-02 03:26:19 +0000
commit 7a07ba45a001475b257734cd1f46166c73f5519c (patch)
tree b9d253eea1aa685030ce0fabc203bc2076191ed4 /lib/rexml
parent 354d68f80b1e92f58c13004e13dec48b179d2b4d (diff)
REXML changes backported from the 1.9 branch:
* Minor source documentation changes
* Changes to the pretty-printing code, including the addition of the word-wrap submission.
* Bug fix for missing quotations in NOTATION DTD items
* Bug fixes and improvements to whitespace handling in text nodes
* Refactoring and bug fixes in encoding support
* Minor speed optimizations in the core parser
* Bug fixes in the SAX2 parser
* Copyright fixes
* Version bump to REXML 3.0.0
* A change that caused speed degradation has been reversed
* Addition of a value=() method in Text, for replacing the contents of a text node
* Fixed the document order of the descendant-or-self axis in XPath
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_1_8@6071 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'lib/rexml')
-rw-r--r-- lib/rexml/comment.rb 5
-rw-r--r-- lib/rexml/doctype.rb 13
-rw-r--r-- lib/rexml/document.rb 7
-rw-r--r-- lib/rexml/dtd/entitydecl.rb 2
-rw-r--r-- lib/rexml/dtd/notationdecl.rb 2
-rw-r--r-- lib/rexml/element.rb 17
-rw-r--r-- lib/rexml/encodings/ISO-8859-1.rb 4
-rw-r--r-- lib/rexml/encodings/SHIFT_JIS.rb 34
-rw-r--r-- lib/rexml/encodings/US-ASCII.rb 4
-rw-r--r-- lib/rexml/node.rb 7
-rw-r--r-- lib/rexml/parsers/baseparser.rb 17
-rw-r--r-- lib/rexml/parsers/sax2parser.rb 9
-rw-r--r-- lib/rexml/rexml.rb 6
-rw-r--r-- lib/rexml/source.rb 16
-rw-r--r-- lib/rexml/text.rb 47
-rw-r--r-- lib/rexml/xpath_parser.rb 42
16 files changed, 141 insertions, 91 deletions
diff --git a/lib/rexml/comment.rb b/lib/rexml/comment.rb
index e439ddf9d8..7c3e79fe2a 100644
--- a/lib/rexml/comment.rb
+++ b/lib/rexml/comment.rb
@@ -39,7 +39,10 @@ module REXML
# indentation will be this number of spaces, and children will be
# indented an additional amount.
# transitive::
- # Who knows?
+ # If transitive is true and indent is >= 0, then the output will be
+ # pretty-printed in such a way that the added whitespace does not affect
+ # the absolute *value* of the document -- that is, it leaves the value
+ # and number of Text nodes in the document unchanged.
# ie_hack::
# Internet Explorer is the worst piece of crap to have ever been
# written, with the possible exception of Windows itself. Since IE is
diff --git a/lib/rexml/doctype.rb b/lib/rexml/doctype.rb
index 084676afa9..b523155f8f 100644
--- a/lib/rexml/doctype.rb
+++ b/lib/rexml/doctype.rb
@@ -92,7 +92,10 @@ module REXML
# indentation will be this number of spaces, and children will be
# indented an additional amount.
# transitive::
- # Who knows?
+ # If transitive is true and indent is >= 0, then the output will be
+ # pretty-printed in such a way that the added whitespace does not affect
+ # the absolute *value* of the document -- that is, it leaves the value
+ # and number of Text nodes in the document unchanged.
# ie_hack::
# Internet Explorer is the worst piece of crap to have ever been
# written, with the possible exception of Windows itself. Since IE is
@@ -109,7 +112,7 @@ module REXML
output << " #@long_name" if @long_name
output << " #@uri" if @uri
unless @children.empty?
- next_indent = indent + 2
+ next_indent = indent + 1
output << ' ['
child = nil # speed
@children.each { |child|
@@ -123,6 +126,10 @@ module REXML
output << STOP
end
+ def context
+ @parent.context
+ end
+
def entity( name )
@entities[name].unnormalized if @entities[name]
end
@@ -185,7 +192,7 @@ module REXML
end
def to_s
- "<!NOTATION #@name #@middle #@rest>"
+ "<!NOTATION #@name '#@middle #@rest'>"
end
def write( output, indent=-1 )
diff --git a/lib/rexml/document.rb b/lib/rexml/document.rb
index 1eefaea92a..52500f2afd 100644
--- a/lib/rexml/document.rb
+++ b/lib/rexml/document.rb
@@ -145,7 +145,10 @@ module REXML
# indentation will be this number of spaces, and children will be
# indented an additional amount. Defaults to -1
# transitive::
- # What the heck does this do? Defaults to false
+ # If transitive is true and indent is >= 0, then the output will be
+ # pretty-printed in such a way that the added whitespace does not affect
+ # the absolute *value* of the document -- that is, it leaves the value
+ # and number of Text nodes in the document unchanged.
# ie_hack::
# Internet Explorer is the worst piece of crap to have ever been
# written, with the possible exception of Windows itself. Since IE is
@@ -191,7 +194,7 @@ module REXML
build_context[-1] << event[1]
else
build_context.add(
- Text.new( event[1], true, nil, true )
+ Text.new( event[1], build_context.whitespace, nil, true )
) unless (
event[1].strip.size==0 and
build_context.ignore_whitespace_nodes
diff --git a/lib/rexml/dtd/entitydecl.rb b/lib/rexml/dtd/entitydecl.rb
index 164825570f..a5f1520f2b 100644
--- a/lib/rexml/dtd/entitydecl.rb
+++ b/lib/rexml/dtd/entitydecl.rb
@@ -42,7 +42,7 @@ module REXML
end
def write( output, indent )
- output << (' '*indent) if indent > 0
+ indent( output, indent )
output << to_s
end
diff --git a/lib/rexml/dtd/notationdecl.rb b/lib/rexml/dtd/notationdecl.rb
index d577ce0631..a47ff8f24b 100644
--- a/lib/rexml/dtd/notationdecl.rb
+++ b/lib/rexml/dtd/notationdecl.rb
@@ -25,7 +25,7 @@ module REXML
end
def write( output, indent )
- output << (' '*indent) if indent > 0
+ indent( output, indent )
output << to_s
end
diff --git a/lib/rexml/element.rb b/lib/rexml/element.rb
index ffaeddbf54..b61d811141 100644
--- a/lib/rexml/element.rb
+++ b/lib/rexml/element.rb
@@ -98,8 +98,9 @@ module REXML
# is the case if:
# 1. Neither :+respect_whitespace+ nor :+compress_whitespace+ has any value
# 2. The context has :+respect_whitespace+ set to :+all+ or
- # an array containing the name of this element, and :+compress_whitespace+
- # isn't set to :+all+ or an array containing the name of this element.
+ # an array containing the name of this element, and
+ # :+compress_whitespace+ isn't set to :+all+ or an array containing the
+ # name of this element.
# The evaluation is tested against +expanded_name+, and so is namespace
# sensitive.
def whitespace
@@ -606,7 +607,9 @@ module REXML
# indentation will be this number of spaces, and children will be
# indented an additional amount. Defaults to -1
# transitive::
- # What the heck does this do? Defaults to false
+ # If transitive is true and indent is >= 0, then the output will be
+ # pretty-printed in such a way that the added whitespace does not affect
+ # the parse tree of the document
# ie_hack::
# Internet Explorer is the worst piece of crap to have ever been
# written, with the possible exception of Windows itself. Since IE is
@@ -632,7 +635,7 @@ module REXML
else
if transitive and indent>-1 and !@children[0].kind_of? Text
writer << "\n"
- indent writer, indent+2
+ indent writer, indent+1
end
writer << ">"
write_children( writer, indent, transitive, ie_hack )
@@ -640,7 +643,7 @@ module REXML
end
if transitive and indent>-1
writer << "\n"
- indent -= 2 if next_sibling.nil?
+ indent -= 1 if next_sibling.nil?
indent(writer, indent)
end
writer << ">"
@@ -661,12 +664,10 @@ module REXML
# A private helper method
def write_children( writer, indent, transitive, ie_hack )
cr = (indent < 0) ? '' : "\n"
- #if size == 1 and @children[0].kind_of?(Text)
- # self[0].write( writer, -1 )
if indent == -1
each { |child| child.write( writer, indent, transitive, ie_hack ) }
else
- next_indent = indent+2
+ next_indent = indent+1
last_child=nil
each { |child|
unless child.kind_of? Text or last_child.kind_of? Text or transitive
diff --git a/lib/rexml/encodings/ISO-8859-1.rb b/lib/rexml/encodings/ISO-8859-1.rb
index 32ddfbc909..f4e4527c2d 100644
--- a/lib/rexml/encodings/ISO-8859-1.rb
+++ b/lib/rexml/encodings/ISO-8859-1.rb
@@ -1,6 +1,6 @@
module REXML
module Encoding
- @@__REXML_encoding_methods =<<-'EOL'
+ @@__REXML_encoding_methods = %q~
# Convert from UTF-8
def encode content
array_utf8 = content.unpack('U*')
@@ -20,6 +20,6 @@ module REXML
def decode(str)
str.unpack('C*').pack('U*')
end
- EOL
+ ~
end
end
diff --git a/lib/rexml/encodings/SHIFT_JIS.rb b/lib/rexml/encodings/SHIFT_JIS.rb
index 27e4569403..e355704a7c 100644
--- a/lib/rexml/encodings/SHIFT_JIS.rb
+++ b/lib/rexml/encodings/SHIFT_JIS.rb
@@ -1,33 +1 @@
-begin
- require 'uconv'
-
- module REXML
- module Encoding
- def to_shift_jis content
- Uconv::u8tosjis(content)
- end
-
- def from_shift_jis(str)
- Uconv::sjistou8(str)
- end
- end
- end
-rescue LoadError
- begin
- require 'iconv'
- module REXML
- module Encoding
- def from_shift_jis(str)
- return Iconv::iconv("utf-8", "shift_jis", str).join('')
- end
-
- def to_shift_jis content
- return Iconv::iconv("shift_jis", "utf-8", content).join('')
- end
- end
- end
- rescue LoadError
- raise "uconv or iconv is required for Japanese encoding support."
- end
-
-end
+require 'rexml/encodings/SHIFT-JIS'
diff --git a/lib/rexml/encodings/US-ASCII.rb b/lib/rexml/encodings/US-ASCII.rb
index 32ddfbc909..f4e4527c2d 100644
--- a/lib/rexml/encodings/US-ASCII.rb
+++ b/lib/rexml/encodings/US-ASCII.rb
@@ -1,6 +1,6 @@
module REXML
module Encoding
- @@__REXML_encoding_methods =<<-'EOL'
+ @@__REXML_encoding_methods = %q~
# Convert from UTF-8
def encode content
array_utf8 = content.unpack('U*')
@@ -20,6 +20,6 @@ module REXML
def decode(str)
str.unpack('C*').pack('U*')
end
- EOL
+ ~
end
end
diff --git a/lib/rexml/node.rb b/lib/rexml/node.rb
index 41d9eee43b..5f414c03ef 100644
--- a/lib/rexml/node.rb
+++ b/lib/rexml/node.rb
@@ -25,7 +25,12 @@ module REXML
end
def indent to, ind
- to << " "*ind unless ind<1
+ if @parent and @parent.context and not @parent.context[:indentstyle].nil? then
+ indentstyle = @parent.context[:indentstyle]
+ else
+ indentstyle = ' '
+ end
+ to << indentstyle*ind unless ind<1
end
def parent?
diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb
index 057617d6e8..025d43db54 100644
--- a/lib/rexml/parsers/baseparser.rb
+++ b/lib/rexml/parsers/baseparser.rb
@@ -122,14 +122,14 @@ module REXML
# Returns true if there are no more events
def empty?
- !has_next?
+ #puts "@source.empty? = #{@source.empty?}"
+ #puts "@stack.empty? = #{@stack.empty?}"
+ return (@source.empty? and @stack.empty?)
end
# Returns true if there are more events. Synonymous with !empty?
def has_next?
- return true if @closed
- @source.read if @source.buffer.size==0 and !@source.empty?
- (!@source.empty? and @source.buffer.strip.size>0) or @stack.size>0 or @closed
+ return !(@source.empty? and @stack.empty?)
end
# Push an event back on the head of the stream. This method
@@ -329,9 +329,12 @@ module REXML
end
else
md = @source.match( TEXT_PATTERN, true )
- #md = @source.match_to_consume( '<', TEXT_PATTERN )
- #@source.read
- raise REXML::ParseException("no text to add") if md[0].length == 0
+ if md[0].length == 0
+ #puts "EMPTY = #{empty?}"
+ #puts "BUFFER = \"#{@source.buffer}\""
+ @source.match( /(\s+)/, true )
+ end
+ #return [ :text, "" ] if md[0].length == 0
# unnormalized = Text::unnormalize( md[1], self )
# return PullEvent.new( :text, md[1], unnormalized )
return [ :text, md[1] ]
diff --git a/lib/rexml/parsers/sax2parser.rb b/lib/rexml/parsers/sax2parser.rb
index aab87caae4..8c82cf8fc1 100644
--- a/lib/rexml/parsers/sax2parser.rb
+++ b/lib/rexml/parsers/sax2parser.rb
@@ -45,7 +45,7 @@ module REXML
if args.size == 2
args[1].each { |match| @procs << [args[0], match, blok] }
else
- add( [args[0], /.*/, blok] )
+ add( [args[0], nil, blok] )
end
elsif args[0].kind_of? Array
if args.size == 2
@@ -54,7 +54,7 @@ module REXML
args[0].each { |match| add( [ :start_element, match, blok ] ) }
end
else
- add([nil, /.*/, args[0]])
+ add([nil, nil, args[0]])
end
end
@@ -164,9 +164,10 @@ module REXML
def get_procs( symbol, name )
return nil if @procs.size == 0
@procs.find_all do |sym, match, block|
+ #puts sym.inspect+"=="+symbol.inspect+ "\t"+match.inspect+"=="+name.inspect+ "\t"+( (sym.nil? or symbol == sym) and ((name.nil? and match.nil?) or match.nil? or ( (name == match) or (match.kind_of? Regexp and name =~ match)))).to_s
(
(sym.nil? or symbol == sym) and
- (name.nil? or (
+ ((name.nil? and match.nil?) or match.nil? or (
(name == match) or
(match.kind_of? Regexp and name =~ match)
)
@@ -179,7 +180,7 @@ module REXML
@listeners.find_all do |sym, match, block|
(
(sym.nil? or symbol == sym) and
- (name.nil? or (
+ ((name.nil? and match.nil?) or match.nil? or (
(name == match) or
(match.kind_of? Regexp and name =~ match)
)
diff --git a/lib/rexml/rexml.rb b/lib/rexml/rexml.rb
index b632188571..67b36a87f6 100644
--- a/lib/rexml/rexml.rb
+++ b/lib/rexml/rexml.rb
@@ -20,7 +20,7 @@
# be accessed online at http://www.germane-software.com/software/rexml_doc
# A tutorial is available in docs/tutorial.html
module REXML
- Copyright = "Copyright #{Time.now.year} Sean Russell <ser@germane-software.com>"
- Date = "+2003/346"
- Version = "2.7.3"
+ Copyright = "Copyright © 2001, 2002, 2003, 2004 Sean Russell <ser@germane-software.com>"
+ Date = "+2004/088"
+ Version = "3.0.0"
end
diff --git a/lib/rexml/source.rb b/lib/rexml/source.rb
index 2110e6db66..a524e483ef 100644
--- a/lib/rexml/source.rb
+++ b/lib/rexml/source.rb
@@ -31,7 +31,6 @@ module REXML
def initialize(arg)
@orig = @buffer = arg
self.encoding = check_encoding( @buffer )
- #@buffer = decode(@buffer) unless @encoding == UTF_8
@line = 0
end
@@ -96,7 +95,7 @@ module REXML
# @return true if the Source is exhausted
def empty?
- @buffer.nil?
+ @buffer == ""
end
# @return the current line in the source
@@ -113,17 +112,14 @@ module REXML
class IOSource < Source
#attr_reader :block_size
+ # block_size has been deprecated
def initialize(arg, block_size=500)
@er_source = @source = arg
@to_utf = false
- # READLINE OPT
- # The following was commented out when IOSource started using readline
- # to pull the data from the stream.
- #@block_size = block_size
- #super @source.read(@block_size)
- @line_break = '>'
- #super @source.readline( "\n" )
- super @source.readline( @line_break )+@source.read
+ # FIXME
+ # This is broken. If the user puts in enough carriage returns, this can fail
+ # to calculate the correct encoding.
+ super @source.read( 100 )
@line_break = encode( '>' )
end
diff --git a/lib/rexml/text.rb b/lib/rexml/text.rb
index 2494ad9e8a..2e54f9fa11 100644
--- a/lib/rexml/text.rb
+++ b/lib/rexml/text.rb
@@ -164,9 +164,54 @@ module REXML
end
@unnormalized = Text::unnormalize( @string, doctype )
end
+
+ def wrap(string, width, addnewline=false)
+ # Recursivly wrap string at width.
+ return string if string.length <= width
+ place = string.rindex(' ', width) # Position in string with last ' ' before cutoff
+ if addnewline then
+ return "\n" + string[0,place] + "\n" + wrap(string[place+1..-1], width)
+ else
+ return string[0,place] + "\n" + wrap(string[place+1..-1], width)
+ end
+ end
+ # Sets the contents of this text node. This expects the text to be
+ # unnormalized. It returns self.
+ #
+ # e = Element.new( "a" )
+ # e.add_text( "foo" ) # <a>foo</a>
+ # e[0].value = "bar" # <a>bar</a>
+ # e[0].value = "<a>" # <a>&lt;a&gt;</a>
+ def value=( val )
+ @string = val.gsub( /\r\n?/, "\n" )
+ @unnormalized = nil
+ @normalized = nil
+ @raw = false
+ end
+
+ def indent(string, level=1, style="\t", indentfirstline=true)
+ return string if level < 0
+ new_string = ''
+ string.each { |line|
+ indent_string = style * level
+ new_line = (indent_string + line).sub(/[\s]+$/,'')
+ new_string << new_line
+ }
+ new_string.strip! unless indentfirstline
+ return new_string
+ end
+
def write( writer, indent=-1, transitive=false, ie_hack=false )
- writer << to_s()
+ s = to_s()
+ if not (@parent and @parent.whitespace) then
+ s = wrap(s, 60, false) if @parent and @parent.context[:wordwrap] == :all
+ if @parent and not @parent.context[:indentstyle].nil? and indent > 0 and s.count("\n") > 0
+ s = indent(s, indent, @parent.context[:indentstyle], false)
+ end
+ s.squeeze!(" \n\t") if @parent and !@parent.whitespace
+ end
+ writer << s
end
# Writes out text, substituting special characters beforehand.
diff --git a/lib/rexml/xpath_parser.rb b/lib/rexml/xpath_parser.rb
index 9cd1e5d64c..377a51e885 100644
--- a/lib/rexml/xpath_parser.rb
+++ b/lib/rexml/xpath_parser.rb
@@ -29,7 +29,8 @@ module REXML
def parse path, nodeset
path_stack = @parser.parse( path )
- #puts "PARSE: #{path} => #{path_stack.inspect}"
+ #puts "PARSE: #{path} => #{path_stack.inspect}"
+ #puts "PARSE: nodeset = #{nodeset.collect{|x|x.to_s}.inspect}"
match( path_stack, nodeset )
end
@@ -46,7 +47,7 @@ module REXML
def match( path_stack, nodeset )
while ( path_stack.size > 0 and nodeset.size > 0 )
- #puts "PARSE: #{path_stack.inspect} '#{nodeset.collect{|n|n.type}.inspect}'"
+ #puts "PARSE: #{path_stack.inspect} '#{nodeset.collect{|n|n.class}.inspect}'"
nodeset = internal_parse( path_stack, nodeset )
#puts "NODESET: #{nodeset.size}"
#puts "PATH_STACK: #{path_stack.inspect}"
@@ -55,8 +56,9 @@ module REXML
end
def internal_parse path_stack, nodeset
+ #puts "INTERNAL_PARSE RETURNING WITH NO RESULTS" if nodeset.size == 0 or path_stack.size == 0
return nodeset if nodeset.size == 0 or path_stack.size == 0
- #puts "INTERNAL_PARSE: #{path_stack.inspect}, #{nodeset.collect{|n| n.type}.inspect}"
+ #puts "INTERNAL_PARSE: #{path_stack.inspect}, #{nodeset.collect{|n| n.class}.inspect}"
case path_stack.shift
when :document
return [ nodeset[0].root.parent ]
@@ -205,7 +207,7 @@ module REXML
Functions::index = index+1
#puts "Node #{node} and index=#{index+1}"
result = Predicate( predicate, node )
- #puts "Predicate returned #{result} (#{result.type}) for #{node.type}"
+ #puts "Predicate returned #{result} (#{result.class}) for #{node.class}"
if result.kind_of? Numeric
#puts "#{result} == #{index} => #{result == index}"
new_nodeset << node if result == (index+1)
@@ -285,6 +287,7 @@ module REXML
end
##########################################################
+ # FIXME
# The next two methods are BAD MOJO!
# This is my achilles heel. If anybody thinks of a better
# way of doing this, be my guest. This really sucks, but
@@ -294,24 +297,39 @@ module REXML
def descendant_or_self( path_stack, nodeset )
rs = []
d_o_s( path_stack, nodeset, rs )
- #puts "RS = #{rs.collect{|n|n.to_s}.inspect}"
- rs.flatten.compact
+ #puts "RS = #{rs.collect{|n|n.to_s}.inspect}"
+ document_order(rs.flatten.compact)
end
def d_o_s( p, ns, r )
- #puts r.collect{|n|n.to_s}.inspect
- #puts ns.collect{|n|n.to_s}.inspect
nt = nil
ns.each_index do |i|
n = ns[i]
x = match( p.clone, [ n ] )
- #puts "Got a match on #{p.inspect} for #{ns.collect{|n|n.to_s+"("+n.type.to_s+")"}.inspect}"
nt = n.node_type
- d_o_s( p, n.children, x ) if nt == :element or nt == :document
- r[i,0] = [x] if x.size > 0
+ d_o_s( p, n.children, x ) if nt == :element or nt == :document and n.children.size > 0
+ r.concat(x) if x.size > 0
end
end
+
+ # Reorders an array of nodes so that they are in document order
+ # It tries to do this efficiently.
+ def document_order( array_of_nodes )
+ new_arry = []
+ array_of_nodes.each { |node|
+ node_idx = []
+ np = node.node_type == :attribute ? node.element : node
+ while np.parent and np.parent.node_type == :element
+ node_idx << np.parent.children.index( np )
+ np = np.parent
+ end
+ new_arry << [ node_idx.reverse.join, node ]
+ }
+ new_arry.sort{ |s1, s2| s1[0] <=> s2[0] }.collect{ |s| s[1] }
+ end
+
+
def recurse( nodeset, &block )
for node in nodeset
yield node
@@ -324,7 +342,7 @@ module REXML
def Predicate( predicate, node )
predicate = predicate.clone
#puts "#"*20
- #puts "Predicate( #{predicate.inspect}, #{node.type} )"
+ #puts "Predicate( #{predicate.inspect}, #{node.class} )"
results = []
case (predicate[0])
when :and, :or, :eq, :neq, :lt, :lteq, :gt, :gteq