From 1448668244c81cfc720bcdfa93af352a17cc230f Mon Sep 17 00:00:00 2001 From: matz Date: Tue, 1 Jan 2008 05:43:50 +0000 Subject: * lib/rexml: 1.9 patch from Sam Ruby mentioned in his blog: [ruby-core:14639] git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@14826 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- lib/rexml/doctype.rb | 1 - lib/rexml/element.rb | 13 +++++++----- lib/rexml/encoding.rb | 6 +++--- lib/rexml/entity.rb | 2 +- lib/rexml/formatters/pretty.rb | 1 + lib/rexml/functions.rb | 12 ++++++++--- lib/rexml/parsers/baseparser.rb | 5 +++++ lib/rexml/parsers/sax2parser.rb | 17 +++++++++++---- lib/rexml/parsers/treeparser.rb | 5 ++++- lib/rexml/parsers/xpathparser.rb | 6 +++--- lib/rexml/source.rb | 14 +++++++++---- lib/rexml/syncenumerator.rb | 7 +++---- lib/rexml/text.rb | 43 +++++++++++++------------------------- lib/rexml/validation/validation.rb | 6 +++--- lib/rexml/xpath_parser.rb | 14 ++++++------- 15 files changed, 85 insertions(+), 67 deletions(-) (limited to 'lib') diff --git a/lib/rexml/doctype.rb b/lib/rexml/doctype.rb index 05cd4ab331..e90c12d51a 100644 --- a/lib/rexml/doctype.rb +++ b/lib/rexml/doctype.rb @@ -117,7 +117,6 @@ module REXML unless @children.empty? next_indent = indent + 1 output << ' [' - child = nil # speed @children.each { |child| output << "\n" f.write( child, output ) diff --git a/lib/rexml/element.rb b/lib/rexml/element.rb index 345e1734e5..55094111e6 100644 --- a/lib/rexml/element.rb +++ b/lib/rexml/element.rb @@ -296,7 +296,7 @@ module REXML raise "First argument must be either an element name, or an Element object" if element.nil? el = @elements.add(element) attrs.each do |key, value| - el.attributes[key]=Attribute.new(key,value,self) + el.attributes[key]=value end if attrs.kind_of? Hash el end @@ -552,7 +552,11 @@ module REXML def attribute( name, namespace=nil ) prefix = nil - prefix = namespaces.index(namespace) if namespace + if namespaces.respond_to? 
:key + prefix = namespaces.key(namespace) if namespace + else + prefix = namespaces.index(namespace) if namespace + end prefix = nil if prefix == 'xmlns' attributes.get_attribute( "#{prefix ? prefix + ':' : ''}#{name}" ) end @@ -704,7 +708,6 @@ module REXML # A private helper method def each_with_something( test, max=0, name=nil ) num = 0 - child=nil @elements.each( name ){ |child| yield child if test.call(child) and num += 1 return if max>0 and num == max @@ -754,7 +757,6 @@ module REXML raise "index (#{index}) must be >= 1" if index < 1 name = literalize(name) if name num = 0 - child = nil @element.find { |child| child.kind_of? Element and (name.nil? ? true : child.has_name?( name )) and @@ -1217,7 +1219,8 @@ module REXML def get_attribute_ns(namespace, name) each_attribute() { |attribute| if name == attribute.name && - namespace == attribute.namespace() + namespace == attribute.namespace() && + ( !namespace.empty? || !attribute.fully_expanded_name.index(':') ) return attribute end } diff --git a/lib/rexml/encoding.rb b/lib/rexml/encoding.rb index a01763be99..608c69cd65 100644 --- a/lib/rexml/encoding.rb +++ b/lib/rexml/encoding.rb @@ -56,14 +56,14 @@ module REXML def check_encoding str # We have to recognize UTF-16, LSB UTF-16, and UTF-8 - if str[0] == 0xfe && str[1] == 0xff + if str[0,2] == "\xfe\xff" str[0,2] = "" return UTF_16 - elsif str[0] == 0xff && str[1] == 0xfe + elsif str[0,2] == "\xff\xfe" str[0,2] = "" return UNILE end - str =~ /^\s*<\?xml\s+version\s*=\s*(['"]).*?\1\s+encoding\s*=\s*(["'])(.*?)\2/um + str =~ /^\s*<\?xml\s+version\s*=\s*(['"]).*?\1\s+encoding\s*=\s*(["'])(.*?)\2/m return $3.upcase if $3 return UTF_8 end diff --git a/lib/rexml/entity.rb b/lib/rexml/entity.rb index ff2d45f39b..1c6a25c410 100644 --- a/lib/rexml/entity.rb +++ b/lib/rexml/entity.rb @@ -139,7 +139,7 @@ module REXML if @parent matches.each do |entity_reference| entity_value = @parent.entity( entity_reference[0] ) - rv.gsub!( /%#{entity_reference};/um, entity_value ) + 
rv.gsub!( /%#{entity_reference.join};/um, entity_value ) end end return rv diff --git a/lib/rexml/formatters/pretty.rb b/lib/rexml/formatters/pretty.rb index 22b6d857cd..3666eddc96 100644 --- a/lib/rexml/formatters/pretty.rb +++ b/lib/rexml/formatters/pretty.rb @@ -31,6 +31,7 @@ module REXML @level = 0 @ie_hack = ie_hack @width = 80 + @compact = false end protected diff --git a/lib/rexml/functions.rb b/lib/rexml/functions.rb index 8293e9c5ac..b3c89877e5 100644 --- a/lib/rexml/functions.rb +++ b/lib/rexml/functions.rb @@ -256,9 +256,15 @@ module REXML end } - string(string).unpack('U*').collect { |c| - if map.has_key? c then map[c] else c end - }.compact.pack('U*') + if ''.respond_to? :chars + string(string).chars.collect { |c| + if map.has_key? c then map[c] else c end + }.compact.join + else + string(string).unpack('U*').collect { |c| + if map.has_key? c then map[c] else c end + }.compact.pack('U*') + end end # UNTESTED diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb index fc2354a67f..854e707fae 100644 --- a/lib/rexml/parsers/baseparser.rb +++ b/lib/rexml/parsers/baseparser.rb @@ -242,6 +242,11 @@ module REXML @document_status = :after_doctype @source.read if @source.buffer.size<2 md = @source.match(/\s*/um, true) + if @source.encoding == "UTF-8" + if @source.buffer.respond_to? 
:force_encoding + @source.buffer.force_encoding(Encoding::UTF_8) + end + end end end if @document_status == :in_doctype diff --git a/lib/rexml/parsers/sax2parser.rb b/lib/rexml/parsers/sax2parser.rb index e402eb7747..cafc39375d 100644 --- a/lib/rexml/parsers/sax2parser.rb +++ b/lib/rexml/parsers/sax2parser.rb @@ -149,17 +149,26 @@ module REXML procs = get_procs( :end_prefix_mapping, event[1] ) listeners = get_listeners( :end_prefix_mapping, event[1] ) if procs or listeners - namespace_mapping.each do |prefix, uri| + namespace_mapping.each do |ns_prefix, ns_uri| # notify observers of namespaces - procs.each { |ob| ob.call( prefix ) } if procs - listeners.each { |ob| ob.end_prefix_mapping(prefix) } if listeners + procs.each { |ob| ob.call( ns_prefix ) } if procs + listeners.each { |ob| ob.end_prefix_mapping(ns_prefix) } if listeners end end when :text #normalized = @parser.normalize( event[1] ) #handle( :characters, normalized ) copy = event[1].clone - @entities.each { |key, value| copy = copy.gsub("&#{key};", value) } + + esub = proc { |match| + if @entities.has_key?($1) + @entities[$1].gsub(Text::REFERENCE, &esub) + else + match + end + } + + copy.gsub!( Text::REFERENCE, &esub ) copy.gsub!( Text::NUMERICENTITY ) {|m| m=$1 m = "0#{m}" if m[0] == ?x diff --git a/lib/rexml/parsers/treeparser.rb b/lib/rexml/parsers/treeparser.rb index 5c3e142ea7..30327d0dfd 100644 --- a/lib/rexml/parsers/treeparser.rb +++ b/lib/rexml/parsers/treeparser.rb @@ -30,7 +30,10 @@ module REXML return when :start_element tag_stack.push(event[1]) - el = @build_context = @build_context.add_element( event[1], event[2] ) + el = @build_context = @build_context.add_element( event[1] ) + event[2].each do |key, value| + el.attributes[key]=Attribute.new(key,value,self) + end when :end_element tag_stack.pop @build_context = @build_context.parent diff --git a/lib/rexml/parsers/xpathparser.rb b/lib/rexml/parsers/xpathparser.rb index de2530e347..152198856d 100644 --- a/lib/rexml/parsers/xpathparser.rb +++ 
b/lib/rexml/parsers/xpathparser.rb @@ -332,12 +332,12 @@ module REXML predicates << expr[1..-2] if expr end #puts "PREDICATES = #{predicates.inspect}" - predicates.each{ |expr| - #puts "ORING #{expr}" + predicates.each{ |pred| + #puts "ORING #{pred}" preds = [] parsed << :predicate parsed << preds - OrExpr(expr, preds) + OrExpr(pred, preds) } #puts "PREDICATES = #{predicates.inspect}" path diff --git a/lib/rexml/source.rb b/lib/rexml/source.rb index e05460fea1..3f14239a35 100644 --- a/lib/rexml/source.rb +++ b/lib/rexml/source.rb @@ -59,6 +59,9 @@ module REXML @to_utf = true else @to_utf = false + if @buffer.respond_to? :force_encoding + @buffer.force_encoding Encoding::UTF_8 + end end end @@ -147,13 +150,13 @@ module REXML str = @source.read( 2 ) if encoding self.encoding = encoding - elsif 0xfe == str[0] && 0xff == str[1] + elsif str[0,2] == "\xfe\xff" @line_break = "\000>" - elsif 0xff == str[0] && 0xfe == str[1] + elsif str[0,2] == "\xff\xfe" @line_break = ">\000" - elsif 0xef == str[0] && 0xbb == str[1] + elsif str[0,2] == "\xef\xbb" str += @source.read(1) - str = '' if (0xbf == str[2]) + str = '' if (str[2,1] == "\xBF") @line_break = ">" else @line_break = ">" @@ -193,6 +196,9 @@ module REXML str = @source.readline(@line_break) str = decode(str) if @to_utf and str @buffer << str + if not @to_utf and @buffer.respond_to? :force_encoding + @buffer.force_encoding Encoding::UTF_8 + end rescue Exception, NameError @source = nil end diff --git a/lib/rexml/syncenumerator.rb b/lib/rexml/syncenumerator.rb index 955e006cb2..11609bdf3d 100644 --- a/lib/rexml/syncenumerator.rb +++ b/lib/rexml/syncenumerator.rb @@ -6,8 +6,7 @@ module REXML # Enumerable objects. def initialize(*enums) @gens = enums - @biggest = @gens[0] - @gens.each {|x| @biggest = x if x.size > @biggest.size } + @length = @gens.collect {|x| x.size }.max end # Returns the number of enumerated Enumerable objects, i.e. the size @@ -24,8 +23,8 @@ module REXML # Enumerates rows of the Enumerable objects. 
def each - @biggest.zip( *@gens ) {|a| - yield(*a[1..-1]) + @length.times {|i| + yield @gens.collect {|x| x[i]} } self end diff --git a/lib/rexml/text.rb b/lib/rexml/text.rb index 9804aa710b..8058157605 100644 --- a/lib/rexml/text.rb +++ b/lib/rexml/text.rb @@ -308,37 +308,24 @@ module REXML # Unescapes all possible entities def Text::unnormalize( string, doctype=nil, filter=nil, illegal=nil ) - rv = string.clone - rv.gsub!( /\r\n?/, "\n" ) - matches = rv.scan( REFERENCE ) - return rv if matches.size == 0 - rv.gsub!( NUMERICENTITY ) {|m| - m=$1 - m = "0#{m}" if m[0] == ?x - [Integer(m)].pack('U*') - } - matches.collect!{|x|x[0]}.compact! - if matches.size > 0 - if doctype - matches.each do |entity_reference| - unless filter and filter.include?(entity_reference) - entity_value = doctype.entity( entity_reference ) - re = /&#{entity_reference};/ - rv.gsub!( re, entity_value ) if entity_value - end + string.gsub( /\r\n?/, "\n" ).gsub( REFERENCE ) { |ref| + if ref[1] == ?# + if ref[2] == ?x + [ref[3...-1].to_i(16)].pack('U*') + else + [ref[2...-1].to_i].pack('U*') end + elsif ref == '&amp;' + '&' + elsif filter and filter.include?( ref[1...-1] ) + ref + elsif doctype + doctype.entity( ref[1...-1] ) or ref else - matches.each do |entity_reference| - unless filter and filter.include?(entity_reference) - entity_value = DocType::DEFAULT_ENTITIES[ entity_reference ] - re = /&#{entity_reference};/ - rv.gsub!( re, entity_value.value ) if entity_value - end - end + entity_value = DocType::DEFAULT_ENTITIES[ ref[1...-1] ] + entity_value ? 
entity_value.value : ref end - rv.gsub!( /&amp;/, '&' ) - end - rv + } end end end diff --git a/lib/rexml/validation/validation.rb b/lib/rexml/validation/validation.rb index 160ea96b31..93f5bfb329 100644 --- a/lib/rexml/validation/validation.rb +++ b/lib/rexml/validation/validation.rb @@ -33,8 +33,8 @@ module REXML sattr = [:start_attribute, nil] eattr = [:end_attribute] text = [:text, nil] - k,v = event[2].find { |k,v| - sattr[1] = k + k,v = event[2].find { |key,value| + sattr[1] = key #puts "Looking for #{sattr.inspect}" m = @current.next( sattr ) #puts "Got #{m.inspect}" @@ -47,7 +47,7 @@ module REXML @current = m else #puts "Didn't get end" - text[1] = v + text[1] = value #puts "Looking for #{text.inspect}" m = m.next( text ) #puts "Got #{m.inspect}" diff --git a/lib/rexml/xpath_parser.rb b/lib/rexml/xpath_parser.rb index eb608fdb34..4275c2db0e 100644 --- a/lib/rexml/xpath_parser.rb +++ b/lib/rexml/xpath_parser.rb @@ -222,7 +222,7 @@ module REXML when :child new_nodeset = [] nt = nil - for node in nodeset + nodeset.each do |node| nt = node.node_type new_nodeset += node.children if nt == :element or nt == :document end @@ -266,7 +266,7 @@ module REXML when :ancestor new_nodeset = [] - for node in nodeset + nodeset.each do |node| while node.parent node = node.parent new_nodeset << node unless new_nodeset.include? 
node @@ -277,7 +277,7 @@ module REXML when :ancestor_or_self new_nodeset = [] - for node in nodeset + nodeset.each do |node| if node.node_type == :element new_nodeset << node while ( node.parent ) @@ -341,7 +341,7 @@ module REXML when :descendant results = [] nt = nil - for node in nodeset + nodeset.each do |node| nt = node.node_type results += expr( path_stack.dclone.unshift( :descendant_or_self ), node.children ) if nt == :element or nt == :document @@ -376,7 +376,7 @@ module REXML when :preceding new_nodeset = [] - for node in nodeset + nodeset.each do |node| new_nodeset += preceding( node ) end #puts "NEW NODESET => #{new_nodeset.inspect}" @@ -385,7 +385,7 @@ module REXML when :following new_nodeset = [] - for node in nodeset + nodeset.each do |node| new_nodeset += following( node ) end nodeset = new_nodeset @@ -395,7 +395,7 @@ module REXML #puts "In :namespace" new_nodeset = [] prefix = path_stack.shift - for node in nodeset + nodeset.each do |node| if (node.node_type == :element or node.node_type == :attribute) if @namespaces namespaces = @namespaces -- cgit v1.2.3