diff options
Diffstat (limited to 'lib/rexml/text.rb')
-rw-r--r-- | lib/rexml/text.rb | 53 |
1 files changed, 26 insertions, 27 deletions
diff --git a/lib/rexml/text.rb b/lib/rexml/text.rb index a4a30b6d54..b6dbf45bc9 100644 --- a/lib/rexml/text.rb +++ b/lib/rexml/text.rb @@ -1,3 +1,4 @@ +require 'rexml/rexml' require 'rexml/entity' require 'rexml/doctype' require 'rexml/child' @@ -308,37 +309,35 @@ module REXML # Unescapes all possible entities def Text::unnormalize( string, doctype=nil, filter=nil, illegal=nil ) - rv = string.clone - rv.gsub!( /\r\n?/, "\n" ) - matches = rv.scan( REFERENCE ) - return rv if matches.size == 0 - rv.gsub!( NUMERICENTITY ) {|m| - m=$1 - m = "0#{m}" if m[0] == ?x - [Integer(m)].pack('U*') + sum = 0 + string.gsub( /\r\n?/, "\n" ).gsub( REFERENCE ) { + s = Text.expand($&, doctype, filter) + if sum + s.bytesize > REXML.entity_expansion_text_limit + raise "entity expansion has grown too large" + else + sum += s.bytesize + end + s } - matches.collect!{|x|x[0]}.compact! - if matches.size > 0 - if doctype - matches.each do |entity_reference| - unless filter and filter.include?(entity_reference) - entity_value = doctype.entity( entity_reference ) - re = /&#{entity_reference};/ - rv.gsub!( re, entity_value ) if entity_value - end - end + end + + def Text.expand(ref, doctype, filter) + if ref[1] == ?# + if ref[2] == ?x + [ref[3...-1].to_i(16)].pack('U*') else - matches.each do |entity_reference| - unless filter and filter.include?(entity_reference) - entity_value = DocType::DEFAULT_ENTITIES[ entity_reference ] - re = /&#{entity_reference};/ - rv.gsub!( re, entity_value.value ) if entity_value - end - end + [ref[2...-1].to_i].pack('U*') end - rv.gsub!( /&/, '&' ) + elsif ref == '&' + '&' + elsif filter and filter.include?( ref[1...-1] ) + ref + elsif doctype + doctype.entity( ref[1...-1] ) or ref + else + entity_value = DocType::DEFAULT_ENTITIES[ ref[1...-1] ] + entity_value ? entity_value.value : ref end - rv end end end |