diff options
author | yugui <yugui@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2008-08-25 15:13:14 +0000 |
---|---|---|
committer | yugui <yugui@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2008-08-25 15:13:14 +0000 |
commit | d0233291bc8a5068e52c69c210e5979e5324b5bc (patch) | |
tree | 7d9459449c33792c63eeb7baa071e76352e0baab /trunk/lib/rexml/text.rb | |
parent | 0dc342de848a642ecce8db697b8fecd83a63e117 (diff) | |
parent | 72eaacaa15256ab95c3b52ea386f88586fb9da40 (diff) |
re-adding tag v1_9_0_4 as an alias of trunk@18848 (tag: v1_9_0_4)
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/tags/v1_9_0_4@18849 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'trunk/lib/rexml/text.rb')
-rw-r--r-- | trunk/lib/rexml/text.rb | 404 |
1 files changed, 0 insertions, 404 deletions
diff --git a/trunk/lib/rexml/text.rb b/trunk/lib/rexml/text.rb deleted file mode 100644 index fac5ac3e41..0000000000 --- a/trunk/lib/rexml/text.rb +++ /dev/null @@ -1,404 +0,0 @@ -require 'rexml/entity' -require 'rexml/doctype' -require 'rexml/child' -require 'rexml/doctype' -require 'rexml/parseexception' - -module REXML - # Represents text nodes in an XML document - class Text < Child - include Comparable - # The order in which the substitutions occur - SPECIALS = [ /&(?!#?[\w-]+;)/u, /</u, />/u, /"/u, /'/u, /\r/u ] - SUBSTITUTES = ['&', '<', '>', '"', ''', ' '] - # Characters which are substituted in written strings - SLAICEPS = [ '<', '>', '"', "'", '&' ] - SETUTITSBUS = [ /</u, />/u, /"/u, /'/u, /&/u ] - - # If +raw+ is true, then REXML leaves the value alone - attr_accessor :raw - - NEEDS_A_SECOND_CHECK = /(<|&((#{Entity::NAME});|(#0*((?:\d+)|(?:x[a-fA-F0-9]+)));)?)/um - NUMERICENTITY = /�*((?:\d+)|(?:x[a-fA-F0-9]+));/ - VALID_CHAR = [ - 0x9, 0xA, 0xD, - (0x20..0xD7FF), - (0xE000..0xFFFD), - (0x10000..0x10FFFF) - ] - - if String.method_defined? :encode - VALID_XML_CHARS = Regexp.new('^['+ - VALID_CHAR.map { |item| - case item - when Fixnum - [item].pack('U').force_encoding('utf-8') - when Range - [item.first, '-'.ord, item.last].pack('UUU').force_encoding('utf-8') - end - }.join + - ']*$') - else - VALID_XML_CHARS = /^( - [\x09\x0A\x0D\x20-\x7E] # ASCII - | [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte - | \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs - | [\xE1-\xEC\xEE][\x80-\xBF]{2} # straight 3-byte - | \xEF[\x80-\xBE]{2} # - | \xEF\xBF[\x80-\xBD] # excluding U+fffe and U+ffff - | \xED[\x80-\x9F][\x80-\xBF] # excluding surrogates - | \xF0[\x90-\xBF][\x80-\xBF]{2} # planes 1-3 - | [\xF1-\xF3][\x80-\xBF]{3} # planes 4-15 - | \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16 - )*$/nx; - end - - # Constructor - # +arg+ if a String, the content is set to the String. If a Text, - # the object is shallowly cloned. 
- # - # +respect_whitespace+ (boolean, false) if true, whitespace is - # respected - # - # +parent+ (nil) if this is a Parent object, the parent - # will be set to this. - # - # +raw+ (nil) This argument can be given three values. - # If true, then the value of used to construct this object is expected to - # contain no unescaped XML markup, and REXML will not change the text. If - # this value is false, the string may contain any characters, and REXML will - # escape any and all defined entities whose values are contained in the - # text. If this value is nil (the default), then the raw value of the - # parent will be used as the raw value for this node. If there is no raw - # value for the parent, and no value is supplied, the default is false. - # Use this field if you have entities defined for some text, and you don't - # want REXML to escape that text in output. - # Text.new( "<&", false, nil, false ) #-> "<&" - # Text.new( "<&", false, nil, false ) #-> "&lt;&amp;" - # Text.new( "<&", false, nil, true ) #-> Parse exception - # Text.new( "<&", false, nil, true ) #-> "<&" - # # Assume that the entity "s" is defined to be "sean" - # # and that the entity "r" is defined to be "russell" - # Text.new( "sean russell" ) #-> "&s; &r;" - # Text.new( "sean russell", false, nil, true ) #-> "sean russell" - # - # +entity_filter+ (nil) This can be an array of entities to match in the - # supplied text. This argument is only useful if +raw+ is set to false. - # Text.new( "sean russell", false, nil, false, ["s"] ) #-> "&s; russell" - # Text.new( "sean russell", false, nil, true, ["s"] ) #-> "sean russell" - # In the last example, the +entity_filter+ argument is ignored. - # - # +pattern+ INTERNAL USE ONLY - def initialize(arg, respect_whitespace=false, parent=nil, raw=nil, - entity_filter=nil, illegal=NEEDS_A_SECOND_CHECK ) - - @raw = false - - if parent - super( parent ) - @raw = parent.raw - else - @parent = nil - end - - @raw = raw unless raw.nil? 
- @entity_filter = entity_filter - @normalized = @unnormalized = nil - - if arg.kind_of? String - @string = arg.clone - @string.squeeze!(" \n\t") unless respect_whitespace - elsif arg.kind_of? Text - @string = arg.to_s - @raw = arg.raw - elsif - raise "Illegal argument of type #{arg.type} for Text constructor (#{arg})" - end - - @string.gsub!( /\r\n?/, "\n" ) - - Text.check(@string, NEEDS_A_SECOND_CHECK, doctype) if @raw and @parent - end - - def parent= parent - super(parent) - Text.check(@string, NEEDS_A_SECOND_CHECK, doctype) if @raw and @parent - end - - # check for illegal characters - def Text.check string, pattern, doctype - - # illegal anywhere - if string !~ VALID_XML_CHARS - if String.method_defined? :encode - string.chars.each do |c| - case c.ord - when *VALID_CHAR - else - raise "Illegal character #{c.inspect} in raw string \"#{string}\"" - end - end - else - string.scan(/[\x00-\x7F]|[\x80-\xBF][\xC0-\xF0]*|[\xC0-\xF0]/n) do |c| - case c.unpack('U') - when *VALID_CHAR - else - raise "Illegal character #{c.inspect} in raw string \"#{string}\"" - end - end - end - end - - # context sensitive - string.scan(pattern) do - if $1[-1] != ?; - raise "Illegal character '#{$1}' in raw string \"#{string}\"" - elsif $1[0] == ?& - if $5 and $5[0] == ?# - case ($5[1] == ?x ? $5[2..-1].to_i(16) : $5[1..-1].to_i) - when *VALID_CHAR - else - raise "Illegal character '#{$1}' in raw string \"#{string}\"" - end - elsif $3 and !SUBSTITUTES.include?($1) - if !doctype or !doctype.entities.has_key?($3) - raise "Undeclared entity '#{$1}' in raw string \"#{string}\"" - end - end - end - end - end - - def node_type - :text - end - - def empty? - @string.size==0 - end - - - def clone - return Text.new(self) - end - - - # Appends text to this text node. The text is appended in the +raw+ mode - # of this text node. 
- def <<( to_append ) - @string << to_append.gsub( /\r\n?/, "\n" ) - end - - - # +other+ a String or a Text - # +returns+ the result of (to_s <=> arg.to_s) - def <=>( other ) - to_s() <=> other.to_s - end - - def doctype - if @parent - doc = @parent.document - doc.doctype if doc - end - end - - REFERENCE = /#{Entity::REFERENCE}/ - # Returns the string value of this text node. This string is always - # escaped, meaning that it is a valid XML text node string, and all - # entities that can be escaped, have been inserted. This method respects - # the entity filter set in the constructor. - # - # # Assume that the entity "s" is defined to be "sean", and that the - # # entity "r" is defined to be "russell" - # t = Text.new( "< & sean russell", false, nil, false, ['s'] ) - # t.to_s #-> "< & &s; russell" - # t = Text.new( "< & &s; russell", false, nil, false ) - # t.to_s #-> "< & &s; russell" - # u = Text.new( "sean russell", false, nil, true ) - # u.to_s #-> "sean russell" - def to_s - return @string if @raw - return @normalized if @normalized - - @normalized = Text::normalize( @string, doctype, @entity_filter ) - end - - def inspect - @string.inspect - end - - # Returns the string value of this text. This is the text without - # entities, as it might be used programmatically, or printed to the - # console. This ignores the 'raw' attribute setting, and any - # entity_filter. - # - # # Assume that the entity "s" is defined to be "sean", and that the - # # entity "r" is defined to be "russell" - # t = Text.new( "< & sean russell", false, nil, false, ['s'] ) - # t.value #-> "< & sean russell" - # t = Text.new( "< & &s; russell", false, nil, false ) - # t.value #-> "< & sean russell" - # u = Text.new( "sean russell", false, nil, true ) - # u.value #-> "sean russell" - def value - return @unnormalized if @unnormalized - @unnormalized = Text::unnormalize( @string, doctype ) - end - - # Sets the contents of this text node. This expects the text to be - # unnormalized. 
It returns self. - # - # e = Element.new( "a" ) - # e.add_text( "foo" ) # <a>foo</a> - # e[0].value = "bar" # <a>bar</a> - # e[0].value = "<a>" # <a><a></a> - def value=( val ) - @string = val.gsub( /\r\n?/, "\n" ) - @unnormalized = nil - @normalized = nil - @raw = false - end - - def wrap(string, width, addnewline=false) - # Recursively wrap string at width. - return string if string.length <= width - place = string.rindex(' ', width) # Position in string with last ' ' before cutoff - if addnewline then - return "\n" + string[0,place] + "\n" + wrap(string[place+1..-1], width) - else - return string[0,place] + "\n" + wrap(string[place+1..-1], width) - end - end - - def indent_text(string, level=1, style="\t", indentfirstline=true) - return string if level < 0 - new_string = '' - string.each { |line| - indent_string = style * level - new_line = (indent_string + line).sub(/[\s]+$/,'') - new_string << new_line - } - new_string.strip! unless indentfirstline - return new_string - end - - # == DEPRECATED - # See REXML::Formatters - # - def write( writer, indent=-1, transitive=false, ie_hack=false ) - Kernel.warn("#{self.class.name}.write is deprecated. See REXML::Formatters") - formatter = if indent > -1 - REXML::Formatters::Pretty.new( indent ) - else - REXML::Formatters::Default.new - end - formatter.write( self, writer ) - end - - # FIXME - # This probably won't work properly - def xpath - path = @parent.xpath - path += "/text()" - return path - end - - # Writes out text, substituting special characters beforehand. 
- # +out+ A String, IO, or any other object supporting <<( String ) - # +input+ the text to substitute and the write out - # - # z=utf8.unpack("U*") - # ascOut="" - # z.each{|r| - # if r < 0x100 - # ascOut.concat(r.chr) - # else - # ascOut.concat(sprintf("&#x%x;", r)) - # end - # } - # puts ascOut - def write_with_substitution out, input - copy = input.clone - # Doing it like this rather than in a loop improves the speed - copy.gsub!( SPECIALS[0], SUBSTITUTES[0] ) - copy.gsub!( SPECIALS[1], SUBSTITUTES[1] ) - copy.gsub!( SPECIALS[2], SUBSTITUTES[2] ) - copy.gsub!( SPECIALS[3], SUBSTITUTES[3] ) - copy.gsub!( SPECIALS[4], SUBSTITUTES[4] ) - copy.gsub!( SPECIALS[5], SUBSTITUTES[5] ) - out << copy - end - - # Reads text, substituting entities - def Text::read_with_substitution( input, illegal=nil ) - copy = input.clone - - if copy =~ illegal - raise ParseException.new( "malformed text: Illegal character #$& in \"#{copy}\"" ) - end if illegal - - copy.gsub!( /\r\n?/, "\n" ) - if copy.include? 
?& - copy.gsub!( SETUTITSBUS[0], SLAICEPS[0] ) - copy.gsub!( SETUTITSBUS[1], SLAICEPS[1] ) - copy.gsub!( SETUTITSBUS[2], SLAICEPS[2] ) - copy.gsub!( SETUTITSBUS[3], SLAICEPS[3] ) - copy.gsub!( SETUTITSBUS[4], SLAICEPS[4] ) - copy.gsub!( /�*((?:\d+)|(?:x[a-f0-9]+));/ ) { - m=$1 - #m='0' if m=='' - m = "0#{m}" if m[0] == ?x - [Integer(m)].pack('U*') - } - end - copy - end - - EREFERENCE = /&(?!#{Entity::NAME};)/ - # Escapes all possible entities - def Text::normalize( input, doctype=nil, entity_filter=nil ) - copy = input.to_s - # Doing it like this rather than in a loop improves the speed - #copy = copy.gsub( EREFERENCE, '&' ) - copy = copy.gsub( "&", "&" ) - if doctype - # Replace all ampersands that aren't part of an entity - doctype.entities.each_value do |entity| - copy = copy.gsub( entity.value, - "&#{entity.name};" ) if entity.value and - not( entity_filter and entity_filter.include?(entity) ) - end - else - # Replace all ampersands that aren't part of an entity - DocType::DEFAULT_ENTITIES.each_value do |entity| - copy = copy.gsub(entity.value, "&#{entity.name};" ) - end - end - copy - end - - # Unescapes all possible entities - def Text::unnormalize( string, doctype=nil, filter=nil, illegal=nil ) - string.gsub( /\r\n?/, "\n" ).gsub( REFERENCE ) { - ref = $& - if ref[1] == ?# - if ref[2] == ?x - [ref[3...-1].to_i(16)].pack('U*') - else - [ref[2...-1].to_i].pack('U*') - end - elsif ref == '&' - '&' - elsif filter and filter.include?( ref[1...-1] ) - ref - elsif doctype - doctype.entity( ref[1...-1] ) or ref - else - entity_value = DocType::DEFAULT_ENTITIES[ ref[1...-1] ] - entity_value ? entity_value.value : ref - end - } - end - end -end |