From 9ff1e787f915539b1980654e3d3d2013ff5c81d2 Mon Sep 17 00:00:00 2001 From: shyouhei Date: Mon, 7 Jul 2008 07:38:25 +0000 Subject: wrong commit; sorry git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/tags/v1_8_6_269@17938 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ruby_1_8_6/lib/rexml/text.rb | 344 ------------------------------------------- 1 file changed, 344 deletions(-) delete mode 100644 ruby_1_8_6/lib/rexml/text.rb (limited to 'ruby_1_8_6/lib/rexml/text.rb') diff --git a/ruby_1_8_6/lib/rexml/text.rb b/ruby_1_8_6/lib/rexml/text.rb deleted file mode 100644 index 9804aa710b..0000000000 --- a/ruby_1_8_6/lib/rexml/text.rb +++ /dev/null @@ -1,344 +0,0 @@ -require 'rexml/entity' -require 'rexml/doctype' -require 'rexml/child' -require 'rexml/doctype' -require 'rexml/parseexception' - -module REXML - # Represents text nodes in an XML document - class Text < Child - include Comparable - # The order in which the substitutions occur - SPECIALS = [ /&(?!#?[\w-]+;)/u, //u, /"/u, /'/u, /\r/u ] - SUBSTITUTES = ['&', '<', '>', '"', ''', ' '] - # Characters which are substituted in written strings - SLAICEPS = [ '<', '>', '"', "'", '&' ] - SETUTITSBUS = [ /</u, />/u, /"/u, /'/u, /&/u ] - - # If +raw+ is true, then REXML leaves the value alone - attr_accessor :raw - - ILLEGAL = /(<|&(?!(#{Entity::NAME})|(#0*((?:\d+)|(?:x[a-fA-F0-9]+)));))/um - NUMERICENTITY = /�*((?:\d+)|(?:x[a-fA-F0-9]+));/ - - # Constructor - # +arg+ if a String, the content is set to the String. If a Text, - # the object is shallowly cloned. - # - # +respect_whitespace+ (boolean, false) if true, whitespace is - # respected - # - # +parent+ (nil) if this is a Parent object, the parent - # will be set to this. - # - # +raw+ (nil) This argument can be given three values. - # If true, then the value of used to construct this object is expected to - # contain no unescaped XML markup, and REXML will not change the text. If - # this value is false, the string may contain any characters, and REXML will - # escape any and all defined entities whose values are contained in the - # text. If this value is nil (the default), then the raw value of the - # parent will be used as the raw value for this node. If there is no raw - # value for the parent, and no value is supplied, the default is false. - # Use this field if you have entities defined for some text, and you don't - # want REXML to escape that text in output. - # Text.new( "<&", false, nil, false ) #-> "<&" - # Text.new( "<&", false, nil, false ) #-> "&lt;&amp;" - # Text.new( "<&", false, nil, true ) #-> Parse exception - # Text.new( "<&", false, nil, true ) #-> "<&" - # # Assume that the entity "s" is defined to be "sean" - # # and that the entity "r" is defined to be "russell" - # Text.new( "sean russell" ) #-> "&s; &r;" - # Text.new( "sean russell", false, nil, true ) #-> "sean russell" - # - # +entity_filter+ (nil) This can be an array of entities to match in the - # supplied text. This argument is only useful if +raw+ is set to false. - # Text.new( "sean russell", false, nil, false, ["s"] ) #-> "&s; russell" - # Text.new( "sean russell", false, nil, true, ["s"] ) #-> "sean russell" - # In the last example, the +entity_filter+ argument is ignored. - # - # +pattern+ INTERNAL USE ONLY - def initialize(arg, respect_whitespace=false, parent=nil, raw=nil, - entity_filter=nil, illegal=ILLEGAL ) - - @raw = false - - if parent - super( parent ) - @raw = parent.raw - else - @parent = nil - end - - @raw = raw unless raw.nil? - @entity_filter = entity_filter - @normalized = @unnormalized = nil - - if arg.kind_of? String - @string = arg.clone - @string.squeeze!(" \n\t") unless respect_whitespace - elsif arg.kind_of? Text - @string = arg.to_s - @raw = arg.raw - elsif - raise "Illegal argument of type #{arg.type} for Text constructor (#{arg})" - end - - @string.gsub!( /\r\n?/, "\n" ) - - # check for illegal characters - if @raw - if @string =~ illegal - raise "Illegal character '#{$1}' in raw string \"#{@string}\"" - end - end - end - - def node_type - :text - end - - def empty? - @string.size==0 - end - - - def clone - return Text.new(self) - end - - - # Appends text to this text node. The text is appended in the +raw+ mode - # of this text node. - def <<( to_append ) - @string << to_append.gsub( /\r\n?/, "\n" ) - end - - - # +other+ a String or a Text - # +returns+ the result of (to_s <=> arg.to_s) - def <=>( other ) - to_s() <=> other.to_s - end - - REFERENCE = /#{Entity::REFERENCE}/ - # Returns the string value of this text node. This string is always - # escaped, meaning that it is a valid XML text node string, and all - # entities that can be escaped, have been inserted. This method respects - # the entity filter set in the constructor. - # - # # Assume that the entity "s" is defined to be "sean", and that the - # # entity "r" is defined to be "russell" - # t = Text.new( "< & sean russell", false, nil, false, ['s'] ) - # t.to_s #-> "< & &s; russell" - # t = Text.new( "< & &s; russell", false, nil, false ) - # t.to_s #-> "< & &s; russell" - # u = Text.new( "sean russell", false, nil, true ) - # u.to_s #-> "sean russell" - def to_s - return @string if @raw - return @normalized if @normalized - - doctype = nil - if @parent - doc = @parent.document - doctype = doc.doctype if doc - end - - @normalized = Text::normalize( @string, doctype, @entity_filter ) - end - - def inspect - @string.inspect - end - - # Returns the string value of this text. This is the text without - # entities, as it might be used programmatically, or printed to the - # console. This ignores the 'raw' attribute setting, and any - # entity_filter. - # - # # Assume that the entity "s" is defined to be "sean", and that the - # # entity "r" is defined to be "russell" - # t = Text.new( "< & sean russell", false, nil, false, ['s'] ) - # t.value #-> "< & sean russell" - # t = Text.new( "< & &s; russell", false, nil, false ) - # t.value #-> "< & sean russell" - # u = Text.new( "sean russell", false, nil, true ) - # u.value #-> "sean russell" - def value - @unnormalized if @unnormalized - doctype = nil - if @parent - doc = @parent.document - doctype = doc.doctype if doc - end - @unnormalized = Text::unnormalize( @string, doctype ) - end - - # Sets the contents of this text node. This expects the text to be - # unnormalized. It returns self. - # - # e = Element.new( "a" ) - # e.add_text( "foo" ) # foo - # e[0].value = "bar" # bar - # e[0].value = "" # <a> - def value=( val ) - @string = val.gsub( /\r\n?/, "\n" ) - @unnormalized = nil - @normalized = nil - @raw = false - end - - def wrap(string, width, addnewline=false) - # Recursivly wrap string at width. - return string if string.length <= width - place = string.rindex(' ', width) # Position in string with last ' ' before cutoff - if addnewline then - return "\n" + string[0,place] + "\n" + wrap(string[place+1..-1], width) - else - return string[0,place] + "\n" + wrap(string[place+1..-1], width) - end - end - - def indent_text(string, level=1, style="\t", indentfirstline=true) - return string if level < 0 - new_string = '' - string.each { |line| - indent_string = style * level - new_line = (indent_string + line).sub(/[\s]+$/,'') - new_string << new_line - } - new_string.strip! unless indentfirstline - return new_string - end - - # == DEPRECATED - # See REXML::Formatters - # - def write( writer, indent=-1, transitive=false, ie_hack=false ) - Kernel.warn("#{self.class.name}.write is deprecated. See REXML::Formatters") - formatter = if indent > -1 - REXML::Formatters::Pretty.new( indent ) - else - REXML::Formatters::Default.new - end - formatter.write( self, writer ) - end - - # FIXME - # This probably won't work properly - def xpath - path = @parent.xpath - path += "/text()" - return path - end - - # Writes out text, substituting special characters beforehand. - # +out+ A String, IO, or any other object supporting <<( String ) - # +input+ the text to substitute and the write out - # - # z=utf8.unpack("U*") - # ascOut="" - # z.each{|r| - # if r < 0x100 - # ascOut.concat(r.chr) - # else - # ascOut.concat(sprintf("&#x%x;", r)) - # end - # } - # puts ascOut - def write_with_substitution out, input - copy = input.clone - # Doing it like this rather than in a loop improves the speed - copy.gsub!( SPECIALS[0], SUBSTITUTES[0] ) - copy.gsub!( SPECIALS[1], SUBSTITUTES[1] ) - copy.gsub!( SPECIALS[2], SUBSTITUTES[2] ) - copy.gsub!( SPECIALS[3], SUBSTITUTES[3] ) - copy.gsub!( SPECIALS[4], SUBSTITUTES[4] ) - copy.gsub!( SPECIALS[5], SUBSTITUTES[5] ) - out << copy - end - - # Reads text, substituting entities - def Text::read_with_substitution( input, illegal=nil ) - copy = input.clone - - if copy =~ illegal - raise ParseException.new( "malformed text: Illegal character #$& in \"#{copy}\"" ) - end if illegal - - copy.gsub!( /\r\n?/, "\n" ) - if copy.include? ?& - copy.gsub!( SETUTITSBUS[0], SLAICEPS[0] ) - copy.gsub!( SETUTITSBUS[1], SLAICEPS[1] ) - copy.gsub!( SETUTITSBUS[2], SLAICEPS[2] ) - copy.gsub!( SETUTITSBUS[3], SLAICEPS[3] ) - copy.gsub!( SETUTITSBUS[4], SLAICEPS[4] ) - copy.gsub!( /�*((?:\d+)|(?:x[a-f0-9]+));/ ) {|m| - m=$1 - #m='0' if m=='' - m = "0#{m}" if m[0] == ?x - [Integer(m)].pack('U*') - } - end - copy - end - - EREFERENCE = /&(?!#{Entity::NAME};)/ - # Escapes all possible entities - def Text::normalize( input, doctype=nil, entity_filter=nil ) - copy = input - # Doing it like this rather than in a loop improves the speed - #copy = copy.gsub( EREFERENCE, '&' ) - copy = copy.gsub( "&", "&" ) - if doctype - # Replace all ampersands that aren't part of an entity - doctype.entities.each_value do |entity| - copy = copy.gsub( entity.value, - "&#{entity.name};" ) if entity.value and - not( entity_filter and entity_filter.include?(entity) ) - end - else - # Replace all ampersands that aren't part of an entity - DocType::DEFAULT_ENTITIES.each_value do |entity| - copy = copy.gsub(entity.value, "&#{entity.name};" ) - end - end - copy - end - - # Unescapes all possible entities - def Text::unnormalize( string, doctype=nil, filter=nil, illegal=nil ) - rv = string.clone - rv.gsub!( /\r\n?/, "\n" ) - matches = rv.scan( REFERENCE ) - return rv if matches.size == 0 - rv.gsub!( NUMERICENTITY ) {|m| - m=$1 - m = "0#{m}" if m[0] == ?x - [Integer(m)].pack('U*') - } - matches.collect!{|x|x[0]}.compact! - if matches.size > 0 - if doctype - matches.each do |entity_reference| - unless filter and filter.include?(entity_reference) - entity_value = doctype.entity( entity_reference ) - re = /&#{entity_reference};/ - rv.gsub!( re, entity_value ) if entity_value - end - end - else - matches.each do |entity_reference| - unless filter and filter.include?(entity_reference) - entity_value = DocType::DEFAULT_ENTITIES[ entity_reference ] - re = /&#{entity_reference};/ - rv.gsub!( re, entity_value.value ) if entity_value - end - end - end - rv.gsub!( /&/, '&' ) - end - rv - end - end -end -- cgit v1.2.3