diff options
Diffstat (limited to 'doc/string/dump.rdoc')
| -rw-r--r-- | doc/string/dump.rdoc | 89 |
1 files changed, 89 insertions, 0 deletions
diff --git a/doc/string/dump.rdoc b/doc/string/dump.rdoc new file mode 100644 index 0000000000..add3c35662 --- /dev/null +++ b/doc/string/dump.rdoc @@ -0,0 +1,89 @@ +For an ordinary string, this method, +String#dump+, +returns a printable ASCII-only version of +self+, enclosed in double-quotes. + +For a dumped string, method String#undump is the inverse of +String#dump+; +it returns a "restored" version of +self+, +where all the dumping changes have been undone. + +In the simplest case, the dumped string contains the original string, +enclosed in double-quotes; +this example is done in +irb+ (interactive Ruby), which uses method `inspect` to render the results: + + s = 'hello' # => "hello" + s.dump # => "\"hello\"" + s.dump.undump # => "hello" + +Keep in mind that in the second line above: + +- The outer double-quotes are put on by +inspect+, + and _are_ _not_ part of the output of #dump. +- The inner double-quotes _are_ part of the output of +dump+, + and are escaped by +inspect+ because they are within the outer double-quotes. + +To avoid confusion, we'll use this helper method to omit the outer double-quotes: + + def dump(s) + print "String: ", s, "\n" + print "Dumped: ", s.dump, "\n" + print "Undumped: ", s.dump.undump, "\n" + end + +So that for string <tt>'hello'</tt>, we'll see: + + String: hello + Dumped: "hello" + Undumped: hello + +In a dump, certain special characters are escaped: + + String: " + Dumped: "\"" + Undumped: " + + String: \ + Dumped: "\\" + Undumped: \ + +In a dump, unprintable characters are replaced by printable ones; +the unprintable characters are the whitespace characters (other than space itself); +here we see the ordinals for those characers, together with explanatory text: + + h = { + 7 => 'Alert (BEL)', + 8 => 'Backspace (BS)', + 9 => 'Horizontal tab (HT)', + 10 => 'Linefeed (LF)', + 11 => 'Vertical tab (VT)', + 12 => 'Formfeed (FF)', + 13 => 'Carriage return (CR)' + } + +In this example, the dumped output is printed by method #inspect, +and so contains both outer double-quotes and escaped inner double-quotes: + + s = '' + h.keys.each {|i| s << i } # => [7, 8, 9, 10, 11, 12, 13] + s # => "\a\b\t\n\v\f\r" + s.dump # => "\"\\a\\b\\t\\n\\v\\f\\r\"" + +If +self+ is encoded in UTF-8 and contains Unicode characters, +each Unicode character is dumped as a Unicode escape sequence: + + String: こんにちは + Dumped: "\u3053\u3093\u306B\u3061\u306F" + Undumped: こんにちは + +If the encoding of +self+ is not ASCII-compatible +(i.e., if <tt>self.encoding.ascii_compatible?</tt> returns +false+), +each ASCII-compatible byte is dumped as an ASCII character, +and all other bytes are dumped as hexadecimal; +also appends <tt>.dup.force_encoding(\"encoding\")</tt>, +where <tt><encoding></tt> is <tt>self.encoding.name</tt>: + + String: hello + Dumped: "\xFE\xFF\x00h\x00e\x00l\x00l\x00o".dup.force_encoding("UTF-16") + Undumped: hello + + String: こんにちは + Dumped: "\xFE\xFF0S0\x930k0a0o".dup.force_encoding("UTF-16") + Undumped: こんにちは |
