diff options
Diffstat (limited to 'doc/string')
50 files changed, 966 insertions, 251 deletions
diff --git a/doc/string/aref.rdoc b/doc/string/aref.rdoc new file mode 100644 index 0000000000..a9ab8857bc --- /dev/null +++ b/doc/string/aref.rdoc @@ -0,0 +1,96 @@ +Returns the substring of +self+ specified by the arguments. + +<b>Form <tt>self[offset]</tt></b> + +With non-negative integer argument +offset+ given, +returns the 1-character substring found in self at character offset +offset+: + + 'hello'[0] # => "h" + 'hello'[4] # => "o" + 'hello'[5] # => nil + 'こんにちは'[4] # => "は" + +With negative integer argument +offset+ given, +counts backward from the end of +self+: + + 'hello'[-1] # => "o" + 'hello'[-5] # => "h" + 'hello'[-6] # => nil + +<b>Form <tt>self[offset, size]</tt></b> + +With integer arguments +offset+ and +size+ given, +returns a substring of size +size+ characters (as available) +beginning at character offset specified by +offset+. + +If argument +offset+ is non-negative, +the offset is +offset+: + + 'hello'[0, 1] # => "h" + 'hello'[0, 5] # => "hello" + 'hello'[0, 6] # => "hello" + 'hello'[2, 3] # => "llo" + 'hello'[2, 0] # => "" + 'hello'[2, -1] # => nil + +If argument +offset+ is negative, +counts backward from the end of +self+: + + 'hello'[-1, 1] # => "o" + 'hello'[-5, 5] # => "hello" + 'hello'[-1, 0] # => "" + 'hello'[-6, 5] # => nil + +Special case: if +offset+ equals the size of +self+, +returns a new empty string: + + 'hello'[5, 3] # => "" + +<b>Form <tt>self[range]</tt></b> + +With Range argument +range+ given, +forms substring <tt>self[range.start, range.size]</tt>: + + 'hello'[0..2] # => "hel" + 'hello'[0, 3] # => "hel" + + 'hello'[0...2] # => "he" + 'hello'[0, 2] # => "he" + + 'hello'[0, 0] # => "" + 'hello'[0...0] # => "" + +<b>Form <tt>self[regexp, capture = 0]</tt></b> + +With Regexp argument +regexp+ given and +capture+ as zero, +searches for a matching substring in +self+; +updates {Regexp-related global variables}[rdoc-ref:Regexp@Global+Variables]: + + 'hello'[/ell/] # => "ell" + 'hello'[/l+/] # => "ll" + 'hello'[//] # => "" + 
'hello'[/nosuch/] # => nil + +With +capture+ as a positive integer +n+, +returns the +n+th matched group: + + 'hello'[/(h)(e)(l+)(o)/] # => "hello" + 'hello'[/(h)(e)(l+)(o)/, 1] # => "h" + $1 # => "h" + 'hello'[/(h)(e)(l+)(o)/, 2] # => "e" + $2 # => "e" + 'hello'[/(h)(e)(l+)(o)/, 3] # => "ll" + 'hello'[/(h)(e)(l+)(o)/, 4] # => "o" + 'hello'[/(h)(e)(l+)(o)/, 5] # => nil + +<b>Form <tt>self[substring]</tt></b> + +With string argument +substring+ given, +returns the matching substring of +self+, if found: + + 'hello'['ell'] # => "ell" + 'hello'[''] # => "" + 'hello'['nosuch'] # => nil + 'こんにちは'['んにち'] # => "んにち" + +Related: see {Converting to New String}[rdoc-ref:String@Converting+to+New+String]. diff --git a/doc/string/aset.rdoc b/doc/string/aset.rdoc new file mode 100644 index 0000000000..98c58b59cc --- /dev/null +++ b/doc/string/aset.rdoc @@ -0,0 +1,179 @@ +Returns +self+ with all, a substring, or none of its contents replaced; +returns the argument +other_string+. + +<b>Form <tt>self[index] = other_string</tt></b> + +With non-negative integer argument +index+ given, +searches for the 1-character substring found in self at character offset index: + + s = 'hello' + s[0] = 'foo' # => "foo" + s # => "fooello" + + s = 'hello' + s[4] = 'foo' # => "foo" + s # => "hellfoo" + + s = 'hello' + s[5] = 'foo' # => "foo" + s # => "hellofoo" + + s = 'hello' + s[6] = 'foo' # Raises IndexError: index 6 out of string. + +With negative integer argument +index+ given, +counts backward from the end of +self+: + + s = 'hello' + s[-1] = 'foo' # => "foo" + s # => "hellfoo" + + s = 'hello' + s[-5] = 'foo' # => "foo" + s # => "fooello" + + s = 'hello' + s[-6] = 'foo' # Raises IndexError: index -6 out of string. + +<b>Form <tt>self[start, length] = other_string</tt></b> + +With integer arguments +start+ and +length+ given, +searches for a substring of size +length+ characters (as available) +beginning at character offset specified by +start+. 
+ +If argument +start+ is non-negative, +the offset is +start+: + + s = 'hello' + s[0, 1] = 'foo' # => "foo" + s # => "fooello" + + s = 'hello' + s[0, 5] = 'foo' # => "foo" + s # => "foo" + + s = 'hello' + s[0, 9] = 'foo' # => "foo" + s # => "foo" + + s = 'hello' + s[2, 0] = 'foo' # => "foo" + s # => "hefoollo" + + s = 'hello' + s[2, -1] = 'foo' # Raises IndexError: negative length -1. + +If argument +start+ is negative, +counts backward from the end of +self+: + + s = 'hello' + s[-1, 1] = 'foo' # => "foo" + s # => "hellfoo" + + s = 'hello' + s[-1, 9] = 'foo' # => "foo" + s # => "hellfoo" + + s = 'hello' + s[-5, 2] = 'foo' # => "foo" + s # => "foollo" + + s = 'hello' + s[-3, 0] = 'foo' # => "foo" + s # => "hefoollo" + + s = 'hello' + s[-6, 2] = 'foo' # Raises IndexError: index -6 out of string. + +Special case: if +start+ equals the length of +self+, +the argument is appended to +self+: + + s = 'hello' + s[5, 3] = 'foo' # => "foo" + s # => "hellofoo" + +<b>Form <tt>self[range] = other_string</tt></b> + +With Range argument +range+ given, +equivalent to <tt>self[range.start, range.size] = other_string</tt>: + + s0 = 'hello' + s1 = 'hello' + s0[0..2] = 'foo' # => "foo" + s1[0, 3] = 'foo' # => "foo" + s0 # => "foolo" + s1 # => "foolo" + + s = 'hello' + s[0...2] = 'foo' # => "foo" + s # => "foollo" + + s = 'hello' + s[0...0] = 'foo' # => "foo" + s # => "foohello" + + s = 'hello' + s[9..10] = 'foo' # Raises RangeError: 9..10 out of range + +<b>Form <tt>self[regexp, capture = 0] = other_string</tt></b> + +With Regexp argument +regexp+ given and +capture+ as zero, +searches for a matching substring in +self+; +updates {Regexp-related global variables}[rdoc-ref:Regexp@Global+Variables]: + + s = 'hello' + s[/l/] = 'L' # => "L" + [$`, $&, $'] # => ["he", "l", "lo"] + s[/eLlo/] = 'owdy' # => "owdy" + [$`, $&, $'] # => ["h", "eLlo", ""] + s[/eLlo/] = 'owdy' # Raises IndexError: regexp not matched. 
+ [$`, $&, $'] # => [nil, nil, nil] + +With +capture+ as a positive integer +n+, +searches for the +n+th matched group: + + s = 'hello' + s[/(h)(e)(l+)(o)/] = 'foo' # => "foo" + [$`, $&, $'] # => ["", "hello", ""] + + s = 'hello' + s[/(h)(e)(l+)(o)/, 1] = 'foo' # => "foo" + s # => "fooello" + [$`, $&, $'] # => ["", "hello", ""] + + s = 'hello' + s[/(h)(e)(l+)(o)/, 2] = 'foo' # => "foo" + s # => "hfoollo" + [$`, $&, $'] # => ["", "hello", ""] + + s = 'hello' + s[/(h)(e)(l+)(o)/, 4] = 'foo' # => "foo" + s # => "hellfoo" + [$`, $&, $'] # => ["", "hello", ""] + + s = 'hello' + # => "hello" + s[/(h)(e)(l+)(o)/, 5] = 'foo' # Raises IndexError: index 5 out of regexp. + + s = 'hello' + s[/nosuch/] = 'foo' # Raises IndexError: regexp not matched. + +<b>Form <tt>self[substring] = other_string</tt></b> + +With string argument +substring+ given: + + s = 'hello' + s['l'] = 'foo' # => "foo" + s # => "hefoolo" + + s = 'hello' + s['ll'] = 'foo' # => "foo" + s # => "hefooo" + + s = 'こんにちは' + s['んにち'] = 'foo' # => "foo" + s # => "こfooは" + + s['nosuch'] = 'foo' # Raises IndexError: string not matched. + +Related: see {Modifying}[rdoc-ref:String@Modifying]. diff --git a/doc/string/bytes.rdoc b/doc/string/bytes.rdoc index f4b071f630..16fa8e0bb0 100644 --- a/doc/string/bytes.rdoc +++ b/doc/string/bytes.rdoc @@ -1,8 +1,7 @@ Returns an array of the bytes in +self+: - 'hello'.bytes # => [104, 101, 108, 108, 111] - 'тест'.bytes # => [209, 130, 208, 181, 209, 129, 209, 130] + 'hello'.bytes # => [104, 101, 108, 108, 111] 'こんにちは'.bytes # => [227, 129, 147, 227, 130, 147, 227, 129, 171, 227, 129, 161, 227, 129, 175] -Related: see {Converting to Non-String}[rdoc-ref:String@Converting+to+Non--5CString]. +Related: see {Converting to Non-String}[rdoc-ref:String@Converting+to+Non-String].
diff --git a/doc/string/bytesize.rdoc b/doc/string/bytesize.rdoc index 5166dd7dc6..8d12a0d454 100644 --- a/doc/string/bytesize.rdoc +++ b/doc/string/bytesize.rdoc @@ -5,9 +5,6 @@ Note that the byte count may be different from the character count (returned by s = 'foo' s.bytesize # => 3 s.size # => 3 - s = 'тест' - s.bytesize # => 8 - s.size # => 4 s = 'こんにちは' s.bytesize # => 15 s.size # => 5 diff --git a/doc/string/bytesplice.rdoc b/doc/string/bytesplice.rdoc index 5689ef4a2b..790f9eb9a0 100644 --- a/doc/string/bytesplice.rdoc +++ b/doc/string/bytesplice.rdoc @@ -20,7 +20,7 @@ And either count may be zero (i.e., specifying an empty string): '0123456789'.bytesplice(0, 0, 'abc') # => "abc0123456789" # Empty target. In the second form, just as in the first, -arugments +offset+ and +length+ determine the target bytes; +arguments +offset+ and +length+ determine the target bytes; argument +str+ _contains_ the source bytes, and the additional arguments +str_offset+ and +str_length+ determine the actual source bytes: @@ -42,7 +42,7 @@ and the source bytes are all of the given +str+: '0123456789'.bytesplice(0...0, 'abc') # => "abc0123456789" # Empty target. In the fourth form, just as in the third, -arugment +range+ determines the target bytes; +argument +range+ determines the target bytes; argument +str+ _contains_ the source bytes, and the additional argument +str_range+ determines the actual source bytes: @@ -63,4 +63,3 @@ and so has character boundaries at offsets 0, 3, 6, 9, 12, and 15. 'こんにちは'.bytesplice(0, 3, 'abc') # => "abcんにちは" 'こんにちは'.bytesplice(1, 3, 'abc') # Raises IndexError. 'こんにちは'.bytesplice(0, 2, 'abc') # Raises IndexError. - diff --git a/doc/string/capitalize.rdoc b/doc/string/capitalize.rdoc new file mode 100644 index 0000000000..3a1a2dcb8b --- /dev/null +++ b/doc/string/capitalize.rdoc @@ -0,0 +1,26 @@ +Returns a string containing the characters in +self+, +each with possibly changed case: + +- The first character made uppercase. 
+- All other characters are made lowercase. + +Examples: + + 'hello'.capitalize # => "Hello" + 'HELLO'.capitalize # => "Hello" + 'straße'.capitalize # => "Straße" # Lowercase 'ß' not changed. + 'STRAẞE'.capitalize # => "Straße" # Uppercase 'ẞ' downcased to 'ß'. + +Some characters (and some character sets) do not have upcase and downcase versions; +see {Case Mapping}[rdoc-ref:case_mapping.rdoc]: + + s = '1, 2, 3, ...' + s.capitalize == s # => true + s = 'こんにちは' + s.capitalize == s # => true + +The casing is affected by the given +mapping+, +which may be +:ascii+, +:fold+, or +:turkic+; +see {Case Mappings}[rdoc-ref:case_mapping.rdoc@Case+Mappings]. + +Related: see {Converting to New String}[rdoc-ref:String@Converting+to+New+String]. diff --git a/doc/string/center.rdoc b/doc/string/center.rdoc index 343f6ba263..b86c8b5916 100644 --- a/doc/string/center.rdoc +++ b/doc/string/center.rdoc @@ -9,7 +9,6 @@ centered and padded on one or both ends with +pad_string+: 'hello'.center(20, '-|') # => "-|-|-|-hello-|-|-|-|" # Some padding repeated. 'hello'.center(10, 'abcdefg') # => "abhelloabc" # Some padding not used. ' hello '.center(13) # => " hello " - 'тест'.center(10) # => " тест " 'こんにちは'.center(10) # => " こんにちは " # Multi-byte characters. If +size+ is less than or equal to the size of +self+, returns an unpadded copy of +self+: diff --git a/doc/string/chars.rdoc b/doc/string/chars.rdoc index 094384271b..47fb01b43a 100644 --- a/doc/string/chars.rdoc +++ b/doc/string/chars.rdoc @@ -1,8 +1,7 @@ Returns an array of the characters in +self+: 'hello'.chars # => ["h", "e", "l", "l", "o"] - 'тест'.chars # => ["т", "е", "с", "т"] 'こんにちは'.chars # => ["こ", "ん", "に", "ち", "は"] ''.chars # => [] -Related: see {Converting to Non-String}[rdoc-ref:String@Converting+to+Non--5CString]. +Related: see {Converting to Non-String}[rdoc-ref:String@Converting+to+Non-String]. 
diff --git a/doc/string/chomp.rdoc b/doc/string/chomp.rdoc index 6ec7664f6b..4efff5c291 100644 --- a/doc/string/chomp.rdoc +++ b/doc/string/chomp.rdoc @@ -9,7 +9,6 @@ if they are <tt>"\r"</tt>, <tt>"\n"</tt>, or <tt>"\r\n"</tt> "abc\n".chomp # => "abc" "abc\r\n".chomp # => "abc" "abc\n\r".chomp # => "abc\n" - "тест\r\n".chomp # => "тест" "こんにちは\r\n".chomp # => "こんにちは" When +line_sep+ is <tt>''</tt> (an empty string), diff --git a/doc/string/chop.rdoc b/doc/string/chop.rdoc index 2c48e91129..d818ba467a 100644 --- a/doc/string/chop.rdoc +++ b/doc/string/chop.rdoc @@ -3,13 +3,11 @@ Returns a new string copied from +self+, with trailing characters possibly remov Removes <tt>"\r\n"</tt> if those are the last two characters. "abc\r\n".chop # => "abc" - "тест\r\n".chop # => "тест" "こんにちは\r\n".chop # => "こんにちは" Otherwise removes the last character if it exists. 'abcd'.chop # => "abc" - 'тест'.chop # => "тес" 'こんにちは'.chop # => "こんにち" ''.chop # => "" diff --git a/doc/string/chr.rdoc b/doc/string/chr.rdoc index 1ada3854cb..153d5d71c3 100644 --- a/doc/string/chr.rdoc +++ b/doc/string/chr.rdoc @@ -1,7 +1,6 @@ Returns a string containing the first character of +self+: 'hello'.chr # => "h" - 'тест'.chr # => "т" 'こんにちは'.chr # => "こ" ''.chr # => "" diff --git a/doc/string/codepoints.rdoc b/doc/string/codepoints.rdoc index d9586d2e0b..0ad866389e 100644 --- a/doc/string/codepoints.rdoc +++ b/doc/string/codepoints.rdoc @@ -2,8 +2,7 @@ Returns an array of the codepoints in +self+; each codepoint is the integer value for a character: 'hello'.codepoints # => [104, 101, 108, 108, 111] - 'тест'.codepoints # => [1090, 1077, 1089, 1090] 'こんにちは'.codepoints # => [12371, 12435, 12395, 12385, 12399] ''.codepoints # => [] -Related: see {Converting to Non-String}[rdoc-ref:String@Converting+to+Non--5CString]. +Related: see {Converting to Non-String}[rdoc-ref:String@Converting+to+Non-String]. 
diff --git a/doc/string/concat.rdoc b/doc/string/concat.rdoc index 2ba0c714af..92ba664b8c 100644 --- a/doc/string/concat.rdoc +++ b/doc/string/concat.rdoc @@ -6,7 +6,6 @@ For each given object +object+ that is an integer, the value is considered a codepoint and converted to a character before concatenation: 'foo'.concat(32, 'bar', 32, 'baz') # => "foo bar baz" # Embeds spaces. - 'те'.concat(1089, 1090) # => "тест" 'こん'.concat(12395, 12385, 12399) # => "こんにちは" Related: see {Converting to New String}[rdoc-ref:String@Converting+to+New+String]. diff --git a/doc/string/count.rdoc b/doc/string/count.rdoc index 092c672d7d..7a3b9f1e21 100644 --- a/doc/string/count.rdoc +++ b/doc/string/count.rdoc @@ -9,10 +9,6 @@ returns the count of instances of that character: s.count('x') # => 0 s.count('') # => 0 - s = 'тест' - s.count('т') # => 2 - s.count('е') # => 1 - s = 'よろしくお願いします' s.count('よ') # => 1 s.count('し') # => 2 diff --git a/doc/string/delete.rdoc b/doc/string/delete.rdoc index e8ff4c0ae4..1827f177e6 100644 --- a/doc/string/delete.rdoc +++ b/doc/string/delete.rdoc @@ -10,10 +10,6 @@ removes all instances of that character: s.delete('x') # => "abracadabra" s.delete('') # => "abracadabra" - s = 'тест' - s.delete('т') # => "ес" - s.delete('е') # => "тст" - s = 'よろしくお願いします' s.delete('よ') # => "ろしくお願いします" s.delete('し') # => "よろくお願います" diff --git a/doc/string/delete_prefix.rdoc b/doc/string/delete_prefix.rdoc index 1135f3d19d..6255e300e3 100644 --- a/doc/string/delete_prefix.rdoc +++ b/doc/string/delete_prefix.rdoc @@ -4,7 +4,6 @@ Returns a copy of +self+ with leading substring +prefix+ removed: 'oof'.delete_prefix('oo') # => "f" 'oof'.delete_prefix('oof') # => "" 'oof'.delete_prefix('x') # => "oof" - 'тест'.delete_prefix('те') # => "ст" 'こんにちは'.delete_prefix('こん') # => "にちは" Related: see {Converting to New String}[rdoc-ref:String@Converting+to+New+String]. 
diff --git a/doc/string/delete_suffix.rdoc b/doc/string/delete_suffix.rdoc index 2fb70ce012..a4d9a80f85 100644 --- a/doc/string/delete_suffix.rdoc +++ b/doc/string/delete_suffix.rdoc @@ -5,7 +5,6 @@ Returns a copy of +self+ with trailing substring <tt>suffix</tt> removed: 'foo'.delete_suffix('foo') # => "" 'foo'.delete_suffix('f') # => "foo" 'foo'.delete_suffix('x') # => "foo" - 'тест'.delete_suffix('ст') # => "те" 'こんにちは'.delete_suffix('ちは') # => "こんに" Related: see {Converting to New String}[rdoc-ref:String@Converting+to+New+String]. diff --git a/doc/string/downcase.rdoc b/doc/string/downcase.rdoc index 0fb67daaeb..d5fffa037b 100644 --- a/doc/string/downcase.rdoc +++ b/doc/string/downcase.rdoc @@ -1,12 +1,20 @@ Returns a new string containing the downcased characters in +self+: - 'Hello, World!'.downcase # => "hello, world!" - 'ТЕСТ'.downcase # => "тест" - 'よろしくお願いします'.downcase # => "よろしくお願いします" + 'HELLO'.downcase # => "hello" + 'STRAẞE'.downcase # => "straße" + 'ПРИВЕТ'.downcase # => "привет" + 'RubyGems.org'.downcase # => "rubygems.org" -Some characters do not have upcased and downcased versions. +Some characters (and some character sets) do not have upcase and downcase versions; +see {Case Mapping}[rdoc-ref:case_mapping.rdoc]: -The casing may be affected by the given +mapping+; -see {Case Mapping}[rdoc-ref:case_mapping.rdoc]. + s = '1, 2, 3, ...' + s.downcase == s # => true + s = 'こんにちは' + s.downcase == s # => true + +The casing is affected by the given +mapping+, +which may be +:ascii+, +:fold+, or +:turkic+; +see {Case Mappings}[rdoc-ref:case_mapping.rdoc@Case+Mappings]. Related: see {Converting to New String}[rdoc-ref:String@Converting+to+New+String]. 
diff --git a/doc/string/dump.rdoc b/doc/string/dump.rdoc index a5ab0bb42f..7b688c28a6 100644 --- a/doc/string/dump.rdoc +++ b/doc/string/dump.rdoc @@ -1,52 +1,89 @@ -Returns a printable version of +self+, enclosed in double-quotes: +For an ordinary string, this method, +String#dump+, +returns a printable ASCII-only version of +self+, enclosed in double-quotes. - 'hello'.dump # => "\"hello\"" +For a dumped string, method String#undump is the inverse of +String#dump+; +it returns a "restored" version of +self+, +where all the dumping changes have been undone. -Certain special characters are rendered with escapes: +In the simplest case, the dumped string contains the original string, +enclosed in double-quotes; +this example is done in +irb+ (interactive Ruby), which uses method +inspect+ to render the results: - '"'.dump # => "\"\\\"\"" - '\\'.dump # => "\"\\\\\"" + s = 'hello' # => "hello" + s.dump # => "\"hello\"" + s.dump.undump # => "hello" -Non-printing characters are rendered with escapes: +Keep in mind that in the second line above: + +- The outer double-quotes are put on by +inspect+, + and _are_ _not_ part of the output of #dump. +- The inner double-quotes _are_ part of the output of +dump+, + and are escaped by +inspect+ because they are within the outer double-quotes.
+ +To avoid confusion, we'll use this helper method to omit the outer double-quotes: + + def dump(s) + print "String: ", s, "\n" + print "Dumped: ", s.dump, "\n" + print "Undumped: ", s.dump.undump, "\n" + end + +So that for string <tt>'hello'</tt>, we'll see: + + String: hello + Dumped: "hello" + Undumped: hello + +In a dump, certain special characters are escaped: + + String: " + Dumped: "\"" + Undumped: " + + String: \ + Dumped: "\\" + Undumped: \ + +In a dump, unprintable characters are replaced by printable ones; +the unprintable characters are the whitespace characters (other than space itself); +here we see the ordinals for those characters, together with explanatory text: + + h = { + 7 => 'Alert (BEL)', + 8 => 'Backspace (BS)', + 9 => 'Horizontal tab (HT)', + 10 => 'Linefeed (LF)', + 11 => 'Vertical tab (VT)', + 12 => 'Formfeed (FF)', + 13 => 'Carriage return (CR)' + } + +In this example, the dumped output is printed by method #inspect, +and so contains both outer double-quotes and escaped inner double-quotes: s = '' - s << 7 # Alarm (bell). - s << 8 # Back space. - s << 9 # Horizontal tab. - s << 10 # Line feed. - s << 11 # Vertical tab. - s << 12 # Form feed. - s << 13 # Carriage return. - s # => "\a\b\t\n\v\f\r" - s.dump # => "\"\\a\\b\\t\\n\\v\\f\\r\"" - -If +self+ is encoded in UTF-8 and contains Unicode characters, renders Unicode -characters in Unicode escape sequence: - - 'тест'.dump # => "\"\\u0442\\u0435\\u0441\\u0442\"" - 'こんにちは'.dump # => "\"\\u3053\\u3093\\u306B\\u3061\\u306F\"" - -If the encoding of +self+ is not ASCII-compatible (i.e., +self.encoding.ascii_compatible?+ -returns +false+), renders all ASCII-compatible bytes as ASCII characters and all -other bytes as hexadecimal. 
Appends <tt>.dup.force_encoding(\"encoding\")</tt>, where -<tt><encoding></tt> is +self.encoding.name+: - - s = 'hello' - s.encoding # => #<Encoding:UTF-8> - s.dump # => "\"hello\"" - s.encode('utf-16').dump # => "\"\\xFE\\xFF\\x00h\\x00e\\x00l\\x00l\\x00o\".dup.force_encoding(\"UTF-16\")" - s.encode('utf-16le').dump # => "\"h\\x00e\\x00l\\x00l\\x00o\\x00\".dup.force_encoding(\"UTF-16LE\")" - - s = 'тест' - s.encoding # => #<Encoding:UTF-8> - s.dump # => "\"\\u0442\\u0435\\u0441\\u0442\"" - s.encode('utf-16').dump # => "\"\\xFE\\xFF\\x04B\\x045\\x04A\\x04B\".dup.force_encoding(\"UTF-16\")" - s.encode('utf-16le').dump # => "\"B\\x045\\x04A\\x04B\\x04\".dup.force_encoding(\"UTF-16LE\")" - - s = 'こんにちは' - s.encoding # => #<Encoding:UTF-8> - s.dump # => "\"\\u3053\\u3093\\u306B\\u3061\\u306F\"" - s.encode('utf-16').dump # => "\"\\xFE\\xFF0S0\\x930k0a0o\".dup.force_encoding(\"UTF-16\")" - s.encode('utf-16le').dump # => "\"S0\\x930k0a0o0\".dup.force_encoding(\"UTF-16LE\")" - -Related: see {Converting to New String}[rdoc-ref:String@Converting+to+New+String]. 
+ h.keys.each {|i| s << i } # => [7, 8, 9, 10, 11, 12, 13] + s # => "\a\b\t\n\v\f\r" + s.dump # => "\"\\a\\b\\t\\n\\v\\f\\r\"" + +If +self+ is encoded in UTF-8 and contains Unicode characters, +each Unicode character is dumped as a Unicode escape sequence: + + String: こんにちは + Dumped: "\u3053\u3093\u306B\u3061\u306F" + Undumped: こんにちは + +If the encoding of +self+ is not ASCII-compatible +(i.e., if <tt>self.encoding.ascii_compatible?</tt> returns +false+), +each ASCII-compatible byte is dumped as an ASCII character, +and all other bytes are dumped as hexadecimal; +also appends <tt>.dup.force_encoding(\"encoding\")</tt>, +where <tt><encoding></tt> is <tt>self.encoding.name</tt>: + + String: hello + Dumped: "\xFE\xFF\x00h\x00e\x00l\x00l\x00o".dup.force_encoding("UTF-16") + Undumped: hello + + String: こんにちは + Dumped: "\xFE\xFF0S0\x930k0a0o".dup.force_encoding("UTF-16") + Undumped: こんにちは diff --git a/doc/string/each_byte.rdoc b/doc/string/each_byte.rdoc index 1f1069863b..642d71e84b 100644 --- a/doc/string/each_byte.rdoc +++ b/doc/string/each_byte.rdoc @@ -5,9 +5,6 @@ returns +self+: 'hello'.each_byte {|byte| a.push(byte) } # Five 1-byte characters. a # => [104, 101, 108, 108, 111] a = [] - 'тест'.each_byte {|byte| a.push(byte) } # Four 2-byte characters. - a # => [209, 130, 208, 181, 209, 129, 209, 130] - a = [] 'こんにちは'.each_byte {|byte| a.push(byte) } # Five 3-byte characters. 
a # => [227, 129, 147, 227, 130, 147, 227, 129, 171, 227, 129, 161, 227, 129, 175] diff --git a/doc/string/each_char.rdoc b/doc/string/each_char.rdoc index 5aa85b28ad..2dd56711d3 100644 --- a/doc/string/each_char.rdoc +++ b/doc/string/each_char.rdoc @@ -7,11 +7,6 @@ returns +self+: end a # => ["h", "e", "l", "l", "o"] a = [] - 'тест'.each_char do |char| - a.push(char) - end - a # => ["т", "е", "с", "т"] - a = [] 'こんにちは'.each_char do |char| a.push(char) end diff --git a/doc/string/each_codepoint.rdoc b/doc/string/each_codepoint.rdoc index 0e687082d3..8e4e7545e6 100644 --- a/doc/string/each_codepoint.rdoc +++ b/doc/string/each_codepoint.rdoc @@ -8,11 +8,6 @@ returns +self+: end a # => [104, 101, 108, 108, 111] a = [] - 'тест'.each_codepoint do |codepoint| - a.push(codepoint) - end - a # => [1090, 1077, 1089, 1090] - a = [] 'こんにちは'.each_codepoint do |codepoint| a.push(codepoint) end diff --git a/doc/string/each_grapheme_cluster.rdoc b/doc/string/each_grapheme_cluster.rdoc index 8bc6f78aaa..384cd6967d 100644 --- a/doc/string/each_grapheme_cluster.rdoc +++ b/doc/string/each_grapheme_cluster.rdoc @@ -9,12 +9,6 @@ returns +self+: a # => ["h", "e", "l", "l", "o"] a = [] - 'тест'.each_grapheme_cluster do |grapheme_cluster| - a.push(grapheme_cluster) - end - a # => ["т", "е", "с", "т"] - - a = [] 'こんにちは'.each_grapheme_cluster do |grapheme_cluster| a.push(grapheme_cluster) end diff --git a/doc/string/end_with_p.rdoc b/doc/string/end_with_p.rdoc index fcd9242122..9a95d74fde 100644 --- a/doc/string/end_with_p.rdoc +++ b/doc/string/end_with_p.rdoc @@ -4,7 +4,6 @@ Returns whether +self+ ends with any of the given +strings+: 'foo'.end_with?('bar', 'oo') # => true 'foo'.end_with?('bar', 'baz') # => false 'foo'.end_with?('') # => true - 'тест'.end_with?('т') # => true 'こんにちは'.end_with?('は') # => true Related: see {Querying}[rdoc-ref:String@Querying]. 
diff --git a/doc/string/getbyte.rdoc b/doc/string/getbyte.rdoc index ba1c06fd27..974e21c473 100644 --- a/doc/string/getbyte.rdoc +++ b/doc/string/getbyte.rdoc @@ -16,11 +16,8 @@ Returns +nil+ if +index+ is out of range: More examples: - s = 'тест' - s.bytes # => [209, 130, 208, 181, 209, 129, 209, 130] - s.getbyte(2) # => 208 s = 'こんにちは' s.bytes # => [227, 129, 147, 227, 130, 147, 227, 129, 171, 227, 129, 161, 227, 129, 175] s.getbyte(2) # => 147 -Related: see {Converting to Non-String}[rdoc-ref:String@Converting+to+Non--5CString]. +Related: see {Converting to Non-String}[rdoc-ref:String@Converting+to+Non-String]. diff --git a/doc/string/grapheme_clusters.rdoc b/doc/string/grapheme_clusters.rdoc index 07ea1e318b..ee8b45700e 100644 --- a/doc/string/grapheme_clusters.rdoc +++ b/doc/string/grapheme_clusters.rdoc @@ -16,4 +16,4 @@ Details: s.chars # => ["a", "̈"] # Two characters. s.chars.map {|char| char.ord } # => [97, 776] # Their values. -Related: see {Converting to Non-String}[rdoc-ref:String@Converting+to+Non--5CString]. +Related: see {Converting to Non-String}[rdoc-ref:String@Converting+to+Non-String]. diff --git a/doc/string/index.rdoc b/doc/string/index.rdoc index cc34bc68e6..c3cff24dac 100644 --- a/doc/string/index.rdoc +++ b/doc/string/index.rdoc @@ -8,10 +8,9 @@ returns the index of the first matching substring in +self+: 'foo'.index('o') # => 1 'foo'.index('oo') # => 1 'foo'.index('ooo') # => nil - 'тест'.index('с') # => 2 # Characters, not bytes. 
'こんにちは'.index('ち') # => 3 -When +pattern is a Regexp, returns the index of the first match in +self+: +When +pattern+ is a Regexp, returns the index of the first match in +self+: 'foo'.index(/o./) # => 1 'foo'.index(/.o/) # => 0 @@ -24,9 +23,6 @@ the returned index is relative to the beginning of +self+: 'bar'.index('r', 2) # => 2 'bar'.index('r', 3) # => nil 'bar'.index(/[r-z]/, 0) # => 2 - 'тест'.index('с', 1) # => 2 - 'тест'.index('с', 2) # => 2 - 'тест'.index('с', 3) # => nil # Offset in characters, not bytes. 'こんにちは'.index('ち', 2) # => 3 With negative integer argument +offset+, selects the search position by counting backward diff --git a/doc/string/insert.rdoc b/doc/string/insert.rdoc index d8252d5ec5..73205f2069 100644 --- a/doc/string/insert.rdoc +++ b/doc/string/insert.rdoc @@ -5,7 +5,6 @@ If the given +index+ is non-negative, inserts +other_string+ at offset +index+: 'foo'.insert(0, 'bar') # => "barfoo" 'foo'.insert(1, 'bar') # => "fbaroo" 'foo'.insert(3, 'bar') # => "foobar" - 'тест'.insert(2, 'bar') # => "теbarст" # Characters, not bytes. 'こんにちは'.insert(2, 'bar') # => "こんbarにちは" If the +index+ is negative, counts backward from the end of +self+ diff --git a/doc/string/inspect.rdoc b/doc/string/inspect.rdoc new file mode 100644 index 0000000000..398a5a74c5 --- /dev/null +++ b/doc/string/inspect.rdoc @@ -0,0 +1,38 @@ +Returns a printable version of +self+, enclosed in double-quotes. + +Most printable characters are rendered simply as themselves: + + 'abc'.inspect # => "\"abc\"" + '012'.inspect # => "\"012\"" + ''.inspect # => "\"\"" + "\u000012".inspect # => "\"\\u000012\"" + 'こんにちは'.inspect # => "\"こんにちは\"" + +But printable characters double-quote (<tt>'"'</tt>) and backslash (<tt>'\\'</tt>) are escaped: + + '"'.inspect # => "\"\\\"\"" + '\\'.inspect # => "\"\\\\\"" + +Unprintable characters are the {ASCII characters}[https://en.wikipedia.org/wiki/ASCII] +whose values are in range <tt>0..31</tt>, +along with the character whose value is +127+.
+ +Most of these characters are rendered thus: + + 0.chr.inspect # => "\"\\x00\"" + 1.chr.inspect # => "\"\\x01\"" + 2.chr.inspect # => "\"\\x02\"" + # ... + +A few, however, have special renderings: + + 7.chr.inspect # => "\"\\a\"" # BEL + 8.chr.inspect # => "\"\\b\"" # BS + 9.chr.inspect # => "\"\\t\"" # TAB + 10.chr.inspect # => "\"\\n\"" # LF + 11.chr.inspect # => "\"\\v\"" # VT + 12.chr.inspect # => "\"\\f\"" # FF + 13.chr.inspect # => "\"\\r\"" # CR + 27.chr.inspect # => "\"\\e\"" # ESC + +Related: see {Converting to Non-String}[rdoc-ref:String@Converting+to+Non-String]. diff --git a/doc/string/intern.rdoc b/doc/string/intern.rdoc new file mode 100644 index 0000000000..c82302b906 --- /dev/null +++ b/doc/string/intern.rdoc @@ -0,0 +1,8 @@ +Returns the Symbol object derived from +self+, +creating it if it did not already exist: + + 'foo'.intern # => :foo + 'こんにちは'.intern # => :こんにちは + +Related: see {Converting to Non-String}[rdoc-ref:String@Converting+to+Non-String]. + diff --git a/doc/string/length.rdoc b/doc/string/length.rdoc index 544bca269f..eb68edb10c 100644 --- a/doc/string/length.rdoc +++ b/doc/string/length.rdoc @@ -1,12 +1,11 @@ Returns the count of characters (not bytes) in +self+: 'foo'.length # => 3 - 'тест'.length # => 4 - 'こんにちは'.length # => 5 + 'こんにちは'.length # => 5 Contrast with String#bytesize: 'foo'.bytesize # => 3 - 'тест'.bytesize # => 8 - 'こんにちは'.bytesize # => 15 + 'こんにちは'.bytesize # => 15 +Related: see {Querying}[rdoc-ref:String@Querying]. diff --git a/doc/string/ljust.rdoc b/doc/string/ljust.rdoc index 8e23c1fc8f..a8ca62ee76 100644 --- a/doc/string/ljust.rdoc +++ b/doc/string/ljust.rdoc @@ -1,16 +1,13 @@ -Returns a left-justified copy of +self+. 
- -If integer argument +size+ is greater than the size (in characters) of +self+, -returns a new string of length +size+ that is a copy of +self+, -left justified and padded on the right with +pad_string+: +Returns a copy of +self+, left-justified and, if necessary, right-padded with the +pad_string+: 'hello'.ljust(10) # => "hello " ' hello'.ljust(10) # => " hello " 'hello'.ljust(10, 'ab') # => "helloababa" - 'тест'.ljust(10) # => "тест " - 'こんにちは'.ljust(10) # => "こんにちは " + 'こんにちは'.ljust(10) # => "こんにちは " -If +size+ is not greater than the size of +self+, returns a copy of +self+: +If <tt>width <= self.length</tt>, returns a copy of +self+: 'hello'.ljust(5) # => "hello" - 'hello'.ljust(1) # => "hello" + 'hello'.ljust(1) # => "hello" # Does not truncate to width. + +Related: see {Converting to New String}[rdoc-ref:String@Converting+to+New+String]. diff --git a/doc/string/ord.rdoc b/doc/string/ord.rdoc index d586363d44..8c460d3ba4 100644 --- a/doc/string/ord.rdoc +++ b/doc/string/ord.rdoc @@ -2,5 +2,6 @@ Returns the integer ordinal of the first character of +self+: 'h'.ord # => 104 'hello'.ord # => 104 - 'тест'.ord # => 1090 'こんにちは'.ord # => 12371 + +Related: see {Converting to Non-String}[rdoc-ref:String@Converting+to+Non-String]. diff --git a/doc/string/partition.rdoc b/doc/string/partition.rdoc index ebe575e8eb..b2e620a9fc 100644 --- a/doc/string/partition.rdoc +++ b/doc/string/partition.rdoc @@ -1,24 +1,43 @@ Returns a 3-element array of substrings of +self+. -Matches a pattern against +self+, scanning from the beginning. -The pattern is: +If +pattern+ is matched, returns the array: -- +string_or_regexp+ itself, if it is a Regexp. -- <tt>Regexp.quote(string_or_regexp)</tt>, if +string_or_regexp+ is a string. 
+ [pre_match, first_match, post_match] -If the pattern is matched, returns pre-match, first-match, post-match: +where: - 'hello'.partition('l') # => ["he", "l", "lo"] - 'hello'.partition('ll') # => ["he", "ll", "o"] - 'hello'.partition('h') # => ["", "h", "ello"] - 'hello'.partition('o') # => ["hell", "o", ""] - 'hello'.partition(/l+/) #=> ["he", "ll", "o"] - 'hello'.partition('') # => ["", "", "hello"] - 'тест'.partition('т') # => ["", "т", "ест"] - 'こんにちは'.partition('に') # => ["こん", "に", "ちは"] +- +first_match+ is the first-found matching substring. +- +pre_match+ and +post_match+ are the preceding and following substrings. -If the pattern is not matched, returns a copy of +self+ and two empty strings: +If +pattern+ is not matched, returns the array: - 'hello'.partition('x') # => ["hello", "", ""] + [self.dup, "", ""] -Related: String#rpartition, String#split. +Note that in the examples below, a returned string <tt>'hello'</tt> +is a copy of +self+, not +self+. + +If +pattern+ is a Regexp, performs the equivalent of <tt>self.match(pattern)</tt> +(also setting {matched-data variables}[rdoc-ref:language/globals.md@Matched+Data]): + + 'hello'.partition(/h/) # => ["", "h", "ello"] + 'hello'.partition(/l/) # => ["he", "l", "lo"] + 'hello'.partition(/l+/) # => ["he", "ll", "o"] + 'hello'.partition(/o/) # => ["hell", "o", ""] + 'hello'.partition(/^/) # => ["", "", "hello"] + 'hello'.partition(//) # => ["", "", "hello"] + 'hello'.partition(/$/) # => ["hello", "", ""] + 'hello'.partition(/x/) # => ["hello", "", ""] + +If +pattern+ is not a Regexp, converts it to a string (if it is not already one), +then performs the equivalent of <tt>self.index(pattern)</tt> +(and does _not_ set {matched-data global variables}[rdoc-ref:language/globals.md@Matched+Data]): + + 'hello'.partition('h') # => ["", "h", "ello"] + 'hello'.partition('l') # => ["he", "l", "lo"] + 'hello'.partition('ll') # => ["he", "ll", "o"] + 'hello'.partition('o') # => ["hell", "o", ""] + 'hello'.partition('') # 
=> ["", "", "hello"] + 'hello'.partition('x') # => ["hello", "", ""] + 'こんにちは'.partition('に') # => ["こん", "に", "ちは"] + +Related: see {Converting to Non-String}[rdoc-ref:String@Converting+to+Non-String]. diff --git a/doc/string/rindex.rdoc b/doc/string/rindex.rdoc new file mode 100644 index 0000000000..2b81c3716d --- /dev/null +++ b/doc/string/rindex.rdoc @@ -0,0 +1,51 @@ +Returns the integer position of the _last_ substring that matches the given argument +pattern+, +or +nil+ if none found. + +When +pattern+ is a string, returns the index of the last matching substring in self: + + 'foo'.rindex('f') # => 0 + 'foo'.rindex('o') # => 2 + 'foo'.rindex('oo') # => 1 + 'foo'.rindex('ooo') # => nil + 'こんにちは'.rindex('ち') # => 3 + +When +pattern+ is a Regexp, returns the index of the last match in self: + + 'foo'.rindex(/f/) # => 0 + 'foo'.rindex(/o/) # => 2 + 'foo'.rindex(/oo/) # => 1 + 'foo'.rindex(/ooo/) # => nil + +When +offset+ is non-negative, it specifies the maximum starting position in the +string to end the search: + + 'foo'.rindex('o', 0) # => nil + 'foo'.rindex('o', 1) # => 1 + 'foo'.rindex('o', 2) # => 2 + 'foo'.rindex('o', 3) # => 2 + +With negative integer argument +offset+, +selects the search position by counting backward from the end of +self+: + + 'foo'.rindex('o', -1) # => 2 + 'foo'.rindex('o', -2) # => 1 + 'foo'.rindex('o', -3) # => nil + 'foo'.rindex('o', -4) # => nil + +The last match means starting at the possible last position, not +the last of longest matches: + + 'foo'.rindex(/o+/) # => 2 + $~ # => #<MatchData "o"> + +To get the last longest match, combine with negative lookbehind: + + 'foo'.rindex(/(?<!o)o+/) # => 1 + $~ # => #<MatchData "oo"> + +Or String#index with negative lookahead: + + 'foo'.index(/o+(?!.*o)/) # => 1 + $~ # => #<MatchData "oo"> + +Related: see {Querying}[rdoc-ref:String@Querying]. 
diff --git a/doc/string/rjust.rdoc b/doc/string/rjust.rdoc index 24e7bf3159..acd3f198d4 100644 --- a/doc/string/rjust.rdoc +++ b/doc/string/rjust.rdoc @@ -1,16 +1,17 @@ Returns a right-justified copy of +self+. -If integer argument +size+ is greater than the size (in characters) of +self+, -returns a new string of length +size+ that is a copy of +self+, +If integer argument +width+ is greater than the size (in characters) of +self+, +returns a new string of length +width+ that is a copy of +self+, right justified and padded on the left with +pad_string+: 'hello'.rjust(10) # => " hello" 'hello '.rjust(10) # => " hello " 'hello'.rjust(10, 'ab') # => "ababahello" - 'тест'.rjust(10) # => " тест" 'こんにちは'.rjust(10) # => " こんにちは" -If +size+ is not greater than the size of +self+, returns a copy of +self+: +If <tt>width <= self.size</tt>, returns a copy of +self+: 'hello'.rjust(5, 'ab') # => "hello" 'hello'.rjust(1, 'ab') # => "hello" + +Related: see {Converting to New String}[rdoc-ref:String@Converting+to+New+String]. diff --git a/doc/string/rpartition.rdoc b/doc/string/rpartition.rdoc index d24106fb9f..add95b1f40 100644 --- a/doc/string/rpartition.rdoc +++ b/doc/string/rpartition.rdoc @@ -1,24 +1,47 @@ Returns a 3-element array of substrings of +self+. -Matches a pattern against +self+, scanning backwards from the end. -The pattern is: +Searches +self+ for a match of +pattern+, seeking the _last_ match. -- +string_or_regexp+ itself, if it is a Regexp. -- <tt>Regexp.quote(string_or_regexp)</tt>, if +string_or_regexp+ is a string. 
+If +pattern+ is not matched, returns the array: -If the pattern is matched, returns pre-match, last-match, post-match: + ["", "", self.dup] - 'hello'.rpartition('l') # => ["hel", "l", "o"] - 'hello'.rpartition('ll') # => ["he", "ll", "o"] - 'hello'.rpartition('h') # => ["", "h", "ello"] - 'hello'.rpartition('o') # => ["hell", "o", ""] - 'hello'.rpartition(/l+/) # => ["hel", "l", "o"] - 'hello'.rpartition('') # => ["hello", "", ""] - 'тест'.rpartition('т') # => ["тес", "т", ""] - 'こんにちは'.rpartition('に') # => ["こん", "に", "ちは"] +If +pattern+ is matched, returns the array: -If the pattern is not matched, returns two empty strings and a copy of +self+: + [pre_match, last_match, post_match] - 'hello'.rpartition('x') # => ["", "", "hello"] +where: -Related: String#partition, String#split. +- +last_match+ is the last-found matching substring. +- +pre_match+ and +post_match+ are the preceding and following substrings. + +The pattern used is: + +- +pattern+ itself, if it is a Regexp. +- <tt>Regexp.quote(pattern)</tt>, if +pattern+ is a string. + +Note that in the examples below, a returned string <tt>'hello'</tt> is a copy of +self+, not +self+. 
+ +If +pattern+ is a Regexp, searches for the last matching substring +(also setting {matched-data global variables}[rdoc-ref:language/globals.md@Matched+Data]): + + 'hello'.rpartition(/l/) # => ["hel", "l", "o"] + 'hello'.rpartition(/ll/) # => ["he", "ll", "o"] + 'hello'.rpartition(/h/) # => ["", "h", "ello"] + 'hello'.rpartition(/o/) # => ["hell", "o", ""] + 'hello'.rpartition(//) # => ["hello", "", ""] + 'hello'.rpartition(/x/) # => ["", "", "hello"] + 'こんにちは'.rpartition(/に/) # => ["こん", "に", "ちは"] + +If +pattern+ is not a Regexp, converts it to a string (if it is not already one), +then searches for the last matching substring +(and does _not_ set {matched-data global variables}[rdoc-ref:language/globals.md@Matched+Data]): + + 'hello'.rpartition('l') # => ["hel", "l", "o"] + 'hello'.rpartition('ll') # => ["he", "ll", "o"] + 'hello'.rpartition('h') # => ["", "h", "ello"] + 'hello'.rpartition('o') # => ["hell", "o", ""] + 'hello'.rpartition('') # => ["hello", "", ""] + 'こんにちは'.rpartition('に') # => ["こん", "に", "ちは"] + +Related: see {Converting to Non-String}[rdoc-ref:String@Converting+to+Non-String]. diff --git a/doc/string/scan.rdoc b/doc/string/scan.rdoc new file mode 100644 index 0000000000..d39b5b6dfa --- /dev/null +++ b/doc/string/scan.rdoc @@ -0,0 +1,35 @@ +Matches a pattern against +self+: + +- If +pattern+ is a Regexp, the pattern used is +pattern+ itself. +- If +pattern+ is a string, the pattern used is <tt>Regexp.quote(pattern)</tt>. + +Generates a collection of matching results +and updates {regexp-related global variables}[rdoc-ref:Regexp@Global+Variables]: + +- If the pattern contains no groups, each result is a matched substring. +- If the pattern contains groups, each result is an array + containing a matched substring for each group. 
+ +With no block given, returns an array of the results: + + 'cruel world'.scan(/\w+/) # => ["cruel", "world"] + 'cruel world'.scan(/.../) # => ["cru", "el ", "wor"] + 'cruel world'.scan(/(...)/) # => [["cru"], ["el "], ["wor"]] + 'cruel world'.scan(/(..)(..)/) # => [["cr", "ue"], ["l ", "wo"]] + 'こんにちは'.scan(/../) # => ["こん", "にち"] + 'abracadabra'.scan('ab') # => ["ab", "ab"] + 'abracadabra'.scan('nosuch') # => [] + +With a block given, calls the block with each result; returns +self+: + + 'cruel world'.scan(/\w+/) {|w| p w } + # => "cruel" + # => "world" + 'cruel world'.scan(/(.)(.)/) {|x, y| p [x, y] } + # => ["c", "r"] + # => ["u", "e"] + # => ["l", " "] + # => ["w", "o"] + # => ["r", "l"] + +Related: see {Converting to Non-String}[rdoc-ref:String@Converting+to+Non-String]. diff --git a/doc/string/scrub.rdoc b/doc/string/scrub.rdoc index 1a5b1c79d0..314b28c465 100644 --- a/doc/string/scrub.rdoc +++ b/doc/string/scrub.rdoc @@ -1,25 +1,22 @@ Returns a copy of +self+ with each invalid byte sequence replaced by the given +replacement_string+. 
-With no block given and no argument, replaces each invalid sequence -with the default replacement string -(<tt>"�"</tt> for a Unicode encoding, <tt>'?'</tt> otherwise): +With no block given, replaces each invalid sequence +with the given +default_replacement_string+ +(by default, <tt>"�"</tt> for a Unicode encoding, <tt>'?'</tt> otherwise): - s = "foo\x81\x81bar" - s.scrub # => "foo��bar" + "foo\x81\x81bar".scrub # => "foo��bar" + "foo\x81\x81bar".force_encoding('US-ASCII').scrub # => "foo??bar" + "foo\x81\x81bar".scrub('xyzzy') # => "fooxyzzyxyzzybar" -With no block given and argument +replacement_string+ given, -replaces each invalid sequence with that string: +With a block given, calls the block with each invalid sequence, +and replaces that sequence with the return value of the block: - "foo\x81\x81bar".scrub('xyzzy') # => "fooxyzzyxyzzybar" + "foo\x81\x81bar".scrub {|sequence| p sequence; 'XYZZY' } # => "fooXYZZYXYZZYbar" -With a block given, replaces each invalid sequence with the value -of the block: - - "foo\x81\x81bar".scrub {|bytes| p bytes; 'XYZZY' } - # => "fooXYZZYXYZZYbar" - -Output: +Output : "\x81" "\x81" + +Related: see {Converting to New String}[rdoc-ref:String@Converting+to+New+String]. diff --git a/doc/string/split.rdoc b/doc/string/split.rdoc index 131c14b83f..8679149003 100644 --- a/doc/string/split.rdoc +++ b/doc/string/split.rdoc @@ -1,99 +1,101 @@ -Returns an array of substrings of +self+ -that are the result of splitting +self+ +Creates an array of substrings by splitting +self+ at each occurrence of the given field separator +field_sep+. -When +field_sep+ is <tt>$;</tt>: +With no arguments given, +splits using the field separator <tt>$;</tt>, +whose default value is +nil+. -- If <tt>$;</tt> is +nil+ (its default value), - the split occurs just as if +field_sep+ were given as a space character - (see below). 
+With no block given, returns the array of substrings: -- If <tt>$;</tt> is a string, - the split occurs just as if +field_sep+ were given as that string - (see below). + 'abracadabra'.split('a') # => ["", "br", "c", "d", "br"] -When +field_sep+ is <tt>' '</tt> and +limit+ is +0+ (its default value), -the split occurs at each sequence of whitespace: +When +field_sep+ is +nil+ or <tt>' '</tt> (a single space), +splits at each sequence of whitespace: - 'abc def ghi'.split(' ') # => ["abc", "def", "ghi"] - "abc \n\tdef\t\n ghi".split(' ') # => ["abc", "def", "ghi"] - 'abc def ghi'.split(' ') # => ["abc", "def", "ghi"] + 'foo bar baz'.split(nil) # => ["foo", "bar", "baz"] + 'foo bar baz'.split(' ') # => ["foo", "bar", "baz"] + "foo \n\tbar\t\n baz".split(' ') # => ["foo", "bar", "baz"] + 'foo bar baz'.split(' ') # => ["foo", "bar", "baz"] ''.split(' ') # => [] -When +field_sep+ is a string different from <tt>' '</tt> -and +limit+ is +0+, -the split occurs at each occurrence of +field_sep+; -trailing empty substrings are not returned: +When +field_sep+ is an empty string, +splits at every character: - 'abracadabra'.split('ab') # => ["", "racad", "ra"] - 'aaabcdaaa'.split('a') # => ["", "", "", "bcd"] - ''.split('a') # => [] - '3.14159'.split('1') # => ["3.", "4", "59"] - '!@#$%^$&*($)_+'.split('$') # => ["!@#", "%^", "&*(", ")_+"] - 'тест'.split('т') # => ["", "ес"] - 'こんにちは'.split('に') # => ["こん", "ちは"] + 'abracadabra'.split('') # => ["a", "b", "r", "a", "c", "a", "d", "a", "b", "r", "a"] + ''.split('') # => [] + 'こんにちは'.split('') # => ["こ", "ん", "に", "ち", "は"] -When +field_sep+ is a Regexp and +limit+ is +0+, -the split occurs at each occurrence of a match; -trailing empty substrings are not returned: +When +field_sep+ is a non-empty string and different from <tt>' '</tt> (a single space), +uses that string as the separator: + + 'abracadabra'.split('a') # => ["", "br", "c", "d", "br"] + 'abracadabra'.split('ab') # => ["", "racad", "ra"] + ''.split('a') # => [] + 
'こんにちは'.split('に') # => ["こん", "ちは"] + +When +field_sep+ is a Regexp, +splits at each occurrence of a matching substring: 'abracadabra'.split(/ab/) # => ["", "racad", "ra"] - 'aaabcdaaa'.split(/a/) # => ["", "", "", "bcd"] - 'aaabcdaaa'.split(//) # => ["a", "a", "a", "b", "c", "d", "a", "a", "a"] '1 + 1 == 2'.split(/\W+/) # => ["1", "1", "2"] + 'abracadabra'.split(//) # => ["a", "b", "r", "a", "c", "a", "d", "a", "b", "r", "a"] -If the \Regexp contains groups, their matches are also included +If the \Regexp contains groups, their matches are included in the returned array: '1:2:3'.split(/(:)()()/, 2) # => ["1", ":", "", "", "2:3"] -As seen above, if +limit+ is +0+, -trailing empty substrings are not returned: +Argument +limit+ sets a limit on the size of the returned array; +it also determines whether trailing empty strings are included in the returned array. - 'aaabcdaaa'.split('a') # => ["", "", "", "bcd"] +When +limit+ is zero, +there is no limit on the size of the array, +but trailing empty strings are omitted: -If +limit+ is positive integer +n+, no more than <tt>n - 1-</tt> -splits occur, so that at most +n+ substrings are returned, -and trailing empty substrings are included: + 'abracadabra'.split('', 0) # => ["a", "b", "r", "a", "c", "a", "d", "a", "b", "r", "a"] + 'abracadabra'.split('a', 0) # => ["", "br", "c", "d", "br"] # Empty string after last 'a' omitted. - 'aaabcdaaa'.split('a', 1) # => ["aaabcdaaa"] - 'aaabcdaaa'.split('a', 2) # => ["", "aabcdaaa"] - 'aaabcdaaa'.split('a', 5) # => ["", "", "", "bcd", "aa"] - 'aaabcdaaa'.split('a', 7) # => ["", "", "", "bcd", "", "", ""] - 'aaabcdaaa'.split('a', 8) # => ["", "", "", "bcd", "", "", ""] +When +limit+ is a positive integer, +there is a limit on the size of the array (no more than <tt>n - 1</tt> splits occur), +and trailing empty strings are included: -Note that if +field_sep+ is a \Regexp containing groups, -their matches are in the returned array, but do not count toward the limit. 
+ 'abracadabra'.split('', 3) # => ["a", "b", "racadabra"] + 'abracadabra'.split('a', 3) # => ["", "br", "cadabra"] + 'abracadabra'.split('', 30) # => ["a", "b", "r", "a", "c", "a", "d", "a", "b", "r", "a", ""] + 'abracadabra'.split('a', 30) # => ["", "br", "c", "d", "br", ""] + 'abracadabra'.split('', 1) # => ["abracadabra"] + 'abracadabra'.split('a', 1) # => ["abracadabra"] -If +limit+ is negative, it behaves the same as if +limit+ was zero, -meaning that there is no limit, -and trailing empty substrings are included: +When +limit+ is negative, +there is no limit on the size of the array, +and trailing empty strings are included: - 'aaabcdaaa'.split('a', -1) # => ["", "", "", "bcd", "", "", ""] + 'abracadabra'.split('', -1) # => ["a", "b", "r", "a", "c", "a", "d", "a", "b", "r", "a", ""] + 'abracadabra'.split('a', -1) # => ["", "br", "c", "d", "br", ""] If a block is given, it is called with each substring and returns +self+: - 'abc def ghi'.split(' ') {|substring| p substring } + 'foo bar baz'.split(' ') {|substring| p substring } + +Output : + + "foo" + "bar" + "baz" -Output: +Note that the above example is functionally equivalent to: - "abc" - "def" - "ghi" - => "abc def ghi" + 'foo bar baz'.split(' ').each {|substring| p substring } -Note that the above example is functionally the same as calling +#each+ after -+#split+ and giving the same block. However, the above example has better -performance because it avoids the creation of an intermediate array. Also, -note the different return values. +Output : - 'abc def ghi'.split(' ').each {|substring| p substring } + "foo" + "bar" + "baz" -Output: +But the latter: - "abc" - "def" - "ghi" - => ["abc", "def", "ghi"] +- Has poorer performance because it creates an intermediate array. +- Returns an array (instead of +self+). -Related: String#partition, String#rpartition. +Related: see {Converting to Non-String}[rdoc-ref:String@Converting+to+Non-String]. 
diff --git a/doc/string/squeeze.rdoc b/doc/string/squeeze.rdoc new file mode 100644 index 0000000000..1a38c08b32 --- /dev/null +++ b/doc/string/squeeze.rdoc @@ -0,0 +1,33 @@ +Returns a copy of +self+ with each tuple (doubling, tripling, etc.) of specified characters +"squeezed" down to a single character. + +The tuples to be squeezed are specified by arguments +selectors+, +each of which is a string; +see {Character Selectors}[rdoc-ref:character_selectors.rdoc@Character+Selectors]. + +A single argument may be a single character: + + 'Noooooo!'.squeeze('o') # => "No!" + 'foo bar baz'.squeeze(' ') # => "foo bar baz" + 'Mississippi'.squeeze('s') # => "Misisippi" + 'Mississippi'.squeeze('p') # => "Mississipi" + 'Mississippi'.squeeze('x') # => "Mississippi" # Unused selector character is ignored. + 'бессонница'.squeeze('с') # => "бесонница" + 'бессонница'.squeeze('н') # => "бессоница" + +A single argument may be a string of characters: + + 'Mississippi'.squeeze('sp') # => "Misisipi" + 'Mississippi'.squeeze('ps') # => "Misisipi" # Order doesn't matter. + 'Mississippi'.squeeze('nonsense') # => "Misisippi" # Unused selector characters are ignored. + +A single argument may be a range of characters: + + 'Mississippi'.squeeze('a-p') # => "Mississipi" + 'Mississippi'.squeeze('q-z') # => "Misisippi" + 'Mississippi'.squeeze('a-z') # => "Misisipi" + +Multiple arguments are allowed; +see {Multiple Character Selectors}[rdoc-ref:character_selectors.rdoc@Multiple+Character+Selectors]. + +Related: see {Converting to New String}[rdoc-ref:String@Converting+to+New+String]. diff --git a/doc/string/start_with_p.rdoc b/doc/string/start_with_p.rdoc index 5d1f9f9543..f78edc7fa3 100644 --- a/doc/string/start_with_p.rdoc +++ b/doc/string/start_with_p.rdoc @@ -1,10 +1,9 @@ -Returns whether +self+ starts with any of the given +string_or_regexp+. +Returns whether +self+ starts with any of the given +patterns+. -Matches patterns against the beginning of +self+. 
-For each given +string_or_regexp+, the pattern is: +For each argument, the pattern used is: -- +string_or_regexp+ itself, if it is a Regexp. -- <tt>Regexp.quote(string_or_regexp)</tt>, if +string_or_regexp+ is a string. +- The pattern itself, if it is a Regexp. +- <tt>Regexp.quote(pattern)</tt>, if it is a string. Returns +true+ if any pattern matches the beginning, +false+ otherwise: @@ -12,7 +11,6 @@ Returns +true+ if any pattern matches the beginning, +false+ otherwise: 'hello'.start_with?(/H/i) # => true 'hello'.start_with?('heaven', 'hell') # => true 'hello'.start_with?('heaven', 'paradise') # => false - 'тест'.start_with?('т') # => true 'こんにちは'.start_with?('こ') # => true -Related: String#end_with?. +Related: see {Querying}[rdoc-ref:String@Querying]. diff --git a/doc/string/sub.rdoc b/doc/string/sub.rdoc new file mode 100644 index 0000000000..ff051ea177 --- /dev/null +++ b/doc/string/sub.rdoc @@ -0,0 +1,33 @@ +Returns a copy of self, possibly with a substring replaced. + +Argument +pattern+ may be a string or a Regexp; +argument +replacement+ may be a string or a Hash. + +Varying types for the argument values makes this method very versatile. + +Below are some simple examples; for many more examples, +see {Substitution Methods}[rdoc-ref:String@Substitution+Methods]. + +With arguments +pattern+ and string +replacement+ given, +replaces the first matching substring with the given replacement string: + + s = 'abracadabra' # => "abracadabra" + s.sub('bra', 'xyzzy') # => "axyzzycadabra" + s.sub(/bra/, 'xyzzy') # => "axyzzycadabra" + s.sub('nope', 'xyzzy') # => "abracadabra" + +With arguments +pattern+ and hash +replacement+ given, +replaces the first matching substring with a value from the given replacement hash, or removes it: + + h = {'a' => 'A', 'b' => 'B', 'c' => 'C'} + s.sub('b', h) # => "aBracadabra" + s.sub(/b/, h) # => "aBracadabra" + s.sub(/d/, h) # => "abracaabra" # 'd' removed. 
+ +With argument +pattern+ and a block given, +calls the block with the first matching substring; +replaces that substring with the block’s return value: + + s.sub('b') {|match| match.upcase } # => "aBracadabra" + +Related: see {Converting to New String}[rdoc-ref:String@Converting+to+New+String]. diff --git a/doc/string/succ.rdoc b/doc/string/succ.rdoc new file mode 100644 index 0000000000..1b4b936a8e --- /dev/null +++ b/doc/string/succ.rdoc @@ -0,0 +1,52 @@ +Returns the successor to +self+. The successor is calculated by +incrementing characters. + +The first character to be incremented is the rightmost alphanumeric, +or, if no alphanumerics, the rightmost character: + + 'THX1138'.succ # => "THX1139" + '<<koala>>'.succ # => "<<koalb>>" + '***'.succ # => "**+" + 'こんにちは'.succ # => "こんにちば" + +The successor to a digit is another digit, "carrying" to the next-left +character for a "rollover" from 9 to 0, and prepending another digit +if necessary: + + '00'.succ # => "01" + '09'.succ # => "10" + '99'.succ # => "100" + +The successor to a letter is another letter of the same case, +carrying to the next-left character for a rollover, +and prepending another same-case letter if necessary: + + 'aa'.succ # => "ab" + 'az'.succ # => "ba" + 'zz'.succ # => "aaa" + 'AA'.succ # => "AB" + 'AZ'.succ # => "BA" + 'ZZ'.succ # => "AAA" + +The successor to a non-alphanumeric character is the next character +in the underlying character set's collating sequence, +carrying to the next-left character for a rollover, +and prepending another character if necessary: + + s = 0.chr * 3 # => "\x00\x00\x00" + s.succ # => "\x00\x00\x01" + s = 255.chr * 3 # => "\xFF\xFF\xFF" + s.succ # => "\x01\x00\x00\x00" + +Carrying can occur between and among mixtures of alphanumeric characters: + + s = 'zz99zz99' # => "zz99zz99" + s.succ # => "aaa00aa00" + s = '99zz99zz' # => "99zz99zz" + s.succ # => "100aa00aa" + +The successor to an empty +String+ is a new empty +String+: + + ''.succ # => "" + +Related: see 
{Converting to New String}[rdoc-ref:String@Converting+to+New+String]. diff --git a/doc/string/sum.rdoc b/doc/string/sum.rdoc index 5de24e6402..22045e5f4d 100644 --- a/doc/string/sum.rdoc +++ b/doc/string/sum.rdoc @@ -1,11 +1,12 @@ -Returns a basic +n+-bit checksum of the characters in +self+; +Returns a basic +n+-bit {checksum}[https://en.wikipedia.org/wiki/Checksum] of the characters in +self+; the checksum is the sum of the binary value of each byte in +self+, modulo <tt>2**n</tt>: 'hello'.sum # => 532 'hello'.sum(4) # => 4 'hello'.sum(64) # => 532 - 'тест'.sum # => 1405 'こんにちは'.sum # => 2582 This is not a particularly strong checksum. + +Related: see {Querying}[rdoc-ref:String@Querying]. diff --git a/doc/string/swapcase.rdoc b/doc/string/swapcase.rdoc new file mode 100644 index 0000000000..4353c8528a --- /dev/null +++ b/doc/string/swapcase.rdoc @@ -0,0 +1,31 @@ +Returns a string containing the characters in +self+, with cases reversed: + +- Each uppercase character is downcased. +- Each lowercase character is upcased. + +Examples: + + 'Hello'.swapcase # => "hELLO" + 'Straße'.swapcase # => "sTRASSE" + 'RubyGems.org'.swapcase # => "rUBYgEMS.ORG" + +The sizes of +self+ and the swapcased result may differ: + + s = 'Straße' + s.size # => 6 + s.swapcase # => "sTRASSE" + s.swapcase.size # => 7 + +Some characters (and some character sets) do not have upcase and downcase versions; +see {Case Mapping}[rdoc-ref:case_mapping.rdoc]: + + s = '1, 2, 3, ...' + s.swapcase == s # => true + s = 'こんにちは' + s.swapcase == s # => true + +The casing is affected by the given +mapping+, +which may be +:ascii+, +:fold+, or +:turkic+; +see {Case Mappings}[rdoc-ref:case_mapping.rdoc@Case+Mappings]. + +Related: see {Converting to New String}[rdoc-ref:String@Converting+to+New+String]. 
diff --git a/doc/string/unicode_normalize.rdoc b/doc/string/unicode_normalize.rdoc new file mode 100644 index 0000000000..5f733c0fb8 --- /dev/null +++ b/doc/string/unicode_normalize.rdoc @@ -0,0 +1,28 @@ +Returns a copy of +self+ with +{Unicode normalization}[https://unicode.org/reports/tr15] applied. + +Argument +form+ must be one of the following symbols +(see {Unicode normalization forms}[https://unicode.org/reports/tr15/#Norm_Forms]): + +- +:nfc+: Canonical decomposition, followed by canonical composition. +- +:nfd+: Canonical decomposition. +- +:nfkc+: Compatibility decomposition, followed by canonical composition. +- +:nfkd+: Compatibility decomposition. + +The encoding of +self+ must be one of: + +- <tt>Encoding::UTF_8</tt>. +- <tt>Encoding::UTF_16BE</tt>. +- <tt>Encoding::UTF_16LE</tt>. +- <tt>Encoding::UTF_32BE</tt>. +- <tt>Encoding::UTF_32LE</tt>. +- <tt>Encoding::GB18030</tt>. +- <tt>Encoding::UCS_2BE</tt>. +- <tt>Encoding::UCS_4BE</tt>. + +Examples: + + "a\u0300".unicode_normalize # => "à" # Lowercase 'a' with grave accent. + "a\u0300".unicode_normalize(:nfd) # => "à" # Same. + +Related: see {Converting to New String}[rdoc-ref:String@Converting+to+New+String]. diff --git a/doc/string/upcase.rdoc b/doc/string/upcase.rdoc new file mode 100644 index 0000000000..ad859e8973 --- /dev/null +++ b/doc/string/upcase.rdoc @@ -0,0 +1,27 @@ +Returns a new string containing the upcased characters in +self+: + + 'hello'.upcase # => "HELLO" + 'straße'.upcase # => "STRASSE" + 'привет'.upcase # => "ПРИВЕТ" + 'RubyGems.org'.upcase # => "RUBYGEMS.ORG" + +The sizes of +self+ and the upcased result may differ: + + s = 'Straße' + s.size # => 6 + s.upcase # => "STRASSE" + s.upcase.size # => 7 + +Some characters (and some character sets) do not have upcase and downcase versions; +see {Case Mapping}[rdoc-ref:case_mapping.rdoc]: + + s = '1, 2, 3, ...' 
+ s.upcase == s # => true + s = 'こんにちは' + s.upcase == s # => true + +The casing is affected by the given +mapping+, +which may be +:ascii+, +:fold+, or +:turkic+; +see {Case Mappings}[rdoc-ref:case_mapping.rdoc@Case+Mappings]. + +Related: see {Converting to New String}[rdoc-ref:String@Converting+to+New+String]. diff --git a/doc/string/upto.rdoc b/doc/string/upto.rdoc new file mode 100644 index 0000000000..f860fe84fe --- /dev/null +++ b/doc/string/upto.rdoc @@ -0,0 +1,38 @@ +With a block given, calls the block with each +String+ value +returned by successive calls to String#succ; +the first value is +self+, the next is <tt>self.succ</tt>, and so on; +the sequence terminates when value +other_string+ is reached; +returns +self+: + + a = [] + 'a'.upto('f') {|c| a.push(c) } + a # => ["a", "b", "c", "d", "e", "f"] + + a = [] + 'Ж'.upto('П') {|c| a.push(c) } + a # => ["Ж", "З", "И", "Й", "К", "Л", "М", "Н", "О", "П"] + + a = [] + 'よ'.upto('ろ') {|c| a.push(c) } + a # => ["よ", "ら", "り", "る", "れ", "ろ"] + + a = [] + 'a8'.upto('b6') {|c| a.push(c) } + a # => ["a8", "a9", "b0", "b1", "b2", "b3", "b4", "b5", "b6"] + +If argument +exclusive+ is given as a truthy object, the last value is omitted: + + a = [] + 'a'.upto('f', true) {|c| a.push(c) } + a # => ["a", "b", "c", "d", "e"] + +If +other_string+ would not be reached, does not call the block: + + '25'.upto('5') {|s| fail s } + 'aa'.upto('a') {|s| fail s } + +With no block given, returns a new Enumerator: + + 'a8'.upto('b6') # => #<Enumerator: "a8":upto("b6")> + +Related: see {Iterating}[rdoc-ref:String@Iterating]. diff --git a/doc/string/valid_encoding_p.rdoc b/doc/string/valid_encoding_p.rdoc new file mode 100644 index 0000000000..e1db55174a --- /dev/null +++ b/doc/string/valid_encoding_p.rdoc @@ -0,0 +1,8 @@ +Returns whether +self+ is encoded correctly: + + s = 'Straße' + s.valid_encoding? # => true + s.encoding # => #<Encoding:UTF-8> + s.force_encoding(Encoding::ASCII).valid_encoding? 
# => false + +Related: see {Querying}[rdoc-ref:String@Querying]. |
