diff options
Diffstat (limited to 'doc/string')
58 files changed, 1525 insertions, 277 deletions
diff --git a/doc/string/aref.rdoc b/doc/string/aref.rdoc new file mode 100644 index 0000000000..a9ab8857bc --- /dev/null +++ b/doc/string/aref.rdoc @@ -0,0 +1,96 @@ +Returns the substring of +self+ specified by the arguments. + +<b>Form <tt>self[offset]</tt></b> + +With non-negative integer argument +offset+ given, +returns the 1-character substring found in +self+ at character offset +offset+: + + 'hello'[0] # => "h" + 'hello'[4] # => "o" + 'hello'[5] # => nil + 'こんにちは'[4] # => "は" + +With negative integer argument +offset+ given, +counts backward from the end of +self+: + + 'hello'[-1] # => "o" + 'hello'[-5] # => "h" + 'hello'[-6] # => nil + +<b>Form <tt>self[offset, size]</tt></b> + +With integer arguments +offset+ and +size+ given, +returns a substring of size +size+ characters (as available) +beginning at character offset specified by +offset+. + +If argument +offset+ is non-negative, +the offset is +offset+: + + 'hello'[0, 1] # => "h" + 'hello'[0, 5] # => "hello" + 'hello'[0, 6] # => "hello" + 'hello'[2, 3] # => "llo" + 'hello'[2, 0] # => "" + 'hello'[2, -1] # => nil + +If argument +offset+ is negative, +counts backward from the end of +self+: + + 'hello'[-1, 1] # => "o" + 'hello'[-5, 5] # => "hello" + 'hello'[-1, 0] # => "" + 'hello'[-6, 5] # => nil + +Special case: if +offset+ equals the size of +self+, +returns a new empty string: + + 'hello'[5, 3] # => "" + +<b>Form <tt>self[range]</tt></b> + +With Range argument +range+ given, +forms substring <tt>self[range.begin, range.size]</tt>: + + 'hello'[0..2] # => "hel" + 'hello'[0, 3] # => "hel" + + 'hello'[0...2] # => "he" + 'hello'[0, 2] # => "he" + + 'hello'[0, 0] # => "" + 'hello'[0...0] # => "" + +<b>Form <tt>self[regexp, capture = 0]</tt></b> + +With Regexp argument +regexp+ given and +capture+ as zero, +searches for a matching substring in +self+; +updates {Regexp-related global variables}[rdoc-ref:Regexp@Global+Variables]: + + 'hello'[/ell/] # => "ell" + 'hello'[/l+/] # => "ll" + 'hello'[//] # => "" + 
'hello'[/nosuch/] # => nil + +With +capture+ as a positive integer +n+, +returns the +n+th matched group: + + 'hello'[/(h)(e)(l+)(o)/] # => "hello" + 'hello'[/(h)(e)(l+)(o)/, 1] # => "h" + $1 # => "h" + 'hello'[/(h)(e)(l+)(o)/, 2] # => "e" + $2 # => "e" + 'hello'[/(h)(e)(l+)(o)/, 3] # => "ll" + 'hello'[/(h)(e)(l+)(o)/, 4] # => "o" + 'hello'[/(h)(e)(l+)(o)/, 5] # => nil + +<b>Form <tt>self[substring]</tt></b> + +With string argument +substring+ given, +returns the matching substring of +self+, if found: + + 'hello'['ell'] # => "ell" + 'hello'[''] # => "" + 'hello'['nosuch'] # => nil + 'こんにちは'['んにち'] # => "んにち" + +Related: see {Converting to New String}[rdoc-ref:String@Converting+to+New+String]. diff --git a/doc/string/aset.rdoc b/doc/string/aset.rdoc new file mode 100644 index 0000000000..98c58b59cc --- /dev/null +++ b/doc/string/aset.rdoc @@ -0,0 +1,179 @@ +Returns +self+ with all, a substring, or none of its contents replaced; +returns the argument +other_string+. + +<b>Form <tt>self[index] = other_string</tt></b> + +With non-negative integer argument +index+ given, +searches for the 1-character substring found in self at character offset index: + + s = 'hello' + s[0] = 'foo' # => "foo" + s # => "fooello" + + s = 'hello' + s[4] = 'foo' # => "foo" + s # => "hellfoo" + + s = 'hello' + s[5] = 'foo' # => "foo" + s # => "hellofoo" + + s = 'hello' + s[6] = 'foo' # Raises IndexError: index 6 out of string. + +With negative integer argument +index+ given, +counts backward from the end of +self+: + + s = 'hello' + s[-1] = 'foo' # => "foo" + s # => "hellfoo" + + s = 'hello' + s[-5] = 'foo' # => "foo" + s # => "fooello" + + s = 'hello' + s[-6] = 'foo' # Raises IndexError: index -6 out of string. + +<b>Form <tt>self[start, length] = other_string</tt></b> + +With integer arguments +start+ and +length+ given, +searches for a substring of size +length+ characters (as available) +beginning at character offset specified by +start+. 
+ +If argument +start+ is non-negative, +the offset is +start+: + + s = 'hello' + s[0, 1] = 'foo' # => "foo" + s # => "fooello" + + s = 'hello' + s[0, 5] = 'foo' # => "foo" + s # => "foo" + + s = 'hello' + s[0, 9] = 'foo' # => "foo" + s # => "foo" + + s = 'hello' + s[2, 0] = 'foo' # => "foo" + s # => "hefoollo" + + s = 'hello' + s[2, -1] = 'foo' # Raises IndexError: negative length -1. + +If argument +start+ is negative, +counts backward from the end of +self+: + + s = 'hello' + s[-1, 1] = 'foo' # => "foo" + s # => "hellfoo" + + s = 'hello' + s[-1, 9] = 'foo' # => "foo" + s # => "hellfoo" + + s = 'hello' + s[-5, 2] = 'foo' # => "foo" + s # => "foollo" + + s = 'hello' + s[-3, 0] = 'foo' # => "foo" + s # => "hefoollo" + + s = 'hello' + s[-6, 2] = 'foo' # Raises IndexError: index -6 out of string. + +Special case: if +start+ equals the length of +self+, +the argument is appended to +self+: + + s = 'hello' + s[5, 3] = 'foo' # => "foo" + s # => "hellofoo" + +<b>Form <tt>self[range] = other_string</tt></b> + +With Range argument +range+ given, +equivalent to <tt>self[range.begin, range.size] = other_string</tt>: + + s0 = 'hello' + s1 = 'hello' + s0[0..2] = 'foo' # => "foo" + s1[0, 3] = 'foo' # => "foo" + s0 # => "foolo" + s1 # => "foolo" + + s = 'hello' + s[0...2] = 'foo' # => "foo" + s # => "foollo" + + s = 'hello' + s[0...0] = 'foo' # => "foo" + s # => "foohello" + + s = 'hello' + s[9..10] = 'foo' # Raises RangeError: 9..10 out of range + +<b>Form <tt>self[regexp, capture = 0] = other_string</tt></b> + +With Regexp argument +regexp+ given and +capture+ as zero, +searches for a matching substring in +self+; +updates {Regexp-related global variables}[rdoc-ref:Regexp@Global+Variables]: + + s = 'hello' + s[/l/] = 'L' # => "L" + [$`, $&, $'] # => ["he", "l", "lo"] + s[/eLlo/] = 'owdy' # => "owdy" + [$`, $&, $'] # => ["h", "eLlo", ""] + s[/eLlo/] = 'owdy' # Raises IndexError: regexp not matched. 
+ [$`, $&, $'] # => [nil, nil, nil] + +With +capture+ as a positive integer +n+, +searches for the +n+th matched group: + + s = 'hello' + s[/(h)(e)(l+)(o)/] = 'foo' # => "foo" + [$`, $&, $'] # => ["", "hello", ""] + + s = 'hello' + s[/(h)(e)(l+)(o)/, 1] = 'foo' # => "foo" + s # => "fooello" + [$`, $&, $'] # => ["", "hello", ""] + + s = 'hello' + s[/(h)(e)(l+)(o)/, 2] = 'foo' # => "foo" + s # => "hfoollo" + [$`, $&, $'] # => ["", "hello", ""] + + s = 'hello' + s[/(h)(e)(l+)(o)/, 4] = 'foo' # => "foo" + s # => "hellfoo" + [$`, $&, $'] # => ["", "hello", ""] + + s = 'hello' + # => "hello" + s[/(h)(e)(l+)(o)/, 5] = 'foo' # Raises IndexError: index 5 out of regexp. + + s = 'hello' + s[/nosuch/] = 'foo' # Raises IndexError: regexp not matched. + +<b>Form <tt>self[substring] = other_string</tt></b> + +With string argument +substring+ given: + + s = 'hello' + s['l'] = 'foo' # => "foo" + s # => "hefoolo" + + s = 'hello' + s['ll'] = 'foo' # => "foo" + s # => "hefooo" + + s = 'こんにちは' + s['んにち'] = 'foo' # => "foo" + s # => "こfooは" + + s['nosuch'] = 'foo' # Raises IndexError: string not matched. + +Related: see {Modifying}[rdoc-ref:String@Modifying]. diff --git a/doc/string/b.rdoc b/doc/string/b.rdoc index f8ad2910b4..8abd6d9532 100644 --- a/doc/string/b.rdoc +++ b/doc/string/b.rdoc @@ -12,3 +12,5 @@ the underlying bytes are not modified: t = s.b # => "\xE4\x82\x95" t.encoding # => #<Encoding:ASCII-8BIT> t.bytes # => [228, 130, 149] + +Related: see {Converting to New String}[rdoc-ref:String@Converting+to+New+String]. 
diff --git a/doc/string/bytes.rdoc b/doc/string/bytes.rdoc index a9e89f1cd1..6dde0a745d 100644 --- a/doc/string/bytes.rdoc +++ b/doc/string/bytes.rdoc @@ -1,6 +1,7 @@ Returns an array of the bytes in +self+: - 'hello'.bytes # => [104, 101, 108, 108, 111] - 'тест'.bytes # => [209, 130, 208, 181, 209, 129, 209, 130] + 'hello'.bytes # => [104, 101, 108, 108, 111] 'こんにちは'.bytes # => [227, 129, 147, 227, 130, 147, 227, 129, 171, 227, 129, 161, 227, 129, 175] + +Related: see {Converting to Non-String}[rdoc-ref:String@Converting+to+Non--5CString]. diff --git a/doc/string/bytesize.rdoc b/doc/string/bytesize.rdoc index b0567ff67b..8d12a0d454 100644 --- a/doc/string/bytesize.rdoc +++ b/doc/string/bytesize.rdoc @@ -1,11 +1,12 @@ -Returns the count of bytes (not characters) in +self+: +Returns the count of bytes in +self+. - 'foo'.bytesize # => 3 - 'тест'.bytesize # => 8 - 'こんにちは'.bytesize # => 15 +Note that the byte count may be different from the character count (returned by #size): -Contrast with String#length: + s = 'foo' + s.bytesize # => 3 + s.size # => 3 + s = 'こんにちは' + s.bytesize # => 15 + s.size # => 5 - 'foo'.length # => 3 - 'тест'.length # => 4 - 'こんにちは'.length # => 5 +Related: see {Querying}[rdoc-ref:String@Querying]. diff --git a/doc/string/byteslice.rdoc b/doc/string/byteslice.rdoc new file mode 100644 index 0000000000..d70441fb2b --- /dev/null +++ b/doc/string/byteslice.rdoc @@ -0,0 +1,54 @@ +Returns a substring of +self+, or +nil+ if the substring cannot be constructed. 
+ +With integer arguments +offset+ and +length+ given, +returns the substring beginning at the given +offset+ +and of the given +length+ (as available): + + s = '0123456789' # => "0123456789" + s.byteslice(2) # => "2" + s.byteslice(200) # => nil + s.byteslice(4, 3) # => "456" + s.byteslice(4, 30) # => "456789" + +Returns +nil+ if +length+ is negative or +offset+ falls outside of +self+: + + s.byteslice(4, -1) # => nil + s.byteslice(40, 2) # => nil + +Counts backwards from the end of +self+ +if +offset+ is negative: + + s = '0123456789' # => "0123456789" + s.byteslice(-4) # => "6" + s.byteslice(-4, 3) # => "678" + +With Range argument +range+ given, returns +<tt>byteslice(range.begin, range.size)</tt>: + + s = '0123456789' # => "0123456789" + s.byteslice(4..6) # => "456" + s.byteslice(-6..-4) # => "456" + s.byteslice(5..2) # => "" # range.size is zero. + s.byteslice(40..42) # => nil + +The starting and ending offsets need not be on character boundaries: + + s = 'こんにちは' + s.byteslice(0, 3) # => "こ" + s.byteslice(1, 3) # => "\x81\x93\xE3" + +The encodings of +self+ and the returned substring +are always the same: + + s.encoding # => #<Encoding:UTF-8> + s.byteslice(0, 3).encoding # => #<Encoding:UTF-8> + s.byteslice(1, 3).encoding # => #<Encoding:UTF-8> + +But, depending on the character boundaries, +the encoding of the returned substring may not be valid: + + s.valid_encoding? # => true + s.byteslice(0, 3).valid_encoding? # => true + s.byteslice(1, 3).valid_encoding? # => false + +Related: see {Converting to New String}[rdoc-ref:String@Converting+to+New+String]. diff --git a/doc/string/bytesplice.rdoc b/doc/string/bytesplice.rdoc new file mode 100644 index 0000000000..5689ef4a2b --- /dev/null +++ b/doc/string/bytesplice.rdoc @@ -0,0 +1,66 @@ +Replaces <i>target bytes</i> in +self+ with <i>source bytes</i> from the given string +str+; +returns +self+. 
+ +In the first form, arguments +offset+ and +length+ determine the target bytes, +and the source bytes are all of the given +str+: + + '0123456789'.bytesplice(0, 3, 'abc') # => "abc3456789" + '0123456789'.bytesplice(3, 3, 'abc') # => "012abc6789" + '0123456789'.bytesplice(0, 50, 'abc') # => "abc" + '0123456789'.bytesplice(50, 3, 'abc') # Raises IndexError. + +The counts of the target bytes and source bytes may be different: + + '0123456789'.bytesplice(0, 6, 'abc') # => "abc6789" # Shorter source. + '0123456789'.bytesplice(0, 1, 'abc') # => "abc123456789" # Shorter target. + +And either count may be zero (i.e., specifying an empty string): + + '0123456789'.bytesplice(0, 3, '') # => "3456789" # Empty source. + '0123456789'.bytesplice(0, 0, 'abc') # => "abc0123456789" # Empty target. + +In the second form, just as in the first, +arguments +offset+ and +length+ determine the target bytes; +argument +str+ _contains_ the source bytes, +and the additional arguments +str_offset+ and +str_length+ +determine the actual source bytes: + + '0123456789'.bytesplice(0, 3, 'abc', 0, 3) # => "abc3456789" + '0123456789'.bytesplice(0, 3, 'abc', 1, 1) # => "b3456789" # Shorter source. + '0123456789'.bytesplice(0, 1, 'abc', 0, 3) # => "abc123456789" # Shorter target. + '0123456789'.bytesplice(0, 3, 'abc', 1, 0) # => "3456789" # Empty source. + '0123456789'.bytesplice(0, 0, 'abc', 0, 3) # => "abc0123456789" # Empty target. + +In the third form, argument +range+ determines the target bytes +and the source bytes are all of the given +str+: + + '0123456789'.bytesplice(0..2, 'abc') # => "abc3456789" + '0123456789'.bytesplice(3..5, 'abc') # => "012abc6789" + '0123456789'.bytesplice(0..5, 'abc') # => "abc6789" # Shorter source. + '0123456789'.bytesplice(0..0, 'abc') # => "abc123456789" # Shorter target. + '0123456789'.bytesplice(0..2, '') # => "3456789" # Empty source. + '0123456789'.bytesplice(0...0, 'abc') # => "abc0123456789" # Empty target. 
+ +In the fourth form, just as in the third, +argument +range+ determines the target bytes; +argument +str+ _contains_ the source bytes, +and the additional argument +str_range+ +determines the actual source bytes: + + '0123456789'.bytesplice(0..2, 'abc', 0..2) # => "abc3456789" + '0123456789'.bytesplice(3..5, 'abc', 0..2) # => "012abc6789" + '0123456789'.bytesplice(0..2, 'abc', 0..1) # => "ab3456789" # Shorter source. + '0123456789'.bytesplice(0..1, 'abc', 0..2) # => "abc23456789" # Shorter target. + '0123456789'.bytesplice(0..2, 'abc', 0...0) # => "3456789" # Empty source. + '0123456789'.bytesplice(0...0, 'abc', 0..2) # => "abc0123456789" # Empty target. + +In any of the forms, the beginnings and endings of both source and target +must be on character boundaries. + +In these examples, +self+ has five 3-byte characters, +and so has character boundaries at offsets 0, 3, 6, 9, 12, and 15. + + 'こんにちは'.bytesplice(0, 3, 'abc') # => "abcんにちは" + 'こんにちは'.bytesplice(1, 3, 'abc') # Raises IndexError. + 'こんにちは'.bytesplice(0, 2, 'abc') # Raises IndexError. + diff --git a/doc/string/capitalize.rdoc b/doc/string/capitalize.rdoc new file mode 100644 index 0000000000..3a1a2dcb8b --- /dev/null +++ b/doc/string/capitalize.rdoc @@ -0,0 +1,26 @@ +Returns a string containing the characters in +self+, +each with possibly changed case: + +- The first character is made uppercase. +- All other characters are made lowercase. + +Examples: + + 'hello'.capitalize # => "Hello" + 'HELLO'.capitalize # => "Hello" + 'straße'.capitalize # => "Straße" # Lowercase 'ß' not changed. + 'STRAẞE'.capitalize # => "Straße" # Uppercase 'ẞ' downcased to 'ß'. + +Some characters (and some character sets) do not have upcase and downcase versions; +see {Case Mapping}[rdoc-ref:case_mapping.rdoc]: + + s = '1, 2, 3, ...' 
+ s.capitalize == s # => true + s = 'こんにちは' + s.capitalize == s # => true + +The casing is affected by the given +mapping+, +which may be +:ascii+, +:fold+, or +:turkic+; +see {Case Mappings}[rdoc-ref:case_mapping.rdoc@Case+Mappings]. + +Related: see {Converting to New String}[rdoc-ref:String@Converting+to+New+String]. diff --git a/doc/string/center.rdoc b/doc/string/center.rdoc index d53d921ad5..b86c8b5916 100644 --- a/doc/string/center.rdoc +++ b/doc/string/center.rdoc @@ -2,15 +2,18 @@ Returns a centered copy of +self+. If integer argument +size+ is greater than the size (in characters) of +self+, returns a new string of length +size+ that is a copy of +self+, -centered and padded on both ends with +pad_string+: +centered and padded on one or both ends with +pad_string+: - 'hello'.center(10) # => " hello " - ' hello'.center(10) # => " hello " - 'hello'.center(10, 'ab') # => "abhelloaba" - 'тест'.center(10) # => " тест " - 'こんにちは'.center(10) # => " こんにちは " + 'hello'.center(6) # => "hello " # Padded on one end. + 'hello'.center(10) # => " hello " # Padded on both ends. + 'hello'.center(20, '-|') # => "-|-|-|-hello-|-|-|-|" # Some padding repeated. + 'hello'.center(10, 'abcdefg') # => "abhelloabc" # Some padding not used. + ' hello '.center(13) # => " hello " + 'こんにちは'.center(10) # => " こんにちは " # Multi-byte characters. -If +size+ is not greater than the size of +self+, returns a copy of +self+: +If +size+ is less than or equal to the size of +self+, returns an unpadded copy of +self+: - 'hello'.center(5) # => "hello" - 'hello'.center(1) # => "hello" + 'hello'.center(5) # => "hello" + 'hello'.center(-10) # => "hello" + +Related: see {Converting to New String}[rdoc-ref:String@Converting+to+New+String]. 
diff --git a/doc/string/chars.rdoc b/doc/string/chars.rdoc index d24a1cc3a9..d4d15bf2ad 100644 --- a/doc/string/chars.rdoc +++ b/doc/string/chars.rdoc @@ -1,5 +1,7 @@ Returns an array of the characters in +self+: 'hello'.chars # => ["h", "e", "l", "l", "o"] - 'тест'.chars # => ["т", "е", "с", "т"] 'こんにちは'.chars # => ["こ", "ん", "に", "ち", "は"] + ''.chars # => [] + +Related: see {Converting to Non-String}[rdoc-ref:String@Converting+to+Non--5CString]. diff --git a/doc/string/chomp.rdoc b/doc/string/chomp.rdoc index b6fb9ff38c..4efff5c291 100644 --- a/doc/string/chomp.rdoc +++ b/doc/string/chomp.rdoc @@ -9,7 +9,6 @@ if they are <tt>"\r"</tt>, <tt>"\n"</tt>, or <tt>"\r\n"</tt> "abc\n".chomp # => "abc" "abc\r\n".chomp # => "abc" "abc\n\r".chomp # => "abc\n" - "тест\r\n".chomp # => "тест" "こんにちは\r\n".chomp # => "こんにちは" When +line_sep+ is <tt>''</tt> (an empty string), @@ -25,5 +24,8 @@ removes multiple trailing occurrences of <tt>"\n"</tt> or <tt>"\r\n"</tt> When +line_sep+ is neither <tt>"\n"</tt> nor <tt>''</tt>, removes a single trailing line separator if there is one: - 'abcd'.chomp('d') # => "abc" - 'abcdd'.chomp('d') # => "abcd" + 'abcd'.chomp('cd') # => "ab" + 'abcdcd'.chomp('cd') # => "abcd" + 'abcd'.chomp('xx') # => "abcd" + +Related: see {Converting to New String}[rdoc-ref:String@Converting+to+New+String]. diff --git a/doc/string/chop.rdoc b/doc/string/chop.rdoc index 8ef82f8a49..d818ba467a 100644 --- a/doc/string/chop.rdoc +++ b/doc/string/chop.rdoc @@ -3,14 +3,15 @@ Returns a new string copied from +self+, with trailing characters possibly remov Removes <tt>"\r\n"</tt> if those are the last two characters. "abc\r\n".chop # => "abc" - "тест\r\n".chop # => "тест" "こんにちは\r\n".chop # => "こんにちは" Otherwise removes the last character if it exists. 'abcd'.chop # => "abc" - 'тест'.chop # => "тес" 'こんにちは'.chop # => "こんにち" ''.chop # => "" -If you only need to remove the newline separator at the end of the string, String#chomp is a better alternative. 
+If you only need to remove the newline separator at the end of the string, +String#chomp is a better alternative. + +Related: see {Converting to New String}[rdoc-ref:String@Converting+to+New+String]. diff --git a/doc/string/chr.rdoc b/doc/string/chr.rdoc new file mode 100644 index 0000000000..153d5d71c3 --- /dev/null +++ b/doc/string/chr.rdoc @@ -0,0 +1,7 @@ +Returns a string containing the first character of +self+: + + 'hello'.chr # => "h" + 'こんにちは'.chr # => "こ" + ''.chr # => "" + +Related: see {Converting to New String}[rdoc-ref:String@Converting+to+New+String]. diff --git a/doc/string/codepoints.rdoc b/doc/string/codepoints.rdoc index 0c55d3f4b9..22cb22c889 100644 --- a/doc/string/codepoints.rdoc +++ b/doc/string/codepoints.rdoc @@ -2,5 +2,7 @@ Returns an array of the codepoints in +self+; each codepoint is the integer value for a character: 'hello'.codepoints # => [104, 101, 108, 108, 111] - 'тест'.codepoints # => [1090, 1077, 1089, 1090] 'こんにちは'.codepoints # => [12371, 12435, 12395, 12385, 12399] + ''.codepoints # => [] + +Related: see {Converting to Non-String}[rdoc-ref:String@Converting+to+Non--5CString]. diff --git a/doc/string/concat.rdoc b/doc/string/concat.rdoc new file mode 100644 index 0000000000..92ba664b8c --- /dev/null +++ b/doc/string/concat.rdoc @@ -0,0 +1,11 @@ +Concatenates each object in +objects+ to +self+; returns +self+: + + 'foo'.concat('bar', 'baz') # => "foobarbaz" + +For each given object +object+ that is an integer, +the value is considered a codepoint and converted to a character before concatenation: + + 'foo'.concat(32, 'bar', 32, 'baz') # => "foo bar baz" # Embeds spaces. + 'こん'.concat(12395, 12385, 12399) # => "こんにちは" + +Related: see {Converting to New String}[rdoc-ref:String@Converting+to+New+String]. 
diff --git a/doc/string/count.rdoc b/doc/string/count.rdoc new file mode 100644 index 0000000000..7a3b9f1e21 --- /dev/null +++ b/doc/string/count.rdoc @@ -0,0 +1,74 @@ +Returns the total number of characters in +self+ that are specified by the given selectors. + +For one 1-character selector, +returns the count of instances of that character: + + s = 'abracadabra' + s.count('a') # => 5 + s.count('b') # => 2 + s.count('x') # => 0 + s.count('') # => 0 + + s = 'よろしくお願いします' + s.count('よ') # => 1 + s.count('し') # => 2 + +For one multi-character selector, +returns the count of instances for all specified characters: + + s = 'abracadabra' + s.count('ab') # => 7 + s.count('abc') # => 8 + s.count('abcd') # => 9 + s.count('abcdr') # => 11 + s.count('abcdrx') # => 11 + +Order and repetition do not matter: + + s.count('ba') == s.count('ab') # => true + s.count('baab') == s.count('ab') # => true + +For multiple selectors, +forms a single selector that is the intersection of characters in all selectors +and returns the count of instances for that selector: + + s = 'abcdefg' + s.count('abcde', 'dcbfg') == s.count('bcd') # => true + s.count('abc', 'def') == s.count('') # => true + +In a character selector, three characters get special treatment: + +- A caret (<tt>'^'</tt>) functions as a _negation_ operator + for the immediately following characters: + + s = 'abracadabra' + s.count('^bc') # => 8 # Count of all except 'b' and 'c'. + +- A hyphen (<tt>'-'</tt>) between two other characters defines a _range_ of characters: + + s = 'abracadabra' + s.count('a-c') # => 8 # Count of all 'a', 'b', and 'c'. + +- A backslash (<tt>'\'</tt>) acts as an escape for a caret, a hyphen, + or another backslash: + + s = 'abracadabra' + s.count('\^bc') # => 3 # Count of '^', 'b', and 'c'. + s.count('a\-c') # => 6 # Count of 'a', '-', and 'c'. + 'foo\bar\baz'.count('\\') # => 2 # Count of '\'. + +These usages may be mixed: + + s = 'abracadabra' + s.count('a-cq-t') # => 10 # Multiple ranges. 
+ s.count('ac-d') # => 7 # Range mixed with plain characters. + s.count('^a-c') # => 3 # Range mixed with negation. + +For multiple selectors, all forms may be used, including negations, ranges, and escapes. + + s = 'abracadabra' + s.count('^abc', '^def') == s.count('^abcdef') # => true + s.count('a-e', 'c-g') == s.count('cde') # => true + s.count('^abc', 'c-g') == s.count('defg') # => true + +Related: see {Querying}[rdoc-ref:String@Querying]. diff --git a/doc/string/delete.rdoc b/doc/string/delete.rdoc new file mode 100644 index 0000000000..1827f177e6 --- /dev/null +++ b/doc/string/delete.rdoc @@ -0,0 +1,75 @@ +Returns a new string that is a copy of +self+ with certain characters removed; +the removed characters are all instances of those specified by the given string +selectors+. + +For one 1-character selector, +removes all instances of that character: + + s = 'abracadabra' + s.delete('a') # => "brcdbr" + s.delete('b') # => "aracadara" + s.delete('x') # => "abracadabra" + s.delete('') # => "abracadabra" + + s = 'よろしくお願いします' + s.delete('よ') # => "ろしくお願いします" + s.delete('し') # => "よろくお願います" + +For one multi-character selector, +removes all instances of the specified characters: + + s = 'abracadabra' + s.delete('ab') # => "rcdr" + s.delete('abc') # => "rdr" + s.delete('abcd') # => "rr" + s.delete('abcdr') # => "" + s.delete('abcdrx') # => "" + +Order and repetition do not matter: + + s.delete('ba') == s.delete('ab') # => true + s.delete('baab') == s.delete('ab') # => true + +For multiple selectors, +forms a single selector that is the intersection of characters in all selectors +and removes all instances of characters specified by that selector: + + s = 'abcdefg' + s.delete('abcde', 'dcbfg') == s.delete('bcd') # => true + s.delete('abc', 'def') == s.delete('') # => true + +In a character selector, three characters get special treatment: + +- A caret (<tt>'^'</tt>) functions as a _negation_ operator + for the immediately following characters: + + s = 'abracadabra' + 
s.delete('^bc') # => "bcb" # Deletes all except 'b' and 'c'. + +- A hyphen (<tt>'-'</tt>) between two other characters defines a _range_ of characters: + + s = 'abracadabra' + s.delete('a-c') # => "rdr" # Deletes all 'a', 'b', and 'c'. + +- A backslash (<tt>'\'</tt>) acts as an escape for a caret, a hyphen, + or another backslash: + + s = 'abracadabra' + s.delete('\^bc') # => "araadara" # Deletes all '^', 'b', and 'c'. + s.delete('a\-c') # => "brdbr" # Deletes all 'a', '-', and 'c'. + 'foo\bar\baz'.delete('\\') # => "foobarbaz" # Deletes all '\'. + +These usages may be mixed: + + s = 'abracadabra' + s.delete('a-cq-t') # => "d" # Multiple ranges. + s.delete('ac-d') # => "brbr" # Range mixed with plain characters. + s.delete('^a-c') # => "abacaaba" # Range mixed with negation. + +For multiple selectors, all forms may be used, including negations, ranges, and escapes. + + s = 'abracadabra' + s.delete('^abc', '^def') == s.delete('^abcdef') # => true + s.delete('a-e', 'c-g') == s.delete('cde') # => true + s.delete('^abc', 'c-g') == s.delete('defg') # => true + +Related: see {Converting to New String}[rdoc-ref:String@Converting+to+New+String]. diff --git a/doc/string/delete_prefix.rdoc b/doc/string/delete_prefix.rdoc index fa9d8abd38..6255e300e3 100644 --- a/doc/string/delete_prefix.rdoc +++ b/doc/string/delete_prefix.rdoc @@ -1,8 +1,9 @@ -Returns a copy of +self+ with leading substring <tt>prefix</tt> removed: +Returns a copy of +self+ with leading substring +prefix+ removed: - 'hello'.delete_prefix('hel') # => "lo" - 'hello'.delete_prefix('llo') # => "hello" - 'тест'.delete_prefix('те') # => "ст" + 'oof'.delete_prefix('o') # => "of" + 'oof'.delete_prefix('oo') # => "f" + 'oof'.delete_prefix('oof') # => "" + 'oof'.delete_prefix('x') # => "oof" 'こんにちは'.delete_prefix('こん') # => "にちは" -Related: String#delete_prefix!, String#delete_suffix. +Related: see {Converting to New String}[rdoc-ref:String@Converting+to+New+String]. 
diff --git a/doc/string/delete_suffix.rdoc b/doc/string/delete_suffix.rdoc index 4862b725cf..a4d9a80f85 100644 --- a/doc/string/delete_suffix.rdoc +++ b/doc/string/delete_suffix.rdoc @@ -1,8 +1,10 @@ Returns a copy of +self+ with trailing substring <tt>suffix</tt> removed: - 'hello'.delete_suffix('llo') # => "he" - 'hello'.delete_suffix('hel') # => "hello" - 'тест'.delete_suffix('ст') # => "те" + 'foo'.delete_suffix('o') # => "fo" + 'foo'.delete_suffix('oo') # => "f" + 'foo'.delete_suffix('foo') # => "" + 'foo'.delete_suffix('f') # => "foo" + 'foo'.delete_suffix('x') # => "foo" 'こんにちは'.delete_suffix('ちは') # => "こんに" -Related: String#delete_suffix!, String#delete_prefix. +Related: see {Converting to New String}[rdoc-ref:String@Converting+to+New+String]. diff --git a/doc/string/downcase.rdoc b/doc/string/downcase.rdoc new file mode 100644 index 0000000000..d5fffa037b --- /dev/null +++ b/doc/string/downcase.rdoc @@ -0,0 +1,20 @@ +Returns a new string containing the downcased characters in +self+: + + 'HELLO'.downcase # => "hello" + 'STRAẞE'.downcase # => "straße" + 'ПРИВЕТ'.downcase # => "привет" + 'RubyGems.org'.downcase # => "rubygems.org" + +Some characters (and some character sets) do not have upcase and downcase versions; +see {Case Mapping}[rdoc-ref:case_mapping.rdoc]: + + s = '1, 2, 3, ...' + s.downcase == s # => true + s = 'こんにちは' + s.downcase == s # => true + +The casing is affected by the given +mapping+, +which may be +:ascii+, +:fold+, or +:turkic+; +see {Case Mappings}[rdoc-ref:case_mapping.rdoc@Case+Mappings]. + +Related: see {Converting to New String}[rdoc-ref:String@Converting+to+New+String]. diff --git a/doc/string/dump.rdoc b/doc/string/dump.rdoc new file mode 100644 index 0000000000..add3c35662 --- /dev/null +++ b/doc/string/dump.rdoc @@ -0,0 +1,89 @@ +For an ordinary string, this method, +String#dump+, +returns a printable ASCII-only version of +self+, enclosed in double-quotes. 
+ +For a dumped string, method String#undump is the inverse of +String#dump+; +it returns a "restored" version of +self+, +where all the dumping changes have been undone. + +In the simplest case, the dumped string contains the original string, +enclosed in double-quotes; +this example is done in +irb+ (interactive Ruby), which uses method +inspect+ to render the results: + + s = 'hello' # => "hello" + s.dump # => "\"hello\"" + s.dump.undump # => "hello" + +Keep in mind that in the second line above: + +- The outer double-quotes are put on by +inspect+, + and _are_ _not_ part of the output of #dump. +- The inner double-quotes _are_ part of the output of +dump+, + and are escaped by +inspect+ because they are within the outer double-quotes. + +To avoid confusion, we'll use this helper method to omit the outer double-quotes: + + def dump(s) + print "String: ", s, "\n" + print "Dumped: ", s.dump, "\n" + print "Undumped: ", s.dump.undump, "\n" + end + +So that for string <tt>'hello'</tt>, we'll see: + + String: hello + Dumped: "hello" + Undumped: hello + +In a dump, certain special characters are escaped: + + String: " + Dumped: "\"" + Undumped: " + + String: \ + Dumped: "\\" + Undumped: \ + +In a dump, unprintable characters are replaced by printable ones; +the unprintable characters are the whitespace characters (other than space itself); +here we see the ordinals for those characters, together with explanatory text: + + h = { + 7 => 'Alert (BEL)', + 8 => 'Backspace (BS)', + 9 => 'Horizontal tab (HT)', + 10 => 'Linefeed (LF)', + 11 => 'Vertical tab (VT)', + 12 => 'Formfeed (FF)', + 13 => 'Carriage return (CR)' + } + +In this example, the dumped output is printed by method #inspect, +and so contains both outer double-quotes and escaped inner double-quotes: + + s = '' + h.keys.each {|i| s << i } # => [7, 8, 9, 10, 11, 12, 13] + s # => "\a\b\t\n\v\f\r" + s.dump # => "\"\\a\\b\\t\\n\\v\\f\\r\"" + +If +self+ is encoded in UTF-8 and contains Unicode characters, +each Unicode 
character is dumped as a Unicode escape sequence: + + String: こんにちは + Dumped: "\u3053\u3093\u306B\u3061\u306F" + Undumped: こんにちは + +If the encoding of +self+ is not ASCII-compatible +(i.e., if <tt>self.encoding.ascii_compatible?</tt> returns +false+), +each ASCII-compatible byte is dumped as an ASCII character, +and all other bytes are dumped as hexadecimal; +also appends <tt>.dup.force_encoding(\"encoding\")</tt>, +where <tt><encoding></tt> is <tt>self.encoding.name</tt>: + + String: hello + Dumped: "\xFE\xFF\x00h\x00e\x00l\x00l\x00o".dup.force_encoding("UTF-16") + Undumped: hello + + String: こんにちは + Dumped: "\xFE\xFF0S0\x930k0a0o".dup.force_encoding("UTF-16") + Undumped: こんにちは diff --git a/doc/string/each_byte.rdoc b/doc/string/each_byte.rdoc index 643118fea3..642d71e84b 100644 --- a/doc/string/each_byte.rdoc +++ b/doc/string/each_byte.rdoc @@ -1,17 +1,15 @@ -Calls the given block with each successive byte from +self+; +With a block given, calls the block with each successive byte from +self+; returns +self+: - 'hello'.each_byte {|byte| print byte, ' ' } - print "\n" - 'тест'.each_byte {|byte| print byte, ' ' } - print "\n" - 'こんにちは'.each_byte {|byte| print byte, ' ' } - print "\n" + a = [] + 'hello'.each_byte {|byte| a.push(byte) } # Five 1-byte characters. + a # => [104, 101, 108, 108, 111] + a = [] + 'こんにちは'.each_byte {|byte| a.push(byte) } # Five 3-byte characters. + a # => [227, 129, 147, 227, 130, 147, 227, 129, 171, 227, 129, 161, 227, 129, 175] -Output: +With no block given, returns an enumerator. + +Related: see {Iterating}[rdoc-ref:String@Iterating]. - 104 101 108 108 111 - 209 130 208 181 209 129 209 130 - 227 129 147 227 130 147 227 129 171 227 129 161 227 129 175 -Returns an enumerator if no block is given. 
diff --git a/doc/string/each_char.rdoc b/doc/string/each_char.rdoc index e5ae5a1812..2dd56711d3 100644 --- a/doc/string/each_char.rdoc +++ b/doc/string/each_char.rdoc @@ -1,17 +1,17 @@ -Calls the given block with each successive character from +self+; +With a block given, calls the block with each successive character from +self+; returns +self+: - 'hello'.each_char {|char| print char, ' ' } - print "\n" - 'тест'.each_char {|char| print char, ' ' } - print "\n" - 'こんにちは'.each_char {|char| print char, ' ' } - print "\n" + a = [] + 'hello'.each_char do |char| + a.push(char) + end + a # => ["h", "e", "l", "l", "o"] + a = [] + 'こんにちは'.each_char do |char| + a.push(char) + end + a # => ["こ", "ん", "に", "ち", "は"] -Output: +With no block given, returns an enumerator. - h e l l o - т е с т - こ ん に ち は - -Returns an enumerator if no block is given. +Related: see {Iterating}[rdoc-ref:String@Iterating]. diff --git a/doc/string/each_codepoint.rdoc b/doc/string/each_codepoint.rdoc index 88bfcbd1c0..8e4e7545e6 100644 --- a/doc/string/each_codepoint.rdoc +++ b/doc/string/each_codepoint.rdoc @@ -1,18 +1,18 @@ -Calls the given block with each successive codepoint from +self+; -each codepoint is the integer value for a character; +With a block given, calls the block with each successive codepoint from +self+; +each {codepoint}[https://en.wikipedia.org/wiki/Code_point] is the integer value for a character; returns +self+: - 'hello'.each_codepoint {|codepoint| print codepoint, ' ' } - print "\n" - 'тест'.each_codepoint {|codepoint| print codepoint, ' ' } - print "\n" - 'こんにちは'.each_codepoint {|codepoint| print codepoint, ' ' } - print "\n" + a = [] + 'hello'.each_codepoint do |codepoint| + a.push(codepoint) + end + a # => [104, 101, 108, 108, 111] + a = [] + 'こんにちは'.each_codepoint do |codepoint| + a.push(codepoint) + end + a # => [12371, 12435, 12395, 12385, 12399] -Output: +With no block given, returns an enumerator. 
- 104 101 108 108 111 - 1090 1077 1089 1090 - 12371 12435 12395 12385 12399 - -Returns an enumerator if no block is given. +Related: see {Iterating}[rdoc-ref:String@Iterating]. diff --git a/doc/string/each_grapheme_cluster.rdoc b/doc/string/each_grapheme_cluster.rdoc index 40be95fcac..384cd6967d 100644 --- a/doc/string/each_grapheme_cluster.rdoc +++ b/doc/string/each_grapheme_cluster.rdoc @@ -1,12 +1,19 @@ -Calls the given block with each successive grapheme cluster from +self+ +With a block given, calls the given block with each successive grapheme cluster from +self+ (see {Unicode Grapheme Cluster Boundaries}[https://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries]); returns +self+: - s = "\u0061\u0308-pqr-\u0062\u0308-xyz-\u0063\u0308" # => "ä-pqr-b̈-xyz-c̈" - s.each_grapheme_cluster {|gc| print gc, ' ' } + a = [] + 'hello'.each_grapheme_cluster do |grapheme_cluster| + a.push(grapheme_cluster) + end + a # => ["h", "e", "l", "l", "o"] -Output: + a = [] + 'こんにちは'.each_grapheme_cluster do |grapheme_cluster| + a.push(grapheme_cluster) + end + a # => ["こ", "ん", "に", "ち", "は"] - ä - p q r - b̈ - x y z - c̈ +With no block given, returns an enumerator. -Returns an enumerator if no block is given. +Related: see {Iterating}[rdoc-ref:String@Iterating]. diff --git a/doc/string/each_line.rdoc b/doc/string/each_line.rdoc index e254c22d40..217c188e35 100644 --- a/doc/string/each_line.rdoc +++ b/doc/string/each_line.rdoc @@ -1,9 +1,12 @@ -With a block given, forms the substrings ("lines") +With a block given, forms the substrings (lines) that are the result of splitting +self+ -at each occurrence of the given line separator +line_sep+; +at each occurrence of the given +record_separator+; passes each line to the block; -returns +self+: +returns +self+. +With the default +record_separator+: + + $/ # => "\n" s = <<~EOT This is the first line. This is line two. @@ -11,7 +14,6 @@ returns +self+: This is line four. This is line five. 
EOT - s.each_line {|line| p line } Output: @@ -22,9 +24,10 @@ Output: "This is line four.\n" "This is line five.\n" -With a different +line_sep+: +With a different +record_separator+: - s.each_line(' is ') {|line| p line } + record_separator = ' is ' + s.each_line(record_separator) {|line| p line } Output: @@ -34,7 +37,7 @@ Output: "line four.\nThis is " "line five.\n" -With +chomp+ as +true+, removes the trailing +line_sep+ from each line: +With +chomp+ as +true+, removes the trailing +record_separator+ from each line: s.each_line(chomp: true) {|line| p line } @@ -46,11 +49,12 @@ Output: "This is line four." "This is line five." -With an empty string as +line_sep+, +With an empty string as +record_separator+, forms and passes "paragraphs" by splitting at each occurrence of two or more newlines: - s.each_line('') {|line| p line } + record_separator = '' + s.each_line(record_separator) {|line| p line } Output: @@ -58,3 +62,5 @@ Output: "This is line four.\nThis is line five.\n" With no block given, returns an enumerator. + +Related: see {Iterating}[rdoc-ref:String@Iterating]. diff --git a/doc/string/encode.rdoc b/doc/string/encode.rdoc index 65872fdfd4..14b959ffff 100644 --- a/doc/string/encode.rdoc +++ b/doc/string/encode.rdoc @@ -1,4 +1,6 @@ -Returns a copy of +self+ transcoded as determined by +dst_encoding+. +Returns a copy of +self+ transcoded as determined by +dst_encoding+; +see {Encodings}[rdoc-ref:encodings.rdoc]. + By default, raises an exception if +self+ contains an invalid byte or a character not defined in +dst_encoding+; that behavior may be modified by encoding options; see below. @@ -45,3 +47,4 @@ given, conversion from an encoding +enc+ to the same encoding +enc+ no-op, i.e. the string is simply copied without any changes, and no exceptions are raised, even if there are invalid bytes. +Related: see {Converting to New String}[rdoc-ref:String@Converting+to+New+String]. 
diff --git a/doc/string/end_with_p.rdoc b/doc/string/end_with_p.rdoc index f959cf7aaa..9a95d74fde 100644 --- a/doc/string/end_with_p.rdoc +++ b/doc/string/end_with_p.rdoc @@ -1,11 +1,9 @@ -Returns whether +self+ ends with any of the given +strings+. +Returns whether +self+ ends with any of the given +strings+: -Returns +true+ if any given string matches the end, +false+ otherwise: + 'foo'.end_with?('oo') # => true + 'foo'.end_with?('bar', 'oo') # => true + 'foo'.end_with?('bar', 'baz') # => false + 'foo'.end_with?('') # => true + 'こんにちは'.end_with?('は') # => true - 'hello'.end_with?('ello') #=> true - 'hello'.end_with?('heaven', 'ello') #=> true - 'hello'.end_with?('heaven', 'paradise') #=> false - 'тест'.end_with?('т') # => true - 'こんにちは'.end_with?('は') # => true - -Related: String#start_with?. +Related: see {Querying}[rdoc-ref:String@Querying]. diff --git a/doc/string/eql_p.rdoc b/doc/string/eql_p.rdoc new file mode 100644 index 0000000000..85409c5ed6 --- /dev/null +++ b/doc/string/eql_p.rdoc @@ -0,0 +1,18 @@ +Returns whether +self+ and +object+ have the same length and content: + + s = 'foo' + s.eql?('foo') # => true + s.eql?('food') # => false + s.eql?('FOO') # => false + +Returns +false+ if the two strings' encodings are not compatible: + + s0 = "äöü" # => "äöü" + s1 = s0.encode(Encoding::ISO_8859_1) # => "\xE4\xF6\xFC" + s0.encoding # => #<Encoding:UTF-8> + s1.encoding # => #<Encoding:ISO-8859-1> + s0.eql?(s1) # => false + +See {Encodings}[rdoc-ref:encodings.rdoc]. + +Related: see {Querying}[rdoc-ref:String@Querying]. 
diff --git a/doc/string/force_encoding.rdoc b/doc/string/force_encoding.rdoc index fd9615caaa..a509e67f80 100644 --- a/doc/string/force_encoding.rdoc +++ b/doc/string/force_encoding.rdoc @@ -1,5 +1,6 @@ -Changes the encoding of +self+ to +encoding+, +Changes the encoding of +self+ to the given +encoding+, which may be a string encoding name or an Encoding object; +does not change the underlying bytes; returns self: s = 'łał' @@ -7,14 +8,14 @@ returns self: s.encoding # => #<Encoding:UTF-8> s.force_encoding('ascii') # => "\xC5\x82a\xC5\x82" s.encoding # => #<Encoding:US-ASCII> - -Does not change the underlying bytes: - + s.valid_encoding? # => false s.bytes # => [197, 130, 97, 197, 130] Makes the change even if the given +encoding+ is invalid for +self+ (as is the change above): - s.valid_encoding? # => false - s.force_encoding(Encoding::UTF_8) # => "łał" - s.valid_encoding? # => true + s.valid_encoding? # => false + +See {Encodings}[rdoc-ref:encodings.rdoc]. + +Related: see {Modifying}[rdoc-ref:String@Modifying]. diff --git a/doc/string/getbyte.rdoc b/doc/string/getbyte.rdoc new file mode 100644 index 0000000000..1d0ed2a5a4 --- /dev/null +++ b/doc/string/getbyte.rdoc @@ -0,0 +1,23 @@ +Returns the byte at zero-based +index+ as an integer: + + s = 'foo' + s.getbyte(0) # => 102 + s.getbyte(1) # => 111 + s.getbyte(2) # => 111 + +Counts backward from the end if +index+ is negative: + + s.getbyte(-3) # => 102 + +Returns +nil+ if +index+ is out of range: + + s.getbyte(3) # => nil + s.getbyte(-4) # => nil + +More examples: + + s = 'こんにちは' + s.bytes # => [227, 129, 147, 227, 130, 147, 227, 129, 171, 227, 129, 161, 227, 129, 175] + s.getbyte(2) # => 147 + +Related: see {Converting to Non-String}[rdoc-ref:String@Converting+to+Non--5CString]. 
diff --git a/doc/string/grapheme_clusters.rdoc b/doc/string/grapheme_clusters.rdoc index 8c7f5a7259..07ea1e318b 100644 --- a/doc/string/grapheme_clusters.rdoc +++ b/doc/string/grapheme_clusters.rdoc @@ -1,6 +1,19 @@ Returns an array of the grapheme clusters in +self+ (see {Unicode Grapheme Cluster Boundaries}[https://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries]): - s = "\u0061\u0308-pqr-\u0062\u0308-xyz-\u0063\u0308" # => "ä-pqr-b̈-xyz-c̈" + s = "ä-pqr-b̈-xyz-c̈" + s.size # => 16 + s.bytesize # => 19 + s.grapheme_clusters.size # => 13 s.grapheme_clusters # => ["ä", "-", "p", "q", "r", "-", "b̈", "-", "x", "y", "z", "-", "c̈"] + +Details: + + s = "ä" + s.grapheme_clusters # => ["ä"] # One grapheme cluster. + s.bytes # => [97, 204, 136] # Three bytes. + s.chars # => ["a", "̈"] # Two characters. + s.chars.map {|char| char.ord } # => [97, 776] # Their values. + +Related: see {Converting to Non-String}[rdoc-ref:String@Converting+to+Non--5CString]. diff --git a/doc/string/hash.rdoc b/doc/string/hash.rdoc new file mode 100644 index 0000000000..fe94770ed9 --- /dev/null +++ b/doc/string/hash.rdoc @@ -0,0 +1,19 @@ +Returns the integer hash value for +self+. + +Two \String objects that have identical content and compatible encodings +also have the same hash value; +see Object#hash and {Encodings}[rdoc-ref:encodings.rdoc]: + + s = 'foo' + h = s.hash # => -569050784 + h == 'foo'.hash # => true + h == 'food'.hash # => false + h == 'FOO'.hash # => false + + s0 = "äöü" + s1 = s0.encode(Encoding::ISO_8859_1) + s0.encoding # => #<Encoding:UTF-8> + s1.encoding # => #<Encoding:ISO-8859-1> + s0.hash == s1.hash # => false + +Related: see {Querying}[rdoc-ref:String@Querying]. 
diff --git a/doc/string/index.rdoc b/doc/string/index.rdoc index ce09a37bdf..c3cff24dac 100644 --- a/doc/string/index.rdoc +++ b/doc/string/index.rdoc @@ -1,31 +1,31 @@ -Returns the integer index of the first match for the given argument, -or +nil+ if none found; -the search of +self+ is forward, and begins at position +offset+ (in characters). +Returns the integer position of the first substring that matches the given argument +pattern+, +or +nil+ if none found. -With string argument +substring+, +When +pattern+ is a string, returns the index of the first matching substring in +self+: 'foo'.index('f') # => 0 'foo'.index('o') # => 1 'foo'.index('oo') # => 1 'foo'.index('ooo') # => nil - 'тест'.index('с') # => 2 - 'こんにちは'.index('ち') # => 3 + 'こんにちは'.index('ち') # => 3 -With Regexp argument +regexp+, returns the index of the first match in +self+: +When +pattern+ is a Regexp, returns the index of the first match in +self+: 'foo'.index(/o./) # => 1 'foo'.index(/.o/) # => 0 -With positive integer +offset+, begins the search at position +offset+: +When +offset+ is non-negative, begins the search at position +offset+; +the returned index is relative to the beginning of +self+: - 'foo'.index('o', 1) # => 1 - 'foo'.index('o', 2) # => 2 - 'foo'.index('o', 3) # => nil - 'тест'.index('с', 1) # => 2 - 'こんにちは'.index('ち', 2) # => 3 + 'bar'.index('r', 0) # => 2 + 'bar'.index('r', 1) # => 2 + 'bar'.index('r', 2) # => 2 + 'bar'.index('r', 3) # => nil + 'bar'.index(/[r-z]/, 0) # => 2 + 'こんにちは'.index('ち', 2) # => 3 -With negative integer +offset+, selects the search position by counting backward +With negative integer argument +offset+, selects the search position by counting backward from the end of +self+: 'foo'.index('o', -1) # => 2 @@ -35,4 +35,4 @@ from the end of +self+: 'foo'.index(/o./, -2) # => 1 'foo'.index(/.o/, -2) # => 1 -Related: String#rindex. +Related: see {Querying}[rdoc-ref:String@Querying]. 
diff --git a/doc/string/insert.rdoc b/doc/string/insert.rdoc new file mode 100644 index 0000000000..73205f2069 --- /dev/null +++ b/doc/string/insert.rdoc @@ -0,0 +1,15 @@ +Inserts the given +other_string+ into +self+; returns +self+. + +If the given +index+ is non-negative, inserts +other_string+ at offset +index+: + + 'foo'.insert(0, 'bar') # => "barfoo" + 'foo'.insert(1, 'bar') # => "fbaroo" + 'foo'.insert(3, 'bar') # => "foobar" + 'こんにちは'.insert(2, 'bar') # => "こんbarにちは" + +If the +index+ is negative, counts backward from the end of +self+ +and inserts +other_string+ _after_ the offset: + + 'foo'.insert(-2, 'bar') # => "fobaro" + +Related: see {Modifying}[rdoc-ref:String@Modifying]. diff --git a/doc/string/inspect.rdoc b/doc/string/inspect.rdoc new file mode 100644 index 0000000000..907828c2af --- /dev/null +++ b/doc/string/inspect.rdoc @@ -0,0 +1,38 @@ +Returns a printable version of +self+, enclosed in double-quotes. + +Most printable characters are rendered simply as themselves: + + 'abc'.inspect # => "\"abc\"" + '012'.inspect # => "\"012\"" + ''.inspect # => "\"\"" + "\u000012".inspect # => "\"\\u000012\"" + 'こんにちは'.inspect # => "\"こんにちは\"" + +But printable characters double-quote (<tt>'"'</tt>) and backslash (<tt>'\\'</tt>) are escaped: + + '"'.inspect # => "\"\\\"\"" + '\\'.inspect # => "\"\\\\\"" + +Unprintable characters are the {ASCII characters}[https://en.wikipedia.org/wiki/ASCII] +whose values are in range <tt>0..31</tt>, +along with the character whose value is +127+. + +Most of these characters are rendered thus: + + 0.chr.inspect # => "\"\\x00\"" + 1.chr.inspect # => "\"\\x01\"" + 2.chr.inspect # => "\"\\x02\"" + # ... 
+ +A few, however, have special renderings: + + 7.chr.inspect # => "\"\\a\"" # BEL + 8.chr.inspect # => "\"\\b\"" # BS + 9.chr.inspect # => "\"\\t\"" # TAB + 10.chr.inspect # => "\"\\n\"" # LF + 11.chr.inspect # => "\"\\v\"" # VT + 12.chr.inspect # => "\"\\f\"" # FF + 13.chr.inspect # => "\"\\r\"" # CR + 27.chr.inspect # => "\"\\e\"" # ESC + +Related: see {Converting to Non-String}[rdoc-ref:String@Converting+to+Non--5CString]. diff --git a/doc/string/intern.rdoc b/doc/string/intern.rdoc new file mode 100644 index 0000000000..eded6ac3d7 --- /dev/null +++ b/doc/string/intern.rdoc @@ -0,0 +1,8 @@ +Returns the Symbol object derived from +self+, +creating it if it did not already exist: + + 'foo'.intern # => :foo + 'こんにちは'.intern # => :こんにちは + +Related: see {Converting to Non-String}[rdoc-ref:String@Converting+to+Non--5CString]. + diff --git a/doc/string/length.rdoc b/doc/string/length.rdoc index 544bca269f..eb68edb10c 100644 --- a/doc/string/length.rdoc +++ b/doc/string/length.rdoc @@ -1,12 +1,11 @@ Returns the count of characters (not bytes) in +self+: 'foo'.length # => 3 - 'тест'.length # => 4 - 'こんにちは'.length # => 5 + 'こんにちは'.length # => 5 Contrast with String#bytesize: 'foo'.bytesize # => 3 - 'тест'.bytesize # => 8 - 'こんにちは'.bytesize # => 15 + 'こんにちは'.bytesize # => 15 +Related: see {Querying}[rdoc-ref:String@Querying]. diff --git a/doc/string/ljust.rdoc b/doc/string/ljust.rdoc index 8e23c1fc8f..a8ca62ee76 100644 --- a/doc/string/ljust.rdoc +++ b/doc/string/ljust.rdoc @@ -1,16 +1,13 @@ -Returns a left-justified copy of +self+. 
- -If integer argument +size+ is greater than the size (in characters) of +self+, -returns a new string of length +size+ that is a copy of +self+, -left justified and padded on the right with +pad_string+: +Returns a copy of +self+, left-justified and, if necessary, right-padded with the +pad_string+: 'hello'.ljust(10) # => "hello " ' hello'.ljust(10) # => " hello " 'hello'.ljust(10, 'ab') # => "helloababa" - 'тест'.ljust(10) # => "тест " - 'こんにちは'.ljust(10) # => "こんにちは " + 'こんにちは'.ljust(10) # => "こんにちは " -If +size+ is not greater than the size of +self+, returns a copy of +self+: +If <tt>width <= self.length</tt>, returns a copy of +self+: 'hello'.ljust(5) # => "hello" - 'hello'.ljust(1) # => "hello" + 'hello'.ljust(1) # => "hello" # Does not truncate to width. + +Related: see {Converting to New String}[rdoc-ref:String@Converting+to+New+String]. diff --git a/doc/string/new.rdoc b/doc/string/new.rdoc index d955e61c87..e2752d6e1f 100644 --- a/doc/string/new.rdoc +++ b/doc/string/new.rdoc @@ -1,34 +1,38 @@ -Returns a new \String that is a copy of +string+. +Returns a new \String object containing the given +string+. -With no arguments, returns the empty string with the Encoding <tt>ASCII-8BIT</tt>: +The +options+ are optional keyword options (see below). 
- s = String.new - s # => "" - s.encoding # => #<Encoding:ASCII-8BIT> +With no argument given and keyword +encoding+ also not given, +returns an empty string with the Encoding <tt>ASCII-8BIT</tt>: -With optional argument +string+ and no keyword arguments, -returns a copy of +string+ with the same encoding: + s = String.new # => "" + s.encoding # => #<Encoding:ASCII-8BIT> - String.new('foo') # => "foo" - String.new('тест') # => "тест" - String.new('こんにちは') # => "こんにちは" +With argument +string+ given and keyword option +encoding+ not given, +returns a new string with the same encoding as +string+: + + s0 = 'foo'.encode(Encoding::UTF_16) + s1 = String.new(s0) + s1.encoding # => #<Encoding:UTF-16 (dummy)> (Unlike \String.new, a {string literal}[rdoc-ref:syntax/literals.rdoc@String+Literals] like <tt>''</tt> or a {here document literal}[rdoc-ref:syntax/literals.rdoc@Here+Document+Literals] always has {script encoding}[rdoc-ref:encodings.rdoc@Script+Encoding].) -With optional keyword argument +encoding+, returns a copy of +string+ -with the specified encoding; +With keyword option +encoding+ given, +returns a string with the specified encoding; the +encoding+ may be an Encoding object, an encoding name, or an encoding name alias: + String.new(encoding: Encoding::US_ASCII).encoding # => #<Encoding:US-ASCII> + String.new('', encoding: Encoding::US_ASCII).encoding # => #<Encoding:US-ASCII> String.new('foo', encoding: Encoding::US_ASCII).encoding # => #<Encoding:US-ASCII> String.new('foo', encoding: 'US-ASCII').encoding # => #<Encoding:US-ASCII> String.new('foo', encoding: 'ASCII').encoding # => #<Encoding:US-ASCII> The given encoding need not be valid for the string's content, -and that validity is not checked: +and its validity is not checked: s = String.new('こんにちは', encoding: 'ascii') s.valid_encoding? # => false @@ -37,15 +41,11 @@ But the given +encoding+ itself is checked: String.new('foo', encoding: 'bar') # Raises ArgumentError. 
-With optional keyword argument +capacity+, returns a copy of +string+ -(or an empty string, if +string+ is not given); -the given +capacity+ is advisory only, +With keyword option +capacity+ given, +the given value is advisory only, and may or may not set the size of the internal buffer, which may in turn affect performance: - String.new(capacity: 1) - String.new('foo', capacity: 4096) - -The +string+, +encoding+, and +capacity+ arguments may all be used together: - - String.new('hello', encoding: 'UTF-8', capacity: 25) + String.new('foo', capacity: 1) # Buffer size is at least 4 (includes terminal null byte). + String.new('foo', capacity: 4096) # Buffer size is at least 4; + # may be equal to, greater than, or less than 4096. diff --git a/doc/string/ord.rdoc b/doc/string/ord.rdoc index d586363d44..87b469db02 100644 --- a/doc/string/ord.rdoc +++ b/doc/string/ord.rdoc @@ -2,5 +2,6 @@ Returns the integer ordinal of the first character of +self+: 'h'.ord # => 104 'hello'.ord # => 104 - 'тест'.ord # => 1090 'こんにちは'.ord # => 12371 + +Related: see {Converting to Non-String}[rdoc-ref:String@Converting+to+Non--5CString]. diff --git a/doc/string/partition.rdoc b/doc/string/partition.rdoc index ebe575e8eb..614ad029d4 100644 --- a/doc/string/partition.rdoc +++ b/doc/string/partition.rdoc @@ -1,24 +1,43 @@ Returns a 3-element array of substrings of +self+. -Matches a pattern against +self+, scanning from the beginning. -The pattern is: +If +pattern+ is matched, returns the array: -- +string_or_regexp+ itself, if it is a Regexp. -- <tt>Regexp.quote(string_or_regexp)</tt>, if +string_or_regexp+ is a string. 
+ [pre_match, first_match, post_match] -If the pattern is matched, returns pre-match, first-match, post-match: +where: - 'hello'.partition('l') # => ["he", "l", "lo"] - 'hello'.partition('ll') # => ["he", "ll", "o"] - 'hello'.partition('h') # => ["", "h", "ello"] - 'hello'.partition('o') # => ["hell", "o", ""] - 'hello'.partition(/l+/) #=> ["he", "ll", "o"] - 'hello'.partition('') # => ["", "", "hello"] - 'тест'.partition('т') # => ["", "т", "ест"] - 'こんにちは'.partition('に') # => ["こん", "に", "ちは"] +- +first_match+ is the first-found matching substring. +- +pre_match+ and +post_match+ are the preceding and following substrings. -If the pattern is not matched, returns a copy of +self+ and two empty strings: +If +pattern+ is not matched, returns the array: - 'hello'.partition('x') # => ["hello", "", ""] + [self.dup, "", ""] -Related: String#rpartition, String#split. +Note that in the examples below, a returned string <tt>'hello'</tt> +is a copy of +self+, not +self+. + +If +pattern+ is a Regexp, performs the equivalent of <tt>self.match(pattern)</tt> +(also setting {matched-data variables}[rdoc-ref:language/globals.md@Matched+Data]): + + 'hello'.partition(/h/) # => ["", "h", "ello"] + 'hello'.partition(/l/) # => ["he", "l", "lo"] + 'hello'.partition(/l+/) # => ["he", "ll", "o"] + 'hello'.partition(/o/) # => ["hell", "o", ""] + 'hello'.partition(/^/) # => ["", "", "hello"] + 'hello'.partition(//) # => ["", "", "hello"] + 'hello'.partition(/$/) # => ["hello", "", ""] + 'hello'.partition(/x/) # => ["hello", "", ""] + +If +pattern+ is not a Regexp, converts it to a string (if it is not already one), +then performs the equivalent of <tt>self.index(pattern)</tt> +(and does _not_ set {matched-data global variables}[rdoc-ref:language/globals.md@Matched+Data]): + + 'hello'.partition('h') # => ["", "h", "ello"] + 'hello'.partition('l') # => ["he", "l", "lo"] + 'hello'.partition('ll') # => ["he", "ll", "o"] + 'hello'.partition('o') # => ["hell", "o", ""] + 'hello'.partition('') # 
=> ["", "", "hello"] + 'hello'.partition('x') # => ["hello", "", ""] + 'こんにちは'.partition('に') # => ["こん", "に", "ちは"] + +Related: see {Converting to Non-String}[rdoc-ref:String@Converting+to+Non--5CString]. diff --git a/doc/string/rindex.rdoc b/doc/string/rindex.rdoc new file mode 100644 index 0000000000..2b81c3716d --- /dev/null +++ b/doc/string/rindex.rdoc @@ -0,0 +1,51 @@ +Returns the integer position of the _last_ substring that matches the given argument +pattern+, +or +nil+ if none found. + +When +pattern+ is a string, returns the index of the last matching substring in +self+: + + 'foo'.rindex('f') # => 0 + 'foo'.rindex('o') # => 2 + 'foo'.rindex('oo') # => 1 + 'foo'.rindex('ooo') # => nil + 'こんにちは'.rindex('ち') # => 3 + +When +pattern+ is a Regexp, returns the index of the last match in +self+: + + 'foo'.rindex(/f/) # => 0 + 'foo'.rindex(/o/) # => 2 + 'foo'.rindex(/oo/) # => 1 + 'foo'.rindex(/ooo/) # => nil + +When +offset+ is non-negative, it specifies the maximum starting position in the +string to end the search: + + 'foo'.rindex('o', 0) # => nil + 'foo'.rindex('o', 1) # => 1 + 'foo'.rindex('o', 2) # => 2 + 'foo'.rindex('o', 3) # => 2 + +With negative integer argument +offset+, +selects the search position by counting backward from the end of +self+: + + 'foo'.rindex('o', -1) # => 2 + 'foo'.rindex('o', -2) # => 1 + 'foo'.rindex('o', -3) # => nil + 'foo'.rindex('o', -4) # => nil + +The last match means starting at the possible last position, not +the last of longest matches: + + 'foo'.rindex(/o+/) # => 2 + $~ # => #<MatchData "o"> + +To get the last longest match, combine with negative lookbehind: + + 'foo'.rindex(/(?<!o)o+/) # => 1 + $~ # => #<MatchData "oo"> + +Or String#index with negative lookahead: + + 'foo'.index(/o+(?!.*o)/) # => 1 + $~ # => #<MatchData "oo"> + +Related: see {Querying}[rdoc-ref:String@Querying]. 
diff --git a/doc/string/rjust.rdoc b/doc/string/rjust.rdoc index 24e7bf3159..acd3f198d4 100644 --- a/doc/string/rjust.rdoc +++ b/doc/string/rjust.rdoc @@ -1,16 +1,17 @@ Returns a right-justified copy of +self+. -If integer argument +size+ is greater than the size (in characters) of +self+, -returns a new string of length +size+ that is a copy of +self+, +If integer argument +width+ is greater than the size (in characters) of +self+, +returns a new string of length +width+ that is a copy of +self+, right justified and padded on the left with +pad_string+: 'hello'.rjust(10) # => " hello" 'hello '.rjust(10) # => " hello " 'hello'.rjust(10, 'ab') # => "ababahello" - 'тест'.rjust(10) # => " тест" 'こんにちは'.rjust(10) # => " こんにちは" -If +size+ is not greater than the size of +self+, returns a copy of +self+: +If <tt>width <= self.size</tt>, returns a copy of +self+: 'hello'.rjust(5, 'ab') # => "hello" 'hello'.rjust(1, 'ab') # => "hello" + +Related: see {Converting to New String}[rdoc-ref:String@Converting+to+New+String]. diff --git a/doc/string/rpartition.rdoc b/doc/string/rpartition.rdoc index d24106fb9f..eed03949a5 100644 --- a/doc/string/rpartition.rdoc +++ b/doc/string/rpartition.rdoc @@ -1,24 +1,47 @@ Returns a 3-element array of substrings of +self+. -Matches a pattern against +self+, scanning backwards from the end. -The pattern is: +Searches +self+ for a match of +pattern+, seeking the _last_ match. -- +string_or_regexp+ itself, if it is a Regexp. -- <tt>Regexp.quote(string_or_regexp)</tt>, if +string_or_regexp+ is a string. 
+If +pattern+ is not matched, returns the array: -If the pattern is matched, returns pre-match, last-match, post-match: + ["", "", self.dup] - 'hello'.rpartition('l') # => ["hel", "l", "o"] - 'hello'.rpartition('ll') # => ["he", "ll", "o"] - 'hello'.rpartition('h') # => ["", "h", "ello"] - 'hello'.rpartition('o') # => ["hell", "o", ""] - 'hello'.rpartition(/l+/) # => ["hel", "l", "o"] - 'hello'.rpartition('') # => ["hello", "", ""] - 'тест'.rpartition('т') # => ["тес", "т", ""] - 'こんにちは'.rpartition('に') # => ["こん", "に", "ちは"] +If +pattern+ is matched, returns the array: -If the pattern is not matched, returns two empty strings and a copy of +self+: + [pre_match, last_match, post_match] - 'hello'.rpartition('x') # => ["", "", "hello"] +where: -Related: String#partition, String#split. +- +last_match+ is the last-found matching substring. +- +pre_match+ and +post_match+ are the preceding and following substrings. + +The pattern used is: + +- +pattern+ itself, if it is a Regexp. +- <tt>Regexp.quote(pattern)</tt>, if +pattern+ is a string. + +Note that in the examples below, a returned string <tt>'hello'</tt> is a copy of +self+, not +self+. 
+ +If +pattern+ is a Regexp, searches for the last matching substring +(also setting {matched-data global variables}[rdoc-ref:language/globals.md@Matched+Data]): + + 'hello'.rpartition(/l/) # => ["hel", "l", "o"] + 'hello'.rpartition(/ll/) # => ["he", "ll", "o"] + 'hello'.rpartition(/h/) # => ["", "h", "ello"] + 'hello'.rpartition(/o/) # => ["hell", "o", ""] + 'hello'.rpartition(//) # => ["hello", "", ""] + 'hello'.rpartition(/x/) # => ["", "", "hello"] + 'こんにちは'.rpartition(/に/) # => ["こん", "に", "ちは"] + +If +pattern+ is not a Regexp, converts it to a string (if it is not already one), +then searches for the last matching substring +(and does _not_ set {matched-data global variables}[rdoc-ref:language/globals.md@Matched+Data]): + + 'hello'.rpartition('l') # => ["hel", "l", "o"] + 'hello'.rpartition('ll') # => ["he", "ll", "o"] + 'hello'.rpartition('h') # => ["", "h", "ello"] + 'hello'.rpartition('o') # => ["hell", "o", ""] + 'hello'.rpartition('') # => ["hello", "", ""] + 'こんにちは'.rpartition('に') # => ["こん", "に", "ちは"] + +Related: see {Converting to Non-String}[rdoc-ref:String@Converting+to+Non--5CString]. diff --git a/doc/string/scan.rdoc b/doc/string/scan.rdoc new file mode 100644 index 0000000000..04a2b02ff4 --- /dev/null +++ b/doc/string/scan.rdoc @@ -0,0 +1,35 @@ +Matches a pattern against +self+: + +- If +pattern+ is a Regexp, the pattern used is +pattern+ itself. +- If +pattern+ is a string, the pattern used is <tt>Regexp.quote(pattern)</tt>. + +Generates a collection of matching results +and updates {regexp-related global variables}[rdoc-ref:Regexp@Global+Variables]: + +- If the pattern contains no groups, each result is a matched substring. +- If the pattern contains groups, each result is an array + containing a matched substring for each group. 
+ +With no block given, returns an array of the results: + + 'cruel world'.scan(/\w+/) # => ["cruel", "world"] + 'cruel world'.scan(/.../) # => ["cru", "el ", "wor"] + 'cruel world'.scan(/(...)/) # => [["cru"], ["el "], ["wor"]] + 'cruel world'.scan(/(..)(..)/) # => [["cr", "ue"], ["l ", "wo"]] + 'こんにちは'.scan(/../) # => ["こん", "にち"] + 'abracadabra'.scan('ab') # => ["ab", "ab"] + 'abracadabra'.scan('nosuch') # => [] + +With a block given, calls the block with each result; returns +self+: + + 'cruel world'.scan(/\w+/) {|w| p w } + # => "cruel" + # => "world" + 'cruel world'.scan(/(.)(.)/) {|x, y| p [x, y] } + # => ["c", "r"] + # => ["u", "e"] + # => ["l", " "] + # => ["w", "o"] + # => ["r", "l"] + +Related: see {Converting to Non-String}[rdoc-ref:String@Converting+to+Non--5CString]. diff --git a/doc/string/scrub.rdoc b/doc/string/scrub.rdoc index 1a5b1c79d0..5ace376cdb 100644 --- a/doc/string/scrub.rdoc +++ b/doc/string/scrub.rdoc @@ -1,25 +1,22 @@ Returns a copy of +self+ with each invalid byte sequence replaced by the given +replacement_string+. 
-With no block given and no argument, replaces each invalid sequence -with the default replacement string -(<tt>"�"</tt> for a Unicode encoding, <tt>'?'</tt> otherwise): +With no block given, replaces each invalid sequence +with the given +replacement_string+ +(by default, <tt>"�"</tt> for a Unicode encoding, <tt>'?'</tt> otherwise): - s = "foo\x81\x81bar" - s.scrub # => "foo��bar" + "foo\x81\x81bar".scrub # => "foo��bar" + "foo\x81\x81bar".force_encoding('US-ASCII').scrub # => "foo??bar" + "foo\x81\x81bar".scrub('xyzzy') # => "fooxyzzyxyzzybar" -With no block given and argument +replacement_string+ given, -replaces each invalid sequence with that string: +With a block given, calls the block with each invalid sequence, +and replaces that sequence with the return value of the block: - "foo\x81\x81bar".scrub('xyzzy') # => "fooxyzzyxyzzybar" + "foo\x81\x81bar".scrub {|sequence| p sequence; 'XYZZY' } # => "fooXYZZYXYZZYbar" -With a block given, replaces each invalid sequence with the value -of the block: - - "foo\x81\x81bar".scrub {|bytes| p bytes; 'XYZZY' } - # => "fooXYZZYXYZZYbar" - -Output: +Output: "\x81" "\x81" + +Related: see {Converting to New String}[rdoc-ref:String@Converting+to+New+String]. diff --git a/doc/string/split.rdoc b/doc/string/split.rdoc index 2b5e14ddb6..1aee1de0a4 100644 --- a/doc/string/split.rdoc +++ b/doc/string/split.rdoc @@ -1,86 +1,101 @@ -Returns an array of substrings of +self+ -that are the result of splitting +self+ +Creates an array of substrings by splitting +self+ at each occurrence of the given field separator +field_sep+. -When +field_sep+ is <tt>$;</tt>: +With no arguments given, +splits using the field separator <tt>$;</tt>, +whose default value is +nil+. -- If <tt>$;</tt> is +nil+ (its default value), - the split occurs just as if +field_sep+ were given as a space character - (see below). 
+With no block given, returns the array of substrings: -- If <tt>$;</tt> is a string, - the split ocurs just as if +field_sep+ were given as that string - (see below). + 'abracadabra'.split('a') # => ["", "br", "c", "d", "br"] -When +field_sep+ is <tt>' '</tt> and +limit+ is +nil+, -the split occurs at each sequence of whitespace: +When +field_sep+ is +nil+ or <tt>' '</tt> (a single space), +splits at each sequence of whitespace: - 'abc def ghi'.split(' ') => ["abc", "def", "ghi"] - "abc \n\tdef\t\n ghi".split(' ') # => ["abc", "def", "ghi"] - 'abc def ghi'.split(' ') => ["abc", "def", "ghi"] - ''.split(' ') => [] + 'foo bar baz'.split(nil) # => ["foo", "bar", "baz"] + 'foo bar baz'.split(' ') # => ["foo", "bar", "baz"] + "foo \n\tbar\t\n baz".split(' ') # => ["foo", "bar", "baz"] + 'foo bar baz'.split(' ') # => ["foo", "bar", "baz"] + ''.split(' ') # => [] -When +field_sep+ is a string different from <tt>' '</tt> -and +limit+ is +nil+, -the split occurs at each occurrence of +field_sep+; -trailing empty substrings are not returned: +When +field_sep+ is an empty string, +splits at every character: - 'abracadabra'.split('ab') => ["", "racad", "ra"] - 'aaabcdaaa'.split('a') => ["", "", "", "bcd"] - ''.split('a') => [] - '3.14159'.split('1') => ["3.", "4", "59"] - '!@#$%^$&*($)_+'.split('$') # => ["!@#", "%^", "&*(", ")_+"] - 'тест'.split('т') => ["", "ес"] - 'こんにちは'.split('に') => ["こん", "ちは"] + 'abracadabra'.split('') # => ["a", "b", "r", "a", "c", "a", "d", "a", "b", "r", "a"] + ''.split('') # => [] + 'こんにちは'.split('') # => ["こ", "ん", "に", "ち", "は"] -When +field_sep+ is a Regexp and +limit+ is +nil+, -the split occurs at each occurrence of a match; -trailing empty substrings are not returned: +When +field_sep+ is a non-empty string and different from <tt>' '</tt> (a single space), +uses that string as the separator: + + 'abracadabra'.split('a') # => ["", "br", "c", "d", "br"] + 'abracadabra'.split('ab') # => ["", "racad", "ra"] + ''.split('a') # => [] + 
'こんにちは'.split('に') # => ["こん", "ちは"] + +When +field_sep+ is a Regexp, +splits at each occurrence of a matching substring: 'abracadabra'.split(/ab/) # => ["", "racad", "ra"] - 'aaabcdaaa'.split(/a/) => ["", "", "", "bcd"] - 'aaabcdaaa'.split(//) => ["a", "a", "a", "b", "c", "d", "a", "a", "a"] '1 + 1 == 2'.split(/\W+/) # => ["1", "1", "2"] + 'abracadabra'.split(//) # => ["a", "b", "r", "a", "c", "a", "d", "a", "b", "r", "a"] -If the \Regexp contains groups, their matches are also included +If the \Regexp contains groups, their matches are included in the returned array: '1:2:3'.split(/(:)()()/, 2) # => ["1", ":", "", "", "2:3"] -As seen above, if +limit+ is +nil+, -trailing empty substrings are not returned; -the same is true if +limit+ is zero: +Argument +limit+ sets a limit on the size of the returned array; +it also determines whether trailing empty strings are included in the returned array. + +When +limit+ is zero, +there is no limit on the size of the array, +but trailing empty strings are omitted: + + 'abracadabra'.split('', 0) # => ["a", "b", "r", "a", "c", "a", "d", "a", "b", "r", "a"] + 'abracadabra'.split('a', 0) # => ["", "br", "c", "d", "br"] # Empty string after last 'a' omitted. 
+ +When +limit+ is a positive integer, +there is a limit on the size of the array (no more than <tt>limit - 1</tt> splits occur), +and trailing empty strings are included: + + 'abracadabra'.split('', 3) # => ["a", "b", "racadabra"] + 'abracadabra'.split('a', 3) # => ["", "br", "cadabra"] + 'abracadabra'.split('', 30) # => ["a", "b", "r", "a", "c", "a", "d", "a", "b", "r", "a", ""] + 'abracadabra'.split('a', 30) # => ["", "br", "c", "d", "br", ""] + 'abracadabra'.split('', 1) # => ["abracadabra"] + 'abracadabra'.split('a', 1) # => ["abracadabra"] + +When +limit+ is negative, +there is no limit on the size of the array, +and trailing empty strings are included: + + 'abracadabra'.split('', -1) # => ["a", "b", "r", "a", "c", "a", "d", "a", "b", "r", "a", ""] + 'abracadabra'.split('a', -1) # => ["", "br", "c", "d", "br", ""] - 'aaabcdaaa'.split('a') => ["", "", "", "bcd"] - 'aaabcdaaa'.split('a', 0) # => ["", "", "", "bcd"] +If a block is given, it is called with each substring and returns +self+: -If +limit+ is positive integer +n+, no more than <tt>n - 1-</tt> -splits occur, so that at most +n+ substrings are returned, -and trailing empty substrings are included: + 'foo bar baz'.split(' ') {|substring| p substring } - 'aaabcdaaa'.split('a', 1) # => ["aaabcdaaa"] - 'aaabcdaaa'.split('a', 2) # => ["", "aabcdaaa"] - 'aaabcdaaa'.split('a', 5) # => ["", "", "", "bcd", "aa"] - 'aaabcdaaa'.split('a', 7) # => ["", "", "", "bcd", "", "", ""] - 'aaabcdaaa'.split('a', 8) # => ["", "", "", "bcd", "", "", ""] +Output : -Note that if +field_sep+ is a \Regexp containing groups, -their matches are in the returned array, but do not count toward the limit. 
+ "foo" + "bar" + "baz" -If +limit+ is negative, it behaves the same as if +limit+ was +nil+, -meaning that there is no limit, -and trailing empty substrings are included: +Note that the above example is functionally equivalent to: - 'aaabcdaaa'.split('a', -1) # => ["", "", "", "bcd", "", "", ""] + 'foo bar baz'.split(' ').each {|substring| p substring } -If a block is given, it is called with each substring: +Output : - 'abc def ghi'.split(' ') {|substring| p substring } + "foo" + "bar" + "baz" -Output: +But the latter: - "abc" - "def" - "ghi" +- Has poorer performance because it creates an intermediate array. +- Returns an array (instead of +self+). -Related: String#partition, String#rpartition. +Related: see {Converting to Non-String}[rdoc-ref:String@Converting+to+Non--5CString]. diff --git a/doc/string/squeeze.rdoc b/doc/string/squeeze.rdoc new file mode 100644 index 0000000000..1a38c08b32 --- /dev/null +++ b/doc/string/squeeze.rdoc @@ -0,0 +1,33 @@ +Returns a copy of +self+ with each tuple (doubling, tripling, etc.) of specified characters +"squeezed" down to a single character. + +The tuples to be squeezed are specified by arguments +selectors+, +each of which is a string; +see {Character Selectors}[rdoc-ref:character_selectors.rdoc@Character+Selectors]. + +A single argument may be a single character: + + 'Noooooo!'.squeeze('o') # => "No!" + 'foo bar baz'.squeeze(' ') # => "foo bar baz" + 'Mississippi'.squeeze('s') # => "Misisippi" + 'Mississippi'.squeeze('p') # => "Mississipi" + 'Mississippi'.squeeze('x') # => "Mississippi" # Unused selector character is ignored. + 'бессонница'.squeeze('с') # => "бесонница" + 'бессонница'.squeeze('н') # => "бессоница" + +A single argument may be a string of characters: + + 'Mississippi'.squeeze('sp') # => "Misisipi" + 'Mississippi'.squeeze('ps') # => "Misisipi" # Order doesn't matter. + 'Mississippi'.squeeze('nonsense') # => "Misisippi" # Unused selector characters are ignored. 
+ +A single argument may be a range of characters: + + 'Mississippi'.squeeze('a-p') # => "Mississipi" + 'Mississippi'.squeeze('q-z') # => "Misisippi" + 'Mississippi'.squeeze('a-z') # => "Misisipi" + +Multiple arguments are allowed; +see {Multiple Character Selectors}[rdoc-ref:character_selectors.rdoc@Multiple+Character+Selectors]. + +Related: see {Converting to New String}[rdoc-ref:String@Converting+to+New+String]. diff --git a/doc/string/start_with_p.rdoc b/doc/string/start_with_p.rdoc index 5d1f9f9543..f78edc7fa3 100644 --- a/doc/string/start_with_p.rdoc +++ b/doc/string/start_with_p.rdoc @@ -1,10 +1,9 @@ -Returns whether +self+ starts with any of the given +string_or_regexp+. +Returns whether +self+ starts with any of the given +patterns+. -Matches patterns against the beginning of +self+. -For each given +string_or_regexp+, the pattern is: +For each argument, the pattern used is: -- +string_or_regexp+ itself, if it is a Regexp. -- <tt>Regexp.quote(string_or_regexp)</tt>, if +string_or_regexp+ is a string. +- The pattern itself, if it is a Regexp. +- <tt>Regexp.quote(pattern)</tt>, if it is a string. Returns +true+ if any pattern matches the beginning, +false+ otherwise: @@ -12,7 +11,6 @@ Returns +true+ if any pattern matches the beginning, +false+ otherwise: 'hello'.start_with?(/H/i) # => true 'hello'.start_with?('heaven', 'hell') # => true 'hello'.start_with?('heaven', 'paradise') # => false - 'тест'.start_with?('т') # => true 'こんにちは'.start_with?('こ') # => true -Related: String#end_with?. +Related: see {Querying}[rdoc-ref:String@Querying]. diff --git a/doc/string/sub.rdoc b/doc/string/sub.rdoc new file mode 100644 index 0000000000..ff051ea177 --- /dev/null +++ b/doc/string/sub.rdoc @@ -0,0 +1,33 @@ +Returns a copy of +self+, possibly with a substring replaced. + +Argument +pattern+ may be a string or a Regexp; +argument +replacement+ may be a string or a Hash. + +Varying types for the argument values make this method very versatile. 
+ +Below are some simple examples; for many more examples, +see {Substitution Methods}[rdoc-ref:String@Substitution+Methods]. + +With arguments +pattern+ and string +replacement+ given, +replaces the first matching substring with the given replacement string: + + s = 'abracadabra' # => "abracadabra" + s.sub('bra', 'xyzzy') # => "axyzzycadabra" + s.sub(/bra/, 'xyzzy') # => "axyzzycadabra" + s.sub('nope', 'xyzzy') # => "abracadabra" + +With arguments +pattern+ and hash +replacement+ given, +replaces the first matching substring with a value from the given replacement hash, or removes it: + + h = {'a' => 'A', 'b' => 'B', 'c' => 'C'} + s.sub('b', h) # => "aBracadabra" + s.sub(/b/, h) # => "aBracadabra" + s.sub(/d/, h) # => "abracaabra" # 'd' removed. + +With argument +pattern+ and a block given, +calls the block with the first matching substring; +replaces that substring with the block’s return value: + + s.sub('b') {|match| match.upcase } # => "aBracadabra" + +Related: see {Converting to New String}[rdoc-ref:String@Converting+to+New+String]. diff --git a/doc/string/succ.rdoc b/doc/string/succ.rdoc new file mode 100644 index 0000000000..1b4b936a8e --- /dev/null +++ b/doc/string/succ.rdoc @@ -0,0 +1,52 @@ +Returns the successor to +self+. The successor is calculated by +incrementing characters. 
+ +The first character to be incremented is the rightmost alphanumeric, +or, if no alphanumerics, the rightmost character: + + 'THX1138'.succ # => "THX1139" + '<<koala>>'.succ # => "<<koalb>>" + '***'.succ # => "**+" + 'こんにちは'.succ # => "こんにちば" + +The successor to a digit is another digit, "carrying" to the next-left +character for a "rollover" from 9 to 0, and prepending another digit +if necessary: + + '00'.succ # => "01" + '09'.succ # => "10" + '99'.succ # => "100" + +The successor to a letter is another letter of the same case, +carrying to the next-left character for a rollover, +and prepending another same-case letter if necessary: + + 'aa'.succ # => "ab" + 'az'.succ # => "ba" + 'zz'.succ # => "aaa" + 'AA'.succ # => "AB" + 'AZ'.succ # => "BA" + 'ZZ'.succ # => "AAA" + +The successor to a non-alphanumeric character is the next character +in the underlying character set's collating sequence, +carrying to the next-left character for a rollover, +and prepending another character if necessary: + + s = 0.chr * 3 # => "\x00\x00\x00" + s.succ # => "\x00\x00\x01" + s = 255.chr * 3 # => "\xFF\xFF\xFF" + s.succ # => "\x01\x00\x00\x00" + +Carrying can occur between and among mixtures of alphanumeric characters: + + s = 'zz99zz99' # => "zz99zz99" + s.succ # => "aaa00aa00" + s = '99zz99zz' # => "99zz99zz" + s.succ # => "100aa00aa" + +The successor to an empty +String+ is a new empty +String+: + + ''.succ # => "" + +Related: see {Converting to New String}[rdoc-ref:String@Converting+to+New+String]. 
diff --git a/doc/string/sum.rdoc b/doc/string/sum.rdoc index 5de24e6402..22045e5f4d 100644 --- a/doc/string/sum.rdoc +++ b/doc/string/sum.rdoc @@ -1,11 +1,12 @@ -Returns a basic +n+-bit checksum of the characters in +self+; +Returns a basic +n+-bit {checksum}[https://en.wikipedia.org/wiki/Checksum] of the characters in +self+; the checksum is the sum of the binary value of each byte in +self+, modulo <tt>2**n - 1</tt>: 'hello'.sum # => 532 'hello'.sum(4) # => 4 'hello'.sum(64) # => 532 - 'тест'.sum # => 1405 'こんにちは'.sum # => 2582 This is not a particularly strong checksum. + +Related: see {Querying}[rdoc-ref:String@Querying]. diff --git a/doc/string/swapcase.rdoc b/doc/string/swapcase.rdoc new file mode 100644 index 0000000000..4353c8528a --- /dev/null +++ b/doc/string/swapcase.rdoc @@ -0,0 +1,31 @@ +Returns a string containing the characters in +self+, with cases reversed: + +- Each uppercase character is downcased. +- Each lowercase character is upcased. + +Examples: + + 'Hello'.swapcase # => "hELLO" + 'Straße'.swapcase # => "sTRASSE" + 'RubyGems.org'.swapcase # => "rUBYgEMS.ORG" + +The sizes of +self+ and the swapcased result may differ: + + s = 'Straße' + s.size # => 6 + s.swapcase # => "sTRASSE" + s.swapcase.size # => 7 + +Some characters (and some character sets) do not have upcase and downcase versions; +see {Case Mapping}[rdoc-ref:case_mapping.rdoc]: + + s = '1, 2, 3, ...' + s.swapcase == s # => true + s = 'こんにちは' + s.swapcase == s # => true + +The casing is affected by the given +mapping+, +which may be +:ascii+, +:fold+, or +:turkic+; +see {Case Mappings}[rdoc-ref:case_mapping.rdoc@Case+Mappings]. + +Related: see {Converting to New String}[rdoc-ref:String@Converting+to+New+String]. 
diff --git a/doc/string/unicode_normalize.rdoc b/doc/string/unicode_normalize.rdoc new file mode 100644 index 0000000000..5f733c0fb8 --- /dev/null +++ b/doc/string/unicode_normalize.rdoc @@ -0,0 +1,28 @@ +Returns a copy of +self+ with +{Unicode normalization}[https://unicode.org/reports/tr15] applied. + +Argument +form+ must be one of the following symbols +(see {Unicode normalization forms}[https://unicode.org/reports/tr15/#Norm_Forms]): + +- +:nfc+: Canonical decomposition, followed by canonical composition. +- +:nfd+: Canonical decomposition. +- +:nfkc+: Compatibility decomposition, followed by canonical composition. +- +:nfkd+: Compatibility decomposition. + +The encoding of +self+ must be one of: + +- <tt>Encoding::UTF_8</tt>. +- <tt>Encoding::UTF_16BE</tt>. +- <tt>Encoding::UTF_16LE</tt>. +- <tt>Encoding::UTF_32BE</tt>. +- <tt>Encoding::UTF_32LE</tt>. +- <tt>Encoding::GB18030</tt>. +- <tt>Encoding::UCS_2BE</tt>. +- <tt>Encoding::UCS_4BE</tt>. + +Examples: + + "a\u0300".unicode_normalize # => "à" # Lowercase 'a' with grave accent. + "a\u0300".unicode_normalize(:nfd) # => "à" # Same. + +Related: see {Converting to New String}[rdoc-ref:String@Converting+to+New+String]. diff --git a/doc/string/upcase.rdoc b/doc/string/upcase.rdoc new file mode 100644 index 0000000000..ad859e8973 --- /dev/null +++ b/doc/string/upcase.rdoc @@ -0,0 +1,27 @@ +Returns a new string containing the upcased characters in +self+: + + 'hello'.upcase # => "HELLO" + 'straße'.upcase # => "STRASSE" + 'привет'.upcase # => "ПРИВЕТ" + 'RubyGems.org'.upcase # => "RUBYGEMS.ORG" + +The sizes of +self+ and the upcased result may differ: + + s = 'Straße' + s.size # => 6 + s.upcase # => "STRASSE" + s.upcase.size # => 7 + +Some characters (and some character sets) do not have upcase and downcase versions; +see {Case Mapping}[rdoc-ref:case_mapping.rdoc]: + + s = '1, 2, 3, ...' 
+ s.upcase == s # => true + s = 'こんにちは' + s.upcase == s # => true + +The casing is affected by the given +mapping+, +which may be +:ascii+, +:fold+, or +:turkic+; +see {Case Mappings}[rdoc-ref:case_mapping.rdoc@Case+Mappings]. + +Related: see {Converting to New String}[rdoc-ref:String@Converting+to+New+String]. diff --git a/doc/string/upto.rdoc b/doc/string/upto.rdoc new file mode 100644 index 0000000000..f860fe84fe --- /dev/null +++ b/doc/string/upto.rdoc @@ -0,0 +1,38 @@ +With a block given, calls the block with each +String+ value +returned by successive calls to String#succ; +the first value is +self+, the next is <tt>self.succ</tt>, and so on; +the sequence terminates when value +other_string+ is reached; +returns +self+: + + a = [] + 'a'.upto('f') {|c| a.push(c) } + a # => ["a", "b", "c", "d", "e", "f"] + + a = [] + 'Ж'.upto('П') {|c| a.push(c) } + a # => ["Ж", "З", "И", "Й", "К", "Л", "М", "Н", "О", "П"] + + a = [] + 'よ'.upto('ろ') {|c| a.push(c) } + a # => ["よ", "ら", "り", "る", "れ", "ろ"] + + a = [] + 'a8'.upto('b6') {|c| a.push(c) } + a # => ["a8", "a9", "b0", "b1", "b2", "b3", "b4", "b5", "b6"] + +If argument +exclusive+ is given as a truthy object, the last value is omitted: + + a = [] + 'a'.upto('f', true) {|c| a.push(c) } + a # => ["a", "b", "c", "d", "e"] + +If +other_string+ would not be reached, does not call the block: + + '25'.upto('5') {|s| fail s } + 'aa'.upto('a') {|s| fail s } + +With no block given, returns a new Enumerator: + + 'a8'.upto('b6') # => #<Enumerator: "a8":upto("b6")> + +Related: see {Iterating}[rdoc-ref:String@Iterating]. diff --git a/doc/string/valid_encoding_p.rdoc b/doc/string/valid_encoding_p.rdoc new file mode 100644 index 0000000000..e1db55174a --- /dev/null +++ b/doc/string/valid_encoding_p.rdoc @@ -0,0 +1,8 @@ +Returns whether +self+ is encoded correctly: + + s = 'Straße' + s.valid_encoding? # => true + s.encoding # => #<Encoding:UTF-8> + s.force_encoding(Encoding::ASCII).valid_encoding? 
# => false + +Related: see {Querying}[rdoc-ref:String@Querying]. |
