diff options
Diffstat (limited to 'doc/string')
31 files changed, 654 insertions, 0 deletions
diff --git a/doc/string/b.rdoc b/doc/string/b.rdoc new file mode 100644 index 0000000000..f8ad2910b4 --- /dev/null +++ b/doc/string/b.rdoc @@ -0,0 +1,14 @@ +Returns a copy of +self+ that has ASCII-8BIT encoding; +the underlying bytes are not modified: + + s = "\x99" + s.encoding # => #<Encoding:UTF-8> + t = s.b # => "\x99" + t.encoding # => #<Encoding:ASCII-8BIT> + + s = "\u4095" # => "䂕" + s.encoding # => #<Encoding:UTF-8> + s.bytes # => [228, 130, 149] + t = s.b # => "\xE4\x82\x95" + t.encoding # => #<Encoding:ASCII-8BIT> + t.bytes # => [228, 130, 149] diff --git a/doc/string/bytes.rdoc b/doc/string/bytes.rdoc new file mode 100644 index 0000000000..a9e89f1cd1 --- /dev/null +++ b/doc/string/bytes.rdoc @@ -0,0 +1,6 @@ +Returns an array of the bytes in +self+: + + 'hello'.bytes # => [104, 101, 108, 108, 111] + 'тест'.bytes # => [209, 130, 208, 181, 209, 129, 209, 130] + 'こんにちは'.bytes + # => [227, 129, 147, 227, 130, 147, 227, 129, 171, 227, 129, 161, 227, 129, 175] diff --git a/doc/string/bytesize.rdoc b/doc/string/bytesize.rdoc new file mode 100644 index 0000000000..b0567ff67b --- /dev/null +++ b/doc/string/bytesize.rdoc @@ -0,0 +1,11 @@ +Returns the count of bytes (not characters) in +self+: + + 'foo'.bytesize # => 3 + 'тест'.bytesize # => 8 + 'こんにちは'.bytesize # => 15 + +Contrast with String#length: + + 'foo'.length # => 3 + 'тест'.length # => 4 + 'こんにちは'.length # => 5 diff --git a/doc/string/center.rdoc b/doc/string/center.rdoc new file mode 100644 index 0000000000..d53d921ad5 --- /dev/null +++ b/doc/string/center.rdoc @@ -0,0 +1,16 @@ +Returns a centered copy of +self+. 
+ +If integer argument +size+ is greater than the size (in characters) of +self+, +returns a new string of length +size+ that is a copy of +self+, +centered and padded on both ends with +pad_string+: + + 'hello'.center(10) # => " hello " + ' hello'.center(10) # => " hello " + 'hello'.center(10, 'ab') # => "abhelloaba" + 'тест'.center(10) # => " тест " + 'こんにちは'.center(10) # => " こんにちは " + +If +size+ is not greater than the size of +self+, returns a copy of +self+: + + 'hello'.center(5) # => "hello" + 'hello'.center(1) # => "hello" diff --git a/doc/string/chars.rdoc b/doc/string/chars.rdoc new file mode 100644 index 0000000000..d24a1cc3a9 --- /dev/null +++ b/doc/string/chars.rdoc @@ -0,0 +1,5 @@ +Returns an array of the characters in +self+: + + 'hello'.chars # => ["h", "e", "l", "l", "o"] + 'тест'.chars # => ["т", "е", "с", "т"] + 'こんにちは'.chars # => ["こ", "ん", "に", "ち", "は"] diff --git a/doc/string/chomp.rdoc b/doc/string/chomp.rdoc new file mode 100644 index 0000000000..b6fb9ff38c --- /dev/null +++ b/doc/string/chomp.rdoc @@ -0,0 +1,29 @@ +Returns a new string copied from +self+, with trailing characters possibly removed: + +When +line_sep+ is <tt>"\n"</tt>, removes the last one or two characters +if they are <tt>"\r"</tt>, <tt>"\n"</tt>, or <tt>"\r\n"</tt> +(but not <tt>"\n\r"</tt>): + + $/ # => "\n" + "abc\r".chomp # => "abc" + "abc\n".chomp # => "abc" + "abc\r\n".chomp # => "abc" + "abc\n\r".chomp # => "abc\n" + "тест\r\n".chomp # => "тест" + "こんにちは\r\n".chomp # => "こんにちは" + +When +line_sep+ is <tt>''</tt> (an empty string), +removes multiple trailing occurrences of <tt>"\n"</tt> or <tt>"\r\n"</tt> +(but not <tt>"\r"</tt> or <tt>"\n\r"</tt>): + + "abc\n\n\n".chomp('') # => "abc" + "abc\r\n\r\n\r\n".chomp('') # => "abc" + "abc\n\n\r\n\r\n\n\n".chomp('') # => "abc" + "abc\n\r\n\r\n\r".chomp('') # => "abc\n\r\n\r\n\r" + "abc\r\r\r".chomp('') # => "abc\r\r\r" + +When +line_sep+ is neither <tt>"\n"</tt> nor <tt>''</tt>, +removes a single trailing line separator if 
there is one: + + 'abcd'.chomp('d') # => "abc" + 'abcdd'.chomp('d') # => "abcd" diff --git a/doc/string/chop.rdoc b/doc/string/chop.rdoc new file mode 100644 index 0000000000..8ef82f8a49 --- /dev/null +++ b/doc/string/chop.rdoc @@ -0,0 +1,16 @@ +Returns a new string copied from +self+, with trailing characters possibly removed. + +Removes <tt>"\r\n"</tt> if those are the last two characters. + + "abc\r\n".chop # => "abc" + "тест\r\n".chop # => "тест" + "こんにちは\r\n".chop # => "こんにちは" + +Otherwise removes the last character if it exists. + + 'abcd'.chop # => "abc" + 'тест'.chop # => "тес" + 'こんにちは'.chop # => "こんにち" + ''.chop # => "" + +If you only need to remove the newline separator at the end of the string, String#chomp is a better alternative. diff --git a/doc/string/codepoints.rdoc b/doc/string/codepoints.rdoc new file mode 100644 index 0000000000..0c55d3f4b9 --- /dev/null +++ b/doc/string/codepoints.rdoc @@ -0,0 +1,6 @@ +Returns an array of the codepoints in +self+; +each codepoint is the integer value for a character: + + 'hello'.codepoints # => [104, 101, 108, 108, 111] + 'тест'.codepoints # => [1090, 1077, 1089, 1090] + 'こんにちは'.codepoints # => [12371, 12435, 12395, 12385, 12399] diff --git a/doc/string/delete_prefix.rdoc b/doc/string/delete_prefix.rdoc new file mode 100644 index 0000000000..fa9d8abd38 --- /dev/null +++ b/doc/string/delete_prefix.rdoc @@ -0,0 +1,8 @@ +Returns a copy of +self+ with leading substring <tt>prefix</tt> removed: + + 'hello'.delete_prefix('hel') # => "lo" + 'hello'.delete_prefix('llo') # => "hello" + 'тест'.delete_prefix('те') # => "ст" + 'こんにちは'.delete_prefix('こん') # => "にちは" + +Related: String#delete_prefix!, String#delete_suffix. 
diff --git a/doc/string/delete_suffix.rdoc b/doc/string/delete_suffix.rdoc new file mode 100644 index 0000000000..4862b725cf --- /dev/null +++ b/doc/string/delete_suffix.rdoc @@ -0,0 +1,8 @@ +Returns a copy of +self+ with trailing substring <tt>suffix</tt> removed: + + 'hello'.delete_suffix('llo') # => "he" + 'hello'.delete_suffix('hel') # => "hello" + 'тест'.delete_suffix('ст') # => "те" + 'こんにちは'.delete_suffix('ちは') # => "こんに" + +Related: String#delete_suffix!, String#delete_prefix. diff --git a/doc/string/each_byte.rdoc b/doc/string/each_byte.rdoc new file mode 100644 index 0000000000..643118fea3 --- /dev/null +++ b/doc/string/each_byte.rdoc @@ -0,0 +1,17 @@ +Calls the given block with each successive byte from +self+; +returns +self+: + + 'hello'.each_byte {|byte| print byte, ' ' } + print "\n" + 'тест'.each_byte {|byte| print byte, ' ' } + print "\n" + 'こんにちは'.each_byte {|byte| print byte, ' ' } + print "\n" + +Output: + + 104 101 108 108 111 + 209 130 208 181 209 129 209 130 + 227 129 147 227 130 147 227 129 171 227 129 161 227 129 175 + +Returns an enumerator if no block is given. diff --git a/doc/string/each_char.rdoc b/doc/string/each_char.rdoc new file mode 100644 index 0000000000..e5ae5a1812 --- /dev/null +++ b/doc/string/each_char.rdoc @@ -0,0 +1,17 @@ +Calls the given block with each successive character from +self+; +returns +self+: + + 'hello'.each_char {|char| print char, ' ' } + print "\n" + 'тест'.each_char {|char| print char, ' ' } + print "\n" + 'こんにちは'.each_char {|char| print char, ' ' } + print "\n" + +Output: + + h e l l o + т е с т + こ ん に ち は + +Returns an enumerator if no block is given. 
diff --git a/doc/string/each_codepoint.rdoc b/doc/string/each_codepoint.rdoc new file mode 100644 index 0000000000..88bfcbd1c0 --- /dev/null +++ b/doc/string/each_codepoint.rdoc @@ -0,0 +1,18 @@ +Calls the given block with each successive codepoint from +self+; +each codepoint is the integer value for a character; +returns +self+: + + 'hello'.each_codepoint {|codepoint| print codepoint, ' ' } + print "\n" + 'тест'.each_codepoint {|codepoint| print codepoint, ' ' } + print "\n" + 'こんにちは'.each_codepoint {|codepoint| print codepoint, ' ' } + print "\n" + +Output: + + 104 101 108 108 111 + 1090 1077 1089 1090 + 12371 12435 12395 12385 12399 + +Returns an enumerator if no block is given. diff --git a/doc/string/each_grapheme_cluster.rdoc b/doc/string/each_grapheme_cluster.rdoc new file mode 100644 index 0000000000..40be95fcac --- /dev/null +++ b/doc/string/each_grapheme_cluster.rdoc @@ -0,0 +1,12 @@ +Calls the given block with each successive grapheme cluster from +self+ +(see {Unicode Grapheme Cluster Boundaries}[https://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries]); +returns +self+: + + s = "\u0061\u0308-pqr-\u0062\u0308-xyz-\u0063\u0308" # => "ä-pqr-b̈-xyz-c̈" + s.each_grapheme_cluster {|gc| print gc, ' ' } + +Output: + + ä - p q r - b̈ - x y z - c̈ + +Returns an enumerator if no block is given. diff --git a/doc/string/each_line.rdoc b/doc/string/each_line.rdoc new file mode 100644 index 0000000000..e254c22d40 --- /dev/null +++ b/doc/string/each_line.rdoc @@ -0,0 +1,60 @@ +With a block given, forms the substrings ("lines") +that are the result of splitting +self+ +at each occurrence of the given line separator +line_sep+; +passes each line to the block; +returns +self+: + + s = <<~EOT + This is the first line. + This is line two. + + This is line four. + This is line five. 
+ EOT + + s.each_line {|line| p line } + +Output: + + "This is the first line.\n" + "This is line two.\n" + "\n" + "This is line four.\n" + "This is line five.\n" + +With a different +line_sep+: + + s.each_line(' is ') {|line| p line } + +Output: + + "This is " + "the first line.\nThis is " + "line two.\n\nThis is " + "line four.\nThis is " + "line five.\n" + +With +chomp+ as +true+, removes the trailing +line_sep+ from each line: + + s.each_line(chomp: true) {|line| p line } + +Output: + + "This is the first line." + "This is line two." + "" + "This is line four." + "This is line five." + +With an empty string as +line_sep+, +forms and passes "paragraphs" by splitting at each occurrence +of two or more newlines: + + s.each_line('') {|line| p line } + +Output: + + "This is the first line.\nThis is line two.\n\n" + "This is line four.\nThis is line five.\n" + +With no block given, returns an enumerator. diff --git a/doc/string/encode.rdoc b/doc/string/encode.rdoc new file mode 100644 index 0000000000..65872fdfd4 --- /dev/null +++ b/doc/string/encode.rdoc @@ -0,0 +1,47 @@ +Returns a copy of +self+ transcoded as determined by +dst_encoding+. +By default, raises an exception if +self+ +contains an invalid byte or a character not defined in +dst_encoding+; +that behavior may be modified by encoding options; see below. 
+ +With no arguments: + +- Uses the same encoding if <tt>Encoding.default_internal</tt> is +nil+ + (the default): + + Encoding.default_internal # => nil + s = "Ruby\x99".force_encoding('Windows-1252') + s.encoding # => #<Encoding:Windows-1252> + s.bytes # => [82, 117, 98, 121, 153] + t = s.encode # => "Ruby\x99" + t.encoding # => #<Encoding:Windows-1252> + t.bytes # => [82, 117, 98, 121, 153] + +- Otherwise, uses the encoding <tt>Encoding.default_internal</tt>: + + Encoding.default_internal = 'UTF-8' + t = s.encode # => "Ruby™" + t.encoding # => #<Encoding:UTF-8> + +With only argument +dst_encoding+ given, uses that encoding: + + s = "Ruby\x99".force_encoding('Windows-1252') + s.encoding # => #<Encoding:Windows-1252> + t = s.encode('UTF-8') # => "Ruby™" + t.encoding # => #<Encoding:UTF-8> + +With arguments +dst_encoding+ and +src_encoding+ given, +interprets +self+ using +src_encoding+, encodes the new string using +dst_encoding+: + + s = "Ruby\x99" + t = s.encode('UTF-8', 'Windows-1252') # => "Ruby™" + t.encoding # => #<Encoding:UTF-8> + +Optional keyword arguments +enc_opts+ specify encoding options; +see {Encoding Options}[rdoc-ref:encodings.rdoc@Encoding+Options]. + +Please note that, unless the <code>invalid: :replace</code> option is +given, conversion from an encoding +enc+ to the same encoding +enc+ +(independent of whether +enc+ is given explicitly or implicitly) is a +no-op, i.e. the string is simply copied without any changes, and no +exceptions are raised, even if there are invalid bytes. + diff --git a/doc/string/end_with_p.rdoc b/doc/string/end_with_p.rdoc new file mode 100644 index 0000000000..f959cf7aaa --- /dev/null +++ b/doc/string/end_with_p.rdoc @@ -0,0 +1,11 @@ +Returns whether +self+ ends with any of the given +strings+. 
+ +Returns +true+ if any given string matches the end, +false+ otherwise: + + 'hello'.end_with?('ello') # => true + 'hello'.end_with?('heaven', 'ello') # => true + 'hello'.end_with?('heaven', 'paradise') # => false + 'тест'.end_with?('т') # => true + 'こんにちは'.end_with?('は') # => true + +Related: String#start_with?. diff --git a/doc/string/force_encoding.rdoc b/doc/string/force_encoding.rdoc new file mode 100644 index 0000000000..fd9615caaa --- /dev/null +++ b/doc/string/force_encoding.rdoc @@ -0,0 +1,20 @@ +Changes the encoding of +self+ to +encoding+, +which may be a string encoding name or an Encoding object; +returns +self+: + + s = 'łał' + s.bytes # => [197, 130, 97, 197, 130] + s.encoding # => #<Encoding:UTF-8> + s.force_encoding('ascii') # => "\xC5\x82a\xC5\x82" + s.encoding # => #<Encoding:US-ASCII> + +Does not change the underlying bytes: + + s.bytes # => [197, 130, 97, 197, 130] + +Makes the change even if the given +encoding+ is invalid +for +self+ (as is the change above): + + s.valid_encoding? # => false + s.force_encoding(Encoding::UTF_8) # => "łał" + s.valid_encoding? # => true diff --git a/doc/string/grapheme_clusters.rdoc b/doc/string/grapheme_clusters.rdoc new file mode 100644 index 0000000000..8c7f5a7259 --- /dev/null +++ b/doc/string/grapheme_clusters.rdoc @@ -0,0 +1,6 @@ +Returns an array of the grapheme clusters in +self+ +(see {Unicode Grapheme Cluster Boundaries}[https://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries]): + + s = "\u0061\u0308-pqr-\u0062\u0308-xyz-\u0063\u0308" # => "ä-pqr-b̈-xyz-c̈" + s.grapheme_clusters + # => ["ä", "-", "p", "q", "r", "-", "b̈", "-", "x", "y", "z", "-", "c̈"] diff --git a/doc/string/index.rdoc b/doc/string/index.rdoc new file mode 100644 index 0000000000..ce09a37bdf --- /dev/null +++ b/doc/string/index.rdoc @@ -0,0 +1,38 @@ +Returns the integer index of the first match for the given argument, +or +nil+ if none found; +the search of +self+ is forward, and begins at position +offset+ (in characters). 
+ +With string argument +substring+, +returns the index of the first matching substring in +self+: + + 'foo'.index('f') # => 0 + 'foo'.index('o') # => 1 + 'foo'.index('oo') # => 1 + 'foo'.index('ooo') # => nil + 'тест'.index('с') # => 2 + 'こんにちは'.index('ち') # => 3 + +With Regexp argument +regexp+, returns the index of the first match in +self+: + + 'foo'.index(/o./) # => 1 + 'foo'.index(/.o/) # => 0 + +With positive integer +offset+, begins the search at position +offset+: + + 'foo'.index('o', 1) # => 1 + 'foo'.index('o', 2) # => 2 + 'foo'.index('o', 3) # => nil + 'тест'.index('с', 1) # => 2 + 'こんにちは'.index('ち', 2) # => 3 + +With negative integer +offset+, selects the search position by counting backward +from the end of +self+: + + 'foo'.index('o', -1) # => 2 + 'foo'.index('o', -2) # => 1 + 'foo'.index('o', -3) # => 1 + 'foo'.index('o', -4) # => nil + 'foo'.index(/o./, -2) # => 1 + 'foo'.index(/.o/, -2) # => 1 + +Related: String#rindex. diff --git a/doc/string/length.rdoc b/doc/string/length.rdoc new file mode 100644 index 0000000000..544bca269f --- /dev/null +++ b/doc/string/length.rdoc @@ -0,0 +1,12 @@ +Returns the count of characters (not bytes) in +self+: + + 'foo'.length # => 3 + 'тест'.length # => 4 + 'こんにちは'.length # => 5 + +Contrast with String#bytesize: + + 'foo'.bytesize # => 3 + 'тест'.bytesize # => 8 + 'こんにちは'.bytesize # => 15 + diff --git a/doc/string/ljust.rdoc b/doc/string/ljust.rdoc new file mode 100644 index 0000000000..8e23c1fc8f --- /dev/null +++ b/doc/string/ljust.rdoc @@ -0,0 +1,16 @@ +Returns a left-justified copy of +self+. 
+ +If integer argument +size+ is greater than the size (in characters) of +self+, +returns a new string of length +size+ that is a copy of +self+, +left justified and padded on the right with +pad_string+: + + 'hello'.ljust(10) # => "hello " + ' hello'.ljust(10) # => " hello " + 'hello'.ljust(10, 'ab') # => "helloababa" + 'тест'.ljust(10) # => "тест " + 'こんにちは'.ljust(10) # => "こんにちは " + +If +size+ is not greater than the size of +self+, returns a copy of +self+: + + 'hello'.ljust(5) # => "hello" + 'hello'.ljust(1) # => "hello" diff --git a/doc/string/new.rdoc b/doc/string/new.rdoc new file mode 100644 index 0000000000..d955e61c87 --- /dev/null +++ b/doc/string/new.rdoc @@ -0,0 +1,51 @@ +Returns a new \String that is a copy of +string+. + +With no arguments, returns the empty string with the Encoding <tt>ASCII-8BIT</tt>: + + s = String.new + s # => "" + s.encoding # => #<Encoding:ASCII-8BIT> + +With optional argument +string+ and no keyword arguments, +returns a copy of +string+ with the same encoding: + + String.new('foo') # => "foo" + String.new('тест') # => "тест" + String.new('こんにちは') # => "こんにちは" + +(Unlike \String.new, +a {string literal}[rdoc-ref:syntax/literals.rdoc@String+Literals] like <tt>''</tt> or a +{here document literal}[rdoc-ref:syntax/literals.rdoc@Here+Document+Literals] +always has {script encoding}[rdoc-ref:encodings.rdoc@Script+Encoding].) + +With optional keyword argument +encoding+, returns a copy of +string+ +with the specified encoding; +the +encoding+ may be an Encoding object, an encoding name, +or an encoding name alias: + + String.new('foo', encoding: Encoding::US_ASCII).encoding # => #<Encoding:US-ASCII> + String.new('foo', encoding: 'US-ASCII').encoding # => #<Encoding:US-ASCII> + String.new('foo', encoding: 'ASCII').encoding # => #<Encoding:US-ASCII> + +The given encoding need not be valid for the string's content, +and that validity is not checked: + + s = String.new('こんにちは', encoding: 'ascii') + s.valid_encoding? 
# => false + +But the given +encoding+ itself is checked: + + String.new('foo', encoding: 'bar') # Raises ArgumentError. + +With optional keyword argument +capacity+, returns a copy of +string+ +(or an empty string, if +string+ is not given); +the given +capacity+ is advisory only, +and may or may not set the size of the internal buffer, +which may in turn affect performance: + + String.new(capacity: 1) + String.new('foo', capacity: 4096) + +The +string+, +encoding+, and +capacity+ arguments may all be used together: + + String.new('hello', encoding: 'UTF-8', capacity: 25) diff --git a/doc/string/ord.rdoc b/doc/string/ord.rdoc new file mode 100644 index 0000000000..d586363d44 --- /dev/null +++ b/doc/string/ord.rdoc @@ -0,0 +1,6 @@ +Returns the integer ordinal of the first character of +self+: + + 'h'.ord # => 104 + 'hello'.ord # => 104 + 'тест'.ord # => 1090 + 'こんにちは'.ord # => 12371 diff --git a/doc/string/partition.rdoc b/doc/string/partition.rdoc new file mode 100644 index 0000000000..ebe575e8eb --- /dev/null +++ b/doc/string/partition.rdoc @@ -0,0 +1,24 @@ +Returns a 3-element array of substrings of +self+. + +Matches a pattern against +self+, scanning from the beginning. +The pattern is: + +- +string_or_regexp+ itself, if it is a Regexp. +- <tt>Regexp.quote(string_or_regexp)</tt>, if +string_or_regexp+ is a string. + +If the pattern is matched, returns pre-match, first-match, post-match: + + 'hello'.partition('l') # => ["he", "l", "lo"] + 'hello'.partition('ll') # => ["he", "ll", "o"] + 'hello'.partition('h') # => ["", "h", "ello"] + 'hello'.partition('o') # => ["hell", "o", ""] + 'hello'.partition(/l+/) # => ["he", "ll", "o"] + 'hello'.partition('') # => ["", "", "hello"] + 'тест'.partition('т') # => ["", "т", "ест"] + 'こんにちは'.partition('に') # => ["こん", "に", "ちは"] + +If the pattern is not matched, returns a copy of +self+ and two empty strings: + + 'hello'.partition('x') # => ["hello", "", ""] + +Related: String#rpartition, String#split. 
diff --git a/doc/string/rjust.rdoc b/doc/string/rjust.rdoc new file mode 100644 index 0000000000..24e7bf3159 --- /dev/null +++ b/doc/string/rjust.rdoc @@ -0,0 +1,16 @@ +Returns a right-justified copy of +self+. + +If integer argument +size+ is greater than the size (in characters) of +self+, +returns a new string of length +size+ that is a copy of +self+, +right justified and padded on the left with +pad_string+: + + 'hello'.rjust(10) # => " hello" + 'hello '.rjust(10) # => " hello " + 'hello'.rjust(10, 'ab') # => "ababahello" + 'тест'.rjust(10) # => " тест" + 'こんにちは'.rjust(10) # => " こんにちは" + +If +size+ is not greater than the size of +self+, returns a copy of +self+: + + 'hello'.rjust(5, 'ab') # => "hello" + 'hello'.rjust(1, 'ab') # => "hello" diff --git a/doc/string/rpartition.rdoc b/doc/string/rpartition.rdoc new file mode 100644 index 0000000000..d24106fb9f --- /dev/null +++ b/doc/string/rpartition.rdoc @@ -0,0 +1,24 @@ +Returns a 3-element array of substrings of +self+. + +Matches a pattern against +self+, scanning backwards from the end. +The pattern is: + +- +string_or_regexp+ itself, if it is a Regexp. +- <tt>Regexp.quote(string_or_regexp)</tt>, if +string_or_regexp+ is a string. + +If the pattern is matched, returns pre-match, last-match, post-match: + + 'hello'.rpartition('l') # => ["hel", "l", "o"] + 'hello'.rpartition('ll') # => ["he", "ll", "o"] + 'hello'.rpartition('h') # => ["", "h", "ello"] + 'hello'.rpartition('o') # => ["hell", "o", ""] + 'hello'.rpartition(/l+/) # => ["hel", "l", "o"] + 'hello'.rpartition('') # => ["hello", "", ""] + 'тест'.rpartition('т') # => ["тес", "т", ""] + 'こんにちは'.rpartition('に') # => ["こん", "に", "ちは"] + +If the pattern is not matched, returns two empty strings and a copy of +self+: + + 'hello'.rpartition('x') # => ["", "", "hello"] + +Related: String#partition, String#split. 
diff --git a/doc/string/scrub.rdoc b/doc/string/scrub.rdoc new file mode 100644 index 0000000000..1a5b1c79d0 --- /dev/null +++ b/doc/string/scrub.rdoc @@ -0,0 +1,25 @@ +Returns a copy of +self+ with each invalid byte sequence replaced +by the given +replacement_string+. + +With no block given and no argument, replaces each invalid sequence +with the default replacement string +(<tt>"�"</tt> for a Unicode encoding, <tt>'?'</tt> otherwise): + + s = "foo\x81\x81bar" + s.scrub # => "foo��bar" + +With no block given and argument +replacement_string+ given, +replaces each invalid sequence with that string: + + "foo\x81\x81bar".scrub('xyzzy') # => "fooxyzzyxyzzybar" + +With a block given, replaces each invalid sequence with the value +of the block: + + "foo\x81\x81bar".scrub {|bytes| p bytes; 'XYZZY' } + # => "fooXYZZYXYZZYbar" + +Output: + + "\x81" + "\x81" diff --git a/doc/string/split.rdoc b/doc/string/split.rdoc new file mode 100644 index 0000000000..5ab065093b --- /dev/null +++ b/doc/string/split.rdoc @@ -0,0 +1,86 @@ +Returns an array of substrings of +self+ +that are the result of splitting +self+ +at each occurrence of the given field separator +field_sep+. + +When +field_sep+ is <tt>$;</tt>: + +- If <tt>$;</tt> is +nil+ (its default value), + the split occurs just as if +field_sep+ were given as a space character + (see below). + +- If <tt>$;</tt> is a string, + the split occurs just as if +field_sep+ were given as that string + (see below). 
+ +When +field_sep+ is <tt>' '</tt> and +limit+ is +nil+, +the split occurs at each sequence of whitespace: + + 'abc def ghi'.split(' ') # => ["abc", "def", "ghi"] + "abc \n\tdef\t\n ghi".split(' ') # => ["abc", "def", "ghi"] + 'abc def ghi'.split(' ') # => ["abc", "def", "ghi"] + ''.split(' ') # => [] + +When +field_sep+ is a string different from <tt>' '</tt> +and +limit+ is +nil+, +the split occurs at each occurrence of +field_sep+; +trailing empty substrings are not returned: + + 'abracadabra'.split('ab') # => ["", "racad", "ra"] + 'aaabcdaaa'.split('a') # => ["", "", "", "bcd"] + ''.split('a') # => [] + '3.14159'.split('1') # => ["3.", "4", "59"] + '!@#$%^$&*($)_+'.split('$') # => ["!@#", "%^", "&*(", ")_+"] + 'тест'.split('т') # => ["", "ес"] + 'こんにちは'.split('に') # => ["こん", "ちは"] + +When +field_sep+ is a Regexp and +limit+ is +nil+, +the split occurs at each occurrence of a match; +trailing empty substrings are not returned: + + 'abracadabra'.split(/ab/) # => ["", "racad", "ra"] + 'aaabcdaaa'.split(/a/) # => ["", "", "", "bcd"] + 'aaabcdaaa'.split(//) # => ["a", "a", "a", "b", "c", "d", "a", "a", "a"] + '1 + 1 == 2'.split(/\W+/) # => ["1", "1", "2"] + +If the \Regexp contains groups, their matches are also included +in the returned array: + + '1:2:3'.split(/(:)()()/, 2) # => ["1", ":", "", "", "2:3"] + +As seen above, if +limit+ is +nil+, +trailing empty substrings are not returned; +the same is true if +limit+ is zero: + + 'aaabcdaaa'.split('a') # => ["", "", "", "bcd"] + 'aaabcdaaa'.split('a', 0) # => ["", "", "", "bcd"] + +If +limit+ is a positive integer +n+, no more than <tt>n - 1</tt> +splits occur, so that at most +n+ substrings are returned, +and trailing empty substrings are included: + + 'aaabcdaaa'.split('a', 1) # => ["aaabcdaaa"] + 'aaabcdaaa'.split('a', 2) # => ["", "aabcdaaa"] + 'aaabcdaaa'.split('a', 5) # => ["", "", "", "bcd", "aa"] + 'aaabcdaaa'.split('a', 7) # => ["", "", "", "bcd", "", "", ""] + 'aaabcdaaa'.split('a', 8) # => ["", "", "", "bcd", "", "", ""] + 
+Note that if +field_sep+ is a \Regexp containing groups, +their matches are in the returned array, but do not count toward the limit. + +If +limit+ is negative, there is no limit on the number of substrings returned +(as when +limit+ is +nil+), +but trailing empty substrings are included: + + 'aaabcdaaa'.split('a', -1) # => ["", "", "", "bcd", "", "", ""] + +If a block is given, it is called with each substring: + + 'abc def ghi'.split(' ') {|substring| p substring } + +Output: + + "abc" + "def" + "ghi" + +Related: String#partition, String#rpartition. diff --git a/doc/string/start_with_p.rdoc b/doc/string/start_with_p.rdoc new file mode 100644 index 0000000000..5d1f9f9543 --- /dev/null +++ b/doc/string/start_with_p.rdoc @@ -0,0 +1,18 @@ +Returns whether +self+ starts with any of the given +string_or_regexp+. + +Matches patterns against the beginning of +self+. +For each given +string_or_regexp+, the pattern is: + +- +string_or_regexp+ itself, if it is a Regexp. +- <tt>Regexp.quote(string_or_regexp)</tt>, if +string_or_regexp+ is a string. + +Returns +true+ if any pattern matches the beginning, +false+ otherwise: + + 'hello'.start_with?('hell') # => true + 'hello'.start_with?(/H/i) # => true + 'hello'.start_with?('heaven', 'hell') # => true + 'hello'.start_with?('heaven', 'paradise') # => false + 'тест'.start_with?('т') # => true + 'こんにちは'.start_with?('こ') # => true + +Related: String#end_with?. diff --git a/doc/string/sum.rdoc b/doc/string/sum.rdoc new file mode 100644 index 0000000000..5de24e6402 --- /dev/null +++ b/doc/string/sum.rdoc @@ -0,0 +1,11 @@ +Returns a basic +n+-bit checksum of the characters in +self+; +the checksum is the sum of the binary value of each byte in +self+, +modulo <tt>2**n</tt>: + + 'hello'.sum # => 532 + 'hello'.sum(4) # => 4 + 'hello'.sum(64) # => 532 + 'тест'.sum # => 1405 + 'こんにちは'.sum # => 2582 + +This is not a particularly strong checksum. |