summaryrefslogtreecommitdiff
path: root/doc
diff options
context:
space:
mode:
author: Burdette Lamar <BurdetteLamar@Yahoo.com> 2022-03-31 15:09:25 -0500
committer: GitHub <noreply@github.com> 2022-03-31 15:09:25 -0500
commit: 056b7a86335f38618c1749674a11e838de7c3c12 (patch)
tree: b6c0eaabec11b7377c081b274e32ebeff04d2c86 /doc
parent: bb037f6d8639b7d36ef263ca24d4117d725e71ef (diff)
[DOC] Enhanced RDoc for String (#5742)
Treats: #force_encoding #b #valid_encoding? #ascii_only? #scrub #scrub! #unicode_normalized? Plus a couple of minor tweaks.
Notes
Notes: Merged-By: BurdetteLamar <BurdetteLamar@Yahoo.com>
Diffstat (limited to 'doc')
-rw-r--r-- doc/string/b.rdoc 14
-rw-r--r-- doc/string/force_encoding.rdoc 20
-rw-r--r-- doc/string/scrub.rdoc 25
3 files changed, 59 insertions, 0 deletions
diff --git a/doc/string/b.rdoc b/doc/string/b.rdoc
new file mode 100644
index 0000000000..f8ad2910b4
--- /dev/null
+++ b/doc/string/b.rdoc
@@ -0,0 +1,14 @@
+Returns a copy of +self+ that has ASCII-8BIT encoding;
+the underlying bytes are not modified:
+
+ s = "\x99"
+ s.encoding # => #<Encoding:UTF-8>
+ t = s.b # => "\x99"
+ t.encoding # => #<Encoding:ASCII-8BIT>
+
+ s = "\u4095" # => "䂕"
+ s.encoding # => #<Encoding:UTF-8>
+ s.bytes # => [228, 130, 149]
+ t = s.b # => "\xE4\x82\x95"
+ t.encoding # => #<Encoding:ASCII-8BIT>
+ t.bytes # => [228, 130, 149]
diff --git a/doc/string/force_encoding.rdoc b/doc/string/force_encoding.rdoc
new file mode 100644
index 0000000000..fd9615caaa
--- /dev/null
+++ b/doc/string/force_encoding.rdoc
@@ -0,0 +1,20 @@
+Changes the encoding of +self+ to +encoding+,
+which may be a string encoding name or an Encoding object;
+returns +self+:
+
+ s = 'łał'
+ s.bytes # => [197, 130, 97, 197, 130]
+ s.encoding # => #<Encoding:UTF-8>
+ s.force_encoding('ascii') # => "\xC5\x82a\xC5\x82"
+ s.encoding # => #<Encoding:US-ASCII>
+
+Does not change the underlying bytes:
+
+ s.bytes # => [197, 130, 97, 197, 130]
+
+Makes the change even if the bytes of +self+ are not valid
+in the given +encoding+ (as in the change above):
+
+ s.valid_encoding? # => false
+ s.force_encoding(Encoding::UTF_8) # => "łał"
+ s.valid_encoding? # => true
diff --git a/doc/string/scrub.rdoc b/doc/string/scrub.rdoc
new file mode 100644
index 0000000000..1a5b1c79d0
--- /dev/null
+++ b/doc/string/scrub.rdoc
@@ -0,0 +1,25 @@
+Returns a copy of +self+ with each invalid byte sequence replaced
+by the given +replacement_string+.
+
+With no block given and no argument, replaces each invalid sequence
+with the default replacement string
+(<tt>"�"</tt> for a Unicode encoding, <tt>'?'</tt> otherwise):
+
+ s = "foo\x81\x81bar"
+ s.scrub # => "foo��bar"
+
+With no block given and argument +replacement_string+ given,
+replaces each invalid sequence with that string:
+
+ "foo\x81\x81bar".scrub('xyzzy') # => "fooxyzzyxyzzybar"
+
+With a block given, calls the block with each invalid byte sequence
+and replaces that sequence with the block's return value:
+
+ "foo\x81\x81bar".scrub {|bytes| p bytes; 'XYZZY' }
+ # => "fooXYZZYXYZZYbar"
+
+Output:
+
+ "\x81"
+ "\x81"