summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ChangeLog6
-rw-r--r--encoding.c156
2 files changed, 161 insertions, 1 deletions
diff --git a/ChangeLog b/ChangeLog
index 2614ec5af9..b146cdac0b 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+Wed Feb 15 06:58:21 2012 Eric Hodel <drbrain@segment7.net>
+
+ * encoding.c (Init_Encoding): Add Encoding documentation.
+ [ruby-trunk - Bug #5949]
+ * encoding.c (rb_set_default_external): Fix typo in documentation.
+
Tue Feb 14 20:22:11 2012 Narihiro Nakamura <authornari@gmail.com>
* gc.c (CEILDIV): rename to an appropriate name.
diff --git a/encoding.c b/encoding.c
index df425f6ebf..dd3def91b5 100644
--- a/encoding.c
+++ b/encoding.c
@@ -1322,7 +1322,7 @@ rb_enc_set_default_external(VALUE encoding)
*
* Sets default external encoding. You should not set
* Encoding::default_external in ruby code as strings created before changing
- * the value may have a different encoding from strings created after thevalue
+ * the value may have a different encoding from strings created after the value
* was changed. Instead, you should use <tt>ruby -E</tt> to invoke ruby with
* the correct default_external.
*
@@ -1603,6 +1603,160 @@ rb_enc_aliases(VALUE klass)
return aliases[0];
}
+/*
+ * An Encoding instance represents a character encoding usable in Ruby. It is
+ * defined as a constant under the Encoding namespace. It has a name and,
+ * optionally, aliases:
+ *
+ * Encoding::ISO_8859_1.name
+ * #=> "ISO-8859-1"
+ *
+ * Encoding::ISO_8859_1.names
+ * #=> ["ISO-8859-1", "ISO8859-1"]
+ *
+ * Ruby methods dealing with encodings return or accept Encoding instances as
+ * arguments (when a method accepts an Encoding instance as an argument, it
+ * can be passed an Encoding name or alias instead).
+ *
+ * "some string".encoding
+ * #=> #<Encoding:UTF-8>
+ *
+ * string = "some string".encode(Encoding::ISO_8859_1)
+ * #=> "some string"
+ * string.encoding
+ * #=> #<Encoding:ISO-8859-1>
+ *
+ * "some string".encode("ISO-8859-1")
+ * #=> "some string"
+ *
+ * <code>Encoding::ASCII_8BIT</code> is a special encoding that does not
+ * correspond to any character encoding. In fact it represents the absence of
+ * encoding and objects with this encoding can be seen as binary data.
+ *
+ * == Changing an encoding
+ *
+ * The associated Encoding of a String can be changed in two different
+ * ways.
+ *
+ * First, it is possible to set the Encoding of a string to a new Encoding
+ * without changing the internal byte representation of the string, with
+ * String#force_encoding. This is how you can tell Ruby the correct encoding
+ * of a string.
+ *
+ * string
+ * #=> "R\xC3\xA9sum\xC3\xA9"
+ * string.encoding
+ * #=> #<Encoding:ISO-8859-1>
+ * string.force_encoding(Encoding::UTF_8)
+ * #=> "Résumé"
+ *
+ * Second, it is possible to transcode a string, i.e. translate its internal
+ * byte representation to another encoding. Its associated encoding is also
+ * set to the other encoding. See String#encode for the various forms of
+ * transcoding, and the Encoding::Converter class for additional control over
+ * the transcoding process.
+ *
+ * string
+ * #=> "R\u00E9sum\u00E9"
+ * string.encoding
+ * #=> #<Encoding:UTF-8>
+ * string = string.encode! Encoding::ISO_8859_1
+ * #=> "R\xE9sum\xE9"
+ * string.encoding
+ * #=> #<Encoding:ISO-8859-1>
+ *
+ * == Locale encoding
+ *
+ * All Ruby source code has an associated Encoding which any String literal
+ * created in the source code will be associated to.
+ *
+ * The default locale encoding is <code>Encoding::US_ASCII</code>, but it can
+ * be changed by a magic comment on the first line of the source code file (or
+ * second line, if there is a shebang line on the first). The comment must
+ * contain the word <code>coding</code> or <code>encoding</code>, followed
+ * by a colon, space and the Encoding name or alias:
+ *
+ * # encoding: UTF-8
+ *
+ * "some string".encoding
+ * #=> #<Encoding:UTF-8>
+ *
+ * The <code>__ENCODING__</code> keyword returns the locale encoding that is
+ * currently active:
+ *
+ * # encoding: ISO-8859-1
+ *
+ * __ENCODING__
+ * #=> #<Encoding:ISO-8859-1>
+ *
+ * <code>ruby -K</code> will change the default locale encoding, but this is
+ * not recommended. Ruby source files should include the locale encoding
+ * comment even when they depend on US-ASCII strings or regular expressions.
+ *
+ * == External encoding
+ *
+ * Each IO object has an external encoding which indicates the encoding that
+ * Ruby will use to read its data. By default Ruby sets the external encoding
+ * of an IO object to the default external encoding (initialized from the
+ * user's environment, or set with the interpreter <code>-E</code> option).
+ * Encoding.default_external returns the current value of the external
+ * encoding.
+ *
+ * ENV["LANG"]
+ * #=> "UTF-8"
+ * Encoding.default_external
+ * #=> #<Encoding:UTF-8>
+ *
+ * $ ruby -E ISO-8859-1 -e "p Encoding.default_external"
+ * #<Encoding:ISO-8859-1>
+ *
+ * $ LANG=C ruby -e 'p Encoding.default_external'
+ * #<Encoding:US-ASCII>
+ *
+ * The default external encoding may also be set through
+ * Encoding.default_external=, but you should not do this as strings created
+ * before and after the change will have inconsistent encodings. Instead use
+ * <code>ruby -E</code> to invoke ruby with the correct external encoding.
+ *
+ * When you know that the actual encoding of the data of an IO object is not
+ * the default external encoding, you can reset its external encoding with
+ * IO#set_encoding or set it at IO object creation (see IO.new options).
+ *
+ * == Internal encoding
+ *
+ * To process the data of an IO object which has an encoding different
+ * from its external encoding, you can set its internal encoding. Ruby will use
+ * this internal encoding to transcode the data when it is read from the IO
+ * object.
+ *
+ * Conversely, when data is written to the IO object it is transcoded from the
+ * internal encoding to the external encoding of the IO object.
+ *
+ * The internal encoding of an IO object can be set with
+ * IO#set_encoding or at IO object creation (see IO.new options).
+ *
+ * The internal encoding is optional and when not set, the Ruby default
+ * internal encoding is used. If not explicitly set this default internal
+ * encoding is +nil+ meaning that by default, no transcoding occurs.
+ *
+ * The default internal encoding can be set with the interpreter option
+ * <code>-E</code>. Encoding.default_internal returns the current internal
+ * encoding.
+ *
+ * $ ruby -e 'p Encoding.default_internal'
+ * nil
+ *
+ * $ ruby -E ISO-8859-1:UTF-8 -e "p [Encoding.default_external, \
+ * Encoding.default_internal]"
+ * [#<Encoding:ISO-8859-1>, #<Encoding:UTF-8>]
+ *
+ * The default internal encoding may also be set through
+ * Encoding.default_internal=, but you should not do this as strings created
+ * before and after the change will have inconsistent encodings. Instead use
+ * <code>ruby -E</code> to invoke ruby with the correct internal encoding.
+ *
+ */
+
void
Init_Encoding(void)
{