From 3dbaf1ebf86d9dcedb1cfd76738cd1ac13ba0046 Mon Sep 17 00:00:00 2001 From: yugui Date: Tue, 30 Dec 2008 11:05:23 +0000 Subject: merges r21187 from trunk into ruby_1_9_1. * transcode.c: Minor fixes and tweaks in documentation. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_1_9_1@21200 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- transcode.c | 161 ++++++++++++++++++++++++++++++++---------------------------- 1 file changed, 85 insertions(+), 76 deletions(-) (limited to 'transcode.c') diff --git a/transcode.c b/transcode.c index f925daa584..398c012e23 100644 --- a/transcode.c +++ b/transcode.c @@ -148,7 +148,7 @@ struct rb_econv_t { typedef struct { const char *sname; const char *dname; - const char *lib; /* maybe null. it means that don't load the library. */ + const char *lib; /* null means no need to load a library */ const rb_transcoder *transcoder; } transcoder_entry_t; @@ -349,7 +349,7 @@ transcode_search_path(const char *sname, const char *dname, st_free_table(bfs.visited); - return pathlen; /* is -1 if !found */ + return pathlen; /* is -1 if not found */ } static const rb_transcoder * @@ -1694,7 +1694,7 @@ rb_econv_asciicompat_encoding(const char *ascii_incompat_name) /* * Assumption: - * There are at most one transcoder for + * There is at most one transcoder for * converting from ASCII incompatible encoding. * * For ISO-2022-JP, there is ISO-2022-JP -> stateless-ISO-2022-JP and no others. @@ -2574,33 +2574,43 @@ str_encode_bang(int argc, VALUE *argv, VALUE str) * to encoding +encoding+. * The second form returns a copy of str transcoded * from src_encoding to dst_encoding. - * The options Hash gives details for conversion. * The last form returns a copy of str transcoded to * Encoding.default_internal.
+ * By default, the first and second form raise + * Encoding::UndefinedConversionError for characters that are + * undefined in the destination encoding, and + * Encoding::InvalidByteSequenceError for invalid byte sequences + * in the source encoding. The last form by default does not raise + * exceptions but uses replacement strings. + * The options Hash gives details for conversion. * * === options - * A hash options can have the following keys: + * The hash options can have the following keys: * :invalid :: - * If the value is :replace #encode replaces - * invalid characters in str with the replacement character. + * If the value is :replace, #encode replaces + * invalid byte sequences in str with the replacement character. + * The default is to raise the exception. * :undef :: - * If the value is :replace #encode replaces - * characters which are undefined in the destination character set with + * If the value is :replace, #encode replaces + * characters which are undefined in the destination encoding with * the replacement character. * :replace :: - * sets the replacement character to the value. + * Sets the replacement string to the value. The default replacement + * string is "\uFFFD" for Unicode encoding forms, and "?" otherwise. * :xml :: * The value must be :text or :attr. * If the value is :text #encode replaces - * undefined characters with its numerical character reference. - * If the value is :attr #encode also quotes - * the replacement result. + * undefined characters with their (upper-case hexadecimal) numeric + * character references. '&', '<', and '>' are converted to "&amp;", + * "&lt;", and "&gt;", respectively. + * If the value is :attr, #encode also quotes + * the replacement result (using '"'), and replaces '"' with "&quot;". * :cr_newline :: - * replaces EOL with CR. + * Replaces LF ("\n") with CR ("\r") if value is true. * :crlf_newline :: - * replaces EOL with CR LF. + * Replaces LF ("\n") with CRLF ("\r\n") if value is true.
* :universal_newline :: - * replaces EOL with LF. + * Replaces CRLF ("\r\n") and CR ("\r") with LF ("\n") if value is true. */ static VALUE @@ -2680,13 +2690,12 @@ make_encobj(const char *name) * It returns nil if the argument is an ASCII compatible encoding. * * "corresponding ASCII compatible encoding" is a ASCII compatible encoding which - * represents same characters in the given ASCII incompatible encoding. + * can represent exactly the same characters as the given ASCII incompatible encoding. + * So, no conversion undefined error occurs when converting between the two encodings. * - * So, no conversion undefined error occur between the ASCII compatible and incompatible encoding. - * - * Encoding::Converter.stateless_encoding("ISO-2022-JP") #=> #<Encoding:stateless-ISO-2022-JP> - * Encoding::Converter.stateless_encoding("UTF-16BE") #=> #<Encoding:UTF-8> - * Encoding::Converter.stateless_encoding("UTF-8") #=> nil + * Encoding::Converter.asciicompat_encoding("ISO-2022-JP") #=> #<Encoding:stateless-ISO-2022-JP> + * Encoding::Converter.asciicompat_encoding("UTF-16BE") #=> #<Encoding:UTF-8> + * Encoding::Converter.asciicompat_encoding("UTF-8") #=> nil * */ static VALUE @@ -2828,7 +2837,7 @@ search_convpath_i(const char *sname, const char *dname, int depth, void *arg) * Encoding::Converter.search_convpath(source_encoding, destination_encoding) -> ary * Encoding::Converter.search_convpath(source_encoding, destination_encoding, opt) -> ary * - * returns the conversion path. + * Returns a conversion path. * * p Encoding::Converter.search_convpath("ISO-8859-1", "EUC-JP") * #=> [[#<Encoding:ISO-8859-1>, #<Encoding:UTF-8>], @@ -2869,8 +2878,8 @@ econv_s_search_convpath(int argc, VALUE *argv, VALUE klass) } /* - * check the existance of converter. - * returns the count of the converting paths. + * Check the existence of a conversion path. + * Returns the number of converters in the conversion path.
* result: >=0:success -1:failure */ int @@ -2946,7 +2955,7 @@ rb_econv_init_by_convpath(VALUE self, VALUE convpath, arg.ret = 0; ret = transcode_search_path(sname, dname, rb_econv_init_by_convpath_i, &arg); if (ret == -1 || arg.ret == -1) - rb_raise(rb_eArgError, "conversion add failed: %s to %s", sname, dname); + rb_raise(rb_eArgError, "adding conversion failed: %s to %s", sname, dname); if (first) { first = 0; *senc_p = senc; @@ -3001,15 +3010,15 @@ rb_econv_init_by_convpath(VALUE self, VALUE convpath, * * Encoding::Converter.new creates an instance of Encoding::Converter. * - * source_encoding and destination_encoding should be a string or + * Source_encoding and destination_encoding should be a string or * Encoding object. * * opt should be nil, a hash or an integer. * * convpath should be an array. - * convpath should contains - * - two-element array which contains encoding or encoding name, or - * - a string of decorator name. + * convpath may contain + * - two-element arrays which contain encodings or encoding names, or + * - strings representing decorator names. * * Encoding::Converter.new optionally takes an option. * The option should be a hash or an integer. @@ -3018,32 +3027,32 @@ rb_econv_init_by_convpath(VALUE self, VALUE convpath, * Encoding::Converter::INVALID_REPLACE, etc. * * [:invalid => nil] - * raise error on invalid byte sequence. This is a default behavior. + * Raise error on invalid byte sequence. This is a default behavior. * [:invalid => :replace] - * replace invalid byte sequence as a replacement string. + * Replace invalid byte sequence by replacement string. * [:undef => nil] - * raise error on conversion failure due to an character in source_encoding is not defined in destination_encoding. + * Raise an error if a character in source_encoding is not defined in destination_encoding. * This is a default behavior. * [:undef => :replace] - * replace undefined character in destination_encoding as a replacement string. 
+ * Replace undefined character in destination_encoding with replacement string. * [:replace => string] - * specify the replacement string. + * Specify the replacement string. * If not specified, "\uFFFD" is used for Unicode encodings and "?" for others. * [:universal_newline => true] - * convert CRLF and CR to LF. + * Convert CRLF and CR to LF. * [:crlf_newline => true] - * convert LF to CRLF. + * Convert LF to CRLF. * [:cr_newline => true] - * convert LF to CR. + * Convert LF to CR. * [:xml => :text] - * escape as XML CharData. + * Escape as XML CharData. * This form can be used as a HTML 4.0 #PCDATA. * - '&' -> '&amp;' * - '<' -> '&lt;' * - '>' -> '&gt;' * - undefined characters in destination_encoding -> hexadecimal CharRef such as &#xHH; * [:xml => :attr] - * escape as XML AttValue. + * Escape as XML AttValue. * The converted result is quoted as "...". * This form can be used as a HTML 4.0 attribute value. * - '&' -> '&amp;' @@ -3052,11 +3061,11 @@ rb_econv_init_by_convpath(VALUE self, VALUE convpath, * - '"' -> '&quot;' * - undefined characters in destination_encoding -> hexadecimal CharRef such as &#xHH; * - * example: + * Examples: * # UTF-16BE to UTF-8 * ec = Encoding::Converter.new("UTF-16BE", "UTF-8") * - * # Usually, decorators such as newline conversion are inserted at last. + * # Usually, decorators such as newline conversion are inserted last. * ec = Encoding::Converter.new("UTF-16BE", "UTF-8", :universal_newline => true) * p ec.convpath #=> [[#<Encoding:UTF-16BE>, #<Encoding:UTF-8>], * # "universal_newline"] @@ -3067,7 +3076,7 @@ rb_econv_init_by_convpath(VALUE self, VALUE convpath, * p ec.convpath #=> ["crlf_newline", * # [#<Encoding:UTF-8>, #<Encoding:UTF-16BE>]] * - * # conversion path can be specified directly. + * # Conversion path can be specified directly.
* ec = Encoding::Converter.new(["universal_newline", ["EUC-JP", "UTF-8"], ["UTF-8", "UTF-16BE"]]) * p ec.convpath #=> ["universal_newline", * # [#<Encoding:EUC-JP>, #<Encoding:UTF-8>], @@ -3166,7 +3175,7 @@ check_econv(VALUE self) * call-seq: * ec.source_encoding -> encoding * - * returns the source encoding as an Encoding object. + * Returns the source encoding as an Encoding object. */ static VALUE econv_source_encoding(VALUE self) @@ -3181,7 +3190,7 @@ econv_source_encoding(VALUE self) * call-seq: * ec.destination_encoding -> encoding * - * returns the destination encoding as an Encoding object. + * Returns the destination encoding as an Encoding object. */ static VALUE econv_destination_encoding(VALUE self) @@ -3196,7 +3205,7 @@ econv_destination_encoding(VALUE self) * call-seq: * ec.convpath -> ary * - * returns the conversion path of ec. + * Returns the conversion path of ec. * * The result is an array of conversions. * @@ -3206,9 +3215,9 @@ econv_destination_encoding(VALUE self) * # [#<Encoding:UTF-8>, #<Encoding:EUC-JP>], * # "crlf_newline"] * - * A element of the array is a pair of encodings or a string. - * The pair means encoding conversion. - * The string means decorator. + * Each element of the array is a pair of encodings or a string. + * A pair means an encoding conversion. + * A string means a decorator. * * In the above example, [#<Encoding:ISO-8859-1>, #<Encoding:UTF-8>] means * a converter from ISO-8859-1 to UTF-8. @@ -3452,11 +3461,11 @@ econv_primitive_convert(int argc, VALUE *argv, VALUE self) * call-seq: * ec.convert(source_string) -> destination_string * - * convert source_string and return destination_string. + * Convert source_string and return destination_string. * * source_string is assumed as a part of source. * i.e. :partial_input=>true is specified internally. - * finish method should be used at last. + * finish method should be used last.
* * ec = Encoding::Converter.new("utf-8", "euc-jp") * puts ec.convert("\u3042").dump #=> "\xA4\xA2" @@ -3521,8 +3530,8 @@ econv_convert(VALUE self, VALUE source_string) * call-seq: * ec.finish -> string * - * finishes the converter. - * It returns the last part of converted string. + * Finishes the converter. + * It returns the last part of the converted string. * * ec = Encoding::Converter.new("utf-8", "iso-2022-jp") * p ec.convert("\u3042") #=> "\e$B$\"" @@ -3565,8 +3574,8 @@ econv_finish(VALUE self) * call-seq: * ec.primitive_errinfo -> array * - * primitive_errinfo returns a precious information of the last error result - * as a 5-elements array: + * primitive_errinfo returns important information regarding the last error + * as a 5-element array: * * [result, enc1, enc2, error_bytes, readagain_bytes] * @@ -3575,12 +3584,12 @@ econv_finish(VALUE self) * Other elements are only meaningful when result is * :invalid_byte_sequence, :incomplete_input or :undefined_conversion. * - * enc1 and enc2 indicates a conversion step as pair of strings. - * For example, a converter from EUC-JP to ISO-8859-1 converters - * a string as EUC-JP -> UTF-8 -> ISO-8859-1. - * So [enc1, enc2] is ["EUC-JP", "UTF-8"] or ["UTF-8", "ISO-8859-1"]. + * enc1 and enc2 indicate a conversion step as a pair of strings. + * For example, a converter from EUC-JP to ISO-8859-1 converts + * a string as follows: EUC-JP -> UTF-8 -> ISO-8859-1. + * So [enc1, enc2] is either ["EUC-JP", "UTF-8"] or ["UTF-8", "ISO-8859-1"]. * - * error_bytes and readagain_bytes indicates the byte sequences which causes the error. + * error_bytes and readagain_bytes indicate the byte sequences which caused the error. * error_bytes is discarded portion. * readagain_bytes is buffered portion which is read again on next conversion. * @@ -3666,14 +3675,14 @@ econv_primitive_errinfo(VALUE self) * call-seq: * ec.insert_output(string) -> nil * - * inserts string into the encoding converter. 
- * The string will be converted into the destination encoding and - * outputed on later conversions. + * Inserts string into the encoding converter. + * The string will be converted to the destination encoding and + * output on later conversions. * * If the destination encoding is stateful, - * string is converted according to the state and update the state. + * string is converted according to the state and the state is updated. * - * This method should be used only when a conversion error is occur. + * This method should be used only when a conversion error occurs. * * ec = Encoding::Converter.new("utf-8", "iso-8859-1") * src = "HIRAGANA LETTER A is \u{3042}." @@ -3720,7 +3729,7 @@ econv_insert_output(VALUE self, VALUE string) * ec.putback => string * ec.putback(max_numbytes) => string * - * put back the bytes which will be converted. + * Put back the bytes which will be converted. * * The bytes are caused by invalid_byte_sequence error. * When invalid_byte_sequence error, some bytes are discarded and @@ -3772,8 +3781,8 @@ econv_putback(int argc, VALUE *argv, VALUE self) * call-seq: * ec.last_error -> exception or nil * - * returns an exception object for the last conversion. - * It returns nil if the last conversion is not an error. + * Returns an exception object for the last conversion. + * Returns nil if the last conversion did not produce an error. * * "error" means that * Encoding::InvalidByteSequenceError and Encoding::UndefinedConversionError for @@ -3804,7 +3813,7 @@ econv_last_error(VALUE self) * call-seq: * ec.replacement -> string * - * returns the replacement string. + * Returns the replacement string. * * ec = Encoding::Converter.new("euc-jp", "us-ascii") * p ec.replacement #=> "?" @@ -3832,7 +3841,7 @@ econv_get_replacement(VALUE self) * call-seq: * ec.replacement = string * - * sets the replacement string. + * Sets the replacement string. 
* * ec = Encoding::Converter.new("utf-8", "us-ascii", :undef => :replace) * ec.replacement = "<undef>" @@ -3883,7 +3892,7 @@ rb_econv_check_error(rb_econv_t *ec) * call-seq: * ecerr.source_encoding_name -> string * - * returns the source encoding name as a string. + * Returns the source encoding name as a string. */ static VALUE ecerr_source_encoding_name(VALUE self) @@ -3895,7 +3904,7 @@ ecerr_source_encoding_name(VALUE self) * call-seq: * ecerr.source_encoding -> encoding * - * returns the source encoding as an encoding object. + * Returns the source encoding as an encoding object. * * Note that the result may not be equal to the source encoding of * the encoding converter if the conversion has multiple steps. @@ -3921,7 +3930,7 @@ ecerr_source_encoding(VALUE self) * call-seq: * ecerr.destination_encoding_name -> string * - * returns the destination encoding name as a string. + * Returns the destination encoding name as a string. */ static VALUE ecerr_destination_encoding_name(VALUE self) @@ -3933,7 +3942,7 @@ ecerr_destination_encoding_name(VALUE self) * call-seq: * ecerr.destination_encoding -> string * - * returns the destination encoding as an encoding object. + * Returns the destination encoding as an encoding object. */ static VALUE ecerr_destination_encoding(VALUE self) @@ -3945,7 +3954,7 @@ ecerr_destination_encoding(VALUE self) * call-seq: * ecerr.error_char -> string * - * returns the one-character string which cause Encoding::UndefinedConversionError. + * Returns the one-character string which caused Encoding::UndefinedConversionError. * * ec = Encoding::Converter.new("ISO-8859-1", "EUC-JP") * begin @@ -3966,7 +3975,7 @@ ecerr_error_char(VALUE self) * call-seq: * ecerr.error_bytes -> string * - * returns the discarded bytes when Encoding::InvalidByteSequenceError occur. + * Returns the discarded bytes when Encoding::InvalidByteSequenceError occurs. * * ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1") * begin -- cgit v1.2.3