summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ChangeLog39
-rw-r--r--enc/trans/escape.trans2
-rw-r--r--enc/trans/iso2022.trans8
-rw-r--r--enc/trans/japanese.trans4
-rw-r--r--enc/trans/newline.trans6
-rw-r--r--enc/trans/utf_16_32.trans16
-rw-r--r--include/ruby/encoding.h5
-rw-r--r--io.c2
-rw-r--r--test/ruby/test_econv.rb30
-rw-r--r--tool/transcode-tblgen.rb2
-rw-r--r--transcode.c86
-rw-r--r--transcode_data.h12
12 files changed, 130 insertions, 82 deletions
diff --git a/ChangeLog b/ChangeLog
index b980cec..90f769f 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,42 @@
+Mon Sep 8 23:24:54 2008 Tanaka Akira <akr@fsij.org>
+
+ * include/ruby/encoding.h (rb_econv_asciicompat_encoding): renamed
+ from rb_econv_stateless_encoding so that it also applies to stateless
+ ASCII incompatible encodings such as UTF-16BE.
+
+ * io.c (make_writeconv): use rb_econv_asciicompat_encoding.
+
+ * transcode_data.h (rb_transcoder_asciicompat_type_t): renamed from
+ rb_transcoder_stateful_type_t.
+ (rb_transcoder): use rb_transcoder_asciicompat_type_t.
+
+ * transcode.c: follow the type change.
+ (asciicompat_encoding_i): renamed from stateless_encoding_i.
+ (rb_econv_asciicompat_encoding): renamed from
+ rb_econv_stateless_encoding.
+ (econv_s_asciicompat_encoding): method renamed.
+
+ * tool/transcode-tblgen.rb: follow the type change.
+
+ * enc/trans/utf_16_32.trans: follow the type change.
+ rb_from_UTF_16BE to UTF-8 is asciicompat_decoder.
+ rb_from_UTF_16LE to UTF-8 is asciicompat_decoder.
+ rb_from_UTF_32BE to UTF-8 is asciicompat_decoder.
+ rb_from_UTF_32LE to UTF-8 is asciicompat_decoder.
+ UTF-8 to rb_to_UTF_16BE is asciicompat_encoder.
+ UTF-8 to rb_to_UTF_16LE is asciicompat_encoder.
+ UTF-8 to rb_to_UTF_32BE is asciicompat_encoder.
+ UTF-8 to rb_to_UTF_32LE is asciicompat_encoder.
+
+ * enc/trans/newline.trans: follow the type change. universal newline
+ decoder is asciicompat_converter.
+
+ * enc/trans/escape.trans: follow the type change.
+
+ * enc/trans/iso2022.trans: ditto.
+
+ * enc/trans/japanese.trans: ditto.
+
Mon Sep 8 23:05:42 2008 Tanaka Akira <akr@fsij.org>
* transcode.c (rb_econv_insert_output): "readagain" part should be
diff --git a/enc/trans/escape.trans b/enc/trans/escape.trans
index cc15137..63836c3 100644
--- a/enc/trans/escape.trans
+++ b/enc/trans/escape.trans
@@ -79,7 +79,7 @@ rb_escape_xml_attr_quote = {
1, /* input_unit_length */
1, /* max_input */
7, /* max_output */
- stateful_encoder, /* stateful_type */
+ asciicompat_encoder, /* asciicompat_type */
1, escape_xml_attr_quote_init, escape_xml_attr_quote_init,
NULL, NULL, NULL, fun_so_escape_xml_attr_quote,
escape_xml_attr_quote_finish
diff --git a/enc/trans/iso2022.trans b/enc/trans/iso2022.trans
index b3dd7f0..f8a9c2c 100644
--- a/enc/trans/iso2022.trans
+++ b/enc/trans/iso2022.trans
@@ -114,7 +114,7 @@ rb_iso2022jp_decoder = {
1, /* input_unit_length */
3, /* max_input */
3, /* max_output */
- stateful_decoder, /* stateful_type */
+ asciicompat_decoder, /* asciicompat_type */
1, iso2022jp_init, iso2022jp_init, /* state_size, state_init, state_fini */
NULL, fun_si_iso2022jp_decoder, NULL, fun_so_iso2022jp_decoder
};
@@ -196,7 +196,7 @@ rb_iso2022jp_encoder = {
1, /* input_unit_length */
3, /* max_input */
5, /* max_output */
- stateful_encoder, /* stateful_type */
+ asciicompat_encoder, /* asciicompat_type */
1, iso2022jp_init, iso2022jp_init, /* state_size, state_init, state_fini */
NULL, NULL, NULL, fun_so_iso2022jp_encoder,
finish_iso2022jp_encoder,
@@ -218,7 +218,7 @@ rb_stateless_iso2022jp_to_eucjp = {
1, /* input_unit_length */
3, /* max_input */
2, /* max_output */
- stateless_converter, /* stateful_type */
+ asciicompat_converter, /* asciicompat_type */
0, NULL, NULL, /* state_size, state_init, state_fini */
NULL, NULL, NULL, fun_so_stateless_iso2022jp_to_eucjp,
};
@@ -239,7 +239,7 @@ rb_eucjp_to_stateless_iso2022jp = {
1, /* input_unit_length */
3, /* max_input */
3, /* max_output */
- stateless_converter, /* stateful_type */
+ asciicompat_converter, /* asciicompat_type */
0, NULL, NULL, /* state_size, state_init, state_fini */
NULL, NULL, NULL, fun_so_eucjp_to_stateless_iso2022jp,
};
diff --git a/enc/trans/japanese.trans b/enc/trans/japanese.trans
index 64f38fb..12321f5 100644
--- a/enc/trans/japanese.trans
+++ b/enc/trans/japanese.trans
@@ -73,7 +73,7 @@ rb_eucjp2sjis = {
1, /* input_unit_length */
3, /* max_input */
2, /* max_output */
- stateless_converter, /* stateful_type */
+ asciicompat_converter, /* asciicompat_type */
0, NULL, NULL, /* state_size, state_init, state_fini */
NULL, NULL, NULL, fun_so_eucjp2sjis
};
@@ -85,7 +85,7 @@ rb_sjis2eucjp = {
1, /* input_unit_length */
2, /* max_input */
2, /* max_output */
- stateless_converter, /* stateful_type */
+ asciicompat_converter, /* asciicompat_type */
0, NULL, NULL, /* state_size, state_init, state_fini */
NULL, NULL, NULL, fun_so_sjis2eucjp
};
diff --git a/enc/trans/newline.trans b/enc/trans/newline.trans
index 9e5229f..db49a9f 100644
--- a/enc/trans/newline.trans
+++ b/enc/trans/newline.trans
@@ -92,7 +92,7 @@ rb_universal_newline = {
1, /* input_unit_length */
1, /* max_input */
1, /* max_output */
- stateful_decoder, /* stateful_type */
+ asciicompat_converter, /* asciicompat_type */
2, universal_newline_init, universal_newline_init, /* state_size, state_init, state_fini */
NULL, NULL, NULL, fun_so_universal_newline,
universal_newline_finish
@@ -105,7 +105,7 @@ rb_crlf_newline = {
1, /* input_unit_length */
1, /* max_input */
2, /* max_output */
- stateless_converter, /* stateful_type */
+ asciicompat_converter, /* asciicompat_type */
0, NULL, NULL, /* state_size, state_init, state_fini */
NULL, NULL, NULL, NULL
};
@@ -117,7 +117,7 @@ rb_cr_newline = {
1, /* input_unit_length */
1, /* max_input */
1, /* max_output */
- stateless_converter, /* stateful_type */
+ asciicompat_converter, /* asciicompat_type */
0, NULL, NULL, /* state_size, state_init, state_fini */
NULL, NULL, NULL, NULL
};
diff --git a/enc/trans/utf_16_32.trans b/enc/trans/utf_16_32.trans
index 9ffff34..8864826 100644
--- a/enc/trans/utf_16_32.trans
+++ b/enc/trans/utf_16_32.trans
@@ -266,7 +266,7 @@ rb_from_UTF_16BE = {
2, /* input_unit_length */
4, /* max_input */
4, /* max_output */
- stateless_converter, /* stateful_type */
+ asciicompat_decoder, /* asciicompat_type */
0, NULL, NULL, /* state_size, state_init, state_fini */
NULL, NULL, NULL, fun_so_from_utf_16be
};
@@ -278,7 +278,7 @@ rb_to_UTF_16BE = {
1, /* input_unit_length */
4, /* max_input */
4, /* max_output */
- stateless_converter, /* stateful_type */
+ asciicompat_encoder, /* asciicompat_type */
0, NULL, NULL, /* state_size, state_init, state_fini */
NULL, NULL, NULL, fun_so_to_utf_16be
};
@@ -290,7 +290,7 @@ rb_from_UTF_16LE = {
2, /* input_unit_length */
4, /* max_input */
4, /* max_output */
- stateless_converter, /* stateful_type */
+ asciicompat_decoder, /* asciicompat_type */
0, NULL, NULL, /* state_size, state_init, state_fini */
NULL, NULL, NULL, fun_so_from_utf_16le
};
@@ -302,7 +302,7 @@ rb_to_UTF_16LE = {
1, /* input_unit_length */
4, /* max_input */
4, /* max_output */
- stateless_converter, /* stateful_type */
+ asciicompat_encoder, /* asciicompat_type */
0, NULL, NULL, /* state_size, state_init, state_fini */
NULL, NULL, NULL, fun_so_to_utf_16le
};
@@ -314,7 +314,7 @@ rb_from_UTF_32BE = {
4, /* input_unit_length */
4, /* max_input */
4, /* max_output */
- stateless_converter, /* stateful_type */
+ asciicompat_decoder, /* asciicompat_type */
0, NULL, NULL, /* state_size, state_init, state_fini */
NULL, NULL, NULL, fun_so_from_utf_32be
};
@@ -326,7 +326,7 @@ rb_to_UTF_32BE = {
1, /* input_unit_length */
4, /* max_input */
4, /* max_output */
- stateless_converter, /* stateful_type */
+ asciicompat_encoder, /* asciicompat_type */
0, NULL, NULL, /* state_size, state_init, state_fini */
NULL, NULL, NULL, fun_so_to_utf_32be
};
@@ -338,7 +338,7 @@ rb_from_UTF_32LE = {
4, /* input_unit_length */
4, /* max_input */
4, /* max_output */
- stateless_converter, /* stateful_type */
+ asciicompat_decoder, /* asciicompat_type */
0, NULL, NULL, /* state_size, state_init, state_fini */
NULL, NULL, NULL, fun_so_from_utf_32le
};
@@ -350,7 +350,7 @@ rb_to_UTF_32LE = {
1, /* input_unit_length */
4, /* max_input */
4, /* max_output */
- stateless_converter, /* stateful_type */
+ asciicompat_encoder, /* asciicompat_type */
0, NULL, NULL, /* state_size, state_init, state_fini */
NULL, NULL, NULL, fun_so_to_utf_32le
};
diff --git a/include/ruby/encoding.h b/include/ruby/encoding.h
index 0519340..3d294a0 100644
--- a/include/ruby/encoding.h
+++ b/include/ruby/encoding.h
@@ -239,8 +239,9 @@ void rb_econv_check_error(rb_econv_t *ec);
int rb_econv_putbackable(rb_econv_t *ec);
void rb_econv_putback(rb_econv_t *ec, unsigned char *p, int n);
-/* returns corresponding stateless encoding, or NULL if not stateful. */
-const char *rb_econv_stateless_encoding(const char *stateful_enc);
+/* returns the corresponding ASCII compatible encoding for encname,
+ * or NULL if encname is not an ASCII incompatible encoding. */
+const char *rb_econv_asciicompat_encoding(const char *encname);
VALUE rb_econv_str_convert(rb_econv_t *ec, VALUE src, int flags);
VALUE rb_econv_substr_convert(rb_econv_t *ec, VALUE src, long byteoff, long bytesize, int flags);
diff --git a/io.c b/io.c
index b7a5314..d450186 100644
--- a/io.c
+++ b/io.c
@@ -713,7 +713,7 @@ make_writeconv(rb_io_t *fptr)
}
else {
enc = fptr->encs.enc2 ? fptr->encs.enc2 : fptr->encs.enc;
- senc = rb_econv_stateless_encoding(enc->name);
+ senc = rb_econv_asciicompat_encoding(enc->name);
if (!senc && !(fptr->encs.ecflags & ECONV_STATEFUL_ENCODER_MASK)) {
/* single conversion */
fptr->writeconv_pre_ecflags = ecflags;
diff --git a/test/ruby/test_econv.rb b/test/ruby/test_econv.rb
index d796db8..1b2586a 100644
--- a/test/ruby/test_econv.rb
+++ b/test/ruby/test_econv.rb
@@ -27,20 +27,24 @@ class TestEncodingConverter < Test::Unit::TestCase
ec.primitive_errinfo)
end
- def test_s_stateless_encoding
- assert_equal(Encoding::STATELESS_ISO_2022_JP, Encoding::Converter.stateless_encoding("ISO-2022-JP"))
- assert_equal(Encoding::STATELESS_ISO_2022_JP, Encoding::Converter.stateless_encoding(Encoding::ISO_2022_JP))
- assert_nil(Encoding::Converter.stateless_encoding("EUC-JP"))
- assert_nil(Encoding::Converter.stateless_encoding("UTF-8"))
- assert_nil(Encoding::Converter.stateless_encoding("UTF-16BE"))
- assert_nil(Encoding::Converter.stateless_encoding(Encoding::UTF_8))
- assert_nil(Encoding::Converter.stateless_encoding("xml-attr-escaped"))
- end
-
- def test_stateless_encoding_iso2022jp
- slenc = Encoding::Converter.stateless_encoding("ISO-2022-JP")
+ def test_s_asciicompat_encoding
+ assert_equal(Encoding::STATELESS_ISO_2022_JP, Encoding::Converter.asciicompat_encoding("ISO-2022-JP"))
+ assert_equal(Encoding::STATELESS_ISO_2022_JP, Encoding::Converter.asciicompat_encoding(Encoding::ISO_2022_JP))
+ assert_equal(Encoding::UTF_8, Encoding::Converter.asciicompat_encoding("UTF-16BE"))
+ assert_equal(Encoding::UTF_8, Encoding::Converter.asciicompat_encoding("UTF-16LE"))
+ assert_equal(Encoding::UTF_8, Encoding::Converter.asciicompat_encoding("UTF-32BE"))
+ assert_equal(Encoding::UTF_8, Encoding::Converter.asciicompat_encoding("UTF-32LE"))
+ assert_nil(Encoding::Converter.asciicompat_encoding("EUC-JP"))
+ assert_nil(Encoding::Converter.asciicompat_encoding("UTF-8"))
+ assert_nil(Encoding::Converter.asciicompat_encoding(Encoding::UTF_8))
+ assert_nil(Encoding::Converter.asciicompat_encoding("xml-attr-escaped"))
+ assert_nil(Encoding::Converter.asciicompat_encoding("encoding-not-exist"))
+ end
+
+ def test_asciicompat_encoding_iso2022jp
+ acenc = Encoding::Converter.asciicompat_encoding("ISO-2022-JP")
str = "\e$B~~\(B".force_encoding("iso-2022-jp")
- str2 = str.encode(slenc)
+ str2 = str.encode(acenc)
str3 = str.encode("ISO-2022-JP")
assert_equal(str, str3)
end
diff --git a/tool/transcode-tblgen.rb b/tool/transcode-tblgen.rb
index 64d184c..5395460 100644
--- a/tool/transcode-tblgen.rb
+++ b/tool/transcode-tblgen.rb
@@ -634,7 +634,7 @@ static const rb_transcoder
#{input_unit_length}, /* input_unit_length */
#{max_input}, /* max_input */
#{max_output}, /* max_output */
- stateless_converter, /* stateful_type */
+ asciicompat_converter, /* asciicompat_type */
0, NULL, NULL, /* state_size, state_init, state_fini */
NULL, NULL, NULL, NULL,
NULL, NULL, NULL
diff --git a/transcode.c b/transcode.c
index 8794f24..08f8bb0 100644
--- a/transcode.c
+++ b/transcode.c
@@ -1414,7 +1414,7 @@ rb_econv_encoding_to_insert_output(rb_econv_t *ec)
tr = tc->transcoder;
- if (tr->stateful_type == stateful_encoder)
+ if (tr->asciicompat_type == asciicompat_encoder)
return tr->src_encoding;
return tr->dst_encoding;
}
@@ -1528,7 +1528,7 @@ rb_econv_insert_output(rb_econv_t *ec,
data_end_p = &ec->in_data_end;
buf_end_p = &ec->in_buf_end;
}
- else if (tc->transcoder->stateful_type == stateful_encoder) {
+ else if (tc->transcoder->asciicompat_type == asciicompat_encoder) {
need += tc->readagain_len;
if (need < insert_len)
goto fail;
@@ -1580,7 +1580,7 @@ rb_econv_insert_output(rb_econv_t *ec,
memcpy(*data_end_p, insert_str, insert_len);
*data_end_p += insert_len;
- if (tc && tc->transcoder->stateful_type == stateful_encoder) {
+ if (tc && tc->transcoder->asciicompat_type == asciicompat_encoder) {
memcpy(*data_end_p, TRANSCODING_READBUF(tc)+tc->recognized_len, tc->readagain_len);
*data_end_p += tc->readagain_len;
tc->readagain_len = 0;
@@ -1633,27 +1633,31 @@ rb_econv_putback(rb_econv_t *ec, unsigned char *p, int n)
tc->readagain_len -= n;
}
-struct stateless_encoding_t {
- const char *stateless_enc;
- const char *stateful_enc;
+struct asciicompat_encoding_t {
+ const char *ascii_compat_name;
+ const char *ascii_incompat_name;
};
static int
-stateless_encoding_i(st_data_t key, st_data_t val, st_data_t arg)
+asciicompat_encoding_i(st_data_t key, st_data_t val, st_data_t arg)
{
- struct stateless_encoding_t *data = (struct stateless_encoding_t *)arg;
+ struct asciicompat_encoding_t *data = (struct asciicompat_encoding_t *)arg;
st_table *table2 = (st_table *)val;
st_data_t v;
- if (st_lookup(table2, (st_data_t)data->stateful_enc, &v)) {
+ if (st_lookup(table2, (st_data_t)data->ascii_incompat_name, &v)) {
transcoder_entry_t *entry = (transcoder_entry_t *)v;
const rb_transcoder *tr;
- if (SUPPLEMENTAL_CONVERSION(entry->sname, entry->dname)) {
+ if (SUPPLEMENTAL_CONVERSION(entry->sname, entry->dname))
return ST_CONTINUE;
- }
tr = load_transcoder_entry(entry);
- if (tr && tr->stateful_type == stateful_encoder) {
- data->stateless_enc = tr->src_encoding;
+ if (tr && tr->asciicompat_type == asciicompat_encoder) {
+ /*
+ * Assumption:
+ * There is only one transcoder for
+ * converting to an ASCII incompatible encoding.
+ */
+ data->ascii_compat_name = tr->src_encoding;
return ST_STOP;
}
}
@@ -1661,14 +1665,14 @@ stateless_encoding_i(st_data_t key, st_data_t val, st_data_t arg)
}
const char *
-rb_econv_stateless_encoding(const char *stateful_enc)
-{
- struct stateless_encoding_t data;
- data.stateful_enc = stateful_enc;
- data.stateless_enc = NULL;
- st_foreach(transcoder_table, stateless_encoding_i, (st_data_t)&data);
- if (data.stateless_enc)
- return data.stateless_enc;
+rb_econv_asciicompat_encoding(const char *ascii_incompat_name)
+{
+ struct asciicompat_encoding_t data;
+ data.ascii_incompat_name = ascii_incompat_name;
+ data.ascii_compat_name = NULL;
+ st_foreach(transcoder_table, asciicompat_encoding_i, (st_data_t)&data);
+ if (data.ascii_compat_name)
+ return data.ascii_compat_name;
return NULL;
}
@@ -2510,42 +2514,42 @@ make_dummy_encoding(const char *name)
/*
* call-seq:
- * Encoding::Converter.stateless_encoding(string) => encoding or nil
- * Encoding::Converter.stateless_encoding(encoding) => encoding or nil
- *
- * returns the corresponding stateless encoding.
+ * Encoding::Converter.asciicompat_encoding(string) => encoding or nil
+ * Encoding::Converter.asciicompat_encoding(encoding) => encoding or nil
*
- * It returns nil if the argument is not a stateful encoding.
+ * returns the corresponding ASCII compatible encoding.
*
- * "corresponding stateless encoding" is a stateless encoding which
- * represents same characters in the statefull encoding.
+ * It returns nil if the argument is an ASCII compatible encoding.
*
- * So, no conversion undefined error occur between the stateful encoding and the stateless encoding.
+ * "corresponding ASCII compatible encoding" is an ASCII compatible encoding which
+ * represents same characters in the given ASCII incompatible encoding.
*
- * For ISO-2022-JP, the dedicated stateless encoding, stateless-ISO-2022-JP, is defined.
+ * So, no conversion undefined error occurs between the ASCII compatible and incompatible encodings.
*
* Encoding::Converter.stateless_encoding("ISO-2022-JP") #=> #<Encoding:stateless-ISO-2022-JP>
+ * Encoding::Converter.asciicompat_encoding("UTF-16BE") #=> #<Encoding:UTF-8>
+ * Encoding::Converter.asciicompat_encoding("UTF-8") #=> nil
*
*/
static VALUE
-econv_s_stateless_encoding(VALUE klass, VALUE arg)
+econv_s_asciicompat_encoding(VALUE klass, VALUE arg)
{
- const char *stateful_name, *stateless_name;
- rb_encoding *stateful_enc, *stateless_enc;
+ const char *arg_name, *result_name;
+ rb_encoding *arg_enc, *result_enc;
- enc_arg(arg, &stateful_name, &stateful_enc);
+ enc_arg(arg, &arg_name, &arg_enc);
- stateless_name = rb_econv_stateless_encoding(stateful_name);
+ result_name = rb_econv_asciicompat_encoding(arg_name);
- if (stateless_name == NULL)
+ if (result_name == NULL)
return Qnil;
- stateless_enc = rb_enc_find(stateless_name);
+ result_enc = rb_enc_find(result_name);
- if (!stateless_enc)
- stateless_enc = make_dummy_encoding(stateless_name);
+ if (!result_enc)
+ result_enc = make_dummy_encoding(result_name);
- return rb_enc_from_encoding(stateless_enc);
+ return rb_enc_from_encoding(result_enc);
}
/*
@@ -3563,7 +3567,7 @@ Init_transcode(void)
rb_cEncodingConverter = rb_define_class_under(rb_cEncoding, "Converter", rb_cData);
rb_define_alloc_func(rb_cEncodingConverter, econv_s_allocate);
- rb_define_singleton_method(rb_cEncodingConverter, "stateless_encoding", econv_s_stateless_encoding, 1);
+ rb_define_singleton_method(rb_cEncodingConverter, "asciicompat_encoding", econv_s_asciicompat_encoding, 1);
rb_define_method(rb_cEncodingConverter, "initialize", econv_init, -1);
rb_define_method(rb_cEncodingConverter, "inspect", econv_inspect, 0);
rb_define_method(rb_cEncodingConverter, "source_encoding", econv_source_encoding, 0);
diff --git a/transcode_data.h b/transcode_data.h
index 4587815..98f48a4 100644
--- a/transcode_data.h
+++ b/transcode_data.h
@@ -57,11 +57,11 @@
#define THREETRAIL /* legal but undefined if three more trailing UTF-8 */
typedef enum {
- stateless_converter, /* stateless -> stateless */
- stateful_decoder, /* stateful -> stateless */
- stateful_encoder /* stateless -> stateful */
- /* stateful -> stateful is intentionally ommitted. */
-} rb_transcoder_stateful_type_t;
+ asciicompat_converter, /* ASCII-compatible -> ASCII-compatible */
+ asciicompat_decoder, /* ASCII-incompatible -> ASCII-compatible */
+ asciicompat_encoder /* ASCII-compatible -> ASCII-incompatible */
+ /* ASCII-incompatible -> ASCII-incompatible is intentionally omitted. */
+} rb_transcoder_asciicompat_type_t;
typedef struct rb_transcoder rb_transcoder;
@@ -78,7 +78,7 @@ struct rb_transcoder {
int input_unit_length;
int max_input;
int max_output;
- rb_transcoder_stateful_type_t stateful_type;
+ rb_transcoder_asciicompat_type_t asciicompat_type;
size_t state_size;
int (*state_init_func)(void*); /* ret==0:success ret!=0:failure(errno) */
int (*state_fini_func)(void*); /* ret==0:success ret!=0:failure(errno) */