summary refs log tree commit diff
diff options
context:
space:
mode:
author akr <akr@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2008-09-12 13:53:42 +0000
committer akr <akr@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2008-09-12 13:53:42 +0000
commit 2d80b1695d9f7b6fbe99f63117078033ac40ebb2 (patch)
tree 1bebadb8d4bc02dcb1b5bf6553ad8f314195c739
parent 89a5c998c7297f9f565e434dfae8d03765e3c393 (diff)
* transcode.c (decorator_names): extracted from rb_econv_open.
  (rb_econv_open): use decorator_names.
  (econv_args): extracted from econv_init.
  (econv_init): use econv_args.
  (decorate_convpath): new function.
  (search_convpath_i): new function.
  (econv_s_search_convpath): new method.
  (Init_transcode): new method defined.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@19305 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- ChangeLog 11
-rw-r--r-- test/ruby/test_econv.rb 16
-rw-r--r-- transcode.c 245
3 files changed, 213 insertions, 59 deletions
diff --git a/ChangeLog b/ChangeLog
index 4b7befc76e..24a7f7ab6c 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,14 @@
+Fri Sep 12 22:39:46 2008 Tanaka Akira <akr@fsij.org>
+
+ * transcode.c (decorator_names): extracted from rb_econv_open.
+ (rb_econv_open): use decorator_names.
+ (econv_args): extracted from econv_init.
+ (econv_init): use econv_args.
+ (decorate_convpath): new function.
+ (search_convpath_i): new function.
+ (econv_s_search_convpath): new method.
+ (Init_transcode): new method defined.
+
Fri Sep 12 21:55:43 2008 Tanaka Akira <akr@fsij.org>
* transcode.c (rb_econv_t): new field: num_allocated.
diff --git a/test/ruby/test_econv.rb b/test/ruby/test_econv.rb
index 73e6e0ffb8..68175fe67e 100644
--- a/test/ruby/test_econv.rb
+++ b/test/ruby/test_econv.rb
@@ -816,6 +816,7 @@ class TestEncodingConverter < Test::Unit::TestCase
iso88591 = Encoding::ISO_8859_1
iso2022jp = Encoding::ISO_2022_JP
siso2022jp = Encoding::STATELESS_ISO_2022_JP
+
assert_equal([], Encoding::Converter.new("", "").convpath)
assert_equal([[eucjp, utf8], [utf8, iso88591]],
Encoding::Converter.new(eucjp, iso88591).convpath)
@@ -833,4 +834,19 @@ class TestEncodingConverter < Test::Unit::TestCase
assert_equal([[utf16be, utf8], "universal_newline", [utf8, utf16le]],
Encoding::Converter.new(utf16be, utf16le, universal_newline: true).convpath)
end
+
+ def test_search_convpath
+ eucjp = Encoding::EUC_JP
+ utf8 = Encoding::UTF_8
+ utf32be = Encoding::UTF_32BE
+ iso88591 = Encoding::ISO_8859_1
+ assert_equal([[iso88591,utf8], [utf8,eucjp]],
+ Encoding::Converter.search_convpath("ISO-8859-1", "EUC-JP"))
+ assert_equal([[iso88591,utf8], [utf8,eucjp]],
+ Encoding::Converter.search_convpath(iso88591, eucjp))
+ assert_equal([[iso88591,utf8], [utf8,eucjp], "universal_newline"],
+ Encoding::Converter.search_convpath("ISO-8859-1", "EUC-JP", universal_newline: true))
+ assert_equal([[iso88591,utf8], "universal_newline", [utf8,utf32be]],
+ Encoding::Converter.search_convpath("ISO-8859-1", "UTF-32BE", universal_newline: true))
+ end
end
diff --git a/transcode.c b/transcode.c
index d4f349327f..077782937f 100644
--- a/transcode.c
+++ b/transcode.c
@@ -948,47 +948,55 @@ rb_econv_open0(const char *sname, const char *dname, int ecflags)
return ec;
}
-rb_econv_t *
-rb_econv_open(const char *sname, const char *dname, int ecflags)
+#define MAX_ECFLAGS_DECORATORS 32
+
+static int
+decorator_names(int ecflags, const char **decorators_ret)
{
- rb_econv_t *ec;
int num_decorators;
- const char *decorators[6];
- int i;
if ((ecflags & ECONV_CRLF_NEWLINE_DECORATOR) &&
(ecflags & ECONV_CR_NEWLINE_DECORATOR))
- return NULL;
+ return -1;
if ((ecflags & (ECONV_CRLF_NEWLINE_DECORATOR|ECONV_CR_NEWLINE_DECORATOR)) &&
(ecflags & ECONV_UNIVERSAL_NEWLINE_DECORATOR))
- return NULL;
+ return -1;
if ((ecflags & ECONV_XML_TEXT_DECORATOR) &&
(ecflags & ECONV_XML_ATTR_CONTENT_DECORATOR))
- return NULL;
+ return -1;
num_decorators = 0;
if (ecflags & ECONV_XML_TEXT_DECORATOR)
- if (!(decorators[num_decorators++] = "xml-text-escaped"))
- return NULL;
+ decorators_ret[num_decorators++] = "xml-text-escaped";
if (ecflags & ECONV_XML_ATTR_CONTENT_DECORATOR)
- if (!(decorators[num_decorators++] = "xml-attr-content-escaped"))
- return NULL;
+ decorators_ret[num_decorators++] = "xml-attr-content-escaped";
if (ecflags & ECONV_XML_ATTR_QUOTE_DECORATOR)
- if (!(decorators[num_decorators++] = "xml-attr-quoted"))
- return NULL;
+ decorators_ret[num_decorators++] = "xml-attr-quoted";
if (ecflags & ECONV_CRLF_NEWLINE_DECORATOR)
- if (!(decorators[num_decorators++] = "crlf_newline"))
- return NULL;
+ decorators_ret[num_decorators++] = "crlf_newline";
if (ecflags & ECONV_CR_NEWLINE_DECORATOR)
- if (!(decorators[num_decorators++] = "cr_newline"))
- return NULL;
+ decorators_ret[num_decorators++] = "cr_newline";
if (ecflags & ECONV_UNIVERSAL_NEWLINE_DECORATOR)
- if (!(decorators[num_decorators++] = "universal_newline"))
- return NULL;
+ decorators_ret[num_decorators++] = "universal_newline";
+
+ return num_decorators;
+}
+
+rb_econv_t *
+rb_econv_open(const char *sname, const char *dname, int ecflags)
+{
+ rb_econv_t *ec;
+ int num_decorators;
+ const char *decorators[MAX_ECFLAGS_DECORATORS];
+ int i;
+
+ num_decorators = decorator_names(ecflags, decorators);
+ if (num_decorators == -1)
+ return NULL;
ec = rb_econv_open0(sname, dname, ecflags & ECONV_ERROR_HANDLER_MASK);
if (!ec)
@@ -1932,7 +1940,7 @@ VALUE
rb_econv_open_exc(const char *sname, const char *dname, int ecflags)
{
VALUE mesg, exc;
- mesg = rb_str_new_cstr("code converter open failed (");
+ mesg = rb_str_new_cstr("code converter not found (");
econv_description(sname, dname, ecflags, mesg);
rb_str_cat2(mesg, ")");
exc = rb_exc_new3(rb_eNoConverter, mesg);
@@ -2641,6 +2649,160 @@ econv_s_asciicompat_encoding(VALUE klass, VALUE arg)
return rb_enc_from_encoding(result_enc);
}
+static void
+econv_args(int argc, VALUE *argv,
+ const char **sname_p, const char **dname_p,
+ rb_encoding **senc_p, rb_encoding **denc_p,
+ int *ecflags_p,
+ VALUE *ecopts_p)
+{
+ VALUE source_encoding, destination_encoding, opt, opthash, flags_v, ecopts;
+ int sidx, didx;
+ const char *sname, *dname;
+ rb_encoding *senc, *denc;
+ int ecflags;
+
+ rb_scan_args(argc, argv, "21", &source_encoding, &destination_encoding, &opt);
+
+ if (NIL_P(opt)) {
+ ecflags = 0;
+ ecopts = Qnil;
+ }
+ else if (!NIL_P(flags_v = rb_check_to_integer(opt, "to_int"))) {
+ ecflags = NUM2INT(flags_v);
+ ecopts = Qnil;
+ }
+ else {
+ opthash = rb_convert_type(opt, T_HASH, "Hash", "to_hash");
+ ecflags = rb_econv_prepare_opts(opthash, &ecopts);
+ }
+
+ senc = NULL;
+ sidx = rb_to_encoding_index(source_encoding);
+ if (0 <= sidx) {
+ senc = rb_enc_from_index(sidx);
+ }
+ else {
+ StringValue(source_encoding);
+ }
+
+ denc = NULL;
+ didx = rb_to_encoding_index(destination_encoding);
+ if (0 <= didx) {
+ denc = rb_enc_from_index(didx);
+ }
+ else {
+ StringValue(destination_encoding);
+ }
+
+ sname = senc ? senc->name : StringValueCStr(source_encoding);
+ dname = denc ? denc->name : StringValueCStr(destination_encoding);
+
+ *sname_p = sname;
+ *dname_p = dname;
+ *senc_p = senc;
+ *denc_p = denc;
+ *ecflags_p = ecflags;
+ *ecopts_p = ecopts;
+}
+
+static int
+decorate_convpath(VALUE convpath, int ecflags)
+{
+ int num_decorators;
+ const char *decorators[MAX_ECFLAGS_DECORATORS];
+ int i;
+ int n, len;
+
+ num_decorators = decorator_names(ecflags, decorators);
+ if (num_decorators == -1)
+ return -1;
+
+ len = n = RARRAY_LEN(convpath);
+ if (n != 0) {
+ VALUE pair = RARRAY_PTR(convpath)[n-1];
+ const char *sname = rb_to_encoding(RARRAY_PTR(pair)[0])->name;
+ const char *dname = rb_to_encoding(RARRAY_PTR(pair)[1])->name;
+ transcoder_entry_t *entry = get_transcoder_entry(sname, dname);
+ const rb_transcoder *tr = load_transcoder_entry(entry);
+ if (!tr)
+ return -1;
+ if (!SUPPLEMENTAL_CONVERSION(tr->src_encoding, tr->dst_encoding) &&
+ tr->asciicompat_type == asciicompat_encoder) {
+ n--;
+ rb_ary_store(convpath, len + num_decorators - 1, pair);
+ }
+ }
+
+ for (i = 0; i < num_decorators; i++)
+ rb_ary_store(convpath, n + i, rb_str_new_cstr(decorators[i]));
+
+ return 0;
+}
+
+static void
+search_convpath_i(const char *sname, const char *dname, int depth, void *arg)
+{
+ VALUE *ary_p = arg;
+ VALUE v;
+
+ if (*ary_p == Qnil) {
+ *ary_p = rb_ary_new();
+ }
+
+ if (SUPPLEMENTAL_CONVERSION(sname, dname)) {
+ v = rb_str_new_cstr(dname);
+ }
+ else {
+ v = rb_assoc_new(make_encobj(sname), make_encobj(dname));
+ }
+ rb_ary_store(*ary_p, depth, v);
+}
+
+/*
+ * call-seq:
+ * Encoding::Converter.search_convpath(source_encoding, destination_encoding) -> ary
+ * Encoding::Converter.search_convpath(source_encoding, destination_encoding, opt) -> ary
+ *
+ * returns the conversion path.
+ *
+ * p Encoding::Converter.search_convpath("ISO-8859-1", "EUC-JP")
+ * #=> [[#<Encoding:ISO-8859-1>, #<Encoding:UTF-8>],
+ * # [#<Encoding:UTF-8>, #<Encoding:EUC-JP>]]
+ *
+ * p Encoding::Converter.search_convpath("ISO-8859-1", "EUC-JP", universal_newline: true)
+ * #=> [[#<Encoding:ISO-8859-1>, #<Encoding:UTF-8>],
+ * # [#<Encoding:UTF-8>, #<Encoding:EUC-JP>],
+ * # "universal_newline"]
+ *
+ * p Encoding::Converter.search_convpath("ISO-8859-1", "UTF-32BE", universal_newline: true)
+ * #=> [[#<Encoding:ISO-8859-1>, #<Encoding:UTF-8>],
+ * # "universal_newline",
+ * # [#<Encoding:UTF-8>, #<Encoding:UTF-32BE>]]
+ */
+static VALUE
+econv_s_search_convpath(int argc, VALUE *argv, VALUE klass)
+{
+ const char *sname, *dname;
+ rb_encoding *senc, *denc;
+ int ecflags;
+ VALUE ecopts;
+ VALUE convpath;
+
+ econv_args(argc, argv, &sname, &dname, &senc, &denc, &ecflags, &ecopts);
+
+ convpath = Qnil;
+ transcode_search_path(sname, dname, search_convpath_i, &convpath);
+
+ if (NIL_P(convpath))
+ rb_exc_raise(rb_econv_open_exc(sname, dname, ecflags));
+
+ if (decorate_convpath(convpath, ecflags) == -1)
+ rb_exc_raise(rb_econv_open_exc(sname, dname, ecflags));
+
+ return convpath;
+}
+
/*
* call-seq:
* Encoding::Converter.new(source_encoding, destination_encoding)
@@ -2684,53 +2846,17 @@ econv_s_asciicompat_encoding(VALUE klass, VALUE arg)
static VALUE
econv_init(int argc, VALUE *argv, VALUE self)
{
- VALUE source_encoding, destination_encoding, opt, opthash, flags_v, ecopts;
- int sidx, didx;
+ VALUE ecopts;
const char *sname, *dname;
rb_encoding *senc, *denc;
rb_econv_t *ec;
int ecflags;
- rb_scan_args(argc, argv, "21", &source_encoding, &destination_encoding, &opt);
-
- if (NIL_P(opt)) {
- ecflags = 0;
- ecopts = Qnil;
- }
- else if (!NIL_P(flags_v = rb_check_to_integer(opt, "to_int"))) {
- ecflags = NUM2INT(flags_v);
- ecopts = Qnil;
- }
- else {
- opthash = rb_convert_type(opt, T_HASH, "Hash", "to_hash");
- ecflags = rb_econv_prepare_opts(opthash, &ecopts);
- }
-
- senc = NULL;
- sidx = rb_to_encoding_index(source_encoding);
- if (0 <= sidx) {
- senc = rb_enc_from_index(sidx);
- }
- else {
- StringValue(source_encoding);
- }
-
- denc = NULL;
- didx = rb_to_encoding_index(destination_encoding);
- if (0 <= didx) {
- denc = rb_enc_from_index(didx);
- }
- else {
- StringValue(destination_encoding);
- }
-
- sname = senc ? senc->name : StringValueCStr(source_encoding);
- dname = denc ? denc->name : StringValueCStr(destination_encoding);
-
if (DATA_PTR(self)) {
rb_raise(rb_eTypeError, "already initialized");
}
+ econv_args(argc, argv, &sname, &dname, &senc, &denc, &ecflags, &ecopts);
ec = rb_econv_open_opts(sname, dname, ecflags, ecopts);
if (!ec) {
rb_exc_raise(rb_econv_open_exc(sname, dname, ecflags));
@@ -3690,6 +3816,7 @@ Init_transcode(void)
rb_cEncodingConverter = rb_define_class_under(rb_cEncoding, "Converter", rb_cData);
rb_define_alloc_func(rb_cEncodingConverter, econv_s_allocate);
rb_define_singleton_method(rb_cEncodingConverter, "asciicompat_encoding", econv_s_asciicompat_encoding, 1);
+ rb_define_singleton_method(rb_cEncodingConverter, "search_convpath", econv_s_search_convpath, -1);
rb_define_method(rb_cEncodingConverter, "initialize", econv_init, -1);
rb_define_method(rb_cEncodingConverter, "inspect", econv_inspect, 0);
rb_define_method(rb_cEncodingConverter, "convpath", econv_convpath, 0);