summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorakr <akr@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2008-09-12 17:13:04 +0000
committerakr <akr@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2008-09-12 17:13:04 +0000
commit248acf0025f047a731b6d6967382ce9b7675a05e (patch)
tree978f06b554ec15d5688f27d9d0e2d37c6fa2b1c8
parentcf8617633815a1baae3e4c21f4cef863dc98fc9f (diff)
* transcode.c (rb_econv_init_by_convpath_i): new function.
(rb_econv_init_by_convpath): new function. (econv_init): use rb_econv_init_by_convpath. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@19310 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r--ChangeLog6
-rw-r--r--test/ruby/test_econv.rb26
-rw-r--r--transcode.c131
3 files changed, 151 insertions, 12 deletions
diff --git a/ChangeLog b/ChangeLog
index 8ec49a9f0a..b7bfe6b9bd 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+Sat Sep 13 02:12:17 2008 Tanaka Akira <akr@fsij.org>
+
+ * transcode.c (rb_econv_init_by_convpath_i): new function.
+ (rb_econv_init_by_convpath): new function.
+ (econv_init): use rb_econv_init_by_convpath.
+
Sat Sep 13 01:11:14 2008 Tanaka Akira <akr@fsij.org>
* transcode.c (econv_args): take snamev_p and dnamev_p to prevent
diff --git a/test/ruby/test_econv.rb b/test/ruby/test_econv.rb
index 68175fe67e..c1e78e54a7 100644
--- a/test/ruby/test_econv.rb
+++ b/test/ruby/test_econv.rb
@@ -49,12 +49,34 @@ class TestEncodingConverter < Test::Unit::TestCase
assert_equal(str, str3)
end
- def test_new
+ def test_s_new
assert_kind_of(Encoding::Converter, Encoding::Converter.new("UTF-8", "EUC-JP"))
assert_kind_of(Encoding::Converter, Encoding::Converter.new(Encoding::UTF_8, Encoding::EUC_JP))
end
- def test_new_fail
+ def test_s_new_convpath
+ assert_equal([], Encoding::Converter.new([]).convpath)
+ assert_equal([[Encoding::UTF_8, Encoding::EUC_JP]],
+ Encoding::Converter.new([["UTF-8", "EUC-JP"]]).convpath)
+ assert_equal([[Encoding::UTF_8, Encoding::WINDOWS_31J]],
+ Encoding::Converter.new([["utf-8", "cp932"]]).convpath)
+ assert_equal([[Encoding::UTF_8, Encoding::EUC_JP]],
+ Encoding::Converter.new([[Encoding::UTF_8, Encoding::EUC_JP]]).convpath)
+ assert_equal([[Encoding::ISO_8859_1, Encoding::UTF_8],
+ [Encoding::UTF_8, Encoding::EUC_JP]],
+ Encoding::Converter.new([["iso-8859-1", "euc-jp"]]).convpath)
+ assert_equal([[Encoding::ISO_8859_1, Encoding::UTF_8],
+ [Encoding::UTF_8, Encoding::EUC_JP],
+ "universal_newline"],
+ Encoding::Converter.new([["iso-8859-1", "euc-jp"], "universal_newline"]).convpath)
+ assert_equal(["universal_newline",
+ [Encoding::ISO_8859_1, Encoding::UTF_8],
+ [Encoding::UTF_8, Encoding::EUC_JP],
+ "universal_newline"],
+ Encoding::Converter.new(["universal_newline", ["iso-8859-1", "euc-jp"], "universal_newline"]).convpath)
+ end
+
+ def test_s_new_fail
name1 = "encoding-which-is-not-exist-1"
name2 = "encoding-which-is-not-exist-2"
diff --git a/transcode.c b/transcode.c
index 25e065d4ca..25b744747c 100644
--- a/transcode.c
+++ b/transcode.c
@@ -2808,21 +2808,110 @@ econv_s_search_convpath(int argc, VALUE *argv, VALUE klass)
return convpath;
}
+struct rb_econv_init_by_convpath_t {
+ rb_econv_t *ec;
+ int index;
+ int ret;
+};
+
+void rb_econv_init_by_convpath_i(const char *sname, const char *dname, int depth, void *arg)
+{
+ struct rb_econv_init_by_convpath_t *a = (struct rb_econv_init_by_convpath_t *)arg;
+ int ret;
+
+ if (a->ret == -1)
+ return;
+
+ ret = rb_econv_add_converter(a->ec, sname, dname, a->index);
+
+ a->ret = ret;
+ return;
+}
+
+static rb_econv_t *
+rb_econv_init_by_convpath(VALUE self, VALUE convpath,
+ const char **sname_p, const char **dname_p,
+ rb_encoding **senc_p, rb_encoding**denc_p)
+{
+ rb_econv_t *ec;
+ long i;
+ int ret, first=1;
+ VALUE elt;
+ rb_encoding *senc, *denc;
+ const char *sname, *dname;
+
+ ec = rb_econv_alloc(RARRAY_LEN(convpath));
+ DATA_PTR(self) = ec;
+
+ for (i = 0; i < RARRAY_LEN(convpath); i++) {
+ volatile VALUE snamev, dnamev;
+ VALUE pair;
+ elt = rb_ary_entry(convpath, i);
+ if (!NIL_P(pair = rb_check_array_type(elt))) {
+ if (RARRAY_LEN(pair) != 2)
+ rb_raise(rb_eArgError, "not a 2-element array in convpath");
+ snamev = rb_ary_entry(pair, 0);
+ enc_arg(&snamev, &sname, &senc);
+ dnamev = rb_ary_entry(pair, 1);
+ enc_arg(&dnamev, &dname, &denc);
+ }
+ else {
+ sname = "";
+ dname = StringValueCStr(elt);
+ }
+ if (SUPPLEMENTAL_CONVERSION(sname, dname)) {
+ ret = rb_econv_add_converter(ec, sname, dname, ec->num_trans);
+ if (ret == -1)
+ rb_raise(rb_eArgError, "decoration failed: %s", dname);
+ }
+ else {
+ int j = ec->num_trans;
+ struct rb_econv_init_by_convpath_t arg;
+ arg.ec = ec;
+ arg.index = ec->num_trans;
+ arg.ret = 0;
+ ret = transcode_search_path(sname, dname, rb_econv_init_by_convpath_i, &arg);
+ if (ret == -1 || arg.ret == -1)
+ rb_raise(rb_eArgError, "conversion add failed: %s to %s", sname, dname);
+ if (first) {
+ first = 0;
+ *senc_p = senc;
+ *sname_p = ec->elems[j].tc->transcoder->src_encoding;
+ }
+ *denc_p = denc;
+ *dname_p = ec->elems[ec->num_trans-1].tc->transcoder->dst_encoding;
+ }
+ }
+
+ if (first) {
+ *senc_p = NULL;
+ *denc_p = NULL;
+ *sname_p = "";
+ *dname_p = "";
+ }
+
+ ec->source_encoding_name = *sname_p;
+ ec->destination_encoding_name = *dname_p;
+
+ return ec;
+}
+
/*
* call-seq:
* Encoding::Converter.new(source_encoding, destination_encoding)
* Encoding::Converter.new(source_encoding, destination_encoding, opt)
+ * Encoding::Converter.new(convpath)
*
* possible options elements:
* hash form:
- * :universal_newline => true # convert CRLF and CR to LF
- * :crlf_newline => true # convert LF to CRLF
- * :cr_newline => true # convert LF to CR
* :invalid => nil # error on invalid byte sequence (default)
* :invalid => :replace # replace invalid byte sequence
* :undef => nil # error on undefined conversion (default)
* :undef => :replace # replace undefined conversion
* :replace => string # replacement string ("?" or "\uFFFD" if not specified)
+ * :universal_newline => true # decorator for converting CRLF and CR to LF
+ * :crlf_newline => true # decorator for converting LF to CRLF
+ * :cr_newline => true # decorator for converting LF to CR
* integer form:
* Encoding::Converter::UNIVERSAL_NEWLINE_DECORATOR
* Encoding::Converter::CRLF_NEWLINE_DECORATOR
@@ -2835,18 +2924,31 @@ econv_s_search_convpath(int argc, VALUE *argv, VALUE klass)
*
* opt should be nil, a hash or an integer.
*
+ * convpath should be an array.
+ * convpath should contains
+ * - two-element array which contains encoding or encoding name, or
+ * - a string of decorator name.
+ *
* example:
* # UTF-16BE to UTF-8
* ec = Encoding::Converter.new("UTF-16BE", "UTF-8")
*
- * # (1) convert UTF-16BE to UTF-8
- * # (2) convert CRLF and CR to LF
+ * # Usually, decorators such as newline conversion are inserted at last.
* ec = Encoding::Converter.new("UTF-16BE", "UTF-8", :universal_newline => true)
+ * p ec.convpath #=> [[#<Encoding:UTF-16BE>, #<Encoding:UTF-8>],
+ * # "universal_newline"]
*
- * # (1) convert LF to CRLF
- * # (2) convert UTF-8 to UTF-16BE
+ * # But, if the last encoding is ASCII incompatible,
+ * # decorators are inserted before the last conversion.
* ec = Encoding::Converter.new("UTF-8", "UTF-16BE", :crlf_newline => true)
- *
+ * p ec.convpath #=> ["crlf_newline",
+ * # [#<Encoding:UTF-8>, #<Encoding:UTF-16BE>]]
+ *
+ * # conversion path can be specified directly.
+ * ec = Encoding::Converter.new(["universal_newline", ["EUC-JP", "UTF-8"], ["UTF-8", "UTF-16BE"]])
+ * p ec.convpath #=> ["universal_newline",
+ * # [#<Encoding:EUC-JP>, #<Encoding:UTF-8>],
+ * # [#<Encoding:UTF-8>, #<Encoding:UTF-16BE>]]
*/
static VALUE
econv_init(int argc, VALUE *argv, VALUE self)
@@ -2857,13 +2959,22 @@ econv_init(int argc, VALUE *argv, VALUE self)
rb_encoding *senc, *denc;
rb_econv_t *ec;
int ecflags;
+ VALUE convpath;
if (DATA_PTR(self)) {
rb_raise(rb_eTypeError, "already initialized");
}
- econv_args(argc, argv, &snamev, &dnamev, &sname, &dname, &senc, &denc, &ecflags, &ecopts);
- ec = rb_econv_open_opts(sname, dname, ecflags, ecopts);
+ if (argc == 1 && !NIL_P(convpath = rb_check_array_type(argv[0]))) {
+ ec = rb_econv_init_by_convpath(self, convpath, &sname, &dname, &senc, &denc);
+ ecflags = 0;
+ ecopts = Qnil;
+ }
+ else {
+ econv_args(argc, argv, &snamev, &dnamev, &sname, &dname, &senc, &denc, &ecflags, &ecopts);
+ ec = rb_econv_open_opts(sname, dname, ecflags, ecopts);
+ }
+
if (!ec) {
rb_exc_raise(rb_econv_open_exc(sname, dname, ecflags));
}