summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ChangeLog10
-rw-r--r--test/ruby/test_econv.rb24
-rw-r--r--transcode.c90
3 files changed, 124 insertions, 0 deletions
diff --git a/ChangeLog b/ChangeLog
index 4208375ee8..03c1acff16 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,13 @@
+Tue Aug 12 23:42:31 2008 Tanaka Akira <akr@fsij.org>
+
+ * transcode.c (rb_cEncodingConverter): new class Encoding::Converter.
+ (econv_free): new function.
+ (econv_s_allocate): ditto.
+ (econv_init): ditto.
+ (check_econv): ditto.
+ (econv_primitive_convert): new method.
+ (Init_transcode): define Encoding::Converter.
+
Tue Aug 12 23:16:09 2008 Tanaka Akira <akr@fsij.org>
* string.c (rb_str_splice_0): call rb_str_drop_bytes, not rb_str_drop.
diff --git a/test/ruby/test_econv.rb b/test/ruby/test_econv.rb
new file mode 100644
index 0000000000..045a4a0e00
--- /dev/null
+++ b/test/ruby/test_econv.rb
@@ -0,0 +1,24 @@
+require 'test/unit'
+
+class TestEncodingConverter < Test::Unit::TestCase
+ def assert_econv(ret_expected, src_expected, dst_expected, from, to, src, dst, flags=0)
+ ec = Encoding::Converter.new(from, to)
+ ret = ec.primitive_convert(src, dst, flags)
+ assert_equal(ret_expected, ret)
+ assert_equal(src_expected, src)
+ assert_equal(dst_expected, dst)
+ end
+
+ def test_eucjp_to_utf8
+ assert_econv(:finished, "", "", "EUC-JP", "UTF-8", "", "")
+ assert_econv(:ibuf_empty, "", "", "EUC-JP", "UTF-8", "", "", Encoding::Converter::PARTIAL_INPUT)
+ assert_econv(:finished, "", "", "EUC-JP", "UTF-8", "", " "*10)
+ assert_econv(:obuf_full, "", "", "EUC-JP", "UTF-8", "a", "")
+ end
+
+ def test_invalid
+ assert_econv(:invalid_input, "", "", "EUC-JP", "UTF-8", "\x80", " "*10)
+ assert_econv(:invalid_input, "", "a", "EUC-JP", "UTF-8", "a\x80", " "*10)
+ assert_econv(:invalid_input, "\x80", "a", "EUC-JP", "UTF-8", "a\x80\x80", " "*10)
+ end
+end
diff --git a/transcode.c b/transcode.c
index 5cc93da63d..324bc3e483 100644
--- a/transcode.c
+++ b/transcode.c
@@ -18,6 +18,8 @@
VALUE rb_eConversionUndefined;
VALUE rb_eInvalidByteSequence;
+VALUE rb_cEncodingConverter;
+
static VALUE sym_invalid, sym_undef, sym_ignore, sym_replace;
#define INVALID_IGNORE 0x1
#define INVALID_REPLACE 0x2
@@ -1219,6 +1221,88 @@ rb_str_transcode(VALUE str, VALUE to)
return str_encode(1, &to, str);
}
+static void
+econv_free(rb_trans_t *ts)
+{
+ rb_trans_close(ts);
+}
+
+static VALUE
+econv_s_allocate(VALUE klass)
+{
+ return Data_Wrap_Struct(klass, NULL, econv_free, NULL);
+}
+
+static VALUE
+econv_init(VALUE self, VALUE from_encoding, VALUE to_encoding)
+{
+ const char *from_e, *to_e;
+ rb_trans_t *ts;
+
+ from_e = StringValueCStr(from_encoding);
+ to_e = StringValueCStr(to_encoding);
+
+ if (DATA_PTR(self)) {
+ rb_raise(rb_eTypeError, "already initialized");
+ }
+
+ ts = rb_trans_open(from_e, to_e, 0);
+ if (!ts) {
+ rb_raise(rb_eArgError, "encoding convewrter not supported (from %s to %s)", from_e, to_e);
+ }
+
+ DATA_PTR(self) = ts;
+
+ return self;
+}
+
+#define IS_ECONV(obj) (RDATA(obj)->dfree == (RUBY_DATA_FUNC)econv_free)
+
+static rb_trans_t *
+check_econv(VALUE self)
+{
+ Check_Type(self, T_DATA);
+ if (!IS_ECONV(self)) {
+ rb_raise(rb_eTypeError, "wrong argument type %s (expected Encoding::Converter)",
+ rb_class2name(CLASS_OF(self)));
+ }
+ return DATA_PTR(self);
+}
+
+static VALUE
+econv_primitive_convert(VALUE self, VALUE input, VALUE output, VALUE flags_v)
+{
+ rb_trans_t *ts = check_econv(self);
+ rb_trans_result_t res;
+ const unsigned char *ip, *is;
+ unsigned char *op, *os;
+ int flags;
+
+ StringValue(input);
+ StringValue(output);
+ rb_str_modify(output);
+ flags = NUM2INT(flags_v);
+
+ ip = (const unsigned char *)RSTRING_PTR(input);
+ is = ip + RSTRING_LEN(input);
+
+ op = (unsigned char *)RSTRING_PTR(output);
+ os = op + RSTRING_LEN(output);
+
+ res = rb_trans_conv(ts, &ip, is, &op, os, flags);
+ rb_str_set_len(output, op-(unsigned char *)RSTRING_PTR(output));
+ rb_str_drop_bytes(input, ip - (unsigned char *)RSTRING_PTR(input));
+
+ switch (res) {
+ case transcode_invalid_input: return ID2SYM(rb_intern("invalid_input"));
+ case transcode_undefined_conversion: return ID2SYM(rb_intern("undefined_conversion"));
+ case transcode_obuf_full: return ID2SYM(rb_intern("obuf_full"));
+ case transcode_ibuf_empty: return ID2SYM(rb_intern("ibuf_empty"));
+ case transcode_finished: return ID2SYM(rb_intern("finished"));
+ default: return INT2NUM(res);
+ }
+}
+
void
Init_transcode(void)
{
@@ -1234,4 +1318,10 @@ Init_transcode(void)
rb_define_method(rb_cString, "encode", str_encode, -1);
rb_define_method(rb_cString, "encode!", str_encode_bang, -1);
+
+ rb_cEncodingConverter = rb_define_class_under(rb_cEncoding, "Converter", rb_cData);
+ rb_define_alloc_func(rb_cEncodingConverter, econv_s_allocate);
+ rb_define_method(rb_cEncodingConverter, "initialize", econv_init, 2);
+ rb_define_method(rb_cEncodingConverter, "primitive_convert", econv_primitive_convert, 3);
+ rb_define_const(rb_cEncodingConverter, "PARTIAL_INPUT", INT2FIX(PARTIAL_INPUT));
}