summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorakr <akr@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2008-09-04 12:48:21 +0000
committerakr <akr@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2008-09-04 12:48:21 +0000
commit2494e67f16fc82834a3f6d5e04fe28f9418ec9cc (patch)
tree8693d58055b0531eaf1789b2bd4ff9d99dab2537
parentf7607eff9af30e002770805f775da7a078cedaeb (diff)
* transcode.c (sym_partial_input): new variable.
(econv_primitive_convert): accept a hash as 5th argument as well.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@19131 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r--ChangeLog5
-rw-r--r--test/ruby/test_econv.rb62
-rw-r--r--transcode.c41
3 files changed, 66 insertions, 42 deletions
diff --git a/ChangeLog b/ChangeLog
index a25bddd859..8e90c1e186 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+Thu Sep 4 21:46:21 2008 Tanaka Akira <akr@fsij.org>
+
+ * transcode.c (sym_partial_input): new variable.
+ (econv_primitive_convert): accept a hash as 5th argument as well.
+
Thu Sep 4 21:04:27 2008 Tanaka Akira <akr@fsij.org>
* transcode.c (sym_universal_newline_decoder): new variable.
diff --git a/test/ruby/test_econv.rb b/test/ruby/test_econv.rb
index e7256c12f3..2e4409dbc5 100644
--- a/test/ruby/test_econv.rb
+++ b/test/ruby/test_econv.rb
@@ -1,8 +1,8 @@
require 'test/unit'
class TestEncodingConverter < Test::Unit::TestCase
- def check_ec(edst, esrc, eres, dst, src, ec, off, len, flags=0)
- res = ec.primitive_convert(src, dst, off, len, flags)
+ def check_ec(edst, esrc, eres, dst, src, ec, off, len, opts=nil)
+ res = ec.primitive_convert(src, dst, off, len, opts)
assert_equal([edst.dup.force_encoding("ASCII-8BIT"),
esrc.dup.force_encoding("ASCII-8BIT"),
eres],
@@ -11,11 +11,11 @@ class TestEncodingConverter < Test::Unit::TestCase
res])
end
- def assert_econv(converted, eres, obuf_bytesize, ec, consumed, rest, flags=0)
+ def assert_econv(converted, eres, obuf_bytesize, ec, consumed, rest, opts=nil)
ec = Encoding::Converter.new(*ec) if Array === ec
i = consumed + rest
o = ""
- ret = ec.primitive_convert(i, o, 0, obuf_bytesize, flags)
+ ret = ec.primitive_convert(i, o, 0, obuf_bytesize, opts)
assert_equal([converted, eres, rest],
[o, ret, i])
end
@@ -61,20 +61,20 @@ class TestEncodingConverter < Test::Unit::TestCase
def test_output_region
ec = Encoding::Converter.new("UTF-8", "EUC-JP")
- ec.primitive_convert(src="a", dst="b", nil, 1, Encoding::Converter::PARTIAL_INPUT)
+ ec.primitive_convert(src="a", dst="b", nil, 1, :partial_input=>true)
assert_equal("ba", dst)
- ec.primitive_convert(src="a", dst="b", 0, 1, Encoding::Converter::PARTIAL_INPUT)
+ ec.primitive_convert(src="a", dst="b", 0, 1, :partial_input=>true)
assert_equal("a", dst)
- ec.primitive_convert(src="a", dst="b", 1, 1, Encoding::Converter::PARTIAL_INPUT)
+ ec.primitive_convert(src="a", dst="b", 1, 1, :partial_input=>true)
assert_equal("ba", dst)
assert_raise(ArgumentError) {
- ec.primitive_convert(src="a", dst="b", 2, 1, Encoding::Converter::PARTIAL_INPUT)
+ ec.primitive_convert(src="a", dst="b", 2, 1, :partial_input=>true)
}
assert_raise(ArgumentError) {
- ec.primitive_convert(src="a", dst="b", -1, 1, Encoding::Converter::PARTIAL_INPUT)
+ ec.primitive_convert(src="a", dst="b", -1, 1, :partial_input=>true)
}
assert_raise(ArgumentError) {
- ec.primitive_convert(src="a", dst="b", 1, -1, Encoding::Converter::PARTIAL_INPUT)
+ ec.primitive_convert(src="a", dst="b", 1, -1, :partial_input=>true)
}
end
@@ -114,7 +114,7 @@ class TestEncodingConverter < Test::Unit::TestCase
def test_partial_input
ec = Encoding::Converter.new("UTF-8", "EUC-JP")
- ret = ec.primitive_convert(src="", dst="", nil, 10, Encoding::Converter::PARTIAL_INPUT)
+ ret = ec.primitive_convert(src="", dst="", nil, 10, :partial_input=>true)
assert_equal(:source_buffer_empty, ret)
ret = ec.primitive_convert(src="", dst="", nil, 10)
assert_equal(:finished, ret)
@@ -153,7 +153,7 @@ class TestEncodingConverter < Test::Unit::TestCase
def test_iso2022jp_encode
ec = Encoding::Converter.new("EUC-JP", "ISO-2022-JP")
- a = ["", src="", ec, nil, 50, Encoding::Converter::PARTIAL_INPUT]
+ a = ["", src="", ec, nil, 50, :partial_input=>true]
src << "a"; check_ec("a", "", :source_buffer_empty, *a)
src << "\xA2"; check_ec("a", "", :source_buffer_empty, *a)
src << "\xA4"; check_ec("a\e$B\"$", "", :source_buffer_empty, *a)
@@ -166,7 +166,7 @@ class TestEncodingConverter < Test::Unit::TestCase
def test_iso2022jp_decode
ec = Encoding::Converter.new("ISO-2022-JP", "EUC-JP")
- a = ["", src="", ec, nil, 50, Encoding::Converter::PARTIAL_INPUT]
+ a = ["", src="", ec, nil, 50, :partial_input=>true]
src << "a"; check_ec("a", "", :source_buffer_empty, *a)
src << "\e"; check_ec("a", "", :source_buffer_empty, *a)
src << "$"; check_ec("a", "", :source_buffer_empty, *a)
@@ -212,7 +212,7 @@ class TestEncodingConverter < Test::Unit::TestCase
def test_invalid4
ec = Encoding::Converter.new("Shift_JIS", "EUC-JP")
- a = ["", "abc\xFFdef", ec, nil, 10, Encoding::Converter::OUTPUT_FOLLOWED_BY_INPUT]
+ a = ["", "abc\xFFdef", ec, nil, 10, :output_followed_by_input=>true]
check_ec("a", "bc\xFFdef", :output_followed_by_input, *a)
check_ec("ab", "c\xFFdef", :output_followed_by_input, *a)
check_ec("abc", "\xFFdef", :output_followed_by_input, *a)
@@ -225,7 +225,7 @@ class TestEncodingConverter < Test::Unit::TestCase
def test_invalid_utf16le
ec = Encoding::Converter.new("UTF-16LE", "UTF-8")
- a = ["", src="", ec, nil, 50, Encoding::Converter::PARTIAL_INPUT]
+ a = ["", src="", ec, nil, 50, :partial_input=>true]
src << "A"; check_ec("", "", :source_buffer_empty, *a)
src << "\x00"; check_ec("A", "", :source_buffer_empty, *a)
src << "\x00"; check_ec("A", "", :source_buffer_empty, *a)
@@ -244,7 +244,7 @@ class TestEncodingConverter < Test::Unit::TestCase
def test_invalid_utf16be
ec = Encoding::Converter.new("UTF-16BE", "UTF-8")
- a = ["", src="", ec, nil, 50, Encoding::Converter::PARTIAL_INPUT]
+ a = ["", src="", ec, nil, 50, :partial_input=>true]
src << "\x00"; check_ec("", "", :source_buffer_empty, *a)
src << "A"; check_ec("A", "", :source_buffer_empty, *a)
src << "\xd8"; check_ec("A", "", :source_buffer_empty, *a)
@@ -263,7 +263,7 @@ class TestEncodingConverter < Test::Unit::TestCase
def test_invalid_utf32be
ec = Encoding::Converter.new("UTF-32BE", "UTF-8")
- a = ["", src="", ec, nil, 50, Encoding::Converter::PARTIAL_INPUT]
+ a = ["", src="", ec, nil, 50, :partial_input=>true]
src << "\x00"; check_ec("", "", :source_buffer_empty, *a)
src << "\x00"; check_ec("", "", :source_buffer_empty, *a)
src << "\x00"; check_ec("", "", :source_buffer_empty, *a)
@@ -287,7 +287,7 @@ class TestEncodingConverter < Test::Unit::TestCase
def test_invalid_utf32le
ec = Encoding::Converter.new("UTF-32LE", "UTF-8")
- a = ["", src="", ec, nil, 50, Encoding::Converter::PARTIAL_INPUT]
+ a = ["", src="", ec, nil, 50, :partial_input=>true]
src << "A"; check_ec("", "", :source_buffer_empty, *a)
src << "\x00"; check_ec("", "", :source_buffer_empty, *a)
src << "\x00"; check_ec("", "", :source_buffer_empty, *a)
@@ -319,7 +319,7 @@ class TestEncodingConverter < Test::Unit::TestCase
def test_errors2
ec = Encoding::Converter.new("UTF-16BE", "EUC-JP")
- a = ["", "\xFF\xFE\x00A\xDC\x00\x00B", ec, nil, 10, Encoding::Converter::OUTPUT_FOLLOWED_BY_INPUT]
+ a = ["", "\xFF\xFE\x00A\xDC\x00\x00B", ec, nil, 10, :output_followed_by_input=>true]
check_ec("", "\x00A\xDC\x00\x00B", :undefined_conversion, *a)
check_ec("A", "\xDC\x00\x00B", :output_followed_by_input, *a)
check_ec("A", "\x00B", :invalid_byte_sequence, *a)
@@ -329,7 +329,7 @@ class TestEncodingConverter < Test::Unit::TestCase
def test_universal_newline
ec = Encoding::Converter.new("UTF-8", "EUC-JP", universal_newline_decoder: true)
- a = ["", src="", ec, nil, 50, Encoding::Converter::PARTIAL_INPUT]
+ a = ["", src="", ec, nil, 50, :partial_input=>true]
src << "abc\r\ndef"; check_ec("abc\ndef", "", :source_buffer_empty, *a)
src << "ghi\njkl"; check_ec("abc\ndefghi\njkl", "", :source_buffer_empty, *a)
src << "mno\rpqr"; check_ec("abc\ndefghi\njklmno\npqr", "", :source_buffer_empty, *a)
@@ -340,7 +340,7 @@ class TestEncodingConverter < Test::Unit::TestCase
def test_universal_newline2
ec = Encoding::Converter.new("", "", universal_newline_decoder: true)
- a = ["", src="", ec, nil, 50, Encoding::Converter::PARTIAL_INPUT]
+ a = ["", src="", ec, nil, 50, :partial_input=>true]
src << "abc\r\ndef"; check_ec("abc\ndef", "", :source_buffer_empty, *a)
src << "ghi\njkl"; check_ec("abc\ndefghi\njkl", "", :source_buffer_empty, *a)
src << "mno\rpqr"; check_ec("abc\ndefghi\njklmno\npqr", "", :source_buffer_empty, *a)
@@ -371,7 +371,7 @@ class TestEncodingConverter < Test::Unit::TestCase
def test_output_followed_by_input
ec = Encoding::Converter.new("UTF-8", "EUC-JP")
- a = ["", "abc\u{3042}def", ec, nil, 100, Encoding::Converter::OUTPUT_FOLLOWED_BY_INPUT]
+ a = ["", "abc\u{3042}def", ec, nil, 100, :output_followed_by_input=>true]
check_ec("a", "bc\u{3042}def", :output_followed_by_input, *a)
check_ec("ab", "c\u{3042}def", :output_followed_by_input, *a)
check_ec("abc", "\u{3042}def", :output_followed_by_input, *a)
@@ -408,7 +408,7 @@ class TestEncodingConverter < Test::Unit::TestCase
def test_errinfo_valid_partial_character
ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1")
- ec.primitive_convert(src="\xa4", dst="", nil, 10, Encoding::Converter::PARTIAL_INPUT)
+ ec.primitive_convert(src="\xa4", dst="", nil, 10, :partial_input=>true)
assert_errinfo(:source_buffer_empty, nil, nil, nil, nil, ec)
end
@@ -428,23 +428,23 @@ class TestEncodingConverter < Test::Unit::TestCase
def test_output_iso2022jp
ec = Encoding::Converter.new("EUC-JP", "ISO-2022-JP")
- ec.primitive_convert(src="\xa1\xa1", dst="", nil, 10, Encoding::Converter::PARTIAL_INPUT)
+ ec.primitive_convert(src="\xa1\xa1", dst="", nil, 10, :partial_input=>true)
assert_equal("\e$B!!".force_encoding("ISO-2022-JP"), dst)
assert_equal(nil, ec.insert_output("???"))
- ec.primitive_convert("", dst, nil, 10, Encoding::Converter::PARTIAL_INPUT)
+ ec.primitive_convert("", dst, nil, 10, :partial_input=>true)
assert_equal("\e$B!!\e(B???".force_encoding("ISO-2022-JP"), dst)
- ec.primitive_convert(src="\xa1\xa2", dst, nil, 10, Encoding::Converter::PARTIAL_INPUT)
+ ec.primitive_convert(src="\xa1\xa2", dst, nil, 10, :partial_input=>true)
assert_equal("\e$B!!\e(B???\e$B!\"".force_encoding("ISO-2022-JP"), dst)
assert_equal(nil, ec.insert_output("\xA1\xA1".force_encoding("EUC-JP")))
- ec.primitive_convert("", dst, nil, 10, Encoding::Converter::PARTIAL_INPUT)
+ ec.primitive_convert("", dst, nil, 10, :partial_input=>true)
assert_equal("\e$B!!\e(B???\e$B!\"!!".force_encoding("ISO-2022-JP"), dst)
- ec.primitive_convert(src="\xa1\xa3", dst, nil, 10, Encoding::Converter::PARTIAL_INPUT)
+ ec.primitive_convert(src="\xa1\xa3", dst, nil, 10, :partial_input=>true)
assert_equal("\e$B!!\e(B???\e$B!\"!!!\#".force_encoding("ISO-2022-JP"), dst)
assert_equal(nil, ec.insert_output("\u3042"))
- ec.primitive_convert("", dst, nil, 10, Encoding::Converter::PARTIAL_INPUT)
+ ec.primitive_convert("", dst, nil, 10, :partial_input=>true)
assert_equal("\e$B!!\e(B???\e$B!\"!!!\#$\"".force_encoding("ISO-2022-JP"), dst)
assert_raise(Encoding::ConversionUndefined) {
@@ -561,7 +561,7 @@ class TestEncodingConverter < Test::Unit::TestCase
def test_noconv_partial
ec = Encoding::Converter.new("", "")
- a = ["", "abcdefg", ec, nil, 2, Encoding::Converter::PARTIAL_INPUT]
+ a = ["", "abcdefg", ec, nil, 2, :partial_input=>true]
check_ec("ab", "cdefg", :destination_buffer_full, *a)
check_ec("abcd", "efg", :destination_buffer_full, *a)
check_ec("abcdef", "g", :destination_buffer_full, *a)
@@ -570,7 +570,7 @@ class TestEncodingConverter < Test::Unit::TestCase
def test_noconv_output_followed_by_input
ec = Encoding::Converter.new("", "")
- a = ["", "abcdefg", ec, nil, 2, Encoding::Converter::OUTPUT_FOLLOWED_BY_INPUT]
+ a = ["", "abcdefg", ec, nil, 2, :output_followed_by_input=>true]
check_ec("a", "bcdefg", :output_followed_by_input, *a)
check_ec("ab", "cdefg", :output_followed_by_input, *a)
check_ec("abc", "defg", :output_followed_by_input, *a)
diff --git a/transcode.c b/transcode.c
index 7f3f0665b2..96b3666c62 100644
--- a/transcode.c
+++ b/transcode.c
@@ -24,6 +24,7 @@ static VALUE sym_invalid, sym_undef, sym_ignore, sym_replace;
static VALUE sym_universal_newline_decoder;
static VALUE sym_crlf_newline_encoder;
static VALUE sym_cr_newline_encoder;
+static VALUE sym_partial_input;
static VALUE sym_invalid_byte_sequence;
static VALUE sym_undefined_conversion;
@@ -2527,11 +2528,15 @@ econv_result_to_symbol(rb_econv_result_t res)
* ec.primitive_convert(source_buffer, destination_buffer) -> symbol
* ec.primitive_convert(source_buffer, destination_buffer, destination_byteoffset) -> symbol
* ec.primitive_convert(source_buffer, destination_buffer, destination_byteoffset, destination_bytesize) -> symbol
- * ec.primitive_convert(source_buffer, destination_buffer, destination_byteoffset, destination_bytesize, flags) -> symbol
+ * ec.primitive_convert(source_buffer, destination_buffer, destination_byteoffset, destination_bytesize, opt) -> symbol
*
- * possible flags:
- * Encoding::Converter::PARTIAL_INPUT # source buffer may be part of larger source
- * Encoding::Converter::OUTPUT_FOLLOWED_BY_INPUT # stop conversion after output before input
+ * possible opt elements:
+ * hash form:
+ * :partial_input => true # source buffer may be part of larger source
+ * :output_followed_by_input => true # stop conversion after output before input
+ * integer form:
+ * Encoding::Converter::PARTIAL_INPUT
+ * Encoding::Converter::OUTPUT_FOLLOWED_BY_INPUT
*
* possible results:
* :invalid_byte_sequence
@@ -2583,14 +2588,14 @@ econv_result_to_symbol(rb_econv_result_t res)
* primitive_convert stops conversion when one of following condition met.
* - invalid byte sequence found in source buffer (:invalid_byte_sequence)
* - unexpected end of source buffer (:incomplete_input)
- * this occur only when PARTIAL_INPUT is not specified.
+ * this occur only when :partial_input is not specified.
* - character not representable in output encoding (:undefined_conversion)
* - after some output is generated, before input is done (:output_followed_by_input)
- * this occur only when OUTPUT_FOLLOWED_BY_INPUT is specified.
+ * this occur only when :output_followed_by_input is specified.
* - destination buffer is full (:destination_buffer_full)
* this occur only when destination_bytesize is non-nil.
* - source buffer is empty (:source_buffer_empty)
- * this occur only when PARTIAL_INPUT is specified.
+ * this occur only when :partial_input is specified.
* - conversion is finished (:finished)
*
* example:
@@ -2612,7 +2617,7 @@ econv_result_to_symbol(rb_econv_result_t res)
static VALUE
econv_primitive_convert(int argc, VALUE *argv, VALUE self)
{
- VALUE input, output, output_byteoffset_v, output_bytesize_v, flags_v;
+ VALUE input, output, output_byteoffset_v, output_bytesize_v, opt, flags_v;
rb_econv_t *ec = check_econv(self);
rb_econv_result_t res;
const unsigned char *ip, *is;
@@ -2621,7 +2626,7 @@ econv_primitive_convert(int argc, VALUE *argv, VALUE self)
unsigned long output_byteend;
int flags;
- rb_scan_args(argc, argv, "23", &input, &output, &output_byteoffset_v, &output_bytesize_v, &flags_v);
+ rb_scan_args(argc, argv, "23", &input, &output, &output_byteoffset_v, &output_bytesize_v, &opt);
if (NIL_P(output_byteoffset_v))
output_byteoffset = 0; /* dummy */
@@ -2633,10 +2638,23 @@ econv_primitive_convert(int argc, VALUE *argv, VALUE self)
else
output_bytesize = NUM2LONG(output_bytesize_v);
- if (NIL_P(flags_v))
+ if (NIL_P(opt)) {
flags = 0;
- else
+ }
+ else if (!NIL_P(flags_v = rb_check_to_integer(opt, "to_int"))) {
flags = NUM2INT(flags_v);
+ }
+ else {
+ VALUE v;
+ opt = rb_convert_type(opt, T_HASH, "Hash", "to_hash");
+ flags = 0;
+ v = rb_hash_aref(opt, sym_partial_input);
+ if (RTEST(v))
+ flags |= ECONV_PARTIAL_INPUT;
+ v = rb_hash_aref(opt, sym_output_followed_by_input);
+ if (RTEST(v))
+ flags |= ECONV_OUTPUT_FOLLOWED_BY_INPUT;
+ }
StringValue(output);
if (!NIL_P(input))
@@ -3301,6 +3319,7 @@ Init_transcode(void)
sym_universal_newline_decoder = ID2SYM(rb_intern("universal_newline_decoder"));
sym_crlf_newline_encoder = ID2SYM(rb_intern("crlf_newline_encoder"));
sym_cr_newline_encoder = ID2SYM(rb_intern("cr_newline_encoder"));
+ sym_partial_input = ID2SYM(rb_intern("partial_input"));
rb_define_method(rb_cString, "encode", str_encode, -1);
rb_define_method(rb_cString, "encode!", str_encode_bang, -1);