summaryrefslogtreecommitdiff
path: root/test/ruby/test_econv.rb
blob: e9669aec1829663e5a1e2ff27cab72760dfedb9b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
require 'test/unit'

# Tests for Encoding::Converter, driven through #primitive_convert.
class TestEncodingConverter < Test::Unit::TestCase
  # Converts +src+ with a fresh Encoding::Converter.new(from, to), calling
  # primitive_convert repeatedly for as long as it reports :obuf_full, and
  # asserts on the final return value, the accumulated output and whatever
  # is left in the (duplicated) source string.
  #
  # ret_expected:: expected final return value of primitive_convert
  # dst_expected:: expected concatenation of all output chunks
  # src_expected:: expected content of the source string after conversion
  # to, from::     encoding names, handed to Encoding::Converter.new(from, to)
  # src::          input string; it is dup'ed, so the caller's string is kept
  # opt::          options; :obuf_len is the size passed to each
  #                primitive_convert call (100 when absent)
  #
  # NOTE(review): the parameters are declared as (to, from), so a call such as
  # assert_econv(..., "EUC-JP", "UTF-8", ...) builds a UTF-8 -> EUC-JP
  # converter even inside test_eucjp_to_utf8 -- confirm this order is intended.
  def assert_econv(ret_expected, dst_expected, src_expected, to, from, src, opt={})
    # Read the option into a local instead of writing the default back into
    # +opt+, so the caller's hash is never modified.
    obuf_len = opt[:obuf_len] || 100
    src = src.dup
    ec = Encoding::Converter.new(from, to)
    dst = ''
    ret = nil # declared outside the block so it is visible after the loop
    loop do
      chunk = ""
      ret = ec.primitive_convert(src, chunk, obuf_len)
      dst << chunk
      # Anything other than a full output buffer ends the conversion.
      break unless ret == :obuf_full
    end
    assert_equal([ret_expected, dst_expected, src_expected], [ret, dst, src])
  end

  # Empty and pure-ASCII input converts cleanly and is fully consumed.
  def test_eucjp_to_utf8
    assert_econv(:finished, "", "", "EUC-JP", "UTF-8", "")
    assert_econv(:finished, "a", "", "EUC-JP", "UTF-8", "a")
  end

  # Empty input finishes immediately for the ISO-2022-JP / Shift_JIS pair.
  def test_iso2022jp
    assert_econv(:finished, "", "", "ISO-2022-JP", "Shift_JIS", "")
  end

  # Invalid bytes stop the conversion with :invalid_input; the output holds
  # what was converted before the bad byte and the source holds what follows it.
  def test_invalid
    assert_econv(:invalid_input, "", "", "EUC-JP", "UTF-8", "\x80")
    assert_econv(:invalid_input, "a", "", "EUC-JP", "UTF-8", "a\x80")
    assert_econv(:invalid_input, "a", "\x80", "EUC-JP", "UTF-8", "a\x80\x80")
    assert_econv(:invalid_input, "abc", "def", "EUC-JP", "UTF-8", "abc\xFFdef")
    assert_econv(:invalid_input, "abc", "def", "EUC-JP", "Shift_JIS", "abc\xFFdef")
    # A one-byte output buffer forces the helper through its :obuf_full loop.
    assert_econv(:invalid_input, "abc", "def", "EUC-JP", "Shift_JIS", "abc\xFFdef", :obuf_len=>1)
    assert_econv(:invalid_input, "abc", "def", "Shift_JIS", "ISO-2022-JP", "abc\xFFdef")
  end

  # One converter reports successive errors across repeated calls: first an
  # undefined conversion, then an invalid input, then :finished.
  def test_errors
    ec = Encoding::Converter.new("UTF-16BE", "EUC-JP")
    src = "\xFF\xFE\x00A\xDC\x00"
    ret = ec.primitive_convert(src, dst="", 10)
    # The source string is already empty after the first call.
    assert_equal("", src)
    assert_equal("", dst)
    assert_equal(:undefined_conversion, ret) # \xFF\xFE is not representable in EUC-JP
    ret = ec.primitive_convert(src, dst="", 10)
    assert_equal("", src)
    assert_equal("A", dst)
    assert_equal(:invalid_input, ret) # \xDC\x00 is invalid as UTF-16BE
    ret = ec.primitive_convert(src, dst="", 10)
    assert_equal("", src)
    assert_equal("", dst)
    assert_equal(:finished, ret)
  end

  # With UNIVERSAL_NEWLINE, "\r\n" and a lone "\r" both come out as "\n",
  # including when input is fed in pieces with PARTIAL_INPUT.
  def test_universal_newline
    ec = Encoding::Converter.new("UTF-8", "EUC-JP", Encoding::Converter::UNIVERSAL_NEWLINE)
    ret = ec.primitive_convert(src="abc\r\ndef", dst="", 50, Encoding::Converter::PARTIAL_INPUT)
    assert_equal([:ibuf_empty, "", "abc\ndef"], [ret, src, dst])
    ret = ec.primitive_convert(src="ghi\njkl", dst="", 50, Encoding::Converter::PARTIAL_INPUT)
    assert_equal([:ibuf_empty, "", "ghi\njkl"], [ret, src, dst])
    ret = ec.primitive_convert(src="mno\rpqr", dst="", 50, Encoding::Converter::PARTIAL_INPUT)
    assert_equal([:ibuf_empty, "", "mno\npqr"], [ret, src, dst])
    # A "\r" ending one piece is output as "\n" right away ...
    ret = ec.primitive_convert(src="stu\r", dst="", 50, Encoding::Converter::PARTIAL_INPUT)
    assert_equal([:ibuf_empty, "", "stu\n"], [ret, src, dst])
    # ... and a "\n" opening the next piece produces no second newline.
    ret = ec.primitive_convert(src="\nvwx", dst="", 50, Encoding::Converter::PARTIAL_INPUT)
    assert_equal([:ibuf_empty, "", "vwx"], [ret, src, dst])
  end

  # With CRLF_NEWLINE, "\n" is output as "\r\n".
  def test_crlf_newline
    ec = Encoding::Converter.new("UTF-8", "EUC-JP", Encoding::Converter::CRLF_NEWLINE)
    ret = ec.primitive_convert(src="abc\ndef", dst="", 50)
    assert_equal([:finished, "", "abc\r\ndef"], [ret, src, dst])
  end

  # With CR_NEWLINE, "\n" is output as "\r".
  def test_cr_newline
    ec = Encoding::Converter.new("UTF-8", "EUC-JP", Encoding::Converter::CR_NEWLINE)
    ret = ec.primitive_convert(src="abc\ndef", dst="", 50)
    assert_equal([:finished, "", "abc\rdef"], [ret, src, dst])
  end
end