# summary refs log tree commit diff
# path: root/test/ruby/test_gb18030.rb
# blob: a33a9eb28e88cac033e5d49af4472bdb0961581c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
require "test/unit"

# Tests for how Ruby handles strings tagged with the GB18030 encoding:
# character length, code point conversion, case-insensitive matching, and
# locating the head of the last character (exercised through String#chop).
class TestGB18030 < Test::Unit::TestCase
  # Returns a copy of +s+ whose bytes are tagged as GB18030.
  #
  # The argument itself is never modified: callers pass string literals, and
  # re-tagging those in place raises FrozenError when literals are frozen.
  # The bytes are not transcoded; only the encoding tag of the copy changes.
  def s(s)
    s.dup.force_encoding("gb18030")
  end

  # A two-byte and a four-byte sequence must each count as one character.
  def test_mbc_enc_len
    assert_equal(1, s("\x81\x40").size)
    assert_equal(1, s("\x81\x30\x81\x30").size)
  end

  # String#ord of a two-byte character yields its bytes as one integer.
  def test_mbc_to_code
    assert_equal(0x8140, s("\x81\x40").ord)
  end

  # Integer#chr with the encoding name is the inverse of the conversion above.
  def test_code_to_mbc
    assert_equal(s("\x81\x40"), 0x8140.chr("gb18030"))
  end

  # A case-insensitive regexp with a back-reference must match a repeated
  # two-byte character.
  def test_mbc_case_fold
    r = Regexp.new(s("(\x81\x40)\\1"), "i")
    assert_match(r, s("\x81\x40\x81\x40"))
  end

  # Asserts that String#chop removes exactly +i+ bytes from the end.
  #
  # +c+ lists the bytes of the string last-to-first (the order a backward
  # scan from the end meets them), so the string under test is
  # <tt>c.reverse.join</tt> and the expected result is that string without
  # its final +i+ bytes.
  def scheck(c, i)
    assert_equal(s(c.reverse.take(c.size - i).join), s(c.reverse.join).chop)
  end

  # Asserts that String#chop raises ArgumentError for the byte sequence +c+
  # (listed last-to-first, as in #scheck).
  #
  # NOTE(review): this relies on String#chop raising for these byte
  # sequences -- confirm against the Ruby version under test.
  def fcheck(c)
    assert_raise(ArgumentError) { s(c.reverse.join).chop }
  end

  # Checks, via String#chop, where the last character of a string begins for
  # many combinations of trailing byte classes. One representative byte is
  # used per class (see the class table below).
  def test_left_adjust_char_head
    # C1: 00-2f, 3a-3f, 7f, ff
    # C2: 40-7e, 80
    # C4: 30-39
    # CM: 81-fe
    c1 = "\x2f"
    c2 = "\x40"
    c4 = "\x30"
    cm = "\x81"

    # State paths, written as the byte classes met while scanning backwards
    # from the end of the string; "(rec)" marks a return to an earlier state.
    # NOTE(review): presumably these mirror the states of the encoding's
    # backward-scan implementation -- confirm against that source.
    #
    # S_START-c1
    # S_START-c2-S_one_C2-0
    # S_START-c2-S_one_C2-c1
    # S_START-c2-S_one_C2-cm-S_odd_CM_one_CX-c1
    # S_START-c2-S_one_C2-cm-S_odd_CM_one_CX-cm-S_even_CM_one_CX-c1
    # S_START-c2-S_one_C2-cm-S_odd_CM_one_CX-cm-S_even_CM_one_CX-cm-S_odd_CM_one_CX(rec)
    # S_START-c4-S_one_C4-c1
    # S_START-c4-S_one_C4-cm-S_one_CMC4-c1
    # S_START-c4-S_one_C4-cm-S_one_CMC4-c4-S_one_C4_odd_CMC4-c1
    # S_START-c4-S_one_C4-cm-S_one_CMC4-c4-S_one_C4_odd_CMC4-cm-S_even_CMC4-c1
    # S_START-c4-S_one_C4-cm-S_one_CMC4-c4-S_one_C4_odd_CMC4-cm-S_even_CMC4-c4-S_one_C4_even_CMC4-c1
    # S_START-c4-S_one_C4-cm-S_one_CMC4-c4-S_one_C4_odd_CMC4-cm-S_even_CMC4-c4-S_one_C4_even_CMC4-cm-S_odd_CMC4-c1
    # S_START-c4-S_one_C4-cm-S_one_CMC4-c4-S_one_C4_odd_CMC4-cm-S_even_CMC4-c4-S_one_C4_even_CMC4-cm-S_odd_CMC4-c4-S_one_C4_odd_CMC4(rec)
    # S_START-c4-S_one_C4-cm-S_one_CMC4-c4-S_one_C4_odd_CMC4-cm-S_even_CMC4-c4-S_one_C4_even_CMC4-cm-S_odd_CMC4-cm-S_odd_CM_odd_CMC4-c1
    # S_START-c4-S_one_C4-cm-S_one_CMC4-c4-S_one_C4_odd_CMC4-cm-S_even_CMC4-c4-S_one_C4_even_CMC4-cm-S_odd_CMC4-cm-S_odd_CM_odd_CMC4-cm-S_even_CM_odd_CMC4-c1
    # S_START-c4-S_one_C4-cm-S_one_CMC4-c4-S_one_C4_odd_CMC4-cm-S_even_CMC4-c4-S_one_C4_even_CMC4-cm-S_odd_CMC4-cm-S_odd_CM_odd_CMC4-cm-S_even_CM_odd_CMC4-cm-S_odd_CM_odd_CMC4(rec)
    # S_START-c4-S_one_C4-cm-S_one_CMC4-c4-S_one_C4_odd_CMC4-cm-S_even_CMC4-cm-S_odd_CM_even_CMC4-c1
    # S_START-c4-S_one_C4-cm-S_one_CMC4-c4-S_one_C4_odd_CMC4-cm-S_even_CMC4-cm-S_odd_CM_even_CMC4-cm-S_even_CM_even_CMC4-c1
    # S_START-c4-S_one_C4-cm-S_one_CMC4-c4-S_one_C4_odd_CMC4-cm-S_even_CMC4-cm-S_odd_CM_even_CMC4-cm-S_even_CM_even_CMC4-cm-S_odd_CM_even_CMC4(rec)
    # S_START-c4-S_one_C4-cm-S_one_CMC4-cm-S_even_CM_one_CX(rec)
    # S_START-cm-S_one_CM-c1
    # S_START-cm-S_one_CM-c4-S_odd_C4CM-c1
    # S_START-cm-S_one_CM-c4-S_odd_C4CM-cm-S_one_CM_odd_C4CM-c1
    # S_START-cm-S_one_CM-c4-S_odd_C4CM-cm-S_one_CM_odd_C4CM-c4-S_even_C4CM-c1
    # S_START-cm-S_one_CM-c4-S_odd_C4CM-cm-S_one_CM_odd_C4CM-c4-S_even_C4CM-cm-S_one_CM_even_C4CM-c1
    # S_START-cm-S_one_CM-c4-S_odd_C4CM-cm-S_one_CM_odd_C4CM-c4-S_even_C4CM-cm-S_one_CM_even_C4CM-c4-S_odd_C4CM(rec)
    # S_START-cm-S_one_CM-c4-S_odd_C4CM-cm-S_one_CM_odd_C4CM-c4-S_even_C4CM-cm-S_one_CM_even_C4CM-cm-S_even_CM_even_C4CM-c1
    # S_START-cm-S_one_CM-c4-S_odd_C4CM-cm-S_one_CM_odd_C4CM-c4-S_even_C4CM-cm-S_one_CM_even_C4CM-cm-S_even_CM_even_C4CM-cm-S_odd_CM_even_C4CM-c1
    # S_START-cm-S_one_CM-c4-S_odd_C4CM-cm-S_one_CM_odd_C4CM-c4-S_even_C4CM-cm-S_one_CM_even_C4CM-cm-S_even_CM_even_C4CM-cm-S_odd_CM_even_C4CM-cm-S_even_CM_even_C4CM(rec)
    # S_START-cm-S_one_CM-c4-S_odd_C4CM-cm-S_one_CM_odd_C4CM-cm-S_even_CM_odd_C4CM-c1
    # S_START-cm-S_one_CM-c4-S_odd_C4CM-cm-S_one_CM_odd_C4CM-cm-S_even_CM_odd_C4CM-cm-S_odd_CM_odd_C4CM-c1
    # S_START-cm-S_one_CM-c4-S_odd_C4CM-cm-S_one_CM_odd_C4CM-cm-S_even_CM_odd_C4CM-cm-S_odd_CM_odd_C4CM-cm-S_even_CM_odd_C4CM(rec)
    # S_START-cm-S_one_CM-cm-S_odd_CM_one_CX(rec)

    # scheck(bytes, n): chop must drop the last n bytes.
    # fcheck(bytes):    chop must raise ArgumentError.
    scheck([c1], 1)
    scheck([c2], 1)
    scheck([c2, c1], 1)
    scheck([c2, cm, c1], 2)
    scheck([c2, cm, cm, c1], 1)
    scheck([c2, cm, cm, cm], 2)
    scheck([c4], 1)
    scheck([c4, c1], 1)
    scheck([c4, cm], 2)
    fcheck([c4, cm, c1])
    fcheck([c4, cm, c4, c1])
    scheck([c4, cm, c4, cm], 4)
    scheck([c4, cm, c4, cm, c1], 4)
    scheck([c4, cm, c4, cm, c4], 4)
    scheck([c4, cm, c4, cm, c4, c1], 4)
    fcheck([c4, cm, c4, cm, c4, cm])
    fcheck([c4, cm, c4, cm, c4, cm, c1])
    fcheck([c4, cm, c4, cm, c4, cm, c4])
    scheck([c4, cm, c4, cm, c4, cm, cm, c1], 4)
    fcheck([c4, cm, c4, cm, c4, cm, cm, cm])
    fcheck([c4, cm, c4, cm, c4, cm, cm, cm, c1])
    scheck([c4, cm, c4, cm, c4, cm, cm, cm, cm], 4)
    fcheck([c4, cm, c4, cm, cm, c1])
    scheck([c4, cm, c4, cm, cm, cm], 4)
    scheck([c4, cm, c4, cm, cm, cm, c1], 4)
    fcheck([c4, cm, c4, cm, cm, cm, cm])
    scheck([c4, cm, cm], 1)
    scheck([cm], 1)
    fcheck([cm, c1])
    fcheck([cm, c4, c1])
    scheck([cm, c4, cm], 3)
    fcheck([cm, c4, cm, c1])
    fcheck([cm, c4, cm, c4])
    fcheck([cm, c4, cm, c4, c1])
    fcheck([cm, c4, cm, c4, cm])
    fcheck([cm, c4, cm, c4, cm, c1])
    fcheck([cm, c4, cm, c4, cm, c4])
    fcheck([cm, c4, cm, c4, cm, cm, c1])
    fcheck([cm, c4, cm, c4, cm, cm, cm])
    fcheck([cm, c4, cm, c4, cm, cm, cm, c1])
    fcheck([cm, c4, cm, c4, cm, cm, cm, cm])
    fcheck([cm, c4, cm, cm, c1])
    fcheck([cm, c4, cm, cm, cm])
    fcheck([cm, c4, cm, cm, cm, c1])
    fcheck([cm, c4, cm, cm, cm, cm])
    scheck([cm, cm], 2)
  end
end