summaryrefslogtreecommitdiff
path: root/spec/ruby/core/encoding/converter/primitive_convert_spec.rb
blob: 63f25eddef1be06aa2be94e8e6ce5c4db5dcf155 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
# -*- encoding: binary -*-
# frozen_string_literal: false
require_relative '../../../spec_helper'

describe "Encoding::Converter#primitive_convert" do
  before :each do
    @ec = Encoding::Converter.new("utf-8", "iso-8859-1")
  end

  it "accepts a nil source buffer" do
    -> { @ec.primitive_convert(nil,"") }.should_not raise_error
  end

  it "accepts a String as the source buffer" do
    -> { @ec.primitive_convert("","") }.should_not raise_error
  end

  it "raises FrozenError when the destination buffer is a frozen String" do
    -> { @ec.primitive_convert("", "".freeze) }.should raise_error(FrozenError)
  end

  it "accepts nil for the destination byte offset" do
    -> { @ec.primitive_convert("","", nil) }.should_not raise_error
  end

  it "accepts an integer for the destination byte offset" do
    -> { @ec.primitive_convert("","a", 1) }.should_not raise_error
  end

  it "calls #to_int to convert the destination byte offset" do
    offset = mock("encoding primitive_convert destination byte offset")
    offset.should_receive(:to_int).and_return(2)
    @ec.primitive_convert("abc", result = "   ", offset).should == :finished
    result.should == "  abc"
  end

  it "raises an ArgumentError if the destination byte offset is greater than the bytesize of the destination buffer" do
    -> { @ec.primitive_convert("","am", 0) }.should_not raise_error
    -> { @ec.primitive_convert("","am", 1) }.should_not raise_error
    -> { @ec.primitive_convert("","am", 2) }.should_not raise_error
    -> { @ec.primitive_convert("","am", 3) }.should raise_error(ArgumentError)
  end

  it "uses the destination byte offset to determine where to write the result in the destination buffer" do
    dest = "aa"
    @ec.primitive_convert("b",dest, nil, 0)
    dest.should == "aa"

    @ec.primitive_convert("b",dest, nil, 1)
    dest.should == "aab"

    @ec.primitive_convert("b",dest, nil, 2)
    dest.should == "aabbb"
  end

  it "accepts nil for the destination bytesize" do
    -> { @ec.primitive_convert("","", nil, nil) }.should_not raise_error
  end

  it "accepts an integer for the destination bytesize" do
    -> { @ec.primitive_convert("","", nil, 0) }.should_not raise_error
  end

  it "allows a destination bytesize value greater than the bytesize of the source buffer" do
    -> { @ec.primitive_convert("am","", nil, 3) }.should_not raise_error
  end

  it "allows a destination bytesize value less than the bytesize of the source buffer" do
    -> { @ec.primitive_convert("am","", nil, 1) }.should_not raise_error
  end

  it "calls #to_int to convert the destination byte size" do
    size = mock("encoding primitive_convert destination byte size")
    size.should_receive(:to_int).and_return(2)
    @ec.primitive_convert("abc", result = "   ", 0, size).should == :destination_buffer_full
    result.should == "ab"
  end

  it "uses destination bytesize as the maximum bytesize of the destination buffer" do
    dest = ""
    @ec.primitive_convert("glark", dest, nil, 1)
    dest.bytesize.should == 1
  end

  it "allows a destination buffer of unlimited size if destination bytesize is nil" do
    source = "glark".force_encoding('utf-8')
    dest = ""
    @ec.primitive_convert("glark", dest, nil, nil)
    dest.bytesize.should == source.bytesize
  end

  it "accepts an options hash" do
    @ec.primitive_convert("","",nil,nil, after_output: true).should == :finished
  end

  it "sets the destination buffer's encoding to the destination encoding if the conversion succeeded" do
    dest = "".force_encoding('utf-8')
    dest.encoding.should == Encoding::UTF_8
    @ec.primitive_convert("\u{98}",dest).should == :finished
    dest.encoding.should == Encoding::ISO_8859_1
  end

  it "sets the destination buffer's encoding to the destination encoding if the conversion failed" do
    dest = "".force_encoding('utf-8')
    dest.encoding.should == Encoding::UTF_8
    @ec.primitive_convert("\u{9878}",dest).should == :undefined_conversion
    dest.encoding.should == Encoding::ISO_8859_1
  end

  it "removes the undefined part from the source buffer when returning :undefined_conversion" do
    dest = "".force_encoding('utf-8')
    s = "\u{9878}abcd"
    @ec.primitive_convert(s, dest).should == :undefined_conversion

    s.should == "abcd"
  end

  it "returns :incomplete_input when source buffer ends unexpectedly and :partial_input isn't specified" do
    ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1")
    ec.primitive_convert("\xa4", "", nil, nil, partial_input: false).should == :incomplete_input
  end

  it "clears the source buffer when returning :incomplete_input" do
    ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1")
    s = "\xa4"
    ec.primitive_convert(s, "").should == :incomplete_input

    s.should == ""
  end

  it "returns :source_buffer_empty when source buffer ends unexpectedly and :partial_input is true" do
    ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1")
    ec.primitive_convert("\xa4", "", nil, nil, partial_input: true).should == :source_buffer_empty
  end

  it "clears the source buffer when returning :source_buffer_empty" do
    ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1")
    s = "\xa4"
    ec.primitive_convert(s, "", nil, nil, partial_input: true).should == :source_buffer_empty

    s.should == ""
  end

  it "returns :undefined_conversion when a character in the source buffer is not representable in the output encoding" do
    @ec.primitive_convert("\u{9876}","").should == :undefined_conversion
  end

  it "returns :invalid_byte_sequence when an invalid byte sequence was found in the source buffer" do
    @ec.primitive_convert("\xf1abcd","").should == :invalid_byte_sequence
  end

  it "removes consumed and erroneous bytes from the source buffer when returning :invalid_byte_sequence" do
    ec = Encoding::Converter.new(Encoding::UTF_8, Encoding::UTF_8_MAC)
    s = "\xC3\xA1\x80\x80\xC3\xA1".force_encoding("utf-8")
    dest = "".force_encoding("utf-8")
    ec.primitive_convert(s, dest)

    s.should == "\x80\xC3\xA1".force_encoding("utf-8")
  end

  it "returns :finished when the conversion succeeded" do
    @ec.primitive_convert("glark".force_encoding('utf-8'),"").should == :finished
  end

  it "clears the source buffer when returning :finished" do
    s = "glark".force_encoding('utf-8')
    @ec.primitive_convert(s, "").should == :finished

    s.should == ""
  end

  it "returns :destination_buffer_full when the destination buffer is too small" do
    ec = Encoding::Converter.new("utf-8", "iso-2022-jp")
    source = "\u{9999}"
    destination_bytesize = source.bytesize - 1
    ec.primitive_convert(source, "", 0, destination_bytesize) \
      .should == :destination_buffer_full
    source.should == ""
  end

  it "clears the source buffer when returning :destination_buffer_full" do
    ec = Encoding::Converter.new("utf-8", "iso-2022-jp")
    s = "\u{9999}"
    destination_bytesize = s.bytesize - 1
    ec.primitive_convert(s, "", 0, destination_bytesize).should == :destination_buffer_full

    s.should == ""
  end

  it "keeps removing invalid bytes from the source buffer" do
    ec = Encoding::Converter.new(Encoding::UTF_8, Encoding::UTF_8_MAC)
    s = "\x80\x80\x80"
    dest = "".force_encoding(Encoding::UTF_8_MAC)

    ec.primitive_convert(s, dest)
    s.should == "\x80\x80"
    ec.primitive_convert(s, dest)
    s.should == "\x80"
    ec.primitive_convert(s, dest)
    s.should == ""
  end

  it "reuses read-again bytes after the first error" do
    s = "\xf1abcd"
    dest = ""

    @ec.primitive_convert(s, dest).should == :invalid_byte_sequence
    s.should == "bcd"
    @ec.primitive_errinfo[4].should == "a"

    @ec.primitive_convert(s, dest).should == :finished
    s.should == ""

    dest.should == "abcd"
  end
end