blob: a51fbd020adf41d3b9770766431681f102bac289 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
|
# -*- encoding: utf-8 -*-
require_relative '../../spec_helper'
require_relative 'fixtures/classes'
# Examples for String#scrub called without an argument or block. The
# expectations below pin the default replacement: U+FFFD for a UTF-8 receiver
# and "?" for a receiver tagged as ASCII.
describe "String#scrub with a default replacement" do
  it "returns self for valid strings" do
    valid = "foo"
    scrubbed = valid.scrub
    # Only equality is checked here, not object identity.
    scrubbed.should == valid
  end

  it "replaces invalid byte sequences" do
    # A single 0x81 byte tagged as UTF-8; the expectation treats it as invalid.
    stray_byte = [0x81].pack('C').force_encoding('utf-8')
    broken = "abc\u3042#{stray_byte}"
    broken.scrub.should == "abc\u3042\uFFFD"
  end

  it "replaces invalid byte sequences in lazy substrings" do
    stray_byte = [0x81].pack('C').force_encoding('utf-8')
    whole = "abc\u3042#{stray_byte}def"
    # Scrub is invoked directly on the slice that drops the first and last character.
    whole[1...-1].scrub.should == "bc\u3042\uFFFDde"
  end

  it "returns a copy of self when the input encoding is BINARY" do
    "foo".encode('BINARY').scrub.should == "foo"
  end

  it "replaces invalid byte sequences when using ASCII as the input encoding" do
    truncated = [0xE3, 0x80].pack('CC').force_encoding('utf-8')
    ascii_input = "abc\u3042#{truncated}".force_encoding('ASCII')
    # Every non-ASCII byte (three from U+3042 plus the two appended) becomes "?".
    ascii_input.scrub.should == "abc?????"
  end

  it "returns a String in the same encoding as self" do
    stray_byte = [0x81].pack('C').force_encoding('utf-8')
    scrubbed = "abc\u3042#{stray_byte}".scrub
    scrubbed.encoding.should == Encoding::UTF_8
  end

  ruby_version_is '3.0' do
    it "returns String instances when called on a subclass" do
      # Checked for both an already-valid receiver and one that needs scrubbing.
      valid_receiver = StringSpecs::MyString.new("foo")
      valid_receiver.scrub.should be_an_instance_of(String)

      stray_byte = [0x81].pack('C').force_encoding('utf-8')
      invalid_receiver = StringSpecs::MyString.new(stray_byte)
      invalid_receiver.scrub.should be_an_instance_of(String)
    end
  end

  ruby_version_is ''...'3.0' do
    it "returns subclass instances when called on a subclass" do
      receiver = StringSpecs::MyString.new("foo")
      receiver.scrub.should be_an_instance_of(StringSpecs::MyString)
    end
  end
end
# Examples for String#scrub called with an explicit replacement argument.
describe "String#scrub with a custom replacement" do
  it "returns self for valid strings" do
    valid = "foo"
    scrubbed = valid.scrub("*")
    # Only equality is checked here, not object identity.
    scrubbed.should == valid
  end

  it "replaces invalid byte sequences" do
    stray_byte = [0x81].pack('C').force_encoding('utf-8')
    broken = "abc\u3042#{stray_byte}"
    broken.scrub("*").should == "abc\u3042*"
  end

  it "replaces invalid byte sequences in frozen strings" do
    stray_byte = [0x81].pack('C').force_encoding('utf-8')
    # Unary minus yields a frozen string to scrub.
    frozen_utf8 = -"abc\u3042#{stray_byte}"
    frozen_utf8.scrub("*").should == "abc\u3042*"

    # Same check for UTF-16LE: "abc" followed by the two bytes named
    # `leading_surrogate`, which the expectation treats as invalid.
    leading_surrogate = [0x00, 0xD8]
    utf16_bytes = "abc".encode('UTF-16LE').bytes + leading_surrogate
    utf16_str = utf16_bytes.pack('c*').force_encoding('UTF-16LE')
    frozen_utf16 = -(utf16_str)
    replacement = "*".encode('UTF-16LE')
    frozen_utf16.scrub(replacement).should == "abc*".encode('UTF-16LE')
  end

  it "replaces an incomplete character at the end with a single replacement" do
    truncated = [0xE3, 0x80].pack('CC').force_encoding('utf-8')
    # Both bytes together are expected to collapse into one replacement.
    truncated.scrub("*").should == "*"
  end

  it "raises ArgumentError for replacements with an invalid encoding" do
    stray_byte = [0x81].pack('C').force_encoding('utf-8')
    bad_replacement = [0xE4].pack('C').force_encoding('utf-8')
    -> { "foo#{stray_byte}".scrub(bad_replacement) }.should raise_error(ArgumentError)
  end

  it "returns a String in the same encoding as self" do
    stray_byte = [0x81].pack('C').force_encoding('utf-8')
    scrubbed = "abc\u3042#{stray_byte}".scrub("*")
    scrubbed.encoding.should == Encoding::UTF_8
  end

  it "raises TypeError when a non String replacement is given" do
    stray_byte = [0x81].pack('C').force_encoding('utf-8')
    -> { "foo#{stray_byte}".scrub(1) }.should raise_error(TypeError)
  end

  ruby_version_is '3.0' do
    it "returns String instances when called on a subclass" do
      # Checked for both an already-valid receiver and one that needs scrubbing.
      valid_receiver = StringSpecs::MyString.new("foo")
      valid_receiver.scrub("*").should be_an_instance_of(String)

      stray_byte = [0x81].pack('C').force_encoding('utf-8')
      invalid_receiver = StringSpecs::MyString.new(stray_byte)
      invalid_receiver.scrub("*").should be_an_instance_of(String)
    end
  end
end
# Examples for String#scrub called with a block; the block's return value is
# used as the replacement for the bytes it is given.
describe "String#scrub with a block" do
  it "returns self for valid strings" do
    valid = "foo"
    scrubbed = valid.scrub { |b| "*" }
    # Only equality is checked here, not object identity.
    scrubbed.should == valid
  end

  it "replaces invalid byte sequences" do
    truncated = [0xE3, 0x80].pack('CC').force_encoding('utf-8')
    # The block renders the offending bytes as lowercase hex in angle brackets.
    replaced = "abc\u3042#{truncated}".scrub do |bad|
      "<#{bad.unpack("H*").first}>"
    end
    replaced.should == "abc\u3042<e380>"
  end

  it "replaces invalid byte sequences using a custom encoding" do
    two_bytes = [0x80, 0x80].pack('CC').force_encoding('utf-8')
    # Each 0x80 byte is transcoded from Windows-1252 to UTF-8 by the block.
    replaced = two_bytes.scrub { |bad| bad.encode(Encoding::UTF_8, Encoding::Windows_1252) }
    replaced.should == "€€"
  end

  ruby_version_is '3.0' do
    it "returns String instances when called on a subclass" do
      # Checked for both an already-valid receiver and one that needs scrubbing.
      valid_receiver = StringSpecs::MyString.new("foo")
      valid_receiver.scrub { |b| "*" }.should be_an_instance_of(String)

      stray_byte = [0x81].pack('C').force_encoding('utf-8')
      invalid_receiver = StringSpecs::MyString.new(stray_byte)
      scrubbed = invalid_receiver.scrub { |bad| "<#{bad.unpack("H*").first}>" }
      scrubbed.should be_an_instance_of(String)
    end
  end
end
# Examples for the in-place variant, String#scrub!.
describe "String#scrub!" do
  # NOTE(review): the description says "valid strings" but the receiver below
  # contains an invalid 0x81 byte; the wording is kept unchanged because spec
  # descriptions may be referenced elsewhere — confirm before renaming.
  it "modifies self for valid strings" do
    stray_byte = [0x81].pack('C').force_encoding('utf-8')
    target = "a#{stray_byte}"
    target.scrub!
    target.should == "a\uFFFD"
  end

  it "accepts blocks" do
    stray_byte = [0x81].pack('C').force_encoding('utf-8')
    target = "a#{stray_byte}"
    target.scrub! { |b| "<?>" }
    target.should == "a<?>"
  end

  it "maintains the state of frozen strings that are already valid" do
    target = "a"
    target.freeze
    # Calling scrub! on a frozen, already-valid string must leave it frozen.
    target.scrub!
    target.frozen?.should be_true
  end

  it "preserves the instance variables of already valid strings" do
    target = "a"
    target.instance_variable_set(:@a, 'b')
    target.scrub!
    target.instance_variable_get(:@a).should == 'b'
  end

  it "accepts a frozen string as a replacement" do
    target = "a\xE2"
    frozen_replacement = '.'.freeze
    target.scrub!(frozen_replacement)
    target.should == 'a.'
  end
end
|