blob: 5c67ad01bcbc5f619a988f9e54b1285b196f34e6 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
|
# -*- encoding: utf-8 -*-
require_relative '../../spec_helper'
# Specs for String#scrub called with neither a replacement argument nor a block.
describe "String#scrub with a default replacement" do
  # Only equality with the receiver is asserted here, not object identity.
  it "returns self for valid strings" do
    valid = "foo"
    scrubbed = valid.scrub
    scrubbed.should == valid
  end

  it "replaces invalid byte sequences" do
    # A lone 0x81 byte tagged as UTF-8.
    stray_byte = [0x81].pack('C').force_encoding('utf-8')
    dirty = "abc\u3042#{stray_byte}"
    dirty.scrub.should == "abc\u3042\uFFFD"
  end

  it "returns a copy of self when the input encoding is BINARY" do
    binary = "foo".encode('BINARY')
    binary.scrub.should == "foo"
  end

  it "replaces invalid byte sequences when using ASCII as the input encoding" do
    truncated = [0xE3, 0x80].pack('CC').force_encoding('utf-8')
    # \u3042 occupies three bytes in UTF-8 and `truncated` adds two more;
    # the expected output carries one "?" for each of those five bytes.
    ascii = "abc\u3042#{truncated}".force_encoding('ASCII')
    ascii.scrub.should == "abc?????"
  end
end
# Specs for String#scrub called with an explicit replacement argument.
describe "String#scrub with a custom replacement" do
  # Only equality with the receiver is asserted here, not object identity.
  it "returns self for valid strings" do
    valid = "foo"
    valid.scrub("*").should == valid
  end

  it "replaces invalid byte sequences" do
    stray_byte = [0x81].pack('C').force_encoding('utf-8')
    dirty = "abc\u3042#{stray_byte}"
    dirty.scrub("*").should == "abc\u3042*"
  end

  it "replaces invalid byte sequences in frozen strings" do
    stray_byte = [0x81].pack('C').force_encoding('utf-8')
    # Unary minus (String#-@) hands back a frozen string.
    frozen_utf8 = -"abc\u3042#{stray_byte}"
    frozen_utf8.scrub("*").should == "abc\u3042*"

    # "abc" as UTF-16LE bytes followed by a single trailing 0x81 byte.
    utf16_bytes = "abc".encode('UTF-16LE').bytes + [0x81]
    utf16_str = utf16_bytes.pack('c*').force_encoding('UTF-16LE')
    frozen_utf16 = -(utf16_str)
    star_utf16 = "*".encode('UTF-16LE')
    frozen_utf16.scrub(star_utf16).should == "abc*".encode('UTF-16LE')
  end

  it "replaces an incomplete character at the end with a single replacement" do
    truncated = [0xE3, 0x80].pack('CC').force_encoding('utf-8')
    truncated.scrub("*").should == "*"
  end

  it "raises ArgumentError for replacements with an invalid encoding" do
    stray_byte = [0x81].pack('C').force_encoding('utf-8')
    # The replacement is itself a lone 0xE4 byte tagged as UTF-8.
    broken_replacement = [0xE4].pack('C').force_encoding('utf-8')
    -> { "foo#{stray_byte}".scrub(broken_replacement) }.should raise_error(ArgumentError)
  end

  it "raises TypeError when a non String replacement is given" do
    stray_byte = [0x81].pack('C').force_encoding('utf-8')
    -> { "foo#{stray_byte}".scrub(1) }.should raise_error(TypeError)
  end
end
# Specs for String#scrub called with a block that supplies the replacement.
describe "String#scrub with a block" do
  # Only equality with the receiver is asserted here, not object identity.
  it "returns self for valid strings" do
    valid = "foo"
    valid.scrub { |_bad| "*" }.should == valid
  end

  it "replaces invalid byte sequences" do
    truncated = [0xE3, 0x80].pack('CC').force_encoding('utf-8')
    dirty = "abc\u3042#{truncated}"
    # Render whatever the block receives as hex wrapped in angle brackets.
    hexified = dirty.scrub { |bad| "<#{bad.unpack("H*").first}>" }
    hexified.should == "abc\u3042<e380>"
  end

  it "replaces invalid byte sequences using a custom encoding" do
    two_bytes = [0x80, 0x80].pack('CC').force_encoding('utf-8')
    # Reinterpret what the block receives as Windows-1252 and transcode it to UTF-8.
    transcoded = two_bytes.scrub { |bad| bad.encode(Encoding::UTF_8, Encoding::Windows_1252) }
    transcoded.should == "€€"
  end
end
# Specs for the in-place variant String#scrub!.
describe "String#scrub!" do
  # NOTE(review): the description says "valid", yet the receiver below ends in a
  # lone 0x81 byte and the expectation is a U+FFFD replacement — confirm whether
  # the wording is intentional. The description text is left untouched.
  it "modifies self for valid strings" do
    stray_byte = [0x81].pack('C').force_encoding('utf-8')
    target = "a#{stray_byte}"
    target.scrub!
    target.should == "a\uFFFD"
  end

  it "accepts blocks" do
    stray_byte = [0x81].pack('C').force_encoding('utf-8')
    target = "a#{stray_byte}"
    target.scrub! { |_bad| "<?>" }
    target.should == "a<?>"
  end

  it "maintains the state of frozen strings that are already valid" do
    frozen = "a"
    frozen.freeze
    # Calling scrub! on the frozen receiver must leave it frozen.
    frozen.scrub!
    frozen.frozen?.should be_true
  end

  it "preserves the instance variables of already valid strings" do
    tagged = "a"
    tagged.instance_variable_set(:@a, 'b')
    tagged.scrub!
    tagged.instance_variable_get(:@a).should == 'b'
  end

  it "accepts a frozen string as a replacement" do
    target = "a\xE2"
    frozen_replacement = '.'.freeze
    target.scrub!(frozen_replacement)
    target.should == 'a.'
  end
end
|