summaryrefslogtreecommitdiff
path: root/spec/ruby/core/string/scrub_spec.rb
blob: 4da44a79924c6a89a65d63e576ae3686e3800300 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
# -*- encoding: utf-8 -*-
require_relative '../../spec_helper'
require_relative 'fixtures/classes'

describe "String#scrub with a default replacement" do
  it "returns self for valid strings" do
    input = "foo"

    input.scrub.should == input
  end

  it "replaces invalid byte sequences" do
    x81 = [0x81].pack('C').force_encoding('utf-8')
    "abc\u3042#{x81}".scrub.should == "abc\u3042\uFFFD"
  end

  it "returns a copy of self when the input encoding is BINARY" do
    input = "foo".encode('BINARY')

    input.scrub.should == "foo"
  end

  it "replaces invalid byte sequences when using ASCII as the input encoding" do
    xE3x80 = [0xE3, 0x80].pack('CC').force_encoding 'utf-8'
    input = "abc\u3042#{xE3x80}".force_encoding('ASCII')
    input.scrub.should == "abc?????"
  end

  ruby_version_is '3.0' do
    it "returns String instances when called on a subclass" do
      StringSpecs::MyString.new("foo").scrub.should be_an_instance_of(String)
      input = [0x81].pack('C').force_encoding('utf-8')
      StringSpecs::MyString.new(input).scrub.should be_an_instance_of(String)
    end
  end

  ruby_version_is ''...'3.0' do
    it "returns subclass instances when called on a subclass" do
      StringSpecs::MyString.new("foo").scrub.should be_an_instance_of(StringSpecs::MyString)
    end
  end
end

describe "String#scrub with a custom replacement" do
  it "returns self for valid strings" do
    input = "foo"

    input.scrub("*").should == input
  end

  it "replaces invalid byte sequences" do
    x81 = [0x81].pack('C').force_encoding('utf-8')
    "abc\u3042#{x81}".scrub("*").should == "abc\u3042*"
  end

  it "replaces invalid byte sequences in frozen strings" do
    x81 = [0x81].pack('C').force_encoding('utf-8')
    (-"abc\u3042#{x81}").scrub("*").should == "abc\u3042*"
    utf16_str = ("abc".encode('UTF-16LE').bytes + [0x81]).pack('c*').force_encoding('UTF-16LE')
    (-(utf16_str)).scrub("*".encode('UTF-16LE')).should == "abc*".encode('UTF-16LE')
  end

  it "replaces an incomplete character at the end with a single replacement" do
    xE3x80 = [0xE3, 0x80].pack('CC').force_encoding 'utf-8'
    xE3x80.scrub("*").should == "*"
  end

  it "raises ArgumentError for replacements with an invalid encoding" do
    x81 = [0x81].pack('C').force_encoding('utf-8')
    xE4 = [0xE4].pack('C').force_encoding('utf-8')
    block = -> { "foo#{x81}".scrub(xE4) }

    block.should raise_error(ArgumentError)
  end

  it "raises TypeError when a non String replacement is given" do
    x81 = [0x81].pack('C').force_encoding('utf-8')
    block = -> { "foo#{x81}".scrub(1) }

    block.should raise_error(TypeError)
  end

  ruby_version_is '3.0' do
    it "returns String instances when called on a subclass" do
      StringSpecs::MyString.new("foo").scrub("*").should be_an_instance_of(String)
      input = [0x81].pack('C').force_encoding('utf-8')
      StringSpecs::MyString.new(input).scrub("*").should be_an_instance_of(String)
    end
  end
end

describe "String#scrub with a block" do
  it "returns self for valid strings" do
    input = "foo"

    input.scrub { |b| "*" }.should == input
  end

  it "replaces invalid byte sequences" do
    xE3x80 = [0xE3, 0x80].pack('CC').force_encoding 'utf-8'
    replaced = "abc\u3042#{xE3x80}".scrub { |b| "<#{b.unpack("H*")[0]}>" }

    replaced.should == "abc\u3042<e380>"
  end

  it "replaces invalid byte sequences using a custom encoding" do
    x80x80 = [0x80, 0x80].pack('CC').force_encoding 'utf-8'
    replaced = x80x80.scrub do |bad|
      bad.encode(Encoding::UTF_8, Encoding::Windows_1252)
    end

    replaced.should == "€€"
  end

  ruby_version_is '3.0' do
    it "returns String instances when called on a subclass" do
      StringSpecs::MyString.new("foo").scrub { |b| "*" }.should be_an_instance_of(String)
      input = [0x81].pack('C').force_encoding('utf-8')
      StringSpecs::MyString.new(input).scrub { |b| "<#{b.unpack("H*")[0]}>" }.should be_an_instance_of(String)
    end
  end
end

describe "String#scrub!" do
  it "modifies self for valid strings" do
    x81 = [0x81].pack('C').force_encoding('utf-8')
    input = "a#{x81}"
    input.scrub!
    input.should == "a\uFFFD"
  end

  it "accepts blocks" do
    x81 = [0x81].pack('C').force_encoding('utf-8')
    input = "a#{x81}"
    input.scrub! { |b| "<?>" }
    input.should == "a<?>"
  end

  it "maintains the state of frozen strings that are already valid" do
    input = "a"
    input.freeze
    input.scrub!
    input.frozen?.should be_true
  end

  it "preserves the instance variables of already valid strings" do
    input = "a"
    input.instance_variable_set(:@a, 'b')
    input.scrub!
    input.instance_variable_get(:@a).should == 'b'
  end

  it "accepts a frozen string as a replacement" do
    input = "a\xE2"
    input.scrub!('.'.freeze)
    input.should == 'a.'
  end
end