summaryrefslogtreecommitdiff
path: root/spec/ruby/core/string/valid_encoding_spec.rb
blob: 375035cd9496a93014a2d305b0f6f7fc731b500a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
require_relative '../../spec_helper'

describe "String#valid_encoding?" do
  it "returns true if the String's encoding is valid" do
    # One plain single-character literal and one literal built from \u escapes.
    single_char = "a"
    escaped_chars = "\u{8365}\u{221}"

    single_char.valid_encoding?.should be_true
    escaped_chars.valid_encoding?.should be_true
  end

  it "returns true if self is valid in the current encoding and other encodings" do
    # Work on a mutable copy because force_encoding retags the receiver in place.
    byte = "\x77".dup

    byte.force_encoding('utf-8')
    byte.valid_encoding?.should be_true

    byte.force_encoding('binary')
    byte.valid_encoding?.should be_true
  end

  it "returns true for all encodings self is valid in" do
    # The same three bytes are retagged in place for every encoding below.
    bytes = "\xE6\x9D\x94".dup
    # Retags +bytes+ as the named encoding and reports whether it is valid there.
    valid_as = ->(encoding_name) { bytes.force_encoding(encoding_name).valid_encoding? }

    # One assertion per line, in a fixed order, so that a failure's line number
    # identifies the encoding that misbehaved.
    valid_as.call('BINARY').should be_true
    valid_as.call('UTF-8').should be_true
    valid_as.call('US-ASCII').should be_false
    valid_as.call('Big5').should be_false
    valid_as.call('CP949').should be_false
    valid_as.call('Emacs-Mule').should be_false
    valid_as.call('EUC-JP').should be_false
    valid_as.call('EUC-KR').should be_false
    valid_as.call('EUC-TW').should be_false
    valid_as.call('GB18030').should be_false
    valid_as.call('GBK').should be_false
    valid_as.call('ISO-8859-1').should be_true
    valid_as.call('ISO-8859-2').should be_true
    valid_as.call('ISO-8859-3').should be_true
    valid_as.call('ISO-8859-4').should be_true
    valid_as.call('ISO-8859-5').should be_true
    valid_as.call('ISO-8859-6').should be_true
    valid_as.call('ISO-8859-7').should be_true
    valid_as.call('ISO-8859-8').should be_true
    valid_as.call('ISO-8859-9').should be_true
    valid_as.call('ISO-8859-10').should be_true
    valid_as.call('ISO-8859-11').should be_true
    valid_as.call('ISO-8859-13').should be_true
    valid_as.call('ISO-8859-14').should be_true
    valid_as.call('ISO-8859-15').should be_true
    valid_as.call('ISO-8859-16').should be_true
    valid_as.call('KOI8-R').should be_true
    valid_as.call('KOI8-U').should be_true
    valid_as.call('Shift_JIS').should be_false

    # The UTF-16 and UTF-32 checks use their own byte sequences rather than +bytes+.
    utf16be_bytes = "\xD8\x00".dup
    utf16be_bytes.force_encoding('UTF-16BE').valid_encoding?.should be_false
    utf16le_bytes = "\x00\xD8".dup
    utf16le_bytes.force_encoding('UTF-16LE').valid_encoding?.should be_false
    utf32be_bytes = "\x04\x03\x02\x01".dup
    utf32be_bytes.force_encoding('UTF-32BE').valid_encoding?.should be_false
    utf32le_bytes = "\x01\x02\x03\x04".dup
    utf32le_bytes.force_encoding('UTF-32LE').valid_encoding?.should be_false

    valid_as.call('Windows-1251').should be_true
    valid_as.call('IBM437').should be_true
    valid_as.call('IBM737').should be_true
    valid_as.call('IBM775').should be_true
    valid_as.call('CP850').should be_true
    valid_as.call('IBM852').should be_true
    valid_as.call('CP852').should be_true
    valid_as.call('IBM855').should be_true
    valid_as.call('CP855').should be_true
    valid_as.call('IBM857').should be_true
    valid_as.call('IBM860').should be_true
    valid_as.call('IBM861').should be_true
    valid_as.call('IBM862').should be_true
    valid_as.call('IBM863').should be_true
    valid_as.call('IBM864').should be_true
    valid_as.call('IBM865').should be_true
    valid_as.call('IBM866').should be_true
    valid_as.call('IBM869').should be_true
    valid_as.call('Windows-1258').should be_true
    valid_as.call('GB1988').should be_true
    valid_as.call('macCentEuro').should be_true
    valid_as.call('macCroatian').should be_true
    valid_as.call('macCyrillic').should be_true
    valid_as.call('macGreek').should be_true
    valid_as.call('macIceland').should be_true
    valid_as.call('macRoman').should be_true
    valid_as.call('macRomania').should be_true
    valid_as.call('macThai').should be_true
    valid_as.call('macTurkish').should be_true
    valid_as.call('macUkraine').should be_true
    valid_as.call('stateless-ISO-2022-JP').should be_false
    valid_as.call('eucJP-ms').should be_false
    valid_as.call('CP51932').should be_false
    valid_as.call('GB2312').should be_false
    valid_as.call('GB12345').should be_false
    valid_as.call('ISO-2022-JP').should be_true
    valid_as.call('ISO-2022-JP-2').should be_true
    valid_as.call('CP50221').should be_true
    valid_as.call('Windows-1252').should be_true
    valid_as.call('Windows-1250').should be_true
    valid_as.call('Windows-1256').should be_true
    valid_as.call('Windows-1253').should be_true
    valid_as.call('Windows-1255').should be_true
    valid_as.call('Windows-1254').should be_true
    valid_as.call('TIS-620').should be_true
    valid_as.call('Windows-874').should be_true
    valid_as.call('Windows-1257').should be_true
    valid_as.call('Windows-31J').should be_false
    valid_as.call('MacJapanese').should be_false
    valid_as.call('UTF-7').should be_true
    valid_as.call('UTF8-MAC').should be_true
  end

  it "returns true for IBM720 encoding self is valid in" do
    # Mutable copy: force_encoding retags the receiver in place.
    bytes = "\xE6\x9D\x94".dup

    bytes.force_encoding('IBM720')
    bytes.valid_encoding?.should be_true

    # Same check under the 'CP720' name.
    bytes.force_encoding('CP720')
    bytes.valid_encoding?.should be_true
  end

  it "returns false if self is valid in one encoding, but invalid in the one it's tagged with" do
    char = "\u{8765}".dup
    # Valid as written ...
    char.valid_encoding?.should be_true

    # ... but no longer valid once the same bytes are retagged as 'ascii'.
    char.force_encoding('ascii').valid_encoding?.should be_false
  end

  it "returns false if self contains a character invalid in the associated encoding" do
    # Build "abc" followed by the single byte 0x80, then tag the result as 'ascii'.
    high_byte = [0x80].pack('C')
    tagged = "abc#{high_byte}".dup.force_encoding('ascii')

    tagged.valid_encoding?.should be_false
  end

  it "returns false if a valid String had an invalid character appended to it" do
    buffer = "a".dup
    buffer.valid_encoding?.should be_true

    # Append the single byte 0xDD, tagged as UTF-8, to the valid string.
    stray_byte = [0xDD].pack('C').force_encoding('utf-8')
    buffer << stray_byte
    buffer.valid_encoding?.should be_false
  end

  it "returns true if an invalid string is appended another invalid one but both make a valid string" do
    # Each half is a single byte tagged as UTF-8 on its own.
    first_half = [0xD0].pack('C').force_encoding('utf-8')
    second_half = [0xBF].pack('C').force_encoding('utf-8')

    first_half.valid_encoding?.should be_false

    # After the append, the receiver holds both bytes and is expected to be valid.
    first_half << second_half
    first_half.valid_encoding?.should be_true
  end
end