summaryrefslogtreecommitdiff
path: root/spec/ruby/language/regexp/encoding_spec.rb
blob: 8e2a294b9581a402e6270e5018a6031a1ea70975 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
# -*- encoding: binary -*-
require_relative '../../spec_helper'
require_relative '../fixtures/classes'

# Specs for the Regexp literal encoding modifiers /e, /n, /s and /u.
#
# Most examples match against the two bytes "\303\251" (0xC3 0xA9) and check
# how much of that input a single `.` consumes under each encoding, both for a
# plain literal and for literals built through interpolation (with and
# without the /o "interpolate once" flag).
describe "Regexps with encoding modifiers" do
  # Under EUC-JP the two bytes are matched together as one character.
  it "supports /e (EUC encoding)" do
    match = /./e.match("\303\251".force_encoding(Encoding::EUC_JP))
    match.to_a.should == ["\303\251".force_encoding(Encoding::EUC_JP)]
  end

  it "supports /e (EUC encoding) with interpolation" do
    match = /#{/./}/e.match("\303\251".force_encoding(Encoding::EUC_JP))
    match.to_a.should == ["\303\251".force_encoding(Encoding::EUC_JP)]
  end

  # The `o` flag is required here; without it this example merely repeats
  # the plain interpolation example above.
  it "supports /e (EUC encoding) with interpolation /o" do
    match = /#{/./}/eo.match("\303\251".force_encoding(Encoding::EUC_JP))
    match.to_a.should == ["\303\251".force_encoding(Encoding::EUC_JP)]
  end

  it 'uses EUC-JP as /e encoding' do
    /./e.encoding.should == Encoding::EUC_JP
  end

  it 'preserves EUC-JP as /e encoding through interpolation' do
    /#{/./}/e.encoding.should == Encoding::EUC_JP
  end

  # With /n the input is matched byte-wise, so `.` consumes only the first byte.
  it "supports /n (No encoding)" do
    /./n.match("\303\251").to_a.should == ["\303"]
  end

  it "supports /n (No encoding) with interpolation" do
    /#{/./}/n.match("\303\251").to_a.should == ["\303"]
  end

  # The `o` flag is required here; without it this example merely repeats
  # the plain interpolation example above.
  it "supports /n (No encoding) with interpolation /o" do
    /#{/./}/no.match("\303\251").to_a.should == ["\303"]
  end

  it 'uses US-ASCII as /n encoding if all chars are 7-bit' do
    /./n.encoding.should == Encoding::US_ASCII
  end

  it 'uses BINARY when is not initialized' do
    Regexp.allocate.encoding.should == Encoding::BINARY
  end

  it 'uses BINARY as /n encoding if not all chars are 7-bit' do
    /\xFF/n.encoding.should == Encoding::BINARY
  end

  it 'preserves US-ASCII as /n encoding through interpolation if all chars are 7-bit' do
    /.#{/./}/n.encoding.should == Encoding::US_ASCII
  end

  # The pattern contains the 8-bit byte \xFF, hence "not all chars".
  it 'preserves BINARY as /n encoding through interpolation if not all chars are 7-bit' do
    /\xFF#{/./}/n.encoding.should == Encoding::BINARY
  end

  # Under Windows-31J `.` consumes only the first byte of the input.
  it "supports /s (Windows_31J encoding)" do
    match = /./s.match("\303\251".force_encoding(Encoding::Windows_31J))
    match.to_a.should == ["\303".force_encoding(Encoding::Windows_31J)]
  end

  it "supports /s (Windows_31J encoding) with interpolation" do
    match = /#{/./}/s.match("\303\251".force_encoding(Encoding::Windows_31J))
    match.to_a.should == ["\303".force_encoding(Encoding::Windows_31J)]
  end

  # The `o` flag is required here; without it this example merely repeats
  # the plain interpolation example above.
  it "supports /s (Windows_31J encoding) with interpolation and /o" do
    match = /#{/./}/so.match("\303\251".force_encoding(Encoding::Windows_31J))
    match.to_a.should == ["\303".force_encoding(Encoding::Windows_31J)]
  end

  it 'uses Windows-31J as /s encoding' do
    /./s.encoding.should == Encoding::Windows_31J
  end

  it 'preserves Windows-31J as /s encoding through interpolation' do
    /#{/./}/s.encoding.should == Encoding::Windows_31J
  end

  # Under UTF-8 the two bytes form the single character U+00E9.
  it "supports /u (UTF8 encoding)" do
    /./u.match("\303\251".force_encoding('utf-8')).to_a.should == ["\u{e9}"]
  end

  it "supports /u (UTF8 encoding) with interpolation" do
    /#{/./}/u.match("\303\251".force_encoding('utf-8')).to_a.should == ["\u{e9}"]
  end

  # The `o` flag is required here; without it this example merely repeats
  # the plain interpolation example above.
  it "supports /u (UTF8 encoding) with interpolation and /o" do
    /#{/./}/uo.match("\303\251".force_encoding('utf-8')).to_a.should == ["\u{e9}"]
  end

  it 'uses UTF-8 as /u encoding' do
    /./u.encoding.should == Encoding::UTF_8
  end

  it 'preserves UTF-8 as /u encoding through interpolation' do
    /#{/./}/u.encoding.should == Encoding::UTF_8
  end

  # The trailing modifier in the run below is `s`, so the result equals /foo/s.
  it "selects last of multiple encoding specifiers" do
    /foo/ensuensuens.should == /foo/s
  end

  # The next three examples exercise the same mismatch (a UTF-16LE string
  # against this literal) through #match, #match? and =~ respectively.
  it "raises Encoding::CompatibilityError when trying match against different encodings" do
    -> { /\A[[:space:]]*\z/.match(" ".encode("UTF-16LE")) }.should raise_error(Encoding::CompatibilityError)
  end

  it "raises Encoding::CompatibilityError when trying match? against different encodings" do
    -> { /\A[[:space:]]*\z/.match?(" ".encode("UTF-16LE")) }.should raise_error(Encoding::CompatibilityError)
  end

  it "raises Encoding::CompatibilityError when trying =~ against different encodings" do
    -> { /\A[[:space:]]*\z/ =~ " ".encode("UTF-16LE") }.should raise_error(Encoding::CompatibilityError)
  end

  it "computes the Regexp Encoding for each interpolated Regexp instance" do
    # The same literal is evaluated twice with different interpolated strings;
    # each resulting Regexp must get its own encoding.
    make_regexp = -> str { /#{str}/ }

    # Non-ASCII content tagged as UTF-8: the Regexp is fixed to UTF-8.
    r = make_regexp.call("été".force_encoding(Encoding::UTF_8))
    r.should.fixed_encoding?
    r.encoding.should == Encoding::UTF_8

    # ASCII-only content tagged as UTF-8: the Regexp is US-ASCII and not fixed.
    r = make_regexp.call("abc".force_encoding(Encoding::UTF_8))
    r.should_not.fixed_encoding?
    r.encoding.should == Encoding::US_ASCII
  end
end