summaryrefslogtreecommitdiff
path: root/spec/ruby/core/regexp/union_spec.rb
blob: 80768364710168e7689fd0b652937a918da77394 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
# encoding: utf-8

require_relative '../../spec_helper'

# Examples for Regexp.union: the pattern built from String, Regexp, Symbol,
# Array and convertible arguments, the encoding of the resulting Regexp, and
# the argument combinations that are expected to raise.
describe "Regexp.union" do
  it "returns /(?!)/ when passed no arguments" do
    empty_union = Regexp.union
    empty_union.should == /(?!)/
  end

  it "returns a regular expression that will match passed arguments" do
    single = Regexp.union("penzance")
    single.should == /penzance/

    pair = Regexp.union("skiing", "sledding")
    pair.should == /skiing|sledding/

    # Regexp arguments are expected to appear with their own option groups.
    not_supported_on :opal do
      from_regexps = Regexp.union(/dogs/, /cats/i)
      from_regexps.should == /(?-mix:dogs)|(?i-mx:cats)/
    end
  end

  it "quotes any string arguments" do
    escaped = Regexp.union("n", ".")
    escaped.should == /n|\./
  end

  # --- Encoding of the result ---

  it "returns a Regexp with the encoding of an ASCII-incompatible String argument" do
    utf16 = "a".encode("UTF-16LE")
    Regexp.union(utf16).encoding.should == Encoding::UTF_16LE
  end

  it "returns a Regexp with the encoding of a String containing non-ASCII-compatible characters" do
    latin1 = "\u00A9".encode("ISO-8859-1")
    Regexp.union(latin1).encoding.should == Encoding::ISO_8859_1
  end

  it "returns a Regexp with US-ASCII encoding if all arguments are ASCII-only" do
    utf8 = "a".encode("UTF-8")
    sjis = "b".encode("SJIS")
    Regexp.union(utf8, sjis).encoding.should == Encoding::US_ASCII
  end

  it "returns a Regexp with the encoding of multiple non-conflicting ASCII-incompatible String arguments" do
    first = "a".encode("UTF-16LE")
    second = "b".encode("UTF-16LE")
    Regexp.union(first, second).encoding.should == Encoding::UTF_16LE
  end

  it "returns a Regexp with the encoding of multiple non-conflicting Strings containing non-ASCII-compatible characters" do
    copyright = "\u00A9".encode("ISO-8859-1")
    degree = "\u00B0".encode("ISO-8859-1")
    Regexp.union(copyright, degree).encoding.should == Encoding::ISO_8859_1
  end

  it "returns a Regexp with the encoding of a String containing non-ASCII-compatible characters and another ASCII-only String" do
    latin1 = "\u00A9".encode("ISO-8859-1")
    ascii_only = "a".encode("UTF-8")
    Regexp.union(latin1, ascii_only).encoding.should == Encoding::ISO_8859_1
  end

  it "returns a Regexp with UTF-8 if one part is UTF-8" do
    accented = /probl[éeè]me/i
    plain = /help/i
    Regexp.union(accented, plain).encoding.should == Encoding::UTF_8
  end

  it "returns a Regexp if an array of string with special characters is passed" do
    specials = ["+", "-"]
    Regexp.union(specials).should == /\+|\-/
  end

  # --- Conflicting encodings ---
  # The arguments are built inside each lambda so that anything they raise is
  # observed by the raise_error matcher as well.

  it "raises ArgumentError if the arguments include conflicting ASCII-incompatible Strings" do
    -> {
      little_endian = "a".encode("UTF-16LE")
      big_endian = "b".encode("UTF-16BE")
      Regexp.union(little_endian, big_endian)
    }.should raise_error(ArgumentError)
  end

  it "raises ArgumentError if the arguments include conflicting ASCII-incompatible Regexps" do
    -> {
      little_endian = Regexp.new("a".encode("UTF-16LE"))
      big_endian = Regexp.new("b".encode("UTF-16BE"))
      Regexp.union(little_endian, big_endian)
    }.should raise_error(ArgumentError)
  end

  it "raises ArgumentError if the arguments include conflicting fixed encoding Regexps" do
    -> {
      utf8_fixed = Regexp.new("a".encode("UTF-8"), Regexp::FIXEDENCODING)
      ascii_fixed = Regexp.new("b".encode("US-ASCII"), Regexp::FIXEDENCODING)
      Regexp.union(utf8_fixed, ascii_fixed)
    }.should raise_error(ArgumentError)
  end

  it "raises ArgumentError if the arguments include a fixed encoding Regexp and a String containing non-ASCII-compatible characters in a different encoding" do
    -> {
      utf8_fixed = Regexp.new("a".encode("UTF-8"), Regexp::FIXEDENCODING)
      latin1 = "\u00A9".encode("ISO-8859-1")
      Regexp.union(utf8_fixed, latin1)
    }.should raise_error(ArgumentError)
  end

  it "raises ArgumentError if the arguments include a String containing non-ASCII-compatible characters and a fixed encoding Regexp in a different encoding" do
    -> {
      latin1 = "\u00A9".encode("ISO-8859-1")
      utf8_fixed = Regexp.new("a".encode("UTF-8"), Regexp::FIXEDENCODING)
      Regexp.union(latin1, utf8_fixed)
    }.should raise_error(ArgumentError)
  end

  it "raises ArgumentError if the arguments include an ASCII-incompatible String and an ASCII-only String" do
    -> {
      utf16 = "a".encode("UTF-16LE")
      ascii_only = "b".encode("UTF-8")
      Regexp.union(utf16, ascii_only)
    }.should raise_error(ArgumentError)
  end

  it "raises ArgumentError if the arguments include an ASCII-incompatible Regexp and an ASCII-only String" do
    -> {
      utf16_regexp = Regexp.new("a".encode("UTF-16LE"))
      ascii_only = "b".encode("UTF-8")
      Regexp.union(utf16_regexp, ascii_only)
    }.should raise_error(ArgumentError)
  end

  it "raises ArgumentError if the arguments include an ASCII-incompatible String and an ASCII-only Regexp" do
    -> {
      utf16 = "a".encode("UTF-16LE")
      ascii_regexp = Regexp.new("b".encode("UTF-8"))
      Regexp.union(utf16, ascii_regexp)
    }.should raise_error(ArgumentError)
  end

  it "raises ArgumentError if the arguments include an ASCII-incompatible Regexp and an ASCII-only Regexp" do
    -> {
      utf16_regexp = Regexp.new("a".encode("UTF-16LE"))
      ascii_regexp = Regexp.new("b".encode("UTF-8"))
      Regexp.union(utf16_regexp, ascii_regexp)
    }.should raise_error(ArgumentError)
  end

  it "raises ArgumentError if the arguments include an ASCII-incompatible String and a String containing non-ASCII-compatible characters in a different encoding" do
    -> {
      utf16 = "a".encode("UTF-16LE")
      latin1 = "\u00A9".encode("ISO-8859-1")
      Regexp.union(utf16, latin1)
    }.should raise_error(ArgumentError)
  end

  it "raises ArgumentError if the arguments include an ASCII-incompatible Regexp and a String containing non-ASCII-compatible characters in a different encoding" do
    -> {
      utf16_regexp = Regexp.new("a".encode("UTF-16LE"))
      latin1 = "\u00A9".encode("ISO-8859-1")
      Regexp.union(utf16_regexp, latin1)
    }.should raise_error(ArgumentError)
  end

  it "raises ArgumentError if the arguments include an ASCII-incompatible String and a Regexp containing non-ASCII-compatible characters in a different encoding" do
    -> {
      utf16 = "a".encode("UTF-16LE")
      latin1_regexp = Regexp.new("\u00A9".encode("ISO-8859-1"))
      Regexp.union(utf16, latin1_regexp)
    }.should raise_error(ArgumentError)
  end

  it "raises ArgumentError if the arguments include an ASCII-incompatible Regexp and a Regexp containing non-ASCII-compatible characters in a different encoding" do
    -> {
      utf16_regexp = Regexp.new("a".encode("UTF-16LE"))
      latin1_regexp = Regexp.new("\u00A9".encode("ISO-8859-1"))
      Regexp.union(utf16_regexp, latin1_regexp)
    }.should raise_error(ArgumentError)
  end

  # --- Argument conversion ---

  it "uses to_str to convert arguments (if not Regexp)" do
    stringish = mock('pattern')
    stringish.should_receive(:to_str).and_return('foo')
    Regexp.union(stringish, "bar").should == /foo|bar/
  end

  it "uses to_regexp to convert argument" do
    regexpish = mock('pattern')
    regexpish.should_receive(:to_regexp).and_return(/foo/)
    Regexp.union(regexpish).should == /foo/
  end

  it "accepts a Symbol as argument" do
    from_symbol = Regexp.union(:foo)
    from_symbol.should == /foo/
  end

  it "accepts a single array of patterns as arguments" do
    strings = ["skiing", "sledding"]
    Regexp.union(strings).should == /skiing|sledding/

    not_supported_on :opal do
      regexps = [/dogs/, /cats/i]
      Regexp.union(regexps).should == /(?-mix:dogs)|(?i-mx:cats)/
    end

    # Passing two arrays instead of one is expected to raise TypeError.
    -> {
      Regexp.union(["skiing", "sledding"], [/dogs/, /cats/i])
    }.should raise_error(TypeError)
  end
end