diff options
Diffstat (limited to 'spec/ruby/core/regexp')
| -rw-r--r-- | spec/ruby/core/regexp/case_compare_spec.rb | 14 | ||||
| -rw-r--r-- | spec/ruby/core/regexp/compile_spec.rb | 2 | ||||
| -rw-r--r-- | spec/ruby/core/regexp/encoding_spec.rb | 2 | ||||
| -rw-r--r-- | spec/ruby/core/regexp/fixed_encoding_spec.rb | 16 | ||||
| -rw-r--r-- | spec/ruby/core/regexp/initialize_spec.rb | 24 | ||||
| -rw-r--r-- | spec/ruby/core/regexp/last_match_spec.rb | 6 | ||||
| -rw-r--r-- | spec/ruby/core/regexp/linear_time_spec.rb | 80 | ||||
| -rw-r--r-- | spec/ruby/core/regexp/match_spec.rb | 38 | ||||
| -rw-r--r-- | spec/ruby/core/regexp/named_captures_spec.rb | 4 | ||||
| -rw-r--r-- | spec/ruby/core/regexp/names_spec.rb | 4 | ||||
| -rw-r--r-- | spec/ruby/core/regexp/new_spec.rb | 2 | ||||
| -rw-r--r-- | spec/ruby/core/regexp/options_spec.rb | 6 | ||||
| -rw-r--r-- | spec/ruby/core/regexp/shared/new.rb | 422 | ||||
| -rw-r--r-- | spec/ruby/core/regexp/shared/quote.rb | 20 | ||||
| -rw-r--r-- | spec/ruby/core/regexp/source_spec.rb | 4 | ||||
| -rw-r--r-- | spec/ruby/core/regexp/timeout_spec.rb | 33 | ||||
| -rw-r--r-- | spec/ruby/core/regexp/try_convert_spec.rb | 8 | ||||
| -rw-r--r-- | spec/ruby/core/regexp/union_spec.rb | 51 |
18 files changed, 300 insertions, 436 deletions
diff --git a/spec/ruby/core/regexp/case_compare_spec.rb b/spec/ruby/core/regexp/case_compare_spec.rb index 5ae8b56c6a..29aada70bc 100644 --- a/spec/ruby/core/regexp/case_compare_spec.rb +++ b/spec/ruby/core/regexp/case_compare_spec.rb @@ -2,25 +2,25 @@ require_relative '../../spec_helper' describe "Regexp#===" do it "is true if there is a match" do - (/abc/ === "aabcc").should be_true + (/abc/ === "aabcc").should == true end it "is false if there is no match" do - (/abc/ === "xyz").should be_false + (/abc/ === "xyz").should == false end it "returns true if it matches a Symbol" do - (/a/ === :a).should be_true + (/a/ === :a).should == true end it "returns false if it does not match a Symbol" do - (/a/ === :b).should be_false + (/a/ === :b).should == false end # mirroring https://github.com/ruby/ruby/blob/master/test/ruby/test_regexp.rb it "returns false if the other value cannot be coerced to a string" do - (/abc/ === nil).should be_false - (/abc/ === /abc/).should be_false + (/abc/ === nil).should == false + (/abc/ === /abc/).should == false end it "uses #to_str on string-like objects" do @@ -30,6 +30,6 @@ describe "Regexp#===" do end end.new - (/abc/ === stringlike).should be_true + (/abc/ === stringlike).should == true end end diff --git a/spec/ruby/core/regexp/compile_spec.rb b/spec/ruby/core/regexp/compile_spec.rb index c41399cfbb..887c8d77dc 100644 --- a/spec/ruby/core/regexp/compile_spec.rb +++ b/spec/ruby/core/regexp/compile_spec.rb @@ -14,6 +14,6 @@ describe "Regexp.compile given a Regexp" do it_behaves_like :regexp_new_regexp, :compile end -describe "Regexp.new given a non-String/Regexp" do +describe "Regexp.compile given a non-String/Regexp" do it_behaves_like :regexp_new_non_string_or_regexp, :compile end diff --git a/spec/ruby/core/regexp/encoding_spec.rb b/spec/ruby/core/regexp/encoding_spec.rb index dfc835b4e4..fb4fdba064 100644 --- a/spec/ruby/core/regexp/encoding_spec.rb +++ b/spec/ruby/core/regexp/encoding_spec.rb @@ -3,7 +3,7 @@ require_relative 
'../../spec_helper' describe "Regexp#encoding" do it "returns an Encoding object" do - /glar/.encoding.should be_an_instance_of(Encoding) + /glar/.encoding.should.instance_of?(Encoding) end it "defaults to US-ASCII if the Regexp contains only US-ASCII character" do diff --git a/spec/ruby/core/regexp/fixed_encoding_spec.rb b/spec/ruby/core/regexp/fixed_encoding_spec.rb index 29d0a22c53..5d8b1c2860 100644 --- a/spec/ruby/core/regexp/fixed_encoding_spec.rb +++ b/spec/ruby/core/regexp/fixed_encoding_spec.rb @@ -3,34 +3,34 @@ require_relative '../../spec_helper' describe "Regexp#fixed_encoding?" do it "returns false by default" do - /needle/.fixed_encoding?.should be_false + /needle/.fixed_encoding?.should == false end it "returns false if the 'n' modifier was supplied to the Regexp" do - /needle/n.fixed_encoding?.should be_false + /needle/n.fixed_encoding?.should == false end it "returns true if the 'u' modifier was supplied to the Regexp" do - /needle/u.fixed_encoding?.should be_true + /needle/u.fixed_encoding?.should == true end it "returns true if the 's' modifier was supplied to the Regexp" do - /needle/s.fixed_encoding?.should be_true + /needle/s.fixed_encoding?.should == true end it "returns true if the 'e' modifier was supplied to the Regexp" do - /needle/e.fixed_encoding?.should be_true + /needle/e.fixed_encoding?.should == true end it "returns true if the Regexp contains a \\u escape" do - /needle \u{8768}/.fixed_encoding?.should be_true + /needle \u{8768}/.fixed_encoding?.should == true end it "returns true if the Regexp contains a UTF-8 literal" do - /文字化け/.fixed_encoding?.should be_true + /文字化け/.fixed_encoding?.should == true end it "returns true if the Regexp was created with the Regexp::FIXEDENCODING option" do - Regexp.new("", Regexp::FIXEDENCODING).fixed_encoding?.should be_true + Regexp.new("", Regexp::FIXEDENCODING).fixed_encoding?.should == true end end diff --git a/spec/ruby/core/regexp/initialize_spec.rb b/spec/ruby/core/regexp/initialize_spec.rb 
index 772a233e82..1c0133acae 100644 --- a/spec/ruby/core/regexp/initialize_spec.rb +++ b/spec/ruby/core/regexp/initialize_spec.rb @@ -2,22 +2,28 @@ require_relative '../../spec_helper' describe "Regexp#initialize" do it "is a private method" do - Regexp.should have_private_method(:initialize) + Regexp.private_instance_methods(false).should.include?(:initialize) end - ruby_version_is ""..."3.0" do - it "raises a SecurityError on a Regexp literal" do - -> { //.send(:initialize, "") }.should raise_error(SecurityError) + it "raises a FrozenError on a Regexp literal" do + -> { //.send(:initialize, "") }.should.raise(FrozenError) + end + + ruby_version_is "4.1" do + it "raises a FrozenError on an initialized non-literal Regexp" do + regexp = Regexp.new("") + -> { regexp.send(:initialize, "") }.should.raise(FrozenError) end end - ruby_version_is "3.0" do - it "raises a FrozenError on a Regexp literal" do - -> { //.send(:initialize, "") }.should raise_error(FrozenError) + ruby_version_is ""..."4.1" do + it "raises a TypeError on an initialized non-literal Regexp" do + -> { Regexp.new("").send(:initialize, "") }.should.raise(TypeError) end end - it "raises a TypeError on an initialized non-literal Regexp" do - -> { Regexp.new("").send(:initialize, "") }.should raise_error(TypeError) + it "raises a TypeError on an initialized non-literal Regexp subclass" do + r = Class.new(Regexp).new("") + -> { r.send(:initialize, "") }.should.raise(TypeError) end end diff --git a/spec/ruby/core/regexp/last_match_spec.rb b/spec/ruby/core/regexp/last_match_spec.rb index 0bfed32051..6c256cc1cf 100644 --- a/spec/ruby/core/regexp/last_match_spec.rb +++ b/spec/ruby/core/regexp/last_match_spec.rb @@ -4,7 +4,7 @@ describe "Regexp.last_match" do it "returns MatchData instance when not passed arguments" do /c(.)t/ =~ 'cat' - Regexp.last_match.should be_kind_of(MatchData) + Regexp.last_match.should.is_a?(MatchData) end it "returns the nth field in this MatchData when passed an Integer" do @@ -28,7 
+28,7 @@ describe "Regexp.last_match" do it "raises an IndexError when given a missing name" do /(?<test>[A-Z]+.*)/ =~ "TEST123" - -> { Regexp.last_match(:missing) }.should raise_error(IndexError) + -> { Regexp.last_match(:missing) }.should.raise(IndexError) end end @@ -50,7 +50,7 @@ describe "Regexp.last_match" do it "raises a TypeError when unable to coerce" do obj = Object.new /(?<test>[A-Z]+.*)/ =~ "TEST123" - -> { Regexp.last_match(obj) }.should raise_error(TypeError) + -> { Regexp.last_match(obj) }.should.raise(TypeError) end end end diff --git a/spec/ruby/core/regexp/linear_time_spec.rb b/spec/ruby/core/regexp/linear_time_spec.rb new file mode 100644 index 0000000000..f70021dfed --- /dev/null +++ b/spec/ruby/core/regexp/linear_time_spec.rb @@ -0,0 +1,80 @@ +require_relative '../../spec_helper' + +describe "Regexp.linear_time?" do + it "returns true if matching can be done in linear time" do + Regexp.linear_time?(/a/).should == true + Regexp.linear_time?('a').should == true + end + + it "returns true if matching can be done in linear time for a binary Regexp" do + Regexp.linear_time?(/[\x80-\xff]/n).should == true + end + + it "return false if matching can't be done in linear time" do + Regexp.linear_time?(/(a)\1/).should == false + Regexp.linear_time?("(a)\\1").should == false + end + + it "accepts flags for string argument" do + Regexp.linear_time?('a', Regexp::IGNORECASE).should == true + end + + it "warns about flags being ignored for regexp arguments" do + -> { + Regexp.linear_time?(/a/, Regexp::IGNORECASE) + }.should complain(/warning: flags ignored/) + end + + it "returns true for positive lookahead" do + Regexp.linear_time?(/a*(?:(?=a*)a)*b/).should == true + end + + it "returns true for positive lookbehind" do + Regexp.linear_time?(/a*(?:(?<=a)a*)*b/).should == true + end + + it "returns true for negative lookbehind" do + Regexp.linear_time?(/a*(?:(?<!a)a*)*b/).should == true + end + + # There are two known ways to make Regexp linear: + # * Using a 
DFA (deterministic finite-state automaton) Regexp engine, which always matches in linear time (e.g. TruffleRuby with TRegex) + # * Caching position and state to avoid catastrophic backtracking (e.g. CRuby: https://bugs.ruby-lang.org/issues/19104) + # + # Both approaches should be allowed and given that DFA Regexp engines + # are much faster there should be no specs preventing using them. + uses_regexp_caching = RUBY_ENGINE == 'ruby' + uses_dfa_regexp_engine = !uses_regexp_caching + + # The following specs should not be relied upon, + # they are here only to illustrate differences between Regexp engines. + guard -> { uses_regexp_caching } do + it "returns true for negative lookahead" do + Regexp.linear_time?(/a*(?:(?!a*)a*)*b/).should == true + end + + it "returns true for atomic groups" do + Regexp.linear_time?(/a*(?:(?>a)a*)*b/).should == true + end + + it "returns true for possessive quantifiers" do + Regexp.linear_time?(/a*(?:(?:a)?+a*)*b/).should == true + end + + it "returns true for positive lookbehind with capture group" do + Regexp.linear_time?(/.(?<=(a))/).should == true + end + end + + # The following specs should not be relied upon, + # they are here only to illustrate differences between Regexp engines. 
+ guard -> { uses_dfa_regexp_engine } do + it "returns true for non-recursive subexpression call" do + Regexp.linear_time?(/(?<a>a){0}\g<a>/).should == true + end + + it "returns true for positive lookahead with capture group" do + Regexp.linear_time?(/x+(?=(a))/).should == true + end + end +end diff --git a/spec/ruby/core/regexp/match_spec.rb b/spec/ruby/core/regexp/match_spec.rb index 80dbfb4c10..276cecc8e4 100644 --- a/spec/ruby/core/regexp/match_spec.rb +++ b/spec/ruby/core/regexp/match_spec.rb @@ -3,11 +3,11 @@ require_relative '../../spec_helper' describe :regexp_match, shared: true do it "returns nil if there is no match" do - /xyz/.send(@method,"abxyc").should be_nil + /xyz/.send(@method,"abxyc").should == nil end it "returns nil if the object is nil" do - /\w+/.send(@method, nil).should be_nil + /\w+/.send(@method, nil).should == nil end end @@ -27,19 +27,19 @@ describe "Regexp#match" do it_behaves_like :regexp_match, :match it "returns a MatchData object" do - /(.)(.)(.)/.match("abc").should be_kind_of(MatchData) + /(.)(.)(.)/.match("abc").should.is_a?(MatchData) end it "returns a MatchData object, when argument is a Symbol" do - /(.)(.)(.)/.match(:abc).should be_kind_of(MatchData) + /(.)(.)(.)/.match(:abc).should.is_a?(MatchData) end it "raises a TypeError on an uninitialized Regexp" do - -> { Regexp.allocate.match('foo') }.should raise_error(TypeError) + -> { Regexp.allocate.match('foo') }.should.raise(TypeError) end it "raises TypeError on an uninitialized Regexp" do - -> { Regexp.allocate.match('foo'.encode("UTF-16LE")) }.should raise_error(TypeError) + -> { Regexp.allocate.match('foo'.encode("UTF-16LE")) }.should.raise(TypeError) end describe "with [string, position]" do @@ -54,7 +54,7 @@ describe "Regexp#match" do it "raises an ArgumentError for an invalid encoding" do x96 = ([150].pack('C')).force_encoding('utf-8') - -> { /(.).(.)/.match("Hello, #{x96} world!", 1) }.should raise_error(ArgumentError) + -> { /(.).(.)/.match("Hello, #{x96} world!", 1) 
}.should.raise(ArgumentError) end end @@ -69,14 +69,14 @@ describe "Regexp#match" do it "raises an ArgumentError for an invalid encoding" do x96 = ([150].pack('C')).force_encoding('utf-8') - -> { /(.).(.)/.match("Hello, #{x96} world!", -1) }.should raise_error(ArgumentError) + -> { /(.).(.)/.match("Hello, #{x96} world!", -1) }.should.raise(ArgumentError) end end describe "when passed a block" do it "yields the MatchData" do /./.match("abc") {|m| ScratchPad.record m } - ScratchPad.recorded.should be_kind_of(MatchData) + ScratchPad.recorded.should.is_a?(MatchData) end it "returns the block result" do @@ -94,20 +94,20 @@ describe "Regexp#match" do it "resets $~ if passed nil" do # set $~ /./.match("a") - $~.should be_kind_of(MatchData) + $~.should.is_a?(MatchData) /1/.match(nil) - $~.should be_nil + $~.should == nil end it "raises TypeError when the given argument cannot be coerced to String" do f = 1 - -> { /foo/.match(f)[0] }.should raise_error(TypeError) + -> { /foo/.match(f)[0] }.should.raise(TypeError) end it "raises TypeError when the given argument is an Exception" do f = Exception.new("foo") - -> { /foo/.match(f)[0] }.should raise_error(TypeError) + -> { /foo/.match(f)[0] }.should.raise(TypeError) end end @@ -119,22 +119,22 @@ describe "Regexp#match?" 
do context "when matches the given value" do it "returns true but does not set Regexp.last_match" do - /string/i.match?('string').should be_true - Regexp.last_match.should be_nil + /string/i.match?('string').should == true + Regexp.last_match.should == nil end end it "returns false when does not match the given value" do - /STRING/.match?('string').should be_false + /STRING/.match?('string').should == false end it "takes matching position as the 2nd argument" do - /str/i.match?('string', 0).should be_true - /str/i.match?('string', 1).should be_false + /str/i.match?('string', 0).should == true + /str/i.match?('string', 1).should == false end it "returns false when given nil" do - /./.match?(nil).should be_false + /./.match?(nil).should == false end end diff --git a/spec/ruby/core/regexp/named_captures_spec.rb b/spec/ruby/core/regexp/named_captures_spec.rb index 1a68d7877b..4d3fdd23ab 100644 --- a/spec/ruby/core/regexp/named_captures_spec.rb +++ b/spec/ruby/core/regexp/named_captures_spec.rb @@ -2,7 +2,7 @@ require_relative '../../spec_helper' describe "Regexp#named_captures" do it "returns a Hash" do - /foo/.named_captures.should be_an_instance_of(Hash) + /foo/.named_captures.should.instance_of?(Hash) end it "returns an empty Hash when there are no capture groups" do @@ -17,7 +17,7 @@ describe "Regexp#named_captures" do it "sets the values of the Hash to Arrays" do rex = /this (?<is>is) [aA] (?<pat>pate?rn)/ rex.named_captures.values.each do |value| - value.should be_an_instance_of(Array) + value.should.instance_of?(Array) end end diff --git a/spec/ruby/core/regexp/names_spec.rb b/spec/ruby/core/regexp/names_spec.rb index 099768fd26..9013f41e20 100644 --- a/spec/ruby/core/regexp/names_spec.rb +++ b/spec/ruby/core/regexp/names_spec.rb @@ -2,7 +2,7 @@ require_relative '../../spec_helper' describe "Regexp#names" do it "returns an Array" do - /foo/.names.should be_an_instance_of(Array) + /foo/.names.should.instance_of?(Array) end it "returns an empty Array if there are 
no named captures" do @@ -11,7 +11,7 @@ describe "Regexp#names" do it "returns each named capture as a String" do /n(?<cap>ee)d(?<ture>le)/.names.each do |name| - name.should be_an_instance_of(String) + name.should.instance_of?(String) end end diff --git a/spec/ruby/core/regexp/new_spec.rb b/spec/ruby/core/regexp/new_spec.rb index 65f612df55..79210e9a23 100644 --- a/spec/ruby/core/regexp/new_spec.rb +++ b/spec/ruby/core/regexp/new_spec.rb @@ -7,11 +7,11 @@ end describe "Regexp.new given a String" do it_behaves_like :regexp_new_string, :new + it_behaves_like :regexp_new_string_binary, :new end describe "Regexp.new given a Regexp" do it_behaves_like :regexp_new_regexp, :new - it_behaves_like :regexp_new_string_binary, :new end describe "Regexp.new given a non-String/Regexp" do diff --git a/spec/ruby/core/regexp/options_spec.rb b/spec/ruby/core/regexp/options_spec.rb index 527b51a3b2..c3401cee6e 100644 --- a/spec/ruby/core/regexp/options_spec.rb +++ b/spec/ruby/core/regexp/options_spec.rb @@ -2,9 +2,9 @@ require_relative '../../spec_helper' describe "Regexp#options" do it "returns an Integer bitvector of regexp options for the Regexp object" do - /cat/.options.should be_kind_of(Integer) + /cat/.options.should.is_a?(Integer) not_supported_on :opal do - /cat/ix.options.should be_kind_of(Integer) + /cat/ix.options.should.is_a?(Integer) end end @@ -29,7 +29,7 @@ describe "Regexp#options" do end it "raises a TypeError on an uninitialized Regexp" do - -> { Regexp.allocate.options }.should raise_error(TypeError) + -> { Regexp.allocate.options }.should.raise(TypeError) end it "includes Regexp::FIXEDENCODING for a Regexp literal with the 'u' option" do diff --git a/spec/ruby/core/regexp/shared/new.rb b/spec/ruby/core/regexp/shared/new.rb index 10c2d3d390..affdaf855c 100644 --- a/spec/ruby/core/regexp/shared/new.rb +++ b/spec/ruby/core/regexp/shared/new.rb @@ -1,10 +1,16 @@ -# -*- encoding: binary -*- +# encoding: binary describe :regexp_new, shared: true do it "requires one 
argument and creates a new regular expression object" do Regexp.send(@method, '').is_a?(Regexp).should == true end + ruby_version_is "4.1" do + it "is frozen" do + Regexp.send(@method, '').should.frozen? + end + end + it "works by default for subclasses with overridden #initialize" do class RegexpSpecsSubclass < Regexp def initialize(*args) @@ -17,10 +23,10 @@ describe :regexp_new, shared: true do class RegexpSpecsSubclassTwo < Regexp; end - RegexpSpecsSubclass.send(@method, "hi").should be_kind_of(RegexpSpecsSubclass) + RegexpSpecsSubclass.send(@method, "hi").should.is_a?(RegexpSpecsSubclass) RegexpSpecsSubclass.send(@method, "hi").args.first.should == "hi" - RegexpSpecsSubclassTwo.send(@method, "hi").should be_kind_of(RegexpSpecsSubclassTwo) + RegexpSpecsSubclassTwo.send(@method, "hi").should.is_a?(RegexpSpecsSubclassTwo) end end @@ -34,19 +40,19 @@ describe :regexp_new_non_string_or_regexp, shared: true do it "raises TypeError if there is no #to_str method for non-String/Regexp argument" do obj = Object.new - -> { Regexp.send(@method, obj) }.should raise_error(TypeError, "no implicit conversion of Object into String") + -> { Regexp.send(@method, obj) }.should.raise(TypeError, "no implicit conversion of Object into String") - -> { Regexp.send(@method, 1) }.should raise_error(TypeError, "no implicit conversion of Integer into String") - -> { Regexp.send(@method, 1.0) }.should raise_error(TypeError, "no implicit conversion of Float into String") - -> { Regexp.send(@method, :symbol) }.should raise_error(TypeError, "no implicit conversion of Symbol into String") - -> { Regexp.send(@method, []) }.should raise_error(TypeError, "no implicit conversion of Array into String") + -> { Regexp.send(@method, 1) }.should.raise(TypeError, "no implicit conversion of Integer into String") + -> { Regexp.send(@method, 1.0) }.should.raise(TypeError, "no implicit conversion of Float into String") + -> { Regexp.send(@method, :symbol) }.should.raise(TypeError, "no implicit conversion of 
Symbol into String") + -> { Regexp.send(@method, []) }.should.raise(TypeError, "no implicit conversion of Array into String") end it "raises TypeError if #to_str returns non-String value" do obj = Object.new def obj.to_str() [] end - -> { Regexp.send(@method, obj) }.should raise_error(TypeError, /can't convert Object to String/) + -> { Regexp.send(@method, obj) }.should raise_consistent_error(TypeError, /can't convert Object into String/) end end @@ -56,7 +62,7 @@ describe :regexp_new_string, shared: true do end it "raises a RegexpError when passed an incorrect regexp" do - -> { Regexp.send(@method, "^[$", 0) }.should raise_error(RegexpError) + -> { Regexp.send(@method, "^[$", 0) }.should.raise(RegexpError, Regexp.new(Regexp.escape("premature end of char-class: /^[$/"))) end it "does not set Regexp options if only given one argument" do @@ -128,318 +134,95 @@ describe :regexp_new_string, shared: true do obj = Object.new def obj.to_int() ScratchPad.record(:called) end - Regexp.send(@method, "Hi", obj) + -> { + Regexp.send(@method, "Hi", obj) + }.should complain(/expected true or false as ignorecase/, {verbose: true}) ScratchPad.recorded.should == nil end - ruby_version_is ""..."3.2" do - it "treats any non-Integer, non-nil, non-false second argument as IGNORECASE" do + it "warns any non-Integer, non-nil, non-false second argument" do + r = nil + -> { r = Regexp.send(@method, 'Hi', Object.new) - (r.options & Regexp::IGNORECASE).should_not == 0 - (r.options & Regexp::MULTILINE).should == 0 - not_supported_on :opal do - (r.options & Regexp::EXTENDED).should == 0 - end + }.should complain(/expected true or false as ignorecase/, {verbose: true}) + (r.options & Regexp::IGNORECASE).should_not == 0 + (r.options & Regexp::MULTILINE).should == 0 + not_supported_on :opal do + (r.options & Regexp::EXTENDED).should == 0 end end - ruby_version_is "3.2" do - it "warns any non-Integer, non-nil, non-false second argument" do - r = nil - -> { - r = Regexp.send(@method, 'Hi', 
Object.new) - }.should complain(/expected true or false as ignorecase/, {verbose: true}) - (r.options & Regexp::IGNORECASE).should_not == 0 - (r.options & Regexp::MULTILINE).should == 0 - not_supported_on :opal do - (r.options & Regexp::EXTENDED).should == 0 - end + it "accepts a String of supported flags as the second argument" do + r = Regexp.send(@method, 'Hi', 'i') + (r.options & Regexp::IGNORECASE).should_not == 0 + (r.options & Regexp::MULTILINE).should == 0 + not_supported_on :opal do + (r.options & Regexp::EXTENDED).should == 0 end - it "accepts a String of supported flags as the second argument" do - r = Regexp.send(@method, 'Hi', 'i') - (r.options & Regexp::IGNORECASE).should_not == 0 - (r.options & Regexp::MULTILINE).should == 0 - not_supported_on :opal do - (r.options & Regexp::EXTENDED).should == 0 - end - - r = Regexp.send(@method, 'Hi', 'imx') - (r.options & Regexp::IGNORECASE).should_not == 0 - (r.options & Regexp::MULTILINE).should_not == 0 - not_supported_on :opal do - (r.options & Regexp::EXTENDED).should_not == 0 - end - - r = Regexp.send(@method, 'Hi', 'mimi') - (r.options & Regexp::IGNORECASE).should_not == 0 - (r.options & Regexp::MULTILINE).should_not == 0 - not_supported_on :opal do - (r.options & Regexp::EXTENDED).should == 0 - end - - r = Regexp.send(@method, 'Hi', '') - (r.options & Regexp::IGNORECASE).should == 0 - (r.options & Regexp::MULTILINE).should == 0 - not_supported_on :opal do - (r.options & Regexp::EXTENDED).should == 0 - end + r = Regexp.send(@method, 'Hi', 'imx') + (r.options & Regexp::IGNORECASE).should_not == 0 + (r.options & Regexp::MULTILINE).should_not == 0 + not_supported_on :opal do + (r.options & Regexp::EXTENDED).should_not == 0 end - it "raises an Argument error if the second argument contains unsupported chars" do - -> { Regexp.send(@method, 'Hi', 'e') }.should raise_error(ArgumentError) - -> { Regexp.send(@method, 'Hi', 'n') }.should raise_error(ArgumentError) - -> { Regexp.send(@method, 'Hi', 's') }.should 
raise_error(ArgumentError) - -> { Regexp.send(@method, 'Hi', 'u') }.should raise_error(ArgumentError) - -> { Regexp.send(@method, 'Hi', 'j') }.should raise_error(ArgumentError) - -> { Regexp.send(@method, 'Hi', 'mjx') }.should raise_error(ArgumentError) + r = Regexp.send(@method, 'Hi', 'mimi') + (r.options & Regexp::IGNORECASE).should_not == 0 + (r.options & Regexp::MULTILINE).should_not == 0 + not_supported_on :opal do + (r.options & Regexp::EXTENDED).should == 0 end - end - - it "ignores the third argument if it is 'e' or 'euc' (case-insensitive)" do - -> { - Regexp.send(@method, 'Hi', nil, 'e').encoding.should == Encoding::US_ASCII - Regexp.send(@method, 'Hi', nil, 'euc').encoding.should == Encoding::US_ASCII - Regexp.send(@method, 'Hi', nil, 'E').encoding.should == Encoding::US_ASCII - Regexp.send(@method, 'Hi', nil, 'EUC').encoding.should == Encoding::US_ASCII - }.should complain(/encoding option is ignored/) - end - - it "ignores the third argument if it is 's' or 'sjis' (case-insensitive)" do - -> { - Regexp.send(@method, 'Hi', nil, 's').encoding.should == Encoding::US_ASCII - Regexp.send(@method, 'Hi', nil, 'sjis').encoding.should == Encoding::US_ASCII - Regexp.send(@method, 'Hi', nil, 'S').encoding.should == Encoding::US_ASCII - Regexp.send(@method, 'Hi', nil, 'SJIS').encoding.should == Encoding::US_ASCII - }.should complain(/encoding option is ignored/) - end - - it "ignores the third argument if it is 'u' or 'utf8' (case-insensitive)" do - -> { - Regexp.send(@method, 'Hi', nil, 'u').encoding.should == Encoding::US_ASCII - Regexp.send(@method, 'Hi', nil, 'utf8').encoding.should == Encoding::US_ASCII - Regexp.send(@method, 'Hi', nil, 'U').encoding.should == Encoding::US_ASCII - Regexp.send(@method, 'Hi', nil, 'UTF8').encoding.should == Encoding::US_ASCII - }.should complain(/encoding option is ignored/) - end - it "uses US_ASCII encoding if third argument is 'n' or 'none' (case insensitive) and only ascii characters" do - Regexp.send(@method, 'Hi', nil, 
'n').encoding.should == Encoding::US_ASCII - Regexp.send(@method, 'Hi', nil, 'none').encoding.should == Encoding::US_ASCII - Regexp.send(@method, 'Hi', nil, 'N').encoding.should == Encoding::US_ASCII - Regexp.send(@method, 'Hi', nil, 'NONE').encoding.should == Encoding::US_ASCII + r = Regexp.send(@method, 'Hi', '') + (r.options & Regexp::IGNORECASE).should == 0 + (r.options & Regexp::MULTILINE).should == 0 + not_supported_on :opal do + (r.options & Regexp::EXTENDED).should == 0 + end end - it "uses ASCII_8BIT encoding if third argument is 'n' or 'none' (case insensitive) and non-ascii characters" do - a = "(?:[\x8E\xA1-\xFE])" - str = "\A(?:#{a}|x*)\z" - - Regexp.send(@method, str, nil, 'N').encoding.should == Encoding::BINARY - Regexp.send(@method, str, nil, 'n').encoding.should == Encoding::BINARY - Regexp.send(@method, str, nil, 'none').encoding.should == Encoding::BINARY - Regexp.send(@method, str, nil, 'NONE').encoding.should == Encoding::BINARY + it "raises an Argument error if the second argument contains unsupported chars" do + -> { Regexp.send(@method, 'Hi', 'e') }.should.raise(ArgumentError, "unknown regexp option: e") + -> { Regexp.send(@method, 'Hi', 'n') }.should.raise(ArgumentError, "unknown regexp option: n") + -> { Regexp.send(@method, 'Hi', 's') }.should.raise(ArgumentError, "unknown regexp option: s") + -> { Regexp.send(@method, 'Hi', 'u') }.should.raise(ArgumentError, "unknown regexp option: u") + -> { Regexp.send(@method, 'Hi', 'j') }.should.raise(ArgumentError, "unknown regexp option: j") + -> { Regexp.send(@method, 'Hi', 'mjx') }.should.raise(ArgumentError, /unknown regexp option: mjx\b/) end describe "with escaped characters" do it "raises a Regexp error if there is a trailing backslash" do - -> { Regexp.send(@method, "\\") }.should raise_error(RegexpError) + -> { Regexp.send(@method, "\\") }.should.raise(RegexpError, Regexp.new(Regexp.escape("too short escape sequence: /\\/"))) end it "does not raise a Regexp error if there is an escaped 
trailing backslash" do - -> { Regexp.send(@method, "\\\\") }.should_not raise_error(RegexpError) + -> { Regexp.send(@method, "\\\\") }.should_not.raise(RegexpError) end - it "accepts a backspace followed by a character" do + it "accepts a backspace followed by a non-special character" do Regexp.send(@method, "\\N").should == /#{"\x5c"+"N"}/ end - it "accepts a one-digit octal value" do - Regexp.send(@method, "\0").should == /#{"\x00"}/ - end - - it "accepts a two-digit octal value" do - Regexp.send(@method, "\11").should == /#{"\x09"}/ - end - - it "accepts a one-digit hexadecimal value" do - Regexp.send(@method, "\x9n").should == /#{"\x09n"}/ - end - - it "accepts a two-digit hexadecimal value" do - Regexp.send(@method, "\x23").should == /#{"\x23"}/ - end - - it "interprets a digit following a two-digit hexadecimal value as a character" do - Regexp.send(@method, "\x420").should == /#{"\x420"}/ - end - it "raises a RegexpError if \\x is not followed by any hexadecimal digits" do - -> { Regexp.send(@method, "\\" + "xn") }.should raise_error(RegexpError) - end - - it "accepts an escaped string interpolation" do - Regexp.send(@method, "\#{abc}").should == /#{"\#{abc}"}/ - end - - it "accepts '\\n'" do - Regexp.send(@method, "\n").should == /#{"\x0a"}/ - end - - it "accepts '\\t'" do - Regexp.send(@method, "\t").should == /#{"\x09"}/ - end - - it "accepts '\\r'" do - Regexp.send(@method, "\r").should == /#{"\x0d"}/ - end - - it "accepts '\\f'" do - Regexp.send(@method, "\f").should == /#{"\x0c"}/ - end - - it "accepts '\\v'" do - Regexp.send(@method, "\v").should == /#{"\x0b"}/ - end - - it "accepts '\\a'" do - Regexp.send(@method, "\a").should == /#{"\x07"}/ - end - - it "accepts '\\e'" do - Regexp.send(@method, "\e").should == /#{"\x1b"}/ - end - - it "accepts '\\C-\\n'" do - Regexp.send(@method, "\C-\n").should == /#{"\x0a"}/ - end - - it "accepts '\\C-\\t'" do - Regexp.send(@method, "\C-\t").should == /#{"\x09"}/ - end - - it "accepts '\\C-\\r'" do - 
Regexp.send(@method, "\C-\r").should == /#{"\x0d"}/ - end - - it "accepts '\\C-\\f'" do - Regexp.send(@method, "\C-\f").should == /#{"\x0c"}/ - end - - it "accepts '\\C-\\v'" do - Regexp.send(@method, "\C-\v").should == /#{"\x0b"}/ - end - - it "accepts '\\C-\\a'" do - Regexp.send(@method, "\C-\a").should == /#{"\x07"}/ - end - - it "accepts '\\C-\\e'" do - Regexp.send(@method, "\C-\e").should == /#{"\x1b"}/ - end - - it "accepts multiple consecutive '\\' characters" do - Regexp.send(@method, "\\\\\\N").should == /#{"\\\\\\"+"N"}/ - end - - it "accepts characters and escaped octal digits" do - Regexp.send(@method, "abc\076").should == /#{"abc\x3e"}/ - end - - it "accepts escaped octal digits and characters" do - Regexp.send(@method, "\076abc").should == /#{"\x3eabc"}/ - end - - it "accepts characters and escaped hexadecimal digits" do - Regexp.send(@method, "abc\x42").should == /#{"abc\x42"}/ - end - - it "accepts escaped hexadecimal digits and characters" do - Regexp.send(@method, "\x3eabc").should == /#{"\x3eabc"}/ - end - - it "accepts escaped hexadecimal and octal digits" do - Regexp.send(@method, "\061\x42").should == /#{"\x31\x42"}/ - end - - it "accepts \\u{H} for a single Unicode codepoint" do - Regexp.send(@method, "\u{f}").should == /#{"\x0f"}/ - end - - it "accepts \\u{HH} for a single Unicode codepoint" do - Regexp.send(@method, "\u{7f}").should == /#{"\x7f"}/ - end - - it "accepts \\u{HHH} for a single Unicode codepoint" do - Regexp.send(@method, "\u{07f}").should == /#{"\x7f"}/ - end - - it "accepts \\u{HHHH} for a single Unicode codepoint" do - Regexp.send(@method, "\u{0000}").should == /#{"\x00"}/ - end - - it "accepts \\u{HHHHH} for a single Unicode codepoint" do - Regexp.send(@method, "\u{00001}").should == /#{"\x01"}/ - end - - it "accepts \\u{HHHHHH} for a single Unicode codepoint" do - Regexp.send(@method, "\u{000000}").should == /#{"\x00"}/ - end - - it "accepts characters followed by \\u{HHHH}" do - Regexp.send(@method, "abc\u{3042}").should 
== /#{"abc\u3042"}/ - end - - it "accepts \\u{HHHH} followed by characters" do - Regexp.send(@method, "\u{3042}abc").should == /#{"\u3042abc"}/ - end - - it "accepts escaped hexadecimal digits followed by \\u{HHHH}" do - Regexp.send(@method, "\x42\u{3042}").should == /#{"\x42\u3042"}/ - end - - it "accepts escaped octal digits followed by \\u{HHHH}" do - Regexp.send(@method, "\056\u{3042}").should == /#{"\x2e\u3042"}/ - end - - it "accepts a combination of escaped octal and hexadecimal digits and \\u{HHHH}" do - Regexp.send(@method, "\056\x42\u{3042}\x52\076").should == /#{"\x2e\x42\u3042\x52\x3e"}/ - end - - it "accepts \\uHHHH for a single Unicode codepoint" do - Regexp.send(@method, "\u3042").should == /#{"\u3042"}/ - end - - it "accepts characters followed by \\uHHHH" do - Regexp.send(@method, "abc\u3042").should == /#{"abc\u3042"}/ - end - - it "accepts \\uHHHH followed by characters" do - Regexp.send(@method, "\u3042abc").should == /#{"\u3042abc"}/ - end - - it "accepts escaped hexadecimal digits followed by \\uHHHH" do - Regexp.send(@method, "\x42\u3042").should == /#{"\x42\u3042"}/ - end - - it "accepts escaped octal digits followed by \\uHHHH" do - Regexp.send(@method, "\056\u3042").should == /#{"\x2e\u3042"}/ - end - - it "accepts a combination of escaped octal and hexadecimal digits and \\uHHHH" do - Regexp.send(@method, "\056\x42\u3042\x52\076").should == /#{"\x2e\x42\u3042\x52\x3e"}/ + -> { Regexp.send(@method, "\\" + "xn") }.should.raise(RegexpError, Regexp.new(Regexp.escape("invalid hex escape: /\\xn/"))) end it "raises a RegexpError if less than four digits are given for \\uHHHH" do - -> { Regexp.send(@method, "\\" + "u304") }.should raise_error(RegexpError) + -> { Regexp.send(@method, "\\" + "u304") }.should.raise(RegexpError, Regexp.new(Regexp.escape("invalid Unicode escape: /\\u304/"))) end it "raises a RegexpError if the \\u{} escape is empty" do - -> { Regexp.send(@method, "\\" + "u{}") }.should raise_error(RegexpError) + -> { 
Regexp.send(@method, "\\" + "u{}") }.should.raise(RegexpError, Regexp.new(Regexp.escape("invalid Unicode list: /\\u{}/"))) + end + + it "raises a RegexpError if the \\u{} escape contains non hexadecimal digits" do + -> { Regexp.send(@method, "\\" + "u{abcX}") }.should.raise(RegexpError, Regexp.new(Regexp.escape("invalid Unicode list: /\\u{abcX}/"))) end it "raises a RegexpError if more than six hexadecimal digits are given" do - -> { Regexp.send(@method, "\\" + "u{0ffffff}") }.should raise_error(RegexpError) + -> { Regexp.send(@method, "\\" + "u{0ffffff}") }.should.raise(RegexpError, Regexp.new(Regexp.escape("invalid Unicode range: /\\u{0ffffff}/"))) end it "returns a Regexp with US-ASCII encoding if only 7-bit ASCII characters are present regardless of the input String's encoding" do @@ -467,12 +250,12 @@ describe :regexp_new_string, shared: true do end it "returns a Regexp with the input String's encoding" do - str = "\x82\xa0".force_encoding(Encoding::Shift_JIS) + str = "\x82\xa0".dup.force_encoding(Encoding::Shift_JIS) Regexp.send(@method, str).encoding.should == Encoding::Shift_JIS end it "returns a Regexp with source String having the input String's encoding" do - str = "\x82\xa0".force_encoding(Encoding::Shift_JIS) + str = "\x82\xa0".dup.force_encoding(Encoding::Shift_JIS) Regexp.send(@method, str).source.encoding.should == Encoding::Shift_JIS end end @@ -480,69 +263,6 @@ end describe :regexp_new_string_binary, shared: true do describe "with escaped characters" do - it "accepts a three-digit octal value" do - Regexp.send(@method, "\315").should == /#{"\xcd"}/ - end - - it "interprets a digit following a three-digit octal value as a character" do - Regexp.send(@method, "\3762").should == /#{"\xfe2"}/ - end - - it "accepts '\\M-\\n'" do - Regexp.send(@method, "\M-\n").should == /#{"\x8a"}/ - end - - it "accepts '\\M-\\t'" do - Regexp.send(@method, "\M-\t").should == /#{"\x89"}/ - end - - it "accepts '\\M-\\r'" do - Regexp.send(@method, "\M-\r").should == 
/#{"\x8d"}/ - end - - it "accepts '\\M-\\f'" do - Regexp.send(@method, "\M-\f").should == /#{"\x8c"}/ - end - - it "accepts '\\M-\\v'" do - Regexp.send(@method, "\M-\v").should == /#{"\x8b"}/ - end - - it "accepts '\\M-\\a'" do - Regexp.send(@method, "\M-\a").should == /#{"\x87"}/ - end - - it "accepts '\\M-\\e'" do - Regexp.send(@method, "\M-\e").should == /#{"\x9b"}/ - end - - it "accepts '\\M-\\C-\\n'" do - Regexp.send(@method, "\M-\C-\n").should == /#{"\x8a"}/ - end - - it "accepts '\\M-\\C-\\t'" do - Regexp.send(@method, "\M-\C-\t").should == /#{"\x89"}/ - end - - it "accepts '\\M-\\C-\\r'" do - Regexp.send(@method, "\M-\C-\r").should == /#{"\x8d"}/ - end - - it "accepts '\\M-\\C-\\f'" do - Regexp.send(@method, "\M-\C-\f").should == /#{"\x8c"}/ - end - - it "accepts '\\M-\\C-\\v'" do - Regexp.send(@method, "\M-\C-\v").should == /#{"\x8b"}/ - end - - it "accepts '\\M-\\C-\\a'" do - Regexp.send(@method, "\M-\C-\a").should == /#{"\x87"}/ - end - - it "accepts '\\M-\\C-\\e'" do - Regexp.send(@method, "\M-\C-\e").should == /#{"\x9b"}/ - end end end @@ -597,9 +317,5 @@ describe :regexp_new_regexp, shared: true do it "sets the encoding to US-ASCII if the Regexp literal has the 'n' option and the source String is ASCII only" do Regexp.send(@method, /Hi/n).encoding.should == Encoding::US_ASCII end - - it "sets the encoding to source String's encoding if the Regexp literal has the 'n' option and the source String is not ASCII only" do - Regexp.send(@method, Regexp.new("\\xff", nil, 'n')).encoding.should == Encoding::BINARY - end end end diff --git a/spec/ruby/core/regexp/shared/quote.rb b/spec/ruby/core/regexp/shared/quote.rb index 9533102766..083f12d78c 100644 --- a/spec/ruby/core/regexp/shared/quote.rb +++ b/spec/ruby/core/regexp/shared/quote.rb @@ -1,9 +1,9 @@ -# -*- encoding: binary -*- +# encoding: binary describe :regexp_quote, shared: true do it "escapes any characters with special meaning in a regular expression" do - Regexp.send(@method, '\*?{}.+^[]()- 
').should == '\\\\\*\?\{\}\.\+\^\[\]\(\)\-\\ ' - Regexp.send(@method, "\*?{}.+^[]()- ").should == '\\*\\?\\{\\}\\.\\+\\^\\[\\]\\(\\)\\-\\ ' + Regexp.send(@method, '\*?{}.+^$[]()- ').should == '\\\\\*\?\{\}\.\+\^\$\[\]\(\)\-\\ ' + Regexp.send(@method, "\*?{}.+^$[]()- ").should == '\\*\\?\\{\\}\\.\\+\\^\\$\\[\\]\\(\\)\\-\\ ' Regexp.send(@method, '\n\r\f\t').should == '\\\\n\\\\r\\\\f\\\\t' Regexp.send(@method, "\n\r\f\t").should == '\\n\\r\\f\\t' end @@ -18,24 +18,24 @@ describe :regexp_quote, shared: true do end it "works for broken strings" do - Regexp.send(@method, "a.\x85b.".force_encoding("US-ASCII")).should =="a\\.\x85b\\.".force_encoding("US-ASCII") - Regexp.send(@method, "a.\x80".force_encoding("UTF-8")).should == "a\\.\x80".force_encoding("UTF-8") + Regexp.send(@method, "a.\x85b.".dup.force_encoding("US-ASCII")).should =="a\\.\x85b\\.".dup.force_encoding("US-ASCII") + Regexp.send(@method, "a.\x80".dup.force_encoding("UTF-8")).should == "a\\.\x80".dup.force_encoding("UTF-8") end it "sets the encoding of the result to US-ASCII if there are only US-ASCII characters present in the input String" do - str = "abc".force_encoding("euc-jp") + str = "abc".dup.force_encoding("euc-jp") Regexp.send(@method, str).encoding.should == Encoding::US_ASCII end it "sets the encoding of the result to the encoding of the String if any non-US-ASCII characters are present in an input String with valid encoding" do - str = "ありがとう".force_encoding("utf-8") - str.valid_encoding?.should be_true + str = "ありがとう".dup.force_encoding("utf-8") + str.valid_encoding?.should == true Regexp.send(@method, str).encoding.should == Encoding::UTF_8 end it "sets the encoding of the result to BINARY if any non-US-ASCII characters are present in an input String with invalid encoding" do - str = "\xff".force_encoding "us-ascii" - str.valid_encoding?.should be_false + str = "\xff".dup.force_encoding "us-ascii" + str.valid_encoding?.should == false Regexp.send(@method, "\xff").encoding.should == 
Encoding::BINARY end end diff --git a/spec/ruby/core/regexp/source_spec.rb b/spec/ruby/core/regexp/source_spec.rb index 5f253da9ea..4eebf280f0 100644 --- a/spec/ruby/core/regexp/source_spec.rb +++ b/spec/ruby/core/regexp/source_spec.rb @@ -34,14 +34,14 @@ describe "Regexp#source" do not_supported_on :opal do it "has US-ASCII encoding when created from an ASCII-only \\u{} literal" do re = /[\u{20}-\u{7E}]/ - re.source.encoding.should equal(Encoding::US_ASCII) + re.source.encoding.should.equal?(Encoding::US_ASCII) end end not_supported_on :opal do it "has UTF-8 encoding when created from a non-ASCII-only \\u{} literal" do re = /[\u{20}-\u{7EE}]/ - re.source.encoding.should equal(Encoding::UTF_8) + re.source.encoding.should.equal?(Encoding::UTF_8) end end end diff --git a/spec/ruby/core/regexp/timeout_spec.rb b/spec/ruby/core/regexp/timeout_spec.rb new file mode 100644 index 0000000000..a1ec475ef3 --- /dev/null +++ b/spec/ruby/core/regexp/timeout_spec.rb @@ -0,0 +1,33 @@ +require_relative '../../spec_helper' + +describe "Regexp.timeout" do + after :each do + Regexp.timeout = nil + end + + it "returns global timeout" do + Regexp.timeout = 3 + Regexp.timeout.should == 3 + end + + it "raises Regexp::TimeoutError after global timeout elapsed" do + Regexp.timeout = 0.001 + Regexp.timeout.should == 0.001 + + -> { + # A typical ReDoS case + /^(a*)*$/ =~ "a" * 1000000 + "x" + }.should.raise(Regexp::TimeoutError, "regexp match timeout") + end + + it "raises Regexp::TimeoutError after timeout keyword value elapsed" do + Regexp.timeout = 3 # This should be ignored + Regexp.timeout.should == 3 + + re = Regexp.new("^a*b?a*$", timeout: 0.001) + + -> { + re =~ "a" * 1000000 + "x" + }.should.raise(Regexp::TimeoutError, "regexp match timeout") + end +end diff --git a/spec/ruby/core/regexp/try_convert_spec.rb b/spec/ruby/core/regexp/try_convert_spec.rb index be567e2130..da5e10adce 100644 --- a/spec/ruby/core/regexp/try_convert_spec.rb +++ b/spec/ruby/core/regexp/try_convert_spec.rb @@ 
-9,7 +9,7 @@ describe "Regexp.try_convert" do it "returns nil if given an argument that can't be converted to a Regexp" do ['', 'glark', [], Object.new, :pat].each do |arg| - Regexp.try_convert(arg).should be_nil + Regexp.try_convert(arg).should == nil end end @@ -18,4 +18,10 @@ describe "Regexp.try_convert" do rex.should_receive(:to_regexp).and_return(/(p(a)t[e]rn)/) Regexp.try_convert(rex).should == /(p(a)t[e]rn)/ end + + it "raises a TypeError if the object does not return an Regexp from #to_regexp" do + obj = mock("regexp") + obj.should_receive(:to_regexp).and_return("string") + -> { Regexp.try_convert(obj) }.should raise_consistent_error(TypeError, "can't convert MockObject into Regexp (MockObject#to_regexp gives String)") + end end diff --git a/spec/ruby/core/regexp/union_spec.rb b/spec/ruby/core/regexp/union_spec.rb index 8076836471..c0a9d12fed 100644 --- a/spec/ruby/core/regexp/union_spec.rb +++ b/spec/ruby/core/regexp/union_spec.rb @@ -43,6 +43,27 @@ describe "Regexp.union" do Regexp.union("\u00A9".encode("ISO-8859-1"), "a".encode("UTF-8")).encoding.should == Encoding::ISO_8859_1 end + it "returns ASCII-8BIT if the regexp encodings are ASCII-8BIT and at least one has non-ASCII characters" do + us_ascii_implicit, us_ascii_explicit, binary = /abc/, /[\x00-\x7f]/n, /[\x80-\xBF]/n + us_ascii_implicit.encoding.should == Encoding::US_ASCII + us_ascii_explicit.encoding.should == Encoding::US_ASCII + binary.encoding.should == Encoding::BINARY + + Regexp.union(us_ascii_implicit, us_ascii_explicit, binary).encoding.should == Encoding::BINARY + Regexp.union(us_ascii_implicit, binary, us_ascii_explicit).encoding.should == Encoding::BINARY + Regexp.union(us_ascii_explicit, us_ascii_implicit, binary).encoding.should == Encoding::BINARY + Regexp.union(us_ascii_explicit, binary, us_ascii_implicit).encoding.should == Encoding::BINARY + Regexp.union(binary, us_ascii_implicit, us_ascii_explicit).encoding.should == Encoding::BINARY + Regexp.union(binary, us_ascii_explicit, 
us_ascii_implicit).encoding.should == Encoding::BINARY + end + + it "return US-ASCII if all patterns are ASCII-only" do + Regexp.union(/abc/e, /def/e).encoding.should == Encoding::US_ASCII + Regexp.union(/abc/n, /def/n).encoding.should == Encoding::US_ASCII + Regexp.union(/abc/s, /def/s).encoding.should == Encoding::US_ASCII + Regexp.union(/abc/u, /def/u).encoding.should == Encoding::US_ASCII + end + it "returns a Regexp with UTF-8 if one part is UTF-8" do Regexp.union(/probl[éeè]me/i, /help/i).encoding.should == Encoding::UTF_8 end @@ -54,83 +75,83 @@ describe "Regexp.union" do it "raises ArgumentError if the arguments include conflicting ASCII-incompatible Strings" do -> { Regexp.union("a".encode("UTF-16LE"), "b".encode("UTF-16BE")) - }.should raise_error(ArgumentError) + }.should.raise(ArgumentError, 'incompatible encodings: UTF-16LE and UTF-16BE') end it "raises ArgumentError if the arguments include conflicting ASCII-incompatible Regexps" do -> { Regexp.union(Regexp.new("a".encode("UTF-16LE")), Regexp.new("b".encode("UTF-16BE"))) - }.should raise_error(ArgumentError) + }.should.raise(ArgumentError, 'incompatible encodings: UTF-16LE and UTF-16BE') end it "raises ArgumentError if the arguments include conflicting fixed encoding Regexps" do -> { Regexp.union(Regexp.new("a".encode("UTF-8"), Regexp::FIXEDENCODING), Regexp.new("b".encode("US-ASCII"), Regexp::FIXEDENCODING)) - }.should raise_error(ArgumentError) + }.should.raise(ArgumentError, 'incompatible encodings: UTF-8 and US-ASCII') end it "raises ArgumentError if the arguments include a fixed encoding Regexp and a String containing non-ASCII-compatible characters in a different encoding" do -> { Regexp.union(Regexp.new("a".encode("UTF-8"), Regexp::FIXEDENCODING), "\u00A9".encode("ISO-8859-1")) - }.should raise_error(ArgumentError) + }.should.raise(ArgumentError, 'incompatible encodings: UTF-8 and ISO-8859-1') end it "raises ArgumentError if the arguments include a String containing non-ASCII-compatible 
characters and a fixed encoding Regexp in a different encoding" do -> { Regexp.union("\u00A9".encode("ISO-8859-1"), Regexp.new("a".encode("UTF-8"), Regexp::FIXEDENCODING)) - }.should raise_error(ArgumentError) + }.should.raise(ArgumentError, 'incompatible encodings: ISO-8859-1 and UTF-8') end it "raises ArgumentError if the arguments include an ASCII-incompatible String and an ASCII-only String" do -> { Regexp.union("a".encode("UTF-16LE"), "b".encode("UTF-8")) - }.should raise_error(ArgumentError) + }.should.raise(ArgumentError, /ASCII incompatible encoding: UTF-16LE|incompatible encodings: UTF-16LE and US-ASCII/) end it "raises ArgumentError if the arguments include an ASCII-incompatible Regexp and an ASCII-only String" do -> { Regexp.union(Regexp.new("a".encode("UTF-16LE")), "b".encode("UTF-8")) - }.should raise_error(ArgumentError) + }.should.raise(ArgumentError, /ASCII incompatible encoding: UTF-16LE|incompatible encodings: UTF-16LE and US-ASCII/) end it "raises ArgumentError if the arguments include an ASCII-incompatible String and an ASCII-only Regexp" do -> { Regexp.union("a".encode("UTF-16LE"), Regexp.new("b".encode("UTF-8"))) - }.should raise_error(ArgumentError) + }.should.raise(ArgumentError, /ASCII incompatible encoding: UTF-16LE|incompatible encodings: UTF-16LE and US-ASCII/) end it "raises ArgumentError if the arguments include an ASCII-incompatible Regexp and an ASCII-only Regexp" do -> { Regexp.union(Regexp.new("a".encode("UTF-16LE")), Regexp.new("b".encode("UTF-8"))) - }.should raise_error(ArgumentError) + }.should.raise(ArgumentError, /ASCII incompatible encoding: UTF-16LE|incompatible encodings: UTF-16LE and US-ASCII/) end it "raises ArgumentError if the arguments include an ASCII-incompatible String and a String containing non-ASCII-compatible characters in a different encoding" do -> { Regexp.union("a".encode("UTF-16LE"), "\u00A9".encode("ISO-8859-1")) - }.should raise_error(ArgumentError) + }.should.raise(ArgumentError, 'incompatible 
encodings: UTF-16LE and ISO-8859-1') end it "raises ArgumentError if the arguments include an ASCII-incompatible Regexp and a String containing non-ASCII-compatible characters in a different encoding" do -> { Regexp.union(Regexp.new("a".encode("UTF-16LE")), "\u00A9".encode("ISO-8859-1")) - }.should raise_error(ArgumentError) + }.should.raise(ArgumentError, 'incompatible encodings: UTF-16LE and ISO-8859-1') end it "raises ArgumentError if the arguments include an ASCII-incompatible String and a Regexp containing non-ASCII-compatible characters in a different encoding" do -> { Regexp.union("a".encode("UTF-16LE"), Regexp.new("\u00A9".encode("ISO-8859-1"))) - }.should raise_error(ArgumentError) + }.should.raise(ArgumentError, 'incompatible encodings: UTF-16LE and ISO-8859-1') end it "raises ArgumentError if the arguments include an ASCII-incompatible Regexp and a Regexp containing non-ASCII-compatible characters in a different encoding" do -> { Regexp.union(Regexp.new("a".encode("UTF-16LE")), Regexp.new("\u00A9".encode("ISO-8859-1"))) - }.should raise_error(ArgumentError) + }.should.raise(ArgumentError, 'incompatible encodings: UTF-16LE and ISO-8859-1') end it "uses to_str to convert arguments (if not Regexp)" do @@ -154,6 +175,8 @@ describe "Regexp.union" do not_supported_on :opal do Regexp.union([/dogs/, /cats/i]).should == /(?-mix:dogs)|(?i-mx:cats)/ end - ->{Regexp.union(["skiing", "sledding"], [/dogs/, /cats/i])}.should raise_error(TypeError) + -> { + Regexp.union(["skiing", "sledding"], [/dogs/, /cats/i]) + }.should.raise(TypeError, 'no implicit conversion of Array into String') end end |
