diff options
Diffstat (limited to 'spec/ruby/core/regexp')
27 files changed, 1440 insertions, 0 deletions
diff --git a/spec/ruby/core/regexp/case_compare_spec.rb b/spec/ruby/core/regexp/case_compare_spec.rb new file mode 100644 index 0000000000..29aada70bc --- /dev/null +++ b/spec/ruby/core/regexp/case_compare_spec.rb @@ -0,0 +1,35 @@ +require_relative '../../spec_helper' + +describe "Regexp#===" do + it "is true if there is a match" do + (/abc/ === "aabcc").should == true + end + + it "is false if there is no match" do + (/abc/ === "xyz").should == false + end + + it "returns true if it matches a Symbol" do + (/a/ === :a).should == true + end + + it "returns false if it does not match a Symbol" do + (/a/ === :b).should == false + end + + # mirroring https://github.com/ruby/ruby/blob/master/test/ruby/test_regexp.rb + it "returns false if the other value cannot be coerced to a string" do + (/abc/ === nil).should == false + (/abc/ === /abc/).should == false + end + + it "uses #to_str on string-like objects" do + stringlike = Class.new do + def to_str + "abc" + end + end.new + + (/abc/ === stringlike).should == true + end +end diff --git a/spec/ruby/core/regexp/casefold_spec.rb b/spec/ruby/core/regexp/casefold_spec.rb new file mode 100644 index 0000000000..d36467a989 --- /dev/null +++ b/spec/ruby/core/regexp/casefold_spec.rb @@ -0,0 +1,8 @@ +require_relative '../../spec_helper' + +describe "Regexp#casefold?" do + it "returns the value of the case-insensitive flag" do + /abc/i.should.casefold? + /xyz/.should_not.casefold? 
+ end +end diff --git a/spec/ruby/core/regexp/compile_spec.rb b/spec/ruby/core/regexp/compile_spec.rb new file mode 100644 index 0000000000..887c8d77dc --- /dev/null +++ b/spec/ruby/core/regexp/compile_spec.rb @@ -0,0 +1,19 @@ +require_relative '../../spec_helper' +require_relative 'shared/new' + +describe "Regexp.compile" do + it_behaves_like :regexp_new, :compile +end + +describe "Regexp.compile given a String" do + it_behaves_like :regexp_new_string, :compile + it_behaves_like :regexp_new_string_binary, :compile +end + +describe "Regexp.compile given a Regexp" do + it_behaves_like :regexp_new_regexp, :compile +end + +describe "Regexp.compile given a non-String/Regexp" do + it_behaves_like :regexp_new_non_string_or_regexp, :compile +end diff --git a/spec/ruby/core/regexp/encoding_spec.rb b/spec/ruby/core/regexp/encoding_spec.rb new file mode 100644 index 0000000000..fb4fdba064 --- /dev/null +++ b/spec/ruby/core/regexp/encoding_spec.rb @@ -0,0 +1,62 @@ +# -*- encoding: utf-8 -*- +require_relative '../../spec_helper' + +describe "Regexp#encoding" do + it "returns an Encoding object" do + /glar/.encoding.should.instance_of?(Encoding) + end + + it "defaults to US-ASCII if the Regexp contains only US-ASCII character" do + /ASCII/.encoding.should == Encoding::US_ASCII + end + + it "returns US_ASCII if the 'n' modifier is supplied and only US-ASCII characters are present" do + /ASCII/n.encoding.should == Encoding::US_ASCII + end + + it "returns BINARY if the 'n' modifier is supplied and non-US-ASCII characters are present" do + /\xc2\xa1/n.encoding.should == Encoding::BINARY + end + + it "defaults to UTF-8 if \\u escapes appear" do + /\u{9879}/.encoding.should == Encoding::UTF_8 + end + + it "defaults to UTF-8 if a literal UTF-8 character appears" do + /¥/.encoding.should == Encoding::UTF_8 + end + + it "returns UTF-8 if the 'u' modifier is supplied" do + /ASCII/u.encoding.should == Encoding::UTF_8 + end + + it "returns Windows-31J if the 's' modifier is supplied" do + 
/ASCII/s.encoding.should == Encoding::Windows_31J + end + + it "returns EUC_JP if the 'e' modifier is supplied" do + /ASCII/e.encoding.should == Encoding::EUC_JP + end + + it "upgrades the encoding to that of an embedded String" do + str = "文字化け".encode('euc-jp') + /#{str}/.encoding.should == Encoding::EUC_JP + end + + it "ignores the encoding and uses US-ASCII if the string has only ASCII characters" do + str = "abc".encode('euc-jp') + str.encoding.should == Encoding::EUC_JP + /#{str}/.encoding.should == Encoding::US_ASCII + end + + it "ignores the default_internal encoding" do + old_internal = Encoding.default_internal + Encoding.default_internal = Encoding::EUC_JP + /foo/.encoding.should_not == Encoding::EUC_JP + Encoding.default_internal = old_internal + end + + it "allows otherwise invalid characters if NOENCODING is specified" do + Regexp.new('([\x00-\xFF])', Regexp::IGNORECASE | Regexp::NOENCODING).encoding.should == Encoding::BINARY + end +end diff --git a/spec/ruby/core/regexp/eql_spec.rb b/spec/ruby/core/regexp/eql_spec.rb new file mode 100644 index 0000000000..bd5ae43eb2 --- /dev/null +++ b/spec/ruby/core/regexp/eql_spec.rb @@ -0,0 +1,6 @@ +require_relative '../../spec_helper' +require_relative 'shared/equal_value' + +describe "Regexp#eql?" do + it_behaves_like :regexp_eql, :eql? 
+end diff --git a/spec/ruby/core/regexp/equal_value_spec.rb b/spec/ruby/core/regexp/equal_value_spec.rb new file mode 100644 index 0000000000..5455a30598 --- /dev/null +++ b/spec/ruby/core/regexp/equal_value_spec.rb @@ -0,0 +1,6 @@ +require_relative '../../spec_helper' +require_relative 'shared/equal_value' + +describe "Regexp#==" do + it_behaves_like :regexp_eql, :== +end diff --git a/spec/ruby/core/regexp/escape_spec.rb b/spec/ruby/core/regexp/escape_spec.rb new file mode 100644 index 0000000000..6b06ab1cbc --- /dev/null +++ b/spec/ruby/core/regexp/escape_spec.rb @@ -0,0 +1,6 @@ +require_relative '../../spec_helper' +require_relative 'shared/quote' + +describe "Regexp.escape" do + it_behaves_like :regexp_quote, :escape +end diff --git a/spec/ruby/core/regexp/fixed_encoding_spec.rb b/spec/ruby/core/regexp/fixed_encoding_spec.rb new file mode 100644 index 0000000000..5d8b1c2860 --- /dev/null +++ b/spec/ruby/core/regexp/fixed_encoding_spec.rb @@ -0,0 +1,36 @@ +# -*- encoding: utf-8 -*- +require_relative '../../spec_helper' + +describe "Regexp#fixed_encoding?" 
do + it "returns false by default" do + /needle/.fixed_encoding?.should == false + end + + it "returns false if the 'n' modifier was supplied to the Regexp" do + /needle/n.fixed_encoding?.should == false + end + + it "returns true if the 'u' modifier was supplied to the Regexp" do + /needle/u.fixed_encoding?.should == true + end + + it "returns true if the 's' modifier was supplied to the Regexp" do + /needle/s.fixed_encoding?.should == true + end + + it "returns true if the 'e' modifier was supplied to the Regexp" do + /needle/e.fixed_encoding?.should == true + end + + it "returns true if the Regexp contains a \\u escape" do + /needle \u{8768}/.fixed_encoding?.should == true + end + + it "returns true if the Regexp contains a UTF-8 literal" do + /文字化け/.fixed_encoding?.should == true + end + + it "returns true if the Regexp was created with the Regexp::FIXEDENCODING option" do + Regexp.new("", Regexp::FIXEDENCODING).fixed_encoding?.should == true + end +end diff --git a/spec/ruby/core/regexp/hash_spec.rb b/spec/ruby/core/regexp/hash_spec.rb new file mode 100644 index 0000000000..2d42e288e6 --- /dev/null +++ b/spec/ruby/core/regexp/hash_spec.rb @@ -0,0 +1,20 @@ +require_relative '../../spec_helper' + +describe "Regexp#hash" do + it "is provided" do + Regexp.new('').respond_to?(:hash).should == true + end + + it "is based on the text and options of Regexp" do + (/cat/.hash == /dog/.hash).should == false + (/dog/m.hash == /dog/m.hash).should == true + not_supported_on :opal do + (/cat/ix.hash == /cat/ixn.hash).should == true + (/cat/.hash == /cat/ix.hash).should == false + end + end + + it "returns the same value for two Regexps differing only in the /n option" do + (//.hash == //n.hash).should == true + end +end diff --git a/spec/ruby/core/regexp/initialize_spec.rb b/spec/ruby/core/regexp/initialize_spec.rb new file mode 100644 index 0000000000..1c0133acae --- /dev/null +++ b/spec/ruby/core/regexp/initialize_spec.rb @@ -0,0 +1,29 @@ +require_relative 
'../../spec_helper' + +describe "Regexp#initialize" do + it "is a private method" do + Regexp.private_instance_methods(false).should.include?(:initialize) + end + + it "raises a FrozenError on a Regexp literal" do + -> { //.send(:initialize, "") }.should.raise(FrozenError) + end + + ruby_version_is "4.1" do + it "raises a FrozenError on an initialized non-literal Regexp" do + regexp = Regexp.new("") + -> { regexp.send(:initialize, "") }.should.raise(FrozenError) + end + end + + ruby_version_is ""..."4.1" do + it "raises a TypeError on an initialized non-literal Regexp" do + -> { Regexp.new("").send(:initialize, "") }.should.raise(TypeError) + end + end + + it "raises a TypeError on an initialized non-literal Regexp subclass" do + r = Class.new(Regexp).new("") + -> { r.send(:initialize, "") }.should.raise(TypeError) + end +end diff --git a/spec/ruby/core/regexp/inspect_spec.rb b/spec/ruby/core/regexp/inspect_spec.rb new file mode 100644 index 0000000000..f4e39234f5 --- /dev/null +++ b/spec/ruby/core/regexp/inspect_spec.rb @@ -0,0 +1,44 @@ +require_relative '../../spec_helper' + +describe "Regexp#inspect" do + it "returns a formatted string that would eval to the same regexp" do + not_supported_on :opal do + /ab+c/ix.inspect.should == "/ab+c/ix" + /a(.)+s/n.inspect.should =~ %r|/a(.)+s/n?| # Default 'n' may not appear + end + # 1.9 doesn't round-trip the encoding flags, such as 'u'. This is + # seemingly by design. 
+ /a(.)+s/m.inspect.should == "/a(.)+s/m" # But a specified one does + end + + it "returns options in the order 'mixn'" do + //nixm.inspect.should == "//mixn" + end + + it "does not include the 'o' option" do + //o.inspect.should == "//" + end + + it "does not include a character set code" do + //u.inspect.should == "//" + //s.inspect.should == "//" + //e.inspect.should == "//" + end + + it "correctly escapes forward slashes /" do + Regexp.new("/foo/bar").inspect.should == "/\\/foo\\/bar/" + Regexp.new("/foo/bar[/]").inspect.should == "/\\/foo\\/bar[\\/]/" + end + + it "doesn't over escape forward slashes" do + /\/foo\/bar/.inspect.should == '/\/foo\/bar/' + end + + it "escapes 2 slashes in a row properly" do + Regexp.new("//").inspect.should == '/\/\//' + end + + it "does not over escape" do + Regexp.new('\\\/').inspect.should == "/\\\\\\//" + end +end diff --git a/spec/ruby/core/regexp/last_match_spec.rb b/spec/ruby/core/regexp/last_match_spec.rb new file mode 100644 index 0000000000..6c256cc1cf --- /dev/null +++ b/spec/ruby/core/regexp/last_match_spec.rb @@ -0,0 +1,56 @@ +require_relative '../../spec_helper' + +describe "Regexp.last_match" do + it "returns MatchData instance when not passed arguments" do + /c(.)t/ =~ 'cat' + + Regexp.last_match.should.is_a?(MatchData) + end + + it "returns the nth field in this MatchData when passed an Integer" do + /c(.)t/ =~ 'cat' + Regexp.last_match(1).should == 'a' + end + + it "returns nil when there is no match" do + /foo/ =~ "TEST123" + Regexp.last_match(:test).should == nil + Regexp.last_match(1).should == nil + Regexp.last_match(Object.new).should == nil + Regexp.last_match("test").should == nil + end + + describe "when given a Symbol" do + it "returns a named capture" do + /(?<test>[A-Z]+.*)/ =~ "TEST123" + Regexp.last_match(:test).should == "TEST123" + end + + it "raises an IndexError when given a missing name" do + /(?<test>[A-Z]+.*)/ =~ "TEST123" + -> { Regexp.last_match(:missing) }.should.raise(IndexError) + end + 
end + + describe "when given a String" do + it "returns a named capture" do + /(?<test>[A-Z]+.*)/ =~ "TEST123" + Regexp.last_match("test").should == "TEST123" + end + end + + describe "when given an Object" do + it "coerces argument to an index using #to_int" do + obj = mock("converted to int") + obj.should_receive(:to_int).and_return(1) + /(?<test>[A-Z]+.*)/ =~ "TEST123" + Regexp.last_match(obj).should == "TEST123" + end + + it "raises a TypeError when unable to coerce" do + obj = Object.new + /(?<test>[A-Z]+.*)/ =~ "TEST123" + -> { Regexp.last_match(obj) }.should.raise(TypeError) + end + end +end diff --git a/spec/ruby/core/regexp/linear_time_spec.rb b/spec/ruby/core/regexp/linear_time_spec.rb new file mode 100644 index 0000000000..f70021dfed --- /dev/null +++ b/spec/ruby/core/regexp/linear_time_spec.rb @@ -0,0 +1,80 @@ +require_relative '../../spec_helper' + +describe "Regexp.linear_time?" do + it "returns true if matching can be done in linear time" do + Regexp.linear_time?(/a/).should == true + Regexp.linear_time?('a').should == true + end + + it "returns true if matching can be done in linear time for a binary Regexp" do + Regexp.linear_time?(/[\x80-\xff]/n).should == true + end + + it "return false if matching can't be done in linear time" do + Regexp.linear_time?(/(a)\1/).should == false + Regexp.linear_time?("(a)\\1").should == false + end + + it "accepts flags for string argument" do + Regexp.linear_time?('a', Regexp::IGNORECASE).should == true + end + + it "warns about flags being ignored for regexp arguments" do + -> { + Regexp.linear_time?(/a/, Regexp::IGNORECASE) + }.should complain(/warning: flags ignored/) + end + + it "returns true for positive lookahead" do + Regexp.linear_time?(/a*(?:(?=a*)a)*b/).should == true + end + + it "returns true for positive lookbehind" do + Regexp.linear_time?(/a*(?:(?<=a)a*)*b/).should == true + end + + it "returns true for negative lookbehind" do + Regexp.linear_time?(/a*(?:(?<!a)a*)*b/).should == true + end + + # 
There are two known ways to make Regexp linear: + # * Using a DFA (deterministic finite-state automaton) Regexp engine, which always matches in linear time (e.g. TruffleRuby with TRegex) + # * Caching position and state to avoid catastrophic backtracking (e.g. CRuby: https://bugs.ruby-lang.org/issues/19104) + # + # Both approaches should be allowed, and given that DFA Regexp engines + # are much faster, there should be no specs preventing their use. + uses_regexp_caching = RUBY_ENGINE == 'ruby' + uses_dfa_regexp_engine = !uses_regexp_caching + + # The following specs should not be relied upon; + # they are here only to illustrate differences between Regexp engines. + guard -> { uses_regexp_caching } do + it "returns true for negative lookahead" do + Regexp.linear_time?(/a*(?:(?!a*)a*)*b/).should == true + end + + it "returns true for atomic groups" do + Regexp.linear_time?(/a*(?:(?>a)a*)*b/).should == true + end + + it "returns true for possessive quantifiers" do + Regexp.linear_time?(/a*(?:(?:a)?+a*)*b/).should == true + end + + it "returns true for positive lookbehind with capture group" do + Regexp.linear_time?(/.(?<=(a))/).should == true + end + end + + # The following specs should not be relied upon; + # they are here only to illustrate differences between Regexp engines. 
+ guard -> { uses_dfa_regexp_engine } do + it "returns true for non-recursive subexpression call" do + Regexp.linear_time?(/(?<a>a){0}\g<a>/).should == true + end + + it "returns true for positive lookahead with capture group" do + Regexp.linear_time?(/x+(?=(a))/).should == true + end + end +end diff --git a/spec/ruby/core/regexp/match_spec.rb b/spec/ruby/core/regexp/match_spec.rb new file mode 100644 index 0000000000..276cecc8e4 --- /dev/null +++ b/spec/ruby/core/regexp/match_spec.rb @@ -0,0 +1,146 @@ +# -*- encoding: utf-8 -*- +require_relative '../../spec_helper' + +describe :regexp_match, shared: true do + it "returns nil if there is no match" do + /xyz/.send(@method,"abxyc").should == nil + end + + it "returns nil if the object is nil" do + /\w+/.send(@method, nil).should == nil + end +end + +describe "Regexp#=~" do + it_behaves_like :regexp_match, :=~ + + it "returns the index of the first character of the matching region" do + (/(.)(.)(.)/ =~ "abc").should == 0 + end + + it "returns the index too, when argument is a Symbol" do + (/(.)(.)(.)/ =~ :abc).should == 0 + end +end + +describe "Regexp#match" do + it_behaves_like :regexp_match, :match + + it "returns a MatchData object" do + /(.)(.)(.)/.match("abc").should.is_a?(MatchData) + end + + it "returns a MatchData object, when argument is a Symbol" do + /(.)(.)(.)/.match(:abc).should.is_a?(MatchData) + end + + it "raises a TypeError on an uninitialized Regexp" do + -> { Regexp.allocate.match('foo') }.should.raise(TypeError) + end + + it "raises TypeError on an uninitialized Regexp" do + -> { Regexp.allocate.match('foo'.encode("UTF-16LE")) }.should.raise(TypeError) + end + + describe "with [string, position]" do + describe "when given a positive position" do + it "matches the input at a given position" do + /(.).(.)/.match("01234", 1).captures.should == ["1", "3"] + end + + it "uses the start as a character offset" do + /(.).(.)/.match("零一二三四", 1).captures.should == ["一", "三"] + end + + it "raises an 
ArgumentError for an invalid encoding" do + x96 = ([150].pack('C')).force_encoding('utf-8') + -> { /(.).(.)/.match("Hello, #{x96} world!", 1) }.should.raise(ArgumentError) + end + end + + describe "when given a negative position" do + it "matches the input at a given position" do + /(.).(.)/.match("01234", -4).captures.should == ["1", "3"] + end + + it "uses the start as a character offset" do + /(.).(.)/.match("零一二三四", -4).captures.should == ["一", "三"] + end + + it "raises an ArgumentError for an invalid encoding" do + x96 = ([150].pack('C')).force_encoding('utf-8') + -> { /(.).(.)/.match("Hello, #{x96} world!", -1) }.should.raise(ArgumentError) + end + end + + describe "when passed a block" do + it "yields the MatchData" do + /./.match("abc") {|m| ScratchPad.record m } + ScratchPad.recorded.should.is_a?(MatchData) + end + + it "returns the block result" do + /./.match("abc") { :result }.should == :result + end + + it "does not yield if there is no match" do + ScratchPad.record [] + /a/.match("b") {|m| ScratchPad << m } + ScratchPad.recorded.should == [] + end + end + end + + it "resets $~ if passed nil" do + # set $~ + /./.match("a") + $~.should.is_a?(MatchData) + + /1/.match(nil) + $~.should == nil + end + + it "raises TypeError when the given argument cannot be coerced to String" do + f = 1 + -> { /foo/.match(f)[0] }.should.raise(TypeError) + end + + it "raises TypeError when the given argument is an Exception" do + f = Exception.new("foo") + -> { /foo/.match(f)[0] }.should.raise(TypeError) + end +end + +describe "Regexp#match?" 
do + before :each do + # Resetting Regexp.last_match + /DONTMATCH/.match '' + end + + context "when matches the given value" do + it "returns true but does not set Regexp.last_match" do + /string/i.match?('string').should == true + Regexp.last_match.should == nil + end + end + + it "returns false when does not match the given value" do + /STRING/.match?('string').should == false + end + + it "takes matching position as the 2nd argument" do + /str/i.match?('string', 0).should == true + /str/i.match?('string', 1).should == false + end + + it "returns false when given nil" do + /./.match?(nil).should == false + end +end + +describe "Regexp#~" do + it "matches against the contents of $_" do + $_ = "input data" + (~ /at/).should == 7 + end +end diff --git a/spec/ruby/core/regexp/named_captures_spec.rb b/spec/ruby/core/regexp/named_captures_spec.rb new file mode 100644 index 0000000000..4d3fdd23ab --- /dev/null +++ b/spec/ruby/core/regexp/named_captures_spec.rb @@ -0,0 +1,35 @@ +require_relative '../../spec_helper' + +describe "Regexp#named_captures" do + it "returns a Hash" do + /foo/.named_captures.should.instance_of?(Hash) + end + + it "returns an empty Hash when there are no capture groups" do + /foo/.named_captures.should == {} + end + + it "sets the keys of the Hash to the names of the capture groups" do + rex = /this (?<is>is) [aA] (?<pat>pate?rn)/ + rex.named_captures.keys.should == ['is','pat'] + end + + it "sets the values of the Hash to Arrays" do + rex = /this (?<is>is) [aA] (?<pat>pate?rn)/ + rex.named_captures.values.each do |value| + value.should.instance_of?(Array) + end + end + + it "sets each element of the Array to the corresponding group's index" do + rex = /this (?<is>is) [aA] (?<pat>pate?rn)/ + rex.named_captures['is'].should == [1] + rex.named_captures['pat'].should == [2] + end + + it "works with duplicate capture group names" do + rex = /this (?<is>is) [aA] (?<pat>pate?(?<is>rn))/ + rex.named_captures['is'].should == [1,3] + 
rex.named_captures['pat'].should == [2] + end +end diff --git a/spec/ruby/core/regexp/names_spec.rb b/spec/ruby/core/regexp/names_spec.rb new file mode 100644 index 0000000000..9013f41e20 --- /dev/null +++ b/spec/ruby/core/regexp/names_spec.rb @@ -0,0 +1,29 @@ +require_relative '../../spec_helper' + +describe "Regexp#names" do + it "returns an Array" do + /foo/.names.should.instance_of?(Array) + end + + it "returns an empty Array if there are no named captures" do + /needle/.names.should == [] + end + + it "returns each named capture as a String" do + /n(?<cap>ee)d(?<ture>le)/.names.each do |name| + name.should.instance_of?(String) + end + end + + it "returns all of the named captures" do + /n(?<cap>ee)d(?<ture>le)/.names.should == ['cap', 'ture'] + end + + it "works with nested named captures" do + /n(?<cap>eed(?<ture>le))/.names.should == ['cap', 'ture'] + end + + it "returns each capture name only once" do + /n(?<cap>ee)d(?<cap>le)/.names.should == ['cap'] + end +end diff --git a/spec/ruby/core/regexp/new_spec.rb b/spec/ruby/core/regexp/new_spec.rb new file mode 100644 index 0000000000..79210e9a23 --- /dev/null +++ b/spec/ruby/core/regexp/new_spec.rb @@ -0,0 +1,19 @@ +require_relative '../../spec_helper' +require_relative 'shared/new' + +describe "Regexp.new" do + it_behaves_like :regexp_new, :new +end + +describe "Regexp.new given a String" do + it_behaves_like :regexp_new_string, :new + it_behaves_like :regexp_new_string_binary, :new +end + +describe "Regexp.new given a Regexp" do + it_behaves_like :regexp_new_regexp, :new +end + +describe "Regexp.new given a non-String/Regexp" do + it_behaves_like :regexp_new_non_string_or_regexp, :new +end diff --git a/spec/ruby/core/regexp/options_spec.rb b/spec/ruby/core/regexp/options_spec.rb new file mode 100644 index 0000000000..c3401cee6e --- /dev/null +++ b/spec/ruby/core/regexp/options_spec.rb @@ -0,0 +1,54 @@ +require_relative '../../spec_helper' + +describe "Regexp#options" do + it "returns an Integer bitvector of 
regexp options for the Regexp object" do + /cat/.options.should.is_a?(Integer) + not_supported_on :opal do + /cat/ix.options.should.is_a?(Integer) + end + end + + it "allows checking for presence of a certain option with bitwise &" do + (/cat/.options & Regexp::IGNORECASE).should == 0 + (/cat/i.options & Regexp::IGNORECASE).should_not == 0 + (/cat/.options & Regexp::MULTILINE).should == 0 + (/cat/m.options & Regexp::MULTILINE).should_not == 0 + not_supported_on :opal do + (/cat/.options & Regexp::EXTENDED).should == 0 + (/cat/x.options & Regexp::EXTENDED).should_not == 0 + (/cat/mx.options & Regexp::MULTILINE).should_not == 0 + (/cat/mx.options & Regexp::EXTENDED).should_not == 0 + (/cat/xi.options & Regexp::IGNORECASE).should_not == 0 + (/cat/xi.options & Regexp::EXTENDED).should_not == 0 + end + end + + it "returns 0 for a Regexp literal without options" do + //.options.should == 0 + /abc/.options.should == 0 + end + + it "raises a TypeError on an uninitialized Regexp" do + -> { Regexp.allocate.options }.should.raise(TypeError) + end + + it "includes Regexp::FIXEDENCODING for a Regexp literal with the 'u' option" do + (//u.options & Regexp::FIXEDENCODING).should_not == 0 + end + + it "includes Regexp::FIXEDENCODING for a Regexp literal with the 'e' option" do + (//e.options & Regexp::FIXEDENCODING).should_not == 0 + end + + it "includes Regexp::FIXEDENCODING for a Regexp literal with the 's' option" do + (//s.options & Regexp::FIXEDENCODING).should_not == 0 + end + + it "does not include Regexp::FIXEDENCODING for a Regexp literal with the 'n' option" do + (//n.options & Regexp::FIXEDENCODING).should == 0 + end + + it "includes Regexp::NOENCODING for a Regexp literal with the 'n' option" do + (//n.options & Regexp::NOENCODING).should_not == 0 + end +end diff --git a/spec/ruby/core/regexp/quote_spec.rb b/spec/ruby/core/regexp/quote_spec.rb new file mode 100644 index 0000000000..370ab13e30 --- /dev/null +++ b/spec/ruby/core/regexp/quote_spec.rb @@ -0,0 +1,6 @@ 
+require_relative '../../spec_helper' +require_relative 'shared/quote' + +describe "Regexp.quote" do + it_behaves_like :regexp_quote, :quote +end diff --git a/spec/ruby/core/regexp/shared/equal_value.rb b/spec/ruby/core/regexp/shared/equal_value.rb new file mode 100644 index 0000000000..803988de9e --- /dev/null +++ b/spec/ruby/core/regexp/shared/equal_value.rb @@ -0,0 +1,31 @@ +describe :regexp_eql, shared: true do + it "is true if self and other have the same pattern" do + /abc/.send(@method, /abc/).should == true + /abc/.send(@method, /abd/).should == false + end + + not_supported_on :opal do + it "is true if self and other have the same character set code" do + /abc/.send(@method, /abc/x).should == false + /abc/x.send(@method, /abc/x).should == true + /abc/u.send(@method, /abc/n).should == false + /abc/u.send(@method, /abc/u).should == true + /abc/n.send(@method, /abc/n).should == true + end + end + + it "is true if other has the same #casefold? values" do + /abc/.send(@method, /abc/i).should == false + /abc/i.send(@method, /abc/i).should == true + end + + not_supported_on :opal do + it "is true if self does not specify /n option and other does" do + //.send(@method, //n).should == true + end + + it "is true if self specifies /n option and other does not" do + //n.send(@method, //).should == true + end + end +end diff --git a/spec/ruby/core/regexp/shared/new.rb b/spec/ruby/core/regexp/shared/new.rb new file mode 100644 index 0000000000..affdaf855c --- /dev/null +++ b/spec/ruby/core/regexp/shared/new.rb @@ -0,0 +1,321 @@ +# encoding: binary + +describe :regexp_new, shared: true do + it "requires one argument and creates a new regular expression object" do + Regexp.send(@method, '').is_a?(Regexp).should == true + end + + ruby_version_is "4.1" do + it "is frozen" do + Regexp.send(@method, '').should.frozen? 
+ end + end + + it "works by default for subclasses with overridden #initialize" do + class RegexpSpecsSubclass < Regexp + def initialize(*args) + super + @args = args + end + + attr_accessor :args + end + + class RegexpSpecsSubclassTwo < Regexp; end + + RegexpSpecsSubclass.send(@method, "hi").should.is_a?(RegexpSpecsSubclass) + RegexpSpecsSubclass.send(@method, "hi").args.first.should == "hi" + + RegexpSpecsSubclassTwo.send(@method, "hi").should.is_a?(RegexpSpecsSubclassTwo) + end +end + +describe :regexp_new_non_string_or_regexp, shared: true do + it "calls #to_str method for non-String/Regexp argument" do + obj = Object.new + def obj.to_str() "a" end + + Regexp.send(@method, obj).should == /a/ + end + + it "raises TypeError if there is no #to_str method for non-String/Regexp argument" do + obj = Object.new + -> { Regexp.send(@method, obj) }.should.raise(TypeError, "no implicit conversion of Object into String") + + -> { Regexp.send(@method, 1) }.should.raise(TypeError, "no implicit conversion of Integer into String") + -> { Regexp.send(@method, 1.0) }.should.raise(TypeError, "no implicit conversion of Float into String") + -> { Regexp.send(@method, :symbol) }.should.raise(TypeError, "no implicit conversion of Symbol into String") + -> { Regexp.send(@method, []) }.should.raise(TypeError, "no implicit conversion of Array into String") + end + + it "raises TypeError if #to_str returns non-String value" do + obj = Object.new + def obj.to_str() [] end + + -> { Regexp.send(@method, obj) }.should raise_consistent_error(TypeError, /can't convert Object into String/) + end +end + +describe :regexp_new_string, shared: true do + it "uses the String argument as an unescaped literal to construct a Regexp object" do + Regexp.send(@method, "^hi{2,3}fo.o$").should == /^hi{2,3}fo.o$/ + end + + it "raises a RegexpError when passed an incorrect regexp" do + -> { Regexp.send(@method, "^[$", 0) }.should.raise(RegexpError, Regexp.new(Regexp.escape("premature end of char-class: 
/^[$/"))) + end + + it "does not set Regexp options if only given one argument" do + r = Regexp.send(@method, 'Hi') + (r.options & Regexp::IGNORECASE).should == 0 + (r.options & Regexp::MULTILINE).should == 0 + not_supported_on :opal do + (r.options & Regexp::EXTENDED).should == 0 + end + end + + it "does not set Regexp options if second argument is nil or false" do + r = Regexp.send(@method, 'Hi', nil) + (r.options & Regexp::IGNORECASE).should == 0 + (r.options & Regexp::MULTILINE).should == 0 + not_supported_on :opal do + (r.options & Regexp::EXTENDED).should == 0 + end + + r = Regexp.send(@method, 'Hi', false) + (r.options & Regexp::IGNORECASE).should == 0 + (r.options & Regexp::MULTILINE).should == 0 + not_supported_on :opal do + (r.options & Regexp::EXTENDED).should == 0 + end + end + + it "sets options from second argument if it is true" do + r = Regexp.send(@method, 'Hi', true) + (r.options & Regexp::IGNORECASE).should_not == 0 + (r.options & Regexp::MULTILINE).should == 0 + not_supported_on :opal do + (r.options & Regexp::EXTENDED).should == 0 + end + end + + it "sets options from second argument if it is one of the Integer option constants" do + r = Regexp.send(@method, 'Hi', Regexp::IGNORECASE) + (r.options & Regexp::IGNORECASE).should_not == 0 + (r.options & Regexp::MULTILINE).should == 0 + not_supported_on :opal do + (r.options & Regexp::EXTENDED).should == 0 + end + + r = Regexp.send(@method, 'Hi', Regexp::MULTILINE) + (r.options & Regexp::IGNORECASE).should == 0 + (r.options & Regexp::MULTILINE).should_not == 0 + not_supported_on :opal do + (r.options & Regexp::EXTENDED).should == 0 + end + + not_supported_on :opal do + r = Regexp.send(@method, 'Hi', Regexp::EXTENDED) + (r.options & Regexp::IGNORECASE).should == 0 + (r.options & Regexp::MULTILINE).should == 0 + (r.options & Regexp::EXTENDED).should_not == 1 + end + end + + it "accepts an Integer of two or more options ORed together as the second argument" do + r = Regexp.send(@method, 'Hi', 
Regexp::IGNORECASE | Regexp::EXTENDED) + (r.options & Regexp::IGNORECASE).should_not == 0 + (r.options & Regexp::MULTILINE).should == 0 + (r.options & Regexp::EXTENDED).should_not == 0 + end + + it "does not try to convert the second argument to Integer with #to_int method call" do + ScratchPad.clear + obj = Object.new + def obj.to_int() ScratchPad.record(:called) end + + -> { + Regexp.send(@method, "Hi", obj) + }.should complain(/expected true or false as ignorecase/, {verbose: true}) + + ScratchPad.recorded.should == nil + end + + it "warns any non-Integer, non-nil, non-false second argument" do + r = nil + -> { + r = Regexp.send(@method, 'Hi', Object.new) + }.should complain(/expected true or false as ignorecase/, {verbose: true}) + (r.options & Regexp::IGNORECASE).should_not == 0 + (r.options & Regexp::MULTILINE).should == 0 + not_supported_on :opal do + (r.options & Regexp::EXTENDED).should == 0 + end + end + + it "accepts a String of supported flags as the second argument" do + r = Regexp.send(@method, 'Hi', 'i') + (r.options & Regexp::IGNORECASE).should_not == 0 + (r.options & Regexp::MULTILINE).should == 0 + not_supported_on :opal do + (r.options & Regexp::EXTENDED).should == 0 + end + + r = Regexp.send(@method, 'Hi', 'imx') + (r.options & Regexp::IGNORECASE).should_not == 0 + (r.options & Regexp::MULTILINE).should_not == 0 + not_supported_on :opal do + (r.options & Regexp::EXTENDED).should_not == 0 + end + + r = Regexp.send(@method, 'Hi', 'mimi') + (r.options & Regexp::IGNORECASE).should_not == 0 + (r.options & Regexp::MULTILINE).should_not == 0 + not_supported_on :opal do + (r.options & Regexp::EXTENDED).should == 0 + end + + r = Regexp.send(@method, 'Hi', '') + (r.options & Regexp::IGNORECASE).should == 0 + (r.options & Regexp::MULTILINE).should == 0 + not_supported_on :opal do + (r.options & Regexp::EXTENDED).should == 0 + end + end + + it "raises an Argument error if the second argument contains unsupported chars" do + -> { Regexp.send(@method, 'Hi', 
'e') }.should.raise(ArgumentError, "unknown regexp option: e") + -> { Regexp.send(@method, 'Hi', 'n') }.should.raise(ArgumentError, "unknown regexp option: n") + -> { Regexp.send(@method, 'Hi', 's') }.should.raise(ArgumentError, "unknown regexp option: s") + -> { Regexp.send(@method, 'Hi', 'u') }.should.raise(ArgumentError, "unknown regexp option: u") + -> { Regexp.send(@method, 'Hi', 'j') }.should.raise(ArgumentError, "unknown regexp option: j") + -> { Regexp.send(@method, 'Hi', 'mjx') }.should.raise(ArgumentError, /unknown regexp option: mjx\b/) + end + + describe "with escaped characters" do + it "raises a Regexp error if there is a trailing backslash" do + -> { Regexp.send(@method, "\\") }.should.raise(RegexpError, Regexp.new(Regexp.escape("too short escape sequence: /\\/"))) + end + + it "does not raise a Regexp error if there is an escaped trailing backslash" do + -> { Regexp.send(@method, "\\\\") }.should_not.raise(RegexpError) + end + + it "accepts a backspace followed by a non-special character" do + Regexp.send(@method, "\\N").should == /#{"\x5c"+"N"}/ + end + + it "raises a RegexpError if \\x is not followed by any hexadecimal digits" do + -> { Regexp.send(@method, "\\" + "xn") }.should.raise(RegexpError, Regexp.new(Regexp.escape("invalid hex escape: /\\xn/"))) + end + + it "raises a RegexpError if less than four digits are given for \\uHHHH" do + -> { Regexp.send(@method, "\\" + "u304") }.should.raise(RegexpError, Regexp.new(Regexp.escape("invalid Unicode escape: /\\u304/"))) + end + + it "raises a RegexpError if the \\u{} escape is empty" do + -> { Regexp.send(@method, "\\" + "u{}") }.should.raise(RegexpError, Regexp.new(Regexp.escape("invalid Unicode list: /\\u{}/"))) + end + + it "raises a RegexpError if the \\u{} escape contains non hexadecimal digits" do + -> { Regexp.send(@method, "\\" + "u{abcX}") }.should.raise(RegexpError, Regexp.new(Regexp.escape("invalid Unicode list: /\\u{abcX}/"))) + end + + it "raises a RegexpError if more than six 
hexadecimal digits are given" do + -> { Regexp.send(@method, "\\" + "u{0ffffff}") }.should.raise(RegexpError, Regexp.new(Regexp.escape("invalid Unicode range: /\\u{0ffffff}/"))) + end + + it "returns a Regexp with US-ASCII encoding if only 7-bit ASCII characters are present regardless of the input String's encoding" do + Regexp.send(@method, "abc").encoding.should == Encoding::US_ASCII + end + + it "returns a Regexp with source String having US-ASCII encoding if only 7-bit ASCII characters are present regardless of the input String's encoding" do + Regexp.send(@method, "abc").source.encoding.should == Encoding::US_ASCII + end + + it "returns a Regexp with US-ASCII encoding if UTF-8 escape sequences using only 7-bit ASCII are present" do + Regexp.send(@method, "\u{61}").encoding.should == Encoding::US_ASCII + end + + it "returns a Regexp with source String having US-ASCII encoding if UTF-8 escape sequences using only 7-bit ASCII are present" do + Regexp.send(@method, "\u{61}").source.encoding.should == Encoding::US_ASCII + end + + it "returns a Regexp with UTF-8 encoding if any UTF-8 escape sequences outside 7-bit ASCII are present" do + Regexp.send(@method, "\u{ff}").encoding.should == Encoding::UTF_8 + end + + it "returns a Regexp with source String having UTF-8 encoding if any UTF-8 escape sequences outside 7-bit ASCII are present" do + Regexp.send(@method, "\u{ff}").source.encoding.should == Encoding::UTF_8 + end + + it "returns a Regexp with the input String's encoding" do + str = "\x82\xa0".dup.force_encoding(Encoding::Shift_JIS) + Regexp.send(@method, str).encoding.should == Encoding::Shift_JIS + end + + it "returns a Regexp with source String having the input String's encoding" do + str = "\x82\xa0".dup.force_encoding(Encoding::Shift_JIS) + Regexp.send(@method, str).source.encoding.should == Encoding::Shift_JIS + end + end +end + +describe :regexp_new_string_binary, shared: true do + describe "with escaped characters" do + end +end + +describe 
:regexp_new_regexp, shared: true do + it "uses the argument as a literal to construct a Regexp object" do + Regexp.send(@method, /^hi{2,3}fo.o$/).should == /^hi{2,3}fo.o$/ + end + + it "preserves any options given in the Regexp literal" do + (Regexp.send(@method, /Hi/i).options & Regexp::IGNORECASE).should_not == 0 + (Regexp.send(@method, /Hi/m).options & Regexp::MULTILINE).should_not == 0 + not_supported_on :opal do + (Regexp.send(@method, /Hi/x).options & Regexp::EXTENDED).should_not == 0 + end + + not_supported_on :opal do + r = Regexp.send @method, /Hi/imx + (r.options & Regexp::IGNORECASE).should_not == 0 + (r.options & Regexp::MULTILINE).should_not == 0 + (r.options & Regexp::EXTENDED).should_not == 0 + end + + r = Regexp.send @method, /Hi/ + (r.options & Regexp::IGNORECASE).should == 0 + (r.options & Regexp::MULTILINE).should == 0 + not_supported_on :opal do + (r.options & Regexp::EXTENDED).should == 0 + end + end + + it "does not honour options given as additional arguments" do + r = nil + -> { + r = Regexp.send @method, /hi/, Regexp::IGNORECASE + }.should complain(/flags ignored/) + (r.options & Regexp::IGNORECASE).should == 0 + end + + not_supported_on :opal do + it "sets the encoding to UTF-8 if the Regexp literal has the 'u' option" do + Regexp.send(@method, /Hi/u).encoding.should == Encoding::UTF_8 + end + + it "sets the encoding to EUC-JP if the Regexp literal has the 'e' option" do + Regexp.send(@method, /Hi/e).encoding.should == Encoding::EUC_JP + end + + it "sets the encoding to Windows-31J if the Regexp literal has the 's' option" do + Regexp.send(@method, /Hi/s).encoding.should == Encoding::Windows_31J + end + + it "sets the encoding to US-ASCII if the Regexp literal has the 'n' option and the source String is ASCII only" do + Regexp.send(@method, /Hi/n).encoding.should == Encoding::US_ASCII + end + end +end diff --git a/spec/ruby/core/regexp/shared/quote.rb b/spec/ruby/core/regexp/shared/quote.rb new file mode 100644 index 0000000000..083f12d78c 
# encoding: binary
# ---- spec/ruby/core/regexp/shared/quote.rb ----
#
# Shared examples for the Regexp quoting method. Every example invokes the
# Regexp class method named by @method (presumably supplied by the including
# spec through it_behaves_like -- confirm against the callers) and checks the
# returned String or its encoding.

describe :regexp_quote, shared: true do
  it "escapes any characters with special meaning in a regular expression" do
    # Single-quoted inputs: backslashes reach the method untouched.
    quoted = Regexp.send(@method, '\*?{}.+^$[]()- ')
    quoted.should == '\\\\\*\?\{\}\.\+\^\$\[\]\(\)\-\\ '

    # Double-quoted inputs: Ruby processes the escapes before the call.
    quoted = Regexp.send(@method, "\*?{}.+^$[]()- ")
    quoted.should == '\\*\\?\\{\\}\\.\\+\\^\\$\\[\\]\\(\\)\\-\\ '

    quoted = Regexp.send(@method, '\n\r\f\t')
    quoted.should == '\\\\n\\\\r\\\\f\\\\t'

    quoted = Regexp.send(@method, "\n\r\f\t")
    quoted.should == '\\n\\r\\f\\t'
  end

  it "works with symbols" do
    quoted = Regexp.send(@method, :symbol)
    quoted.should == 'symbol'
  end

  it "works with substrings" do
    # The input is a slice of a longer literal rather than a fresh String.
    slice = ".+[]()"[1...-1]
    quoted = Regexp.send(@method, slice)
    quoted.should == '\+\[\]\('
  end

  it "works for broken strings" do
    broken_ascii = "a.\x85b.".dup.force_encoding("US-ASCII")
    expected_ascii = "a\\.\x85b\\.".dup.force_encoding("US-ASCII")
    Regexp.send(@method, broken_ascii).should == expected_ascii

    broken_utf8 = "a.\x80".dup.force_encoding("UTF-8")
    expected_utf8 = "a\\.\x80".dup.force_encoding("UTF-8")
    Regexp.send(@method, broken_utf8).should == expected_utf8
  end

  it "sets the encoding of the result to US-ASCII if there are only US-ASCII characters present in the input String" do
    ascii_only = "abc".dup.force_encoding("euc-jp")
    quoted = Regexp.send(@method, ascii_only)
    quoted.encoding.should == Encoding::US_ASCII
  end

  it "sets the encoding of the result to the encoding of the String if any non-US-ASCII characters are present in an input String with valid encoding" do
    multibyte = "ありがとう".dup.force_encoding("utf-8")
    multibyte.valid_encoding?.should == true
    quoted = Regexp.send(@method, multibyte)
    quoted.encoding.should == Encoding::UTF_8
  end

  it "sets the encoding of the result to BINARY if any non-US-ASCII characters are present in an input String with invalid encoding" do
    invalid = "\xff".dup.force_encoding "us-ascii"
    invalid.valid_encoding?.should == false
    # NOTE(review): the call below quotes a fresh "\xff" literal, not `invalid`;
    # confirm whether that is intentional before changing it.
    quoted = Regexp.send(@method, "\xff")
    quoted.encoding.should == Encoding::BINARY
  end
end

# diff --git a/spec/ruby/core/regexp/source_spec.rb b/spec/ruby/core/regexp/source_spec.rb new file mode 100644 index 0000000000..4eebf280f0 ---
# -*- encoding: utf-8 -*-
# ---- spec/ruby/core/regexp/source_spec.rb ----
require_relative '../../spec_helper'

# Regexp#source: the pattern text, with escapes kept or dropped as each
# example below spells out.
describe "Regexp#source" do
  it "returns the original string of the pattern" do
    not_supported_on :opal do
      /ab+c/ix.source.should == "ab+c"
    end
    /x(.)xz/.source.should == "x(.)xz"
  end

  it "keeps escape sequences as is" do
    /\x20\+/.source.should == '\x20\+'
  end

  describe "escaping" do
    it "keeps escaping of metacharacter" do
      /\$/.source.should == "\\$"
    end

    it "keeps escaping of metacharacter used as a terminator" do
      %r+\++.source.should == "\\+"
    end

    it "removes escaping of non-metacharacter used as a terminator" do
      %r@\@@.source.should == "@"
    end

    it "keeps escaping of non-metacharacter not used as a terminator" do
      /\@/.source.should == "\\@"
    end
  end

  not_supported_on :opal do
    it "has US-ASCII encoding when created from an ASCII-only \\u{} literal" do
      regexp = /[\u{20}-\u{7E}]/
      regexp.source.encoding.should.equal?(Encoding::US_ASCII)
    end
  end

  not_supported_on :opal do
    it "has UTF-8 encoding when created from a non-ASCII-only \\u{} literal" do
      regexp = /[\u{20}-\u{7EE}]/
      regexp.source.encoding.should.equal?(Encoding::UTF_8)
    end
  end
end

# ---- spec/ruby/core/regexp/timeout_spec.rb ----
require_relative '../../spec_helper'

# Regexp.timeout: the global setting and the per-Regexp `timeout:` keyword.
describe "Regexp.timeout" do
  # Reset the global timeout so one example cannot leak into the next.
  after :each do
    Regexp.timeout = nil
  end

  it "returns global timeout" do
    Regexp.timeout = 3
    Regexp.timeout.should == 3
  end

  it "raises Regexp::TimeoutError after global timeout elapsed" do
    Regexp.timeout = 0.001
    Regexp.timeout.should == 0.001

    slow_match = -> {
      # A typical ReDoS case
      /^(a*)*$/ =~ "a" * 1_000_000 + "x"
    }
    slow_match.should.raise(Regexp::TimeoutError, "regexp match timeout")
  end

  it "raises Regexp::TimeoutError after timeout keyword value elapsed" do
    Regexp.timeout = 3 # This should be ignored
    Regexp.timeout.should == 3

    slow_regexp = Regexp.new("^a*b?a*$", timeout: 0.001)

    slow_match = -> { slow_regexp =~ "a" * 1_000_000 + "x" }
    slow_match.should.raise(Regexp::TimeoutError, "regexp match timeout")
  end
end

# ---- spec/ruby/core/regexp/to_s_spec.rb ----
require_relative '../../spec_helper'

# Regexp#to_s: the "(?on-off:source)" rendering of a pattern and its options.
describe "Regexp#to_s" do
  not_supported_on :opal do
    it "displays options if included" do
      /abc/mxi.to_s.should == "(?mix:abc)"
    end
  end

  it "shows non-included options after a - sign" do
    /abc/i.to_s.should == "(?i-mx:abc)"
  end

  it "shows all options as excluded if none are selected" do
    /abc/.to_s.should == "(?-mix:abc)"
  end

  it "shows the pattern after the options" do
    not_supported_on :opal do
      /ab+c/mix.to_s.should == "(?mix:ab+c)"
    end
    /xyz/.to_s.should == "(?-mix:xyz)"
  end

  not_supported_on :opal do
    it "displays groups with options" do
      /(?ix:foo)(?m:bar)/.to_s.should == "(?-mix:(?ix:foo)(?m:bar))"
      /(?ix:foo)bar/m.to_s.should == "(?m-ix:(?ix:foo)bar)"
    end

    it "displays single group with same options as main regex as the main regex" do
      /(?i:nothing outside this group)/.to_s.should == "(?i-mx:nothing outside this group)"
    end
  end

  not_supported_on :opal do
    it "deals properly with uncaptured groups" do
      /whatever(?:0d)/ix.to_s.should == "(?ix-m:whatever(?:0d))"
    end
  end

  it "deals properly with the two types of lookahead groups" do
    /(?=5)/.to_s.should == "(?-mix:(?=5))"
    /(?!5)/.to_s.should == "(?-mix:(?!5))"
  end

  it "returns a string in (?xxx:yyy) notation" do
    not_supported_on :opal do
      /ab+c/ix.to_s.should == "(?ix-m:ab+c)"
      /jis/s.to_s.should == "(?-mix:jis)"
      /(?i:.)/.to_s.should == "(?i-mx:.)"
    end
    /(?:.)/.to_s.should == "(?-mix:.)"
  end

  not_supported_on :opal do
    it "handles abusive option groups" do
      /(?mmmmix-miiiix:)/.to_s.should == '(?-mix:)'
    end
  end
end

# ---- spec/ruby/core/regexp/try_convert_spec.rb ----
require_relative '../../spec_helper'

# Regexp.try_convert: pass-through for Regexps, #to_regexp coercion otherwise.
describe "Regexp.try_convert" do
  not_supported_on :opal do
    it "returns the argument if given a Regexp" do
      Regexp.try_convert(/foo/s).should == /foo/s
    end
  end

  it "returns nil if given an argument that can't be converted to a Regexp" do
    unconvertible = ['', 'glark', [], Object.new, :pat]
    unconvertible.each { |candidate| Regexp.try_convert(candidate).should == nil }
  end

  it "tries to coerce the argument by calling #to_regexp" do
    convertible = mock('regexp')
    convertible.should_receive(:to_regexp).and_return(/(p(a)t[e]rn)/)
    Regexp.try_convert(convertible).should == /(p(a)t[e]rn)/
  end

  it "raises a TypeError if the object does not return an Regexp from #to_regexp" do
    bad_converter = mock("regexp")
    bad_converter.should_receive(:to_regexp).and_return("string")
    conversion = -> { Regexp.try_convert(bad_converter) }
    conversion.should raise_consistent_error(TypeError, "can't convert MockObject into Regexp (MockObject#to_regexp gives String)")
  end
end

# ---- spec/ruby/core/regexp/union_spec.rb ----
# encoding: utf-8

require_relative '../../spec_helper'

# Regexp.union: result pattern, result encoding, encoding conflicts, and
# argument coercion.
describe "Regexp.union" do
  it "returns /(?!)/ when passed no arguments" do
    Regexp.union.should == /(?!)/
  end

  it "returns a regular expression that will match passed arguments" do
    Regexp.union("penzance").should == /penzance/
    Regexp.union("skiing", "sledding").should == /skiing|sledding/
    not_supported_on :opal do
      Regexp.union(/dogs/, /cats/i).should == /(?-mix:dogs)|(?i-mx:cats)/
    end
  end

  it "quotes any string arguments" do
    Regexp.union("n", ".").should == /n|\./
  end

  it "returns a Regexp with the encoding of an ASCII-incompatible String argument" do
    union = Regexp.union("a".encode("UTF-16LE"))
    union.encoding.should == Encoding::UTF_16LE
  end

  it "returns a Regexp with the encoding of a String containing non-ASCII-compatible characters" do
    union = Regexp.union("\u00A9".encode("ISO-8859-1"))
    union.encoding.should == Encoding::ISO_8859_1
  end

  it "returns a Regexp with US-ASCII encoding if all arguments are ASCII-only" do
    union = Regexp.union("a".encode("UTF-8"), "b".encode("SJIS"))
    union.encoding.should == Encoding::US_ASCII
  end

  it "returns a Regexp with the encoding of multiple non-conflicting ASCII-incompatible String arguments" do
    union = Regexp.union("a".encode("UTF-16LE"), "b".encode("UTF-16LE"))
    union.encoding.should == Encoding::UTF_16LE
  end

  it "returns a Regexp with the encoding of multiple non-conflicting Strings containing non-ASCII-compatible characters" do
    union = Regexp.union("\u00A9".encode("ISO-8859-1"), "\u00B0".encode("ISO-8859-1"))
    union.encoding.should == Encoding::ISO_8859_1
  end

  it "returns a Regexp with the encoding of a String containing non-ASCII-compatible characters and another ASCII-only String" do
    union = Regexp.union("\u00A9".encode("ISO-8859-1"), "a".encode("UTF-8"))
    union.encoding.should == Encoding::ISO_8859_1
  end

  it "returns ASCII-8BIT if the regexp encodings are ASCII-8BIT and at least one has non-ASCII characters" do
    us_ascii_implicit = /abc/
    us_ascii_explicit = /[\x00-\x7f]/n
    binary = /[\x80-\xBF]/n

    us_ascii_implicit.encoding.should == Encoding::US_ASCII
    us_ascii_explicit.encoding.should == Encoding::US_ASCII
    binary.encoding.should == Encoding::BINARY

    # Every ordering of the three patterns must yield a BINARY union.
    [us_ascii_implicit, us_ascii_explicit, binary].permutation.each do |ordering|
      Regexp.union(*ordering).encoding.should == Encoding::BINARY
    end
  end

  it "return US-ASCII if all patterns are ASCII-only" do
    Regexp.union(/abc/e, /def/e).encoding.should == Encoding::US_ASCII
    Regexp.union(/abc/n, /def/n).encoding.should == Encoding::US_ASCII
    Regexp.union(/abc/s, /def/s).encoding.should == Encoding::US_ASCII
    Regexp.union(/abc/u, /def/u).encoding.should == Encoding::US_ASCII
  end

  it "returns a Regexp with UTF-8 if one part is UTF-8" do
    Regexp.union(/probl[éeè]me/i, /help/i).encoding.should == Encoding::UTF_8
  end

  it "returns a Regexp if an array of string with special characters is passed" do
    Regexp.union(["+","-"]).should == /\+|\-/
  end

  it "raises ArgumentError if the arguments include conflicting ASCII-incompatible Strings" do
    attempt = -> { Regexp.union("a".encode("UTF-16LE"), "b".encode("UTF-16BE")) }
    attempt.should.raise(ArgumentError, 'incompatible encodings: UTF-16LE and UTF-16BE')
  end

  it "raises ArgumentError if the arguments include conflicting ASCII-incompatible Regexps" do
    attempt = -> {
      Regexp.union(Regexp.new("a".encode("UTF-16LE")),
                   Regexp.new("b".encode("UTF-16BE")))
    }
    attempt.should.raise(ArgumentError, 'incompatible encodings: UTF-16LE and UTF-16BE')
  end

  it "raises ArgumentError if the arguments include conflicting fixed encoding Regexps" do
    attempt = -> {
      Regexp.union(Regexp.new("a".encode("UTF-8"), Regexp::FIXEDENCODING),
                   Regexp.new("b".encode("US-ASCII"), Regexp::FIXEDENCODING))
    }
    attempt.should.raise(ArgumentError, 'incompatible encodings: UTF-8 and US-ASCII')
  end

  it "raises ArgumentError if the arguments include a fixed encoding Regexp and a String containing non-ASCII-compatible characters in a different encoding" do
    attempt = -> {
      Regexp.union(Regexp.new("a".encode("UTF-8"), Regexp::FIXEDENCODING),
                   "\u00A9".encode("ISO-8859-1"))
    }
    attempt.should.raise(ArgumentError, 'incompatible encodings: UTF-8 and ISO-8859-1')
  end

  it "raises ArgumentError if the arguments include a String containing non-ASCII-compatible characters and a fixed encoding Regexp in a different encoding" do
    attempt = -> {
      Regexp.union("\u00A9".encode("ISO-8859-1"),
                   Regexp.new("a".encode("UTF-8"), Regexp::FIXEDENCODING))
    }
    attempt.should.raise(ArgumentError, 'incompatible encodings: ISO-8859-1 and UTF-8')
  end

  it "raises ArgumentError if the arguments include an ASCII-incompatible String and an ASCII-only String" do
    attempt = -> { Regexp.union("a".encode("UTF-16LE"), "b".encode("UTF-8")) }
    attempt.should.raise(ArgumentError, /ASCII incompatible encoding: UTF-16LE|incompatible encodings: UTF-16LE and US-ASCII/)
  end

  it "raises ArgumentError if the arguments include an ASCII-incompatible Regexp and an ASCII-only String" do
    attempt = -> { Regexp.union(Regexp.new("a".encode("UTF-16LE")), "b".encode("UTF-8")) }
    attempt.should.raise(ArgumentError, /ASCII incompatible encoding: UTF-16LE|incompatible encodings: UTF-16LE and US-ASCII/)
  end

  it "raises ArgumentError if the arguments include an ASCII-incompatible String and an ASCII-only Regexp" do
    attempt = -> { Regexp.union("a".encode("UTF-16LE"), Regexp.new("b".encode("UTF-8"))) }
    attempt.should.raise(ArgumentError, /ASCII incompatible encoding: UTF-16LE|incompatible encodings: UTF-16LE and US-ASCII/)
  end

  it "raises ArgumentError if the arguments include an ASCII-incompatible Regexp and an ASCII-only Regexp" do
    attempt = -> { Regexp.union(Regexp.new("a".encode("UTF-16LE")), Regexp.new("b".encode("UTF-8"))) }
    attempt.should.raise(ArgumentError, /ASCII incompatible encoding: UTF-16LE|incompatible encodings: UTF-16LE and US-ASCII/)
  end

  it "raises ArgumentError if the arguments include an ASCII-incompatible String and a String containing non-ASCII-compatible characters in a different encoding" do
    attempt = -> { Regexp.union("a".encode("UTF-16LE"), "\u00A9".encode("ISO-8859-1")) }
    attempt.should.raise(ArgumentError, 'incompatible encodings: UTF-16LE and ISO-8859-1')
  end

  it "raises ArgumentError if the arguments include an ASCII-incompatible Regexp and a String containing non-ASCII-compatible characters in a different encoding" do
    attempt = -> { Regexp.union(Regexp.new("a".encode("UTF-16LE")), "\u00A9".encode("ISO-8859-1")) }
    attempt.should.raise(ArgumentError, 'incompatible encodings: UTF-16LE and ISO-8859-1')
  end

  it "raises ArgumentError if the arguments include an ASCII-incompatible String and a Regexp containing non-ASCII-compatible characters in a different encoding" do
    attempt = -> { Regexp.union("a".encode("UTF-16LE"), Regexp.new("\u00A9".encode("ISO-8859-1"))) }
    attempt.should.raise(ArgumentError, 'incompatible encodings: UTF-16LE and ISO-8859-1')
  end

  it "raises ArgumentError if the arguments include an ASCII-incompatible Regexp and a Regexp containing non-ASCII-compatible characters in a different encoding" do
    attempt = -> { Regexp.union(Regexp.new("a".encode("UTF-16LE")), Regexp.new("\u00A9".encode("ISO-8859-1"))) }
    attempt.should.raise(ArgumentError, 'incompatible encodings: UTF-16LE and ISO-8859-1')
  end

  it "uses to_str to convert arguments (if not Regexp)" do
    string_like = mock('pattern')
    string_like.should_receive(:to_str).and_return('foo')
    Regexp.union(string_like, "bar").should == /foo|bar/
  end

  it "uses to_regexp to convert argument" do
    regexp_like = mock('pattern')
    regexp_like.should_receive(:to_regexp).and_return(/foo/)
    Regexp.union(regexp_like).should == /foo/
  end

  it "accepts a Symbol as argument" do
    Regexp.union(:foo).should == /foo/
  end

  it "accepts a single array of patterns as arguments" do
    Regexp.union(["skiing", "sledding"]).should == /skiing|sledding/
    not_supported_on :opal do
      Regexp.union([/dogs/, /cats/i]).should == /(?-mix:dogs)|(?i-mx:cats)/
    end

    # Two arrays are rejected: only a single array of patterns is accepted.
    attempt = -> { Regexp.union(["skiing", "sledding"], [/dogs/, /cats/i]) }
    attempt.should.raise(TypeError, 'no implicit conversion of Array into String')
  end
end
