summaryrefslogtreecommitdiff
path: root/spec/ruby/language/regexp
diff options
context:
space:
mode:
Diffstat (limited to 'spec/ruby/language/regexp')
-rw-r--r--spec/ruby/language/regexp/anchors_spec.rb62
-rw-r--r--spec/ruby/language/regexp/back-references_spec.rb68
-rw-r--r--spec/ruby/language/regexp/character_classes_spec.rb238
-rw-r--r--spec/ruby/language/regexp/empty_checks_spec.rb135
-rw-r--r--spec/ruby/language/regexp/encoding_spec.rb63
-rw-r--r--spec/ruby/language/regexp/escapes_spec.rb96
-rw-r--r--spec/ruby/language/regexp/grouping_spec.rb41
-rw-r--r--spec/ruby/language/regexp/interpolation_spec.rb6
-rw-r--r--spec/ruby/language/regexp/modifiers_spec.rb40
-rw-r--r--spec/ruby/language/regexp/repetition_spec.rb21
10 files changed, 532 insertions, 238 deletions
diff --git a/spec/ruby/language/regexp/anchors_spec.rb b/spec/ruby/language/regexp/anchors_spec.rb
index 0129e255da..8e597b65e8 100644
--- a/spec/ruby/language/regexp/anchors_spec.rb
+++ b/spec/ruby/language/regexp/anchors_spec.rb
@@ -7,8 +7,8 @@ describe "Regexps with anchors" do
/^foo/.match("foo").to_a.should == ["foo"]
/^bar/.match("foo\nbar").to_a.should == ["bar"]
# Basic non-matching
- /^foo/.match(" foo").should be_nil
- /foo^/.match("foo\n\n\n").should be_nil
+ /^foo/.match(" foo").should == nil
+ /foo^/.match("foo\n\n\n").should == nil
# A bit advanced
/^^^foo/.match("foo").to_a.should == ["foo"]
@@ -16,8 +16,8 @@ describe "Regexps with anchors" do
(/($^)($^)/ =~ "foo\n\n").should == "foo\n".size and $~.to_a.should == ["", "", ""]
# Different start of line chars
- /^bar/.match("foo\rbar").should be_nil
- /^bar/.match("foo\0bar").should be_nil
+ /^bar/.match("foo\rbar").should == nil
+ /^bar/.match("foo\0bar").should == nil
# Trivial
/^/.match("foo").to_a.should == [""]
@@ -29,7 +29,7 @@ describe "Regexps with anchors" do
end
it "does not match ^ after trailing \\n" do
- /^(?!\A)/.match("foo\n").should be_nil # There is no (empty) line after a trailing \n
+ /^(?!\A)/.match("foo\n").should == nil # There is no (empty) line after a trailing \n
end
it "supports $ (line end anchor)" do
@@ -37,16 +37,16 @@ describe "Regexps with anchors" do
/foo$/.match("foo").to_a.should == ["foo"]
/foo$/.match("foo\nbar").to_a.should == ["foo"]
# Basic non-matching
- /foo$/.match("foo ").should be_nil
- /$foo/.match("\n\n\nfoo").should be_nil
+ /foo$/.match("foo ").should == nil
+ /$foo/.match("\n\n\nfoo").should == nil
# A bit advanced
/foo$$$/.match("foo").to_a.should == ["foo"]
(/[^o]$/ =~ "foo\n\n").should == ("foo\n".size - 1) and $~.to_a.should == ["\n"]
# Different end of line chars
- /foo$/.match("foo\r\nbar").should be_nil
- /foo$/.match("foo\0bar").should be_nil
+ /foo$/.match("foo\r\nbar").should == nil
+ /foo$/.match("foo\0bar").should == nil
# Trivial
(/$/ =~ "foo").should == "foo".size and $~.to_a.should == [""]
@@ -61,15 +61,15 @@ describe "Regexps with anchors" do
# Basic matching
/\Afoo/.match("foo").to_a.should == ["foo"]
# Basic non-matching
- /\Abar/.match("foo\nbar").should be_nil
- /\Afoo/.match(" foo").should be_nil
+ /\Abar/.match("foo\nbar").should == nil
+ /\Afoo/.match(" foo").should == nil
# A bit advanced
/\A\A\Afoo/.match("foo").to_a.should == ["foo"]
/(\A\Z)(\A\Z)/.match("").to_a.should == ["", "", ""]
# Different start of line chars
- /\Abar/.match("foo\0bar").should be_nil
+ /\Abar/.match("foo\0bar").should == nil
# Grouping
/(\Afoo)/.match("foo").to_a.should == ["foo", "foo"]
@@ -81,8 +81,8 @@ describe "Regexps with anchors" do
/foo\Z/.match("foo").to_a.should == ["foo"]
/foo\Z/.match("foo\n").to_a.should == ["foo"]
# Basic non-matching
- /foo\Z/.match("foo\nbar").should be_nil
- /foo\Z/.match("foo ").should be_nil
+ /foo\Z/.match("foo\nbar").should == nil
+ /foo\Z/.match("foo ").should == nil
# A bit advanced
/foo\Z\Z\Z/.match("foo\n").to_a.should == ["foo"]
@@ -90,8 +90,8 @@ describe "Regexps with anchors" do
(/(\z\Z)(\z\Z)/ =~ "foo\n").should == "foo\n".size and $~.to_a.should == ["", "", ""]
# Different end of line chars
- /foo\Z/.match("foo\0bar").should be_nil
- /foo\Z/.match("foo\r\n").should be_nil
+ /foo\Z/.match("foo\0bar").should == nil
+ /foo\Z/.match("foo\r\n").should == nil
# Grouping
/(foo\Z)/.match("foo").to_a.should == ["foo", "foo"]
@@ -102,17 +102,17 @@ describe "Regexps with anchors" do
# Basic matching
/foo\z/.match("foo").to_a.should == ["foo"]
# Basic non-matching
- /foo\z/.match("foo\nbar").should be_nil
- /foo\z/.match("foo\n").should be_nil
- /foo\z/.match("foo ").should be_nil
+ /foo\z/.match("foo\nbar").should == nil
+ /foo\z/.match("foo\n").should == nil
+ /foo\z/.match("foo ").should == nil
# A bit advanced
/foo\z\z\z/.match("foo").to_a.should == ["foo"]
(/($\z)($\z)/ =~ "foo").should == "foo".size and $~.to_a.should == ["", "", ""]
# Different end of line chars
- /foo\z/.match("foo\0bar").should be_nil
- /foo\z/.match("foo\r\nbar").should be_nil
+ /foo\z/.match("foo\0bar").should == nil
+ /foo\z/.match("foo\r\nbar").should == nil
# Grouping
/(foo\z)/.match("foo").to_a.should == ["foo", "foo"]
@@ -124,16 +124,16 @@ describe "Regexps with anchors" do
/foo\b/.match("foo").to_a.should == ["foo"]
/foo\b/.match("foo\n").to_a.should == ["foo"]
LanguageSpecs.white_spaces.scan(/./).each do |c|
- /foo\b/.match("foo" + c).to_a.should == ["foo"]
+ /foo\b/.match("foo" + c).to_a.should == ["foo"]
end
LanguageSpecs.non_alphanum_non_space.scan(/./).each do |c|
- /foo\b/.match("foo" + c).to_a.should == ["foo"]
+ /foo\b/.match("foo" + c).to_a.should == ["foo"]
end
/foo\b/.match("foo\0").to_a.should == ["foo"]
# Basic non-matching
- /foo\b/.match("foobar").should be_nil
- /foo\b/.match("foo123").should be_nil
- /foo\b/.match("foo_").should be_nil
+ /foo\b/.match("foobar").should == nil
+ /foo\b/.match("foo123").should == nil
+ /foo\b/.match("foo_").should == nil
end
it "supports \\B (non-word-boundary)" do
@@ -142,15 +142,15 @@ describe "Regexps with anchors" do
/foo\B/.match("foo123").to_a.should == ["foo"]
/foo\B/.match("foo_").to_a.should == ["foo"]
# Basic non-matching
- /foo\B/.match("foo").should be_nil
- /foo\B/.match("foo\n").should be_nil
+ /foo\B/.match("foo").should == nil
+ /foo\B/.match("foo\n").should == nil
LanguageSpecs.white_spaces.scan(/./).each do |c|
- /foo\B/.match("foo" + c).should be_nil
+ /foo\B/.match("foo" + c).should == nil
end
LanguageSpecs.non_alphanum_non_space.scan(/./).each do |c|
- /foo\B/.match("foo" + c).should be_nil
+ /foo\B/.match("foo" + c).should == nil
end
- /foo\B/.match("foo\0").should be_nil
+ /foo\B/.match("foo\0").should == nil
end
it "supports (?= ) (positive lookahead)" do
diff --git a/spec/ruby/language/regexp/back-references_spec.rb b/spec/ruby/language/regexp/back-references_spec.rb
index e8df8725c5..3b4c5656a2 100644
--- a/spec/ruby/language/regexp/back-references_spec.rb
+++ b/spec/ruby/language/regexp/back-references_spec.rb
@@ -22,6 +22,15 @@ describe "Regexps with back-references" do
$10.should == "0"
end
+ it "returns nil for numbered variable with too large index" do
+ -> {
+ eval(<<~CODE).should == nil
+ "a" =~ /(.)/
+ eval('$4294967296')
+ CODE
+ }.should complain(/warning: ('|`)\$4294967296' is too big for a number variable, always nil/)
+ end
+
it "will not clobber capture variables across threads" do
cap1, cap2, cap3 = nil
"foo" =~ /(o+)/
@@ -40,7 +49,7 @@ describe "Regexps with back-references" do
it "supports \<n> (backreference to previous group match)" do
/(foo.)\1/.match("foo1foo1").to_a.should == ["foo1foo1", "foo1"]
- /(foo.)\1/.match("foo1foo2").should be_nil
+ /(foo.)\1/.match("foo1foo2").should == nil
end
it "resets nested \<n> backreference before match of outer subexpression" do
@@ -63,12 +72,17 @@ describe "Regexps with back-references" do
(/\10()()()()()()()()()()/ =~ "\x08").should == 0
end
+ it "fails when trying to match a backreference to an unmatched capture group" do
+ /\1()/.match("").should == nil
+ /(?:(a)|b)\1/.match("b").should == nil
+ end
+
it "ignores backreferences > 1000" do
/\99999/.match("99999")[0].should == "99999"
end
it "0 is not a valid backreference" do
- -> { Regexp.new("\\k<0>") }.should raise_error(RegexpError)
+ -> { Regexp.new("\\k<0>") }.should.raise(RegexpError)
end
it "allows numeric conditional backreferences" do
@@ -78,7 +92,7 @@ describe "Regexps with back-references" do
end
it "allows either <> or '' in named conditional backreferences" do
- -> { Regexp.new("(?<a>a)(?(a)a|b)") }.should raise_error(RegexpError)
+ -> { Regexp.new("(?<a>a)(?(a)a|b)") }.should.raise(RegexpError)
/(?<a>a)(?(<a>)a|b)/.match("aa").to_a.should == [ "aa", "a" ]
/(?<a>a)(?('a')a|b)/.match("aa").to_a.should == [ "aa", "a" ]
end
@@ -104,32 +118,32 @@ describe "Regexps with back-references" do
end
it "named capture groups invalidate numeric backreferences" do
- -> { Regexp.new("(?<a>a)\\1") }.should raise_error(RegexpError)
- -> { Regexp.new("(?<a>a)\\k<1>") }.should raise_error(RegexpError)
- -> { Regexp.new("(a)(?<a>a)\\1") }.should raise_error(RegexpError)
- -> { Regexp.new("(a)(?<a>a)\\k<1>") }.should raise_error(RegexpError)
+ -> { Regexp.new("(?<a>a)\\1") }.should.raise(RegexpError)
+ -> { Regexp.new("(?<a>a)\\k<1>") }.should.raise(RegexpError)
+ -> { Regexp.new("(a)(?<a>a)\\1") }.should.raise(RegexpError)
+ -> { Regexp.new("(a)(?<a>a)\\k<1>") }.should.raise(RegexpError)
end
it "treats + or - as the beginning of a level specifier in \\k<> backreferences and (?(...)...|...) conditional backreferences" do
- -> { Regexp.new("(?<a+>a)\\k<a+>") }.should raise_error(RegexpError)
- -> { Regexp.new("(?<a+b>a)\\k<a+b>") }.should raise_error(RegexpError)
- -> { Regexp.new("(?<a+1>a)\\k<a+1>") }.should raise_error(RegexpError)
- -> { Regexp.new("(?<a->a)\\k<a->") }.should raise_error(RegexpError)
- -> { Regexp.new("(?<a-b>a)\\k<a-b>") }.should raise_error(RegexpError)
- -> { Regexp.new("(?<a-1>a)\\k<a-1>") }.should raise_error(RegexpError)
-
- -> { Regexp.new("(?<a+>a)(?(<a+>)a|b)") }.should raise_error(RegexpError)
- -> { Regexp.new("(?<a+b>a)(?(<a+b>)a|b)") }.should raise_error(RegexpError)
- -> { Regexp.new("(?<a+1>a)(?(<a+1>)a|b)") }.should raise_error(RegexpError)
- -> { Regexp.new("(?<a->a)(?(<a->)a|b)") }.should raise_error(RegexpError)
- -> { Regexp.new("(?<a-b>a)(?(<a-b>)a|b)") }.should raise_error(RegexpError)
- -> { Regexp.new("(?<a-1>a)(?(<a-1>)a|b)") }.should raise_error(RegexpError)
-
- -> { Regexp.new("(?<a+>a)(?('a+')a|b)") }.should raise_error(RegexpError)
- -> { Regexp.new("(?<a+b>a)(?('a+b')a|b)") }.should raise_error(RegexpError)
- -> { Regexp.new("(?<a+1>a)(?('a+1')a|b)") }.should raise_error(RegexpError)
- -> { Regexp.new("(?<a->a)(?('a-')a|b)") }.should raise_error(RegexpError)
- -> { Regexp.new("(?<a-b>a)(?('a-b')a|b)") }.should raise_error(RegexpError)
- -> { Regexp.new("(?<a-1>a)(?('a-1')a|b)") }.should raise_error(RegexpError)
+ -> { Regexp.new("(?<a+>a)\\k<a+>") }.should.raise(RegexpError)
+ -> { Regexp.new("(?<a+b>a)\\k<a+b>") }.should.raise(RegexpError)
+ -> { Regexp.new("(?<a+1>a)\\k<a+1>") }.should.raise(RegexpError)
+ -> { Regexp.new("(?<a->a)\\k<a->") }.should.raise(RegexpError)
+ -> { Regexp.new("(?<a-b>a)\\k<a-b>") }.should.raise(RegexpError)
+ -> { Regexp.new("(?<a-1>a)\\k<a-1>") }.should.raise(RegexpError)
+
+ -> { Regexp.new("(?<a+>a)(?(<a+>)a|b)") }.should.raise(RegexpError)
+ -> { Regexp.new("(?<a+b>a)(?(<a+b>)a|b)") }.should.raise(RegexpError)
+ -> { Regexp.new("(?<a+1>a)(?(<a+1>)a|b)") }.should.raise(RegexpError)
+ -> { Regexp.new("(?<a->a)(?(<a->)a|b)") }.should.raise(RegexpError)
+ -> { Regexp.new("(?<a-b>a)(?(<a-b>)a|b)") }.should.raise(RegexpError)
+ -> { Regexp.new("(?<a-1>a)(?(<a-1>)a|b)") }.should.raise(RegexpError)
+
+ -> { Regexp.new("(?<a+>a)(?('a+')a|b)") }.should.raise(RegexpError)
+ -> { Regexp.new("(?<a+b>a)(?('a+b')a|b)") }.should.raise(RegexpError)
+ -> { Regexp.new("(?<a+1>a)(?('a+1')a|b)") }.should.raise(RegexpError)
+ -> { Regexp.new("(?<a->a)(?('a-')a|b)") }.should.raise(RegexpError)
+ -> { Regexp.new("(?<a-b>a)(?('a-b')a|b)") }.should.raise(RegexpError)
+ -> { Regexp.new("(?<a-1>a)(?('a-1')a|b)") }.should.raise(RegexpError)
end
end
diff --git a/spec/ruby/language/regexp/character_classes_spec.rb b/spec/ruby/language/regexp/character_classes_spec.rb
index 0cf1e9b6f4..c6ed92b78e 100644
--- a/spec/ruby/language/regexp/character_classes_spec.rb
+++ b/spec/ruby/language/regexp/character_classes_spec.rb
@@ -9,9 +9,9 @@ describe "Regexp with character classes" do
/\w/.match("_").to_a.should == ["_"]
# Non-matches
- /\w/.match(LanguageSpecs.white_spaces).should be_nil
- /\w/.match(LanguageSpecs.non_alphanum_non_space).should be_nil
- /\w/.match("\0").should be_nil
+ /\w/.match(LanguageSpecs.white_spaces).should == nil
+ /\w/.match(LanguageSpecs.non_alphanum_non_space).should == nil
+ /\w/.match("\0").should == nil
end
it "supports \\W (non-word character)" do
@@ -20,19 +20,19 @@ describe "Regexp with character classes" do
/\W/.match("\0").to_a.should == ["\0"]
# Non-matches
- /\W/.match("a").should be_nil
- /\W/.match("1").should be_nil
- /\W/.match("_").should be_nil
+ /\W/.match("a").should == nil
+ /\W/.match("1").should == nil
+ /\W/.match("_").should == nil
end
it "supports \\s (space character)" do
/\s+/.match(LanguageSpecs.white_spaces).to_a.should == [LanguageSpecs.white_spaces]
# Non-matches
- /\s/.match("a").should be_nil
- /\s/.match("1").should be_nil
- /\s/.match(LanguageSpecs.non_alphanum_non_space).should be_nil
- /\s/.match("\0").should be_nil
+ /\s/.match("a").should == nil
+ /\s/.match("1").should == nil
+ /\s/.match(LanguageSpecs.non_alphanum_non_space).should == nil
+ /\s/.match("\0").should == nil
end
it "supports \\S (non-space character)" do
@@ -42,17 +42,17 @@ describe "Regexp with character classes" do
/\S/.match("\0").to_a.should == ["\0"]
# Non-matches
- /\S/.match(LanguageSpecs.white_spaces).should be_nil
+ /\S/.match(LanguageSpecs.white_spaces).should == nil
end
it "supports \\d (numeric digit)" do
/\d/.match("1").to_a.should == ["1"]
# Non-matches
- /\d/.match("a").should be_nil
- /\d/.match(LanguageSpecs.white_spaces).should be_nil
- /\d/.match(LanguageSpecs.non_alphanum_non_space).should be_nil
- /\d/.match("\0").should be_nil
+ /\d/.match("a").should == nil
+ /\d/.match(LanguageSpecs.white_spaces).should == nil
+ /\d/.match(LanguageSpecs.non_alphanum_non_space).should == nil
+ /\d/.match("\0").should == nil
end
it "supports \\D (non-digit)" do
@@ -62,7 +62,7 @@ describe "Regexp with character classes" do
/\D/.match("\0").to_a.should == ["\0"]
# Non-matches
- /\D/.match("1").should be_nil
+ /\D/.match("1").should == nil
end
it "supports [] (character class)" do
@@ -89,7 +89,7 @@ describe "Regexp with character classes" do
/[^[:lower:]A-C]+/.match("abcABCDEF123def").to_a.should == ["DEF123"] # negated character class
/[:alnum:]+/.match("a:l:n:u:m").to_a.should == ["a:l:n:u:m"] # should behave like regular character class composed of the individual letters
/[\[:alnum:]+/.match("[:a:l:n:u:m").to_a.should == ["[:a:l:n:u:m"] # should behave like regular character class composed of the individual letters
- -> { eval('/[[:alpha:]-[:digit:]]/') }.should raise_error(SyntaxError) # can't use character class as a start value of range
+ -> { eval('/[[:alpha:]-[:digit:]]/') }.should.raise(SyntaxError) # can't use character class as a start value of range
end
it "matches ASCII characters with [[:ascii:]]" do
@@ -99,8 +99,8 @@ describe "Regexp with character classes" do
not_supported_on :opal do
it "doesn't match non-ASCII characters with [[:ascii:]]" do
- /[[:ascii:]]/.match("\u{80}").should be_nil
- /[[:ascii:]]/.match("\u{9898}").should be_nil
+ /[[:ascii:]]/.match("\u{80}").should == nil
+ /[[:ascii:]]/.match("\u{9898}").should == nil
end
end
@@ -113,7 +113,7 @@ describe "Regexp with character classes" do
end
it "doesn't matches Unicode marks with [[:alnum:]]" do
- "\u{36F}".match(/[[:alnum:]]/).should be_nil
+ "\u{3099}".match(/[[:alnum:]]/).should == nil
end
it "doesn't match Unicode control characters with [[:alnum:]]" do
@@ -133,7 +133,7 @@ describe "Regexp with character classes" do
end
it "doesn't matches Unicode marks with [[:alpha:]]" do
- "\u{36F}".match(/[[:alpha:]]/).should be_nil
+ "\u{3099}".match(/[[:alpha:]]/).should == nil
end
it "doesn't match Unicode control characters with [[:alpha:]]" do
@@ -149,39 +149,39 @@ describe "Regexp with character classes" do
end
it "doesn't match Unicode control characters with [[:blank:]]" do
- "\u{16}".match(/[[:blank:]]/).should be_nil
+ "\u{16}".match(/[[:blank:]]/).should == nil
end
it "doesn't match Unicode punctuation characters with [[:blank:]]" do
- "\u{3F}".match(/[[:blank:]]/).should be_nil
+ "\u{3F}".match(/[[:blank:]]/).should == nil
end
it "doesn't match Unicode letter characters with [[:blank:]]" do
- "à".match(/[[:blank:]]/).should be_nil
+ "à".match(/[[:blank:]]/).should == nil
end
it "doesn't match Unicode digits with [[:blank:]]" do
- "\u{0660}".match(/[[:blank:]]/).should be_nil
+ "\u{0660}".match(/[[:blank:]]/).should == nil
end
it "doesn't match Unicode marks with [[:blank:]]" do
- "\u{36F}".match(/[[:blank:]]/).should be_nil
+ "\u{36F}".match(/[[:blank:]]/).should == nil
end
it "doesn't Unicode letter characters with [[:cntrl:]]" do
- "à".match(/[[:cntrl:]]/).should be_nil
+ "à".match(/[[:cntrl:]]/).should == nil
end
it "doesn't match Unicode digits with [[:cntrl:]]" do
- "\u{0660}".match(/[[:cntrl:]]/).should be_nil
+ "\u{0660}".match(/[[:cntrl:]]/).should == nil
end
it "doesn't match Unicode marks with [[:cntrl:]]" do
- "\u{36F}".match(/[[:cntrl:]]/).should be_nil
+ "\u{36F}".match(/[[:cntrl:]]/).should == nil
end
it "doesn't match Unicode punctuation characters with [[:cntrl:]]" do
- "\u{3F}".match(/[[:cntrl:]]/).should be_nil
+ "\u{3F}".match(/[[:cntrl:]]/).should == nil
end
it "matches Unicode control characters with [[:cntrl:]]" do
@@ -189,15 +189,15 @@ describe "Regexp with character classes" do
end
it "doesn't match Unicode format characters with [[:cntrl:]]" do
- "\u{2060}".match(/[[:cntrl:]]/).should be_nil
+ "\u{2060}".match(/[[:cntrl:]]/).should == nil
end
it "doesn't match Unicode private-use characters with [[:cntrl:]]" do
- "\u{E001}".match(/[[:cntrl:]]/).should be_nil
+ "\u{E001}".match(/[[:cntrl:]]/).should == nil
end
it "doesn't match Unicode letter characters with [[:digit:]]" do
- "à".match(/[[:digit:]]/).should be_nil
+ "à".match(/[[:digit:]]/).should == nil
end
it "matches Unicode digits with [[:digit:]]" do
@@ -206,27 +206,27 @@ describe "Regexp with character classes" do
end
it "doesn't match Unicode marks with [[:digit:]]" do
- "\u{36F}".match(/[[:digit:]]/).should be_nil
+ "\u{36F}".match(/[[:digit:]]/).should == nil
end
it "doesn't match Unicode punctuation characters with [[:digit:]]" do
- "\u{3F}".match(/[[:digit:]]/).should be_nil
+ "\u{3F}".match(/[[:digit:]]/).should == nil
end
it "doesn't match Unicode control characters with [[:digit:]]" do
- "\u{16}".match(/[[:digit:]]/).should be_nil
+ "\u{16}".match(/[[:digit:]]/).should == nil
end
it "doesn't match Unicode format characters with [[:digit:]]" do
- "\u{2060}".match(/[[:digit:]]/).should be_nil
+ "\u{2060}".match(/[[:digit:]]/).should == nil
end
it "doesn't match Unicode private-use characters with [[:digit:]]" do
- "\u{E001}".match(/[[:digit:]]/).should be_nil
+ "\u{E001}".match(/[[:digit:]]/).should == nil
end
it "matches Unicode letter characters with [[:graph:]]" do
- "à".match(/[[:graph:]]/).to_a.should == ["à"]
+ "à".match(/[[:graph:]]/).to_a.should == ["à"]
end
it "matches Unicode digits with [[:graph:]]" do
@@ -243,7 +243,7 @@ describe "Regexp with character classes" do
end
it "doesn't match Unicode control characters with [[:graph:]]" do
- "\u{16}".match(/[[:graph:]]/).should be_nil
+ "\u{16}".match(/[[:graph:]]/).should == nil
end
it "match Unicode format characters with [[:graph:]]" do
@@ -261,40 +261,40 @@ describe "Regexp with character classes" do
end
it "doesn't match Unicode uppercase letter characters with [[:lower:]]" do
- "\u{100}".match(/[[:lower:]]/).should be_nil
- "\u{130}".match(/[[:lower:]]/).should be_nil
- "\u{405}".match(/[[:lower:]]/).should be_nil
+ "\u{100}".match(/[[:lower:]]/).should == nil
+ "\u{130}".match(/[[:lower:]]/).should == nil
+ "\u{405}".match(/[[:lower:]]/).should == nil
end
it "doesn't match Unicode title-case characters with [[:lower:]]" do
- "\u{1F88}".match(/[[:lower:]]/).should be_nil
- "\u{1FAD}".match(/[[:lower:]]/).should be_nil
- "\u{01C5}".match(/[[:lower:]]/).should be_nil
+ "\u{1F88}".match(/[[:lower:]]/).should == nil
+ "\u{1FAD}".match(/[[:lower:]]/).should == nil
+ "\u{01C5}".match(/[[:lower:]]/).should == nil
end
it "doesn't match Unicode digits with [[:lower:]]" do
- "\u{0660}".match(/[[:lower:]]/).should be_nil
- "\u{FF12}".match(/[[:lower:]]/).should be_nil
+ "\u{0660}".match(/[[:lower:]]/).should == nil
+ "\u{FF12}".match(/[[:lower:]]/).should == nil
end
it "doesn't match Unicode marks with [[:lower:]]" do
- "\u{36F}".match(/[[:lower:]]/).should be_nil
+ "\u{36F}".match(/[[:lower:]]/).should == nil
end
it "doesn't match Unicode punctuation characters with [[:lower:]]" do
- "\u{3F}".match(/[[:lower:]]/).should be_nil
+ "\u{3F}".match(/[[:lower:]]/).should == nil
end
it "doesn't match Unicode control characters with [[:lower:]]" do
- "\u{16}".match(/[[:lower:]]/).should be_nil
+ "\u{16}".match(/[[:lower:]]/).should == nil
end
it "doesn't match Unicode format characters with [[:lower:]]" do
- "\u{2060}".match(/[[:lower:]]/).should be_nil
+ "\u{2060}".match(/[[:lower:]]/).should == nil
end
it "doesn't match Unicode private-use characters with [[:lower:]]" do
- "\u{E001}".match(/[[:lower:]]/).should be_nil
+ "\u{E001}".match(/[[:lower:]]/).should == nil
end
it "matches Unicode lowercase letter characters with [[:print:]]" do
@@ -329,7 +329,7 @@ describe "Regexp with character classes" do
end
it "doesn't match Unicode control characters with [[:print:]]" do
- "\u{16}".match(/[[:print:]]/).should be_nil
+ "\u{16}".match(/[[:print:]]/).should == nil
end
it "match Unicode format characters with [[:print:]]" do
@@ -342,30 +342,30 @@ describe "Regexp with character classes" do
it "doesn't match Unicode lowercase letter characters with [[:punct:]]" do
- "\u{FF41}".match(/[[:punct:]]/).should be_nil
- "\u{1D484}".match(/[[:punct:]]/).should be_nil
- "\u{E8}".match(/[[:punct:]]/).should be_nil
+ "\u{FF41}".match(/[[:punct:]]/).should == nil
+ "\u{1D484}".match(/[[:punct:]]/).should == nil
+ "\u{E8}".match(/[[:punct:]]/).should == nil
end
it "doesn't match Unicode uppercase letter characters with [[:punct:]]" do
- "\u{100}".match(/[[:punct:]]/).should be_nil
- "\u{130}".match(/[[:punct:]]/).should be_nil
- "\u{405}".match(/[[:punct:]]/).should be_nil
+ "\u{100}".match(/[[:punct:]]/).should == nil
+ "\u{130}".match(/[[:punct:]]/).should == nil
+ "\u{405}".match(/[[:punct:]]/).should == nil
end
it "doesn't match Unicode title-case characters with [[:punct:]]" do
- "\u{1F88}".match(/[[:punct:]]/).should be_nil
- "\u{1FAD}".match(/[[:punct:]]/).should be_nil
- "\u{01C5}".match(/[[:punct:]]/).should be_nil
+ "\u{1F88}".match(/[[:punct:]]/).should == nil
+ "\u{1FAD}".match(/[[:punct:]]/).should == nil
+ "\u{01C5}".match(/[[:punct:]]/).should == nil
end
it "doesn't match Unicode digits with [[:punct:]]" do
- "\u{0660}".match(/[[:punct:]]/).should be_nil
- "\u{FF12}".match(/[[:punct:]]/).should be_nil
+ "\u{0660}".match(/[[:punct:]]/).should == nil
+ "\u{FF12}".match(/[[:punct:]]/).should == nil
end
it "doesn't match Unicode marks with [[:punct:]]" do
- "\u{36F}".match(/[[:punct:]]/).should be_nil
+ "\u{36F}".match(/[[:punct:]]/).should == nil
end
it "matches Unicode Pc characters with [[:punct:]]" do
@@ -398,38 +398,38 @@ describe "Regexp with character classes" do
end
it "doesn't match Unicode format characters with [[:punct:]]" do
- "\u{2060}".match(/[[:punct:]]/).should be_nil
+ "\u{2060}".match(/[[:punct:]]/).should == nil
end
it "doesn't match Unicode private-use characters with [[:punct:]]" do
- "\u{E001}".match(/[[:punct:]]/).should be_nil
+ "\u{E001}".match(/[[:punct:]]/).should == nil
end
it "doesn't match Unicode lowercase letter characters with [[:space:]]" do
- "\u{FF41}".match(/[[:space:]]/).should be_nil
- "\u{1D484}".match(/[[:space:]]/).should be_nil
- "\u{E8}".match(/[[:space:]]/).should be_nil
+ "\u{FF41}".match(/[[:space:]]/).should == nil
+ "\u{1D484}".match(/[[:space:]]/).should == nil
+ "\u{E8}".match(/[[:space:]]/).should == nil
end
it "doesn't match Unicode uppercase letter characters with [[:space:]]" do
- "\u{100}".match(/[[:space:]]/).should be_nil
- "\u{130}".match(/[[:space:]]/).should be_nil
- "\u{405}".match(/[[:space:]]/).should be_nil
+ "\u{100}".match(/[[:space:]]/).should == nil
+ "\u{130}".match(/[[:space:]]/).should == nil
+ "\u{405}".match(/[[:space:]]/).should == nil
end
it "doesn't match Unicode title-case characters with [[:space:]]" do
- "\u{1F88}".match(/[[:space:]]/).should be_nil
- "\u{1FAD}".match(/[[:space:]]/).should be_nil
- "\u{01C5}".match(/[[:space:]]/).should be_nil
+ "\u{1F88}".match(/[[:space:]]/).should == nil
+ "\u{1FAD}".match(/[[:space:]]/).should == nil
+ "\u{01C5}".match(/[[:space:]]/).should == nil
end
it "doesn't match Unicode digits with [[:space:]]" do
- "\u{0660}".match(/[[:space:]]/).should be_nil
- "\u{FF12}".match(/[[:space:]]/).should be_nil
+ "\u{0660}".match(/[[:space:]]/).should == nil
+ "\u{FF12}".match(/[[:space:]]/).should == nil
end
it "doesn't match Unicode marks with [[:space:]]" do
- "\u{36F}".match(/[[:space:]]/).should be_nil
+ "\u{36F}".match(/[[:space:]]/).should == nil
end
it "matches Unicode Zs characters with [[:space:]]" do
@@ -445,17 +445,17 @@ describe "Regexp with character classes" do
end
it "doesn't match Unicode format characters with [[:space:]]" do
- "\u{2060}".match(/[[:space:]]/).should be_nil
+ "\u{2060}".match(/[[:space:]]/).should == nil
end
it "doesn't match Unicode private-use characters with [[:space:]]" do
- "\u{E001}".match(/[[:space:]]/).should be_nil
+ "\u{E001}".match(/[[:space:]]/).should == nil
end
it "doesn't match Unicode lowercase characters with [[:upper:]]" do
- "\u{FF41}".match(/[[:upper:]]/).should be_nil
- "\u{1D484}".match(/[[:upper:]]/).should be_nil
- "\u{E8}".match(/[[:upper:]]/).should be_nil
+ "\u{FF41}".match(/[[:upper:]]/).should == nil
+ "\u{1D484}".match(/[[:upper:]]/).should == nil
+ "\u{E8}".match(/[[:upper:]]/).should == nil
end
it "matches Unicode uppercase characters with [[:upper:]]" do
@@ -465,40 +465,40 @@ describe "Regexp with character classes" do
end
it "doesn't match Unicode title-case characters with [[:upper:]]" do
- "\u{1F88}".match(/[[:upper:]]/).should be_nil
- "\u{1FAD}".match(/[[:upper:]]/).should be_nil
- "\u{01C5}".match(/[[:upper:]]/).should be_nil
+ "\u{1F88}".match(/[[:upper:]]/).should == nil
+ "\u{1FAD}".match(/[[:upper:]]/).should == nil
+ "\u{01C5}".match(/[[:upper:]]/).should == nil
end
it "doesn't match Unicode digits with [[:upper:]]" do
- "\u{0660}".match(/[[:upper:]]/).should be_nil
- "\u{FF12}".match(/[[:upper:]]/).should be_nil
+ "\u{0660}".match(/[[:upper:]]/).should == nil
+ "\u{FF12}".match(/[[:upper:]]/).should == nil
end
it "doesn't match Unicode marks with [[:upper:]]" do
- "\u{36F}".match(/[[:upper:]]/).should be_nil
+ "\u{36F}".match(/[[:upper:]]/).should == nil
end
it "doesn't match Unicode punctuation characters with [[:upper:]]" do
- "\u{3F}".match(/[[:upper:]]/).should be_nil
+ "\u{3F}".match(/[[:upper:]]/).should == nil
end
it "doesn't match Unicode control characters with [[:upper:]]" do
- "\u{16}".match(/[[:upper:]]/).should be_nil
+ "\u{16}".match(/[[:upper:]]/).should == nil
end
it "doesn't match Unicode format characters with [[:upper:]]" do
- "\u{2060}".match(/[[:upper:]]/).should be_nil
+ "\u{2060}".match(/[[:upper:]]/).should == nil
end
it "doesn't match Unicode private-use characters with [[:upper:]]" do
- "\u{E001}".match(/[[:upper:]]/).should be_nil
+ "\u{E001}".match(/[[:upper:]]/).should == nil
end
it "doesn't match Unicode letter characters [^a-fA-F] with [[:xdigit:]]" do
- "à".match(/[[:xdigit:]]/).should be_nil
- "g".match(/[[:xdigit:]]/).should be_nil
- "X".match(/[[:xdigit:]]/).should be_nil
+ "à".match(/[[:xdigit:]]/).should == nil
+ "g".match(/[[:xdigit:]]/).should == nil
+ "X".match(/[[:xdigit:]]/).should == nil
end
it "matches Unicode letter characters [a-fA-F] with [[:xdigit:]]" do
@@ -507,28 +507,28 @@ describe "Regexp with character classes" do
end
it "doesn't match Unicode digits [^0-9] with [[:xdigit:]]" do
- "\u{0660}".match(/[[:xdigit:]]/).should be_nil
- "\u{FF12}".match(/[[:xdigit:]]/).should be_nil
+ "\u{0660}".match(/[[:xdigit:]]/).should == nil
+ "\u{FF12}".match(/[[:xdigit:]]/).should == nil
end
it "doesn't match Unicode marks with [[:xdigit:]]" do
- "\u{36F}".match(/[[:xdigit:]]/).should be_nil
+ "\u{36F}".match(/[[:xdigit:]]/).should == nil
end
it "doesn't match Unicode punctuation characters with [[:xdigit:]]" do
- "\u{3F}".match(/[[:xdigit:]]/).should be_nil
+ "\u{3F}".match(/[[:xdigit:]]/).should == nil
end
it "doesn't match Unicode control characters with [[:xdigit:]]" do
- "\u{16}".match(/[[:xdigit:]]/).should be_nil
+ "\u{16}".match(/[[:xdigit:]]/).should == nil
end
it "doesn't match Unicode format characters with [[:xdigit:]]" do
- "\u{2060}".match(/[[:xdigit:]]/).should be_nil
+ "\u{2060}".match(/[[:xdigit:]]/).should == nil
end
it "doesn't match Unicode private-use characters with [[:xdigit:]]" do
- "\u{E001}".match(/[[:xdigit:]]/).should be_nil
+ "\u{E001}".match(/[[:xdigit:]]/).should == nil
end
it "matches Unicode lowercase characters with [[:word:]]" do
@@ -562,23 +562,30 @@ describe "Regexp with character classes" do
"\u{16EE}".match(/[[:word:]]/).to_a.should == ["\u{16EE}"]
end
+ ruby_bug "#19417", ""..."3.4.6" do
+ it "matches Unicode join control characters with [[:word:]]" do
+ "\u{200C}".match(/[[:word:]]/).to_a.should == ["\u{200C}"]
+ "\u{200D}".match(/[[:word:]]/).to_a.should == ["\u{200D}"]
+ end
+ end
+
it "doesn't match Unicode No characters with [[:word:]]" do
- "\u{17F0}".match(/[[:word:]]/).should be_nil
+ "\u{17F0}".match(/[[:word:]]/).should == nil
end
it "doesn't match Unicode punctuation characters with [[:word:]]" do
- "\u{3F}".match(/[[:word:]]/).should be_nil
+ "\u{3F}".match(/[[:word:]]/).should == nil
end
it "doesn't match Unicode control characters with [[:word:]]" do
- "\u{16}".match(/[[:word:]]/).should be_nil
+ "\u{16}".match(/[[:word:]]/).should == nil
end
it "doesn't match Unicode format characters with [[:word:]]" do
- "\u{2060}".match(/[[:word:]]/).should be_nil
+ "\u{2060}".match(/[[:word:]]/).should == nil
end
it "doesn't match Unicode private-use characters with [[:word:]]" do
- "\u{E001}".match(/[[:word:]]/).should be_nil
+ "\u{E001}".match(/[[:word:]]/).should == nil
end
it "matches unicode named character properties" do
@@ -609,10 +616,13 @@ describe "Regexp with character classes" do
"루비(Ruby)".match(/\p{Hangul}+/u).to_a.should == ["루비"]
end
- ruby_bug "#17340", ''...'3.0' do
- it "raises a RegexpError for an unterminated unicode property" do
- -> { Regexp.new('\p{') }.should raise_error(RegexpError)
- end
+ it "supports negated property condition" do
+ "a".match(eval("/\P{L}/")).should == nil
+ "1".match(eval("/\P{N}/")).should == nil
+ end
+
+ it "raises a RegexpError for an unterminated unicode property" do
+ -> { Regexp.new('\p{') }.should.raise(RegexpError)
end
it "supports \\X (unicode 9.0 with UTR #51 workarounds)" do
diff --git a/spec/ruby/language/regexp/empty_checks_spec.rb b/spec/ruby/language/regexp/empty_checks_spec.rb
new file mode 100644
index 0000000000..391e65b003
--- /dev/null
+++ b/spec/ruby/language/regexp/empty_checks_spec.rb
@@ -0,0 +1,135 @@
+require_relative '../../spec_helper'
+require_relative '../fixtures/classes'
+
+describe "empty checks in Regexps" do
+
+ it "allow extra empty iterations" do
+ /()?/.match("").to_a.should == ["", ""]
+ /(a*)?/.match("").to_a.should == ["", ""]
+ /(a*)*/.match("").to_a.should == ["", ""]
+ # The bounds are high to avoid DFA-based matchers in implementations
+ # and to check backtracking behavior.
+ /(?:a|()){500,1000}/.match("a" * 500).to_a.should == ["a" * 500, ""]
+
+ # Variations with non-greedy loops.
+ /()??/.match("").to_a.should == ["", nil]
+ /(a*?)?/.match("").to_a.should == ["", ""]
+ /(a*)??/.match("").to_a.should == ["", nil]
+ /(a*?)??/.match("").to_a.should == ["", nil]
+ /(a*?)*/.match("").to_a.should == ["", ""]
+ /(a*)*?/.match("").to_a.should == ["", nil]
+ /(a*?)*?/.match("").to_a.should == ["", nil]
+ end
+
+ it "allow empty iterations in the middle of a loop" do
+ # One empty iteration between a's and b's.
+ /(a|\2b|())*/.match("aaabbb").to_a.should == ["aaabbb", "", ""]
+ /(a|\2b|()){2,4}/.match("aaabbb").to_a.should == ["aaa", "", ""]
+
+ # Two empty iterations between a's and b's.
+ /(a|\2b|\3()|())*/.match("aaabbb").to_a.should == ["aaabbb", "", "", ""]
+ /(a|\2b|\3()|()){2,4}/.match("aaabbb").to_a.should == ["aaa", "", nil, ""]
+
+ # Check that the empty iteration correctly updates the loop counter.
+ /(a|\2b|()){20,24}/.match("a" * 20 + "b" * 5).to_a.should == ["a" * 20 + "b" * 3, "b", ""]
+
+ # Variations with non-greedy loops.
+ /(a|\2b|())*?/.match("aaabbb").to_a.should == ["", nil, nil]
+ /(a|\2b|()){2,4}/.match("aaabbb").to_a.should == ["aaa", "", ""]
+ /(a|\2b|\3()|())*?/.match("aaabbb").to_a.should == ["", nil, nil, nil]
+ /(a|\2b|\3()|()){2,4}/.match("aaabbb").to_a.should == ["aaa", "", nil, ""]
+ /(a|\2b|()){20,24}/.match("a" * 20 + "b" * 5).to_a.should == ["a" * 20 + "b" * 3, "b", ""]
+ end
+
+ it "make the Regexp proceed past the quantified expression on failure" do
+ # If the contents of the ()* quantified group are empty (i.e., they fail
+ # the empty check), the loop will abort. It will not try to backtrack
+ # and try other alternatives (e.g. matching the "a") like in other Regexp
+ # dialects such as ECMAScript.
+ /(?:|a)*/.match("aaa").to_a.should == [""]
+ /(?:()|a)*/.match("aaa").to_a.should == ["", ""]
+ /(|a)*/.match("aaa").to_a.should == ["", ""]
+ /(()|a)*/.match("aaa").to_a.should == ["", "", ""]
+
+ # Same expressions, but with backreferences, to force the use of non-DFA-based
+ # engines.
+ /()\1(?:|a)*/.match("aaa").to_a.should == ["", ""]
+ /()\1(?:()|a)*/.match("aaa").to_a.should == ["", "", ""]
+ /()\1(|a)*/.match("aaa").to_a.should == ["", "", ""]
+ /()\1(()|a)*/.match("aaa").to_a.should == ["", "", "", ""]
+
+ # Variations with other zero-width contents of the quantified
+ # group: backreferences, capture groups, lookarounds
+ /()(?:\1|a)*/.match("aaa").to_a.should == ["", ""]
+ /()(?:()\1|a)*/.match("aaa").to_a.should == ["", "", ""]
+ /()(?:(\1)|a)*/.match("aaa").to_a.should == ["", "", ""]
+ /()(?:\1()|a)*/.match("aaa").to_a.should == ["", "", ""]
+ /()(\1|a)*/.match("aaa").to_a.should == ["", "", ""]
+ /()(()\1|a)*/.match("aaa").to_a.should == ["", "", "", ""]
+ /()((\1)|a)*/.match("aaa").to_a.should == ["", "", "", ""]
+ /()(\1()|a)*/.match("aaa").to_a.should == ["", "", "", ""]
+
+ /(?:(?=a)|a)*/.match("aaa").to_a.should == [""]
+ /(?:(?=a)()|a)*/.match("aaa").to_a.should == ["", ""]
+ /(?:()(?=a)|a)*/.match("aaa").to_a.should == ["", ""]
+ /(?:((?=a))|a)*/.match("aaa").to_a.should == ["", ""]
+ /()\1(?:(?=a)|a)*/.match("aaa").to_a.should == ["", ""]
+ /()\1(?:(?=a)()|a)*/.match("aaa").to_a.should == ["", "", ""]
+ /()\1(?:()(?=a)|a)*/.match("aaa").to_a.should == ["", "", ""]
+ /()\1(?:((?=a))|a)*/.match("aaa").to_a.should == ["", "", ""]
+
+ # Variations with non-greedy loops.
+ /(?:|a)*?/.match("aaa").to_a.should == [""]
+ /(?:()|a)*?/.match("aaa").to_a.should == ["", nil]
+ /(|a)*?/.match("aaa").to_a.should == ["", nil]
+ /(()|a)*?/.match("aaa").to_a.should == ["", nil, nil]
+
+ /()\1(?:|a)*?/.match("aaa").to_a.should == ["", ""]
+ /()\1(?:()|a)*?/.match("aaa").to_a.should == ["", "", nil]
+ /()\1(|a)*?/.match("aaa").to_a.should == ["", "", nil]
+ /()\1(()|a)*?/.match("aaa").to_a.should == ["", "", nil, nil]
+
+ /()(?:\1|a)*?/.match("aaa").to_a.should == ["", ""]
+ /()(?:()\1|a)*?/.match("aaa").to_a.should == ["", "", nil]
+ /()(?:(\1)|a)*?/.match("aaa").to_a.should == ["", "", nil]
+ /()(?:\1()|a)*?/.match("aaa").to_a.should == ["", "", nil]
+ /()(\1|a)*?/.match("aaa").to_a.should == ["", "", nil]
+ /()(()\1|a)*?/.match("aaa").to_a.should == ["", "", nil, nil]
+ /()((\1)|a)*?/.match("aaa").to_a.should == ["", "", nil, nil]
+ /()(\1()|a)*?/.match("aaa").to_a.should == ["", "", nil, nil]
+
+ /(?:(?=a)|a)*?/.match("aaa").to_a.should == [""]
+ /(?:(?=a)()|a)*?/.match("aaa").to_a.should == ["", nil]
+ /(?:()(?=a)|a)*?/.match("aaa").to_a.should == ["", nil]
+ /(?:((?=a))|a)*?/.match("aaa").to_a.should == ["", nil]
+ /()\1(?:(?=a)|a)*?/.match("aaa").to_a.should == ["", ""]
+ /()\1(?:(?=a)()|a)*?/.match("aaa").to_a.should == ["", "", nil]
+ /()\1(?:()(?=a)|a)*?/.match("aaa").to_a.should == ["", "", nil]
+ /()\1(?:((?=a))|a)*?/.match("aaa").to_a.should == ["", "", nil]
+ end
+
+ it "shouldn't cause the Regexp parser to get stuck in a loop" do
+ /(|a|\2b|())*/.match("aaabbb").to_a.should == ["", "", nil]
+ /(a||\2b|())*/.match("aaabbb").to_a.should == ["aaa", "", nil]
+ /(a|\2b||())*/.match("aaabbb").to_a.should == ["aaa", "", nil]
+ /(a|\2b|()|)*/.match("aaabbb").to_a.should == ["aaabbb", "", ""]
+ /(()|a|\3b|())*/.match("aaabbb").to_a.should == ["", "", "", nil]
+ /(a|()|\3b|())*/.match("aaabbb").to_a.should == ["aaa", "", "", nil]
+ /(a|\2b|()|())*/.match("aaabbb").to_a.should == ["aaabbb", "", "", nil]
+ /(a|\3b|()|())*/.match("aaabbb").to_a.should == ["aaa", "", "", nil]
+ /(a|()|())*/.match("aaa").to_a.should == ["aaa", "", "", nil]
+ /^(()|a|())*$/.match("aaa").to_a.should == ["aaa", "", "", nil]
+
+ # Variations with non-greedy loops.
+ /(|a|\2b|())*?/.match("aaabbb").to_a.should == ["", nil, nil]
+ /(a||\2b|())*?/.match("aaabbb").to_a.should == ["", nil, nil]
+ /(a|\2b||())*?/.match("aaabbb").to_a.should == ["", nil, nil]
+ /(a|\2b|()|)*?/.match("aaabbb").to_a.should == ["", nil, nil]
+ /(()|a|\3b|())*?/.match("aaabbb").to_a.should == ["", nil, nil, nil]
+ /(a|()|\3b|())*?/.match("aaabbb").to_a.should == ["", nil, nil, nil]
+ /(a|\2b|()|())*?/.match("aaabbb").to_a.should == ["", nil, nil, nil]
+ /(a|\3b|()|())*?/.match("aaabbb").to_a.should == ["", nil, nil, nil]
+ /(a|()|())*?/.match("aaa").to_a.should == ["", nil, nil, nil]
+ /^(()|a|())*?$/.match("aaa").to_a.should == ["aaa", "a", "", nil]
+ end
+end
diff --git a/spec/ruby/language/regexp/encoding_spec.rb b/spec/ruby/language/regexp/encoding_spec.rb
index 8e2a294b95..81e845af0c 100644
--- a/spec/ruby/language/regexp/encoding_spec.rb
+++ b/spec/ruby/language/regexp/encoding_spec.rb
@@ -1,21 +1,21 @@
-# -*- encoding: binary -*-
+# encoding: binary
require_relative '../../spec_helper'
require_relative '../fixtures/classes'
describe "Regexps with encoding modifiers" do
it "supports /e (EUC encoding)" do
- match = /./e.match("\303\251".force_encoding(Encoding::EUC_JP))
- match.to_a.should == ["\303\251".force_encoding(Encoding::EUC_JP)]
+ match = /./e.match("\303\251".dup.force_encoding(Encoding::EUC_JP))
+ match.to_a.should == ["\303\251".dup.force_encoding(Encoding::EUC_JP)]
end
it "supports /e (EUC encoding) with interpolation" do
- match = /#{/./}/e.match("\303\251".force_encoding(Encoding::EUC_JP))
- match.to_a.should == ["\303\251".force_encoding(Encoding::EUC_JP)]
+ match = /#{/./}/e.match("\303\251".dup.force_encoding(Encoding::EUC_JP))
+ match.to_a.should == ["\303\251".dup.force_encoding(Encoding::EUC_JP)]
end
it "supports /e (EUC encoding) with interpolation /o" do
- match = /#{/./}/e.match("\303\251".force_encoding(Encoding::EUC_JP))
- match.to_a.should == ["\303\251".force_encoding(Encoding::EUC_JP)]
+ match = /#{/./}/e.match("\303\251".dup.force_encoding(Encoding::EUC_JP))
+ match.to_a.should == ["\303\251".dup.force_encoding(Encoding::EUC_JP)]
end
it 'uses EUC-JP as /e encoding' do
@@ -38,6 +38,14 @@ describe "Regexps with encoding modifiers" do
/#{/./}/n.match("\303\251").to_a.should == ["\303"]
end
+ it "warns when using /n with a match string with non-ASCII characters and an encoding other than ASCII-8BIT" do
+ -> {
+ eval <<~RUBY
+ /./n.match("\303\251".dup.force_encoding('utf-8'))
+ RUBY
+ }.should complain(%r{historical binary regexp match /.../n against UTF-8 string})
+ end
+
it 'uses US-ASCII as /n encoding if all chars are 7-bit' do
/./n.encoding.should == Encoding::US_ASCII
end
@@ -59,18 +67,18 @@ describe "Regexps with encoding modifiers" do
end
it "supports /s (Windows_31J encoding)" do
- match = /./s.match("\303\251".force_encoding(Encoding::Windows_31J))
- match.to_a.should == ["\303".force_encoding(Encoding::Windows_31J)]
+ match = /./s.match("\303\251".dup.force_encoding(Encoding::Windows_31J))
+ match.to_a.should == ["\303".dup.force_encoding(Encoding::Windows_31J)]
end
it "supports /s (Windows_31J encoding) with interpolation" do
- match = /#{/./}/s.match("\303\251".force_encoding(Encoding::Windows_31J))
- match.to_a.should == ["\303".force_encoding(Encoding::Windows_31J)]
+ match = /#{/./}/s.match("\303\251".dup.force_encoding(Encoding::Windows_31J))
+ match.to_a.should == ["\303".dup.force_encoding(Encoding::Windows_31J)]
end
it "supports /s (Windows_31J encoding) with interpolation and /o" do
- match = /#{/./}/s.match("\303\251".force_encoding(Encoding::Windows_31J))
- match.to_a.should == ["\303".force_encoding(Encoding::Windows_31J)]
+ match = /#{/./}/s.match("\303\251".dup.force_encoding(Encoding::Windows_31J))
+ match.to_a.should == ["\303".dup.force_encoding(Encoding::Windows_31J)]
end
it 'uses Windows-31J as /s encoding' do
@@ -82,15 +90,15 @@ describe "Regexps with encoding modifiers" do
end
it "supports /u (UTF8 encoding)" do
- /./u.match("\303\251".force_encoding('utf-8')).to_a.should == ["\u{e9}"]
+ /./u.match("\303\251".dup.force_encoding('utf-8')).to_a.should == ["\u{e9}"]
end
it "supports /u (UTF8 encoding) with interpolation" do
- /#{/./}/u.match("\303\251".force_encoding('utf-8')).to_a.should == ["\u{e9}"]
+ /#{/./}/u.match("\303\251".dup.force_encoding('utf-8')).to_a.should == ["\u{e9}"]
end
it "supports /u (UTF8 encoding) with interpolation and /o" do
- /#{/./}/u.match("\303\251".force_encoding('utf-8')).to_a.should == ["\u{e9}"]
+ /#{/./}/u.match("\303\251".dup.force_encoding('utf-8')).to_a.should == ["\u{e9}"]
end
it 'uses UTF-8 as /u encoding' do
@@ -106,25 +114,38 @@ describe "Regexps with encoding modifiers" do
end
it "raises Encoding::CompatibilityError when trying match against different encodings" do
- -> { /\A[[:space:]]*\z/.match(" ".encode("UTF-16LE")) }.should raise_error(Encoding::CompatibilityError)
+ -> { /\A[[:space:]]*\z/.match(" ".encode("UTF-16LE")) }.should.raise(Encoding::CompatibilityError)
end
it "raises Encoding::CompatibilityError when trying match? against different encodings" do
- -> { /\A[[:space:]]*\z/.match?(" ".encode("UTF-16LE")) }.should raise_error(Encoding::CompatibilityError)
+ -> { /\A[[:space:]]*\z/.match?(" ".encode("UTF-16LE")) }.should.raise(Encoding::CompatibilityError)
end
it "raises Encoding::CompatibilityError when trying =~ against different encodings" do
- -> { /\A[[:space:]]*\z/ =~ " ".encode("UTF-16LE") }.should raise_error(Encoding::CompatibilityError)
+ -> { /\A[[:space:]]*\z/ =~ " ".encode("UTF-16LE") }.should.raise(Encoding::CompatibilityError)
+ end
+
+ it "raises Encoding::CompatibilityError when the regexp has a fixed, non-ASCII-compatible encoding" do
+ -> { Regexp.new("".dup.force_encoding("UTF-16LE"), Regexp::FIXEDENCODING) =~ " ".encode("UTF-8") }.should.raise(Encoding::CompatibilityError)
+ end
+
+ it "raises Encoding::CompatibilityError when the regexp has a fixed encoding and the match string has non-ASCII characters" do
+ -> { Regexp.new("".dup.force_encoding("US-ASCII"), Regexp::FIXEDENCODING) =~ "\303\251".dup.force_encoding('UTF-8') }.should.raise(Encoding::CompatibilityError)
+ end
+
+ it "raises ArgumentError when trying to match a broken String" do
+ s = "\x80".dup.force_encoding('UTF-8')
+ -> { s =~ /./ }.should.raise(ArgumentError, "invalid byte sequence in UTF-8")
end
it "computes the Regexp Encoding for each interpolated Regexp instance" do
make_regexp = -> str { /#{str}/ }
- r = make_regexp.call("été".force_encoding(Encoding::UTF_8))
+ r = make_regexp.call("été".dup.force_encoding(Encoding::UTF_8))
r.should.fixed_encoding?
r.encoding.should == Encoding::UTF_8
- r = make_regexp.call("abc".force_encoding(Encoding::UTF_8))
+ r = make_regexp.call("abc".dup.force_encoding(Encoding::UTF_8))
r.should_not.fixed_encoding?
r.encoding.should == Encoding::US_ASCII
end
diff --git a/spec/ruby/language/regexp/escapes_spec.rb b/spec/ruby/language/regexp/escapes_spec.rb
index 2e5fe5ad2e..4a0e611540 100644
--- a/spec/ruby/language/regexp/escapes_spec.rb
+++ b/spec/ruby/language/regexp/escapes_spec.rb
@@ -1,9 +1,11 @@
-# -*- encoding: binary -*-
+# encoding: binary
require_relative '../../spec_helper'
require_relative '../fixtures/classes'
+# TODO: synchronize with spec/core/regexp/new_spec.rb -
+# escaping is also tested there
describe "Regexps with escape characters" do
- it "they're supported" do
+ it "supports escape sequences" do
/\t/.match("\t").to_a.should == ["\t"] # horizontal tab
/\v/.match("\v").to_a.should == ["\v"] # vertical tab
/\n/.match("\n").to_a.should == ["\n"] # newline
@@ -15,9 +17,7 @@ describe "Regexps with escape characters" do
# \nnn octal char (encoded byte value)
end
- it "support quoting meta-characters via escape sequence" do
- /\\/.match("\\").to_a.should == ["\\"]
- /\//.match("/").to_a.should == ["/"]
+ it "supports quoting meta-characters via escape sequence" do
# parenthesis, etc
/\(/.match("(").to_a.should == ["("]
/\)/.match(")").to_a.should == [")"]
@@ -25,6 +25,8 @@ describe "Regexps with escape characters" do
/\]/.match("]").to_a.should == ["]"]
/\{/.match("{").to_a.should == ["{"]
/\}/.match("}").to_a.should == ["}"]
+ /\</.match("<").to_a.should == ["<"]
+ /\>/.match(">").to_a.should == [">"]
# alternation separator
/\|/.match("|").to_a.should == ["|"]
# quantifiers
@@ -37,23 +39,93 @@ describe "Regexps with escape characters" do
/\$/.match("$").to_a.should == ["$"]
end
+ it "supports quoting meta-characters via escape sequence when used as a terminator" do
+ # parenthesis, etc
+ # %r[[, %r((, etc literals - are forbidden
+ %r(\().match("(").to_a.should == ["("]
+ %r(\)).match(")").to_a.should == [")"]
+ %r)\().match("(").to_a.should == ["("]
+ %r)\)).match(")").to_a.should == [")"]
+
+ %r[\[].match("[").to_a.should == ["["]
+ %r[\]].match("]").to_a.should == ["]"]
+ %r]\[].match("[").to_a.should == ["["]
+ %r]\]].match("]").to_a.should == ["]"]
+
+ %r{\{}.match("{").to_a.should == ["{"]
+ %r{\}}.match("}").to_a.should == ["}"]
+ %r}\{}.match("{").to_a.should == ["{"]
+ %r}\}}.match("}").to_a.should == ["}"]
+
+ %r<\<>.match("<").to_a.should == ["<"]
+ %r<\>>.match(">").to_a.should == [">"]
+ %r>\<>.match("<").to_a.should == ["<"]
+ %r>\>>.match(">").to_a.should == [">"]
+
+ # alternation separator
+ %r|\||.match("|").to_a.should == ["|"]
+ # quantifiers
+ %r?\??.match("?").to_a.should == ["?"]
+ %r.\...match(".").to_a.should == ["."]
+ %r*\**.match("*").to_a.should == ["*"]
+ %r+\++.match("+").to_a.should == ["+"]
+ # line anchors
+ %r^\^^.match("^").to_a.should == ["^"]
+ %r$\$$.match("$").to_a.should == ["$"]
+ end
+
+ it "supports quoting non-meta-characters via escape sequence when used as a terminator" do
+ non_meta_character_terminators = [
+ '!', '"', '#', '%', '&', "'", ',', '-', ':', ';', '@', '_', '`', '/', '=', '~'
+ ]
+
+ non_meta_character_terminators.each do |c|
+ pattern = eval("%r" + c + "\\" + c + c)
+ pattern.match(c).to_a.should == [c]
+ end
+ end
+
+ it "does not change semantics of escaped non-meta-character when used as a terminator" do
+ all_terminators = [*("!".."/"), *(":".."@"), *("[".."`"), *("{".."~")]
+ meta_character_terminators = ["$", "^", "*", "+", ".", "?", "|", "}", ")", ">", "]"]
+ special_cases = ['(', '{', '[', '<', '\\']
+
+ # it should be equivalent to
+ # [ '!', '"', '#', '%', '&', "'", ',', '-', ':', ';', '@', '_', '`', '/', '=', '~' ]
+ non_meta_character_terminators = all_terminators - meta_character_terminators - special_cases
+
+ non_meta_character_terminators.each do |c|
+ pattern = eval("%r" + c + "\\" + c + c)
+ pattern.should == /#{c}/
+ end
+ end
+
+ it "does not change semantics of escaped meta-character when used as a terminator" do
+ meta_character_terminators = ["$", "^", "*", "+", ".", "?", "|", "}", ")", ">", "]"]
+
+ meta_character_terminators.each do |c|
+ pattern = eval("%r" + c + "\\" + c + c)
+ pattern.should == eval("/\\#{c}/")
+ end
+ end
+
it "allows any character to be escaped" do
/\y/.match("y").to_a.should == ["y"]
end
- it "support \\x (hex characters)" do
+ it "supports \\x (hex characters)" do
/\xA/.match("\nxyz").to_a.should == ["\n"]
/\x0A/.match("\n").to_a.should == ["\n"]
- /\xAA/.match("\nA").should be_nil
+ /\xAA/.match("\nA").should == nil
/\x0AA/.match("\nA").to_a.should == ["\nA"]
/\xAG/.match("\nG").to_a.should == ["\nG"]
# Non-matches
- -> { eval('/\xG/') }.should raise_error(SyntaxError)
+ -> { eval('/\xG/') }.should.raise(SyntaxError)
# \x{7HHHHHHH} wide hexadecimal char (character code point value)
end
- it "support \\c (control characters)" do
+ it "supports \\c (control characters)" do
#/\c \c@\c`/.match("\00\00\00").to_a.should == ["\00\00\00"]
/\c#\cc\cC/.match("\03\03\03").to_a.should == ["\03\03\03"]
/\c'\cG\cg/.match("\a\a\a").to_a.should == ["\a\a\a"]
@@ -64,14 +136,14 @@ describe "Regexps with escape characters" do
/\c,\cL\cl/.match("\f\f\f").to_a.should == ["\f\f\f"]
/\c-\cM\cm/.match("\r\r\r").to_a.should == ["\r\r\r"]
- /\cJ/.match("\r").should be_nil
+ /\cJ/.match("\r").should == nil
# Parsing precedence
/\cJ+/.match("\n\n").to_a.should == ["\n\n"] # Quantifiers apply to entire escape sequence
/\\cJ/.match("\\cJ").to_a.should == ["\\cJ"]
- -> { eval('/[abc\x]/') }.should raise_error(SyntaxError) # \x is treated as a escape sequence even inside a character class
+ -> { eval('/[abc\x]/') }.should.raise(SyntaxError) # \x is treated as a escape sequence even inside a character class
# Syntax error
- -> { eval('/\c/') }.should raise_error(SyntaxError)
+ -> { eval('/\c/') }.should.raise(SyntaxError)
# \cx control char (character code point value)
# \C-x control char (character code point value)
diff --git a/spec/ruby/language/regexp/grouping_spec.rb b/spec/ruby/language/regexp/grouping_spec.rb
index 2fecf2d2cb..80ad7460da 100644
--- a/spec/ruby/language/regexp/grouping_spec.rb
+++ b/spec/ruby/language/regexp/grouping_spec.rb
@@ -12,7 +12,7 @@ describe "Regexps with grouping" do
end
it "raises a SyntaxError when parentheses aren't balanced" do
- -> { eval "/(hay(st)ack/" }.should raise_error(SyntaxError)
+ -> { eval "/(hay(st)ack/" }.should.raise(SyntaxError)
end
it "supports (?: ) (non-capturing group)" do
@@ -22,7 +22,42 @@ describe "Regexps with grouping" do
end
it "group names cannot start with digits or minus" do
- -> { Regexp.new("(?<1a>a)") }.should raise_error(RegexpError)
- -> { Regexp.new("(?<-a>a)") }.should raise_error(RegexpError)
+ -> { Regexp.new("(?<1a>a)") }.should.raise(RegexpError)
+ -> { Regexp.new("(?<-a>a)") }.should.raise(RegexpError)
+ end
+
+ it "ignore capture groups in line comments" do
+ /^
+ (a) # there is a capture group on this line
+ b # there is no capture group on this line (not even here)
+ $/x.match("ab").to_a.should == [ "ab", "a" ]
+ end
+
+ it "does not consider # inside a character class as a comment" do
+ # From https://github.com/rubocop/rubocop/blob/39fcf1c568/lib/rubocop/cop/utils/format_string.rb#L18
+ regexp = /
+ % (?<type>%) # line comment
+ | % (?<flags>(?-mix:[ #0+-]|(?-mix:(\d+)\$))*) (?#group comment)
+ (?:
+ (?: (?-mix:(?<width>(?-mix:\d+|(?-mix:\*(?-mix:(\d+)\$)?))))? (?-mix:\.(?<precision>(?-mix:\d+|(?-mix:\*(?-mix:(\d+)\$)?))))? (?-mix:<(?<name>\w+)>)?
+ | (?-mix:(?<width>(?-mix:\d+|(?-mix:\*(?-mix:(\d+)\$)?))))? (?-mix:<(?<name>\w+)>) (?-mix:\.(?<precision>(?-mix:\d+|(?-mix:\*(?-mix:(\d+)\$)?))))?
+ | (?-mix:<(?<name>\w+)>) (?<more_flags>(?-mix:[ #0+-]|(?-mix:(\d+)\$))*) (?-mix:(?<width>(?-mix:\d+|(?-mix:\*(?-mix:(\d+)\$)?))))? (?-mix:\.(?<precision>(?-mix:\d+|(?-mix:\*(?-mix:(\d+)\$)?))))?
+ ) (?-mix:(?<type>[bBdiouxXeEfgGaAcps]))
+ | (?-mix:(?<width>(?-mix:\d+|(?-mix:\*(?-mix:(\d+)\$)?))))? (?-mix:\.(?<precision>(?-mix:\d+|(?-mix:\*(?-mix:(\d+)\$)?))))? (?-mix:\{(?<name>\w+)\})
+ )
+ /x
+ regexp.named_captures.should == {
+ "type" => [1, 13],
+ "flags" => [2],
+ "width" => [3, 6, 11, 14],
+ "precision" => [4, 8, 12, 15],
+ "name" => [5, 7, 9, 16],
+ "more_flags" => [10]
+ }
+ match = regexp.match("%6.3f")
+ match[:width].should == '6'
+ match[:precision].should == '3'
+ match[:type].should == 'f'
+ match.to_a.should == [ "%6.3f", nil, "", "6", "3"] + [nil] * 8 + ["f"] + [nil] * 3
end
end
diff --git a/spec/ruby/language/regexp/interpolation_spec.rb b/spec/ruby/language/regexp/interpolation_spec.rb
index ed0b724763..f771d0a395 100644
--- a/spec/ruby/language/regexp/interpolation_spec.rb
+++ b/spec/ruby/language/regexp/interpolation_spec.rb
@@ -36,14 +36,14 @@ describe "Regexps with interpolation" do
it "gives precedence to escape sequences over substitution" do
str = "J"
- /\c#{str}/.to_s.should == '(?-mix:\c#' + '{str})'
+ /\c#{str}/.to_s.should.include?('{str}')
end
it "throws RegexpError for malformed interpolation" do
s = ""
- -> { /(#{s}/ }.should raise_error(RegexpError)
+ -> { /(#{s}/ }.should.raise(RegexpError)
s = "("
- -> { /#{s}/ }.should raise_error(RegexpError)
+ -> { /#{s}/ }.should.raise(RegexpError)
end
it "allows interpolation in extended mode" do
diff --git a/spec/ruby/language/regexp/modifiers_spec.rb b/spec/ruby/language/regexp/modifiers_spec.rb
index 2f5522bc8a..c96fbfa983 100644
--- a/spec/ruby/language/regexp/modifiers_spec.rb
+++ b/spec/ruby/language/regexp/modifiers_spec.rb
@@ -8,7 +8,7 @@ describe "Regexps with modifiers" do
it "supports /m (multiline)" do
/foo.bar/m.match("foo\nbar").to_a.should == ["foo\nbar"]
- /foo.bar/.match("foo\nbar").should be_nil
+ /foo.bar/.match("foo\nbar").should == nil
end
it "supports /x (extended syntax)" do
@@ -36,7 +36,7 @@ describe "Regexps with modifiers" do
/foo/imox.match("foo").to_a.should == ["foo"]
/foo/imoximox.match("foo").to_a.should == ["foo"]
- -> { eval('/foo/a') }.should raise_error(SyntaxError)
+ -> { eval('/foo/a') }.should.raise(SyntaxError)
end
it "supports (?~) (absent operator)" do
@@ -46,57 +46,57 @@ describe "Regexps with modifiers" do
it "supports (?imx-imx) (inline modifiers)" do
/(?i)foo/.match("FOO").to_a.should == ["FOO"]
- /foo(?i)/.match("FOO").should be_nil
+ /foo(?i)/.match("FOO").should == nil
# Interaction with /i
- /(?-i)foo/i.match("FOO").should be_nil
+ /(?-i)foo/i.match("FOO").should == nil
/foo(?-i)/i.match("FOO").to_a.should == ["FOO"]
# Multiple uses
/foo (?i)bar (?-i)baz/.match("foo BAR baz").to_a.should == ["foo BAR baz"]
- /foo (?i)bar (?-i)baz/.match("foo BAR BAZ").should be_nil
+ /foo (?i)bar (?-i)baz/.match("foo BAR BAZ").should == nil
/(?m)./.match("\n").to_a.should == ["\n"]
- /.(?m)/.match("\n").should be_nil
+ /.(?m)/.match("\n").should == nil
# Interaction with /m
- /(?-m)./m.match("\n").should be_nil
+ /(?-m)./m.match("\n").should == nil
/.(?-m)/m.match("\n").to_a.should == ["\n"]
# Multiple uses
/. (?m). (?-m)./.match(". \n .").to_a.should == [". \n ."]
- /. (?m). (?-m)./.match(". \n \n").should be_nil
+ /. (?m). (?-m)./.match(". \n \n").should == nil
/(?x) foo /.match("foo").to_a.should == ["foo"]
- / foo (?x)/.match("foo").should be_nil
+ / foo (?x)/.match("foo").should == nil
# Interaction with /x
- /(?-x) foo /x.match("foo").should be_nil
+ /(?-x) foo /x.match("foo").should == nil
/ foo (?-x)/x.match("foo").to_a.should == ["foo"]
# Multiple uses
/( foo )(?x)( bar )(?-x)( baz )/.match(" foo bar baz ").to_a.should == [" foo bar baz ", " foo ", "bar", " baz "]
- /( foo )(?x)( bar )(?-x)( baz )/.match(" foo barbaz").should be_nil
+ /( foo )(?x)( bar )(?-x)( baz )/.match(" foo barbaz").should == nil
# Parsing
- /(?i-i)foo/.match("FOO").should be_nil
+ /(?i-i)foo/.match("FOO").should == nil
/(?ii)foo/.match("FOO").to_a.should == ["FOO"]
/(?-)foo/.match("foo").to_a.should == ["foo"]
- -> { eval('/(?o)/') }.should raise_error(SyntaxError)
+ -> { eval('/(?o)/') }.should.raise(SyntaxError)
end
it "supports (?imx-imx:expr) (scoped inline modifiers)" do
/foo (?i:bar) baz/.match("foo BAR baz").to_a.should == ["foo BAR baz"]
- /foo (?i:bar) baz/.match("foo BAR BAZ").should be_nil
- /foo (?-i:bar) baz/i.match("foo BAR BAZ").should be_nil
+ /foo (?i:bar) baz/.match("foo BAR BAZ").should == nil
+ /foo (?-i:bar) baz/i.match("foo BAR BAZ").should == nil
/. (?m:.) ./.match(". \n .").to_a.should == [". \n ."]
- /. (?m:.) ./.match(". \n \n").should be_nil
- /. (?-m:.) ./m.match("\n \n \n").should be_nil
+ /. (?m:.) ./.match(". \n \n").should == nil
+ /. (?-m:.) ./m.match("\n \n \n").should == nil
/( foo )(?x: bar )( baz )/.match(" foo bar baz ").to_a.should == [" foo bar baz ", " foo ", " baz "]
- /( foo )(?x: bar )( baz )/.match(" foo barbaz").should be_nil
+ /( foo )(?x: bar )( baz )/.match(" foo barbaz").should == nil
/( foo )(?-x: bar )( baz )/x.match("foo bar baz").to_a.should == ["foo bar baz", "foo", "baz"]
# Parsing
- /(?i-i:foo)/.match("FOO").should be_nil
+ /(?i-i:foo)/.match("FOO").should == nil
/(?ii:foo)/.match("FOO").to_a.should == ["FOO"]
/(?-:)foo/.match("foo").to_a.should == ["foo"]
- -> { eval('/(?o:)/') }.should raise_error(SyntaxError)
+ -> { eval('/(?o:)/') }.should.raise(SyntaxError)
end
it "supports . with /m" do
diff --git a/spec/ruby/language/regexp/repetition_spec.rb b/spec/ruby/language/regexp/repetition_spec.rb
index 295b3bf553..f24323de5c 100644
--- a/spec/ruby/language/regexp/repetition_spec.rb
+++ b/spec/ruby/language/regexp/repetition_spec.rb
@@ -15,7 +15,7 @@ describe "Regexps with repetition" do
it "supports + (1 or more of previous subexpression)" do
/a+/.match("aaa").to_a.should == ["aaa"]
- /a+/.match("bbb").should be_nil
+ /a+/.match("bbb").should == nil
/<.+>/.match("<a>foo</a>").to_a.should == ["<a>foo</a>"] # it is greedy
end
@@ -87,9 +87,7 @@ describe "Regexps with repetition" do
/a+?*/.match("a")[0].should == "a"
/(a+?)*/.match("a")[0].should == "a"
- ruby_bug '#17341', ''...'3.0' do
- /a+?*/.match("aa")[0].should == "aa"
- end
+ /a+?*/.match("aa")[0].should == "aa"
/(a+?)*/.match("aa")[0].should == "aa"
# a+?+ should not be reduced, it should be equivalent to (a+?)+
@@ -100,9 +98,7 @@ describe "Regexps with repetition" do
/a+?+/.match("a")[0].should == "a"
/(a+?)+/.match("a")[0].should == "a"
- ruby_bug '#17341', ''...'3.0' do
- /a+?+/.match("aa")[0].should == "aa"
- end
+ /a+?+/.match("aa")[0].should == "aa"
/(a+?)+/.match("aa")[0].should == "aa"
# both a**? and a+*? should be equivalent to (a+)??
@@ -128,4 +124,15 @@ describe "Regexps with repetition" do
RUBY
end
end
+
+ it "treats ? after {n} quantifier as another quantifier, not as non-greedy marker" do
+ /a{2}?/.match("").to_a.should == [""]
+ end
+
+ it "matches zero-width capture groups in optional iterations of loops" do
+ /()?/.match("").to_a.should == ["", ""]
+ /(a*)?/.match("").to_a.should == ["", ""]
+ /(a*)*/.match("").to_a.should == ["", ""]
+ /(?:a|()){500,1000}/.match("a" * 500).to_a.should == ["a" * 500, ""]
+ end
end