summary | refs | log | tree | commit | diff
path: root/spec/ruby/language/regexp
diff options
context:
space:
mode:
author: Benoit Daloze <eregontp@gmail.com> 2021-02-27 13:00:26 +0100
committer: Benoit Daloze <eregontp@gmail.com> 2021-02-27 13:00:26 +0100
commit: 36dde35e029c7a6607e6c674062ce6fc7a51c0bd (patch)
tree: 47f9c820a93d5b9a68f7e903cc01ee607913e2dd /spec/ruby/language/regexp
parent: dbea0be13dc1f44833eca43a73f3ab898fa27c15 (diff)
Update to ruby/spec@37e52e5
Diffstat (limited to 'spec/ruby/language/regexp')
-rw-r--r-- spec/ruby/language/regexp/back-references_spec.rb 5
-rw-r--r-- spec/ruby/language/regexp/empty_checks_spec.rb 135
-rw-r--r-- spec/ruby/language/regexp/repetition_spec.rb 11
3 files changed, 151 insertions, 0 deletions
diff --git a/spec/ruby/language/regexp/back-references_spec.rb b/spec/ruby/language/regexp/back-references_spec.rb
index e8df8725c5..26750c20c5 100644
--- a/spec/ruby/language/regexp/back-references_spec.rb
+++ b/spec/ruby/language/regexp/back-references_spec.rb
@@ -63,6 +63,11 @@ describe "Regexps with back-references" do
(/\10()()()()()()()()()()/ =~ "\x08").should == 0
end
+ it "fails when trying to match a backreference to an unmatched capture group" do
+ /\1()/.match("").should == nil # \1 is attempted before group 1 has captured anything
+ /(?:(a)|b)\1/.match("b").should == nil # the "b" alternative is the one that matches, so group 1 never captures
+ end
+
it "ignores backreferences > 1000" do
/\99999/.match("99999")[0].should == "99999" # the whole match is the text "99999"; \99999 does not behave as a reference to a group
end
diff --git a/spec/ruby/language/regexp/empty_checks_spec.rb b/spec/ruby/language/regexp/empty_checks_spec.rb
new file mode 100644
index 0000000000..391e65b003
--- /dev/null
+++ b/spec/ruby/language/regexp/empty_checks_spec.rb
@@ -0,0 +1,135 @@
+require_relative '../../spec_helper'
+require_relative '../fixtures/classes'
+
+describe "empty checks in Regexps" do
+
+ it "allow extra empty iterations" do # every quantified group below is able to match the empty string
+ /()?/.match("").to_a.should == ["", ""]
+ /(a*)?/.match("").to_a.should == ["", ""]
+ /(a*)*/.match("").to_a.should == ["", ""]
+ # The bounds are high to avoid DFA-based matchers in implementations
+ # and to check backtracking behavior. Group 1 is expected to be "" (not nil) once the 500 a's are consumed.
+ /(?:a|()){500,1000}/.match("a" * 500).to_a.should == ["a" * 500, ""]
+
+ # Variations with non-greedy loops: a lazy outer quantifier (`??`, `*?`) leaves the group nil; laziness only inside the group still yields "".
+ /()??/.match("").to_a.should == ["", nil]
+ /(a*?)?/.match("").to_a.should == ["", ""]
+ /(a*)??/.match("").to_a.should == ["", nil]
+ /(a*?)??/.match("").to_a.should == ["", nil]
+ /(a*?)*/.match("").to_a.should == ["", ""]
+ /(a*)*?/.match("").to_a.should == ["", nil]
+ /(a*?)*?/.match("").to_a.should == ["", nil]
+ end
+
+ it "allow empty iterations in the middle of a loop" do
+ # One empty iteration between a's and b's.
+ /(a|\2b|())*/.match("aaabbb").to_a.should == ["aaabbb", "", ""]
+ /(a|\2b|()){2,4}/.match("aaabbb").to_a.should == ["aaa", "", ""]
+
+ # Two empty iterations between a's and b's.
+ /(a|\2b|\3()|())*/.match("aaabbb").to_a.should == ["aaabbb", "", "", ""]
+ /(a|\2b|\3()|()){2,4}/.match("aaabbb").to_a.should == ["aaa", "", nil, ""]
+
+ # Check that the empty iteration correctly updates the loop counter.
+ /(a|\2b|()){20,24}/.match("a" * 20 + "b" * 5).to_a.should == ["a" * 20 + "b" * 3, "b", ""]
+
+ # Variations with non-greedy loops.
+ /(a|\2b|())*?/.match("aaabbb").to_a.should == ["", nil, nil]
+ /(a|\2b|()){2,4}/.match("aaabbb").to_a.should == ["aaa", "", ""]
+ /(a|\2b|\3()|())*?/.match("aaabbb").to_a.should == ["", nil, nil, nil]
+ /(a|\2b|\3()|()){2,4}/.match("aaabbb").to_a.should == ["aaa", "", nil, ""]
+ /(a|\2b|()){20,24}/.match("a" * 20 + "b" * 5).to_a.should == ["a" * 20 + "b" * 3, "b", ""]
+ end
+
+ it "make the Regexp proceed past the quantified expression on failure" do
+ # If the contents of the ()* quantified group are empty (i.e., they fail
+ # the empty check), the loop will abort. It will not try to backtrack
+ # and try other alternatives (e.g. matching the "a") like in other Regexp
+ # dialects such as ECMAScript. Hence every expected overall match below is "".
+ /(?:|a)*/.match("aaa").to_a.should == [""]
+ /(?:()|a)*/.match("aaa").to_a.should == ["", ""]
+ /(|a)*/.match("aaa").to_a.should == ["", ""]
+ /(()|a)*/.match("aaa").to_a.should == ["", "", ""]
+
+ # Same expressions, but with backreferences, to force the use of non-DFA-based
+ # engines. The leading ()\1 only adds one more "" capture to each expected result.
+ /()\1(?:|a)*/.match("aaa").to_a.should == ["", ""]
+ /()\1(?:()|a)*/.match("aaa").to_a.should == ["", "", ""]
+ /()\1(|a)*/.match("aaa").to_a.should == ["", "", ""]
+ /()\1(()|a)*/.match("aaa").to_a.should == ["", "", "", ""]
+
+ # Variations with other zero-width contents of the quantified
+ # group: backreferences, capture groups, lookarounds.
+ /()(?:\1|a)*/.match("aaa").to_a.should == ["", ""]
+ /()(?:()\1|a)*/.match("aaa").to_a.should == ["", "", ""]
+ /()(?:(\1)|a)*/.match("aaa").to_a.should == ["", "", ""]
+ /()(?:\1()|a)*/.match("aaa").to_a.should == ["", "", ""]
+ /()(\1|a)*/.match("aaa").to_a.should == ["", "", ""]
+ /()(()\1|a)*/.match("aaa").to_a.should == ["", "", "", ""]
+ /()((\1)|a)*/.match("aaa").to_a.should == ["", "", "", ""]
+ /()(\1()|a)*/.match("aaa").to_a.should == ["", "", "", ""]
+
+ /(?:(?=a)|a)*/.match("aaa").to_a.should == [""]
+ /(?:(?=a)()|a)*/.match("aaa").to_a.should == ["", ""]
+ /(?:()(?=a)|a)*/.match("aaa").to_a.should == ["", ""]
+ /(?:((?=a))|a)*/.match("aaa").to_a.should == ["", ""]
+ /()\1(?:(?=a)|a)*/.match("aaa").to_a.should == ["", ""]
+ /()\1(?:(?=a)()|a)*/.match("aaa").to_a.should == ["", "", ""]
+ /()\1(?:()(?=a)|a)*/.match("aaa").to_a.should == ["", "", ""]
+ /()\1(?:((?=a))|a)*/.match("aaa").to_a.should == ["", "", ""]
+
+ # Variations with non-greedy loops: captures inside the lazy loop are expected to be nil instead of "".
+ /(?:|a)*?/.match("aaa").to_a.should == [""]
+ /(?:()|a)*?/.match("aaa").to_a.should == ["", nil]
+ /(|a)*?/.match("aaa").to_a.should == ["", nil]
+ /(()|a)*?/.match("aaa").to_a.should == ["", nil, nil]
+
+ /()\1(?:|a)*?/.match("aaa").to_a.should == ["", ""]
+ /()\1(?:()|a)*?/.match("aaa").to_a.should == ["", "", nil]
+ /()\1(|a)*?/.match("aaa").to_a.should == ["", "", nil]
+ /()\1(()|a)*?/.match("aaa").to_a.should == ["", "", nil, nil]
+
+ /()(?:\1|a)*?/.match("aaa").to_a.should == ["", ""]
+ /()(?:()\1|a)*?/.match("aaa").to_a.should == ["", "", nil]
+ /()(?:(\1)|a)*?/.match("aaa").to_a.should == ["", "", nil]
+ /()(?:\1()|a)*?/.match("aaa").to_a.should == ["", "", nil]
+ /()(\1|a)*?/.match("aaa").to_a.should == ["", "", nil]
+ /()(()\1|a)*?/.match("aaa").to_a.should == ["", "", nil, nil]
+ /()((\1)|a)*?/.match("aaa").to_a.should == ["", "", nil, nil]
+ /()(\1()|a)*?/.match("aaa").to_a.should == ["", "", nil, nil]
+
+ /(?:(?=a)|a)*?/.match("aaa").to_a.should == [""]
+ /(?:(?=a)()|a)*?/.match("aaa").to_a.should == ["", nil]
+ /(?:()(?=a)|a)*?/.match("aaa").to_a.should == ["", nil]
+ /(?:((?=a))|a)*?/.match("aaa").to_a.should == ["", nil]
+ /()\1(?:(?=a)|a)*?/.match("aaa").to_a.should == ["", ""]
+ /()\1(?:(?=a)()|a)*?/.match("aaa").to_a.should == ["", "", nil]
+ /()\1(?:()(?=a)|a)*?/.match("aaa").to_a.should == ["", "", nil]
+ /()\1(?:((?=a))|a)*?/.match("aaa").to_a.should == ["", "", nil]
+ end
+
+ it "shouldn't cause the Regexp parser to get stuck in a loop" do # every loop body below has at least one alternative that can match empty
+ /(|a|\2b|())*/.match("aaabbb").to_a.should == ["", "", nil]
+ /(a||\2b|())*/.match("aaabbb").to_a.should == ["aaa", "", nil]
+ /(a|\2b||())*/.match("aaabbb").to_a.should == ["aaa", "", nil]
+ /(a|\2b|()|)*/.match("aaabbb").to_a.should == ["aaabbb", "", ""]
+ /(()|a|\3b|())*/.match("aaabbb").to_a.should == ["", "", "", nil]
+ /(a|()|\3b|())*/.match("aaabbb").to_a.should == ["aaa", "", "", nil]
+ /(a|\2b|()|())*/.match("aaabbb").to_a.should == ["aaabbb", "", "", nil]
+ /(a|\3b|()|())*/.match("aaabbb").to_a.should == ["aaa", "", "", nil]
+ /(a|()|())*/.match("aaa").to_a.should == ["aaa", "", "", nil]
+ /^(()|a|())*$/.match("aaa").to_a.should == ["aaa", "", "", nil]
+
+ # Variations with non-greedy loops: without an anchor the lazy loop is expected to match nothing at all.
+ /(|a|\2b|())*?/.match("aaabbb").to_a.should == ["", nil, nil]
+ /(a||\2b|())*?/.match("aaabbb").to_a.should == ["", nil, nil]
+ /(a|\2b||())*?/.match("aaabbb").to_a.should == ["", nil, nil]
+ /(a|\2b|()|)*?/.match("aaabbb").to_a.should == ["", nil, nil]
+ /(()|a|\3b|())*?/.match("aaabbb").to_a.should == ["", nil, nil, nil]
+ /(a|()|\3b|())*?/.match("aaabbb").to_a.should == ["", nil, nil, nil]
+ /(a|\2b|()|())*?/.match("aaabbb").to_a.should == ["", nil, nil, nil]
+ /(a|\3b|()|())*?/.match("aaabbb").to_a.should == ["", nil, nil, nil]
+ /(a|()|())*?/.match("aaa").to_a.should == ["", nil, nil, nil]
+ /^(()|a|())*?$/.match("aaa").to_a.should == ["aaa", "a", "", nil] # ^...$ requires the lazy loop to cover all of "aaa"
+ end
+end
diff --git a/spec/ruby/language/regexp/repetition_spec.rb b/spec/ruby/language/regexp/repetition_spec.rb
index 295b3bf553..9a191d74e2 100644
--- a/spec/ruby/language/regexp/repetition_spec.rb
+++ b/spec/ruby/language/regexp/repetition_spec.rb
@@ -128,4 +128,15 @@ describe "Regexps with repetition" do
RUBY
end
end
+
+ it "treats ? after {n} quantifier as another quantifier, not as non-greedy marker" do
+ /a{2}?/.match("").to_a.should == [""] # succeeds on empty input, so the trailing ? makes a{2} optional
+ end
+
+ it "matches zero-width capture groups in optional iterations of loops" do # same four assertions as the greedy cases of "allow extra empty iterations" in empty_checks_spec.rb
+ /()?/.match("").to_a.should == ["", ""]
+ /(a*)?/.match("").to_a.should == ["", ""]
+ /(a*)*/.match("").to_a.should == ["", ""]
+ /(?:a|()){500,1000}/.match("a" * 500).to_a.should == ["a" * 500, ""] # group 1 is "" rather than nil after the 500 a's are consumed
+ end
end