summary | refs | log | tree | commit | diff
path: root/spec/ruby/language/regexp
diff options
context:
space:
mode:
author Benoit Daloze <eregontp@gmail.com> 2020-12-27 17:35:32 +0100
committer Benoit Daloze <eregontp@gmail.com> 2020-12-27 17:35:32 +0100
commit 727c97da1977544c91b9b3677811da3a44af7d53 (patch)
tree 4f027117edad10789db57ff4b83242753a89e39d /spec/ruby/language/regexp
parent 267bed0cd91711e2a8c79219e97431ba22137b01 (diff)
Update to ruby/spec@4ce9f41
Diffstat (limited to 'spec/ruby/language/regexp')
-rw-r--r-- spec/ruby/language/regexp/back-references_spec.rb | 82
-rw-r--r-- spec/ruby/language/regexp/escapes_spec.rb | 16
-rw-r--r-- spec/ruby/language/regexp/grouping_spec.rb | 5
-rw-r--r-- spec/ruby/language/regexp/repetition_spec.rb | 83
-rw-r--r-- spec/ruby/language/regexp/subexpression_call_spec.rb | 50
5 files changed, 236 insertions, 0 deletions
diff --git a/spec/ruby/language/regexp/back-references_spec.rb b/spec/ruby/language/regexp/back-references_spec.rb
index 81015ac21e..e8df8725c5 100644
--- a/spec/ruby/language/regexp/back-references_spec.rb
+++ b/spec/ruby/language/regexp/back-references_spec.rb
@@ -47,7 +47,89 @@ describe "Regexps with back-references" do
/(a\1?){2}/.match("aaaa").to_a.should == ["aa", "a"]
end
+ it "does not reset enclosed capture groups" do
+ /((a)|(b))+/.match("ab").captures.should == [ "b", "a", "b" ]
+ end
+
it "can match an optional quote, followed by content, followed by a matching quote, as the whole string" do
/^("|)(.*)\1$/.match('x').to_a.should == ["x", "", "x"]
end
+
+ it "allows forward references" do
+ /(?:(\2)|(.))+/.match("aa").to_a.should == [ "aa", "a", "a" ]
+ end
+
+ it "disallows forward references >= 10" do
+ (/\10()()()()()()()()()()/ =~ "\x08").should == 0
+ end
+
+ it "ignores backreferences > 1000" do
+ /\99999/.match("99999")[0].should == "99999"
+ end
+
+ it "0 is not a valid backreference" do
+ -> { Regexp.new("\\k<0>") }.should raise_error(RegexpError)
+ end
+
+ it "allows numeric conditional backreferences" do
+ /(a)(?(1)a|b)/.match("aa").to_a.should == [ "aa", "a" ]
+ /(a)(?(<1>)a|b)/.match("aa").to_a.should == [ "aa", "a" ]
+ /(a)(?('1')a|b)/.match("aa").to_a.should == [ "aa", "a" ]
+ end
+
+ it "allows either <> or '' in named conditional backreferences" do
+ -> { Regexp.new("(?<a>a)(?(a)a|b)") }.should raise_error(RegexpError)
+ /(?<a>a)(?(<a>)a|b)/.match("aa").to_a.should == [ "aa", "a" ]
+ /(?<a>a)(?('a')a|b)/.match("aa").to_a.should == [ "aa", "a" ]
+ end
+
+ it "allows negative numeric backreferences" do
+ /(a)\k<-1>/.match("aa").to_a.should == [ "aa", "a" ]
+ /(a)\g<-1>/.match("aa").to_a.should == [ "aa", "a" ]
+ /(a)(?(<-1>)a|b)/.match("aa").to_a.should == [ "aa", "a" ]
+ /(a)(?('-1')a|b)/.match("aa").to_a.should == [ "aa", "a" ]
+ end
+
+ it "delimited numeric backreferences can start with 0" do
+ /(a)\k<01>/.match("aa").to_a.should == [ "aa", "a" ]
+ /(a)\g<01>/.match("aa").to_a.should == [ "aa", "a" ]
+ /(a)(?(01)a|b)/.match("aa").to_a.should == [ "aa", "a" ]
+ /(a)(?(<01>)a|b)/.match("aa").to_a.should == [ "aa", "a" ]
+ /(a)(?('01')a|b)/.match("aa").to_a.should == [ "aa", "a" ]
+ end
+
+ it "regular numeric backreferences cannot start with 0" do
+ /(a)\01/.match("aa").should == nil
+ /(a)\01/.match("a\x01").to_a.should == [ "a\x01", "a" ]
+ end
+
+ it "named capture groups invalidate numeric backreferences" do
+ -> { Regexp.new("(?<a>a)\\1") }.should raise_error(RegexpError)
+ -> { Regexp.new("(?<a>a)\\k<1>") }.should raise_error(RegexpError)
+ -> { Regexp.new("(a)(?<a>a)\\1") }.should raise_error(RegexpError)
+ -> { Regexp.new("(a)(?<a>a)\\k<1>") }.should raise_error(RegexpError)
+ end
+
+ it "treats + or - as the beginning of a level specifier in \\k<> backreferences and (?(...)...|...) conditional backreferences" do
+ -> { Regexp.new("(?<a+>a)\\k<a+>") }.should raise_error(RegexpError)
+ -> { Regexp.new("(?<a+b>a)\\k<a+b>") }.should raise_error(RegexpError)
+ -> { Regexp.new("(?<a+1>a)\\k<a+1>") }.should raise_error(RegexpError)
+ -> { Regexp.new("(?<a->a)\\k<a->") }.should raise_error(RegexpError)
+ -> { Regexp.new("(?<a-b>a)\\k<a-b>") }.should raise_error(RegexpError)
+ -> { Regexp.new("(?<a-1>a)\\k<a-1>") }.should raise_error(RegexpError)
+
+ -> { Regexp.new("(?<a+>a)(?(<a+>)a|b)") }.should raise_error(RegexpError)
+ -> { Regexp.new("(?<a+b>a)(?(<a+b>)a|b)") }.should raise_error(RegexpError)
+ -> { Regexp.new("(?<a+1>a)(?(<a+1>)a|b)") }.should raise_error(RegexpError)
+ -> { Regexp.new("(?<a->a)(?(<a->)a|b)") }.should raise_error(RegexpError)
+ -> { Regexp.new("(?<a-b>a)(?(<a-b>)a|b)") }.should raise_error(RegexpError)
+ -> { Regexp.new("(?<a-1>a)(?(<a-1>)a|b)") }.should raise_error(RegexpError)
+
+ -> { Regexp.new("(?<a+>a)(?('a+')a|b)") }.should raise_error(RegexpError)
+ -> { Regexp.new("(?<a+b>a)(?('a+b')a|b)") }.should raise_error(RegexpError)
+ -> { Regexp.new("(?<a+1>a)(?('a+1')a|b)") }.should raise_error(RegexpError)
+ -> { Regexp.new("(?<a->a)(?('a-')a|b)") }.should raise_error(RegexpError)
+ -> { Regexp.new("(?<a-b>a)(?('a-b')a|b)") }.should raise_error(RegexpError)
+ -> { Regexp.new("(?<a-1>a)(?('a-1')a|b)") }.should raise_error(RegexpError)
+ end
end
diff --git a/spec/ruby/language/regexp/escapes_spec.rb b/spec/ruby/language/regexp/escapes_spec.rb
index 14e1424d47..2e5fe5ad2e 100644
--- a/spec/ruby/language/regexp/escapes_spec.rb
+++ b/spec/ruby/language/regexp/escapes_spec.rb
@@ -78,4 +78,20 @@ describe "Regexps with escape characters" do
# \M-x meta (x|0x80) (character code point value)
# \M-\C-x meta control char (character code point value)
end
+
+ it "handles three digit octal escapes starting with 0" do
+ /[\000-\b]/.match("\x00")[0].should == "\x00"
+ end
+
+ it "handles control escapes with \\C-x syntax" do
+ /\C-*\C-J\C-j/.match("\n\n\n")[0].should == "\n\n\n"
+ end
+
+ it "supports the \\K keep operator" do
+ /a\Kb/.match("ab")[0].should == "b"
+ end
+
+ it "supports the \\R line break escape" do
+ /\R/.match("\n")[0].should == "\n"
+ end
end
diff --git a/spec/ruby/language/regexp/grouping_spec.rb b/spec/ruby/language/regexp/grouping_spec.rb
index 8806d06746..2fecf2d2cb 100644
--- a/spec/ruby/language/regexp/grouping_spec.rb
+++ b/spec/ruby/language/regexp/grouping_spec.rb
@@ -20,4 +20,9 @@ describe "Regexps with grouping" do
# Parsing precedence
/(?:xdigit:)/.match("xdigit:").to_a.should == ["xdigit:"]
end
+
+ it "group names cannot start with digits or minus" do
+ -> { Regexp.new("(?<1a>a)") }.should raise_error(RegexpError)
+ -> { Regexp.new("(?<-a>a)") }.should raise_error(RegexpError)
+ end
end
diff --git a/spec/ruby/language/regexp/repetition_spec.rb b/spec/ruby/language/regexp/repetition_spec.rb
index 7bb767ccaf..295b3bf553 100644
--- a/spec/ruby/language/regexp/repetition_spec.rb
+++ b/spec/ruby/language/regexp/repetition_spec.rb
@@ -45,4 +45,87 @@ describe "Regexps with repetition" do
/a?/.match("aaa").to_a.should == ["a"]
/a?/.match("bbb").to_a.should == [""]
end
+
+ it "handles incomplete range quantifiers" do
+ /a{}/.match("a{}")[0].should == "a{}"
+ /a{,}/.match("a{,}")[0].should == "a{,}"
+ /a{1/.match("a{1")[0].should == "a{1"
+ /a{1,2/.match("a{1,2")[0].should == "a{1,2"
+ /a{,5}/.match("aaa")[0].should == "aaa"
+ end
+
+ it "lets us use quantifiers on assertions" do
+ /a^?b/.match("ab")[0].should == "ab"
+ /a$?b/.match("ab")[0].should == "ab"
+ /a\A?b/.match("ab")[0].should == "ab"
+ /a\Z?b/.match("ab")[0].should == "ab"
+ /a\z?b/.match("ab")[0].should == "ab"
+ /a\G?b/.match("ab")[0].should == "ab"
+ /a\b?b/.match("ab")[0].should == "ab"
+ /a\B?b/.match("ab")[0].should == "ab"
+ /a(?=c)?b/.match("ab")[0].should == "ab"
+ /a(?!=b)?b/.match("ab")[0].should == "ab"
+ /a(?<=c)?b/.match("ab")[0].should == "ab"
+ /a(?<!a)?b/.match("ab")[0].should == "ab"
+ end
+
+ it "does not delete optional assertions" do
+ /(?=(a))?/.match("a").to_a.should == [ "", "a" ]
+ end
+
+ it "supports nested quantifiers" do
+ suppress_warning do
+ eval <<-RUBY
+ /a***/.match("aaa")[0].should == "aaa"
+
+ # a+?* should not be reduced, it should be equivalent to (a+?)*
+ # NB: the capture group prevents regex engines from reducing the two quantifiers
+ # https://bugs.ruby-lang.org/issues/17341
+ /a+?*/.match("")[0].should == ""
+ /(a+?)*/.match("")[0].should == ""
+
+ /a+?*/.match("a")[0].should == "a"
+ /(a+?)*/.match("a")[0].should == "a"
+
+ ruby_bug '#17341', ''...'3.0' do
+ /a+?*/.match("aa")[0].should == "aa"
+ end
+ /(a+?)*/.match("aa")[0].should == "aa"
+
+ # a+?+ should not be reduced, it should be equivalent to (a+?)+
+ # https://bugs.ruby-lang.org/issues/17341
+ /a+?+/.match("").should == nil
+ /(a+?)+/.match("").should == nil
+
+ /a+?+/.match("a")[0].should == "a"
+ /(a+?)+/.match("a")[0].should == "a"
+
+ ruby_bug '#17341', ''...'3.0' do
+ /a+?+/.match("aa")[0].should == "aa"
+ end
+ /(a+?)+/.match("aa")[0].should == "aa"
+
+ # both a**? and a+*? should be equivalent to (a+)??
+ # this quantifier would rather match nothing, but if that's not possible,
+ # it will greedily take everything
+ /a**?/.match("")[0].should == ""
+ /(a*)*?/.match("")[0].should == ""
+ /a+*?/.match("")[0].should == ""
+ /(a+)*?/.match("")[0].should == ""
+ /(a+)??/.match("")[0].should == ""
+
+ /a**?/.match("aaa")[0].should == ""
+ /(a*)*?/.match("aaa")[0].should == ""
+ /a+*?/.match("aaa")[0].should == ""
+ /(a+)*?/.match("aaa")[0].should == ""
+ /(a+)??/.match("aaa")[0].should == ""
+
+ /b.**?b/.match("baaabaaab")[0].should == "baaabaaab"
+ /b(.*)*?b/.match("baaabaaab")[0].should == "baaabaaab"
+ /b.+*?b/.match("baaabaaab")[0].should == "baaabaaab"
+ /b(.+)*?b/.match("baaabaaab")[0].should == "baaabaaab"
+ /b(.+)??b/.match("baaabaaab")[0].should == "baaabaaab"
+ RUBY
+ end
+ end
end
diff --git a/spec/ruby/language/regexp/subexpression_call_spec.rb b/spec/ruby/language/regexp/subexpression_call_spec.rb
new file mode 100644
index 0000000000..16b64cb327
--- /dev/null
+++ b/spec/ruby/language/regexp/subexpression_call_spec.rb
@@ -0,0 +1,50 @@
+require_relative '../../spec_helper'
+require_relative '../fixtures/classes'
+
+describe "Regexps with subexpression calls" do
+ it "allows numeric subexpression calls" do
+ /(a)\g<1>/.match("aa").to_a.should == [ "aa", "a" ]
+ end
+
+ it "treats subexpression calls as distinct from simple back-references" do
+ # Back-references only match a string which is equal to the original captured string.
+ /(?<three_digits>[0-9]{3})-\k<three_digits>/.match("123-123")[0].should == "123-123"
+ /(?<three_digits>[0-9]{3})-\k<three_digits>/.match("123-456").should == nil
+ # However, subexpression calls reuse the previous expression and can match a different
+ # string.
+ /(?<three_digits>[0-9]{3})-\g<three_digits>/.match("123-456")[0].should == "123-456"
+ end
+
+ it "allows recursive subexpression calls" do
+ # This pattern matches well-nested parenthesized expressions.
+ parens = /^ (?<parens> (?: \( \g<parens> \) | [^()] )* ) $/x
+ parens.match("((a)(b))c(d)")[0].should == "((a)(b))c(d)"
+ parens.match("((a)(b)c(d)").should == nil
+ end
+
+ it "allows access to back-references from the current level" do
+ # Using \k<first_char-0> accesses the last value captured in first_char
+ # on the current stack level.
+ mirror = /^ (?<mirror> (?: (?<first_char>.) \g<mirror> \k<first_char-0> )? ) $/x
+ mirror.match("abccba")[0].should == "abccba"
+ mirror.match("abccbd").should == nil
+
+ # On the other hand, using \k<first_char> accesses the last value captured in first_char,
+ # regardless of the stack level. Therefore, it can't be used to implement
+ # the mirror language.
+ broken_mirror = /^ (?<mirror> (?: (?<first_char>.) \g<mirror> \k<first_char> )? ) $/x
+ broken_mirror.match("abccba").should == nil
+ # This matches because the 'c' is captured in first_char and that value is
+ # then used for all subsequent back-references, regardless of nesting.
+ broken_mirror.match("abcccc")[0].should == "abcccc"
+ end
+
+ it "allows + and - in group names and referential constructs that don't use levels, i.e. subexpression calls" do
+ /(?<a+>a)\g<a+>/.match("aa").to_a.should == [ "aa", "a" ]
+ /(?<a+b>a)\g<a+b>/.match("aa").to_a.should == [ "aa", "a" ]
+ /(?<a+1>a)\g<a+1>/.match("aa").to_a.should == [ "aa", "a" ]
+ /(?<a->a)\g<a->/.match("aa").to_a.should == [ "aa", "a" ]
+ /(?<a-b>a)\g<a-b>/.match("aa").to_a.should == [ "aa", "a" ]
+ /(?<a-1>a)\g<a-1>/.match("aa").to_a.should == [ "aa", "a" ]
+ end
+end