11 files changed, 1855 insertions, 0 deletions
diff --git a/spec/ruby/language/regexp/anchors_spec.rb b/spec/ruby/language/regexp/anchors_spec.rb
new file mode 100644
index 0000000000..8e597b65e8
--- /dev/null
+++ b/spec/ruby/language/regexp/anchors_spec.rb
@@ -0,0 +1,179 @@
+require_relative '../../spec_helper'
+require_relative '../fixtures/classes'
+
+describe "Regexps with anchors" do
+  it "supports ^ (line start anchor)" do
+    # Basic matching
+    /^foo/.match("foo").to_a.should == ["foo"]
+    /^bar/.match("foo\nbar").to_a.should == ["bar"]
+    # Basic non-matching
+    /^foo/.match(" foo").should == nil
+    /foo^/.match("foo\n\n\n").should == nil
+
+    # A bit advanced
+    /^^^foo/.match("foo").to_a.should == ["foo"]
+    (/^[^f]/ =~ "foo\n\n").should == "foo\n".size and $~.to_a.should == ["\n"]
+    (/($^)($^)/ =~ "foo\n\n").should == "foo\n".size and $~.to_a.should == ["", "", ""]
+
+    # Different start of line chars
+    /^bar/.match("foo\rbar").should == nil
+    /^bar/.match("foo\0bar").should == nil
+
+    # Trivial
+    /^/.match("foo").to_a.should == [""]
+
+    # Grouping
+    /(^foo)/.match("foo").to_a.should == ["foo", "foo"]
+    /(^)/.match("foo").to_a.should == ["", ""]
+    /(foo\n^)(^bar)/.match("foo\nbar").to_a.should == ["foo\nbar", "foo\n", "bar"]
+  end
+
+  it "does not match ^ after trailing \\n" do
+    /^(?!\A)/.match("foo\n").should == nil # There is no (empty) line after a trailing \n
+  end
+
+  it "supports $ (line end anchor)" do
+    # Basic  matching
+    /foo$/.match("foo").to_a.should == ["foo"]
+    /foo$/.match("foo\nbar").to_a.should == ["foo"]
+    # Basic non-matching
+    /foo$/.match("foo ").should == nil
+    /$foo/.match("\n\n\nfoo").should == nil
+
+    # A bit advanced
+    /foo$$$/.match("foo").to_a.should == ["foo"]
+    (/[^o]$/ =~ "foo\n\n").should == ("foo\n".size - 1) and $~.to_a.should == ["\n"]
+
+    # Different end of line chars
+    /foo$/.match("foo\r\nbar").should == nil
+    /foo$/.match("foo\0bar").should == nil
+
+    # Trivial
+    (/$/ =~ "foo").should == "foo".size and $~.to_a.should == [""]
+
+    # Grouping
+    /(foo$)/.match("foo").to_a.should == ["foo", "foo"]
+    (/($)/ =~ "foo").should == "foo".size and $~.to_a.should == ["", ""]
+    /(foo$)($\nbar)/.match("foo\nbar").to_a.should == ["foo\nbar", "foo", "\nbar"]
+  end
+
+  it "supports \\A (string start anchor)" do
+    # Basic matching
+    /\Afoo/.match("foo").to_a.should == ["foo"]
+    # Basic non-matching
+    /\Abar/.match("foo\nbar").should == nil
+    /\Afoo/.match(" foo").should == nil
+
+    # A bit advanced
+    /\A\A\Afoo/.match("foo").to_a.should == ["foo"]
+    /(\A\Z)(\A\Z)/.match("").to_a.should == ["", "", ""]
+
+    # Different start of line chars
+    /\Abar/.match("foo\0bar").should == nil
+
+    # Grouping
+    /(\Afoo)/.match("foo").to_a.should == ["foo", "foo"]
+    /(\A)/.match("foo").to_a.should == ["", ""]
+  end
+
+  it "supports \\Z (string end anchor, including before trailing \\n)" do
+    # Basic matching
+    /foo\Z/.match("foo").to_a.should == ["foo"]
+    /foo\Z/.match("foo\n").to_a.should == ["foo"]
+    # Basic non-matching
+    /foo\Z/.match("foo\nbar").should == nil
+    /foo\Z/.match("foo ").should == nil
+
+    # A bit advanced
+    /foo\Z\Z\Z/.match("foo\n").to_a.should == ["foo"]
+    (/($\Z)($\Z)/ =~ "foo\n").should == "foo".size and $~.to_a.should == ["", "", ""]
+    (/(\z\Z)(\z\Z)/ =~ "foo\n").should == "foo\n".size and $~.to_a.should == ["", "", ""]
+
+    # Different end of line chars
+    /foo\Z/.match("foo\0bar").should == nil
+    /foo\Z/.match("foo\r\n").should == nil
+
+    # Grouping
+    /(foo\Z)/.match("foo").to_a.should == ["foo", "foo"]
+    (/(\Z)/ =~ "foo").should == "foo".size and $~.to_a.should == ["", ""]
+  end
+
+  it "supports \\z (string end anchor)" do
+    # Basic matching
+    /foo\z/.match("foo").to_a.should == ["foo"]
+    # Basic non-matching
+    /foo\z/.match("foo\nbar").should == nil
+    /foo\z/.match("foo\n").should == nil
+    /foo\z/.match("foo ").should == nil
+
+    # A bit advanced
+    /foo\z\z\z/.match("foo").to_a.should == ["foo"]
+    (/($\z)($\z)/ =~ "foo").should == "foo".size and $~.to_a.should == ["", "", ""]
+
+    # Different end of line chars
+    /foo\z/.match("foo\0bar").should == nil
+    /foo\z/.match("foo\r\nbar").should == nil
+
+    # Grouping
+    /(foo\z)/.match("foo").to_a.should == ["foo", "foo"]
+    (/(\z)/ =~ "foo").should == "foo".size and $~.to_a.should == ["", ""]
+  end
+
+  it "supports \\b (word boundary)" do
+    # Basic matching
+    /foo\b/.match("foo").to_a.should == ["foo"]
+    /foo\b/.match("foo\n").to_a.should == ["foo"]
+    LanguageSpecs.white_spaces.scan(/./).each do |c|
+      /foo\b/.match("foo" + c).to_a.should == ["foo"]
+    end
+    LanguageSpecs.non_alphanum_non_space.scan(/./).each do |c|
+      /foo\b/.match("foo" + c).to_a.should == ["foo"]
+    end
+    /foo\b/.match("foo\0").to_a.should == ["foo"]
+    # Basic non-matching
+    /foo\b/.match("foobar").should == nil
+    /foo\b/.match("foo123").should == nil
+    /foo\b/.match("foo_").should == nil
+  end
+
+  it "supports \\B (non-word-boundary)" do
+    # Basic matching
+    /foo\B/.match("foobar").to_a.should == ["foo"]
+    /foo\B/.match("foo123").to_a.should == ["foo"]
+    /foo\B/.match("foo_").to_a.should == ["foo"]
+    # Basic non-matching
+    /foo\B/.match("foo").should == nil
+    /foo\B/.match("foo\n").should == nil
+    LanguageSpecs.white_spaces.scan(/./).each do |c|
+      /foo\B/.match("foo" + c).should == nil
+    end
+    LanguageSpecs.non_alphanum_non_space.scan(/./).each do |c|
+      /foo\B/.match("foo" + c).should == nil
+    end
+    /foo\B/.match("foo\0").should == nil
+  end
+
+  it "supports (?= ) (positive lookahead)" do
+    /foo.(?=bar)/.match("foo1 foo2bar").to_a.should == ["foo2"]
+  end
+
+  it "supports (?! ) (negative lookahead)" do
+    /foo.(?!bar)/.match("foo1bar foo2").to_a.should == ["foo2"]
+  end
+
+  it "supports (?!<) (negative lookbehind)" do
+    /(?<!foo)bar./.match("foobar1 bar2").to_a.should == ["bar2"]
+  end
+
+  it "supports (?<=) (positive lookbehind)" do
+    /(?<=foo)bar./.match("bar1 foobar2").to_a.should == ["bar2"]
+  end
+
+  it "supports (?<=\\b) (positive lookbehind with word boundary)" do
+    /(?<=\bfoo)bar./.match("1foobar1 foobar2").to_a.should == ["bar2"]
+  end
+
+  it "supports (?!<\\b) (negative lookbehind with word boundary)" do
+    /(?<!\bfoo)bar./.match("foobar1 1foobar2").to_a.should == ["bar2"]
+  end
+end
diff --git a/spec/ruby/language/regexp/back-references_spec.rb b/spec/ruby/language/regexp/back-references_spec.rb
new file mode 100644
index 0000000000..3b4c5656a2
--- /dev/null
+++ b/spec/ruby/language/regexp/back-references_spec.rb
@@ -0,0 +1,149 @@
+require_relative '../../spec_helper'
+require_relative '../fixtures/classes'
+
+describe "Regexps with back-references" do
+  it "saves match data in the $~ pseudo-global variable" do
+    "hello" =~ /l+/
+    $~.to_a.should == ["ll"]
+  end
+
+  it "saves captures in numbered $[1-N] variables" do
+    "1234567890" =~ /(1)(2)(3)(4)(5)(6)(7)(8)(9)(0)/
+    $~.to_a.should == ["1234567890", "1", "2", "3", "4", "5", "6", "7", "8", "9", "0"]
+    $1.should == "1"
+    $2.should == "2"
+    $3.should == "3"
+    $4.should == "4"
+    $5.should == "5"
+    $6.should == "6"
+    $7.should == "7"
+    $8.should == "8"
+    $9.should == "9"
+    $10.should == "0"
+  end
+
+  it "returns nil for numbered variable with too large index" do
+    -> {
+      eval(<<~CODE).should == nil
+        "a" =~ /(.)/
+        eval('$4294967296')
+      CODE
+    }.should complain(/warning: ('|`)\$4294967296' is too big for a number variable, always nil/)
+  end
+
+  it "will not clobber capture variables across threads" do
+    cap1, cap2, cap3 = nil
+    "foo" =~ /(o+)/
+    cap1 = [$~.to_a, $1]
+    Thread.new do
+      cap2 = [$~.to_a, $1]
+      "bar" =~ /(a)/
+      cap3 = [$~.to_a, $1]
+    end.join
+    cap4 = [$~.to_a, $1]
+    cap1.should == [["oo", "oo"], "oo"]
+    cap2.should == [[], nil]
+    cap3.should == [["a", "a"], "a"]
+    cap4.should == [["oo", "oo"], "oo"]
+  end
+
+  it "supports \<n> (backreference to previous group match)" do
+    /(foo.)\1/.match("foo1foo1").to_a.should == ["foo1foo1", "foo1"]
+    /(foo.)\1/.match("foo1foo2").should == nil
+  end
+
+  it "resets nested \<n> backreference before match of outer subexpression" do
+    /(a\1?){2}/.match("aaaa").to_a.should == ["aa", "a"]
+  end
+
+  it "does not reset enclosed capture groups" do
+    /((a)|(b))+/.match("ab").captures.should == [ "b", "a", "b" ]
+  end
+
+  it "can match an optional quote, followed by content, followed by a matching quote, as the whole string" do
+    /^("|)(.*)\1$/.match('x').to_a.should == ["x", "", "x"]
+  end
+
+  it "allows forward references" do
+    /(?:(\2)|(.))+/.match("aa").to_a.should == [ "aa", "a", "a" ]
+  end
+
+  it "disallows forward references >= 10" do
+    (/\10()()()()()()()()()()/ =~ "\x08").should == 0
+  end
+
+  it "fails when trying to match a backreference to an unmatched capture group" do
+    /\1()/.match("").should == nil
+    /(?:(a)|b)\1/.match("b").should == nil
+  end
+
+  it "ignores backreferences > 1000" do
+    /\99999/.match("99999")[0].should == "99999"
+  end
+
+  it "0 is not a valid backreference" do
+    -> { Regexp.new("\\k<0>") }.should.raise(RegexpError)
+  end
+
+  it "allows numeric conditional backreferences" do
+    /(a)(?(1)a|b)/.match("aa").to_a.should == [ "aa", "a" ]
+    /(a)(?(<1>)a|b)/.match("aa").to_a.should == [ "aa", "a" ]
+    /(a)(?('1')a|b)/.match("aa").to_a.should == [ "aa", "a" ]
+  end
+
+  it "allows either <> or '' in named conditional backreferences" do
+    -> { Regexp.new("(?<a>a)(?(a)a|b)") }.should.raise(RegexpError)
+    /(?<a>a)(?(<a>)a|b)/.match("aa").to_a.should == [ "aa", "a" ]
+    /(?<a>a)(?('a')a|b)/.match("aa").to_a.should == [ "aa", "a" ]
+  end
+
+  it "allows negative numeric backreferences" do
+    /(a)\k<-1>/.match("aa").to_a.should == [ "aa", "a" ]
+    /(a)\g<-1>/.match("aa").to_a.should == [ "aa", "a" ]
+    /(a)(?(<-1>)a|b)/.match("aa").to_a.should == [ "aa", "a" ]
+    /(a)(?('-1')a|b)/.match("aa").to_a.should == [ "aa", "a" ]
+  end
+
+  it "delimited numeric backreferences can start with 0" do
+    /(a)\k<01>/.match("aa").to_a.should == [ "aa", "a" ]
+    /(a)\g<01>/.match("aa").to_a.should == [ "aa", "a" ]
+    /(a)(?(01)a|b)/.match("aa").to_a.should == [ "aa", "a" ]
+    /(a)(?(<01>)a|b)/.match("aa").to_a.should == [ "aa", "a" ]
+    /(a)(?('01')a|b)/.match("aa").to_a.should == [ "aa", "a" ]
+  end
+
+  it "regular numeric backreferences cannot start with 0" do
+    /(a)\01/.match("aa").should == nil
+    /(a)\01/.match("a\x01").to_a.should == [ "a\x01", "a" ]
+  end
+
+  it "named capture groups invalidate numeric backreferences" do
+    -> { Regexp.new("(?<a>a)\\1") }.should.raise(RegexpError)
+    -> { Regexp.new("(?<a>a)\\k<1>") }.should.raise(RegexpError)
+    -> { Regexp.new("(a)(?<a>a)\\1") }.should.raise(RegexpError)
+    -> { Regexp.new("(a)(?<a>a)\\k<1>") }.should.raise(RegexpError)
+  end
+
+  it "treats + or - as the beginning of a level specifier in \\k<> backreferences and (?(...)...|...) conditional backreferences" do
+    -> { Regexp.new("(?<a+>a)\\k<a+>") }.should.raise(RegexpError)
+    -> { Regexp.new("(?<a+b>a)\\k<a+b>") }.should.raise(RegexpError)
+    -> { Regexp.new("(?<a+1>a)\\k<a+1>") }.should.raise(RegexpError)
+    -> { Regexp.new("(?<a->a)\\k<a->") }.should.raise(RegexpError)
+    -> { Regexp.new("(?<a-b>a)\\k<a-b>") }.should.raise(RegexpError)
+    -> { Regexp.new("(?<a-1>a)\\k<a-1>") }.should.raise(RegexpError)
+
+    -> { Regexp.new("(?<a+>a)(?(<a+>)a|b)") }.should.raise(RegexpError)
+    -> { Regexp.new("(?<a+b>a)(?(<a+b>)a|b)") }.should.raise(RegexpError)
+    -> { Regexp.new("(?<a+1>a)(?(<a+1>)a|b)") }.should.raise(RegexpError)
+    -> { Regexp.new("(?<a->a)(?(<a->)a|b)") }.should.raise(RegexpError)
+    -> { Regexp.new("(?<a-b>a)(?(<a-b>)a|b)") }.should.raise(RegexpError)
+    -> { Regexp.new("(?<a-1>a)(?(<a-1>)a|b)") }.should.raise(RegexpError)
+
+    -> { Regexp.new("(?<a+>a)(?('a+')a|b)") }.should.raise(RegexpError)
+    -> { Regexp.new("(?<a+b>a)(?('a+b')a|b)") }.should.raise(RegexpError)
+    -> { Regexp.new("(?<a+1>a)(?('a+1')a|b)") }.should.raise(RegexpError)
+    -> { Regexp.new("(?<a->a)(?('a-')a|b)") }.should.raise(RegexpError)
+    -> { Regexp.new("(?<a-b>a)(?('a-b')a|b)") }.should.raise(RegexpError)
+    -> { Regexp.new("(?<a-1>a)(?('a-1')a|b)") }.should.raise(RegexpError)
+  end
+end
diff --git a/spec/ruby/language/regexp/character_classes_spec.rb b/spec/ruby/language/regexp/character_classes_spec.rb
new file mode 100644
index 0000000000..c6ed92b78e
--- /dev/null
+++ b/spec/ruby/language/regexp/character_classes_spec.rb
@@ -0,0 +1,647 @@
+# coding: utf-8
+require_relative '../../spec_helper'
+require_relative '../fixtures/classes'
+
+describe "Regexp with character classes" do
+  it "supports \\w (word character)" do
+    /\w/.match("a").to_a.should == ["a"]
+    /\w/.match("1").to_a.should == ["1"]
+    /\w/.match("_").to_a.should == ["_"] # underscore counts as a word character
+
+    # Non-matches
+    /\w/.match(LanguageSpecs.white_spaces).should == nil
+    /\w/.match(LanguageSpecs.non_alphanum_non_space).should == nil
+    /\w/.match("\0").should == nil # NUL is not a word character
+  end
+
+  it "supports \\W (non-word character)" do
+    /\W+/.match(LanguageSpecs.white_spaces).to_a.should == [LanguageSpecs.white_spaces] # the whole fixture string matches
+    /\W+/.match(LanguageSpecs.non_alphanum_non_space).to_a.should == [LanguageSpecs.non_alphanum_non_space]
+    /\W/.match("\0").to_a.should == ["\0"]
+
+    # Non-matches
+    /\W/.match("a").should == nil
+    /\W/.match("1").should == nil
+    /\W/.match("_").should == nil
+  end
+
+  it "supports \\s (space character)" do
+    /\s+/.match(LanguageSpecs.white_spaces).to_a.should == [LanguageSpecs.white_spaces] # the whole fixture string matches
+
+    # Non-matches
+    /\s/.match("a").should == nil
+    /\s/.match("1").should == nil
+    /\s/.match(LanguageSpecs.non_alphanum_non_space).should == nil
+    /\s/.match("\0").should == nil # NUL is not whitespace
+  end
+
+  it "supports \\S (non-space character)" do
+    /\S/.match("a").to_a.should == ["a"]
+    /\S/.match("1").to_a.should == ["1"]
+    /\S+/.match(LanguageSpecs.non_alphanum_non_space).to_a.should == [LanguageSpecs.non_alphanum_non_space]
+    /\S/.match("\0").to_a.should == ["\0"]
+
+    # Non-matches
+    /\S/.match(LanguageSpecs.white_spaces).should == nil
+  end
+
+  it "supports \\d (numeric digit)" do
+    /\d/.match("1").to_a.should == ["1"]
+
+    # Non-matches
+    /\d/.match("a").should == nil
+    /\d/.match(LanguageSpecs.white_spaces).should == nil
+    /\d/.match(LanguageSpecs.non_alphanum_non_space).should == nil
+    /\d/.match("\0").should == nil # NUL is not a digit
+  end
+
+  it "supports \\D (non-digit)" do
+    /\D/.match("a").to_a.should == ["a"]
+    /\D+/.match(LanguageSpecs.white_spaces).to_a.should == [LanguageSpecs.white_spaces]
+    /\D+/.match(LanguageSpecs.non_alphanum_non_space).to_a.should == [LanguageSpecs.non_alphanum_non_space]
+    /\D/.match("\0").to_a.should == ["\0"]
+
+    # Non-matches
+    /\D/.match("1").should == nil
+  end
+
+  it "supports [] (character class)" do
+    /[a-z]+/.match("fooBAR").to_a.should == ["foo"] # the range is case-sensitive, so the match stops at "B"
+    /[\b]/.match("\b").to_a.should == ["\b"] # \b inside character class is backspace
+  end
+
+  it "supports [[:alpha:][:digit:][:etc:]] (predefined character classes)" do
+    /[[:alnum:]]+/.match("a1").to_a.should == ["a1"]
+    /[[:alpha:]]+/.match("Aa1").to_a.should == ["Aa"]
+    /[[:blank:]]+/.match(LanguageSpecs.white_spaces).to_a.should == [LanguageSpecs.blanks]
+    # /[[:cntrl:]]/.match("").to_a.should == [""] # TODO: what should this match?
+    /[[:digit:]]/.match("1").to_a.should == ["1"]
+    # /[[:graph:]]/.match("").to_a.should == [""] # TODO: what should this match?
+    /[[:lower:]]+/.match("Aa1").to_a.should == ["a"]
+    /[[:print:]]+/.match(LanguageSpecs.white_spaces).to_a.should == [" "]     # include all of multibyte encoded characters
+    /[[:punct:]]+/.match(LanguageSpecs.punctuations).to_a.should == [LanguageSpecs.punctuations]
+    /[[:space:]]+/.match(LanguageSpecs.white_spaces).to_a.should == [LanguageSpecs.white_spaces]
+    /[[:upper:]]+/.match("123ABCabc").to_a.should == ["ABC"]
+    /[[:xdigit:]]+/.match("xyz0123456789ABCDEFabcdefXYZ").to_a.should == ["0123456789ABCDEFabcdef"]
+
+    # Parsing (how POSIX bracket syntax interacts with ordinary character-class syntax)
+    /[[:lower:][:digit:]A-C]+/.match("a1ABCDEF").to_a.should == ["a1ABC"] # can be composed with other constructs in the character class
+    /[^[:lower:]A-C]+/.match("abcABCDEF123def").to_a.should == ["DEF123"] # negated character class
+    /[:alnum:]+/.match("a:l:n:u:m").to_a.should == ["a:l:n:u:m"] # should behave like regular character class composed of the individual letters
+    /[\[:alnum:]+/.match("[:a:l:n:u:m").to_a.should == ["[:a:l:n:u:m"] # should behave like regular character class composed of the individual letters
+    -> { eval('/[[:alpha:]-[:digit:]]/') }.should.raise(SyntaxError) # can't use character class as a start value of range
+  end
+
+  it "matches ASCII characters with [[:ascii:]]" do
+    "\x00".match(/[[:ascii:]]/).to_a.should == ["\x00"] # lowest 7-bit code point
+    "\x7F".match(/[[:ascii:]]/).to_a.should == ["\x7F"] # highest 7-bit code point
+  end
+
+  not_supported_on :opal do
+    it "doesn't match non-ASCII characters with [[:ascii:]]" do
+      /[[:ascii:]]/.match("\u{80}").should == nil # first code point above the 7-bit range
+      /[[:ascii:]]/.match("\u{9898}").should == nil
+    end
+  end
+
+  it "matches Unicode letter characters with [[:alnum:]]" do
+    "à".match(/[[:alnum:]]/).to_a.should == ["à"]
+  end
+
+  it "matches Unicode digits with [[:alnum:]]" do
+    "\u{0660}".match(/[[:alnum:]]/).to_a.should == ["\u{0660}"]
+  end
+
+  it "doesn't matches Unicode marks with [[:alnum:]]" do
+    "\u{3099}".match(/[[:alnum:]]/).should == nil
+  end
+
+  it "doesn't match Unicode control characters with [[:alnum:]]" do
+    "\u{16}".match(/[[:alnum:]]/).to_a.should == []
+  end
+
+  it "doesn't match Unicode punctuation characters with [[:alnum:]]" do
+    "\u{3F}".match(/[[:alnum:]]/).to_a.should == []
+  end
+
+  it "matches Unicode letter characters with [[:alpha:]]" do
+    "à".match(/[[:alpha:]]/).to_a.should == ["à"]
+  end
+
+  it "doesn't match Unicode digits with [[:alpha:]]" do
+    "\u{0660}".match(/[[:alpha:]]/).to_a.should == []
+  end
+
+  it "doesn't matches Unicode marks with [[:alpha:]]" do
+    "\u{3099}".match(/[[:alpha:]]/).should == nil
+  end
+
+  it "doesn't match Unicode control characters with [[:alpha:]]" do
+    "\u{16}".match(/[[:alpha:]]/).to_a.should == []
+  end
+
+  it "doesn't match Unicode punctuation characters with [[:alpha:]]" do
+    "\u{3F}".match(/[[:alpha:]]/).to_a.should == []
+  end
+
+  it "matches Unicode space characters with [[:blank:]]" do
+    "\u{1680}".match(/[[:blank:]]/).to_a.should == ["\u{1680}"] # U+1680 OGHAM SPACE MARK
+  end
+
+  it "doesn't match Unicode control characters with [[:blank:]]" do
+    "\u{16}".match(/[[:blank:]]/).should == nil # U+0016 SYNCHRONOUS IDLE
+  end
+
+  it "doesn't match Unicode punctuation characters with [[:blank:]]" do
+    "\u{3F}".match(/[[:blank:]]/).should == nil # U+003F QUESTION MARK
+  end
+
+  it "doesn't match Unicode letter characters with [[:blank:]]" do
+    "à".match(/[[:blank:]]/).should == nil
+  end
+
+  it "doesn't match Unicode digits with [[:blank:]]" do
+    "\u{0660}".match(/[[:blank:]]/).should == nil # U+0660 ARABIC-INDIC DIGIT ZERO
+  end
+
+  it "doesn't match Unicode marks with [[:blank:]]" do
+    "\u{36F}".match(/[[:blank:]]/).should == nil # U+036F COMBINING LATIN SMALL LETTER X
+  end
+
+  it "doesn't Unicode letter characters with [[:cntrl:]]" do
+    "à".match(/[[:cntrl:]]/).should == nil
+  end
+
+  it "doesn't match Unicode digits with [[:cntrl:]]" do
+    "\u{0660}".match(/[[:cntrl:]]/).should == nil
+  end
+
+  it "doesn't match Unicode marks with [[:cntrl:]]" do
+    "\u{36F}".match(/[[:cntrl:]]/).should == nil
+  end
+
+  it "doesn't match Unicode punctuation characters with [[:cntrl:]]" do
+    "\u{3F}".match(/[[:cntrl:]]/).should == nil
+  end
+
+  it "matches Unicode control characters with [[:cntrl:]]" do
+    "\u{16}".match(/[[:cntrl:]]/).to_a.should == ["\u{16}"]
+  end
+
+  it "doesn't match Unicode format characters with [[:cntrl:]]" do
+    "\u{2060}".match(/[[:cntrl:]]/).should == nil
+  end
+
+  it "doesn't match Unicode private-use characters with [[:cntrl:]]" do
+    "\u{E001}".match(/[[:cntrl:]]/).should == nil
+  end
+
+  it "doesn't match Unicode letter characters with [[:digit:]]" do
+    "à".match(/[[:digit:]]/).should == nil
+  end
+
+  it "matches Unicode digits with [[:digit:]]" do
+    "\u{0660}".match(/[[:digit:]]/).to_a.should == ["\u{0660}"] # U+0660 ARABIC-INDIC DIGIT ZERO
+    "\u{FF12}".match(/[[:digit:]]/).to_a.should == ["\u{FF12}"] # U+FF12 FULLWIDTH DIGIT TWO
+  end
+
+  it "doesn't match Unicode marks with [[:digit:]]" do
+    "\u{36F}".match(/[[:digit:]]/).should == nil # U+036F COMBINING LATIN SMALL LETTER X
+  end
+
+  it "doesn't match Unicode punctuation characters with [[:digit:]]" do
+    "\u{3F}".match(/[[:digit:]]/).should == nil # U+003F QUESTION MARK
+  end
+
+  it "doesn't match Unicode control characters with [[:digit:]]" do
+    "\u{16}".match(/[[:digit:]]/).should == nil # U+0016 SYNCHRONOUS IDLE
+  end
+
+  it "doesn't match Unicode format characters with [[:digit:]]" do
+    "\u{2060}".match(/[[:digit:]]/).should == nil # U+2060 WORD JOINER
+  end
+
+  it "doesn't match Unicode private-use characters with [[:digit:]]" do
+    "\u{E001}".match(/[[:digit:]]/).should == nil # U+E001 lies in the Private Use Area
+  end
+
+  it "matches Unicode letter characters with [[:graph:]]" do
+    "à".match(/[[:graph:]]/).to_a.should == ["à"]
+  end
+
+  it "matches Unicode digits with [[:graph:]]" do
+    "\u{0660}".match(/[[:graph:]]/).to_a.should == ["\u{0660}"]
+    "\u{FF12}".match(/[[:graph:]]/).to_a.should == ["\u{FF12}"]
+  end
+
+  it "matches Unicode marks with [[:graph:]]" do
+    "\u{36F}".match(/[[:graph:]]/).to_a.should ==["\u{36F}"]
+  end
+
+  it "matches Unicode punctuation characters with [[:graph:]]" do
+    "\u{3F}".match(/[[:graph:]]/).to_a.should == ["\u{3F}"]
+  end
+
+  it "doesn't match Unicode control characters with [[:graph:]]" do
+    "\u{16}".match(/[[:graph:]]/).should == nil
+  end
+
+  it "match Unicode format characters with [[:graph:]]" do
+    "\u{2060}".match(/[[:graph:]]/).to_a.should == ["\u2060"]
+  end
+
+  it "match Unicode private-use characters with [[:graph:]]" do
+    "\u{E001}".match(/[[:graph:]]/).to_a.should == ["\u{E001}"]
+  end
+
+  it "matches Unicode lowercase letter characters with [[:lower:]]" do
+    "\u{FF41}".match(/[[:lower:]]/).to_a.should == ["\u{FF41}"] # U+FF41 FULLWIDTH LATIN SMALL LETTER A
+    "\u{1D484}".match(/[[:lower:]]/).to_a.should == ["\u{1D484}"] # U+1D484 MATHEMATICAL BOLD ITALIC SMALL C
+    "\u{E8}".match(/[[:lower:]]/).to_a.should == ["\u{E8}"] # U+00E8 LATIN SMALL LETTER E WITH GRAVE
+  end
+
+  it "doesn't match Unicode uppercase letter characters with [[:lower:]]" do
+    "\u{100}".match(/[[:lower:]]/).should == nil # U+0100 LATIN CAPITAL LETTER A WITH MACRON
+    "\u{130}".match(/[[:lower:]]/).should == nil # U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE
+    "\u{405}".match(/[[:lower:]]/).should == nil # U+0405 CYRILLIC CAPITAL LETTER DZE
+  end
+
+  it "doesn't match Unicode title-case characters with [[:lower:]]" do
+    "\u{1F88}".match(/[[:lower:]]/).should == nil
+    "\u{1FAD}".match(/[[:lower:]]/).should == nil
+    "\u{01C5}".match(/[[:lower:]]/).should == nil # U+01C5 LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON
+  end
+
+  it "doesn't match Unicode digits with [[:lower:]]" do
+    "\u{0660}".match(/[[:lower:]]/).should == nil # U+0660 ARABIC-INDIC DIGIT ZERO
+    "\u{FF12}".match(/[[:lower:]]/).should == nil # U+FF12 FULLWIDTH DIGIT TWO
+  end
+
+  it "doesn't match Unicode marks with [[:lower:]]" do
+    "\u{36F}".match(/[[:lower:]]/).should == nil # U+036F COMBINING LATIN SMALL LETTER X
+  end
+
+  it "doesn't match Unicode punctuation characters with [[:lower:]]" do
+    "\u{3F}".match(/[[:lower:]]/).should == nil # U+003F QUESTION MARK
+  end
+
+  it "doesn't match Unicode control characters with [[:lower:]]" do
+    "\u{16}".match(/[[:lower:]]/).should == nil # U+0016 SYNCHRONOUS IDLE
+  end
+
+  it "doesn't match Unicode format characters with [[:lower:]]" do
+    "\u{2060}".match(/[[:lower:]]/).should == nil # U+2060 WORD JOINER
+  end
+
+  it "doesn't match Unicode private-use characters with [[:lower:]]" do
+    "\u{E001}".match(/[[:lower:]]/).should == nil # U+E001 lies in the Private Use Area
+  end
+
+  it "matches Unicode lowercase letter characters with [[:print:]]" do
+    "\u{FF41}".match(/[[:print:]]/).to_a.should == ["\u{FF41}"]
+    "\u{1D484}".match(/[[:print:]]/).to_a.should == ["\u{1D484}"]
+    "\u{E8}".match(/[[:print:]]/).to_a.should == ["\u{E8}"]
+  end
+
+  it "matches Unicode uppercase letter characters with [[:print:]]" do
+    "\u{100}".match(/[[:print:]]/).to_a.should == ["\u{100}"]
+    "\u{130}".match(/[[:print:]]/).to_a.should == ["\u{130}"]
+    "\u{405}".match(/[[:print:]]/).to_a.should == ["\u{405}"]
+  end
+
+  it "matches Unicode title-case characters with [[:print:]]" do
+    "\u{1F88}".match(/[[:print:]]/).to_a.should == ["\u{1F88}"]
+    "\u{1FAD}".match(/[[:print:]]/).to_a.should == ["\u{1FAD}"]
+    "\u{01C5}".match(/[[:print:]]/).to_a.should == ["\u{01C5}"]
+  end
+
+  it "matches Unicode digits with [[:print:]]" do
+    "\u{0660}".match(/[[:print:]]/).to_a.should == ["\u{0660}"]
+    "\u{FF12}".match(/[[:print:]]/).to_a.should == ["\u{FF12}"]
+  end
+
+  it "matches Unicode marks with [[:print:]]" do
+    "\u{36F}".match(/[[:print:]]/).to_a.should == ["\u{36F}"]
+  end
+
+  it "matches Unicode punctuation characters with [[:print:]]" do
+    "\u{3F}".match(/[[:print:]]/).to_a.should == ["\u{3F}"]
+  end
+
+  it "doesn't match Unicode control characters with [[:print:]]" do
+    "\u{16}".match(/[[:print:]]/).should == nil
+  end
+
+  it "match Unicode format characters with [[:print:]]" do
+    "\u{2060}".match(/[[:print:]]/).to_a.should == ["\u{2060}"]
+  end
+
+  it "match Unicode private-use characters with [[:print:]]" do
+    "\u{E001}".match(/[[:print:]]/).to_a.should == ["\u{E001}"]
+  end
+
+
+  it "doesn't match Unicode lowercase letter characters with [[:punct:]]" do
+    "\u{FF41}".match(/[[:punct:]]/).should == nil # U+FF41 FULLWIDTH LATIN SMALL LETTER A
+    "\u{1D484}".match(/[[:punct:]]/).should == nil # U+1D484 MATHEMATICAL BOLD ITALIC SMALL C
+    "\u{E8}".match(/[[:punct:]]/).should == nil # U+00E8 LATIN SMALL LETTER E WITH GRAVE
+  end
+
+  it "doesn't match Unicode uppercase letter characters with [[:punct:]]" do
+    "\u{100}".match(/[[:punct:]]/).should == nil # U+0100 LATIN CAPITAL LETTER A WITH MACRON
+    "\u{130}".match(/[[:punct:]]/).should == nil # U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE
+    "\u{405}".match(/[[:punct:]]/).should == nil # U+0405 CYRILLIC CAPITAL LETTER DZE
+  end
+
+  it "doesn't match Unicode title-case characters with [[:punct:]]" do
+    "\u{1F88}".match(/[[:punct:]]/).should == nil
+    "\u{1FAD}".match(/[[:punct:]]/).should == nil
+    "\u{01C5}".match(/[[:punct:]]/).should == nil # U+01C5 LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON
+  end
+
+  it "doesn't match Unicode digits with [[:punct:]]" do
+    "\u{0660}".match(/[[:punct:]]/).should == nil # U+0660 ARABIC-INDIC DIGIT ZERO
+    "\u{FF12}".match(/[[:punct:]]/).should == nil # U+FF12 FULLWIDTH DIGIT TWO
+  end
+
+  it "doesn't match Unicode marks with [[:punct:]]" do
+    "\u{36F}".match(/[[:punct:]]/).should == nil # U+036F COMBINING LATIN SMALL LETTER X
+  end
+
+  it "matches Unicode Pc characters with [[:punct:]]" do
+    "\u{203F}".match(/[[:punct:]]/).to_a.should == ["\u{203F}"] # U+203F UNDERTIE (connector punctuation)
+  end
+
+  it "matches Unicode Pd characters with [[:punct:]]" do
+    "\u{2E17}".match(/[[:punct:]]/).to_a.should == ["\u{2E17}"] # U+2E17 DOUBLE OBLIQUE HYPHEN (dash punctuation)
+  end
+
+  it "matches Unicode Ps characters with [[:punct:]]" do
+    "\u{0F3A}".match(/[[:punct:]]/).to_a.should == ["\u{0F3A}"] # U+0F3A TIBETAN MARK GUG RTAGS GYON (open punctuation)
+  end
+
+  it "matches Unicode Pe characters with [[:punct:]]" do
+    "\u{2046}".match(/[[:punct:]]/).to_a.should == ["\u{2046}"] # U+2046 RIGHT SQUARE BRACKET WITH QUILL (close punctuation)
+  end
+
+  it "matches Unicode Pi characters with [[:punct:]]" do
+    "\u{00AB}".match(/[[:punct:]]/).to_a.should == ["\u{00AB}"] # U+00AB LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+  end
+
+  it "matches Unicode Pf characters with [[:punct:]]" do
+    "\u{201D}".match(/[[:punct:]]/).to_a.should == ["\u{201D}"] # U+201D RIGHT DOUBLE QUOTATION MARK
+    "\u{00BB}".match(/[[:punct:]]/).to_a.should == ["\u{00BB}"] # U+00BB RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+  end
+
+  it "matches Unicode Po characters with [[:punct:]]" do
+    "\u{00BF}".match(/[[:punct:]]/).to_a.should == ["\u{00BF}"] # U+00BF INVERTED QUESTION MARK
+  end
+
+  it "doesn't match Unicode format characters with [[:punct:]]" do
+    "\u{2060}".match(/[[:punct:]]/).should == nil # U+2060 WORD JOINER
+  end
+
+  it "doesn't match Unicode private-use characters with [[:punct:]]" do
+    "\u{E001}".match(/[[:punct:]]/).should == nil # U+E001 lies in the Private Use Area
+  end
+
+  it "doesn't match Unicode lowercase letter characters with [[:space:]]" do
+    "\u{FF41}".match(/[[:space:]]/).should == nil # U+FF41 FULLWIDTH LATIN SMALL LETTER A
+    "\u{1D484}".match(/[[:space:]]/).should == nil # U+1D484 MATHEMATICAL BOLD ITALIC SMALL C
+    "\u{E8}".match(/[[:space:]]/).should == nil # U+00E8 LATIN SMALL LETTER E WITH GRAVE
+  end
+
+  it "doesn't match Unicode uppercase letter characters with [[:space:]]" do
+    "\u{100}".match(/[[:space:]]/).should == nil # U+0100 LATIN CAPITAL LETTER A WITH MACRON
+    "\u{130}".match(/[[:space:]]/).should == nil # U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE
+    "\u{405}".match(/[[:space:]]/).should == nil # U+0405 CYRILLIC CAPITAL LETTER DZE
+  end
+
+  it "doesn't match Unicode title-case characters with [[:space:]]" do
+    "\u{1F88}".match(/[[:space:]]/).should == nil
+    "\u{1FAD}".match(/[[:space:]]/).should == nil
+    "\u{01C5}".match(/[[:space:]]/).should == nil # U+01C5 LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON
+  end
+
+  it "doesn't match Unicode digits with [[:space:]]" do
+    "\u{0660}".match(/[[:space:]]/).should == nil # U+0660 ARABIC-INDIC DIGIT ZERO
+    "\u{FF12}".match(/[[:space:]]/).should == nil # U+FF12 FULLWIDTH DIGIT TWO
+  end
+
+  it "doesn't match Unicode marks with [[:space:]]" do
+    "\u{36F}".match(/[[:space:]]/).should == nil # U+036F COMBINING LATIN SMALL LETTER X
+  end
+
+  it "matches Unicode Zs characters with [[:space:]]" do
+    "\u{205F}".match(/[[:space:]]/).to_a.should == ["\u{205F}"] # U+205F MEDIUM MATHEMATICAL SPACE
+  end
+
+  it "matches Unicode Zl characters with [[:space:]]" do
+    "\u{2028}".match(/[[:space:]]/).to_a.should == ["\u{2028}"] # U+2028 LINE SEPARATOR
+  end
+
+  it "matches Unicode Zp characters with [[:space:]]" do
+    "\u{2029}".match(/[[:space:]]/).to_a.should == ["\u{2029}"] # U+2029 PARAGRAPH SEPARATOR
+  end
+
+  it "doesn't match Unicode format characters with [[:space:]]" do
+    "\u{2060}".match(/[[:space:]]/).should == nil # U+2060 WORD JOINER
+  end
+
+  it "doesn't match Unicode private-use characters with [[:space:]]" do
+    "\u{E001}".match(/[[:space:]]/).should == nil # U+E001 lies in the Private Use Area
+  end
+
+  it "doesn't match Unicode lowercase characters with [[:upper:]]" do
+    "\u{FF41}".match(/[[:upper:]]/).should == nil # U+FF41 FULLWIDTH LATIN SMALL LETTER A
+    "\u{1D484}".match(/[[:upper:]]/).should == nil # U+1D484 MATHEMATICAL BOLD ITALIC SMALL C
+    "\u{E8}".match(/[[:upper:]]/).should == nil # U+00E8 LATIN SMALL LETTER E WITH GRAVE
+  end
+
+  it "matches Unicode uppercase characters with [[:upper:]]" do
+    "\u{100}".match(/[[:upper:]]/).to_a.should == ["\u{100}"] # U+0100 LATIN CAPITAL LETTER A WITH MACRON
+    "\u{130}".match(/[[:upper:]]/).to_a.should == ["\u{130}"] # U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE
+    "\u{405}".match(/[[:upper:]]/).to_a.should == ["\u{405}"] # U+0405 CYRILLIC CAPITAL LETTER DZE
+  end
+
+  it "doesn't match Unicode title-case characters with [[:upper:]]" do
+    "\u{1F88}".match(/[[:upper:]]/).should == nil
+    "\u{1FAD}".match(/[[:upper:]]/).should == nil
+    "\u{01C5}".match(/[[:upper:]]/).should == nil # U+01C5 LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON
+  end
+
+  it "doesn't match Unicode digits with [[:upper:]]" do
+    "\u{0660}".match(/[[:upper:]]/).should == nil # U+0660 ARABIC-INDIC DIGIT ZERO
+    "\u{FF12}".match(/[[:upper:]]/).should == nil # U+FF12 FULLWIDTH DIGIT TWO
+  end
+
+  it "doesn't match Unicode marks with [[:upper:]]" do
+    "\u{36F}".match(/[[:upper:]]/).should == nil # U+036F COMBINING LATIN SMALL LETTER X
+  end
+
+  it "doesn't match Unicode punctuation characters with [[:upper:]]" do
+    "\u{3F}".match(/[[:upper:]]/).should == nil # U+003F QUESTION MARK
+  end
+
+  it "doesn't match Unicode control characters with [[:upper:]]" do
+    "\u{16}".match(/[[:upper:]]/).should == nil # U+0016 SYNCHRONOUS IDLE
+  end
+
+  it "doesn't match Unicode format characters with [[:upper:]]" do
+    "\u{2060}".match(/[[:upper:]]/).should == nil # U+2060 WORD JOINER
+  end
+
+  it "doesn't match Unicode private-use characters with [[:upper:]]" do
+    "\u{E001}".match(/[[:upper:]]/).should == nil # U+E001 lies in the Private Use Area
+  end
+
+  it "doesn't match Unicode letter characters [^a-fA-F] with [[:xdigit:]]" do
+    "à".match(/[[:xdigit:]]/).should == nil
+    "g".match(/[[:xdigit:]]/).should == nil
+    "X".match(/[[:xdigit:]]/).should == nil
+  end
+
+  it "matches Unicode letter characters [a-fA-F] with [[:xdigit:]]" do
+    "a".match(/[[:xdigit:]]/).to_a.should == ["a"]
+    "F".match(/[[:xdigit:]]/).to_a.should == ["F"]
+  end
+
+  it "doesn't match Unicode digits [^0-9] with [[:xdigit:]]" do
+    "\u{0660}".match(/[[:xdigit:]]/).should == nil
+    "\u{FF12}".match(/[[:xdigit:]]/).should == nil
+  end
+
+  it "doesn't match Unicode marks with [[:xdigit:]]" do
+    "\u{36F}".match(/[[:xdigit:]]/).should == nil
+  end
+
+  it "doesn't match Unicode punctuation characters with [[:xdigit:]]" do
+    "\u{3F}".match(/[[:xdigit:]]/).should == nil
+  end
+
+  it "doesn't match Unicode control characters with [[:xdigit:]]" do
+    "\u{16}".match(/[[:xdigit:]]/).should == nil
+  end
+
+  it "doesn't match Unicode format characters with [[:xdigit:]]" do
+    "\u{2060}".match(/[[:xdigit:]]/).should == nil
+  end
+
+  it "doesn't match Unicode private-use characters with [[:xdigit:]]" do
+    "\u{E001}".match(/[[:xdigit:]]/).should == nil # U+E001 lies in the Private Use Area (U+E000..U+F8FF)
+  end
+
+  it "matches Unicode lowercase characters with [[:word:]]" do
+    "\u{FF41}".match(/[[:word:]]/).to_a.should == ["\u{FF41}"] # U+FF41 FULLWIDTH LATIN SMALL LETTER A
+    "\u{1D484}".match(/[[:word:]]/).to_a.should == ["\u{1D484}"] # U+1D484 MATHEMATICAL BOLD ITALIC SMALL C
+    "\u{E8}".match(/[[:word:]]/).to_a.should == ["\u{E8}"] # U+00E8 LATIN SMALL LETTER E WITH GRAVE
+  end
+
+  it "matches Unicode uppercase characters with [[:word:]]" do
+    "\u{100}".match(/[[:word:]]/).to_a.should == ["\u{100}"] # U+0100 LATIN CAPITAL LETTER A WITH MACRON
+    "\u{130}".match(/[[:word:]]/).to_a.should == ["\u{130}"] # U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE
+    "\u{405}".match(/[[:word:]]/).to_a.should == ["\u{405}"] # U+0405 CYRILLIC CAPITAL LETTER DZE
+  end
+
+  it "matches Unicode title-case characters with [[:word:]]" do
+    "\u{1F88}".match(/[[:word:]]/).to_a.should == ["\u{1F88}"] # U+1F88 GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI
+    "\u{1FAD}".match(/[[:word:]]/).to_a.should == ["\u{1FAD}"] # U+1FAD GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI
+    "\u{01C5}".match(/[[:word:]]/).to_a.should == ["\u{01C5}"] # U+01C5 LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON
+  end
+
+  it "matches Unicode decimal digits with [[:word:]]" do
+    "\u{FF10}".match(/[[:word:]]/).to_a.should == ["\u{FF10}"] # U+FF10 FULLWIDTH DIGIT ZERO
+    "\u{096C}".match(/[[:word:]]/).to_a.should == ["\u{096C}"] # U+096C DEVANAGARI DIGIT SIX
+  end
+
+  it "matches Unicode marks with [[:word:]]" do
+    "\u{36F}".match(/[[:word:]]/).to_a.should == ["\u{36F}"] # U+036F COMBINING LATIN SMALL LETTER X
+  end
+
+  it "match Unicode Nl characters with [[:word:]]" do
+    "\u{16EE}".match(/[[:word:]]/).to_a.should == ["\u{16EE}"] # U+16EE RUNIC ARLAUG SYMBOL (Nl = letter number)
+  end
+
+  ruby_bug "#19417", ""..."3.4.6" do # NOTE(review): guard is defined outside this file; the range presumably lists the affected MRI versions — confirm
+    it "matches Unicode join control characters with [[:word:]]" do
+      "\u{200C}".match(/[[:word:]]/).to_a.should == ["\u{200C}"] # U+200C ZERO WIDTH NON-JOINER
+      "\u{200D}".match(/[[:word:]]/).to_a.should == ["\u{200D}"] # U+200D ZERO WIDTH JOINER
+    end
+  end
+
+  it "doesn't match Unicode No characters with [[:word:]]" do
+    "\u{17F0}".match(/[[:word:]]/).should == nil # U+17F0 KHMER SYMBOL LEK ATTAK SON (No = other number)
+  end
+  it "doesn't match Unicode punctuation characters with [[:word:]]" do
+    "\u{3F}".match(/[[:word:]]/).should == nil # U+003F QUESTION MARK
+  end
+
+  it "doesn't match Unicode control characters with [[:word:]]" do
+    "\u{16}".match(/[[:word:]]/).should == nil # U+0016 SYNCHRONOUS IDLE
+  end
+
+  it "doesn't match Unicode format characters with [[:word:]]" do
+    "\u{2060}".match(/[[:word:]]/).should == nil # U+2060 WORD JOINER
+  end
+
+  it "doesn't match Unicode private-use characters with [[:word:]]" do
+    "\u{E001}".match(/[[:word:]]/).should == nil # U+E001 lies in the Private Use Area (U+E000..U+F8FF)
+  end
+
+  it "matches unicode named character properties" do
+    "a1".match(/\p{Alpha}/).to_a.should == ["a"] # only "a" is matched; the digit is left out
+  end
+
+  it "matches unicode abbreviated character properties" do
+    "a1".match(/\p{L}/).to_a.should == ["a"] # L is the short name of the Letter general category
+  end
+
+  it "matches unicode script properties" do
+    "a\u06E9b".match(/\p{Arabic}/).to_a.should == ["\u06E9"] # U+06E9 ARABIC PLACE OF SAJDAH, placed between two Latin letters
+  end
+
+  it "matches unicode Han properties" do
+    "松本行弘 Ruby".match(/\p{Han}+/u).to_a.should == ["松本行弘"] # the run ends at the space before "Ruby"
+  end
+
+  it "matches unicode Hiragana properties" do
+    "Ruby（ルビー）、まつもとゆきひろ".match(/\p{Hiragana}+/u).to_a.should == ["まつもとゆきひろ"] # the Katakana inside the parentheses is skipped
+  end
+
+  it "matches unicode Katakana properties" do
+    "Ruby（ルビー）、まつもとゆきひろ".match(/\p{Katakana}+/u).to_a.should == ["ルビ"] # the trailing "ー" is not included in the match
+  end
+
+  it "matches unicode Hangul properties" do
+    "루비(Ruby)".match(/\p{Hangul}+/u).to_a.should == ["루비"] # the run ends before "(Ruby)"
+  end
+
+  it "supports negated property condition" do
+    "a".match(eval("/\P{L}/")).should == nil
+    "1".match(eval("/\P{N}/")).should == nil
+  end
+
+  it "raises a RegexpError for an unterminated unicode property" do
+    -> { Regexp.new('\p{') }.should.raise(RegexpError) # single quotes keep the backslash, so the pattern source really is \p{
+  end
+
+  it "supports \\X (unicode 9.0 with UTR #51 workarounds)" do
+    # simple emoji without any fancy modifier or ZWJ
+    /\X/.match("\u{1F98A}").to_a.should == ["🦊"]
+
+    # skin tone modifier
+    /\X/.match("\u{1F918}\u{1F3FD}").to_a.should == ["🤘🏽"]
+
+    # emoji joined with ZWJ
+    /\X/.match("\u{1F3F3}\u{FE0F}\u{200D}\u{1F308}").to_a.should == ["🏳️‍🌈"]
+    /\X/.match("\u{1F469}\u{200D}\u{1F469}\u{200D}\u{1F467}\u{200D}\u{1F466}").to_a.should == ["👩‍👩‍👧‍👦"]
+
+    # without the ZWJ
+    /\X+/.match("\u{1F3F3}\u{FE0F}\u{1F308}").to_a.should == ["🏳️🌈"]
+    /\X+/.match("\u{1F469}\u{1F469}\u{1F467}\u{1F466}").to_a.should == ["👩👩👧👦"]
+
+    # both of the ZWJ combined
+    /\X+/.match("\u{1F3F3}\u{FE0F}\u{200D}\u{1F308}\u{1F469}\u{200D}\u{1F469}\u{200D}\u{1F467}\u{200D}\u{1F466}")
+      .to_a.should == ["🏳️‍🌈👩‍👩‍👧‍👦"]
+  end
+end
diff --git a/spec/ruby/language/regexp/empty_checks_spec.rb b/spec/ruby/language/regexp/empty_checks_spec.rb
new file mode 100644
index 0000000000..391e65b003
--- /dev/null
+++ b/spec/ruby/language/regexp/empty_checks_spec.rb
@@ -0,0 +1,135 @@
+require_relative '../../spec_helper'
+require_relative '../fixtures/classes'
+
+describe "empty checks in Regexps" do
+
+  it "allow extra empty iterations" do
+    /()?/.match("").to_a.should == ["", ""]
+    /(a*)?/.match("").to_a.should == ["", ""]
+    /(a*)*/.match("").to_a.should == ["", ""]
+    # The bounds are high to avoid DFA-based matchers in implementations
+    # and to check backtracking behavior.
+    /(?:a|()){500,1000}/.match("a" * 500).to_a.should == ["a" * 500, ""]
+
+    # Variations with non-greedy loops.
+    /()??/.match("").to_a.should == ["", nil]
+    /(a*?)?/.match("").to_a.should == ["", ""]
+    /(a*)??/.match("").to_a.should == ["", nil]
+    /(a*?)??/.match("").to_a.should == ["", nil]
+    /(a*?)*/.match("").to_a.should == ["", ""]
+    /(a*)*?/.match("").to_a.should == ["", nil]
+    /(a*?)*?/.match("").to_a.should == ["", nil]
+  end
+
+  it "allow empty iterations in the middle of a loop" do
+    # One empty iteration between a's and b's.
+    /(a|\2b|())*/.match("aaabbb").to_a.should == ["aaabbb", "", ""]
+    /(a|\2b|()){2,4}/.match("aaabbb").to_a.should == ["aaa", "", ""]
+
+    # Two empty iterations between a's and b's.
+    /(a|\2b|\3()|())*/.match("aaabbb").to_a.should == ["aaabbb", "", "", ""]
+    /(a|\2b|\3()|()){2,4}/.match("aaabbb").to_a.should == ["aaa", "", nil, ""]
+
+    # Check that the empty iteration correctly updates the loop counter.
+    /(a|\2b|()){20,24}/.match("a" * 20 + "b" * 5).to_a.should == ["a" * 20 + "b" * 3, "b", ""]
+
+    # Variations with non-greedy loops.
+    /(a|\2b|())*?/.match("aaabbb").to_a.should == ["", nil, nil]
+    /(a|\2b|()){2,4}/.match("aaabbb").to_a.should == ["aaa", "", ""] # NOTE(review): greedy, repeats an assertion above; was {2,4}? intended?
+    /(a|\2b|\3()|())*?/.match("aaabbb").to_a.should == ["", nil, nil, nil]
+    /(a|\2b|\3()|()){2,4}/.match("aaabbb").to_a.should == ["aaa", "", nil, ""] # NOTE(review): greedy, repeats an assertion above; was {2,4}? intended?
+    /(a|\2b|()){20,24}/.match("a" * 20 + "b" * 5).to_a.should == ["a" * 20 + "b" * 3, "b", ""] # NOTE(review): greedy, repeats the loop-counter assertion; was {20,24}? intended?
+  end
+
+  it "make the Regexp proceed past the quantified expression on failure" do
+    # If the contents of the ()* quantified group are empty (i.e., they fail
+    # the empty check), the loop will abort. It will not try to backtrack
+    # and try other alternatives (e.g. matching the "a") like in other Regexp
+    # dialects such as ECMAScript.
+    /(?:|a)*/.match("aaa").to_a.should == [""]
+    /(?:()|a)*/.match("aaa").to_a.should == ["", ""]
+    /(|a)*/.match("aaa").to_a.should == ["", ""]
+    /(()|a)*/.match("aaa").to_a.should == ["", "", ""]
+
+    # Same expressions, but with backreferences, to force the use of non-DFA-based
+    # engines.
+    /()\1(?:|a)*/.match("aaa").to_a.should == ["", ""]
+    /()\1(?:()|a)*/.match("aaa").to_a.should == ["", "", ""]
+    /()\1(|a)*/.match("aaa").to_a.should == ["", "", ""]
+    /()\1(()|a)*/.match("aaa").to_a.should == ["", "", "", ""]
+
+    # Variations with other zero-width contents of the quantified
+    # group: backreferences, capture groups, lookarounds
+    /()(?:\1|a)*/.match("aaa").to_a.should == ["", ""]
+    /()(?:()\1|a)*/.match("aaa").to_a.should == ["", "", ""]
+    /()(?:(\1)|a)*/.match("aaa").to_a.should == ["", "", ""]
+    /()(?:\1()|a)*/.match("aaa").to_a.should == ["", "", ""]
+    /()(\1|a)*/.match("aaa").to_a.should == ["", "", ""]
+    /()(()\1|a)*/.match("aaa").to_a.should == ["", "", "", ""]
+    /()((\1)|a)*/.match("aaa").to_a.should == ["", "", "", ""]
+    /()(\1()|a)*/.match("aaa").to_a.should == ["", "", "", ""]
+
+    /(?:(?=a)|a)*/.match("aaa").to_a.should == [""]
+    /(?:(?=a)()|a)*/.match("aaa").to_a.should == ["", ""]
+    /(?:()(?=a)|a)*/.match("aaa").to_a.should == ["", ""]
+    /(?:((?=a))|a)*/.match("aaa").to_a.should == ["", ""]
+    /()\1(?:(?=a)|a)*/.match("aaa").to_a.should == ["", ""]
+    /()\1(?:(?=a)()|a)*/.match("aaa").to_a.should == ["", "", ""]
+    /()\1(?:()(?=a)|a)*/.match("aaa").to_a.should == ["", "", ""]
+    /()\1(?:((?=a))|a)*/.match("aaa").to_a.should == ["", "", ""]
+
+    # Variations with non-greedy loops.
+    /(?:|a)*?/.match("aaa").to_a.should == [""]
+    /(?:()|a)*?/.match("aaa").to_a.should == ["", nil]
+    /(|a)*?/.match("aaa").to_a.should == ["", nil]
+    /(()|a)*?/.match("aaa").to_a.should == ["", nil, nil]
+
+    /()\1(?:|a)*?/.match("aaa").to_a.should == ["", ""]
+    /()\1(?:()|a)*?/.match("aaa").to_a.should == ["", "", nil]
+    /()\1(|a)*?/.match("aaa").to_a.should == ["", "", nil]
+    /()\1(()|a)*?/.match("aaa").to_a.should == ["", "", nil, nil]
+
+    /()(?:\1|a)*?/.match("aaa").to_a.should == ["", ""]
+    /()(?:()\1|a)*?/.match("aaa").to_a.should == ["", "", nil]
+    /()(?:(\1)|a)*?/.match("aaa").to_a.should == ["", "", nil]
+    /()(?:\1()|a)*?/.match("aaa").to_a.should == ["", "", nil]
+    /()(\1|a)*?/.match("aaa").to_a.should == ["", "", nil]
+    /()(()\1|a)*?/.match("aaa").to_a.should == ["", "", nil, nil]
+    /()((\1)|a)*?/.match("aaa").to_a.should == ["", "", nil, nil]
+    /()(\1()|a)*?/.match("aaa").to_a.should == ["", "", nil, nil]
+
+    /(?:(?=a)|a)*?/.match("aaa").to_a.should == [""]
+    /(?:(?=a)()|a)*?/.match("aaa").to_a.should == ["", nil]
+    /(?:()(?=a)|a)*?/.match("aaa").to_a.should == ["", nil]
+    /(?:((?=a))|a)*?/.match("aaa").to_a.should == ["", nil]
+    /()\1(?:(?=a)|a)*?/.match("aaa").to_a.should == ["", ""]
+    /()\1(?:(?=a)()|a)*?/.match("aaa").to_a.should == ["", "", nil]
+    /()\1(?:()(?=a)|a)*?/.match("aaa").to_a.should == ["", "", nil]
+    /()\1(?:((?=a))|a)*?/.match("aaa").to_a.should == ["", "", nil]
+  end
+
+  it "shouldn't cause the Regexp parser to get stuck in a loop" do
+    /(|a|\2b|())*/.match("aaabbb").to_a.should == ["", "", nil]
+    /(a||\2b|())*/.match("aaabbb").to_a.should == ["aaa", "", nil]
+    /(a|\2b||())*/.match("aaabbb").to_a.should == ["aaa", "", nil]
+    /(a|\2b|()|)*/.match("aaabbb").to_a.should == ["aaabbb", "", ""]
+    /(()|a|\3b|())*/.match("aaabbb").to_a.should == ["", "", "", nil]
+    /(a|()|\3b|())*/.match("aaabbb").to_a.should == ["aaa", "", "", nil]
+    /(a|\2b|()|())*/.match("aaabbb").to_a.should == ["aaabbb", "", "", nil]
+    /(a|\3b|()|())*/.match("aaabbb").to_a.should == ["aaa", "", "", nil]
+    /(a|()|())*/.match("aaa").to_a.should == ["aaa", "", "", nil]
+    /^(()|a|())*$/.match("aaa").to_a.should == ["aaa", "", "", nil]
+
+    # Variations with non-greedy loops.
+    /(|a|\2b|())*?/.match("aaabbb").to_a.should == ["", nil, nil]
+    /(a||\2b|())*?/.match("aaabbb").to_a.should == ["", nil, nil]
+    /(a|\2b||())*?/.match("aaabbb").to_a.should == ["", nil, nil]
+    /(a|\2b|()|)*?/.match("aaabbb").to_a.should == ["", nil, nil]
+    /(()|a|\3b|())*?/.match("aaabbb").to_a.should == ["", nil, nil, nil]
+    /(a|()|\3b|())*?/.match("aaabbb").to_a.should == ["", nil, nil, nil]
+    /(a|\2b|()|())*?/.match("aaabbb").to_a.should == ["", nil, nil, nil]
+    /(a|\3b|()|())*?/.match("aaabbb").to_a.should == ["", nil, nil, nil]
+    /(a|()|())*?/.match("aaa").to_a.should == ["", nil, nil, nil]
+    /^(()|a|())*?$/.match("aaa").to_a.should == ["aaa", "a", "", nil]
+  end
+end
diff --git a/spec/ruby/language/regexp/encoding_spec.rb b/spec/ruby/language/regexp/encoding_spec.rb
new file mode 100644
index 0000000000..81e845af0c
--- /dev/null
+++ b/spec/ruby/language/regexp/encoding_spec.rb
@@ -0,0 +1,152 @@
+# encoding: binary
+require_relative '../../spec_helper'
+require_relative '../fixtures/classes'
+
+describe "Regexps with encoding modifiers" do
+  it "supports /e (EUC encoding)" do
+    match = /./e.match("\303\251".dup.force_encoding(Encoding::EUC_JP))
+    match.to_a.should == ["\303\251".dup.force_encoding(Encoding::EUC_JP)]
+  end
+
+  it "supports /e (EUC encoding) with interpolation" do
+    match = /#{/./}/e.match("\303\251".dup.force_encoding(Encoding::EUC_JP))
+    match.to_a.should == ["\303\251".dup.force_encoding(Encoding::EUC_JP)]
+  end
+
+  it "supports /e (EUC encoding) with interpolation /o" do
+    match = /#{/./}/e.match("\303\251".dup.force_encoding(Encoding::EUC_JP))
+    match.to_a.should == ["\303\251".dup.force_encoding(Encoding::EUC_JP)]
+  end
+
+  it 'uses EUC-JP as /e encoding' do
+    /./e.encoding.should == Encoding::EUC_JP
+  end
+
+  it 'preserves EUC-JP as /e encoding through interpolation' do
+    /#{/./}/e.encoding.should == Encoding::EUC_JP
+  end
+
+  it "supports /n (No encoding)" do
+    /./n.match("\303\251").to_a.should == ["\303"]
+  end
+
+  it "supports /n (No encoding) with interpolation" do
+    /#{/./}/n.match("\303\251").to_a.should == ["\303"]
+  end
+
+  it "supports /n (No encoding) with interpolation /o" do
+    /#{/./}/n.match("\303\251").to_a.should == ["\303"]
+  end
+
+  it "warns when using /n with a match string with non-ASCII characters and an encoding other than ASCII-8BIT" do
+    -> {
+      eval <<~RUBY
+      /./n.match("\303\251".dup.force_encoding('utf-8'))
+      RUBY
+    }.should complain(%r{historical binary regexp match /.../n against UTF-8 string})
+  end
+
+  it 'uses US-ASCII as /n encoding if all chars are 7-bit' do
+    /./n.encoding.should == Encoding::US_ASCII
+  end
+
+  it 'uses BINARY when is not initialized' do
+    Regexp.allocate.encoding.should == Encoding::BINARY
+  end
+
+  it 'uses BINARY as /n encoding if not all chars are 7-bit' do
+    /\xFF/n.encoding.should == Encoding::BINARY
+  end
+
+  it 'preserves US-ASCII as /n encoding through interpolation if all chars are 7-bit' do
+    /.#{/./}/n.encoding.should == Encoding::US_ASCII
+  end
+
+  it 'preserves BINARY as /n encoding through interpolation if all chars are 7-bit' do
+    /\xFF#{/./}/n.encoding.should == Encoding::BINARY
+  end
+
+  it "supports /s (Windows_31J encoding)" do
+    match = /./s.match("\303\251".dup.force_encoding(Encoding::Windows_31J))
+    match.to_a.should == ["\303".dup.force_encoding(Encoding::Windows_31J)]
+  end
+
+  it "supports /s (Windows_31J encoding) with interpolation" do
+    match = /#{/./}/s.match("\303\251".dup.force_encoding(Encoding::Windows_31J))
+    match.to_a.should == ["\303".dup.force_encoding(Encoding::Windows_31J)]
+  end
+
+  it "supports /s (Windows_31J encoding) with interpolation and /o" do
+    match = /#{/./}/s.match("\303\251".dup.force_encoding(Encoding::Windows_31J))
+    match.to_a.should == ["\303".dup.force_encoding(Encoding::Windows_31J)]
+  end
+
+  it 'uses Windows-31J as /s encoding' do
+    /./s.encoding.should == Encoding::Windows_31J
+  end
+
+  it 'preserves Windows-31J as /s encoding through interpolation' do
+    /#{/./}/s.encoding.should == Encoding::Windows_31J
+  end
+
+  it "supports /u (UTF8 encoding)" do
+    /./u.match("\303\251".dup.force_encoding('utf-8')).to_a.should == ["\u{e9}"]
+  end
+
+  it "supports /u (UTF8 encoding) with interpolation" do
+    /#{/./}/u.match("\303\251".dup.force_encoding('utf-8')).to_a.should == ["\u{e9}"]
+  end
+
+  it "supports /u (UTF8 encoding) with interpolation and /o" do
+    /#{/./}/u.match("\303\251".dup.force_encoding('utf-8')).to_a.should == ["\u{e9}"]
+  end
+
+  it 'uses UTF-8 as /u encoding' do
+    /./u.encoding.should == Encoding::UTF_8
+  end
+
+  it 'preserves UTF-8 as /u encoding through interpolation' do
+    /#{/./}/u.encoding.should == Encoding::UTF_8
+  end
+
+  it "selects last of multiple encoding specifiers" do
+    /foo/ensuensuens.should == /foo/s # the final flag in the run is "s", so the result equals /foo/s
+  end
+
+  it "raises Encoding::CompatibilityError when trying match against different encodings" do
+    -> { /\A[[:space:]]*\z/.match(" ".encode("UTF-16LE")) }.should.raise(Encoding::CompatibilityError)
+  end
+
+  it "raises Encoding::CompatibilityError when trying match? against different encodings" do
+    -> { /\A[[:space:]]*\z/.match?(" ".encode("UTF-16LE")) }.should.raise(Encoding::CompatibilityError)
+  end
+
+  it "raises Encoding::CompatibilityError when trying =~ against different encodings" do
+    -> { /\A[[:space:]]*\z/ =~ " ".encode("UTF-16LE") }.should.raise(Encoding::CompatibilityError)
+  end
+
+  it "raises Encoding::CompatibilityError when the regexp has a fixed, non-ASCII-compatible encoding" do
+    -> { Regexp.new("".dup.force_encoding("UTF-16LE"), Regexp::FIXEDENCODING) =~ " ".encode("UTF-8") }.should.raise(Encoding::CompatibilityError)
+  end
+
+  it "raises Encoding::CompatibilityError when the regexp has a fixed encoding and the match string has non-ASCII characters" do
+    -> { Regexp.new("".dup.force_encoding("US-ASCII"), Regexp::FIXEDENCODING) =~ "\303\251".dup.force_encoding('UTF-8') }.should.raise(Encoding::CompatibilityError)
+  end
+
+  it "raises ArgumentError when trying to match a broken String" do
+    s = "\x80".dup.force_encoding('UTF-8') # a single 0x80 byte tagged as UTF-8 is the "broken" String
+    -> { s =~ /./ }.should.raise(ArgumentError, "invalid byte sequence in UTF-8")
+  end
+
+  it "computes the Regexp Encoding for each interpolated Regexp instance" do
+    make_regexp = -> str { /#{str}/ }
+
+    r = make_regexp.call("été".dup.force_encoding(Encoding::UTF_8))
+    r.should.fixed_encoding?
+    r.encoding.should == Encoding::UTF_8
+
+    r = make_regexp.call("abc".dup.force_encoding(Encoding::UTF_8))
+    r.should_not.fixed_encoding?
+    r.encoding.should == Encoding::US_ASCII
+  end
+end
diff --git a/spec/ruby/language/regexp/escapes_spec.rb b/spec/ruby/language/regexp/escapes_spec.rb
new file mode 100644
index 0000000000..4a0e611540
--- /dev/null
+++ b/spec/ruby/language/regexp/escapes_spec.rb
@@ -0,0 +1,169 @@
+# encoding: binary
+require_relative '../../spec_helper'
+require_relative '../fixtures/classes'
+
+# TODO: synchronize with spec/core/regexp/new_spec.rb -
+#       escaping is also tested there
+describe "Regexps with escape characters" do
+  it "supports escape sequences" do
+    /\t/.match("\t").to_a.should == ["\t"] # horizontal tab
+    /\v/.match("\v").to_a.should == ["\v"] # vertical tab
+    /\n/.match("\n").to_a.should == ["\n"] # newline
+    /\r/.match("\r").to_a.should == ["\r"] # return
+    /\f/.match("\f").to_a.should == ["\f"] # form feed
+    /\a/.match("\a").to_a.should == ["\a"] # bell
+    /\e/.match("\e").to_a.should == ["\e"] # escape
+
+    # \nnn         octal char            (encoded byte value)
+  end
+
+  it "supports quoting meta-characters via escape sequence" do
+    # parenthesis, etc
+    /\(/.match("(").to_a.should == ["("]
+    /\)/.match(")").to_a.should == [")"]
+    /\[/.match("[").to_a.should == ["["]
+    /\]/.match("]").to_a.should == ["]"]
+    /\{/.match("{").to_a.should == ["{"]
+    /\}/.match("}").to_a.should == ["}"]
+    /\</.match("<").to_a.should == ["<"]
+    /\>/.match(">").to_a.should == [">"]
+    # alternation separator
+    /\|/.match("|").to_a.should == ["|"]
+    # quantifiers
+    /\?/.match("?").to_a.should == ["?"]
+    /\./.match(".").to_a.should == ["."]
+    /\*/.match("*").to_a.should == ["*"]
+    /\+/.match("+").to_a.should == ["+"]
+    # line anchors
+    /\^/.match("^").to_a.should == ["^"]
+    /\$/.match("$").to_a.should == ["$"]
+  end
+
+  it "supports quoting meta-characters via escape sequence when used as a terminator" do
+    # parenthesis, etc
+    # %r[[, %r((, etc literals - are forbidden
+    %r(\().match("(").to_a.should == ["("]
+    %r(\)).match(")").to_a.should == [")"]
+    %r)\().match("(").to_a.should == ["("]
+    %r)\)).match(")").to_a.should == [")"]
+
+    %r[\[].match("[").to_a.should == ["["]
+    %r[\]].match("]").to_a.should == ["]"]
+    %r]\[].match("[").to_a.should == ["["]
+    %r]\]].match("]").to_a.should == ["]"]
+
+    %r{\{}.match("{").to_a.should == ["{"]
+    %r{\}}.match("}").to_a.should == ["}"]
+    %r}\{}.match("{").to_a.should == ["{"]
+    %r}\}}.match("}").to_a.should == ["}"]
+
+    %r<\<>.match("<").to_a.should == ["<"]
+    %r<\>>.match(">").to_a.should == [">"]
+    %r>\<>.match("<").to_a.should == ["<"]
+    %r>\>>.match(">").to_a.should == [">"]
+
+    # alternation separator
+    %r|\||.match("|").to_a.should == ["|"]
+    # quantifiers
+    %r?\??.match("?").to_a.should == ["?"]
+    %r.\...match(".").to_a.should == ["."]
+    %r*\**.match("*").to_a.should == ["*"]
+    %r+\++.match("+").to_a.should == ["+"]
+    # line anchors
+    %r^\^^.match("^").to_a.should == ["^"]
+    %r$\$$.match("$").to_a.should == ["$"]
+  end
+
+  it "supports quoting non-meta-characters via escape sequence when used as a terminator" do
+    non_meta_character_terminators = [
+      '!', '"', '#', '%', '&', "'", ',', '-', ':', ';', '@', '_', '`', '/', '=', '~'
+    ]
+
+    non_meta_character_terminators.each do |c|
+      pattern = eval("%r" + c + "\\" + c + c)
+      pattern.match(c).to_a.should == [c]
+    end
+  end
+
+  it "does not change semantics of escaped non-meta-character when used as a terminator" do
+    all_terminators = [*("!".."/"), *(":".."@"), *("[".."`"), *("{".."~")]
+    meta_character_terminators = ["$", "^", "*", "+", ".", "?", "|", "}", ")", ">", "]"]
+    special_cases = ['(', '{', '[', '<', '\\']
+
+    # it should be equivalent to
+    #   [ '!', '"', '#', '%', '&', "'", ',', '-', ':', ';', '@', '_', '`', '/', '=', '~' ]
+    non_meta_character_terminators = all_terminators - meta_character_terminators - special_cases
+
+    non_meta_character_terminators.each do |c|
+      pattern = eval("%r" + c + "\\" + c + c)
+      pattern.should == /#{c}/
+    end
+  end
+
+  it "does not change semantics of escaped meta-character when used as a terminator" do
+    meta_character_terminators = ["$", "^", "*", "+", ".", "?", "|", "}", ")", ">", "]"]
+
+    meta_character_terminators.each do |c|
+      pattern = eval("%r" + c + "\\" + c + c)
+      pattern.should == eval("/\\#{c}/")
+    end
+  end
+
+  it "allows any character to be escaped" do
+    /\y/.match("y").to_a.should == ["y"]
+  end
+
+  it "supports \\x (hex characters)" do
+    /\xA/.match("\nxyz").to_a.should == ["\n"] # a single hex digit is enough: \xA is "\n"
+    /\x0A/.match("\n").to_a.should == ["\n"]
+    /\xAA/.match("\nA").should == nil # both digits belong to the escape, so this is not "\n" followed by "A"
+    /\x0AA/.match("\nA").to_a.should == ["\nA"] # the escape stops after two digits; the third "A" is literal
+    /\xAG/.match("\nG").to_a.should == ["\nG"] # "G" is not a hex digit, so it is matched literally after \xA
+    # \x without any hex digit is rejected when the literal is parsed
+    -> { eval('/\xG/') }.should.raise(SyntaxError)
+
+    # \x{7HHHHHHH} wide hexadecimal char (character code point value)
+  end
+
+  it "supports \\c (control characters)" do
+    #/\c \c@\c`/.match("\00\00\00").to_a.should == ["\00\00\00"]
+    /\c#\cc\cC/.match("\03\03\03").to_a.should == ["\03\03\03"] # each line: punctuation, uppercase and lowercase forms give the same control character
+    /\c'\cG\cg/.match("\a\a\a").to_a.should == ["\a\a\a"]
+    /\c(\cH\ch/.match("\b\b\b").to_a.should == ["\b\b\b"]
+    /\c)\cI\ci/.match("\t\t\t").to_a.should == ["\t\t\t"]
+    /\c*\cJ\cj/.match("\n\n\n").to_a.should == ["\n\n\n"]
+    /\c+\cK\ck/.match("\v\v\v").to_a.should == ["\v\v\v"]
+    /\c,\cL\cl/.match("\f\f\f").to_a.should == ["\f\f\f"]
+    /\c-\cM\cm/.match("\r\r\r").to_a.should == ["\r\r\r"]
+
+    /\cJ/.match("\r").should == nil
+
+    # Parsing precedence
+    /\cJ+/.match("\n\n").to_a.should == ["\n\n"] # Quantifiers apply to the entire escape sequence
+    /\\cJ/.match("\\cJ").to_a.should == ["\\cJ"] # an escaped backslash leaves "cJ" as literal text
+    -> { eval('/[abc\x]/') }.should.raise(SyntaxError) # \x is treated as an escape sequence even inside a character class
+    # Syntax error
+    -> { eval('/\c/') }.should.raise(SyntaxError)
+
+    # \cx          control char          (character code point value)
+    # \C-x         control char          (character code point value)
+    # \M-x         meta  (x|0x80)        (character code point value)
+    # \M-\C-x      meta control char     (character code point value)
+  end
+
+  it "handles three digit octal escapes starting with 0" do
+    /[\000-\b]/.match("\x00")[0].should == "\x00"
+  end
+
+  it "handles control escapes with \\C-x syntax" do
+    /\C-*\C-J\C-j/.match("\n\n\n")[0].should == "\n\n\n"
+  end
+
+  it "supports the \\K keep operator" do
+    /a\Kb/.match("ab")[0].should == "b" # the "a" before \K must be present but is left out of the reported match
+  end
+
+  it "supports the \\R line break escape" do
+    /\R/.match("\n")[0].should == "\n"
+  end
+end
diff --git a/spec/ruby/language/regexp/grouping_spec.rb b/spec/ruby/language/regexp/grouping_spec.rb
new file mode 100644
index 0000000000..80ad7460da
--- /dev/null
+++ b/spec/ruby/language/regexp/grouping_spec.rb
@@ -0,0 +1,63 @@
+require_relative '../../spec_helper'
+require_relative '../fixtures/classes'
+
+describe "Regexps with grouping" do
+  it "support ()" do
+    /(a)/.match("a").to_a.should == ["a", "a"]
+  end
+
+  it "allows groups to be nested" do
+    md = /(hay(st)a)ck/.match('haystack')
+    md.to_a.should == ['haystack','haysta', 'st']
+  end
+
+  it "raises a SyntaxError when parentheses aren't balanced" do
+    -> { eval "/(hay(st)ack/" }.should.raise(SyntaxError)
+  end
+
+  it "supports (?: ) (non-capturing group)" do
+    /(?:foo)(bar)/.match("foobar").to_a.should == ["foobar", "bar"]
+    # Parsing precedence
+    /(?:xdigit:)/.match("xdigit:").to_a.should == ["xdigit:"]
+  end
+
+  it "group names cannot start with digits or minus" do
+    -> { Regexp.new("(?<1a>a)") }.should.raise(RegexpError)
+    -> { Regexp.new("(?<-a>a)") }.should.raise(RegexpError)
+  end
+
+  it "ignore capture groups in line comments" do
+    /^
+     (a) # there is a capture group on this line
+     b   # there is no capture group on this line (not even here)
+     $/x.match("ab").to_a.should == [ "ab", "a" ]
+  end
+
+  it "does not consider # inside a character class as a comment" do
+    # From https://github.com/rubocop/rubocop/blob/39fcf1c568/lib/rubocop/cop/utils/format_string.rb#L18
+    regexp = /
+        % (?<type>%) # line comment
+      | % (?<flags>(?-mix:[ #0+-]|(?-mix:(\d+)\$))*) (?#group comment)
+        (?:
+          (?: (?-mix:(?<width>(?-mix:\d+|(?-mix:\*(?-mix:(\d+)\$)?))))? (?-mix:\.(?<precision>(?-mix:\d+|(?-mix:\*(?-mix:(\d+)\$)?))))? (?-mix:<(?<name>\w+)>)?
+            | (?-mix:(?<width>(?-mix:\d+|(?-mix:\*(?-mix:(\d+)\$)?))))? (?-mix:<(?<name>\w+)>) (?-mix:\.(?<precision>(?-mix:\d+|(?-mix:\*(?-mix:(\d+)\$)?))))?
+            | (?-mix:<(?<name>\w+)>) (?<more_flags>(?-mix:[ #0+-]|(?-mix:(\d+)\$))*) (?-mix:(?<width>(?-mix:\d+|(?-mix:\*(?-mix:(\d+)\$)?))))? (?-mix:\.(?<precision>(?-mix:\d+|(?-mix:\*(?-mix:(\d+)\$)?))))?
+          ) (?-mix:(?<type>[bBdiouxXeEfgGaAcps]))
+          | (?-mix:(?<width>(?-mix:\d+|(?-mix:\*(?-mix:(\d+)\$)?))))? (?-mix:\.(?<precision>(?-mix:\d+|(?-mix:\*(?-mix:(\d+)\$)?))))? (?-mix:\{(?<name>\w+)\})
+        )
+    /x
+    regexp.named_captures.should == {
+      "type" => [1, 13],
+      "flags" => [2],
+      "width" => [3, 6, 11, 14],
+      "precision" => [4, 8, 12, 15],
+      "name" => [5, 7, 9, 16],
+      "more_flags" => [10]
+    }
+    match = regexp.match("%6.3f")
+    match[:width].should == '6'
+    match[:precision].should == '3'
+    match[:type].should == 'f'
+    match.to_a.should == [ "%6.3f", nil, "", "6", "3"] + [nil] * 8 + ["f"] + [nil] * 3
+  end
+end
diff --git a/spec/ruby/language/regexp/interpolation_spec.rb b/spec/ruby/language/regexp/interpolation_spec.rb
new file mode 100644
index 0000000000..f771d0a395
--- /dev/null
+++ b/spec/ruby/language/regexp/interpolation_spec.rb
@@ -0,0 +1,58 @@
+require_relative '../../spec_helper'
+require_relative '../fixtures/classes'
+
+describe "Regexps with interpolation" do
+
+  it "allows interpolation of strings" do
+    str = "foo|bar"
+    /#{str}/.should == /foo|bar/
+  end
+
+  it "allows interpolation of literal regexps" do
+    re = /foo|bar/
+    /#{re}/.should == /(?-mix:foo|bar)/
+  end
+
+  it "allows interpolation of any object that responds to to_s" do
+    o = Object.new
+    def o.to_s
+      "object_with_to_s"
+    end
+    /#{o}/.should == /object_with_to_s/
+  end
+
+  it "allows interpolation which mixes modifiers" do
+    re = /foo/i
+    /#{re} bar/m.should == /(?i-mx:foo) bar/m
+  end
+
+  it "allows interpolation to interact with other Regexp constructs" do
+    str = "foo)|(bar"
+    /(#{str})/.should == /(foo)|(bar)/
+
+    str = "a"
+    /[#{str}-z]/.should == /[a-z]/
+  end
+
+  it "gives precedence to escape sequences over substitution" do
+    str = "J"
+    /\c#{str}/.to_s.should.include?('{str}') # \c takes the "#", so "{str}" stays literal text instead of being interpolated
+  end
+
+  it "throws RegexpError for malformed interpolation" do
+    s = ""
+    -> { /(#{s}/ }.should.raise(RegexpError)
+    s = "("
+    -> { /#{s}/ }.should.raise(RegexpError)
+  end
+
+  it "allows interpolation in extended mode" do
+    var = "#comment\n  foo  #comment\n  |  bar" # interpolated source carrying /x-style comments and padding
+    (/#{var}/x =~ "foo").should == (/foo|bar/ =~ "foo") # compared against the plain alternation on the same input
+  end
+
+  it "allows escape sequences in interpolated regexps" do
+    escape_seq = %r{"\x80"}n
+    %r{#{escape_seq}}n.should == /(?-mix:"\x80")/n
+  end
+end
diff --git a/spec/ruby/language/regexp/modifiers_spec.rb b/spec/ruby/language/regexp/modifiers_spec.rb
new file mode 100644
index 0000000000..c96fbfa983
--- /dev/null
+++ b/spec/ruby/language/regexp/modifiers_spec.rb
@@ -0,0 +1,115 @@
+require_relative '../../spec_helper'
+require_relative '../fixtures/classes'
+
+describe "Regexps with modifiers" do
+  it "supports /i (case-insensitive)" do
+    /foo/i.match("FOO").to_a.should == ["FOO"]
+  end
+
+  it "supports /m (multiline)" do
+    /foo.bar/m.match("foo\nbar").to_a.should == ["foo\nbar"]
+    /foo.bar/.match("foo\nbar").should == nil
+  end
+
+  it "supports /x (extended syntax)" do
+    /\d +/x.match("abc123").to_a.should == ["123"] # Quantifiers can be separated from the expression they apply to
+  end
+
+  it "supports /o (once)" do
+    2.times do |i|
+      /#{i}/o.should == /0/ # still /0/ when i is 1: the value interpolated on the first pass is kept
+    end
+  end
+
+  it "invokes substitutions for /o only once" do
+    ScratchPad.record []
+    o = Object.new
+    def o.to_s
+      ScratchPad << :to_s
+      "class_with_to_s"
+    end
+    eval "2.times { /#{o}/o }"
+    ScratchPad.recorded.should == [:to_s]
+  end
+
+  it "supports modifier combinations" do
+    /foo/imox.match("foo").to_a.should == ["foo"]
+    /foo/imoximox.match("foo").to_a.should == ["foo"]
+
+    -> { eval('/foo/a') }.should.raise(SyntaxError)
+  end
+
+  it "supports (?~) (absent operator)" do
+    Regexp.new("(?~foo)").match("hello").to_a.should == ["hello"]
+    "foo".scan(Regexp.new("(?~foo)")).should == ["fo","o",""]
+  end
+
+  it "supports (?imx-imx) (inline modifiers)" do
+    /(?i)foo/.match("FOO").to_a.should == ["FOO"]
+    /foo(?i)/.match("FOO").should == nil
+    # Interaction with /i
+    /(?-i)foo/i.match("FOO").should == nil
+    /foo(?-i)/i.match("FOO").to_a.should == ["FOO"]
+    # Multiple uses
+    /foo (?i)bar (?-i)baz/.match("foo BAR baz").to_a.should == ["foo BAR baz"]
+    /foo (?i)bar (?-i)baz/.match("foo BAR BAZ").should == nil
+
+    /(?m)./.match("\n").to_a.should == ["\n"]
+    /.(?m)/.match("\n").should == nil
+    # Interaction with /m
+    /(?-m)./m.match("\n").should == nil
+    /.(?-m)/m.match("\n").to_a.should == ["\n"]
+    # Multiple uses
+    /. (?m). (?-m)./.match(". \n .").to_a.should == [". \n ."]
+    /. (?m). (?-m)./.match(". \n \n").should == nil
+
+    /(?x) foo /.match("foo").to_a.should == ["foo"]
+    / foo (?x)/.match("foo").should == nil
+    # Interaction with /x
+    /(?-x) foo /x.match("foo").should == nil
+    / foo (?-x)/x.match("foo").to_a.should == ["foo"]
+    # Multiple uses
+    /( foo )(?x)( bar )(?-x)( baz )/.match(" foo bar baz ").to_a.should == [" foo bar baz ", " foo ", "bar", " baz "]
+    /( foo )(?x)( bar )(?-x)( baz )/.match(" foo barbaz").should == nil
+
+    # Parsing
+    /(?i-i)foo/.match("FOO").should == nil
+    /(?ii)foo/.match("FOO").to_a.should == ["FOO"]
+    /(?-)foo/.match("foo").to_a.should == ["foo"]
+    -> { eval('/(?o)/') }.should.raise(SyntaxError)
+  end
+
+  it "supports (?imx-imx:expr) (scoped inline modifiers)" do
+    /foo (?i:bar) baz/.match("foo BAR baz").to_a.should == ["foo BAR baz"]
+    /foo (?i:bar) baz/.match("foo BAR BAZ").should == nil
+    /foo (?-i:bar) baz/i.match("foo BAR BAZ").should == nil
+
+    /. (?m:.) ./.match(". \n .").to_a.should == [". \n ."]
+    /. (?m:.) ./.match(". \n \n").should == nil
+    /. (?-m:.) ./m.match("\n \n \n").should == nil
+
+    /( foo )(?x: bar )( baz )/.match(" foo bar baz ").to_a.should == [" foo bar baz ", " foo ", " baz "]
+    /( foo )(?x: bar )( baz )/.match(" foo barbaz").should == nil
+    /( foo )(?-x: bar )( baz )/x.match("foo bar baz").to_a.should == ["foo bar baz", "foo", "baz"]
+
+    # Parsing
+    /(?i-i:foo)/.match("FOO").should == nil
+    /(?ii:foo)/.match("FOO").to_a.should == ["FOO"]
+    /(?-:)foo/.match("foo").to_a.should == ["foo"]
+    -> { eval('/(?o:)/') }.should.raise(SyntaxError)
+  end
+
+  it "supports . with /m" do
+    # Basic matching
+    /./m.match("\n").to_a.should == ["\n"]
+  end
+
+  it "supports ASCII/Unicode modifiers" do
+    eval('/(?a)[[:alpha:]]+/').match("a\u3042").to_a.should == ["a"]
+    eval('/(?d)[[:alpha:]]+/').match("a\u3042").to_a.should == ["a\u3042"]
+    eval('/(?u)[[:alpha:]]+/').match("a\u3042").to_a.should == ["a\u3042"]
+    eval('/(?a)\w+/').match("a\u3042").to_a.should == ["a"]
+    eval('/(?d)\w+/').match("a\u3042").to_a.should == ["a"]
+    eval('/(?u)\w+/').match("a\u3042").to_a.should == ["a\u3042"]
+  end
+end
diff --git a/spec/ruby/language/regexp/repetition_spec.rb b/spec/ruby/language/regexp/repetition_spec.rb
new file mode 100644
index 0000000000..f24323de5c
--- /dev/null
+++ b/spec/ruby/language/regexp/repetition_spec.rb
@@ -0,0 +1,138 @@
+require_relative '../../spec_helper'
+require_relative '../fixtures/classes'
+
+describe "Regexps with repetition" do
+  it "supports * (0 or more of previous subexpression)" do
+    /a*/.match("aaa").to_a.should == ["aaa"]
+    /a*/.match("bbb").to_a.should == [""] # zero repetitions still match (empty string)
+    /<.*>/.match("<a>foo</a>").to_a.should == ["<a>foo</a>"] # it is greedy
+  end
+
+  it "supports *? (0 or more of previous subexpression - lazy)" do
+    /a*?/.match("aaa").to_a.should == [""] # lazy: settles for zero repetitions
+    /<.*?>/.match("<a>foo</a>").to_a.should == ["<a>"] # stops at the first ">"
+  end
+
+  it "supports + (1 or more of previous subexpression)" do
+    /a+/.match("aaa").to_a.should == ["aaa"]
+    /a+/.match("bbb").should == nil # at least one "a" is required
+    /<.+>/.match("<a>foo</a>").to_a.should == ["<a>foo</a>"] # it is greedy
+  end
+
+  it "supports +? (1 or more of previous subexpression - lazy)" do
+    /a+?/.match("aaa").to_a.should == ["a"] # lazy, but one "a" is still required
+    /<.+?>/.match("<a>foo</a>").to_a.should == ["<a>"] # stops at the first ">"
+  end
+
+  it "supports {m,n} (m to n of previous subexpression)" do
+    /a{2,4}/.match("aaaaaa").to_a.should == ["aaaa"] # capped at n = 4
+    /<.{1,}>/.match("<a>foo</a>").to_a.should == ["<a>foo</a>"] # it is greedy
+  end
+
+  it "supports {m,n}? (m to n of previous subexpression - lazy)" do
+    /<.{1,}?>/.match("<a>foo</a>").to_a.should == ["<a>"]
+    /.([0-9]){3,5}?foo/.match("9876543210foo").to_a.should == ["543210foo", "0"] # group 1 holds the last digit
+  end
+
+  it "does not treat {m,n}+ as possessive" do
+    -> { # the literal is eval'd here so its warning is raised inside the lambda under test
+      @regexp = eval "/foo(A{0,1}+)Abar/"
+    }.should complain(/nested repeat operator/)
+    @regexp.match("fooAAAbar").to_a.should == ["fooAAAbar", "AA"] # a possessive {0,1}+ could hold at most one A
+  end
+
+  it "supports ? (0 or 1 of previous subexpression)" do
+    /a?/.match("aaa").to_a.should == ["a"]
+    /a?/.match("bbb").to_a.should == [""]
+  end
+
+  it "handles incomplete range quantifiers" do
+    /a{}/.match("a{}")[0].should == "a{}" # the braces are matched as literal text
+    /a{,}/.match("a{,}")[0].should == "a{,}" # literal text as well
+    /a{1/.match("a{1")[0].should == "a{1" # unterminated: literal text
+    /a{1,2/.match("a{1,2")[0].should == "a{1,2" # unterminated: literal text
+    /a{,5}/.match("aaa")[0].should == "aaa" # a missing lower bound still quantifies
+  end
+
+  it "lets us use quantifiers on assertions" do
+    /a^?b/.match("ab")[0].should == "ab"
+    /a$?b/.match("ab")[0].should == "ab"
+    /a\A?b/.match("ab")[0].should == "ab"
+    /a\Z?b/.match("ab")[0].should == "ab"
+    /a\z?b/.match("ab")[0].should == "ab"
+    /a\G?b/.match("ab")[0].should == "ab"
+    /a\b?b/.match("ab")[0].should == "ab"
+    /a\B?b/.match("ab")[0].should == "ab"
+    /a(?=c)?b/.match("ab")[0].should == "ab"
+    /a(?!=b)?b/.match("ab")[0].should == "ab" # NOTE(review): (?!=b) rejects a literal "=b"; confirm (?!b) was not meant
+    /a(?<=c)?b/.match("ab")[0].should == "ab"
+    /a(?<!a)?b/.match("ab")[0].should == "ab"
+  end
+
+  it "does not delete optional assertions" do
+    /(?=(a))?/.match("a").to_a.should == [ "", "a" ] # the lookahead still ran: group 1 captured "a"
+  end
+
+  it "supports nested quantifiers" do
+    suppress_warning do # presumably these literals warn when parsed, hence eval under suppress_warning - TODO confirm
+      eval <<-RUBY
+      /a***/.match("aaa")[0].should == "aaa"
+
+      # a+?* should not be reduced, it should be equivalent to (a+?)*
+      # NB: the capture group prevents regex engines from reducing the two quantifiers
+      # https://bugs.ruby-lang.org/issues/17341
+      /a+?*/.match("")[0].should == ""
+      /(a+?)*/.match("")[0].should == ""
+
+      /a+?*/.match("a")[0].should == "a"
+      /(a+?)*/.match("a")[0].should == "a"
+
+      /a+?*/.match("aa")[0].should == "aa"
+      /(a+?)*/.match("aa")[0].should == "aa"
+
+      # a+?+ should not be reduced, it should be equivalent to (a+?)+
+      # https://bugs.ruby-lang.org/issues/17341
+      /a+?+/.match("").should == nil
+      /(a+?)+/.match("").should == nil
+
+      /a+?+/.match("a")[0].should == "a"
+      /(a+?)+/.match("a")[0].should == "a"
+
+      /a+?+/.match("aa")[0].should == "aa"
+      /(a+?)+/.match("aa")[0].should == "aa"
+
+      # both a**? and a+*? should be equivalent to (a+)??
+      # this quantifier would rather match nothing, but if that's not possible,
+      # it will greedily take everything
+      /a**?/.match("")[0].should == ""
+      /(a*)*?/.match("")[0].should == ""
+      /a+*?/.match("")[0].should == ""
+      /(a+)*?/.match("")[0].should == ""
+      /(a+)??/.match("")[0].should == ""
+
+      /a**?/.match("aaa")[0].should == ""
+      /(a*)*?/.match("aaa")[0].should == ""
+      /a+*?/.match("aaa")[0].should == ""
+      /(a+)*?/.match("aaa")[0].should == ""
+      /(a+)??/.match("aaa")[0].should == ""
+
+      /b.**?b/.match("baaabaaab")[0].should == "baaabaaab"
+      /b(.*)*?b/.match("baaabaaab")[0].should == "baaabaaab"
+      /b.+*?b/.match("baaabaaab")[0].should == "baaabaaab"
+      /b(.+)*?b/.match("baaabaaab")[0].should == "baaabaaab"
+      /b(.+)??b/.match("baaabaaab")[0].should == "baaabaaab"
+      RUBY
+    end
+  end
+
+  it "treats ? after {n} quantifier as another quantifier, not as non-greedy marker" do
+    /a{2}?/.match("").to_a.should == [""] # a lazy a{2} would still need two a's and could not match ""
+  end
+
+  it "matches zero-width capture groups in optional iterations of loops" do
+    /()?/.match("").to_a.should == ["", ""] # group 1 is "" (it took part in the match), not nil
+    /(a*)?/.match("").to_a.should == ["", ""]
+    /(a*)*/.match("").to_a.should == ["", ""]
+    /(?:a|()){500,1000}/.match("a" * 500).to_a.should == ["a" * 500, ""] # "" is captured once the a's run out
+  end
+end
diff --git a/spec/ruby/language/regexp/subexpression_call_spec.rb b/spec/ruby/language/regexp/subexpression_call_spec.rb
new file mode 100644
index 0000000000..16b64cb327
--- /dev/null
+++ b/spec/ruby/language/regexp/subexpression_call_spec.rb
@@ -0,0 +1,50 @@
+require_relative '../../spec_helper'
+require_relative '../fixtures/classes'
+
+describe "Regexps with subexpression calls" do
+  it "allows numeric subexpression calls" do
+    /(a)\g<1>/.match("aa").to_a.should == [ "aa", "a" ] # \g<1> runs group 1's pattern a second time
+  end
+
+  it "treats subexpression calls as distinct from simple back-references" do
+    # Back-references (\k<name>) only match text equal to what the group captured.
+    /(?<three_digits>[0-9]{3})-\k<three_digits>/.match("123-123")[0].should == "123-123"
+    /(?<three_digits>[0-9]{3})-\k<three_digits>/.match("123-456").should == nil
+    # Subexpression calls (\g<name>) re-run the group's pattern instead, so they
+    # can match a different string.
+    /(?<three_digits>[0-9]{3})-\g<three_digits>/.match("123-456")[0].should == "123-456"
+  end
+
+  it "allows recursive subexpression calls" do
+    # This pattern matches a whole string whose parentheses are well nested.
+    parens = /^ (?<parens>  (?: \( \g<parens> \) | [^()] )*  ) $/x
+    parens.match("((a)(b))c(d)")[0].should == "((a)(b))c(d)"
+    parens.match("((a)(b)c(d)").should == nil # one "(" is never closed
+  end
+
+  it "allows access to back-references from the current level" do
+    # Using \k<first_char-0> accesses the last value captured in first_char
+    # on the current stack level.
+    mirror = /^ (?<mirror> (?: (?<first_char>.) \g<mirror> \k<first_char-0> )? ) $/x
+    mirror.match("abccba")[0].should == "abccba"
+    mirror.match("abccbd").should == nil
+
+    # OTOH, using \k<first_char> accesses the last value captured in first_char,
+    # regardless of the stack level. Therefore, it can't be used to implement
+    # the mirror language.
+    broken_mirror = /^ (?<mirror> (?: (?<first_char>.) \g<mirror> \k<first_char> )? ) $/x
+    broken_mirror.match("abccba").should == nil
+    # This matches because the 'c' is captured in first_char and that value is
+    # then used for all subsequent back-references, regardless of nesting.
+    broken_mirror.match("abcccc")[0].should == "abcccc"
+  end
+
+  it "allows + and - in group names and referential constructs that don't use levels, i.e. subexpression calls" do
+    /(?<a+>a)\g<a+>/.match("aa").to_a.should == [ "aa", "a" ] # group name ends in "+"
+    /(?<a+b>a)\g<a+b>/.match("aa").to_a.should == [ "aa", "a" ]
+    /(?<a+1>a)\g<a+1>/.match("aa").to_a.should == [ "aa", "a" ] # "+1" is part of the name here
+    /(?<a->a)\g<a->/.match("aa").to_a.should == [ "aa", "a" ] # group name ends in "-"
+    /(?<a-b>a)\g<a-b>/.match("aa").to_a.should == [ "aa", "a" ]
+    /(?<a-1>a)\g<a-1>/.match("aa").to_a.should == [ "aa", "a" ] # "-1" is part of the name here
+  end
+end