5 files changed, 130 insertions, 33 deletions
diff --git a/spec/ruby/language/regexp/back-references_spec.rb b/spec/ruby/language/regexp/back-references_spec.rb
index 26750c20c5..627c8daace 100644
--- a/spec/ruby/language/regexp/back-references_spec.rb
+++ b/spec/ruby/language/regexp/back-references_spec.rb
@@ -22,6 +22,15 @@ describe "Regexps with back-references" do
     $10.should == "0"
   end
 
+  it "returns nil for numbered variable with too large index" do # 4294967296 == 2**32
+    -> { # lambda handed to the `complain` matcher below, which checks the emitted warning text
+      eval(<<~CODE).should == nil
+        "a" =~ /(.)/
+        eval('$4294967296')
+      CODE
+    }.should complain(/warning: ('|`)\$4294967296' is too big for a number variable, always nil/) # opening quote may be ' or `
+  end
+
   it "will not clobber capture variables across threads" do
     cap1, cap2, cap3 = nil
     "foo" =~ /(o+)/
diff --git a/spec/ruby/language/regexp/character_classes_spec.rb b/spec/ruby/language/regexp/character_classes_spec.rb
index 0cf1e9b6f4..98d431a817 100644
--- a/spec/ruby/language/regexp/character_classes_spec.rb
+++ b/spec/ruby/language/regexp/character_classes_spec.rb
@@ -609,10 +609,13 @@ describe "Regexp with character classes" do
     "루비(Ruby)".match(/\p{Hangul}+/u).to_a.should == ["루비"]
   end
 
-  ruby_bug "#17340", ''...'3.0' do
-    it "raises a RegexpError for an unterminated unicode property" do
-      -> { Regexp.new('\p{') }.should raise_error(RegexpError)
-    end
+  it "supports negated property condition" do
+    "a".match(eval("/\P{L}/")).should be_nil
+    "1".match(eval("/\P{N}/")).should be_nil
+  end
+
+  it "raises a RegexpError for an unterminated unicode property" do
+    -> { Regexp.new('\p{') }.should raise_error(RegexpError)
   end
 
   it "supports \\X (unicode 9.0 with UTR #51 workarounds)" do
diff --git a/spec/ruby/language/regexp/encoding_spec.rb b/spec/ruby/language/regexp/encoding_spec.rb
index 8e2a294b95..0571b2d3cf 100644
--- a/spec/ruby/language/regexp/encoding_spec.rb
+++ b/spec/ruby/language/regexp/encoding_spec.rb
@@ -4,18 +4,18 @@ require_relative '../fixtures/classes'
 
 describe "Regexps with encoding modifiers" do
   it "supports /e (EUC encoding)" do
-    match = /./e.match("\303\251".force_encoding(Encoding::EUC_JP))
-    match.to_a.should == ["\303\251".force_encoding(Encoding::EUC_JP)]
+    match = /./e.match("\303\251".dup.force_encoding(Encoding::EUC_JP))
+    match.to_a.should == ["\303\251".dup.force_encoding(Encoding::EUC_JP)]
   end
 
   it "supports /e (EUC encoding) with interpolation" do
-    match = /#{/./}/e.match("\303\251".force_encoding(Encoding::EUC_JP))
-    match.to_a.should == ["\303\251".force_encoding(Encoding::EUC_JP)]
+    match = /#{/./}/e.match("\303\251".dup.force_encoding(Encoding::EUC_JP))
+    match.to_a.should == ["\303\251".dup.force_encoding(Encoding::EUC_JP)]
   end
 
   it "supports /e (EUC encoding) with interpolation /o" do
-    match = /#{/./}/e.match("\303\251".force_encoding(Encoding::EUC_JP))
-    match.to_a.should == ["\303\251".force_encoding(Encoding::EUC_JP)]
+    match = /#{/./}/eo.match("\303\251".dup.force_encoding(Encoding::EUC_JP)) # `o` flag added: the example title covers /o
+    match.to_a.should == ["\303\251".dup.force_encoding(Encoding::EUC_JP)]
   end
 
   it 'uses EUC-JP as /e encoding' do
@@ -38,6 +38,10 @@ describe "Regexps with encoding modifiers" do
     /#{/./}/n.match("\303\251").to_a.should == ["\303"]
   end
 
+  it "warns when using /n with a match string with non-ASCII characters and an encoding other than ASCII-8BIT" do # "\303\251" is U+00E9 in UTF-8
+    -> { /./n.match("\303\251".dup.force_encoding('utf-8')) }.should complain(%r{historical binary regexp match /.../n against UTF-8 string})
+  end
+
   it 'uses US-ASCII as /n encoding if all chars are 7-bit' do
     /./n.encoding.should == Encoding::US_ASCII
   end
@@ -59,18 +63,18 @@ describe "Regexps with encoding modifiers" do
   end
 
   it "supports /s (Windows_31J encoding)" do
-    match = /./s.match("\303\251".force_encoding(Encoding::Windows_31J))
-    match.to_a.should == ["\303".force_encoding(Encoding::Windows_31J)]
+    match = /./s.match("\303\251".dup.force_encoding(Encoding::Windows_31J))
+    match.to_a.should == ["\303".dup.force_encoding(Encoding::Windows_31J)]
   end
 
   it "supports /s (Windows_31J encoding) with interpolation" do
-    match = /#{/./}/s.match("\303\251".force_encoding(Encoding::Windows_31J))
-    match.to_a.should == ["\303".force_encoding(Encoding::Windows_31J)]
+    match = /#{/./}/s.match("\303\251".dup.force_encoding(Encoding::Windows_31J))
+    match.to_a.should == ["\303".dup.force_encoding(Encoding::Windows_31J)]
   end
 
   it "supports /s (Windows_31J encoding) with interpolation and /o" do
-    match = /#{/./}/s.match("\303\251".force_encoding(Encoding::Windows_31J))
-    match.to_a.should == ["\303".force_encoding(Encoding::Windows_31J)]
+    match = /#{/./}/so.match("\303\251".dup.force_encoding(Encoding::Windows_31J)) # `o` flag added: the example title covers /o
+    match.to_a.should == ["\303".dup.force_encoding(Encoding::Windows_31J)]
   end
 
   it 'uses Windows-31J as /s encoding' do
@@ -82,15 +86,15 @@ describe "Regexps with encoding modifiers" do
   end
 
   it "supports /u (UTF8 encoding)" do
-    /./u.match("\303\251".force_encoding('utf-8')).to_a.should == ["\u{e9}"]
+    /./u.match("\303\251".dup.force_encoding('utf-8')).to_a.should == ["\u{e9}"]
   end
 
   it "supports /u (UTF8 encoding) with interpolation" do
-    /#{/./}/u.match("\303\251".force_encoding('utf-8')).to_a.should == ["\u{e9}"]
+    /#{/./}/u.match("\303\251".dup.force_encoding('utf-8')).to_a.should == ["\u{e9}"]
   end
 
   it "supports /u (UTF8 encoding) with interpolation and /o" do
-    /#{/./}/u.match("\303\251".force_encoding('utf-8')).to_a.should == ["\u{e9}"]
+    /#{/./}/uo.match("\303\251".dup.force_encoding('utf-8')).to_a.should == ["\u{e9}"] # `o` flag added: the example title covers /o
   end
 
   it 'uses UTF-8 as /u encoding' do
@@ -117,14 +121,27 @@ describe "Regexps with encoding modifiers" do
     -> { /\A[[:space:]]*\z/ =~ " ".encode("UTF-16LE") }.should raise_error(Encoding::CompatibilityError)
   end
 
+  it "raises Encoding::CompatibilityError when the regexp has a fixed, non-ASCII-compatible encoding" do # empty UTF-16LE pattern vs. a UTF-8 string
+    -> { Regexp.new("".dup.force_encoding("UTF-16LE"), Regexp::FIXEDENCODING) =~ " ".encode("UTF-8") }.should raise_error(Encoding::CompatibilityError)
+  end
+
+  it "raises Encoding::CompatibilityError when the regexp has a fixed encoding and the match string has non-ASCII characters" do # US-ASCII pattern vs. UTF-8 bytes
+    -> { Regexp.new("".dup.force_encoding("US-ASCII"), Regexp::FIXEDENCODING) =~ "\303\251".dup.force_encoding('UTF-8') }.should raise_error(Encoding::CompatibilityError)
+  end
+
+  it "raises ArgumentError when trying to match a broken String" do
+    s = "\x80".dup.force_encoding('UTF-8')
+    -> { s =~ /./ }.should raise_error(ArgumentError, "invalid byte sequence in UTF-8")
+  end
+
   it "computes the Regexp Encoding for each interpolated Regexp instance" do
     make_regexp = -> str { /#{str}/ }
 
-    r = make_regexp.call("été".force_encoding(Encoding::UTF_8))
+    r = make_regexp.call("été".dup.force_encoding(Encoding::UTF_8))
     r.should.fixed_encoding?
     r.encoding.should == Encoding::UTF_8
 
-    r = make_regexp.call("abc".force_encoding(Encoding::UTF_8))
+    r = make_regexp.call("abc".dup.force_encoding(Encoding::UTF_8))
     r.should_not.fixed_encoding?
     r.encoding.should == Encoding::US_ASCII
   end
diff --git a/spec/ruby/language/regexp/escapes_spec.rb b/spec/ruby/language/regexp/escapes_spec.rb
index 2e5fe5ad2e..16a4d8c23b 100644
--- a/spec/ruby/language/regexp/escapes_spec.rb
+++ b/spec/ruby/language/regexp/escapes_spec.rb
@@ -2,8 +2,10 @@
 require_relative '../../spec_helper'
 require_relative '../fixtures/classes'
 
+# TODO: synchronize with spec/core/regexp/new_spec.rb -
+#       escaping is also tested there
 describe "Regexps with escape characters" do
-  it "they're supported" do
+  it "supports escape sequences" do
     /\t/.match("\t").to_a.should == ["\t"] # horizontal tab
     /\v/.match("\v").to_a.should == ["\v"] # vertical tab
     /\n/.match("\n").to_a.should == ["\n"] # newline
@@ -15,9 +17,7 @@ describe "Regexps with escape characters" do
     # \nnn         octal char            (encoded byte value)
   end
 
-  it "support quoting meta-characters via escape sequence" do
-    /\\/.match("\\").to_a.should == ["\\"]
-    /\//.match("/").to_a.should == ["/"]
+  it "supports quoting meta-characters via escape sequence" do
     # parenthesis, etc
     /\(/.match("(").to_a.should == ["("]
     /\)/.match(")").to_a.should == [")"]
@@ -25,6 +25,8 @@ describe "Regexps with escape characters" do
     /\]/.match("]").to_a.should == ["]"]
     /\{/.match("{").to_a.should == ["{"]
     /\}/.match("}").to_a.should == ["}"]
+    /\</.match("<").to_a.should == ["<"]
+    /\>/.match(">").to_a.should == [">"]
     # alternation separator
     /\|/.match("|").to_a.should == ["|"]
     # quantifiers
@@ -37,11 +39,81 @@ describe "Regexps with escape characters" do
     /\$/.match("$").to_a.should == ["$"]
   end
 
+  it "supports quoting meta-characters via escape sequence when used as a terminator" do
+    # brackets: each one is tried with the paired delimiters and with the closing bracket as both delimiters
+    # (literals such as %r[[ or %r(( are forbidden, hence the escaping)
+    %r(\().match("(").to_a.should == ["("]
+    %r(\)).match(")").to_a.should == [")"]
+    %r)\().match("(").to_a.should == ["("]
+    %r)\)).match(")").to_a.should == [")"]
+
+    %r[\[].match("[").to_a.should == ["["]
+    %r[\]].match("]").to_a.should == ["]"]
+    %r]\[].match("[").to_a.should == ["["]
+    %r]\]].match("]").to_a.should == ["]"]
+
+    %r{\{}.match("{").to_a.should == ["{"]
+    %r{\}}.match("}").to_a.should == ["}"]
+    %r}\{}.match("{").to_a.should == ["{"]
+    %r}\}}.match("}").to_a.should == ["}"]
+
+    %r<\<>.match("<").to_a.should == ["<"]
+    %r<\>>.match(">").to_a.should == [">"]
+    %r>\<>.match("<").to_a.should == ["<"]
+    %r>\>>.match(">").to_a.should == [">"]
+
+    # alternation separator
+    %r|\||.match("|").to_a.should == ["|"]
+    # quantifiers and the any-character dot
+    %r?\??.match("?").to_a.should == ["?"]
+    %r.\...match(".").to_a.should == ["."]
+    %r*\**.match("*").to_a.should == ["*"]
+    %r+\++.match("+").to_a.should == ["+"]
+    # line anchors
+    %r^\^^.match("^").to_a.should == ["^"]
+    %r$\$$.match("$").to_a.should == ["$"]
+  end
+
+  it "supports quoting non-meta-characters via escape sequence when used as a terminator" do
+    non_meta_character_terminators = [
+      '!', '"', '#', '%', '&', "'", ',', '-', ':', ';', '@', '_', '`', '/', '=', '~'
+    ]
+
+    non_meta_character_terminators.each do |c|
+      pattern = eval("%r" + c + "\\" + c + c)
+      pattern.match(c).to_a.should == [c]
+    end
+  end
+
+  it "does not change semantics of escaped non-meta-character when used as a terminator" do
+    all_terminators = [*("!".."/"), *(":".."@"), *("[".."`"), *("{".."~")] # the four ASCII punctuation ranges
+    meta_character_terminators = ["$", "^", "*", "+", ".", "?", "|", "}", ")", ">", "]"]
+    special_cases = ['(', '{', '[', '<', '\\'] # opening brackets and the backslash are excluded from this example
+
+    # the subtraction below leaves the same set (in a different order) as
+    #   [ '!', '"', '#', '%', '&', "'", ',', '-', ':', ';', '@', '_', '`', '/', '=', '~' ]
+    non_meta_character_terminators = all_terminators - meta_character_terminators - special_cases
+
+    non_meta_character_terminators.each do |c|
+      pattern = eval("%r" + c + "\\" + c + c) # builds e.g. %r!\!!
+      pattern.should == /#{c}/ # the escaped terminator must equal the plain character
+    end
+  end
+
+  it "does not change semantics of escaped meta-character when used as a terminator" do
+    meta_character_terminators = ["$", "^", "*", "+", ".", "?", "|", "}", ")", ">", "]"]
+
+    meta_character_terminators.each do |c|
+      pattern = eval("%r" + c + "\\" + c + c)
+      pattern.should == eval("/\\#{c}/")
+    end
+  end
+
   it "allows any character to be escaped" do
     /\y/.match("y").to_a.should == ["y"]
   end
 
-  it "support \\x (hex characters)" do
+  it "supports \\x (hex characters)" do
     /\xA/.match("\nxyz").to_a.should == ["\n"]
     /\x0A/.match("\n").to_a.should == ["\n"]
     /\xAA/.match("\nA").should be_nil
@@ -53,7 +125,7 @@ describe "Regexps with escape characters" do
     # \x{7HHHHHHH} wide hexadecimal char (character code point value)
   end
 
-  it "support \\c (control characters)" do
+  it "supports \\c (control characters)" do
     #/\c \c@\c`/.match("\00\00\00").to_a.should == ["\00\00\00"]
     /\c#\cc\cC/.match("\03\03\03").to_a.should == ["\03\03\03"]
     /\c'\cG\cg/.match("\a\a\a").to_a.should == ["\a\a\a"]
diff --git a/spec/ruby/language/regexp/repetition_spec.rb b/spec/ruby/language/regexp/repetition_spec.rb
index 9a191d74e2..d76619688f 100644
--- a/spec/ruby/language/regexp/repetition_spec.rb
+++ b/spec/ruby/language/regexp/repetition_spec.rb
@@ -87,9 +87,7 @@ describe "Regexps with repetition" do
       /a+?*/.match("a")[0].should == "a"
       /(a+?)*/.match("a")[0].should == "a"
 
-      ruby_bug '#17341', ''...'3.0' do
-        /a+?*/.match("aa")[0].should == "aa"
-      end
+      /a+?*/.match("aa")[0].should == "aa"
       /(a+?)*/.match("aa")[0].should == "aa"
 
       # a+?+ should not be reduced, it should be equivalent to (a+?)+
@@ -100,9 +98,7 @@ describe "Regexps with repetition" do
       /a+?+/.match("a")[0].should == "a"
       /(a+?)+/.match("a")[0].should == "a"
 
-      ruby_bug '#17341', ''...'3.0' do
-        /a+?+/.match("aa")[0].should == "aa"
-      end
+      /a+?+/.match("aa")[0].should == "aa"
       /(a+?)+/.match("aa")[0].should == "aa"
 
       # both a**? and a+*? should be equivalent to (a+)??