1 files changed, 42 insertions, 21 deletions
diff --git a/spec/ruby/language/regexp/encoding_spec.rb b/spec/ruby/language/regexp/encoding_spec.rb
index 8e2a294b95..81e845af0c 100644
--- a/spec/ruby/language/regexp/encoding_spec.rb
+++ b/spec/ruby/language/regexp/encoding_spec.rb
@@ -1,21 +1,21 @@
-# -*- encoding: binary -*-
+# encoding: binary
 require_relative '../../spec_helper'
 require_relative '../fixtures/classes'
 
 describe "Regexps with encoding modifiers" do
   it "supports /e (EUC encoding)" do
-    match = /./e.match("\303\251".force_encoding(Encoding::EUC_JP))
-    match.to_a.should == ["\303\251".force_encoding(Encoding::EUC_JP)]
+    match = /./e.match("\303\251".dup.force_encoding(Encoding::EUC_JP))
+    match.to_a.should == ["\303\251".dup.force_encoding(Encoding::EUC_JP)]
   end
 
   it "supports /e (EUC encoding) with interpolation" do
-    match = /#{/./}/e.match("\303\251".force_encoding(Encoding::EUC_JP))
-    match.to_a.should == ["\303\251".force_encoding(Encoding::EUC_JP)]
+    match = /#{/./}/e.match("\303\251".dup.force_encoding(Encoding::EUC_JP))
+    match.to_a.should == ["\303\251".dup.force_encoding(Encoding::EUC_JP)]
   end
 
   it "supports /e (EUC encoding) with interpolation /o" do
-    match = /#{/./}/e.match("\303\251".force_encoding(Encoding::EUC_JP))
-    match.to_a.should == ["\303\251".force_encoding(Encoding::EUC_JP)]
+    match = /#{/./}/eo.match("\303\251".dup.force_encoding(Encoding::EUC_JP))
+    match.to_a.should == ["\303\251".dup.force_encoding(Encoding::EUC_JP)]
   end
 
   it 'uses EUC-JP as /e encoding' do
@@ -38,6 +38,14 @@ describe "Regexps with encoding modifiers" do
     /#{/./}/n.match("\303\251").to_a.should == ["\303"]
   end
 
+  it "warns when using /n with a match string with non-ASCII characters and an encoding other than ASCII-8BIT" do
+    -> {
+      eval <<~RUBY
+      /./n.match("\303\251".dup.force_encoding('utf-8'))
+      RUBY
+    }.should complain(%r{historical binary regexp match /.../n against UTF-8 string})
+  end
+
   it 'uses US-ASCII as /n encoding if all chars are 7-bit' do
     /./n.encoding.should == Encoding::US_ASCII
   end
@@ -59,18 +67,18 @@ describe "Regexps with encoding modifiers" do
   end
 
   it "supports /s (Windows_31J encoding)" do
-    match = /./s.match("\303\251".force_encoding(Encoding::Windows_31J))
-    match.to_a.should == ["\303".force_encoding(Encoding::Windows_31J)]
+    match = /./s.match("\303\251".dup.force_encoding(Encoding::Windows_31J))
+    match.to_a.should == ["\303".dup.force_encoding(Encoding::Windows_31J)]
   end
 
   it "supports /s (Windows_31J encoding) with interpolation" do
-    match = /#{/./}/s.match("\303\251".force_encoding(Encoding::Windows_31J))
-    match.to_a.should == ["\303".force_encoding(Encoding::Windows_31J)]
+    match = /#{/./}/s.match("\303\251".dup.force_encoding(Encoding::Windows_31J))
+    match.to_a.should == ["\303".dup.force_encoding(Encoding::Windows_31J)]
   end
 
   it "supports /s (Windows_31J encoding) with interpolation and /o" do
-    match = /#{/./}/s.match("\303\251".force_encoding(Encoding::Windows_31J))
-    match.to_a.should == ["\303".force_encoding(Encoding::Windows_31J)]
+    match = /#{/./}/so.match("\303\251".dup.force_encoding(Encoding::Windows_31J))
+    match.to_a.should == ["\303".dup.force_encoding(Encoding::Windows_31J)]
   end
 
   it 'uses Windows-31J as /s encoding' do
@@ -82,15 +90,15 @@ describe "Regexps with encoding modifiers" do
   end
 
   it "supports /u (UTF8 encoding)" do
-    /./u.match("\303\251".force_encoding('utf-8')).to_a.should == ["\u{e9}"]
+    /./u.match("\303\251".dup.force_encoding('utf-8')).to_a.should == ["\u{e9}"]
   end
 
   it "supports /u (UTF8 encoding) with interpolation" do
-    /#{/./}/u.match("\303\251".force_encoding('utf-8')).to_a.should == ["\u{e9}"]
+    /#{/./}/u.match("\303\251".dup.force_encoding('utf-8')).to_a.should == ["\u{e9}"]
   end
 
   it "supports /u (UTF8 encoding) with interpolation and /o" do
-    /#{/./}/u.match("\303\251".force_encoding('utf-8')).to_a.should == ["\u{e9}"]
+    /#{/./}/uo.match("\303\251".dup.force_encoding('utf-8')).to_a.should == ["\u{e9}"]
   end
 
   it 'uses UTF-8 as /u encoding' do
@@ -106,25 +114,38 @@ describe "Regexps with encoding modifiers" do
   end
 
   it "raises Encoding::CompatibilityError when trying match against different encodings" do
-    -> { /\A[[:space:]]*\z/.match(" ".encode("UTF-16LE")) }.should raise_error(Encoding::CompatibilityError)
+    -> { /\A[[:space:]]*\z/.match(" ".encode("UTF-16LE")) }.should.raise(Encoding::CompatibilityError)
   end
 
   it "raises Encoding::CompatibilityError when trying match? against different encodings" do
-    -> { /\A[[:space:]]*\z/.match?(" ".encode("UTF-16LE")) }.should raise_error(Encoding::CompatibilityError)
+    -> { /\A[[:space:]]*\z/.match?(" ".encode("UTF-16LE")) }.should.raise(Encoding::CompatibilityError)
   end
 
   it "raises Encoding::CompatibilityError when trying =~ against different encodings" do
-    -> { /\A[[:space:]]*\z/ =~ " ".encode("UTF-16LE") }.should raise_error(Encoding::CompatibilityError)
+    -> { /\A[[:space:]]*\z/ =~ " ".encode("UTF-16LE") }.should.raise(Encoding::CompatibilityError)
+  end
+
+  it "raises Encoding::CompatibilityError when the regexp has a fixed, non-ASCII-compatible encoding" do
+    -> { Regexp.new("".dup.force_encoding("UTF-16LE"), Regexp::FIXEDENCODING) =~ " ".encode("UTF-8") }.should.raise(Encoding::CompatibilityError)
+  end
+
+  it "raises Encoding::CompatibilityError when the regexp has a fixed encoding and the match string has non-ASCII characters" do
+    -> { Regexp.new("".dup.force_encoding("US-ASCII"), Regexp::FIXEDENCODING) =~ "\303\251".dup.force_encoding('UTF-8') }.should.raise(Encoding::CompatibilityError)
+  end
+
+  it "raises ArgumentError when trying to match a broken String" do
+    s = "\x80".dup.force_encoding('UTF-8')
+    -> { s =~ /./ }.should.raise(ArgumentError, "invalid byte sequence in UTF-8")
   end
 
   it "computes the Regexp Encoding for each interpolated Regexp instance" do
     make_regexp = -> str { /#{str}/ }
 
-    r = make_regexp.call("été".force_encoding(Encoding::UTF_8))
+    r = make_regexp.call("été".dup.force_encoding(Encoding::UTF_8))
     r.should.fixed_encoding?
     r.encoding.should == Encoding::UTF_8
 
-    r = make_regexp.call("abc".force_encoding(Encoding::UTF_8))
+    r = make_regexp.call("abc".dup.force_encoding(Encoding::UTF_8))
     r.should_not.fixed_encoding?
     r.encoding.should == Encoding::US_ASCII
   end