diff options
Diffstat (limited to 'spec/ruby/language/regexp/encoding_spec.rb')
-rw-r--r-- | spec/ruby/language/regexp/encoding_spec.rb | 51 |
1 files changed, 34 insertions, 17 deletions
diff --git a/spec/ruby/language/regexp/encoding_spec.rb b/spec/ruby/language/regexp/encoding_spec.rb index 8e2a294b95..0571b2d3cf 100644 --- a/spec/ruby/language/regexp/encoding_spec.rb +++ b/spec/ruby/language/regexp/encoding_spec.rb @@ -4,18 +4,18 @@ require_relative '../fixtures/classes' describe "Regexps with encoding modifiers" do it "supports /e (EUC encoding)" do - match = /./e.match("\303\251".force_encoding(Encoding::EUC_JP)) - match.to_a.should == ["\303\251".force_encoding(Encoding::EUC_JP)] + match = /./e.match("\303\251".dup.force_encoding(Encoding::EUC_JP)) + match.to_a.should == ["\303\251".dup.force_encoding(Encoding::EUC_JP)] end it "supports /e (EUC encoding) with interpolation" do - match = /#{/./}/e.match("\303\251".force_encoding(Encoding::EUC_JP)) - match.to_a.should == ["\303\251".force_encoding(Encoding::EUC_JP)] + match = /#{/./}/e.match("\303\251".dup.force_encoding(Encoding::EUC_JP)) + match.to_a.should == ["\303\251".dup.force_encoding(Encoding::EUC_JP)] end it "supports /e (EUC encoding) with interpolation /o" do - match = /#{/./}/e.match("\303\251".force_encoding(Encoding::EUC_JP)) - match.to_a.should == ["\303\251".force_encoding(Encoding::EUC_JP)] + match = /#{/./}/e.match("\303\251".dup.force_encoding(Encoding::EUC_JP)) + match.to_a.should == ["\303\251".dup.force_encoding(Encoding::EUC_JP)] end it 'uses EUC-JP as /e encoding' do @@ -38,6 +38,10 @@ describe "Regexps with encoding modifiers" do /#{/./}/n.match("\303\251").to_a.should == ["\303"] end + it "warns when using /n with a match string with non-ASCII characters and an encoding other than ASCII-8BIT" do + -> { /./n.match("\303\251".dup.force_encoding('utf-8')) }.should complain(%r{historical binary regexp match /.../n against UTF-8 string}) + end + it 'uses US-ASCII as /n encoding if all chars are 7-bit' do /./n.encoding.should == Encoding::US_ASCII end @@ -59,18 +63,18 @@ describe "Regexps with encoding modifiers" do end it "supports /s (Windows_31J encoding)" do - match = /./s.match("\303\251".force_encoding(Encoding::Windows_31J)) - match.to_a.should == ["\303".force_encoding(Encoding::Windows_31J)] + match = /./s.match("\303\251".dup.force_encoding(Encoding::Windows_31J)) + match.to_a.should == ["\303".dup.force_encoding(Encoding::Windows_31J)] end it "supports /s (Windows_31J encoding) with interpolation" do - match = /#{/./}/s.match("\303\251".force_encoding(Encoding::Windows_31J)) - match.to_a.should == ["\303".force_encoding(Encoding::Windows_31J)] + match = /#{/./}/s.match("\303\251".dup.force_encoding(Encoding::Windows_31J)) + match.to_a.should == ["\303".dup.force_encoding(Encoding::Windows_31J)] end it "supports /s (Windows_31J encoding) with interpolation and /o" do - match = /#{/./}/s.match("\303\251".force_encoding(Encoding::Windows_31J)) - match.to_a.should == ["\303".force_encoding(Encoding::Windows_31J)] + match = /#{/./}/s.match("\303\251".dup.force_encoding(Encoding::Windows_31J)) + match.to_a.should == ["\303".dup.force_encoding(Encoding::Windows_31J)] end it 'uses Windows-31J as /s encoding' do @@ -82,15 +86,15 @@ describe "Regexps with encoding modifiers" do end it "supports /u (UTF8 encoding)" do - /./u.match("\303\251".force_encoding('utf-8')).to_a.should == ["\u{e9}"] + /./u.match("\303\251".dup.force_encoding('utf-8')).to_a.should == ["\u{e9}"] end it "supports /u (UTF8 encoding) with interpolation" do - /#{/./}/u.match("\303\251".force_encoding('utf-8')).to_a.should == ["\u{e9}"] + /#{/./}/u.match("\303\251".dup.force_encoding('utf-8')).to_a.should == ["\u{e9}"] end it "supports /u (UTF8 encoding) with interpolation and /o" do - /#{/./}/u.match("\303\251".force_encoding('utf-8')).to_a.should == ["\u{e9}"] + /#{/./}/u.match("\303\251".dup.force_encoding('utf-8')).to_a.should == ["\u{e9}"] end it 'uses UTF-8 as /u encoding' do @@ -117,14 +121,27 @@ describe "Regexps with encoding modifiers" do -> { /\A[[:space:]]*\z/ =~ " ".encode("UTF-16LE") }.should raise_error(Encoding::CompatibilityError) end + it "raises Encoding::CompatibilityError when the regexp has a fixed, non-ASCII-compatible encoding" do + -> { Regexp.new("".dup.force_encoding("UTF-16LE"), Regexp::FIXEDENCODING) =~ " ".encode("UTF-8") }.should raise_error(Encoding::CompatibilityError) + end + + it "raises Encoding::CompatibilityError when the regexp has a fixed encoding and the match string has non-ASCII characters" do + -> { Regexp.new("".dup.force_encoding("US-ASCII"), Regexp::FIXEDENCODING) =~ "\303\251".dup.force_encoding('UTF-8') }.should raise_error(Encoding::CompatibilityError) + end + + it "raises ArgumentError when trying to match a broken String" do + s = "\x80".dup.force_encoding('UTF-8') + -> { s =~ /./ }.should raise_error(ArgumentError, "invalid byte sequence in UTF-8") + end + it "computes the Regexp Encoding for each interpolated Regexp instance" do make_regexp = -> str { /#{str}/ } - r = make_regexp.call("été".force_encoding(Encoding::UTF_8)) + r = make_regexp.call("été".dup.force_encoding(Encoding::UTF_8)) r.should.fixed_encoding? r.encoding.should == Encoding::UTF_8 - r = make_regexp.call("abc".force_encoding(Encoding::UTF_8)) + r = make_regexp.call("abc".dup.force_encoding(Encoding::UTF_8)) r.should_not.fixed_encoding? r.encoding.should == Encoding::US_ASCII end |