diff options
Diffstat (limited to 'spec/ruby/language/regexp')
-rw-r--r-- | spec/ruby/language/regexp/back-references_spec.rb | 9 | ||||
-rw-r--r-- | spec/ruby/language/regexp/character_classes_spec.rb | 11 | ||||
-rw-r--r-- | spec/ruby/language/regexp/encoding_spec.rb | 51 | ||||
-rw-r--r-- | spec/ruby/language/regexp/escapes_spec.rb | 84 | ||||
-rw-r--r-- | spec/ruby/language/regexp/repetition_spec.rb | 8 |
5 files changed, 130 insertions, 33 deletions
diff --git a/spec/ruby/language/regexp/back-references_spec.rb b/spec/ruby/language/regexp/back-references_spec.rb index 26750c20c5..627c8daace 100644 --- a/spec/ruby/language/regexp/back-references_spec.rb +++ b/spec/ruby/language/regexp/back-references_spec.rb @@ -22,6 +22,15 @@ describe "Regexps with back-references" do $10.should == "0" end + it "returns nil for numbered variable with too large index" do + -> { + eval(<<~CODE).should == nil + "a" =~ /(.)/ + eval('$4294967296') + CODE + }.should complain(/warning: ('|`)\$4294967296' is too big for a number variable, always nil/) + end + it "will not clobber capture variables across threads" do cap1, cap2, cap3 = nil "foo" =~ /(o+)/ diff --git a/spec/ruby/language/regexp/character_classes_spec.rb b/spec/ruby/language/regexp/character_classes_spec.rb index 0cf1e9b6f4..98d431a817 100644 --- a/spec/ruby/language/regexp/character_classes_spec.rb +++ b/spec/ruby/language/regexp/character_classes_spec.rb @@ -609,10 +609,13 @@ describe "Regexp with character classes" do "루비(Ruby)".match(/\p{Hangul}+/u).to_a.should == ["루비"] end - ruby_bug "#17340", ''...'3.0' do - it "raises a RegexpError for an unterminated unicode property" do - -> { Regexp.new('\p{') }.should raise_error(RegexpError) - end + it "supports negated property condition" do + "a".match(eval("/\P{L}/")).should be_nil + "1".match(eval("/\P{N}/")).should be_nil + end + + it "raises a RegexpError for an unterminated unicode property" do + -> { Regexp.new('\p{') }.should raise_error(RegexpError) end it "supports \\X (unicode 9.0 with UTR #51 workarounds)" do diff --git a/spec/ruby/language/regexp/encoding_spec.rb b/spec/ruby/language/regexp/encoding_spec.rb index 8e2a294b95..0571b2d3cf 100644 --- a/spec/ruby/language/regexp/encoding_spec.rb +++ b/spec/ruby/language/regexp/encoding_spec.rb @@ -4,18 +4,18 @@ require_relative '../fixtures/classes' describe "Regexps with encoding modifiers" do it "supports /e (EUC encoding)" do - match = /./e.match("\303\251".force_encoding(Encoding::EUC_JP)) - match.to_a.should == ["\303\251".force_encoding(Encoding::EUC_JP)] + match = /./e.match("\303\251".dup.force_encoding(Encoding::EUC_JP)) + match.to_a.should == ["\303\251".dup.force_encoding(Encoding::EUC_JP)] end it "supports /e (EUC encoding) with interpolation" do - match = /#{/./}/e.match("\303\251".force_encoding(Encoding::EUC_JP)) - match.to_a.should == ["\303\251".force_encoding(Encoding::EUC_JP)] + match = /#{/./}/e.match("\303\251".dup.force_encoding(Encoding::EUC_JP)) + match.to_a.should == ["\303\251".dup.force_encoding(Encoding::EUC_JP)] end it "supports /e (EUC encoding) with interpolation /o" do - match = /#{/./}/e.match("\303\251".force_encoding(Encoding::EUC_JP)) - match.to_a.should == ["\303\251".force_encoding(Encoding::EUC_JP)] + match = /#{/./}/e.match("\303\251".dup.force_encoding(Encoding::EUC_JP)) + match.to_a.should == ["\303\251".dup.force_encoding(Encoding::EUC_JP)] end it 'uses EUC-JP as /e encoding' do @@ -38,6 +38,10 @@ describe "Regexps with encoding modifiers" do /#{/./}/n.match("\303\251").to_a.should == ["\303"] end + it "warns when using /n with a match string with non-ASCII characters and an encoding other than ASCII-8BIT" do + -> { /./n.match("\303\251".dup.force_encoding('utf-8')) }.should complain(%r{historical binary regexp match /.../n against UTF-8 string}) + end + it 'uses US-ASCII as /n encoding if all chars are 7-bit' do /./n.encoding.should == Encoding::US_ASCII end @@ -59,18 +63,18 @@ describe "Regexps with encoding modifiers" do end it "supports /s (Windows_31J encoding)" do - match = /./s.match("\303\251".force_encoding(Encoding::Windows_31J)) - match.to_a.should == ["\303".force_encoding(Encoding::Windows_31J)] + match = /./s.match("\303\251".dup.force_encoding(Encoding::Windows_31J)) + match.to_a.should == ["\303".dup.force_encoding(Encoding::Windows_31J)] end it "supports /s (Windows_31J encoding) with interpolation" do - match = /#{/./}/s.match("\303\251".force_encoding(Encoding::Windows_31J)) - match.to_a.should == ["\303".force_encoding(Encoding::Windows_31J)] + match = /#{/./}/s.match("\303\251".dup.force_encoding(Encoding::Windows_31J)) + match.to_a.should == ["\303".dup.force_encoding(Encoding::Windows_31J)] end it "supports /s (Windows_31J encoding) with interpolation and /o" do - match = /#{/./}/s.match("\303\251".force_encoding(Encoding::Windows_31J)) - match.to_a.should == ["\303".force_encoding(Encoding::Windows_31J)] + match = /#{/./}/s.match("\303\251".dup.force_encoding(Encoding::Windows_31J)) + match.to_a.should == ["\303".dup.force_encoding(Encoding::Windows_31J)] end it 'uses Windows-31J as /s encoding' do @@ -82,15 +86,15 @@ describe "Regexps with encoding modifiers" do end it "supports /u (UTF8 encoding)" do - /./u.match("\303\251".force_encoding('utf-8')).to_a.should == ["\u{e9}"] + /./u.match("\303\251".dup.force_encoding('utf-8')).to_a.should == ["\u{e9}"] end it "supports /u (UTF8 encoding) with interpolation" do - /#{/./}/u.match("\303\251".force_encoding('utf-8')).to_a.should == ["\u{e9}"] + /#{/./}/u.match("\303\251".dup.force_encoding('utf-8')).to_a.should == ["\u{e9}"] end it "supports /u (UTF8 encoding) with interpolation and /o" do - /#{/./}/u.match("\303\251".force_encoding('utf-8')).to_a.should == ["\u{e9}"] + /#{/./}/u.match("\303\251".dup.force_encoding('utf-8')).to_a.should == ["\u{e9}"] end it 'uses UTF-8 as /u encoding' do @@ -117,14 +121,27 @@ describe "Regexps with encoding modifiers" do -> { /\A[[:space:]]*\z/ =~ " ".encode("UTF-16LE") }.should raise_error(Encoding::CompatibilityError) end + it "raises Encoding::CompatibilityError when the regexp has a fixed, non-ASCII-compatible encoding" do + -> { Regexp.new("".dup.force_encoding("UTF-16LE"), Regexp::FIXEDENCODING) =~ " ".encode("UTF-8") }.should raise_error(Encoding::CompatibilityError) + end + + it "raises Encoding::CompatibilityError when the regexp has a fixed encoding and the match string has non-ASCII characters" do + -> { Regexp.new("".dup.force_encoding("US-ASCII"), Regexp::FIXEDENCODING) =~ "\303\251".dup.force_encoding('UTF-8') }.should raise_error(Encoding::CompatibilityError) + end + + it "raises ArgumentError when trying to match a broken String" do + s = "\x80".dup.force_encoding('UTF-8') + -> { s =~ /./ }.should raise_error(ArgumentError, "invalid byte sequence in UTF-8") + end + it "computes the Regexp Encoding for each interpolated Regexp instance" do make_regexp = -> str { /#{str}/ } - r = make_regexp.call("été".force_encoding(Encoding::UTF_8)) + r = make_regexp.call("été".dup.force_encoding(Encoding::UTF_8)) r.should.fixed_encoding? r.encoding.should == Encoding::UTF_8 - r = make_regexp.call("abc".force_encoding(Encoding::UTF_8)) + r = make_regexp.call("abc".dup.force_encoding(Encoding::UTF_8)) r.should_not.fixed_encoding? r.encoding.should == Encoding::US_ASCII end diff --git a/spec/ruby/language/regexp/escapes_spec.rb b/spec/ruby/language/regexp/escapes_spec.rb index 2e5fe5ad2e..16a4d8c23b 100644 --- a/spec/ruby/language/regexp/escapes_spec.rb +++ b/spec/ruby/language/regexp/escapes_spec.rb @@ -2,8 +2,10 @@ require_relative '../../spec_helper' require_relative '../fixtures/classes' +# TODO: synchronize with spec/core/regexp/new_spec.rb - +# escaping is also tested there describe "Regexps with escape characters" do - it "they're supported" do + it "supports escape sequences" do /\t/.match("\t").to_a.should == ["\t"] # horizontal tab /\v/.match("\v").to_a.should == ["\v"] # vertical tab /\n/.match("\n").to_a.should == ["\n"] # newline @@ -15,9 +17,7 @@ describe "Regexps with escape characters" do # \nnn octal char (encoded byte value) end - it "support quoting meta-characters via escape sequence" do - /\\/.match("\\").to_a.should == ["\\"] - /\//.match("/").to_a.should == ["/"] + it "supports quoting meta-characters via escape sequence" do # parenthesis, etc /\(/.match("(").to_a.should == ["("] /\)/.match(")").to_a.should == [")"] @@ -25,6 +25,8 @@ describe "Regexps with escape characters" do /\]/.match("]").to_a.should == ["]"] /\{/.match("{").to_a.should == ["{"] /\}/.match("}").to_a.should == ["}"] + /\</.match("<").to_a.should == ["<"] + /\>/.match(">").to_a.should == [">"] # alternation separator /\|/.match("|").to_a.should == ["|"] # quantifiers @@ -37,11 +39,81 @@ describe "Regexps with escape characters" do /\$/.match("$").to_a.should == ["$"] end + it "supports quoting meta-characters via escape sequence when used as a terminator" do + # parenthesis, etc + # %r[[, %r((, etc literals - are forbidden + %r(\().match("(").to_a.should == ["("] + %r(\)).match(")").to_a.should == [")"] + %r)\().match("(").to_a.should == ["("] + %r)\)).match(")").to_a.should == [")"] + + %r[\[].match("[").to_a.should == ["["] + %r[\]].match("]").to_a.should == ["]"] + %r]\[].match("[").to_a.should == ["["] + %r]\]].match("]").to_a.should == ["]"] + + %r{\{}.match("{").to_a.should == ["{"] + %r{\}}.match("}").to_a.should == ["}"] + %r}\{}.match("{").to_a.should == ["{"] + %r}\}}.match("}").to_a.should == ["}"] + + %r<\<>.match("<").to_a.should == ["<"] + %r<\>>.match(">").to_a.should == [">"] + %r>\<>.match("<").to_a.should == ["<"] + %r>\>>.match(">").to_a.should == [">"] + + # alternation separator + %r|\||.match("|").to_a.should == ["|"] + # quantifiers + %r?\??.match("?").to_a.should == ["?"] + %r.\...match(".").to_a.should == ["."] + %r*\**.match("*").to_a.should == ["*"] + %r+\++.match("+").to_a.should == ["+"] + # line anchors + %r^\^^.match("^").to_a.should == ["^"] + %r$\$$.match("$").to_a.should == ["$"] + end + + it "supports quoting non-meta-characters via escape sequence when used as a terminator" do + non_meta_character_terminators = [ + '!', '"', '#', '%', '&', "'", ',', '-', ':', ';', '@', '_', '`', '/', '=', '~' + ] + + non_meta_character_terminators.each do |c| + pattern = eval("%r" + c + "\\" + c + c) + pattern.match(c).to_a.should == [c] + end + end + + it "does not change semantics of escaped non-meta-character when used as a terminator" do + all_terminators = [*("!".."/"), *(":".."@"), *("[".."`"), *("{".."~")] + meta_character_terminators = ["$", "^", "*", "+", ".", "?", "|", "}", ")", ">", "]"] + special_cases = ['(', '{', '[', '<', '\\'] + + # it should be equivalent to + # [ '!', '"', '#', '%', '&', "'", ',', '-', ':', ';', '@', '_', '`', '/', '=', '~' ] + non_meta_character_terminators = all_terminators - meta_character_terminators - special_cases + + non_meta_character_terminators.each do |c| + pattern = eval("%r" + c + "\\" + c + c) + pattern.should == /#{c}/ + end + end + + it "does not change semantics of escaped meta-character when used as a terminator" do + meta_character_terminators = ["$", "^", "*", "+", ".", "?", "|", "}", ")", ">", "]"] + + meta_character_terminators.each do |c| + pattern = eval("%r" + c + "\\" + c + c) + pattern.should == eval("/\\#{c}/") + end + end + it "allows any character to be escaped" do /\y/.match("y").to_a.should == ["y"] end - it "support \\x (hex characters)" do + it "supports \\x (hex characters)" do /\xA/.match("\nxyz").to_a.should == ["\n"] /\x0A/.match("\n").to_a.should == ["\n"] /\xAA/.match("\nA").should be_nil @@ -53,7 +125,7 @@ describe "Regexps with escape characters" do # \x{7HHHHHHH} wide hexadecimal char (character code point value) end - it "support \\c (control characters)" do + it "supports \\c (control characters)" do #/\c \c@\c`/.match("\00\00\00").to_a.should == ["\00\00\00"] /\c#\cc\cC/.match("\03\03\03").to_a.should == ["\03\03\03"] /\c'\cG\cg/.match("\a\a\a").to_a.should == ["\a\a\a"] diff --git a/spec/ruby/language/regexp/repetition_spec.rb b/spec/ruby/language/regexp/repetition_spec.rb index 9a191d74e2..d76619688f 100644 --- a/spec/ruby/language/regexp/repetition_spec.rb +++ b/spec/ruby/language/regexp/repetition_spec.rb @@ -87,9 +87,7 @@ describe "Regexps with repetition" do /a+?*/.match("a")[0].should == "a" /(a+?)*/.match("a")[0].should == "a" - ruby_bug '#17341', ''...'3.0' do - /a+?*/.match("aa")[0].should == "aa" - end + /a+?*/.match("aa")[0].should == "aa" /(a+?)*/.match("aa")[0].should == "aa" # a+?+ should not be reduced, it should be equivalent to (a+?)+ @@ -100,9 +98,7 @@ describe "Regexps with repetition" do /a+?+/.match("a")[0].should == "a" /(a+?)+/.match("a")[0].should == "a" - ruby_bug '#17341', ''...'3.0' do - /a+?+/.match("aa")[0].should == "aa" - end + /a+?+/.match("aa")[0].should == "aa" /(a+?)+/.match("aa")[0].should == "aa" # both a**? and a+*? should be equivalent to (a+)?? |