diff options
| author | Kevin Newton <kddnewton@gmail.com> | 2023-10-12 08:46:40 -0400 |
|---|---|---|
| committer | Kevin Newton <kddnewton@gmail.com> | 2023-10-13 15:31:30 -0400 |
| commit | fa76cddc5b1eebf77c9c5bbe951f70fd6c115716 (patch) | |
| tree | bf98c1898db99a2d35aa759c98dfb259f02055a5 /test | |
| parent | e4f1c06a9bb6012ac155b7a7789d2b5cb4e8abdc (diff) | |
[ruby/prism] Properly handle unescaping in regexp
https://github.com/ruby/prism/commit/abf9fd6863
Diffstat (limited to 'test')
| -rw-r--r-- | test/prism/snapshots/heredoc_with_escaped_newline_at_start.txt | 4 | ||||
| -rw-r--r-- | test/prism/snapshots/regex.txt | 12 | ||||
| -rw-r--r-- | test/prism/snapshots/seattlerb/bug190.txt | 2 | ||||
| -rw-r--r-- | test/prism/snapshots/seattlerb/regexp_esc_C_slash.txt | 2 | ||||
| -rw-r--r-- | test/prism/snapshots/seattlerb/regexp_esc_u.txt | 2 | ||||
| -rw-r--r-- | test/prism/snapshots/seattlerb/regexp_unicode_curlies.txt | 4 | ||||
| -rw-r--r-- | test/prism/snapshots/spanning_heredoc.txt | 4 | ||||
| -rw-r--r-- | test/prism/snapshots/unescaping.txt | 2 | ||||
| -rw-r--r-- | test/prism/snapshots/unparser/corpus/literal/literal.txt | 8 | ||||
| -rw-r--r-- | test/prism/snapshots/unparser/corpus/semantic/literal.txt | 2 | ||||
| -rw-r--r-- | test/prism/snapshots/whitequark/parser_bug_830.txt | 2 | ||||
| -rw-r--r-- | test/prism/unescape_test.rb | 72 |
12 files changed, 63 insertions, 53 deletions
diff --git a/test/prism/snapshots/heredoc_with_escaped_newline_at_start.txt b/test/prism/snapshots/heredoc_with_escaped_newline_at_start.txt index e158069bb6..e9bb768383 100644 --- a/test/prism/snapshots/heredoc_with_escaped_newline_at_start.txt +++ b/test/prism/snapshots/heredoc_with_escaped_newline_at_start.txt @@ -21,7 +21,7 @@ │ │ │ ├── opening_loc: (1,15)-(1,16) = "/" │ │ │ ├── content_loc: (1,16)-(1,20) = "^\\s{" │ │ │ ├── closing_loc: (1,20)-(1,21) = "/" - │ │ │ ├── unescaped: "^ {" + │ │ │ ├── unescaped: "^\\s{" │ │ │ └── flags: ∅ │ │ └── @ StringNode (location: (1,23)-(1,25)) │ │ ├── flags: ∅ @@ -51,7 +51,7 @@ │ │ ├── opening_loc: (5,15)-(5,16) = "/" │ │ ├── content_loc: (5,16)-(5,20) = "^\\s{" │ │ ├── closing_loc: (5,20)-(5,21) = "/" - │ │ ├── unescaped: "^ {" + │ │ ├── unescaped: "^\\s{" │ │ └── flags: ∅ │ └── @ StringNode (location: (5,23)-(5,25)) │ ├── flags: ∅ diff --git a/test/prism/snapshots/regex.txt b/test/prism/snapshots/regex.txt index ff0e3d3b56..5fa07265a3 100644 --- a/test/prism/snapshots/regex.txt +++ b/test/prism/snapshots/regex.txt @@ -31,7 +31,7 @@ │ ├── opening_loc: (5,0)-(5,1) = "/" │ ├── content_loc: (5,1)-(5,4) = "a\\b" │ ├── closing_loc: (5,4)-(5,5) = "/" - │ ├── unescaped: "a\b" + │ ├── unescaped: "a\\b" │ └── flags: ∅ ├── @ InterpolatedRegularExpressionNode (location: (7,0)-(7,11)) │ ├── opening_loc: (7,0)-(7,1) = "/" @@ -130,25 +130,25 @@ │ ├── opening_loc: (15,0)-(15,3) = "%r/" │ ├── content_loc: (15,3)-(15,24) = "[a-z$._?][\\w$.?\#@~]*:" │ ├── closing_loc: (15,24)-(15,26) = "/i" - │ ├── unescaped: "[a-z$._?][w$.?\#@~]*:" + │ ├── unescaped: "[a-z$._?][\\w$.?\#@~]*:" │ └── flags: ignore_case ├── @ RegularExpressionNode (location: (17,0)-(17,37)) │ ├── opening_loc: (17,0)-(17,3) = "%r/" │ ├── content_loc: (17,3)-(17,35) = "([a-z$._?][\\w$.?\#@~]*)(\\s+)(equ)" │ ├── closing_loc: (17,35)-(17,37) = "/i" - │ ├── unescaped: "([a-z$._?][w$.?\#@~]*)( +)(equ)" + │ ├── unescaped: "([a-z$._?][\\w$.?\#@~]*)(\\s+)(equ)" │ └── flags: ignore_case ├── @ RegularExpressionNode (location: (19,0)-(19,25)) │ ├── opening_loc: (19,0)-(19,3) = "%r/" │ ├── content_loc: (19,3)-(19,23) = "[a-z$._?][\\w$.?\#@~]*" │ ├── closing_loc: (19,23)-(19,25) = "/i" - │ ├── unescaped: "[a-z$._?][w$.?\#@~]*" + │ ├── unescaped: "[a-z$._?][\\w$.?\#@~]*" │ └── flags: ignore_case ├── @ RegularExpressionNode (location: (21,0)-(24,1)) │ ├── opening_loc: (21,0)-(21,3) = "%r(" │ ├── content_loc: (21,3)-(23,0) = "\n(?:[\\w\#$%_']|\\(\\)|\\(,\\)|\\[\\]|[0-9])*\n (?:[\\w\#$%_']+)\n" │ ├── closing_loc: (24,0)-(24,1) = ")" - │ ├── unescaped: "\n(?:[w\#$%_']|()|(,)|[]|[0-9])*\n (?:[w\#$%_']+)\n" + │ ├── unescaped: "\n(?:[\\w\#$%_']|\\(\\)|\\(,\\)|\\[\\]|[0-9])*\n (?:[\\w\#$%_']+)\n" │ └── flags: ∅ ├── @ CallNode (location: (26,0)-(26,16)) │ ├── receiver: @@ -156,7 +156,7 @@ │ │ ├── opening_loc: (26,0)-(26,1) = "/" │ │ ├── content_loc: (26,1)-(26,7) = "(?#\\))" │ │ ├── closing_loc: (26,7)-(26,8) = "/" - │ │ ├── unescaped: "(?#))" + │ │ ├── unescaped: "(?#\\))" │ │ └── flags: ∅ │ ├── call_operator_loc: ∅ │ ├── message_loc: (26,9)-(26,11) = "=~" diff --git a/test/prism/snapshots/seattlerb/bug190.txt b/test/prism/snapshots/seattlerb/bug190.txt index 527304835a..fec48914c9 100644 --- a/test/prism/snapshots/seattlerb/bug190.txt +++ b/test/prism/snapshots/seattlerb/bug190.txt @@ -7,5 +7,5 @@ ├── opening_loc: (1,0)-(1,3) = "%r'" ├── content_loc: (1,3)-(1,5) = "\\'" ├── closing_loc: (1,5)-(1,6) = "'" - ├── unescaped: "'" + ├── unescaped: "\\'" └── flags: ∅ diff --git a/test/prism/snapshots/seattlerb/regexp_esc_C_slash.txt b/test/prism/snapshots/seattlerb/regexp_esc_C_slash.txt index 3bc991033c..caf67b892d 100644 --- a/test/prism/snapshots/seattlerb/regexp_esc_C_slash.txt +++ b/test/prism/snapshots/seattlerb/regexp_esc_C_slash.txt @@ -7,5 +7,5 @@ ├── opening_loc: (1,0)-(1,1) = "/" ├── content_loc: (1,1)-(1,6) = "\\cC\\d" ├── closing_loc: (1,6)-(1,7) = "/" - ├── unescaped: "\u0003d" + ├── unescaped: "\\x03\\d" └── flags: ∅ diff --git a/test/prism/snapshots/seattlerb/regexp_esc_u.txt b/test/prism/snapshots/seattlerb/regexp_esc_u.txt index adbfe36880..ea6bbb6141 100644 --- a/test/prism/snapshots/seattlerb/regexp_esc_u.txt +++ b/test/prism/snapshots/seattlerb/regexp_esc_u.txt @@ -7,5 +7,5 @@ ├── opening_loc: (1,0)-(1,1) = "/" ├── content_loc: (1,1)-(1,16) = "[\\u0021-\\u0027]" ├── closing_loc: (1,16)-(1,17) = "/" - ├── unescaped: "[!-']" + ├── unescaped: "[\\u0021-\\u0027]" └── flags: ∅ diff --git a/test/prism/snapshots/seattlerb/regexp_unicode_curlies.txt b/test/prism/snapshots/seattlerb/regexp_unicode_curlies.txt index 5e039bd16e..74e8b52787 100644 --- a/test/prism/snapshots/seattlerb/regexp_unicode_curlies.txt +++ b/test/prism/snapshots/seattlerb/regexp_unicode_curlies.txt @@ -7,11 +7,11 @@ │ ├── opening_loc: (1,0)-(1,1) = "/" │ ├── content_loc: (1,1)-(1,14) = "\\u{c0de babe}" │ ├── closing_loc: (1,14)-(1,15) = "/" - │ ├── unescaped: "샞몾" + │ ├── unescaped: "\\u{c0de babe}" │ └── flags: ∅ └── @ RegularExpressionNode (location: (3,0)-(3,8)) ├── opening_loc: (3,0)-(3,1) = "/" ├── content_loc: (3,1)-(3,7) = "\\u{df}" ├── closing_loc: (3,7)-(3,8) = "/" - ├── unescaped: "ß" + ├── unescaped: "\\u{df}" └── flags: ∅ diff --git a/test/prism/snapshots/spanning_heredoc.txt b/test/prism/snapshots/spanning_heredoc.txt index 2c59cb4368..6b3e3c92d7 100644 --- a/test/prism/snapshots/spanning_heredoc.txt +++ b/test/prism/snapshots/spanning_heredoc.txt @@ -28,10 +28,10 @@ │ │ │ ├── @ InterpolatedRegularExpressionNode (location: (4,13)-(7,2)) │ │ │ │ ├── opening_loc: (4,13)-(4,14) = "/" │ │ │ │ ├── parts: (length: 2) - │ │ │ │ │ ├── @ StringNode (location: (4,14)-(4,0)) + │ │ │ │ │ ├── @ StringNode (location: (4,14)-(4,16)) │ │ │ │ │ │ ├── flags: ∅ │ │ │ │ │ │ ├── opening_loc: ∅ - │ │ │ │ │ │ ├── content_loc: (4,14)-(4,0) = "b\\\n" + │ │ │ │ │ │ ├── content_loc: (4,14)-(4,16) = "b\\" │ │ │ │ │ │ ├── closing_loc: ∅ │ │ │ │ │ │ └── unescaped: "b" │ │ │ │ │ └── @ StringNode (location: (7,0)-(7,1)) diff --git a/test/prism/snapshots/unescaping.txt b/test/prism/snapshots/unescaping.txt index a59dc01626..ee7c3759cb 100644 --- a/test/prism/snapshots/unescaping.txt +++ b/test/prism/snapshots/unescaping.txt @@ -17,7 +17,7 @@ │ ├── opening_loc: (3,0)-(3,1) = "/" │ ├── content_loc: (3,1)-(3,7) = "\\c\#{1}" │ ├── closing_loc: (3,7)-(3,8) = "/" - │ ├── unescaped: "\u0003{1}" + │ ├── unescaped: "\\x03{1}" │ └── flags: ∅ ├── @ StringNode (location: (5,0)-(5,8)) │ ├── flags: ∅ diff --git a/test/prism/snapshots/unparser/corpus/literal/literal.txt b/test/prism/snapshots/unparser/corpus/literal/literal.txt index 7c477382dc..21e73552ef 100644 --- a/test/prism/snapshots/unparser/corpus/literal/literal.txt +++ b/test/prism/snapshots/unparser/corpus/literal/literal.txt @@ -545,7 +545,7 @@ │ ├── opening_loc: (50,0)-(50,1) = "/" │ ├── content_loc: (50,1)-(50,27) = "[^-+',.\\/:@[:alnum:]\\[\\]]+" │ ├── closing_loc: (50,27)-(50,28) = "/" - │ ├── unescaped: "[^-+',./:@[:alnum:][]]+" + │ ├── unescaped: "[^-+',./:@[:alnum:]\\[\\]]+" │ └── flags: ∅ ├── @ InterpolatedRegularExpressionNode (location: (51,0)-(51,12)) │ ├── opening_loc: (51,0)-(51,1) = "/" @@ -606,19 +606,19 @@ │ ├── opening_loc: (54,0)-(54,1) = "/" │ ├── content_loc: (54,1)-(54,3) = "\\n" │ ├── closing_loc: (54,3)-(54,4) = "/" - │ ├── unescaped: "\n" + │ ├── unescaped: "\\n" │ └── flags: ∅ ├── @ RegularExpressionNode (location: (55,0)-(55,4)) │ ├── opening_loc: (55,0)-(55,1) = "/" │ ├── content_loc: (55,1)-(55,3) = "\\n" │ ├── closing_loc: (55,3)-(55,4) = "/" - │ ├── unescaped: "\n" + │ ├── unescaped: "\\n" │ └── flags: ∅ ├── @ RegularExpressionNode (location: (56,0)-(56,5)) │ ├── opening_loc: (56,0)-(56,1) = "/" │ ├── content_loc: (56,1)-(56,3) = "\\n" │ ├── closing_loc: (56,3)-(56,5) = "/x" - │ ├── unescaped: "\n" + │ ├── unescaped: "\\n" │ └── flags: extended ├── @ RegularExpressionNode (location: (57,0)-(57,7)) │ ├── opening_loc: (57,0)-(57,1) = "/" diff --git a/test/prism/snapshots/unparser/corpus/semantic/literal.txt b/test/prism/snapshots/unparser/corpus/semantic/literal.txt index c79d0370da..6da3b56f33 100644 --- a/test/prism/snapshots/unparser/corpus/semantic/literal.txt +++ b/test/prism/snapshots/unparser/corpus/semantic/literal.txt @@ -33,7 +33,7 @@ │ ├── opening_loc: (10,0)-(10,3) = "%r(" │ ├── content_loc: (10,3)-(10,5) = "\\)" │ ├── closing_loc: (10,5)-(10,6) = ")" - │ ├── unescaped: ")" + │ ├── unescaped: "\\)" │ └── flags: ∅ ├── @ InterpolatedRegularExpressionNode (location: (11,0)-(11,14)) │ ├── opening_loc: (11,0)-(11,3) = "%r(" diff --git a/test/prism/snapshots/whitequark/parser_bug_830.txt b/test/prism/snapshots/whitequark/parser_bug_830.txt index f19fffbba0..e380113372 100644 --- a/test/prism/snapshots/whitequark/parser_bug_830.txt +++ b/test/prism/snapshots/whitequark/parser_bug_830.txt @@ -7,5 +7,5 @@ ├── opening_loc: (1,0)-(1,1) = "/" ├── content_loc: (1,1)-(1,3) = "\\(" ├── closing_loc: (1,3)-(1,4) = "/" - ├── unescaped: "(" + ├── unescaped: "\\(" └── flags: ∅ diff --git a/test/prism/unescape_test.rb b/test/prism/unescape_test.rb index 123c139077..051b5e29d1 100644 --- a/test/prism/unescape_test.rb +++ b/test/prism/unescape_test.rb @@ -108,40 +108,50 @@ module Prism escapes = [*ascii, *ascii8, *newlines, *octal, *hex2, *hex4, *hex6, *ctrls] contexts = [ - [Context::String.new("?", ""), escapes], - [Context::String.new("'", "'"), escapes], - [Context::String.new("\"", "\""), escapes], - [Context::String.new("%q[", "]"), escapes], - [Context::String.new("%Q[", "]"), escapes], - [Context::String.new("%[", "]"), escapes], - [Context::String.new("`", "`"), escapes], - [Context::String.new("%x[", "]"), escapes], - [Context::String.new("<<H\n", "\nH"), escapes], - [Context::String.new("<<'H'\n", "\nH"), escapes], - [Context::String.new("<<\"H\"\n", "\nH"), escapes], - [Context::String.new("<<`H`\n", "\nH"), escapes], - [Context::String.new("<<-H\n", "\nH"), escapes], - [Context::String.new("<<-'H'\n", "\nH"), escapes], - [Context::String.new("<<-\"H\"\n", "\nH"), escapes], - [Context::String.new("<<-`H`\n", "\nH"), escapes], - [Context::Heredoc.new("<<~H\n", "\nH"), escapes], - [Context::Heredoc.new("<<~'H'\n", "\nH"), escapes], - [Context::Heredoc.new("<<~\"H\"\n", "\nH"), escapes], - [Context::Heredoc.new("<<~`H`\n", "\nH"), escapes], - [Context::List.new("%w[", "]"), escapes], - [Context::List.new("%W[", "]"), escapes], - [Context::List.new("%i[", "]"), escapes], - [Context::List.new("%I[", "]"), escapes], - [Context::Symbol.new("%s[", "]"), escapes], - [Context::Symbol.new(":'", "'"), escapes], - [Context::Symbol.new(":\"", "\""), escapes], - # [Context::RegExp.new("/", "/"), escapes], - # [Context::RegExp.new("%r[", "]"), escapes] + Context::String.new("?", ""), + Context::String.new("'", "'"), + Context::String.new("\"", "\""), + Context::String.new("%q[", "]"), + Context::String.new("%Q[", "]"), + Context::String.new("%[", "]"), + Context::String.new("`", "`"), + Context::String.new("%x[", "]"), + Context::String.new("<<H\n", "\nH"), + Context::String.new("<<'H'\n", "\nH"), + Context::String.new("<<\"H\"\n", "\nH"), + Context::String.new("<<`H`\n", "\nH"), + Context::String.new("<<-H\n", "\nH"), + Context::String.new("<<-'H'\n", "\nH"), + Context::String.new("<<-\"H\"\n", "\nH"), + Context::String.new("<<-`H`\n", "\nH"), + Context::Heredoc.new("<<~H\n", "\nH"), + Context::Heredoc.new("<<~'H'\n", "\nH"), + Context::Heredoc.new("<<~\"H\"\n", "\nH"), + Context::Heredoc.new("<<~`H`\n", "\nH"), + Context::List.new("%w[", "]"), + Context::List.new("%w<", ">"), + Context::List.new("%W[", "]"), + Context::List.new("%i[", "]"), + Context::List.new("%I[", "]"), + Context::Symbol.new("%s[", "]"), + Context::Symbol.new(":'", "'"), + Context::Symbol.new(":\"", "\""), + Context::RegExp.new("/", "/"), + Context::RegExp.new("%r[", "]"), + Context::RegExp.new("%r<", ">"), + Context::RegExp.new("%r{", "}"), + Context::RegExp.new("%r(", ")"), + Context::RegExp.new("%r|", "|"), ] - contexts.each do |(context, escapes)| + contexts.each do |context| escapes.each do |escape| - next if context.name == "?" && escape == "\xFF".b # wat? + # I think this might be a bug in Ruby. + next if context.name == "?" && escape == "\xFF".b + + # We don't currently support scanning for the number of capture groups, + # so these are all going to fail. + next if (context.name == "//" || context.name.start_with?("%r")) && escape.start_with?(/\d/) define_method(:"test_#{context.name}_#{escape.inspect}") do assert_unescape(context, escape) |
