From 1bc8838d60ef3fc6812d3b64ed87caaf0ae943d9 Mon Sep 17 00:00:00 2001
From: Jeremy Evans <code@jeremyevans.net>
Date: Fri, 30 Jun 2023 19:37:53 -0700
Subject: Handle unterminated unicode escapes in regexps

This fixes an infinite loop possible after ec3542229b29ec93062e9d90e877ea29d3c19472.
For \u{} escapes in regexps, skip validation in the parser, and rely on the regexp
code to handle validation. This is necessary so that invalid unicode escapes in
comments in extended regexps are allowed.

Fixes [Bug #19750]

Co-authored-by: Nobuyoshi Nakada <nobu@ruby-lang.org>
---
 test/ruby/test_parse.rb | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'test/ruby')

diff --git a/test/ruby/test_parse.rb b/test/ruby/test_parse.rb
index bf0d9f1bd5..cf989d190b 100644
--- a/test/ruby/test_parse.rb
+++ b/test/ruby/test_parse.rb
@@ -1052,6 +1052,22 @@ x = __ENCODING__
     assert_syntax_error("    0b\n", /\^/)
   end
 
+  def test_unclosed_unicode_escape_at_eol_bug_19750
+    assert_separately([], "#{<<-"begin;"}\n#{<<~'end;'}")
+    begin;
+      assert_syntax_error("/\\u", /too short escape sequence/)
+      assert_syntax_error("/\\u{", /unterminated regexp meets end of file/)
+      assert_syntax_error("/\\u{\\n", /invalid Unicode list/)
+      assert_syntax_error("/a#\\u{\\n/", /invalid Unicode list/)
+      re = eval("/a#\\u{\n$/x")
+      assert_match(re, 'a')
+      assert_not_match(re, 'a#')
+      re = eval("/a#\\u\n$/x")
+      assert_match(re, 'a')
+      assert_not_match(re, 'a#')
+    end;
+  end
+
   def test_error_def_in_argument
     assert_separately([], "#{<<-"begin;"}\n#{<<~"end;"}")
     begin;
-- 
cgit v1.2.3