From 1ff55bb09dca302d42951059a73e6d237fd8c338 Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Tue, 4 Jun 2024 14:58:37 -0700 Subject: merge revision(s) 05553cf22d43dd78b8f30cc4591230b5c000c538: [Backport #20517] [Bug #20517] Make a multibyte character one token at meta escape --- parse.y | 6 +++++- test/ripper/test_lexer.rb | 54 ++++++++++++++++++++++++++++++++++++++++++++--- version.h | 2 +- 3 files changed, 57 insertions(+), 5 deletions(-) diff --git a/parse.y b/parse.y index 8755904b9a..dd96d6136e 100644 --- a/parse.y +++ b/parse.y @@ -8047,7 +8047,11 @@ read_escape(struct parser_params *p, int flags) } return read_escape(p, flags|ESCAPE_META) | 0x80; } - else if (c == -1 || !ISASCII(c)) goto eof; + else if (c == -1) goto eof; + else if (!ISASCII(c)) { + tokskip_mbchar(p); + goto eof; + } else { int c2 = escaped_control_code(c); if (c2) { diff --git a/test/ripper/test_lexer.rb b/test/ripper/test_lexer.rb index 7bff858705..65d4ce3b5b 100644 --- a/test/ripper/test_lexer.rb +++ b/test/ripper/test_lexer.rb @@ -302,9 +302,8 @@ world" [[6, 2], :on_tstring_content, "3\n", state(:EXPR_BEG)], [[7, 0], :on_heredoc_end, "H1\n", state(:EXPR_BEG)], ] - assert_equal(code, Ripper.tokenize(code).join("")) - assert_equal(expected, result = Ripper.lex(code), - proc {expected.zip(result) {|e, r| break diff(e, r) unless e == r}}) + + assert_lexer(expected, code) code = <<~'HEREDOC' <<-H1 @@ -330,6 +329,55 @@ world" [[6, 0], :on_tstring_content, " 3\n", state(:EXPR_BEG)], [[7, 0], :on_heredoc_end, "H1\n", state(:EXPR_BEG)], ] + + assert_lexer(expected, code) + end + + def test_invalid_escape_ctrl_mbchar + code = %["\\C-\u{3042}"] + expected = [ + [[1, 0], :on_tstring_beg, '"', state(:EXPR_BEG)], + [[1, 1], :on_tstring_content, "\\C-\u{3042}", state(:EXPR_BEG)], + [[1, 7], :on_tstring_end, '"', state(:EXPR_END)], + ] + + assert_lexer(expected, code) + end + + def test_invalid_escape_meta_mbchar + code = %["\\M-\u{3042}"] + expected = [ + [[1, 0], :on_tstring_beg, '"', state(:EXPR_BEG)], + [[1, 1], :on_tstring_content, "\\M-\u{3042}", state(:EXPR_BEG)], + [[1, 7], :on_tstring_end, '"', state(:EXPR_END)], + ] + + assert_lexer(expected, code) + end + + def test_invalid_escape_meta_ctrl_mbchar + code = %["\\M-\\C-\u{3042}"] + expected = [ + [[1, 0], :on_tstring_beg, '"', state(:EXPR_BEG)], + [[1, 1], :on_tstring_content, "\\M-\\C-\u{3042}", state(:EXPR_BEG)], + [[1, 10], :on_tstring_end, '"', state(:EXPR_END)], + ] + + assert_lexer(expected, code) + end + + def test_invalid_escape_ctrl_meta_mbchar + code = %["\\C-\\M-\u{3042}"] + expected = [ + [[1, 0], :on_tstring_beg, '"', state(:EXPR_BEG)], + [[1, 1], :on_tstring_content, "\\C-\\M-\u{3042}", state(:EXPR_BEG)], + [[1, 10], :on_tstring_end, '"', state(:EXPR_END)], + ] + + assert_lexer(expected, code) + end + + def assert_lexer(expected, code) assert_equal(code, Ripper.tokenize(code).join("")) assert_equal(expected, result = Ripper.lex(code), proc {expected.zip(result) {|e, r| break diff(e, r) unless e == r}}) diff --git a/version.h b/version.h index 5da189ff33..af217e3158 100644 --- a/version.h +++ b/version.h @@ -11,7 +11,7 @@ # define RUBY_VERSION_MINOR RUBY_API_VERSION_MINOR #define RUBY_VERSION_TEENY 2 #define RUBY_RELEASE_DATE RUBY_RELEASE_YEAR_STR"-"RUBY_RELEASE_MONTH_STR"-"RUBY_RELEASE_DAY_STR -#define RUBY_PATCHLEVEL 85 +#define RUBY_PATCHLEVEL 86 #include "ruby/version.h" #include "ruby/internal/abi.h" -- cgit v1.2.3