From aebc6e8b8db259b7eeee203f6ec4137019081d70 Mon Sep 17 00:00:00 2001
From: Kevin Newton
Date: Sat, 11 Nov 2023 22:10:08 -0500
Subject: [ruby/prism] Fix parsing other encodings bytes >= 0x80

https://github.com/ruby/prism/commit/c787d2e076
---
 prism/prism.c | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/prism/prism.c b/prism/prism.c
index 347aa68039..572dc1f146 100644
--- a/prism/prism.c
+++ b/prism/prism.c
@@ -5793,7 +5793,16 @@ pm_parser_scope_pop(pm_parser_t *parser) {
 static inline size_t
 char_is_identifier_start(pm_parser_t *parser, const uint8_t *b) {
     if (parser->encoding_changed) {
-        return parser->encoding.alpha_char(b, parser->end - b) || (*b == '_') || (*b >= 0x80);
+        size_t width;
+        if ((width = parser->encoding.alpha_char(b, parser->end - b)) != 0) {
+            return width;
+        } else if (*b == '_') {
+            return 1;
+        } else if (*b >= 0x80) {
+            return parser->encoding.char_width(b, parser->end - b);
+        } else {
+            return 0;
+        }
     } else if (*b < 0x80) {
         return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT ? 1 : 0) || (*b == '_');
     } else {
@@ -5809,7 +5818,16 @@ char_is_identifier_start(pm_parser_t *parser, const uint8_t *b) {
 static inline size_t
 char_is_identifier(pm_parser_t *parser, const uint8_t *b) {
     if (parser->encoding_changed) {
-        return parser->encoding.alnum_char(b, parser->end - b) || (*b == '_') || (*b >= 0x80);
+        size_t width;
+        if ((width = parser->encoding.alnum_char(b, parser->end - b)) != 0) {
+            return width;
+        } else if (*b == '_') {
+            return 1;
+        } else if (*b >= 0x80) {
+            return parser->encoding.char_width(b, parser->end - b);
+        } else {
+            return 0;
+        }
     } else if (*b < 0x80) {
         return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT ? 1 : 0) || (*b == '_');
     } else {
-- 
cgit v1.2.3