From cd5cafa4a380e2459862b6e99ff0c381362ef1be Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Tue, 14 Jun 2022 12:27:25 +0900 Subject: Respect the encoding of the source [Bug #18827] Do not override the input string encoding at the time of preparation, because the source encoding is not determined from the input yet. --- parse.y | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) (limited to 'parse.y') diff --git a/parse.y b/parse.y index 59eff0228a..be5af00d43 100644 --- a/parse.y +++ b/parse.y @@ -6462,12 +6462,6 @@ lex_getline(struct parser_params *p) if (NIL_P(line)) return line; must_be_ascii_compatible(line); if (RB_OBJ_FROZEN(line)) line = rb_str_dup(line); // needed for RubyVM::AST.of because script_lines in iseq is deep-frozen -#ifndef RIPPER - if (p->debug_lines) { - rb_enc_associate(line, p->enc); - rb_ary_push(p->debug_lines, line); - } -#endif p->line_count++; return line; } @@ -6614,7 +6608,7 @@ add_delayed_token(struct parser_params *p, const char *tok, const char *end) #endif static int -nextline(struct parser_params *p) +nextline(struct parser_params *p, int set_encoding) { VALUE v = p->lex.nextline; p->lex.nextline = 0; @@ -6632,6 +6626,12 @@ nextline(struct parser_params *p) lex_goto_eol(p); return -1; } +#ifndef RIPPER + if (p->debug_lines) { + if (set_encoding) rb_enc_associate(v, p->enc); + rb_ary_push(p->debug_lines, v); + } +#endif p->cr_seen = FALSE; } else if (NIL_P(v)) { @@ -6663,12 +6663,12 @@ parser_cr(struct parser_params *p, int c) } static inline int -nextc(struct parser_params *p) +nextc0(struct parser_params *p, int set_encoding) { int c; if (UNLIKELY((p->lex.pcur == p->lex.pend) || p->eofp || RTEST(p->lex.nextline))) { - if (nextline(p)) return -1; + if (nextline(p, set_encoding)) return -1; } c = (unsigned char)*p->lex.pcur++; if (UNLIKELY(c == '\r')) { @@ -6677,6 +6677,7 @@ nextc(struct parser_params *p) return c; } +#define nextc(p) nextc0(p, TRUE) static void pushback(struct parser_params *p, int c) 
@@ -8467,7 +8468,7 @@ set_file_encoding(struct parser_params *p, const char *str, const char *send) static void parser_prepare(struct parser_params *p) { - int c = nextc(p); + int c = nextc0(p, FALSE); p->token_info_enabled = !compile_for_eval && RTEST(ruby_verbose); switch (c) { case '#': @@ -8479,6 +8480,11 @@ parser_prepare(struct parser_params *p) (unsigned char)p->lex.pcur[1] == 0xbf) { p->enc = rb_utf8_encoding(); p->lex.pcur += 2; +#ifndef RIPPER + if (p->debug_lines) { + rb_enc_associate(p->lex.lastline, p->enc); + } +#endif p->lex.pbeg = p->lex.pcur; return; } -- cgit v1.2.3