diff options
author | Jeremy Evans <code@jeremyevans.net> | 2019-05-14 22:04:46 -0700 |
---|---|---|
committer | Jeremy Evans <code@jeremyevans.net> | 2019-05-22 21:52:20 -0700 |
commit | c05eaa93258ddc01e685b6cc3a0da82998a2af48 (patch) | |
tree | aa5fea51e5d47ed7d49da4b3dfbdb8ce60364ae9 | |
parent | f91b1ab33d397fdcdf452d563d7f59469a078d88 (diff) |
Fix mixed encoding in heredoc
Heredocs are parsed line-by-line, so we need to keep track of the
temporary encoding of the string. Previously, a heredoc would
only detect mixed encoding errors if they were on the same line,
this changes things so they will be caught on different lines.
Fixes [Bug #15839]
-rw-r--r-- | parse.y | 40 | ||||
-rw-r--r-- | test/ruby/test_syntax.rb | 29 |
2 files changed, 50 insertions, 19 deletions
@@ -5233,7 +5233,7 @@ none : /* none */ # define yylval (*p->lval) static int regx_options(struct parser_params*); -static int tokadd_string(struct parser_params*,int,int,int,long*,rb_encoding**); +static int tokadd_string(struct parser_params*,int,int,int,long*,rb_encoding**,rb_encoding**); static void tokaddmbc(struct parser_params *p, int c, rb_encoding *enc); static enum yytokentype parse_string(struct parser_params*,rb_strterm_literal_t*); static enum yytokentype here_document(struct parser_params*,rb_strterm_heredoc_t*); @@ -6510,10 +6510,9 @@ parser_mixed_escape(struct parser_params *p, const char *beg, rb_encoding *enc1, static int tokadd_string(struct parser_params *p, int func, int term, int paren, long *nest, - rb_encoding **encp) + rb_encoding **encp, rb_encoding **enc) { int c; - rb_encoding *enc = 0; bool erred = false; #define mixed_error(enc1, enc2) \ @@ -6569,7 +6568,7 @@ tokadd_string(struct parser_params *p, tokadd(p, '\\'); break; } - if (!parser_tokadd_utf8(p, &enc, term, + if (!parser_tokadd_utf8(p, enc, term, func & STR_FUNC_SYMBOL, func & STR_FUNC_REGEXP)) { continue; @@ -6588,17 +6587,17 @@ tokadd_string(struct parser_params *p, continue; } pushback(p, c); - if ((c = tokadd_escape(p, &enc)) < 0) + if ((c = tokadd_escape(p, enc)) < 0) return -1; - if (enc && enc != *encp) { - mixed_escape(p->lex.ptok+2, enc, *encp); + if (*enc && *enc != *encp) { + mixed_escape(p->lex.ptok+2, *enc, *encp); } continue; } else if (func & STR_FUNC_EXPAND) { pushback(p, c); if (func & STR_FUNC_ESCAPE) tokadd(p, '\\'); - c = read_escape(p, 0, &enc); + c = read_escape(p, 0, enc); } else if ((func & STR_FUNC_QWORDS) && ISSPACE(c)) { /* ignore backslashed spaces in %w */ @@ -6612,11 +6611,11 @@ tokadd_string(struct parser_params *p, } else if (!parser_isascii(p)) { non_ascii: - if (!enc) { - enc = *encp; + if (!*enc) { + *enc = *encp; } - else if (enc != *encp) { - mixed_error(enc, *encp); + else if (*enc != *encp) { + mixed_error(*enc, *encp); continue; } if (tokadd_mbchar(p, c) == -1) return -1; @@ -6627,18 +6626,18 @@ tokadd_string(struct parser_params *p, break; } if (c & 0x80) { - if (!enc) { - enc = *encp; + if (!*enc) { + *enc = *encp; } - else if (enc != *encp) { - mixed_error(enc, *encp); + else if (*enc != *encp) { + mixed_error(*enc, *encp); continue; } } tokadd(p, c); } terminate: - if (enc) *encp = enc; + if (*enc) *encp = *enc; return c; } @@ -6774,6 +6773,7 @@ parse_string(struct parser_params *p, rb_strterm_literal_t *quote) int paren = (int)quote->u2.paren; int c, space = 0; rb_encoding *enc = p->enc; + rb_encoding *base_enc = 0; VALUE lit; if (func & STR_FUNC_TERM) { @@ -6814,7 +6814,7 @@ parse_string(struct parser_params *p, rb_strterm_literal_t *quote) } pushback(p, c); if (tokadd_string(p, func, term, paren, "e->u0.nest, - &enc) == -1) { + &enc, &base_enc) == -1) { if (p->eofp) { #ifndef RIPPER # define unterminated_literal(mesg) yyerror0(mesg) @@ -7171,6 +7171,7 @@ here_document(struct parser_params *p, rb_strterm_heredoc_t *here) long len; VALUE str = 0; rb_encoding *enc = p->enc; + rb_encoding *base_enc = 0; int bol; eos = RSTRING_PTR(here->lastline) + here->offset; @@ -7282,7 +7283,8 @@ here_document(struct parser_params *p, rb_strterm_heredoc_t *here) } do { pushback(p, c); - if ((c = tokadd_string(p, func, '\n', 0, NULL, &enc)) == -1) { + enc = p->enc; + if ((c = tokadd_string(p, func, '\n', 0, NULL, &enc, &base_enc)) == -1) { if (p->eofp) goto error; goto restore; } diff --git a/test/ruby/test_syntax.rb b/test/ruby/test_syntax.rb index c9eaa5af6d..7bf1e0e43c 100644 --- a/test/ruby/test_syntax.rb +++ b/test/ruby/test_syntax.rb @@ -774,6 +774,35 @@ eom assert_equal("\n0\n1", eval("<<~0 '1'\n \n0\#{}\n0")) end + def test_heredoc_mixed_encoding + assert_syntax_error(<<-'HEREDOC', 'UTF-8 mixed within Windows-31J source') + #encoding: cp932 + <<-TEXT + \xe9\x9d\u1234 + TEXT + HEREDOC + assert_syntax_error(<<-'HEREDOC', 'UTF-8 mixed within Windows-31J source') + #encoding: cp932 + <<-TEXT + \xe9\x9d + \u1234 + TEXT + HEREDOC + assert_syntax_error(<<-'HEREDOC', 'UTF-8 mixed within Windows-31J source') + #encoding: cp932 + <<-TEXT + \u1234\xe9\x9d + TEXT + HEREDOC + assert_syntax_error(<<-'HEREDOC', 'UTF-8 mixed within Windows-31J source') + #encoding: cp932 + <<-TEXT + \u1234 + \xe9\x9d + TEXT + HEREDOC + end + def test_lineno_operation_brace_block expected = __LINE__ + 1 actual = caller_lineno\ |