From f29e5013adcd9c8753587471727e3280aaf89686 Mon Sep 17 00:00:00 2001 From: nobu Date: Tue, 25 Jul 2017 08:30:11 +0000 Subject: parse.y: limit codepoint length * parse.y (parser_tokadd_codepoint): limit Unicode codepoint length. too long codepoint has been split unexpectedly since r57050. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@59417 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- parse.y | 17 ++++++++--------- test/ruby/test_parse.rb | 2 ++ 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/parse.y b/parse.y index 0a1910e880..44a0bb6785 100644 --- a/parse.y +++ b/parse.y @@ -5661,20 +5661,20 @@ parser_tokadd_codepoint(struct parser_params *parser, rb_encoding **encp, int regexp_literal, int wide) { size_t numlen; - int codepoint = scan_hex(lex_p, wide ? 6 : 4, &numlen); + int codepoint = scan_hex(lex_p, wide ? lex_pend - lex_p : 4, &numlen); literal_flush(lex_p); lex_p += numlen; - if (wide ? (numlen == 0) : (numlen < 4)) { + if (wide ? (numlen == 0 || numlen > 6) : (numlen < 4)) { yyerror("invalid Unicode escape"); - return FALSE; + return wide && numlen > 0; } if (codepoint > 0x10ffff) { yyerror("invalid Unicode codepoint (too large)"); - return FALSE; + return wide; } if ((codepoint & 0xfffff800) == 0xd800) { yyerror("invalid Unicode codepoint"); - return FALSE; + return wide; } if (regexp_literal) { tokcopy((int)numlen); @@ -5687,7 +5687,7 @@ parser_tokadd_codepoint(struct parser_params *parser, rb_encoding **encp, char *mesg = alloca(len); snprintf(mesg, len, mixed_utf8, rb_enc_name(*encp)); yyerror(mesg); - return TRUE; + return wide; } *encp = utf8; tokaddmbc(codepoint, *encp); @@ -5718,7 +5718,7 @@ parser_tokadd_utf8(struct parser_params *parser, rb_encoding **encp, int c, last = nextc(); if (lex_p >= lex_pend) goto unterminated; while (ISSPACE(c = *lex_p) && ++lex_p < lex_pend); - while (!string_literal || c != close_brace) { + do { if (regexp_literal) tokadd(last); if (!parser_tokadd_codepoint(parser, encp, regexp_literal, TRUE)) { break; @@ -5727,8 +5727,7 @@ parser_tokadd_utf8(struct parser_params *parser, rb_encoding **encp, if (++lex_p >= lex_pend) goto unterminated; last = c; } - if (!string_literal) break; - } + } while (c != close_brace); if (c != close_brace) { unterminated: diff --git a/test/ruby/test_parse.rb b/test/ruby/test_parse.rb index 982f91dc52..cc38214fd6 100644 --- a/test/ruby/test_parse.rb +++ b/test/ruby/test_parse.rb @@ -516,6 +516,8 @@ class TestParse < Test::Unit::TestCase src = '"\xD0\u{90'"\n""000000000000000000000000" assert_syntax_error(src, /:#{__LINE__}: unterminated/o) + assert_syntax_error('"\u{100000000}"', /invalid Unicode escape/) + assert_equal("\x81", eval('"\C-\M-a"')) assert_equal("\177", eval('"\c?"')) end -- cgit v1.2.3