summary refs log tree commit diff
diff options
context:
space:
mode:
author Nobuyoshi Nakada <nobu@ruby-lang.org> 2019-07-05 22:18:08 +0900
committer Nobuyoshi Nakada <nobu@ruby-lang.org> 2019-07-05 22:39:54 +0900
commit d746a41e85b746a90eef20c46d24880fe084ffc5 (patch)
tree 1065357f1862a72fca4327cb29554ac6cb652fef
parent 0a2f598d23ef54ce906ebe302cc06e07a16f9022 (diff)
Multiple codepoints are not allowed at single character literal
Such literals have been unintentionally accepted since 2.5.
-rw-r--r-- parse.y 25
-rw-r--r-- test/ruby/test_parse.rb 1
2 files changed, 21 insertions, 5 deletions
diff --git a/parse.y b/parse.y
index 9ec262c5a5..3708601e35 100644
--- a/parse.y
+++ b/parse.y
@@ -6246,24 +6246,28 @@ tokadd_codepoint(struct parser_params *p, rb_encoding **encp,
/* return value is for ?\u3042 */
static void
tokadd_utf8(struct parser_params *p, rb_encoding **encp,
- int string_literal, int symbol_literal, int regexp_literal)
+ int term, int symbol_literal, int regexp_literal)
{
/*
- * If string_literal is true, then we allow multiple codepoints
- * in \u{}, and add the codepoints to the current token.
- * Otherwise we're parsing a character literal and return a single
- * codepoint without adding it
+ * If `term` is not -1, then we allow multiple codepoints in \u{}
+ * upto `term` byte, otherwise we're parsing a character literal.
+ * And then add the codepoints to the current token.
*/
+ static const char multiple_codepoints[] = "Multiple codepoints at single character literal";
const int open_brace = '{', close_brace = '}';
if (regexp_literal) { tokadd(p, '\\'); tokadd(p, 'u'); }
if (peek(p, open_brace)) { /* handle \u{...} form */
+ const char *second = NULL;
int c, last = nextc(p);
if (p->lex.pcur >= p->lex.pend) goto unterminated;
while (ISSPACE(c = *p->lex.pcur) && ++p->lex.pcur < p->lex.pend);
while (c != close_brace) {
+ if (c == term) goto unterminated;
+ if (second == multiple_codepoints)
+ second = p->lex.pcur;
if (regexp_literal) tokadd(p, last);
if (!tokadd_codepoint(p, encp, regexp_literal, TRUE)) {
break;
@@ -6272,6 +6276,8 @@ tokadd_utf8(struct parser_params *p, rb_encoding **encp,
if (++p->lex.pcur >= p->lex.pend) goto unterminated;
last = c;
}
+ if (term == -1 && !second)
+ second = multiple_codepoints;
}
if (c != close_brace) {
@@ -6280,6 +6286,15 @@ tokadd_utf8(struct parser_params *p, rb_encoding **encp,
yyerror0("unterminated Unicode escape");
return;
}
+ if (second && second != multiple_codepoints) {
+ const char *pcur = p->lex.pcur;
+ p->lex.pcur = second;
+ dispatch_scan_event(p, tSTRING_CONTENT);
+ token_flush(p);
+ p->lex.pcur = pcur;
+ yyerror0(multiple_codepoints);
+ token_flush(p);
+ }
if (regexp_literal) tokadd(p, close_brace);
nextc(p);
diff --git a/test/ruby/test_parse.rb b/test/ruby/test_parse.rb
index 4a0c296a84..75f0896bce 100644
--- a/test/ruby/test_parse.rb
+++ b/test/ruby/test_parse.rb
@@ -577,6 +577,7 @@ class TestParse < Test::Unit::TestCase
assert_equal("\u{1234}", eval("?\u{1234}"))
assert_equal("\u{1234}", eval('?\u{1234}'))
assert_equal("\u{1234}", eval('?\u1234'))
+ assert_syntax_error('?\u{41 42}', 'Multiple codepoints at single character literal')
e = assert_syntax_error('"#{?\u123}"', 'invalid Unicode escape')
assert_not_match(/end-of-input/, e.message)