summary | refs | log | tree | commit | diff
path: root/parse.y
diff options
context:
space:
mode:
author: nobu <nobu@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2017-07-16 13:39:18 +0000
committer: nobu <nobu@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2017-07-16 13:39:18 +0000
commit: 80d502b88116e95e4b17b4e7278d0d47106253db (patch)
tree: 21903c82e7e913360dd2c79a47a43c0fcd5066a3 /parse.y
parent: f01f98f022123a3e5608e43bc47a32a7fac7b90a (diff)
parse.y: utf-8 codepoints
* parse.y (parser_tokadd_utf8): skip spaces in the current line, without advancing the line, to get rid of a dangling pointer. [ruby-core:82029] [Bug #13742]

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@59344 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'parse.y')
-rw-r--r-- parse.y 61
1 files changed, 38 insertions, 23 deletions
diff --git a/parse.y b/parse.y
index c681d41a74..e0e07f2184 100644
--- a/parse.y
+++ b/parse.y
@@ -5652,6 +5652,7 @@ parser_tokadd_codepoint(struct parser_params *parser, rb_encoding **encp,
{
size_t numlen;
int codepoint = scan_hex(lex_p, wide ? 6 : 4, &numlen);
+ lex_p += numlen;
if (wide ? (numlen == 0) : (numlen < 4)) {
yyerror("invalid Unicode escape");
return FALSE;
@@ -5664,12 +5665,20 @@ parser_tokadd_codepoint(struct parser_params *parser, rb_encoding **encp,
yyerror("invalid Unicode codepoint");
return FALSE;
}
- lex_p += numlen;
if (regexp_literal) {
tokcopy((int)numlen);
}
else if (codepoint >= 0x80) {
- *encp = rb_utf8_encoding();
+ rb_encoding *utf8 = rb_utf8_encoding();
+ if (*encp && utf8 != *encp) {
+ static const char mixed_utf8[] = "UTF-8 mixed within %s source";
+ size_t len = sizeof(mixed_utf8) - 2 + strlen(rb_enc_name(*encp));
+ char *mesg = alloca(len);
+ snprintf(mesg, len, mixed_utf8, rb_enc_name(*encp));
+ yyerror(mesg);
+ return TRUE;
+ }
+ *encp = utf8;
tokaddmbc(codepoint, *encp);
}
else {
@@ -5696,19 +5705,23 @@ parser_tokadd_utf8(struct parser_params *parser, rb_encoding **encp,
if (peek(open_brace)) { /* handle \u{...} form */
int c, last = nextc();
- do c = nextc(); while (ISSPACE(c));
- pushback(c);
+ if (lex_p >= lex_pend) goto unterminated;
+ while (ISSPACE(c = *lex_p) && ++lex_p < lex_pend);
while (!string_literal || c != close_brace) {
if (regexp_literal) tokadd(last);
if (!parser_tokadd_codepoint(parser, encp, regexp_literal, TRUE)) {
return 0;
}
- while (ISSPACE(c = nextc())) last = c;
- pushback(c);
+ while (ISSPACE(c = *lex_p)) {
+ if (++lex_p >= lex_pend) goto unterminated;
+ last = c;
+ }
if (!string_literal) break;
}
if (c != close_brace) {
+ unterminated:
+ parser->tokp = lex_p;
yyerror("unterminated Unicode escape");
return 0;
}
@@ -5999,8 +6012,7 @@ parser_tokadd_string(struct parser_params *parser,
rb_encoding **encp)
{
int c;
- int has_nonascii = 0;
- rb_encoding *enc = *encp;
+ rb_encoding *enc = 0;
char *errbuf = 0;
static const char mixed_msg[] = "%s mixed within %s source";
@@ -6044,9 +6056,8 @@ parser_tokadd_string(struct parser_params *parser,
}
}
else if (c == '\\') {
- const char *beg = lex_p - 1;
#ifndef RIPPER
- parser->tokp = beg;
+ parser->tokp = lex_p - 1;
#endif
c = nextc();
switch (c) {
@@ -6065,11 +6076,10 @@ parser_tokadd_string(struct parser_params *parser,
tokadd('\\');
break;
}
- parser_tokadd_utf8(parser, &enc, 1,
- func & STR_FUNC_SYMBOL,
- func & STR_FUNC_REGEXP);
- if (has_nonascii && enc != *encp) {
- mixed_escape(beg, enc, *encp);
+ if (!parser_tokadd_utf8(parser, &enc, term,
+ func & STR_FUNC_SYMBOL,
+ func & STR_FUNC_REGEXP)) {
+ return -1;
}
continue;
@@ -6087,8 +6097,8 @@ parser_tokadd_string(struct parser_params *parser,
pushback(c);
if ((c = tokadd_escape(&enc)) < 0)
return -1;
- if (has_nonascii && enc != *encp) {
- mixed_escape(beg, enc, *encp);
+ if (enc && enc != *encp) {
+ mixed_escape(parser->tokp+2, enc, *encp);
}
continue;
}
@@ -6109,8 +6119,10 @@ parser_tokadd_string(struct parser_params *parser,
}
else if (!parser_isascii()) {
non_ascii:
- has_nonascii = 1;
- if (enc != *encp) {
+ if (!enc) {
+ enc = *encp;
+ }
+ else if (enc != *encp) {
mixed_error(enc, *encp);
continue;
}
@@ -6122,15 +6134,17 @@ parser_tokadd_string(struct parser_params *parser,
break;
}
if (c & 0x80) {
- has_nonascii = 1;
- if (enc != *encp) {
+ if (!enc) {
+ enc = *encp;
+ }
+ else if (enc != *encp) {
mixed_error(enc, *encp);
continue;
}
}
tokadd(c);
}
- *encp = enc;
+ if (enc) *encp = enc;
return c;
}
@@ -7460,7 +7474,8 @@ parse_qmark(struct parser_params *parser, int space_seen)
else if (c == '\\') {
if (peek('u')) {
nextc();
- if (!parser_tokadd_utf8(parser, &enc, 0, 0, 0))
+ enc = rb_utf8_encoding();
+ if (!parser_tokadd_utf8(parser, &enc, -1, 0, 0))
return 0;
}
else if (!lex_eol_p() && !(c = *lex_p, ISASCII(c))) {