summary | refs | log | tree | commit | diff
path: root/re.c
diff options
context:
space:
mode:
author: nobu <nobu@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2018-03-11 00:05:12 +0000
committer: nobu <nobu@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2018-03-11 00:05:12 +0000
commit: 5fade63482d39d9c550a0ce8a555dc8e3c0aedec (patch)
tree: 5d99adf08d3c5e7503b353229fe71c2ba633eb0f /re.c
parent: 724878c9139715fea803690cfd2a0687abbd7242 (diff)
re.c: fixed escaped multibyte char
* re.c (unescape_nonascii): an escaped multibyte character should be copied as-is, only checking that its encoding matches. https://twitter.com/sakuro/status/972014409986883584

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@62718 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 're.c')
-rw-r--r-- re.c 12
1 file changed, 12 insertions, 0 deletions
diff --git a/re.c b/re.c
index 0508e5f322..84c2e060dd 100644
--- a/re.c
+++ b/re.c
@@ -2537,11 +2537,13 @@ unescape_nonascii(const char *p, const char *end, rb_encoding *enc,
while (p < end) {
int chlen = rb_enc_precise_mbclen(p, end, enc);
if (!MBCLEN_CHARFOUND_P(chlen)) {
+ invalid_multibyte:
errcpy(err, "invalid multibyte character");
return -1;
}
chlen = MBCLEN_CHARFOUND_LEN(chlen);
if (1 < chlen || (*p & 0x80)) {
+ multibyte:
rb_str_buf_cat(buf, p, chlen);
p += chlen;
if (*encp == 0)
@@ -2559,6 +2561,16 @@ unescape_nonascii(const char *p, const char *end, rb_encoding *enc,
errcpy(err, "too short escape sequence");
return -1;
}
+ chlen = rb_enc_precise_mbclen(p, end, enc);
+ if (!MBCLEN_CHARFOUND_P(chlen)) {
+ goto invalid_multibyte;
+ }
+ if ((chlen = MBCLEN_CHARFOUND_LEN(chlen)) > 1) {
+ /* include the previous backslash */
+ --p;
+ ++chlen;
+ goto multibyte;
+ }
switch (c = *p++) {
case '1': case '2': case '3':
case '4': case '5': case '6': case '7': /* \O, \OO, \OOO or backref */