From e193fd8d665567c7f3b827c21a1b77af43e2c391 Mon Sep 17 00:00:00 2001
From: matz
Date: Mon, 2 Dec 2002 07:13:56 +0000
Subject: * pack.c (utf8_to_uv): added checks for malformed or redundant UTF-8 sequences.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@3105 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
---
 ChangeLog           |  5 +++++
 ext/socket/socket.c | 15 +++++++++++++
 pack.c              | 61 ++++++++++++++++++++++++++++++++++++++++-------------
 3 files changed, 66 insertions(+), 15 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 4a4a665570..2db3084025 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -8,6 +8,11 @@ Sun Dec 1 22:43:29 2002 Nobuyoshi Nakada
 	* win32/win32.c (rb_w32_stat): empty path is invalid, and return
 	  ENOENT rather than EBADF in such case. [ruby-talk:57177]
 
+Fri Nov 29 18:01:48 2002 Yukihiro Matsumoto
+
+	* pack.c (utf8_to_uv): added checks for malformed or redundant
+	  UTF-8 sequences.
+
 Thu Nov 28 12:08:30 2002 Akinori MUSHA
 
 	* lib/mkmf.rb: Avoid the use of "clean::" in favor of "clean:" in
diff --git a/ext/socket/socket.c b/ext/socket/socket.c
index 17df4476c0..54dbaaf5f2 100644
--- a/ext/socket/socket.c
+++ b/ext/socket/socket.c
@@ -2922,4 +2922,19 @@ Init_socket()
 #ifdef NI_DGRAM
     sock_define_const("NI_DGRAM", NI_DGRAM);
 #endif
+#ifdef SHUT_RD
+    sock_define_const("SHUT_RD", SHUT_RD);
+#else
+    sock_define_const("SHUT_RD", 0);
+#endif
+#ifdef SHUT_WR
+    sock_define_const("SHUT_WR", SHUT_WR);
+#else
+    sock_define_const("SHUT_WR", 1);
+#endif
+#ifdef SHUT_RDWR
+    sock_define_const("SHUT_RDWR", SHUT_RDWR);
+#else
+    sock_define_const("SHUT_RDWR", 2);
+#endif
 }
diff --git a/pack.c b/pack.c
index a880083d6f..d482465111 100644
--- a/pack.c
+++ b/pack.c
@@ -1855,25 +1855,56 @@ utf8_to_uv(p, lenp)
     char *p;
     long *lenp;
 {
-    int c = (*p++)&0xff;
-    unsigned long uv;
-    long n = 1;
-
-    if (c < 0xc0) n = 1;
-    else if (c < 0xe0) n = 2;
-    else if (c < 0xf0) n = 3;
-    else if (c < 0xf8) n = 4;
-    else if (c < 0xfc) n = 5;
-    else if (c < 0xfe) n = 6;
-    else if (c == 0xfe) n = 7;
-    if (n > *lenp) return 0;
+    int c = *p++ & 0xff;
+    unsigned long uv = c;
+    long n;
+
+    if (!(uv & 0x80)) {
+        *lenp = 1;
+        return uv;
+    }
+    if (!(uv & 0x40)) {
+        rb_warning("malformed UTF-8 character");
+        *lenp = 1;
+        return uv;
+    }
+
+    if (!(uv & 0x20)) { n = 2; uv &= 0x1f; }
+    else if (!(uv & 0x10)) { n = 3; uv &= 0x0f; }
+    else if (!(uv & 0x08)) { n = 4; uv &= 0x07; }
+    else if (!(uv & 0x04)) { n = 5; uv &= 0x03; }
+    else if (!(uv & 0x02)) { n = 6; uv &= 0x01; }
+    else if (!(uv & 0x01)) { n = 7; uv = 0; }
+    else { n = 13; uv = 0; }
+    if (n > *lenp) {
+        rb_warning("malformed UTF-8 character (expected %d bytes, given %d bytes)",
+                   n, *lenp);
+        return 0xfffd;
+    }
     *lenp = n--;
 
-    uv = c;
     if (n != 0) {
-        uv &= (1<<(BYTEWIDTH-2-n)) - 1;
         while (n--) {
-            uv = uv << 6 | (*p++ & ((1<<6)-1));
+            c = *p++ & 0xff;
+            if ((c & 0xc0) != 0x80) {
+                rb_warning("malformed UTF-8 character");
+                *lenp -= n + 1;
+                return 0xfffd;
+            }
+            else {
+                c &= 0x3f;
+                if (uv == 0 && c == 0) {
+                    int i;
+
+                    for (i=0; n-i>0 && (p[i] & 0x3f) == 0; i++)
+                        ;
+                    rb_warning("redundant UTF-8 sequence (skip %d bytes)", i+1);
+                    n -= i;
+                    p += i;
+                    continue;
+                }
+                uv = uv << 6 | c;
+            }
         }
     }
     return uv;
-- 
cgit v1.2.3