summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author matz <matz@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2002-12-02 07:13:56 +0000
committer matz <matz@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2002-12-02 07:13:56 +0000
commit e193fd8d665567c7f3b827c21a1b77af43e2c391 (patch)
tree 6941b26e87693c8919f4c87fb03f6345a54cb0c4
parent 96986a7a9019ff063e2ef3ecbe223c0b23c68d52 (diff)
* pack.c (utf8_to_uv): added checks for malformed or redundant
  UTF-8 sequences.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@3105 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- ChangeLog 5
-rw-r--r-- ext/socket/socket.c 15
-rw-r--r-- pack.c 61
3 files changed, 66 insertions, 15 deletions
diff --git a/ChangeLog b/ChangeLog
index 4a4a665..2db3084 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -8,6 +8,11 @@ Sun Dec 1 22:43:29 2002 Nobuyoshi Nakada <nobu.nokada@softhome.net>
* win32/win32.c (rb_w32_stat): empty path is invalid, and return
ENOENT rather than EBADF in such case. [ruby-talk:57177]
+Fri Nov 29 18:01:48 2002 Yukihiro Matsumoto <matz@ruby-lang.org>
+
+ * pack.c (utf8_to_uv): added checks for malformed or redundant
+ UTF-8 sequences.
+
Thu Nov 28 12:08:30 2002 Akinori MUSHA <knu@iDaemons.org>
* lib/mkmf.rb: Avoid the use of "clean::" in favor of "clean:" in
diff --git a/ext/socket/socket.c b/ext/socket/socket.c
index 17df447..54dbaaf 100644
--- a/ext/socket/socket.c
+++ b/ext/socket/socket.c
@@ -2922,4 +2922,19 @@ Init_socket()
#ifdef NI_DGRAM
sock_define_const("NI_DGRAM", NI_DGRAM);
#endif
+#ifdef SHUT_RD
+ sock_define_const("SHUT_RD", SHUT_RD);
+#else
+ sock_define_const("SHUT_RD", 0);
+#endif
+#ifdef SHUT_WR
+ sock_define_const("SHUT_WR", SHUT_WR);
+#else
+ sock_define_const("SHUT_WR", 1);
+#endif
+#ifdef SHUT_RDWR
+ sock_define_const("SHUT_RDWR", SHUT_RDWR);
+#else
+ sock_define_const("SHUT_RDWR", 2);
+#endif
}
diff --git a/pack.c b/pack.c
index a880083..d482465 100644
--- a/pack.c
+++ b/pack.c
@@ -1855,25 +1855,56 @@ utf8_to_uv(p, lenp)
char *p;
long *lenp;
{
- int c = (*p++)&0xff;
- unsigned long uv;
- long n = 1;
-
- if (c < 0xc0) n = 1;
- else if (c < 0xe0) n = 2;
- else if (c < 0xf0) n = 3;
- else if (c < 0xf8) n = 4;
- else if (c < 0xfc) n = 5;
- else if (c < 0xfe) n = 6;
- else if (c == 0xfe) n = 7;
- if (n > *lenp) return 0;
+ int c = *p++ & 0xff;
+ unsigned long uv = c;
+ long n;
+
+ if (!(uv & 0x80)) {
+ *lenp = 1;
+ return uv;
+ }
+ if (!(uv & 0x40)) {
+ rb_warning("malformed UTF-8 character");
+ *lenp = 1;
+ return uv;
+ }
+
+ if (!(uv & 0x20)) { n = 2; uv &= 0x1f; }
+ else if (!(uv & 0x10)) { n = 3; uv &= 0x0f; }
+ else if (!(uv & 0x08)) { n = 4; uv &= 0x07; }
+ else if (!(uv & 0x04)) { n = 5; uv &= 0x03; }
+ else if (!(uv & 0x02)) { n = 6; uv &= 0x01; }
+ else if (!(uv & 0x01)) { n = 7; uv = 0; }
+ else { n = 13; uv = 0; }
+ if (n > *lenp) {
+ rb_warning("malformed UTF-8 character (expected %d bytes, given %d bytes)",
+ n, *lenp);
+ return 0xfffd;
+ }
*lenp = n--;
- uv = c;
if (n != 0) {
- uv &= (1<<(BYTEWIDTH-2-n)) - 1;
while (n--) {
- uv = uv << 6 | (*p++ & ((1<<6)-1));
+ c = *p++ & 0xff;
+ if ((c & 0xc0) != 0x80) {
+ rb_warning("malformed UTF-8 character");
+ *lenp -= n + 1;
+ return 0xfffd;
+ }
+ else {
+ c &= 0x3f;
+ if (uv == 0 && c == 0) {
+ int i;
+
+ for (i=0; n-i>0 && (p[i] & 0x3f) == 0; i++)
+ ;
+ rb_warning("redundant UTF-8 sequence (skip %d bytes)", i+1);
+ n -= i;
+ p += i;
+ continue;
+ }
+ uv = uv << 6 | c;
+ }
}
}
return uv;