From 44cfd58dc5f3949ff5cbcf7350a3e76d6ff1c49c Mon Sep 17 00:00:00 2001
From: akr
Date: Wed, 30 Jan 2008 03:49:54 +0000
Subject: * enc/utf_16be.c (UTF16_IS_SURROGATE_FIRST): avoid branch.
 (UTF16_IS_SURROGATE_SECOND): ditto.
 (UTF16_IS_SURROGATE): defined.
 (utf16be_mbc_enc_len): validation implemented.

* enc/utf_16le.c (UTF16_IS_SURROGATE_FIRST): avoid branch.
  (UTF16_IS_SURROGATE_SECOND): ditto.
  (UTF16_IS_SURROGATE): defined.
  (utf16le_mbc_enc_len): validation implemented.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15338 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
---
 enc/utf_16le.c | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

(limited to 'enc/utf_16le.c')

diff --git a/enc/utf_16le.c b/enc/utf_16le.c
index 355d9c23eb..33bfe08f47 100644
--- a/enc/utf_16le.c
+++ b/enc/utf_16le.c
@@ -29,8 +29,9 @@
 
 #include "regenc.h"
 
-#define UTF16_IS_SURROGATE_FIRST(c) (c >= 0xd8 && c <= 0xdb)
-#define UTF16_IS_SURROGATE_SECOND(c) (c >= 0xdc && c <= 0xdf)
+#define UTF16_IS_SURROGATE_FIRST(c) (((c) & 0xfc) == 0xd8)
+#define UTF16_IS_SURROGATE_SECOND(c) (((c) & 0xfc) == 0xdc)
+#define UTF16_IS_SURROGATE(c) (((c) & 0xf8) == 0xd8)
 
 static const int EncLen_UTF16[] = {
   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
@@ -52,10 +53,23 @@ static const int EncLen_UTF16[] = {
 };
 
 static int
-utf16le_mbc_enc_len(const UChar* p, const OnigUChar* e ARG_UNUSED,
+utf16le_mbc_enc_len(const UChar* p, const OnigUChar* e,
                     OnigEncoding enc ARG_UNUSED)
 {
-  return EncLen_UTF16[*(p+1)];
+  int len = e-p, byte;
+  if (len < 2)
+    return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(1);
+  byte = p[1];
+  if (!UTF16_IS_SURROGATE(byte)) {
+    return ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(2);
+  }
+  if (UTF16_IS_SURROGATE_FIRST(byte)) {
+    if (len < 4)
+      return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(4-len);
+    if (UTF16_IS_SURROGATE_SECOND(p[3]))
+      return ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(4);
+  }
+  return ONIGENC_CONSTRUCT_MBCLEN_INVALID();
 }
 
 static int
-- 
cgit v1.2.3