diff options
Diffstat (limited to 'enc/utf_16be.c')
| -rw-r--r-- | enc/utf_16be.c | 32 |
1 files changed, 17 insertions, 15 deletions
diff --git a/enc/utf_16be.c b/enc/utf_16be.c
index 8d7c8e9b11..0086040b5d 100644
--- a/enc/utf_16be.c
+++ b/enc/utf_16be.c
@@ -2,7 +2,7 @@
   utf_16be.c - Oniguruma (regular expression library)
 **********************************************************************/
 /*-
- * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -28,11 +28,9 @@
  */
 
 #include "regenc.h"
+#include "iso_8859.h"
 
-#define UTF16_IS_SURROGATE_FIRST(c) (((c) & 0xfc) == 0xd8)
-#define UTF16_IS_SURROGATE_SECOND(c) (((c) & 0xfc) == 0xdc)
-#define UTF16_IS_SURROGATE(c) (((c) & 0xf8) == 0xd8)
-
+#if 0
 static const int EncLen_UTF16[] = {
   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
@@ -51,6 +49,7 @@ static const int EncLen_UTF16[] = {
   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
 };
+#endif
 
 static int
 utf16be_mbc_enc_len(const UChar* p, const OnigUChar* e ARG_UNUSED,
@@ -88,11 +87,8 @@ utf16be_is_mbc_newline(const UChar* p, const UChar* end,
     if (*(p+1) == 0x0a && *p == 0x00)
       return 1;
 #ifdef USE_UNICODE_ALL_LINE_TERMINATORS
-    if ((
-#ifndef USE_CRNL_AS_LINE_TERMINATOR
-        *(p+1) == 0x0d ||
-#endif
-        *(p+1) == 0x85) && *p == 0x00)
+    if ((*(p+1) == 0x0b || *(p+1) == 0x0c || *(p+1) == 0x0d || *(p+1) == 0x85)
+        && *p == 0x00)
       return 1;
     if (*p == 0x20 && (*(p+1) == 0x29 || *(p+1) == 0x28))
       return 1;
@@ -108,9 +104,8 @@ utf16be_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED,
   OnigCodePoint code;
 
   if (UTF16_IS_SURROGATE_FIRST(*p)) {
-    code = ((((p[0] - 0xd8) << 2) + ((p[1] & 0xc0) >> 6) + 1) << 16)
-         + ((((p[1] & 0x3f) << 2) + (p[2] - 0xdc)) << 8)
-         + p[3];
+    code = ((((p[0] << 8) + p[1]) & 0x03ff) << 10)
+         + (((p[2] << 8) + p[3]) & 0x03ff) + 0x10000;
   }
   else {
     code = p[0] * 256 + p[1];
@@ -189,7 +184,7 @@ utf16be_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* e
     int c, v;
 
     p++;
-    if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
+    if (*p == SHARP_s && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
       return TRUE;
     }
 
@@ -253,6 +248,13 @@ OnigEncodingDefine(utf_16be, UTF_16BE) = {
   onigenc_unicode_is_code_ctype,
   onigenc_utf16_32_get_ctype_code_range,
   utf16be_left_adjust_char_head,
-  onigenc_always_false_is_allowed_reverse_match
+  onigenc_always_false_is_allowed_reverse_match,
+#ifdef USE_CASE_MAP_API
+  onigenc_unicode_case_map,
+#else
+  NULL,
+#endif
+  0,
+  ONIGENC_FLAG_UNICODE,
 };
 ENC_ALIAS("UCS-2BE", "UTF-16BE")
