summary | refs | log | tree | commit | diff
path: root/enc/utf_16be.c
diff options
context:
space:
mode:
Diffstat (limited to 'enc/utf_16be.c')
-rw-r--r-- enc/utf_16be.c 32
1 files changed, 17 insertions, 15 deletions
diff --git a/enc/utf_16be.c b/enc/utf_16be.c
index 8d7c8e9b11..0086040b5d 100644
--- a/enc/utf_16be.c
+++ b/enc/utf_16be.c
@@ -2,7 +2,7 @@
utf_16be.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -28,11 +28,9 @@
*/
#include "regenc.h"
+#include "iso_8859.h"
-#define UTF16_IS_SURROGATE_FIRST(c) (((c) & 0xfc) == 0xd8)
-#define UTF16_IS_SURROGATE_SECOND(c) (((c) & 0xfc) == 0xdc)
-#define UTF16_IS_SURROGATE(c) (((c) & 0xf8) == 0xd8)
-
+#if 0
static const int EncLen_UTF16[] = {
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
@@ -51,6 +49,7 @@ static const int EncLen_UTF16[] = {
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
};
+#endif
static int
utf16be_mbc_enc_len(const UChar* p, const OnigUChar* e ARG_UNUSED,
@@ -88,11 +87,8 @@ utf16be_is_mbc_newline(const UChar* p, const UChar* end,
if (*(p+1) == 0x0a && *p == 0x00)
return 1;
#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
- if ((
-#ifndef USE_CRNL_AS_LINE_TERMINATOR
- *(p+1) == 0x0d ||
-#endif
- *(p+1) == 0x85) && *p == 0x00)
+ if ((*(p+1) == 0x0b || *(p+1) == 0x0c || *(p+1) == 0x0d || *(p+1) == 0x85)
+ && *p == 0x00)
return 1;
if (*p == 0x20 && (*(p+1) == 0x29 || *(p+1) == 0x28))
return 1;
@@ -108,9 +104,8 @@ utf16be_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED,
OnigCodePoint code;
if (UTF16_IS_SURROGATE_FIRST(*p)) {
- code = ((((p[0] - 0xd8) << 2) + ((p[1] & 0xc0) >> 6) + 1) << 16)
- + ((((p[1] & 0x3f) << 2) + (p[2] - 0xdc)) << 8)
- + p[3];
+ code = ((((p[0] << 8) + p[1]) & 0x03ff) << 10)
+ + (((p[2] << 8) + p[3]) & 0x03ff) + 0x10000;
}
else {
code = p[0] * 256 + p[1];
@@ -189,7 +184,7 @@ utf16be_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* e
int c, v;
p++;
- if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
+ if (*p == SHARP_s && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
return TRUE;
}
@@ -253,6 +248,13 @@ OnigEncodingDefine(utf_16be, UTF_16BE) = {
onigenc_unicode_is_code_ctype,
onigenc_utf16_32_get_ctype_code_range,
utf16be_left_adjust_char_head,
- onigenc_always_false_is_allowed_reverse_match
+ onigenc_always_false_is_allowed_reverse_match,
+#ifdef USE_CASE_MAP_API
+ onigenc_unicode_case_map,
+#else
+ NULL,
+#endif
+ 0,
+ ONIGENC_FLAG_UNICODE,
};
ENC_ALIAS("UCS-2BE", "UTF-16BE")