diff options
Diffstat (limited to 'enc/utf_32be.c')
| -rw-r--r-- | enc/utf_32be.c | 41 |
1 files changed, 28 insertions, 13 deletions
diff --git a/enc/utf_32be.c b/enc/utf_32be.c index 61e7d0f1b5..e05cfaf1b2 100644 --- a/enc/utf_32be.c +++ b/enc/utf_32be.c @@ -28,12 +28,25 @@ */ #include "regenc.h" +#include "iso_8859.h" +static OnigCodePoint utf32be_mbc_to_code(const UChar* p, const UChar* end, OnigEncoding enc); static int -utf32be_mbc_enc_len(const UChar* p ARG_UNUSED, const OnigUChar* e ARG_UNUSED, - OnigEncoding enc ARG_UNUSED) +utf32be_mbc_enc_len(const UChar* p ARG_UNUSED, const OnigUChar* e, + OnigEncoding enc) { - return 4; + if (e < p) { + return ONIGENC_CONSTRUCT_MBCLEN_INVALID(); + } + else if (e-p < 4) { + return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(4-(int)(e-p)); + } + else { + OnigCodePoint c = utf32be_mbc_to_code(p, e, enc); + if (!UNICODE_VALID_CODEPOINT_P(c)) + return ONIGENC_CONSTRUCT_MBCLEN_INVALID(); + return ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(4); + } } static int @@ -44,11 +57,7 @@ utf32be_is_mbc_newline(const UChar* p, const UChar* end, if (*(p+3) == 0x0a && *(p+2) == 0 && *(p+1) == 0 && *p == 0) return 1; #ifdef USE_UNICODE_ALL_LINE_TERMINATORS - if (( -#ifndef USE_CRNL_AS_LINE_TERMINATOR - *(p+3) == 0x0d || -#endif - *(p+3) == 0x85) + if ((*(p+3) == 0x0b || *(p+3) == 0x0c || *(p+3) == 0x0d || *(p+3) == 0x85) && *(p+2) == 0 && *(p+1) == 0 && *p == 0x00) return 1; if (*(p+2) == 0x20 && (*(p+3) == 0x29 || *(p+3) == 0x28) @@ -130,7 +139,7 @@ utf32be_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* e int c, v; p += 3; - if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + if (*p == SHARP_s && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { return TRUE; } @@ -155,11 +164,11 @@ static UChar* utf32be_left_adjust_char_head(const UChar* start, const UChar* s, const UChar* end, OnigEncoding enc ARG_UNUSED) { - int rem; + ptrdiff_t rem; if (s <= start) return (UChar* )s; - rem = (s - start) % 4; + rem = (int )((s - start) % 4); return (UChar* )(s - rem); } @@ -189,7 +198,13 @@ OnigEncodingDefine(utf_32be, UTF_32BE) = { onigenc_unicode_is_code_ctype, onigenc_utf16_32_get_ctype_code_range, utf32be_left_adjust_char_head, - onigenc_always_false_is_allowed_reverse_match + onigenc_always_false_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API + onigenc_unicode_case_map, +#else + NULL, +#endif + 0, + ONIGENC_FLAG_UNICODE, }; ENC_ALIAS("UCS-4BE", "UTF-32BE") - |
