diff options
Diffstat (limited to 'regenc.c')
| -rw-r--r-- | regenc.c | 355 |
1 files changed, 242 insertions, 113 deletions
@@ -1,8 +1,9 @@ /********************************************************************** - regenc.c - Oniguruma (regular expression library) + regenc.c - Onigmo (Oniguruma-mod) (regular expression library) **********************************************************************/ /*- * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2011-2019 K.Takata <kentkt AT csc DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -50,25 +51,51 @@ onigenc_set_default_encoding(OnigEncoding enc) return 0; } +extern int +onigenc_mbclen(const OnigUChar* p,const OnigUChar* e, OnigEncoding enc) +{ + int ret = ONIGENC_PRECISE_MBC_ENC_LEN(enc, p, e); + if (ONIGENC_MBCLEN_CHARFOUND_P(ret)) { + ret = ONIGENC_MBCLEN_CHARFOUND_LEN(ret); + if (p + ret > e) ret = (int)(e - p); // just for case + return ret; + } + else if (ONIGENC_MBCLEN_NEEDMORE_P(ret)) { + return (int)(e - p); + } + return p < e ? 1 : 0; +} + +extern int +onigenc_mbclen_approximate(const OnigUChar* p,const OnigUChar* e, OnigEncoding enc) +{ + int ret = ONIGENC_PRECISE_MBC_ENC_LEN(enc, p, e); + if (ONIGENC_MBCLEN_CHARFOUND_P(ret)) + return ONIGENC_MBCLEN_CHARFOUND_LEN(ret); + else if (ONIGENC_MBCLEN_NEEDMORE_P(ret)) + return (int )(e - p) + ONIGENC_MBCLEN_NEEDMORE_LEN(ret); + return 1; +} + extern UChar* -onigenc_get_right_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s) +onigenc_get_right_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end) { - UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s); + UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s, end); if (p < s) { - p += enc_len(enc, p, s); + p += enclen(enc, p, end); } return p; } extern UChar* onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc, - const UChar* start, const UChar* s, const UChar** prev) + const UChar* start, const UChar* s, const UChar* end, const UChar** prev) { - UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s); + UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s, end); if (p < s) { if (prev) *prev = (const UChar* )p; - p += enc_len(enc, p, s); + p += enclen(enc, p, end); } else { if (prev) *prev = (const UChar* )NULL; /* Sorry */ @@ -77,22 +104,22 @@ onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc, } extern UChar* -onigenc_get_prev_char_head(OnigEncoding enc, const UChar* start, const UChar* s) +onigenc_get_prev_char_head(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end) { if (s <= start) return (UChar* )NULL; - return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1); + return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1, end); } extern UChar* -onigenc_step_back(OnigEncoding enc, const UChar* start, const UChar* s, int n) +onigenc_step_back(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end, int n) { while (ONIG_IS_NOT_NULL(s) && n-- > 0) { if (s <= start) return (UChar* )NULL; - s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1); + s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1, end); } return (UChar* )s; } @@ -112,7 +139,7 @@ onigenc_strlen(OnigEncoding enc, const UChar* p, const UChar* end) { int n = 0; UChar* q = (UChar* )p; - + while (q < end) { q += ONIGENC_MBC_ENC_LEN(enc, q, end); n++; @@ -125,8 +152,8 @@ onigenc_strlen_null(OnigEncoding enc, const UChar* s) { int n = 0; UChar* p = (UChar* )s; - UChar* e = p + strlen(s); - + UChar* e; + while (1) { if (*p == '\0') { UChar* q; @@ -141,6 +168,7 @@ onigenc_strlen_null(OnigEncoding enc, const UChar* s) } if (len == 1) return n; } + e = p + ONIGENC_MBC_MAXLEN(enc); p += ONIGENC_MBC_ENC_LEN(enc, p, e); n++; } @@ -151,7 +179,7 @@ onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s) { UChar* start = (UChar* )s; UChar* p = (UChar* )s; - UChar* e = p + strlen(s); + UChar* e; while (1) { if (*p == '\0') { @@ -167,6 +195,7 @@ onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s) } if (len == 1) return (int )(p - start); } + e = p + ONIGENC_MBC_MAXLEN(enc); p += ONIGENC_MBC_ENC_LEN(enc, p, e); } } @@ -350,17 +379,19 @@ const UChar OnigEncISO_8859_1_ToUpperCaseTable[256] = { }; #endif +#if 0 extern void -onigenc_set_default_caseconv_table(const UChar* table) +onigenc_set_default_caseconv_table(const UChar* table ARG_UNUSED) { /* nothing */ /* obsoleted. */ } +#endif extern UChar* -onigenc_get_left_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s) +onigenc_get_left_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end) { - return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s); + return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s, end); } const OnigPairCaseFoldCodes OnigAsciiLowerMap[] = { @@ -393,15 +424,14 @@ const OnigPairCaseFoldCodes OnigAsciiLowerMap[] = { }; extern int -onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag, - OnigApplyAllCaseFoldFunc f, void* arg, - OnigEncoding enc) +onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED, + OnigApplyAllCaseFoldFunc f, void* arg, + OnigEncoding enc ARG_UNUSED) { OnigCodePoint code; int i, r; - for (i = 0; i < sizeof(OnigAsciiLowerMap)/sizeof(OnigPairCaseFoldCodes) - ; i++) { + for (i = 0; i < numberof(OnigAsciiLowerMap); i++) { code = OnigAsciiLowerMap[i].to; r = (*f)(OnigAsciiLowerMap[i].from, &code, 1, arg); if (r != 0) return r; @@ -415,9 +445,9 @@ onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag, } extern int -onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag, - const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[], - OnigEncoding enc) +onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag ARG_UNUSED, + const OnigUChar* p, const OnigUChar* end ARG_UNUSED, + OnigCaseFoldCodeItem items[], OnigEncoding enc ARG_UNUSED) { if (0x41 <= *p && *p <= 0x5a) { items[0].byte_len = 1; @@ -435,11 +465,11 @@ onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag, return 0; } -extern int -ss_apply_all_case_fold(OnigCaseFoldType flag, - OnigApplyAllCaseFoldFunc f, void* arg) +static int +ss_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED, + OnigApplyAllCaseFoldFunc f, void* arg) { - static OnigCodePoint ss[] = { 0x73, 0x73 }; + OnigCodePoint ss[] = { 0x73, 0x73 }; return (*f)((OnigCodePoint )0xdf, ss, 2, arg); } @@ -475,7 +505,7 @@ onigenc_apply_all_case_fold_with_map(int map_size, extern int onigenc_get_case_fold_codes_by_str_with_map(int map_size, const OnigPairCaseFoldCodes map[], - int ess_tsett_flag, OnigCaseFoldType flag, + int ess_tsett_flag, OnigCaseFoldType flag ARG_UNUSED, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) { if (0x41 <= *p && *p <= 0x5a) { @@ -483,7 +513,7 @@ onigenc_get_case_fold_codes_by_str_with_map(int map_size, items[0].code_len = 1; items[0].code[0] = (OnigCodePoint )(*p + 0x20); if (*p == 0x53 && ess_tsett_flag != 0 && end > p + 1 - && (*(p+1) == 0x53 || *(p+1) == 0x73)) { + && (*(p+1) == 0x53 || *(p+1) == 0x73)) { /* SS */ items[1].byte_len = 2; items[1].code_len = 1; @@ -498,7 +528,7 @@ onigenc_get_case_fold_codes_by_str_with_map(int map_size, items[0].code_len = 1; items[0].code[0] = (OnigCodePoint )(*p - 0x20); if (*p == 0x73 && ess_tsett_flag != 0 && end > p + 1 - && (*(p+1) == 0x73 || *(p+1) == 0x53)) { + && (*(p+1) == 0x73 || *(p+1) == 0x53)) { /* ss */ items[1].byte_len = 2; items[1].code_len = 1; @@ -536,16 +566,16 @@ onigenc_get_case_fold_codes_by_str_with_map(int map_size, for (i = 0; i < map_size; i++) { if (*p == map[i].from) { - items[0].byte_len = 1; - items[0].code_len = 1; - items[0].code[0] = map[i].to; - return 1; + items[0].byte_len = 1; + items[0].code_len = 1; + items[0].code[0] = map[i].to; + return 1; } else if (*p == map[i].to) { - items[0].byte_len = 1; - items[0].code_len = 1; - items[0].code[0] = map[i].from; - return 1; + items[0].byte_len = 1; + items[0].code_len = 1; + items[0].code[0] = map[i].from; + return 1; } } } @@ -555,15 +585,16 @@ onigenc_get_case_fold_codes_by_str_with_map(int map_size, extern int -onigenc_not_support_get_ctype_code_range(int ctype, - OnigCodePoint* sb_out, const OnigCodePoint* ranges[], - OnigEncoding enc) +onigenc_not_support_get_ctype_code_range(OnigCtype ctype ARG_UNUSED, + OnigCodePoint* sb_out ARG_UNUSED, + const OnigCodePoint* ranges[] ARG_UNUSED, + OnigEncoding enc) { return ONIG_NO_SUPPORT_CONFIG; } extern int -onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end, OnigEncoding enc) +onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end, OnigEncoding enc ARG_UNUSED) { if (p < end) { if (*p == 0x0a) return 1; @@ -573,8 +604,8 @@ onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end, OnigEncoding enc) /* for single byte encodings */ extern int -onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag, const UChar** p, - const UChar*end, UChar* lower, OnigEncoding enc) +onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, const UChar** p, + const UChar* end, UChar* lower, OnigEncoding enc ARG_UNUSED) { *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(**p); @@ -584,8 +615,8 @@ onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag, const UChar** p, #if 0 extern int -onigenc_ascii_is_mbc_ambiguous(OnigCaseFoldType flag, - const UChar** pp, const UChar* end) +onigenc_ascii_is_mbc_ambiguous(OnigCaseFoldType flag ARG_UNUSED, + const UChar** pp, const UChar* end ARG_UNUSED) { const UChar* p = *pp; @@ -595,58 +626,79 @@ onigenc_ascii_is_mbc_ambiguous(OnigCaseFoldType flag, #endif extern int -onigenc_single_byte_mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc) +onigenc_single_byte_mbc_enc_len(const UChar* p ARG_UNUSED, const UChar* e ARG_UNUSED, + OnigEncoding enc ARG_UNUSED) { return 1; } extern OnigCodePoint -onigenc_single_byte_mbc_to_code(const UChar* p, const UChar* end, OnigEncoding enc) +onigenc_single_byte_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED, + OnigEncoding enc ARG_UNUSED) { return (OnigCodePoint )(*p); } extern int -onigenc_single_byte_code_to_mbclen(OnigCodePoint code, OnigEncoding enc) +onigenc_single_byte_code_to_mbclen(OnigCodePoint code, OnigEncoding enc ARG_UNUSED) { + if (code > 0xff) + return ONIGERR_INVALID_CODE_POINT_VALUE; return 1; } extern int -onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc) +onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc ARG_UNUSED) { + if (code > 0xff) { + return ONIGERR_INVALID_CODE_POINT_VALUE; + } *buf = (UChar )(code & 0xff); return 1; } extern UChar* -onigenc_single_byte_left_adjust_char_head(const UChar* start, const UChar* s, - OnigEncoding enc) +onigenc_single_byte_left_adjust_char_head(const UChar* start ARG_UNUSED, + const UChar* s, + const UChar* end ARG_UNUSED, + OnigEncoding enc ARG_UNUSED) { return (UChar* )s; } extern int -onigenc_always_true_is_allowed_reverse_match(const UChar* s, const UChar* end, - OnigEncoding enc) +onigenc_always_true_is_allowed_reverse_match(const UChar* s ARG_UNUSED, + const UChar* end ARG_UNUSED, + OnigEncoding enc ARG_UNUSED) { return TRUE; } extern int -onigenc_always_false_is_allowed_reverse_match(const UChar* s, const UChar* end, - OnigEncoding enc) +onigenc_always_false_is_allowed_reverse_match(const UChar* s ARG_UNUSED, + const UChar* end ARG_UNUSED, + OnigEncoding enc ARG_UNUSED) { return FALSE; } +extern int +onigenc_ascii_is_code_ctype(OnigCodePoint code, unsigned int ctype, + OnigEncoding enc ARG_UNUSED) +{ + if (code < 128) + return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); + else + return FALSE; +} + extern OnigCodePoint onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar* p, const UChar* end) { int c, i, len; OnigCodePoint n; - len = enc_len(enc, p, end); + len = enclen(enc, p, end); n = (OnigCodePoint )(*p++); if (len == 1) return n; @@ -659,8 +711,9 @@ onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar* p, const UChar* end) } extern int -onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag, - const UChar** pp, const UChar* end, UChar* lower) +onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag ARG_UNUSED, + const UChar** pp, const UChar* end ARG_UNUSED, + UChar* lower) { int len; const UChar *p = *pp; @@ -673,7 +726,7 @@ onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag, else { int i; - len = enc_len(enc, p, end); + len = enclen(enc, p, end); for (i = 0; i < len; i++) { *lower++ = *p++; } @@ -685,7 +738,7 @@ onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag, #if 0 extern int onigenc_mbn_is_mbc_ambiguous(OnigEncoding enc, OnigCaseFoldType flag, - const UChar** pp, const UChar* end) + const UChar** pp, const UChar* end ARG_UNUSED) { const UChar* p = *pp; @@ -694,20 +747,21 @@ onigenc_mbn_is_mbc_ambiguous(OnigEncoding enc, OnigCaseFoldType flag, return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p); } - (*pp) += enc_len(enc, p); + (*pp) += enclen(enc, p); return FALSE; } #endif extern int -onigenc_mb2_code_to_mbclen(OnigCodePoint code) +onigenc_mb2_code_to_mbclen(OnigCodePoint code, OnigEncoding enc ARG_UNUSED) { - if ((code & 0xff00) != 0) return 2; - else return 1; + if (code <= 0xff) return 1; + if (code <= 0xffff) return 2; + return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE; } extern int -onigenc_mb4_code_to_mbclen(OnigCodePoint code) +onigenc_mb4_code_to_mbclen(OnigCodePoint code, OnigEncoding enc ARG_UNUSED) { if ((code & 0xff000000) != 0) return 4; else if ((code & 0xff0000) != 0) return 3; @@ -726,10 +780,10 @@ onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf) *p++ = (UChar )(code & 0xff); #if 1 - if (enc_len(enc, buf, p) != (p - buf)) - return ONIGENC_ERR_INVALID_WIDE_CHAR_VALUE; + if (enclen(enc, buf, p) != (p - buf)) + return ONIGERR_INVALID_CODE_POINT_VALUE; #endif - return p - buf; + return (int )(p - buf); } extern int @@ -749,40 +803,39 @@ onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf) *p++ = (UChar )(code & 0xff); #if 1 - if (enc_len(enc, buf, p) != (p - buf)) - return ONIGENC_ERR_INVALID_WIDE_CHAR_VALUE; + if (enclen(enc, buf, p) != (p - buf)) + return ONIGERR_INVALID_CODE_POINT_VALUE; #endif - return p - buf; -} - -extern int -onigenc_minimum_property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end) -{ - static PosixBracketEntryType PBS[] = { - { (UChar* )"Alnum", ONIGENC_CTYPE_ALNUM, 5 }, - { (UChar* )"Alpha", ONIGENC_CTYPE_ALPHA, 5 }, - { (UChar* )"Blank", ONIGENC_CTYPE_BLANK, 5 }, - { (UChar* )"Cntrl", ONIGENC_CTYPE_CNTRL, 5 }, - { (UChar* )"Digit", ONIGENC_CTYPE_DIGIT, 5 }, - { (UChar* )"Graph", ONIGENC_CTYPE_GRAPH, 5 }, - { (UChar* )"Lower", ONIGENC_CTYPE_LOWER, 5 }, - { (UChar* )"Print", ONIGENC_CTYPE_PRINT, 5 }, - { (UChar* )"Punct", ONIGENC_CTYPE_PUNCT, 5 }, - { (UChar* )"Space", ONIGENC_CTYPE_SPACE, 5 }, - { (UChar* )"Upper", ONIGENC_CTYPE_UPPER, 5 }, - { (UChar* )"XDigit", ONIGENC_CTYPE_XDIGIT, 6 }, - { (UChar* )"ASCII", ONIGENC_CTYPE_ASCII, 5 }, - { (UChar* )"Word", ONIGENC_CTYPE_WORD, 4 }, - { (UChar* )NULL, -1, 0 } + return (int )(p - buf); +} + +extern int +onigenc_minimum_property_name_to_ctype(OnigEncoding enc, const UChar* p, const UChar* end) +{ + static const PosixBracketEntryType PBS[] = { + POSIX_BRACKET_ENTRY_INIT("Alnum", ONIGENC_CTYPE_ALNUM), + POSIX_BRACKET_ENTRY_INIT("Alpha", ONIGENC_CTYPE_ALPHA), + POSIX_BRACKET_ENTRY_INIT("Blank", ONIGENC_CTYPE_BLANK), + POSIX_BRACKET_ENTRY_INIT("Cntrl", ONIGENC_CTYPE_CNTRL), + POSIX_BRACKET_ENTRY_INIT("Digit", ONIGENC_CTYPE_DIGIT), + POSIX_BRACKET_ENTRY_INIT("Graph", ONIGENC_CTYPE_GRAPH), + POSIX_BRACKET_ENTRY_INIT("Lower", ONIGENC_CTYPE_LOWER), + POSIX_BRACKET_ENTRY_INIT("Print", ONIGENC_CTYPE_PRINT), + POSIX_BRACKET_ENTRY_INIT("Punct", ONIGENC_CTYPE_PUNCT), + POSIX_BRACKET_ENTRY_INIT("Space", ONIGENC_CTYPE_SPACE), + POSIX_BRACKET_ENTRY_INIT("Upper", ONIGENC_CTYPE_UPPER), + POSIX_BRACKET_ENTRY_INIT("XDigit", ONIGENC_CTYPE_XDIGIT), + POSIX_BRACKET_ENTRY_INIT("ASCII", ONIGENC_CTYPE_ASCII), + POSIX_BRACKET_ENTRY_INIT("Word", ONIGENC_CTYPE_WORD), }; - PosixBracketEntryType *pb; + const PosixBracketEntryType *pb; int len; len = onigenc_strlen(enc, p, end); - for (pb = PBS; IS_NOT_NULL(pb->name); pb++) { + for (pb = PBS; pb < PBS + numberof(PBS); pb++) { if (len == pb->len && - onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0) + onigenc_with_ascii_strnicmp(enc, p, end, pb->name, pb->len) == 0) return pb->ctype; } @@ -791,7 +844,7 @@ onigenc_minimum_property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end) extern int onigenc_mb2_is_code_ctype(OnigEncoding enc, OnigCodePoint code, - unsigned int ctype) + unsigned int ctype) { if (code < 128) return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); @@ -806,7 +859,7 @@ onigenc_mb2_is_code_ctype(OnigEncoding enc, OnigCodePoint code, extern int onigenc_mb4_is_code_ctype(OnigEncoding enc, OnigCodePoint code, - unsigned int ctype) + unsigned int ctype) { if (code < 128) return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); @@ -833,28 +886,52 @@ onigenc_with_ascii_strncmp(OnigEncoding enc, const UChar* p, const UChar* end, if (x) return x; sascii++; - p += enc_len(enc, p, end); + p += enclen(enc, p, end); } return 0; } +extern int +onigenc_with_ascii_strnicmp(OnigEncoding enc, const UChar* p, const UChar* end, + const UChar* sascii /* ascii */, int n) +{ + int x, c; + + while (n-- > 0) { + if (p >= end) return (int )(*sascii); + + c = (int )ONIGENC_MBC_TO_CODE(enc, p, end); + if (ONIGENC_IS_ASCII_CODE(c)) + c = ONIGENC_ASCII_CODE_TO_LOWER_CASE(c); + x = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*sascii) - c; + if (x) return x; + + sascii++; + p += enclen(enc, p, end); + } + return 0; +} + +#if 0 /* Property management */ static int resize_property_list(int new_size, const OnigCodePoint*** plist, int* psize) { - int size; + size_t size; const OnigCodePoint **list = *plist; size = sizeof(OnigCodePoint*) * new_size; if (IS_NULL(list)) { list = (const OnigCodePoint** )xmalloc(size); + if (IS_NULL(list)) return ONIGERR_MEMORY; } else { - list = (const OnigCodePoint** )xrealloc((void* )list, size); + const OnigCodePoint **tmp; + tmp = (const OnigCodePoint** )xrealloc((void* )list, size); + if (IS_NULL(tmp)) return ONIGERR_MEMORY; + list = tmp; } - if (IS_NULL(list)) return ONIGERR_MEMORY; - *plist = list; *psize = new_size; @@ -885,19 +962,71 @@ onigenc_property_list_add_property(UChar* name, const OnigCodePoint* prop, *pnum = *pnum + 1; onig_st_insert_strend(*table, name, name + strlen((char* )name), - (hash_data_type )(*pnum + ONIGENC_MAX_STD_CTYPE)); + (hash_data_type )(*pnum + ONIGENC_MAX_STD_CTYPE)); return 0; } +#endif +#ifdef USE_CASE_MAP_API extern int -onigenc_property_list_init(int (*f)()) +onigenc_ascii_only_case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, const OnigUChar* end, + OnigUChar* to, OnigUChar* to_end, const struct OnigEncodingTypeST* enc) { - int r; + OnigCodePoint code; + OnigUChar *to_start = to; + OnigCaseFoldType flags = *flagP; + int codepoint_length; + + while (*pp < end && to < to_end) { + codepoint_length = ONIGENC_PRECISE_MBC_ENC_LEN(enc, *pp, end); + if (codepoint_length < 0) + return codepoint_length; /* encoding invalid */ + code = ONIGENC_MBC_TO_CODE(enc, *pp, end); + *pp += codepoint_length; + + if (code >= 'a' && code <= 'z' && (flags & ONIGENC_CASE_UPCASE)) { + flags |= ONIGENC_CASE_MODIFIED; + code -= 'a' - 'A'; + } + else if (code >= 'A' && code <= 'Z' && + (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) { + flags |= ONIGENC_CASE_MODIFIED; + code += 'a' - 'A'; + } + to += ONIGENC_CODE_TO_MBC(enc, code, to); + if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */ + flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE); + } + *flagP = flags; + return (int )(to - to_start); +} - THREAD_ATOMIC_START; +extern int +onigenc_single_byte_ascii_only_case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, + const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, + const struct OnigEncodingTypeST* enc) +{ + OnigCodePoint code; + OnigUChar *to_start = to; + OnigCaseFoldType flags = *flagP; - r = f(); + while (*pp < end && to < to_end) { + code = *(*pp)++; - THREAD_ATOMIC_END; - return r; + if (code >= 'a' && code <= 'z' && (flags & ONIGENC_CASE_UPCASE)) { + flags |= ONIGENC_CASE_MODIFIED; + code -= 'a' - 'A'; + } + else if (code >= 'A' && code <= 'Z' && + (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) { + flags |= ONIGENC_CASE_MODIFIED; + code += 'a' - 'A'; + } + *to++ = code; + if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */ + flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE); + } + *flagP = flags; + return (int )(to - to_start); } +#endif |
