diff options
Diffstat (limited to 'enc/unicode.c')
| -rw-r--r-- | enc/unicode.c | 59 |
1 files changed, 23 insertions, 36 deletions
diff --git a/enc/unicode.c b/enc/unicode.c index 2c0d91dfea..5bc806863e 100644 --- a/enc/unicode.c +++ b/enc/unicode.c @@ -493,6 +493,10 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc, #endif if ((to = onigenc_unicode_fold_lookup(code)) != 0) { + if (OnigCodePointCount(to->n) == 0) { + /* any codepoint should not be empty */ + UNREACHABLE_RETURN(0); + } if (OnigCodePointCount(to->n) == 1) { OnigCodePoint orig_code = code; @@ -651,6 +655,7 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc, return n; } +#ifdef USE_CASE_MAP_API /* length in bytes for three characters in UTF-32; e.g. needed for ffi (U+FB03) */ #define CASE_MAPPING_SLACK 12 #define MODIFIED (flags |= ONIGENC_CASE_MODIFIED) @@ -678,15 +683,13 @@ onigenc_unicode_case_map(OnigCaseFoldType* flagP, *pp += codepoint_length; if (code <= 'z') { /* ASCII comes first */ - if (code >= 'a' && code <= 'z') { + if (code >= 'a' /*&& code <= 'z'*/) { if (flags & ONIGENC_CASE_UPCASE) { MODIFIED; if (flags & ONIGENC_CASE_FOLD_TURKISH_AZERI && code == 'i') code = I_WITH_DOT_ABOVE; - else { - code -= 'a'; - code += 'A'; - } + else + code -= 'a' - 'A'; } } else if (code >= 'A' && code <= 'Z') { @@ -719,7 +722,11 @@ onigenc_unicode_case_map(OnigCaseFoldType* flagP, } } else if ((folded = onigenc_unicode_fold_lookup(code)) != 0) { /* data about character found in CaseFold_11_Table */ - if ((flags & ONIGENC_CASE_TITLECASE) /* Titlecase needed, */ + if ((flags & ONIGENC_CASE_TITLECASE) && code>=0x1C90 && code<=0x1CBF) { /* Georgian MTAVRULI */ + MODIFIED; + code += 0x10D0 - 0x1C90; + } + else if ((flags & ONIGENC_CASE_TITLECASE) /* Titlecase needed, */ && (OnigCaseFoldFlags(folded->n) & ONIGENC_CASE_IS_TITLECASE)) { /* but already Titlecase */ /* already Titlecase, no changes needed */ } @@ -772,10 +779,15 @@ SpecialsCopy: } } } - else if ((folded = onigenc_unicode_unfold1_lookup(code)) != 0 /* data about character found in CaseUnfold_11_Table */ - && flags & OnigCaseFoldFlags(folded->n)) { /* needs and data availability match */ - MODIFIED; - code = folded->code[(flags & OnigCaseFoldFlags(folded->n) & ONIGENC_CASE_TITLECASE) ? 1 : 0]; + else if ((folded = onigenc_unicode_unfold1_lookup(code)) != 0) { /* data about character found in CaseUnfold_11_Table */ + if ((flags & ONIGENC_CASE_TITLECASE) /* Titlecase needed, */ + && (OnigCaseFoldFlags(folded->n) & ONIGENC_CASE_IS_TITLECASE)) { /* but already Titlecase */ + /* already Titlecase, no changes needed */ + } + else if (flags & OnigCaseFoldFlags(folded->n)) { /* needs and data availability match */ + MODIFIED; + code = folded->code[(flags & OnigCaseFoldFlags(folded->n) & ONIGENC_CASE_TITLECASE) ? 1 : 0]; + } } } to += ONIGENC_CODE_TO_MBC(enc, code, to); @@ -787,32 +799,8 @@ SpecialsCopy: *flagP = flags; return (int )(to - to_start); } +#endif -/* for extended grapheme cluster */ -/* TODO: generate from Unicode data */ -const OnigCodePoint -onigenc_unicode_GCB_ranges_GAZ[] = { - 0, -}; - -const OnigCodePoint -onigenc_unicode_GCB_ranges_E_Base[] = { - 3, - 0x1F3F3, 0x1F3F3, - 0x1F441, 0x1F441, - 0x1F46F, 0x1F46F, -}; - -const OnigCodePoint -onigenc_unicode_GCB_ranges_Emoji[] = { - 4, - 0x2640, 0x2640, - 0x2642, 0x2642, - 0x2695, 0x2696, - 0x2708, 0x2708, -}; - -#if 0 const char onigenc_unicode_version_string[] = #ifdef ONIG_UNICODE_VERSION_STRING ONIG_UNICODE_VERSION_STRING @@ -828,4 +816,3 @@ const int onigenc_unicode_version_number[3] = { 0 #endif }; -#endif |
