1 files changed, 140 insertions, 134 deletions
diff --git a/enc/unicode.c b/enc/unicode.c
index e72b2e64b2..5bc806863e 100644
--- a/enc/unicode.c
+++ b/enc/unicode.c
@@ -139,17 +139,17 @@ code3_equal(const OnigCodePoint *x, const OnigCodePoint *y)
 
 /* macros related to ONIGENC_CASE flags */
 /* defined here because not used in other files */
-#define ONIGENC_CASE_SPECIALS       (ONIGENC_CASE_TITLECASE|ONIGENC_CASE_IS_TITLECASE|ONIGENC_CASE_UP_SPECIAL|ONIGENC_CASE_DOWN_SPECIAL)
+#define ONIGENC_CASE_SPECIALS       (ONIGENC_CASE_TITLECASE | ONIGENC_CASE_IS_TITLECASE | ONIGENC_CASE_UP_SPECIAL | ONIGENC_CASE_DOWN_SPECIAL)
 
 /* macros for length in CaseMappingSpecials array in enc/unicode/casefold.h */
 #define SpecialsLengthOffset 25  /* needs to be higher than the 22 bits used for Unicode codepoints */
-#define SpecialsLengthExtract(n)    ((n)>>SpecialsLengthOffset)
-#define SpecialsCodepointExtract(n) ((n)&((1<<SpecialsLengthOffset)-1))
-#define SpecialsLengthEncode(n)     ((n)<<SpecialsLengthOffset)
+#define SpecialsLengthExtract(n)    ((n) >> SpecialsLengthOffset)
+#define SpecialsCodepointExtract(n) ((n) & ((1 << SpecialsLengthOffset) - 1))
+#define SpecialsLengthEncode(n)     ((n) << SpecialsLengthOffset)
 
-#define OnigSpecialIndexMask        (((1<<OnigSpecialIndexWidth)-1)<<OnigSpecialIndexShift)
-#define OnigSpecialIndexEncode(n)   ((n)<<OnigSpecialIndexShift)
-#define OnigSpecialIndexDecode(n)   (((n)&OnigSpecialIndexMask)>>OnigSpecialIndexShift)
+#define OnigSpecialIndexMask        (((1 << OnigSpecialIndexWidth) - 1) << OnigSpecialIndexShift)
+#define OnigSpecialIndexEncode(n)   ((n) << OnigSpecialIndexShift)
+#define OnigSpecialIndexDecode(n)   (((n) & OnigSpecialIndexMask) >> OnigSpecialIndexShift)
 
 /* macros to shorten "enc/unicode/casefold.h", undefined immediately after including the file */
 #define U ONIGENC_CASE_UPCASE
@@ -493,6 +493,10 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
 #endif
 
   if ((to = onigenc_unicode_fold_lookup(code)) != 0) {
+    if (OnigCodePointCount(to->n) == 0) {
+      /* a fold entry must always hold at least one codepoint */
+      UNREACHABLE_RETURN(0);
+    }
     if (OnigCodePointCount(to->n) == 1) {
       OnigCodePoint orig_code = code;
 
@@ -651,6 +655,7 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
   return n;
 }
 
+#ifdef USE_CASE_MAP_API
 /* length in bytes for three characters in UTF-32; e.g. needed for ffi (U+FB03) */
 #define CASE_MAPPING_SLACK 12
 #define MODIFIED (flags |= ONIGENC_CASE_MODIFIED)
@@ -660,140 +665,142 @@ onigenc_unicode_case_map(OnigCaseFoldType* flagP,
     OnigUChar* to, OnigUChar* to_end,
     const struct OnigEncodingTypeST* enc)
 {
-    OnigCodePoint code;
-    OnigUChar *to_start = to;
-    OnigCaseFoldType flags = *flagP;
-    int codepoint_length;
-
-    to_end -= CASE_MAPPING_SLACK;
-    /* copy flags ONIGENC_CASE_UPCASE     and ONIGENC_CASE_DOWNCASE over to
-     *            ONIGENC_CASE_UP_SPECIAL and ONIGENC_CASE_DOWN_SPECIAL */
-    flags |= (flags&(ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE))<<ONIGENC_CASE_SPECIAL_OFFSET;
-
-    while (*pp<end && to<=to_end) {
-	codepoint_length = ONIGENC_PRECISE_MBC_ENC_LEN(enc, *pp, end);
-	if (codepoint_length < 0)
-	    return codepoint_length; /* encoding invalid */
-	code = ONIGENC_MBC_TO_CODE(enc, *pp, end);
-	*pp += codepoint_length;
-
-	if (code<='z') { /* ASCII comes first */
-	    if (code>='a' && code<='z') {
-	        if (flags&ONIGENC_CASE_UPCASE) {
-		    MODIFIED;
-		    if (flags&ONIGENC_CASE_FOLD_TURKISH_AZERI && code=='i')
-			code = I_WITH_DOT_ABOVE;
-		    else
-			code += 'A'-'a';
-		}
-	    }
-	    else if (code>='A' && code<='Z') {
-		if (flags&(ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD)) {
-		    MODIFIED;
-		    if (flags&ONIGENC_CASE_FOLD_TURKISH_AZERI && code=='I')
-			code = DOTLESS_i;
-		    else
-			code += 'a'-'A';
-		}
-	    }
+  OnigCodePoint code;
+  OnigUChar *to_start = to;
+  OnigCaseFoldType flags = *flagP;
+  int codepoint_length;
+
+  to_end -= CASE_MAPPING_SLACK;
+  /* copy flags ONIGENC_CASE_UPCASE     and ONIGENC_CASE_DOWNCASE over to
+   *            ONIGENC_CASE_UP_SPECIAL and ONIGENC_CASE_DOWN_SPECIAL */
+  flags |= (flags & (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE)) << ONIGENC_CASE_SPECIAL_OFFSET;
+
+  while (*pp < end && to <= to_end) {
+    codepoint_length = ONIGENC_PRECISE_MBC_ENC_LEN(enc, *pp, end);
+    if (codepoint_length < 0)
+      return codepoint_length; /* encoding invalid */
+    code = ONIGENC_MBC_TO_CODE(enc, *pp, end);
+    *pp += codepoint_length;
+
+    if (code <= 'z') { /* ASCII comes first */
+      if (code >= 'a' /*&& code <= 'z'*/) {
+	if (flags & ONIGENC_CASE_UPCASE) {
+	  MODIFIED;
+	  if (flags & ONIGENC_CASE_FOLD_TURKISH_AZERI && code == 'i')
+	    code = I_WITH_DOT_ABOVE;
+          else
+            code -= 'a' - 'A';
 	}
-	else if (!(flags&ONIGENC_CASE_ASCII_ONLY) && code>=0x00B5) { /* deal with non-ASCII; micron sign (U+00B5) is lowest affected */
-	    const CodePointList3 *folded;
-
-	    if (code==I_WITH_DOT_ABOVE) {
-		if (flags&(ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD)) {
-		    MODIFIED;
-		    code = 'i';
-		    if (!(flags&ONIGENC_CASE_FOLD_TURKISH_AZERI)) { /* make dot above explicit */
-			to += ONIGENC_CODE_TO_MBC(enc, code, to);
-			code = DOT_ABOVE;
-		    }
-		}
-	    }
-	    else if (code==DOTLESS_i) { /* handle this manually, because it isn't involved in folding */
-		if (flags&ONIGENC_CASE_UPCASE)
-		    MODIFIED, code = 'I';
+      }
+      else if (code >= 'A' && code <= 'Z') {
+	if (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD)) {
+	  MODIFIED;
+	  if (flags & ONIGENC_CASE_FOLD_TURKISH_AZERI && code == 'I')
+	    code = DOTLESS_i;
+	  else
+	    code += 'a' - 'A';
+	}
+      }
+    }
+    else if (!(flags & ONIGENC_CASE_ASCII_ONLY) && code >= 0x00B5) { /* deal with non-ASCII; micro sign (U+00B5) is the lowest affected */
+      const CodePointList3 *folded;
+
+      if (code == I_WITH_DOT_ABOVE) {
+	if (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD)) {
+	  MODIFIED;
+	  code = 'i';
+	  if (!(flags & ONIGENC_CASE_FOLD_TURKISH_AZERI)) { /* make dot above explicit */
+	    to += ONIGENC_CODE_TO_MBC(enc, code, to);
+	    code = DOT_ABOVE;
+	  }
+	}
+      }
+      else if (code == DOTLESS_i) { /* handle this manually, because it isn't involved in folding */
+	if (flags & ONIGENC_CASE_UPCASE) {
+	  MODIFIED;
+	  code = 'I';
+	}
+      }
+      else if ((folded = onigenc_unicode_fold_lookup(code)) != 0) { /* data about character found in CaseFold_11_Table */
+	if ((flags & ONIGENC_CASE_TITLECASE) && code>=0x1C90 && code<=0x1CBF) { /* Georgian MTAVRULI */
+          MODIFIED;
+	  code += 0x10D0 - 0x1C90;
+        }
+        else if ((flags & ONIGENC_CASE_TITLECASE)                            /* Titlecase needed, */
+	    && (OnigCaseFoldFlags(folded->n) & ONIGENC_CASE_IS_TITLECASE)) { /* but already Titlecase  */
+	  /* already Titlecase, no changes needed */
+	}
+	else if (flags & OnigCaseFoldFlags(folded->n)) { /* needs and data availability match */
+	  const OnigCodePoint *next;
+	  int count;
+
+	  MODIFIED;
+	  if (flags & OnigCaseFoldFlags(folded->n) & ONIGENC_CASE_SPECIALS) { /* special */
+	    const OnigCodePoint *SpecialsStart = CaseMappingSpecials + OnigSpecialIndexDecode(folded->n);
+
+	    if (OnigCaseFoldFlags(folded->n) & ONIGENC_CASE_IS_TITLECASE) { /* swapCASE available */
+	      if ((flags & (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE))
+		  == (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE)) /* swapCASE needed */
+		goto SpecialsCopy;
+	      else /* swapCASE not needed */
+		SpecialsStart += SpecialsLengthExtract(*SpecialsStart);
 	    }
-	    else if ((folded = onigenc_unicode_fold_lookup(code)) != 0) { /* data about character found in CaseFold_11_Table */
-		if ((flags&ONIGENC_CASE_TITLECASE)                                 /* Titlecase needed, */
-		    && (OnigCaseFoldFlags(folded->n)&ONIGENC_CASE_IS_TITLECASE)) { /* but already Titlecase  */
-		    /* already Titlecase, no changes needed */
-		}
-		else if (flags&OnigCaseFoldFlags(folded->n)) { /* needs and data availability match */
-		    const OnigCodePoint *next;
-		    int count;
-
-		    MODIFIED;
-		    if (flags&OnigCaseFoldFlags(folded->n)&ONIGENC_CASE_SPECIALS) { /* special */
-			OnigCodePoint *SpecialsStart = CaseMappingSpecials + OnigSpecialIndexDecode(folded->n);
-
-			if (OnigCaseFoldFlags(folded->n)&ONIGENC_CASE_IS_TITLECASE) { /* swapCASE available */
-			    if ((flags&(ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE))
-				    == (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE)) /* swapCASE needed */
-			        goto SpecialsCopy;
-			    else /* swapCASE not needed */
-			        SpecialsStart += SpecialsLengthExtract(*SpecialsStart);
-			}
-			if (OnigCaseFoldFlags(folded->n)&ONIGENC_CASE_TITLECASE) { /* Titlecase available */
-			    if (flags&ONIGENC_CASE_TITLECASE) /* Titlecase needed, but not yet Titlecase */
-				goto SpecialsCopy;
-			    else /* Titlecase not needed */
-				SpecialsStart += SpecialsLengthExtract(*SpecialsStart);
-			}
-			if (OnigCaseFoldFlags(folded->n)&ONIGENC_CASE_DOWN_SPECIAL) {
-			    if (!(flags&ONIGENC_CASE_DOWN_SPECIAL))
-				SpecialsStart += SpecialsLengthExtract(*SpecialsStart);
-			}
-			/* here, we know we use ONIGENC_CASE_UP_SPECIAL, and the position is right */
-		      SpecialsCopy:
-		        count = SpecialsLengthExtract(*SpecialsStart);
-			next = SpecialsStart;
-			code = SpecialsCodepointExtract(*next++);
-		    }
-		    else { /* no specials */
-			count = OnigCodePointCount(folded->n);
-			next = folded->code;
-			code = *next++;
-		    }
-		    if (count==1)
-			;
-		    else if (count==2) {
-			to += ONIGENC_CODE_TO_MBC(enc, code, to);
-			code = *next;
-		    }
-		    else { /* count == 3 */
-			to += ONIGENC_CODE_TO_MBC(enc, code, to);
-			to += ONIGENC_CODE_TO_MBC(enc, *next++, to);
-			code = *next;
-		    }
-		}
+	    if (OnigCaseFoldFlags(folded->n) & ONIGENC_CASE_TITLECASE) { /* Titlecase available */
+	      if (flags & ONIGENC_CASE_TITLECASE) /* Titlecase needed, but not yet Titlecase */
+		goto SpecialsCopy;
+	      else /* Titlecase not needed */
+		SpecialsStart += SpecialsLengthExtract(*SpecialsStart);
 	    }
-	    else if ((folded = onigenc_unicode_unfold1_lookup(code)) != 0) {  /* data about character found in CaseUnfold_11_Table */
-		if (flags&OnigCaseFoldFlags(folded->n)) { /* needs and data availability match */
-		    MODIFIED;
-		    if (flags&OnigCaseFoldFlags(folded->n)&ONIGENC_CASE_TITLECASE)
-		        code = folded->code[1];
-		    else
-			code = folded->code[0];
-		}
-		else if ((flags&(ONIGENC_CASE_UPCASE))
-			 && (code==0x03B9||code==0x03BC)) { /* GREEK SMALL LETTERs IOTA/MU */
-		    MODIFIED;
-		    code = folded->code[1];
-		}
+	    if (OnigCaseFoldFlags(folded->n) & ONIGENC_CASE_DOWN_SPECIAL) {
+	      if (!(flags & ONIGENC_CASE_DOWN_SPECIAL))
+		SpecialsStart += SpecialsLengthExtract(*SpecialsStart);
 	    }
+	    /* here, we know we use ONIGENC_CASE_UP_SPECIAL, and the position is right */
+SpecialsCopy:
+	    count = SpecialsLengthExtract(*SpecialsStart);
+	    next = SpecialsStart;
+	    code = SpecialsCodepointExtract(*next++);
+	  }
+	  else { /* no specials */
+	    count = OnigCodePointCount(folded->n);
+	    next = folded->code;
+	    code = *next++;
+	  }
+	  if (count == 1)
+	    ;
+	  else if (count == 2) {
+	    to += ONIGENC_CODE_TO_MBC(enc, code, to);
+	    code = *next;
+	  }
+	  else { /* count == 3 */
+	    to += ONIGENC_CODE_TO_MBC(enc, code, to);
+	    to += ONIGENC_CODE_TO_MBC(enc, *next++, to);
+	    code = *next;
+	  }
+	}
+      }
+      else if ((folded = onigenc_unicode_unfold1_lookup(code)) != 0) { /* data about character found in CaseUnfold_11_Table */
+	if ((flags & ONIGENC_CASE_TITLECASE)                                 /* Titlecase needed, */
+	    && (OnigCaseFoldFlags(folded->n) & ONIGENC_CASE_IS_TITLECASE)) { /* but already Titlecase */
+	  /* already Titlecase, no changes needed */
 	}
-	to += ONIGENC_CODE_TO_MBC(enc, code, to);
-	/* switch from titlecase to lowercase for capitalize */
-	if (flags & ONIGENC_CASE_TITLECASE)
-	    flags ^= (ONIGENC_CASE_UPCASE    |ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE|
-		      ONIGENC_CASE_UP_SPECIAL|ONIGENC_CASE_DOWN_SPECIAL);
+	else if (flags & OnigCaseFoldFlags(folded->n)) { /* needs and data availability match */
+	  MODIFIED;
+	  code = folded->code[(flags & OnigCaseFoldFlags(folded->n) & ONIGENC_CASE_TITLECASE) ? 1 : 0];
+	}
+      }
     }
-    *flagP = flags;
-    return (int)(to-to_start);
+    to += ONIGENC_CODE_TO_MBC(enc, code, to);
+    /* switch from titlecase to lowercase for capitalize */
+    if (flags & ONIGENC_CASE_TITLECASE)
+      flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE |
+	  ONIGENC_CASE_UP_SPECIAL | ONIGENC_CASE_DOWN_SPECIAL);
+  }
+  *flagP = flags;
+  return (int )(to - to_start);
 }
+#endif
 
-#if 0
 const char onigenc_unicode_version_string[] =
 #ifdef ONIG_UNICODE_VERSION_STRING
     ONIG_UNICODE_VERSION_STRING
@@ -809,4 +816,3 @@ const int onigenc_unicode_version_number[3] = {
     0
 #endif
 };
-#endif