1 files changed, 268 insertions, 55 deletions
diff --git a/enc/unicode.c b/enc/unicode.c
index 12d3a5414b..5bc806863e 100644
--- a/enc/unicode.c
+++ b/enc/unicode.c
@@ -2,7 +2,7 @@
   unicode.c -  Oniguruma (regular expression library)
 **********************************************************************/
 /*-
- * Copyright (c) 2002-2008  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2013  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -137,9 +137,44 @@ code3_equal(const OnigCodePoint *x, const OnigCodePoint *y)
   return 1;
 }
 
-#include "enc/unicode/casefold.h"
-
-#include "enc/unicode/name2ctype.h"
+/* macros related to ONIGENC_CASE flags */
+/* defined here because not used in other files */
+#define ONIGENC_CASE_SPECIALS       (ONIGENC_CASE_TITLECASE | ONIGENC_CASE_IS_TITLECASE | ONIGENC_CASE_UP_SPECIAL | ONIGENC_CASE_DOWN_SPECIAL)
+
+/* macros for length in CaseMappingSpecials array in enc/unicode/casefold.h */
+#define SpecialsLengthOffset 25  /* needs to be higher than the 22 bits used for Unicode codepoints */
+#define SpecialsLengthExtract(n)    ((n) >> SpecialsLengthOffset)
+#define SpecialsCodepointExtract(n) ((n) & ((1 << SpecialsLengthOffset) - 1))
+#define SpecialsLengthEncode(n)     ((n) << SpecialsLengthOffset)
+
+#define OnigSpecialIndexMask        (((1 << OnigSpecialIndexWidth) - 1) << OnigSpecialIndexShift)
+#define OnigSpecialIndexEncode(n)   ((n) << OnigSpecialIndexShift)
+#define OnigSpecialIndexDecode(n)   (((n) & OnigSpecialIndexMask) >> OnigSpecialIndexShift)
+
+/* macros to shorten "enc/unicode/casefold.h", undefined immediately after including the file */
+#define U ONIGENC_CASE_UPCASE
+#define D ONIGENC_CASE_DOWNCASE
+#define F ONIGENC_CASE_FOLD
+#define ST ONIGENC_CASE_TITLECASE
+#define SU ONIGENC_CASE_UP_SPECIAL
+#define SL ONIGENC_CASE_DOWN_SPECIAL
+#define IT ONIGENC_CASE_IS_TITLECASE
+#define I(n) OnigSpecialIndexEncode(n)
+#define L(n) SpecialsLengthEncode(n)
+
+#include "casefold.h"
+
+#undef U
+#undef D
+#undef F
+#undef ST
+#undef SU
+#undef SL
+#undef IT
+#undef I
+#undef L
+
+#include "name2ctype.h"
 
 #define CODE_RANGES_NUM numberof(CodeRanges)
 
@@ -221,6 +256,12 @@ onigenc_unicode_property_name_to_ctype(OnigEncoding enc, const UChar* name, cons
 #define onigenc_unicode_unfold2_lookup onigenc_unicode_CaseUnfold_12_lookup
 #define onigenc_unicode_unfold3_lookup onigenc_unicode_CaseUnfold_13_lookup
 
+enum {
+  I_WITH_DOT_ABOVE = 0x0130,
+  DOTLESS_i = 0x0131,
+  DOT_ABOVE = 0x0307
+};
+
 extern int
 onigenc_unicode_mbc_case_fold(OnigEncoding enc,
     OnigCaseFoldType flag ARG_UNUSED, const UChar** pp, const UChar* end,
@@ -237,17 +278,17 @@ onigenc_unicode_mbc_case_fold(OnigEncoding enc,
 
 #ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
   if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {
-    if (code == 0x0049) {
-      return ONIGENC_CODE_TO_MBC(enc, 0x0131, fold);
+    if (code == 'I') {
+      return ONIGENC_CODE_TO_MBC(enc, DOTLESS_i, fold);
     }
-    else if (code == 0x0130) {
-      return ONIGENC_CODE_TO_MBC(enc, 0x0069, fold);
+    else if (code == I_WITH_DOT_ABOVE) {
+      return ONIGENC_CODE_TO_MBC(enc, 'i', fold);
     }
   }
 #endif
 
   if ((to = onigenc_unicode_fold_lookup(code)) != 0) {
-    if (to->n == 1) {
+    if (OnigCodePointCount(to->n) == 1) {
       return ONIGENC_CODE_TO_MBC(enc, to->code[0], fold);
     }
 #if 0
@@ -258,7 +299,7 @@ onigenc_unicode_mbc_case_fold(OnigEncoding enc,
 #endif
     {
       rlen = 0;
-      for (i = 0; i < to->n; i++) {
+      for (i = 0; i < OnigCodePointCount(to->n); i++) {
 	len = ONIGENC_CODE_TO_MBC(enc, to->code[i], fold);
 	fold += len;
 	rlen += len;
@@ -284,7 +325,7 @@ onigenc_unicode_apply_all_case_fold(OnigCaseFoldType flag,
 
   for (i = 0; i < numberof(CaseUnfold_11); i++) {
     p11 = &CaseUnfold_11[i];
-    for (j = 0; j < p11->to.n; j++) {
+    for (j = 0; j < OnigCodePointCount(p11->to.n); j++) {
       code = p11->from;
       r = (*f)(p11->to.code[j], &code, 1, arg);
       if (r != 0) return r;
@@ -305,25 +346,25 @@ onigenc_unicode_apply_all_case_fold(OnigCaseFoldType flag,
 
 #ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
   if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {
-    code = 0x0131;
-    r = (*f)(0x0049, &code, 1, arg);
+    code = DOTLESS_i;
+    r = (*f)('I', &code, 1, arg);
     if (r != 0) return r;
-    code = 0x0049;
-    r = (*f)(0x0131, &code, 1, arg);
+    code = 'I';
+    r = (*f)(DOTLESS_i, &code, 1, arg);
     if (r != 0) return r;
 
-    code = 0x0130;
-    r = (*f)(0x0069, &code, 1, arg);
+    code = I_WITH_DOT_ABOVE;
+    r = (*f)('i', &code, 1, arg);
     if (r != 0) return r;
-    code = 0x0069;
-    r = (*f)(0x0130, &code, 1, arg);
+    code = 'i';
+    r = (*f)(I_WITH_DOT_ABOVE, &code, 1, arg);
     if (r != 0) return r;
   }
   else {
 #endif
     for (i = 0; i < numberof(CaseUnfold_11_Locale); i++) {
       p11 = &CaseUnfold_11_Locale[i];
-      for (j = 0; j < p11->to.n; j++) {
+      for (j = 0; j < OnigCodePointCount(p11->to.n); j++) {
 	code = p11->from;
 	r = (*f)(p11->to.code[j], &code, 1, arg);
 	if (r != 0) return r;
@@ -349,12 +390,12 @@ onigenc_unicode_apply_all_case_fold(OnigCaseFoldType flag,
 
   if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
     for (i = 0; i < numberof(CaseUnfold_12); i++) {
-      for (j = 0; j < CaseUnfold_12[i].to.n; j++) {
+      for (j = 0; j < OnigCodePointCount(CaseUnfold_12[i].to.n); j++) {
 	r = (*f)(CaseUnfold_12[i].to.code[j],
 		 (OnigCodePoint* )CaseUnfold_12[i].from, 2, arg);
 	if (r != 0) return r;
 
-	for (k = 0; k < CaseUnfold_12[i].to.n; k++) {
+	for (k = 0; k < OnigCodePointCount(CaseUnfold_12[i].to.n); k++) {
 	  if (k == j) continue;
 
 	  r = (*f)(CaseUnfold_12[i].to.code[j],
@@ -368,12 +409,12 @@ onigenc_unicode_apply_all_case_fold(OnigCaseFoldType flag,
     if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) == 0) {
 #endif
       for (i = 0; i < numberof(CaseUnfold_12_Locale); i++) {
-	for (j = 0; j < CaseUnfold_12_Locale[i].to.n; j++) {
+	for (j = 0; j < OnigCodePointCount(CaseUnfold_12_Locale[i].to.n); j++) {
 	  r = (*f)(CaseUnfold_12_Locale[i].to.code[j],
 		   (OnigCodePoint* )CaseUnfold_12_Locale[i].from, 2, arg);
 	  if (r != 0) return r;
 
-	  for (k = 0; k < CaseUnfold_12_Locale[i].to.n; k++) {
+	  for (k = 0; k < OnigCodePointCount(CaseUnfold_12_Locale[i].to.n); k++) {
 	    if (k == j) continue;
 
 	    r = (*f)(CaseUnfold_12_Locale[i].to.code[j],
@@ -388,12 +429,12 @@ onigenc_unicode_apply_all_case_fold(OnigCaseFoldType flag,
 #endif
 
     for (i = 0; i < numberof(CaseUnfold_13); i++) {
-      for (j = 0; j < CaseUnfold_13[i].to.n; j++) {
+      for (j = 0; j < OnigCodePointCount(CaseUnfold_13[i].to.n); j++) {
 	r = (*f)(CaseUnfold_13[i].to.code[j],
 		 (OnigCodePoint* )CaseUnfold_13[i].from, 3, arg);
 	if (r != 0) return r;
 
-	for (k = 0; k < CaseUnfold_13[i].to.n; k++) {
+	for (k = 0; k < OnigCodePointCount(CaseUnfold_13[i].to.n); k++) {
 	  if (k == j) continue;
 
 	  r = (*f)(CaseUnfold_13[i].to.code[j],
@@ -407,6 +448,8 @@ onigenc_unicode_apply_all_case_fold(OnigCaseFoldType flag,
   return 0;
 }
 
+#define CodePointListValidP(x) (OnigCodePointCount((x)->n) <= numberof((x)->code))
+
 extern int
 onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
     OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end,
@@ -424,35 +467,37 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
 
 #ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
   if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {
-    if (code == 0x0049) {
+    switch (code) {
+    case 'I':
       items[0].byte_len = len;
       items[0].code_len = 1;
-      items[0].code[0]  = 0x0131;
+      items[0].code[0]  = DOTLESS_i;
       return 1;
-    }
-    else if (code == 0x0130) {
+    case I_WITH_DOT_ABOVE:
       items[0].byte_len = len;
       items[0].code_len = 1;
-      items[0].code[0]  = 0x0069;
+      items[0].code[0]  = 'i';
       return 1;
-    }
-    else if (code == 0x0131) {
+    case DOTLESS_i:
       items[0].byte_len = len;
       items[0].code_len = 1;
-      items[0].code[0]  = 0x0049;
+      items[0].code[0]  = 'I';
       return 1;
-    }
-    else if (code == 0x0069) {
+    case 'i':
       items[0].byte_len = len;
       items[0].code_len = 1;
-      items[0].code[0]  = 0x0130;
+      items[0].code[0]  = I_WITH_DOT_ABOVE;
       return 1;
     }
   }
 #endif
 
   if ((to = onigenc_unicode_fold_lookup(code)) != 0) {
-    if (to->n == 1) {
+    if (OnigCodePointCount(to->n) == 0) {
+      /* any codepoint should not be empty */
+      UNREACHABLE_RETURN(0);
+    }
+    if (OnigCodePointCount(to->n) == 1) {
       OnigCodePoint orig_code = code;
 
       items[0].byte_len = len;
@@ -461,8 +506,9 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
       n++;
 
       code = to->code[0];
-      if ((to = onigenc_unicode_unfold1_lookup(code)) != 0) {
-	for (i = 0; i < to->n; i++) {
+      if ((to = onigenc_unicode_unfold1_lookup(code)) != 0 &&
+	  CodePointListValidP(to)) {
+	for (i = 0; i < OnigCodePointCount(to->n); i++) {
 	  if (to->code[i] != orig_code) {
 	    items[n].byte_len = len;
 	    items[n].code_len = 1;
@@ -476,13 +522,13 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
       OnigCodePoint cs[3][4];
       int fn, ncs[3];
 
-      for (fn = 0; fn < to->n; fn++) {
+      for (fn = 0; fn < OnigCodePointCount(to->n); fn++) {
 	cs[fn][0] = to->code[fn];
 	if ((z3 = onigenc_unicode_unfold1_lookup(cs[fn][0])) != 0) {
-	  for (i = 0; i < z3->n; i++) {
+	  for (i = 0; i < OnigCodePointCount(z3->n); i++) {
 	    cs[fn][i+1] = z3->code[i];
 	  }
-	  ncs[fn] = z3->n + 1;
+	  ncs[fn] = OnigCodePointCount(z3->n) + 1;
 	}
 	else
 	  ncs[fn] = 1;
@@ -499,8 +545,9 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
 	  }
 	}
 
-	if ((z2 = onigenc_unicode_unfold2_lookup(to->code)) != 0) {
-	  for (i = 0; i < z2->n; i++) {
+	if ((z2 = onigenc_unicode_unfold2_lookup(to->code)) != 0 &&
+	    CodePointListValidP(z2)) {
+	  for (i = 0; i < OnigCodePointCount(z2->n); i++) {
 	    if (z2->code[i] == code) continue;
 
 	    items[n].byte_len = len;
@@ -524,8 +571,9 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
 	  }
 	}
 
-	if ((z2 = onigenc_unicode_unfold3_lookup(to->code)) != 0) {
-	  for (i = 0; i < z2->n; i++) {
+	if ((z2 = onigenc_unicode_unfold3_lookup(to->code)) != 0 &&
+	    CodePointListValidP(z2)) {
+	  for (i = 0; i < OnigCodePointCount(z2->n); i++) {
 	    if (z2->code[i] == code) continue;
 
 	    items[n].byte_len = len;
@@ -541,8 +589,9 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
     }
   }
   else {
-    if ((to = onigenc_unicode_unfold1_lookup(code)) != 0) {
-      for (i = 0; i < to->n; i++) {
+    if ((to = onigenc_unicode_unfold1_lookup(code)) != 0 &&
+	CodePointListValidP(to)) {
+      for (i = 0; i < OnigCodePointCount(to->n); i++) {
 	items[n].byte_len = len;
 	items[n].code_len = 1;
 	items[n].code[0]  = to->code[i];
@@ -560,7 +609,7 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
       codes[0] = code;
       code = ONIGENC_MBC_TO_CODE(enc, p, end);
       if ((to = onigenc_unicode_fold_lookup(code)) != 0
-	  && to->n == 1) {
+	  && OnigCodePointCount(to->n) == 1) {
 	codes[1] = to->code[0];
       }
       else
@@ -568,8 +617,9 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
 
       clen = enclen(enc, p, end);
       len += clen;
-      if ((z2 = onigenc_unicode_unfold2_lookup(codes)) != 0) {
-	for (i = 0; i < z2->n; i++) {
+      if ((z2 = onigenc_unicode_unfold2_lookup(codes)) != 0 &&
+	  CodePointListValidP(z2)) {
+	for (i = 0; i < OnigCodePointCount(z2->n); i++) {
 	  items[n].byte_len = len;
 	  items[n].code_len = 1;
 	  items[n].code[0]  = z2->code[i];
@@ -581,7 +631,7 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
       if (p < end) {
 	code = ONIGENC_MBC_TO_CODE(enc, p, end);
 	if ((to = onigenc_unicode_fold_lookup(code)) != 0
-	    && to->n == 1) {
+	    && OnigCodePointCount(to->n) == 1) {
 	  codes[2] = to->code[0];
 	}
 	else
@@ -589,8 +639,9 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
 
 	clen = enclen(enc, p, end);
 	len += clen;
-	if ((z2 = onigenc_unicode_unfold3_lookup(codes)) != 0) {
-	  for (i = 0; i < z2->n; i++) {
+	if ((z2 = onigenc_unicode_unfold3_lookup(codes)) != 0 &&
+	    CodePointListValidP(z2)) {
+	  for (i = 0; i < OnigCodePointCount(z2->n); i++) {
 	    items[n].byte_len = len;
 	    items[n].code_len = 1;
 	    items[n].code[0]  = z2->code[i];
@@ -603,3 +654,165 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
 
   return n;
 }
+
+#ifdef USE_CASE_MAP_API
+/* length in bytes for three characters in UTF-32; e.g. needed for ffi (U+FB03) */
+#define CASE_MAPPING_SLACK 12
+#define MODIFIED (flags |= ONIGENC_CASE_MODIFIED)
+extern int
+onigenc_unicode_case_map(OnigCaseFoldType* flagP,
+    const OnigUChar** pp, const OnigUChar* end,
+    OnigUChar* to, OnigUChar* to_end,
+    const struct OnigEncodingTypeST* enc)
+{
+  OnigCodePoint code;
+  OnigUChar *to_start = to;
+  OnigCaseFoldType flags = *flagP;
+  int codepoint_length;
+
+  to_end -= CASE_MAPPING_SLACK;
+  /* copy flags ONIGENC_CASE_UPCASE     and ONIGENC_CASE_DOWNCASE over to
+   *            ONIGENC_CASE_UP_SPECIAL and ONIGENC_CASE_DOWN_SPECIAL */
+  flags |= (flags & (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE)) << ONIGENC_CASE_SPECIAL_OFFSET;
+
+  while (*pp < end && to <= to_end) {
+    codepoint_length = ONIGENC_PRECISE_MBC_ENC_LEN(enc, *pp, end);
+    if (codepoint_length < 0)
+      return codepoint_length; /* encoding invalid */
+    code = ONIGENC_MBC_TO_CODE(enc, *pp, end);
+    *pp += codepoint_length;
+
+    if (code <= 'z') { /* ASCII comes first */
+      if (code >= 'a' /*&& code <= 'z'*/) {
+	if (flags & ONIGENC_CASE_UPCASE) {
+	  MODIFIED;
+	  if (flags & ONIGENC_CASE_FOLD_TURKISH_AZERI && code == 'i')
+	    code = I_WITH_DOT_ABOVE;
+          else
+            code -= 'a' - 'A';
+	}
+      }
+      else if (code >= 'A' && code <= 'Z') {
+	if (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD)) {
+	  MODIFIED;
+	  if (flags & ONIGENC_CASE_FOLD_TURKISH_AZERI && code == 'I')
+	    code = DOTLESS_i;
+	  else
+	    code += 'a' - 'A';
+	}
+      }
+    }
+    else if (!(flags & ONIGENC_CASE_ASCII_ONLY) && code >= 0x00B5) { /* deal with non-ASCII; micron sign (U+00B5) is lowest affected */
+      const CodePointList3 *folded;
+
+      if (code == I_WITH_DOT_ABOVE) {
+	if (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD)) {
+	  MODIFIED;
+	  code = 'i';
+	  if (!(flags & ONIGENC_CASE_FOLD_TURKISH_AZERI)) { /* make dot above explicit */
+	    to += ONIGENC_CODE_TO_MBC(enc, code, to);
+	    code = DOT_ABOVE;
+	  }
+	}
+      }
+      else if (code == DOTLESS_i) { /* handle this manually, because it isn't involved in folding */
+	if (flags & ONIGENC_CASE_UPCASE) {
+	  MODIFIED;
+	  code = 'I';
+	}
+      }
+      else if ((folded = onigenc_unicode_fold_lookup(code)) != 0) { /* data about character found in CaseFold_11_Table */
+	if ((flags & ONIGENC_CASE_TITLECASE) && code>=0x1C90 && code<=0x1CBF) { /* Georgian MTAVRULI */
+          MODIFIED;
+	  code += 0x10D0 - 0x1C90;
+        }
+        else if ((flags & ONIGENC_CASE_TITLECASE)                            /* Titlecase needed, */
+	    && (OnigCaseFoldFlags(folded->n) & ONIGENC_CASE_IS_TITLECASE)) { /* but already Titlecase  */
+	  /* already Titlecase, no changes needed */
+	}
+	else if (flags & OnigCaseFoldFlags(folded->n)) { /* needs and data availability match */
+	  const OnigCodePoint *next;
+	  int count;
+
+	  MODIFIED;
+	  if (flags & OnigCaseFoldFlags(folded->n) & ONIGENC_CASE_SPECIALS) { /* special */
+	    const OnigCodePoint *SpecialsStart = CaseMappingSpecials + OnigSpecialIndexDecode(folded->n);
+
+	    if (OnigCaseFoldFlags(folded->n) & ONIGENC_CASE_IS_TITLECASE) { /* swapCASE available */
+	      if ((flags & (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE))
+		  == (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE)) /* swapCASE needed */
+		goto SpecialsCopy;
+	      else /* swapCASE not needed */
+		SpecialsStart += SpecialsLengthExtract(*SpecialsStart);
+	    }
+	    if (OnigCaseFoldFlags(folded->n) & ONIGENC_CASE_TITLECASE) { /* Titlecase available */
+	      if (flags & ONIGENC_CASE_TITLECASE) /* Titlecase needed, but not yet Titlecase */
+		goto SpecialsCopy;
+	      else /* Titlecase not needed */
+		SpecialsStart += SpecialsLengthExtract(*SpecialsStart);
+	    }
+	    if (OnigCaseFoldFlags(folded->n) & ONIGENC_CASE_DOWN_SPECIAL) {
+	      if (!(flags & ONIGENC_CASE_DOWN_SPECIAL))
+		SpecialsStart += SpecialsLengthExtract(*SpecialsStart);
+	    }
+	    /* here, we know we use ONIGENC_CASE_UP_SPECIAL, and the position is right */
+SpecialsCopy:
+	    count = SpecialsLengthExtract(*SpecialsStart);
+	    next = SpecialsStart;
+	    code = SpecialsCodepointExtract(*next++);
+	  }
+	  else { /* no specials */
+	    count = OnigCodePointCount(folded->n);
+	    next = folded->code;
+	    code = *next++;
+	  }
+	  if (count == 1)
+	    ;
+	  else if (count == 2) {
+	    to += ONIGENC_CODE_TO_MBC(enc, code, to);
+	    code = *next;
+	  }
+	  else { /* count == 3 */
+	    to += ONIGENC_CODE_TO_MBC(enc, code, to);
+	    to += ONIGENC_CODE_TO_MBC(enc, *next++, to);
+	    code = *next;
+	  }
+	}
+      }
+      else if ((folded = onigenc_unicode_unfold1_lookup(code)) != 0) { /* data about character found in CaseUnfold_11_Table */
+	if ((flags & ONIGENC_CASE_TITLECASE)                                 /* Titlecase needed, */
+	    && (OnigCaseFoldFlags(folded->n) & ONIGENC_CASE_IS_TITLECASE)) { /* but already Titlecase */
+	  /* already Titlecase, no changes needed */
+	}
+	else if (flags & OnigCaseFoldFlags(folded->n)) { /* needs and data availability match */
+	  MODIFIED;
+	  code = folded->code[(flags & OnigCaseFoldFlags(folded->n) & ONIGENC_CASE_TITLECASE) ? 1 : 0];
+	}
+      }
+    }
+    to += ONIGENC_CODE_TO_MBC(enc, code, to);
+    /* switch from titlecase to lowercase for capitalize */
+    if (flags & ONIGENC_CASE_TITLECASE)
+      flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE |
+	  ONIGENC_CASE_UP_SPECIAL | ONIGENC_CASE_DOWN_SPECIAL);
+  }
+  *flagP = flags;
+  return (int )(to - to_start);
+}
+#endif
+
+const char onigenc_unicode_version_string[] =
+#ifdef ONIG_UNICODE_VERSION_STRING
+    ONIG_UNICODE_VERSION_STRING
+#endif
+    "";
+
+const int onigenc_unicode_version_number[3] = {
+#ifdef ONIG_UNICODE_VERSION_MAJOR
+    ONIG_UNICODE_VERSION_MAJOR,
+    ONIG_UNICODE_VERSION_MINOR,
+    ONIG_UNICODE_VERSION_TEENY,
+#else
+    0
+#endif
+};