summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ChangeLog6
-rw-r--r--ascii.c19
-rw-r--r--common.mk3
-rw-r--r--defines.h1
-rw-r--r--euc_jp.c186
-rw-r--r--oniguruma.h245
-rw-r--r--re.c4
-rw-r--r--regcomp.c867
-rw-r--r--regenc.c632
-rw-r--r--regenc.h94
-rw-r--r--regerror.c6
-rw-r--r--regexec.c531
-rw-r--r--regint.h281
-rw-r--r--regparse.c1022
-rw-r--r--regparse.h131
-rw-r--r--regsyntax.c315
-rw-r--r--sjis.c176
-rw-r--r--unicode.c11344
-rw-r--r--utf8.c3525
19 files changed, 13885 insertions, 5503 deletions
diff --git a/ChangeLog b/ChangeLog
index f2f7fbe923..4987dae622 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+Wed May 23 10:31:53 2007 Yukihiro Matsumoto <matz@ruby-lang.org>
+
+ * oniguruma.h: updated to Oniguruma 5.7.0.
+
+ * regsyntax.c, unicode.c: new files along with Oniguruma 5.x.
+
Wed May 23 05:49:49 2007 Nobuyoshi Nakada <nobu@ruby-lang.org>
* ext/extmk.rb, ext/purelib.rb, lib/mkmf.rb, runruby.rb: clear default
diff --git a/ascii.c b/ascii.c
index 64be21d7ff..c2715f4e0d 100644
--- a/ascii.c
+++ b/ascii.c
@@ -2,7 +2,7 @@
ascii.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2004 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -43,23 +43,14 @@ OnigEncodingType OnigEncodingASCII = {
"US-ASCII", /* name */
1, /* max byte length */
1, /* min byte length */
- ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE,
- {
- (OnigCodePoint )'\\' /* esc */
- , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
- , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
- , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
- , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
- , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
- },
onigenc_is_mbc_newline_0x0a,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
- onigenc_ascii_mbc_to_normalize,
- onigenc_ascii_is_mbc_ambiguous,
- onigenc_ascii_get_all_pair_ambig_codes,
- onigenc_nothing_get_all_comp_ambig_codes,
+ onigenc_ascii_mbc_case_fold,
+ onigenc_ascii_apply_all_case_fold,
+ onigenc_ascii_get_case_fold_codes_by_str,
+ onigenc_minimum_property_name_to_ctype,
ascii_is_code_ctype,
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
diff --git a/common.mk b/common.mk
index 536d996378..8052dae088 100644
--- a/common.mk
+++ b/common.mk
@@ -61,6 +61,7 @@ OBJS = array.$(OBJEXT) \
string.$(OBJEXT) \
struct.$(OBJEXT) \
time.$(OBJEXT) \
+ unicode.$(OBJEXT) \
utf8.$(OBJEXT) \
util.$(OBJEXT) \
variable.$(OBJEXT) \
@@ -520,6 +521,8 @@ thread.$(OBJEXT): {$(VPATH)}thread.c {$(VPATH)}eval_intern.h \
{$(VPATH)}rubysig.h {$(VPATH)}st.h {$(VPATH)}dln.h
time.$(OBJEXT): {$(VPATH)}time.c {$(VPATH)}ruby.h config.h \
{$(VPATH)}defines.h {$(VPATH)}intern.h {$(VPATH)}missing.h
+unicode.$(OBJEXT): {$(VPATH)}unicode.c {$(VPATH)}regenc.h \
+ {$(VPATH)}oniguruma.h config.h
utf8.$(OBJEXT): {$(VPATH)}utf8.c {$(VPATH)}regenc.h \
{$(VPATH)}oniguruma.h config.h
util.$(OBJEXT): {$(VPATH)}util.c {$(VPATH)}ruby.h config.h \
diff --git a/defines.h b/defines.h
index c8c1350554..1f5a0054f1 100644
--- a/defines.h
+++ b/defines.h
@@ -12,6 +12,7 @@
#define RUBY
+#include <stdlib.h>
#ifdef __cplusplus
# ifndef HAVE_PROTOTYPES
# define HAVE_PROTOTYPES 1
diff --git a/euc_jp.c b/euc_jp.c
index 71c81ee9fe..bbe888d417 100644
--- a/euc_jp.c
+++ b/euc_jp.c
@@ -2,7 +2,7 @@
euc_jp.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -27,7 +27,7 @@
* SUCH DAMAGE.
*/
-#include "regenc.h"
+#include "regint.h"
#define eucjp_islead(c) ((UChar )((c) - 0xa1) > 0xfe - 0xa1)
@@ -51,13 +51,13 @@ static const int EncLen_EUCJP[] = {
};
static int
-eucjp_mbc_enc_len(const UChar* p)
+mbc_enc_len(const UChar* p)
{
return EncLen_EUCJP[*p];
}
static OnigCodePoint
-eucjp_mbc_to_code(const UChar* p, const UChar* end)
+mbc_to_code(const UChar* p, const UChar* end)
{
int c, i, len;
OnigCodePoint n;
@@ -75,7 +75,7 @@ eucjp_mbc_to_code(const UChar* p, const UChar* end)
}
static int
-eucjp_code_to_mbclen(OnigCodePoint code)
+code_to_mbclen(OnigCodePoint code)
{
if (ONIGENC_IS_CODE_ASCII(code)) return 1;
else if ((code & 0xff0000) != 0) return 3;
@@ -85,7 +85,7 @@ eucjp_code_to_mbclen(OnigCodePoint code)
#if 0
static int
-eucjp_code_to_mbc_first(OnigCodePoint code)
+code_to_mbc_first(OnigCodePoint code)
{
int first;
@@ -103,7 +103,7 @@ eucjp_code_to_mbc_first(OnigCodePoint code)
#endif
static int
-eucjp_code_to_mbc(OnigCodePoint code, UChar *buf)
+code_to_mbc(OnigCodePoint code, UChar *buf)
{
UChar *p = buf;
@@ -119,59 +119,31 @@ eucjp_code_to_mbc(OnigCodePoint code, UChar *buf)
}
static int
-eucjp_mbc_to_normalize(OnigAmbigType flag,
- const UChar** pp, const UChar* end, UChar* lower)
+mbc_case_fold(OnigCaseFoldType flag,
+ const UChar** pp, const UChar* end, UChar* lower)
{
int len;
const UChar* p = *pp;
if (ONIGENC_IS_MBC_ASCII(p)) {
- if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) {
- *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
- }
- else {
- *lower = *p;
- }
-
+ *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
(*pp)++;
return 1;
}
else {
+ int i;
+
len = enc_len(ONIG_ENCODING_EUC_JP, p);
- if (lower != p) {
- int i;
- for (i = 0; i < len; i++) {
- *lower++ = *p++;
- }
+ for (i = 0; i < len; i++) {
+ *lower++ = *p++;
}
(*pp) += len;
return len; /* return byte length of converted char to lower */
}
}
-static int
-eucjp_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
-{
- return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_EUC_JP, flag, pp, end);
-}
-
-static int
-eucjp_is_code_ctype(OnigCodePoint code, unsigned int ctype)
-{
- if (code < 128)
- return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
- else {
- if ((ctype & (ONIGENC_CTYPE_WORD |
- ONIGENC_CTYPE_GRAPH | ONIGENC_CTYPE_PRINT)) != 0) {
- return (eucjp_code_to_mbclen(code) > 1 ? TRUE : FALSE);
- }
- }
-
- return FALSE;
-}
-
static UChar*
-eucjp_left_adjust_char_head(const UChar* start, const UChar* s)
+left_adjust_char_head(const UChar* start, const UChar* s)
{
/* In this encoding
mb-trail bytes doesn't mix with single bytes.
@@ -190,7 +162,7 @@ eucjp_left_adjust_char_head(const UChar* start, const UChar* s)
}
static int
-eucjp_is_allowed_reverse_match(const UChar* s, const UChar* end)
+is_allowed_reverse_match(const UChar* s, const UChar* end)
{
const UChar c = *s;
if (c <= 0x7e || c == 0x8e || c == 0x8f)
@@ -199,30 +171,114 @@ eucjp_is_allowed_reverse_match(const UChar* s, const UChar* end)
return FALSE;
}
+
+static int PropertyInited = 0;
+static const OnigCodePoint** PropertyList;
+static int PropertyListNum;
+static int PropertyListSize;
+static hash_table_type* PropertyNameTable;
+
+static const OnigCodePoint CR_Hiragana[] = {
+ 1,
+ 0xa4a1, 0xa4f3
+}; /* CR_Hiragana */
+
+static const OnigCodePoint CR_Katakana[] = {
+ 3,
+ 0xa5a1, 0xa5f6,
+ 0xaaa6, 0xaaaf,
+ 0xaab1, 0xaadd
+}; /* CR_Katakana */
+
+static int
+init_property_list(void)
+{
+ int r;
+
+ PROPERTY_LIST_ADD_PROP("Hiragana", CR_Hiragana);
+ PROPERTY_LIST_ADD_PROP("Katakana", CR_Katakana);
+ PropertyInited = 1;
+
+ end:
+ return r;
+}
+
+static int
+property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end)
+{
+ int ctype;
+
+ PROPERTY_LIST_INIT_CHECK;
+
+ if (onig_st_lookup_strend(PropertyNameTable, p, end, (void*)&ctype) == 0) {
+ return onigenc_minimum_property_name_to_ctype(enc, p, end);
+ }
+
+ return ctype;
+}
+
+static int
+is_code_ctype(OnigCodePoint code, unsigned int ctype)
+{
+ if (ctype <= ONIGENC_MAX_STD_CTYPE) {
+ if (code < 128)
+ return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
+ else {
+ if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) {
+ return (code_to_mbclen(code) > 1 ? TRUE : FALSE);
+ }
+ }
+ }
+ else {
+ PROPERTY_LIST_INIT_CHECK;
+
+ ctype -= (ONIGENC_MAX_STD_CTYPE + 1);
+ if (ctype >= (unsigned int )PropertyListNum)
+ return ONIGENCERR_TYPE_BUG;
+
+ return onig_is_in_code_range((UChar* )PropertyList[ctype], code);
+ }
+
+ return FALSE;
+}
+
+static int
+get_ctype_code_range(int ctype, OnigCodePoint* sb_out,
+ const OnigCodePoint* ranges[])
+{
+ if (ctype <= ONIGENC_MAX_STD_CTYPE) {
+ return ONIG_NO_SUPPORT_CONFIG;
+ }
+ else {
+ *sb_out = 0x80;
+
+ PROPERTY_LIST_INIT_CHECK;
+
+ ctype -= (ONIGENC_MAX_STD_CTYPE + 1);
+ if (ctype >= PropertyListNum)
+ return ONIGENCERR_TYPE_BUG;
+
+ *ranges = PropertyList[ctype];
+ return 0;
+ }
+}
+
+
OnigEncodingType OnigEncodingEUC_JP = {
- eucjp_mbc_enc_len,
+ mbc_enc_len,
"EUC-JP", /* name */
3, /* max enc length */
1, /* min enc length */
- ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE,
- {
- (OnigCodePoint )'\\' /* esc */
- , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
- , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
- , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
- , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
- , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
- },
onigenc_is_mbc_newline_0x0a,
- eucjp_mbc_to_code,
- eucjp_code_to_mbclen,
- eucjp_code_to_mbc,
- eucjp_mbc_to_normalize,
- eucjp_is_mbc_ambiguous,
- onigenc_ascii_get_all_pair_ambig_codes,
- onigenc_nothing_get_all_comp_ambig_codes,
- eucjp_is_code_ctype,
- onigenc_not_support_get_ctype_code_range,
- eucjp_left_adjust_char_head,
- eucjp_is_allowed_reverse_match
+ mbc_to_code,
+ code_to_mbclen,
+ code_to_mbc,
+ mbc_case_fold,
+ onigenc_ascii_apply_all_case_fold,
+ onigenc_ascii_get_case_fold_codes_by_str,
+ property_name_to_ctype,
+ is_code_ctype,
+ get_ctype_code_range,
+ left_adjust_char_head,
+ is_allowed_reverse_match
};
diff --git a/oniguruma.h b/oniguruma.h
index 2dd5323609..97c581dfd6 100644
--- a/oniguruma.h
+++ b/oniguruma.h
@@ -4,7 +4,7 @@
oniguruma.h - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -34,9 +34,9 @@ extern "C" {
#endif
#define ONIGURUMA
-#define ONIGURUMA_VERSION_MAJOR 4
-#define ONIGURUMA_VERSION_MINOR 4
-#define ONIGURUMA_VERSION_TEENY 5
+#define ONIGURUMA_VERSION_MAJOR 5
+#define ONIGURUMA_VERSION_MINOR 7
+#define ONIGURUMA_VERSION_TEENY 0
#ifdef __cplusplus
# ifndef HAVE_PROTOTYPES
@@ -96,30 +96,23 @@ typedef unsigned int OnigDistance;
#define ONIG_INFINITE_DISTANCE ~((OnigDistance )0)
-/* ambiguous match flag */
-typedef unsigned int OnigAmbigType;
+/* case fold flag */
+typedef unsigned int OnigCaseFoldType;
-ONIG_EXTERN OnigAmbigType OnigDefaultAmbigFlag;
+ONIG_EXTERN OnigCaseFoldType OnigDefaultCaseFoldFlag;
-#define ONIGENC_AMBIGUOUS_MATCH_NONE 0
-#define ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE (1<<0)
-#define ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE (1<<1)
-/* #define ONIGENC_AMBIGUOUS_MATCH_ACCENT (1<<2) */
-/* #define ONIGENC_AMBIGUOUS_MATCH_HIRAGANA_KATAKANA (1<<3) */
-/* #define ONIGENC_AMBIGUOUS_MATCH_KATAKANA_WIDTH (1<<4) */
+/* #define ONIGENC_CASE_FOLD_HIRAGANA_KATAKANA (1<<1) */
+/* #define ONIGENC_CASE_FOLD_KATAKANA_WIDTH (1<<2) */
+#define ONIGENC_CASE_FOLD_TURKISH_AZERI (1<<20)
+#define INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR (1<<30)
-#define ONIGENC_AMBIGUOUS_MATCH_LIMIT (1<<1)
-#define ONIGENC_AMBIGUOUS_MATCH_COMPOUND (1<<30)
+#define ONIGENC_CASE_FOLD_MIN INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR
+#define ONIGENC_CASE_FOLD_DEFAULT OnigDefaultCaseFoldFlag
-#define ONIGENC_AMBIGUOUS_MATCH_FULL \
- ( ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | \
- ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE | \
- ONIGENC_AMBIGUOUS_MATCH_COMPOUND )
-#define ONIGENC_AMBIGUOUS_MATCH_DEFAULT OnigDefaultAmbigFlag
-
-#define ONIGENC_MAX_COMP_AMBIG_CODE_LEN 3
-#define ONIGENC_MAX_COMP_AMBIG_CODE_ITEM_NUM 4
+#define ONIGENC_MAX_COMP_CASE_FOLD_CODE_LEN 3
+#define ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM 13
+/* 13 => Unicode:0x1ffc */
/* code range */
#define ONIGENC_CODE_RANGE_NUM(range) ((int )range[0])
@@ -127,20 +120,10 @@ ONIG_EXTERN OnigAmbigType OnigDefaultAmbigFlag;
#define ONIGENC_CODE_RANGE_TO(range,i) range[((i)*2) + 2]
typedef struct {
- int len;
- OnigCodePoint code[ONIGENC_MAX_COMP_AMBIG_CODE_LEN];
-} OnigCompAmbigCodeItem;
-
-typedef struct {
- int n;
- OnigCodePoint code;
- OnigCompAmbigCodeItem items[ONIGENC_MAX_COMP_AMBIG_CODE_ITEM_NUM];
-} OnigCompAmbigCodes;
-
-typedef struct {
- OnigCodePoint from;
- OnigCodePoint to;
-} OnigPairAmbigCodes;
+ int byte_len; /* argument(original) character(s) byte length */
+ int code_len; /* number of code */
+ OnigCodePoint code[ONIGENC_MAX_COMP_CASE_FOLD_CODE_LEN];
+} OnigCaseFoldCodeItem;
typedef struct {
OnigCodePoint esc;
@@ -150,32 +133,24 @@ typedef struct {
OnigCodePoint one_or_more_time;
OnigCodePoint anychar_anytime;
} OnigMetaCharTableType;
+
+typedef int (*OnigApplyAllCaseFoldFunc)(OnigCodePoint from, OnigCodePoint* to, int to_len, void* arg);
-
-#if defined(RUBY_PLATFORM) && defined(M17N_H)
-
-#define ONIG_RUBY_M17N
-typedef m17n_encoding* OnigEncoding;
-
-#else
-
-typedef struct {
+typedef struct OnigEncodingTypeST {
int (*mbc_enc_len)(const OnigUChar* p);
const char* name;
int max_enc_len;
int min_enc_len;
- OnigAmbigType support_ambig_flag;
- OnigMetaCharTableType meta_char_table;
int (*is_mbc_newline)(const OnigUChar* p, const OnigUChar* end);
OnigCodePoint (*mbc_to_code)(const OnigUChar* p, const OnigUChar* end);
int (*code_to_mbclen)(OnigCodePoint code);
int (*code_to_mbc)(OnigCodePoint code, OnigUChar *buf);
- int (*mbc_to_normalize)(OnigAmbigType flag, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to);
- int (*is_mbc_ambiguous)(OnigAmbigType flag, const OnigUChar** pp, const OnigUChar* end);
- int (*get_all_pair_ambig_codes)(OnigAmbigType flag, const OnigPairAmbigCodes** acs);
- int (*get_all_comp_ambig_codes)(OnigAmbigType flag, const OnigCompAmbigCodes** acs);
+ int (*mbc_case_fold)(OnigCaseFoldType flag, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to);
+ int (*apply_all_case_fold)(OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg);
+ int (*get_case_fold_codes_by_str)(OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem acs[]);
+ int (*property_name_to_ctype)(struct OnigEncodingTypeST* enc, OnigUChar* p, OnigUChar* end);
int (*is_code_ctype)(OnigCodePoint code, unsigned int ctype);
- int (*get_ctype_code_range)(int ctype, const OnigCodePoint* sb_range[], const OnigCodePoint* mb_range[]);
+ int (*get_ctype_code_range)(int ctype, OnigCodePoint* sb_out, const OnigCodePoint* ranges[]);
OnigUChar* (*left_adjust_char_head)(const OnigUChar* start, const OnigUChar* p);
int (*is_allowed_reverse_match)(const OnigUChar* p, const OnigUChar* end);
} OnigEncodingType;
@@ -210,6 +185,7 @@ ONIG_EXTERN OnigEncodingType OnigEncodingEUC_CN;
ONIG_EXTERN OnigEncodingType OnigEncodingSJIS;
ONIG_EXTERN OnigEncodingType OnigEncodingKOI8;
ONIG_EXTERN OnigEncodingType OnigEncodingKOI8_R;
+ONIG_EXTERN OnigEncodingType OnigEncodingCP1251;
ONIG_EXTERN OnigEncodingType OnigEncodingBIG5;
ONIG_EXTERN OnigEncodingType OnigEncodingGB18030;
@@ -241,34 +217,36 @@ ONIG_EXTERN OnigEncodingType OnigEncodingGB18030;
#define ONIG_ENCODING_SJIS (&OnigEncodingSJIS)
#define ONIG_ENCODING_KOI8 (&OnigEncodingKOI8)
#define ONIG_ENCODING_KOI8_R (&OnigEncodingKOI8_R)
+#define ONIG_ENCODING_CP1251 (&OnigEncodingCP1251)
#define ONIG_ENCODING_BIG5 (&OnigEncodingBIG5)
#define ONIG_ENCODING_GB18030 (&OnigEncodingGB18030)
-#endif /* else RUBY && M17N */
-
#define ONIG_ENCODING_UNDEF ((OnigEncoding )0)
/* work size */
-#define ONIGENC_CODE_TO_MBC_MAXLEN 7
-#define ONIGENC_MBC_NORMALIZE_MAXLEN ONIGENC_CODE_TO_MBC_MAXLEN
+#define ONIGENC_CODE_TO_MBC_MAXLEN 7
+#define ONIGENC_MBC_CASE_FOLD_MAXLEN 18
+/* 18: 6(max-byte) * 3(case-fold chars) */
/* character types */
-#define ONIGENC_CTYPE_NEWLINE (1<< 0)
-#define ONIGENC_CTYPE_ALPHA (1<< 1)
-#define ONIGENC_CTYPE_BLANK (1<< 2)
-#define ONIGENC_CTYPE_CNTRL (1<< 3)
-#define ONIGENC_CTYPE_DIGIT (1<< 4)
-#define ONIGENC_CTYPE_GRAPH (1<< 5)
-#define ONIGENC_CTYPE_LOWER (1<< 6)
-#define ONIGENC_CTYPE_PRINT (1<< 7)
-#define ONIGENC_CTYPE_PUNCT (1<< 8)
-#define ONIGENC_CTYPE_SPACE (1<< 9)
-#define ONIGENC_CTYPE_UPPER (1<<10)
-#define ONIGENC_CTYPE_XDIGIT (1<<11)
-#define ONIGENC_CTYPE_WORD (1<<12)
-#define ONIGENC_CTYPE_ASCII (1<<13)
-#define ONIGENC_CTYPE_ALNUM (ONIGENC_CTYPE_ALPHA | ONIGENC_CTYPE_DIGIT)
+#define ONIGENC_CTYPE_NEWLINE 0
+#define ONIGENC_CTYPE_ALPHA 1
+#define ONIGENC_CTYPE_BLANK 2
+#define ONIGENC_CTYPE_CNTRL 3
+#define ONIGENC_CTYPE_DIGIT 4
+#define ONIGENC_CTYPE_GRAPH 5
+#define ONIGENC_CTYPE_LOWER 6
+#define ONIGENC_CTYPE_PRINT 7
+#define ONIGENC_CTYPE_PUNCT 8
+#define ONIGENC_CTYPE_SPACE 9
+#define ONIGENC_CTYPE_UPPER 10
+#define ONIGENC_CTYPE_XDIGIT 11
+#define ONIGENC_CTYPE_WORD 12
+#define ONIGENC_CTYPE_ALNUM 13 /* alpha || digit */
+#define ONIGENC_CTYPE_ASCII 14
+#define ONIGENC_MAX_STD_CTYPE ONIGENC_CTYPE_ASCII
+
#define enc_len(enc,p) ONIGENC_MBC_ENC_LEN(enc, p)
@@ -277,100 +255,22 @@ ONIG_EXTERN OnigEncodingType OnigEncodingGB18030;
#define ONIGENC_IS_MBC_HEAD(enc,p) (ONIGENC_MBC_ENC_LEN(enc,p) != 1)
#define ONIGENC_IS_MBC_ASCII(p) (*(p) < 128)
#define ONIGENC_IS_CODE_ASCII(code) ((code) < 128)
-#define ONIGENC_IS_CODE_SB_WORD(enc,code) \
- (ONIGENC_IS_CODE_ASCII(code) && ONIGENC_IS_CODE_WORD(enc,code))
#define ONIGENC_IS_MBC_WORD(enc,s,end) \
ONIGENC_IS_CODE_WORD(enc,ONIGENC_MBC_TO_CODE(enc,s,end))
-#ifdef ONIG_RUBY_M17N
-
-#include <ctype.h> /* for isblank(), isgraph() */
-
-#define ONIGENC_MBC_TO_NORMALIZE(enc,flag,pp,end,buf) \
- onigenc_mbc_to_normalize(enc,flag,pp,end,buf)
-#define ONIGENC_IS_MBC_AMBIGUOUS(enc,flag,pp,end) \
- onigenc_is_mbc_ambiguous(enc,flag,pp,end)
-
-#define ONIGENC_SUPPORT_AMBIG_FLAG(enc) ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE
-#define ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc,s,end) \
- onigenc_is_allowed_reverse_match(enc, s, end)
-#define ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,start,s) \
- onigenc_get_left_adjust_char_head(enc, start, s)
-#define ONIGENC_GET_ALL_PAIR_AMBIG_CODES(enc, ambig_flag, acs) 0
-#define ONIGENC_GET_ALL_COMP_AMBIG_CODES(enc, ambig_flag, acs) 0
-#define ONIGENC_GET_CTYPE_CODE_RANGE(enc,ctype,sbr,mbr) \
- ONIG_NO_SUPPORT_CONFIG
-#define ONIGENC_MBC_ENC_LEN(enc,p) m17n_mbclen(enc,(int )(*p))
-#define ONIGENC_MBC_MAXLEN(enc) m17n_mbmaxlen(enc)
-#define ONIGENC_MBC_MAXLEN_DIST(enc) \
- (ONIGENC_MBC_MAXLEN(enc) > 0 ? ONIGENC_MBC_MAXLEN(enc) \
- : ONIG_INFINITE_DISTANCE)
-#define ONIGENC_MBC_MINLEN(enc) 1
-#define ONIGENC_MBC_TO_CODE(enc,p,e) m17n_codepoint((enc),(p),(e))
-#define ONIGENC_CODE_TO_MBCLEN(enc,code) m17n_codelen((enc),(code))
-#define ONIGENC_CODE_TO_MBC(enc,code,buf) onigenc_code_to_mbc(enc, code, buf)
-
-#if 0 /* !! not supported !! */
-#define ONIGENC_IS_MBC_NEWLINE(enc,p,end)
-#define ONIGENC_STEP_BACK(enc,start,s,n)
-#endif
-
-#define ONIGENC_IS_CODE_CTYPE(enc,code,ctype) \
- onigenc_is_code_ctype(enc,code,ctype)
-
-#ifdef isblank
-# define ONIGENC_IS_CODE_BLANK(enc,code) isblank((int )code)
-#else
-# define ONIGENC_IS_CODE_BLANK(enc,code) ((code) == ' ' || (code) == '\t')
-#endif
-#ifdef isgraph
-# define ONIGENC_IS_CODE_GRAPH(enc,code) isgraph((int )code)
-#else
-# define ONIGENC_IS_CODE_GRAPH(enc,code) \
- (isprint((int )code) && !isspace((int )code))
-#endif
-
-#define ONIGENC_IS_CODE_PRINT(enc,code) m17n_isprint(enc,code)
-#define ONIGENC_IS_CODE_ALNUM(enc,code) m17n_isalnum(enc,code)
-#define ONIGENC_IS_CODE_ALPHA(enc,code) m17n_isalpha(enc,code)
-#define ONIGENC_IS_CODE_LOWER(enc,code) m17n_islower(enc,code)
-#define ONIGENC_IS_CODE_UPPER(enc,code) m17n_isupper(enc,code)
-#define ONIGENC_IS_CODE_CNTRL(enc,code) m17n_iscntrl(enc,code)
-#define ONIGENC_IS_CODE_PUNCT(enc,code) m17n_ispunct(enc,code)
-#define ONIGENC_IS_CODE_SPACE(enc,code) m17n_isspace(enc,code)
-#define ONIGENC_IS_CODE_DIGIT(enc,code) m17n_isdigit(enc,code)
-#define ONIGENC_IS_CODE_XDIGIT(enc,code) m17n_isxdigit(enc,code)
-#define ONIGENC_IS_CODE_WORD(enc,code) m17n_iswchar(enc,code)
-
-ONIG_EXTERN
-int onigenc_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, int ctype));
-ONIG_EXTERN
-int onigenc_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, OnigUChar *buf));
-ONIG_EXTERN
-int onigenc_mbc_to_normalize P_((OnigEncoding enc, OnigAmbigType flag, const OnigUChar** pp, const OnigUChar* end, OnigUChar* buf));
-ONIG_EXTERN
-int onigenc_is_mbc_ambiguous P_((OnigEncoding enc, OnigAmbigType flag, const OnigUChar** pp, const OnigUChar* end));
-ONIG_EXTERN
-int onigenc_is_allowed_reverse_match P_((OnigEncoding enc, const OnigUChar* s, const OnigUChar* end));
-
-#else /* ONIG_RUBY_M17N */
-
#define ONIGENC_NAME(enc) ((enc)->name)
-#define ONIGENC_MBC_TO_NORMALIZE(enc,flag,pp,end,buf) \
- (enc)->mbc_to_normalize(flag,(const OnigUChar** )pp,end,buf)
-#define ONIGENC_IS_MBC_AMBIGUOUS(enc,flag,pp,end) \
- (enc)->is_mbc_ambiguous(flag,(const OnigUChar** )pp,end)
-#define ONIGENC_SUPPORT_AMBIG_FLAG(enc) ((enc)->support_ambig_flag)
+#define ONIGENC_MBC_CASE_FOLD(enc,flag,pp,end,buf) \
+ (enc)->mbc_case_fold(flag,(const OnigUChar** )pp,end,buf)
#define ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc,s,end) \
(enc)->is_allowed_reverse_match(s,end)
#define ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,start,s) \
(enc)->left_adjust_char_head(start, s)
-#define ONIGENC_GET_ALL_PAIR_AMBIG_CODES(enc,ambig_flag,acs) \
- (enc)->get_all_pair_ambig_codes(ambig_flag,acs)
-#define ONIGENC_GET_ALL_COMP_AMBIG_CODES(enc,ambig_flag,acs) \
- (enc)->get_all_comp_ambig_codes(ambig_flag,acs)
+#define ONIGENC_APPLY_ALL_CASE_FOLD(enc,case_fold_flag,f,arg) \
+ (enc)->apply_all_case_fold(case_fold_flag,f,arg)
+#define ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc,case_fold_flag,p,end,acs) \
+ (enc)->get_case_fold_codes_by_str(case_fold_flag,p,end,acs)
#define ONIGENC_STEP_BACK(enc,start,s,n) \
onigenc_step_back((enc),(start),(s),(n))
@@ -382,6 +282,8 @@ int onigenc_is_allowed_reverse_match P_((OnigEncoding enc, const OnigUChar* s, c
#define ONIGENC_MBC_TO_CODE(enc,p,end) (enc)->mbc_to_code((p),(end))
#define ONIGENC_CODE_TO_MBCLEN(enc,code) (enc)->code_to_mbclen(code)
#define ONIGENC_CODE_TO_MBC(enc,code,buf) (enc)->code_to_mbc(code,buf)
+#define ONIGENC_PROPERTY_NAME_TO_CTYPE(enc,p,end) \
+ (enc)->property_name_to_ctype(enc,p,end)
#define ONIGENC_IS_CODE_CTYPE(enc,code,ctype) (enc)->is_code_ctype(code,ctype)
@@ -414,14 +316,12 @@ int onigenc_is_allowed_reverse_match P_((OnigEncoding enc, const OnigUChar* s, c
#define ONIGENC_IS_CODE_WORD(enc,code) \
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_WORD)
-#define ONIGENC_GET_CTYPE_CODE_RANGE(enc,ctype,sbr,mbr) \
- (enc)->get_ctype_code_range(ctype,sbr,mbr)
+#define ONIGENC_GET_CTYPE_CODE_RANGE(enc,ctype,sbout,ranges) \
+ (enc)->get_ctype_code_range(ctype,sbout,ranges)
ONIG_EXTERN
OnigUChar* onigenc_step_back P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, int n));
-#endif /* is not ONIG_RUBY_M17N */
-
/* encoding API */
ONIG_EXTERN
@@ -486,10 +386,11 @@ typedef unsigned int OnigOptionType;
/* syntax */
typedef struct {
- unsigned int op;
- unsigned int op2;
- unsigned int behavior;
- OnigOptionType options; /* default option */
+ unsigned int op;
+ unsigned int op2;
+ unsigned int behavior;
+ OnigOptionType options; /* default option */
+ OnigMetaCharTableType meta_char_table;
} OnigSyntaxType;
ONIG_EXTERN OnigSyntaxType OnigSyntaxASIS;
@@ -570,7 +471,7 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax;
#define ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR (1U<<15) /* \`, \' */
#define ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY (1U<<16) /* \p{...}, \P{...} */
#define ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT (1U<<17) /* \p{^..}, \P{^..} */
-#define ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS (1U<<18) /* \p{IsXDigit} */
+/* #define ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS (1U<<18) */
#define ONIG_SYN_OP2_ESC_H_XDIGIT (1U<<19) /* \h, \H */
#define ONIG_SYN_OP2_INEFFECTIVE_ESCAPE (1U<<20) /* \ */
@@ -759,10 +660,10 @@ typedef struct re_pattern_buffer {
int repeat_range_alloc;
OnigRepeatRange* repeat_range;
- OnigEncoding enc;
+ OnigEncoding enc;
OnigOptionType options;
OnigSyntaxType* syntax;
- OnigAmbigType ambig_flag;
+ OnigCaseFoldType case_fold_flag;
void* name_table;
/* optimization info (string search, char-map and anchors) */
@@ -797,7 +698,7 @@ typedef struct {
OnigEncoding target_enc;
OnigSyntaxType* syntax;
OnigOptionType option;
- OnigAmbigType ambig_flag;
+ OnigCaseFoldType case_fold_flag;
} OnigCompileInfo;
/* Oniguruma Native API */
@@ -860,7 +761,7 @@ OnigEncoding onig_get_encoding P_((OnigRegex reg));
ONIG_EXTERN
OnigOptionType onig_get_options P_((OnigRegex reg));
ONIG_EXTERN
-OnigAmbigType onig_get_ambig_flag P_((OnigRegex reg));
+OnigCaseFoldType onig_get_case_fold_flag P_((OnigRegex reg));
ONIG_EXTERN
OnigSyntaxType* onig_get_syntax P_((OnigRegex reg));
ONIG_EXTERN
@@ -884,13 +785,13 @@ void onig_set_syntax_behavior P_((OnigSyntaxType* syntax, unsigned int behavior)
ONIG_EXTERN
void onig_set_syntax_options P_((OnigSyntaxType* syntax, OnigOptionType options));
ONIG_EXTERN
-int onig_set_meta_char P_((OnigEncoding enc, unsigned int what, OnigCodePoint code));
+int onig_set_meta_char P_((OnigSyntaxType* syntax, unsigned int what, OnigCodePoint code));
ONIG_EXTERN
void onig_copy_encoding P_((OnigEncoding to, OnigEncoding from));
ONIG_EXTERN
-OnigAmbigType onig_get_default_ambig_flag P_((void));
+OnigCaseFoldType onig_get_default_case_fold_flag P_((void));
ONIG_EXTERN
-int onig_set_default_ambig_flag P_((OnigAmbigType ambig_flag));
+int onig_set_default_case_fold_flag P_((OnigCaseFoldType case_fold_flag));
ONIG_EXTERN
unsigned int onig_get_match_stack_limit_size P_((void));
ONIG_EXTERN
diff --git a/re.c b/re.c
index b06e229fe9..a1dfa273c9 100644
--- a/re.c
+++ b/re.c
@@ -554,7 +554,7 @@ rb_reg_to_s(VALUE re)
Regexp *rp;
kcode_set_option(re);
r = onig_alloc_init(&rp, ONIG_OPTION_DEFAULT,
- ONIGENC_AMBIGUOUS_MATCH_DEFAULT,
+ ONIGENC_CASE_FOLD_DEFAULT,
onigenc_get_default_encoding(),
OnigDefaultSyntax);
if (r == 0) {
@@ -697,7 +697,7 @@ make_regexp(const char *s, long len, int flags, int ce)
*/
r = onig_alloc_init(&rp, flags,
- ONIGENC_AMBIGUOUS_MATCH_DEFAULT,
+ ONIGENC_CASE_FOLD_DEFAULT,
onigenc_get_default_encoding(),
OnigDefaultSyntax);
if (r) {
diff --git a/regcomp.c b/regcomp.c
index 7a5e952462..332129a7da 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -2,7 +2,7 @@
regcomp.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -29,20 +29,18 @@
#include "regparse.h"
-OnigAmbigType OnigDefaultAmbigFlag =
- (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
- ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE);
+OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN;
-extern OnigAmbigType
-onig_get_default_ambig_flag(void)
+extern OnigCaseFoldType
+onig_get_default_case_fold_flag(void)
{
- return OnigDefaultAmbigFlag;
+ return OnigDefaultCaseFoldFlag;
}
extern int
-onig_set_default_ambig_flag(OnigAmbigType ambig_flag)
+onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)
{
- OnigDefaultAmbigFlag = ambig_flag;
+ OnigDefaultCaseFoldFlag = case_fold_flag;
return 0;
}
@@ -52,7 +50,7 @@ static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
#endif
static UChar*
-k_strdup(UChar* s, UChar* end)
+str_dup(UChar* s, UChar* end)
{
int len = end - s;
@@ -66,15 +64,29 @@ k_strdup(UChar* s, UChar* end)
else return NULL;
}
-/*
- Caution: node should not be a string node.
- (s and end member address break)
-*/
static void
swap_node(Node* a, Node* b)
{
Node c;
c = *a; *a = *b; *b = c;
+
+ if (NTYPE(a) == N_STRING) {
+ StrNode* sn = &(NSTRING(a));
+ if (sn->capa == 0) {
+ int len = sn->end - sn->s;
+ sn->s = sn->buf;
+ sn->end = sn->s + len;
+ }
+ }
+
+ if (NTYPE(b) == N_STRING) {
+ StrNode* sn = &(NSTRING(b));
+ if (sn->capa == 0) {
+ int len = sn->end - sn->s;
+ sn->s = sn->buf;
+ sn->end = sn->s + len;
+ }
+ }
}
static OnigDistance
@@ -660,7 +672,7 @@ entry_repeat_range(regex_t* reg, int id, int lower, int upper)
}
static int
-compile_range_repeat_node(QualifierNode* qn, int target_len, int empty_info,
+compile_range_repeat_node(QuantifierNode* qn, int target_len, int empty_info,
regex_t* reg)
{
int r;
@@ -684,7 +696,7 @@ compile_range_repeat_node(QualifierNode* qn, int target_len, int empty_info,
#ifdef USE_SUBEXP_CALL
reg->num_call > 0 ||
#endif
- IS_QUALIFIER_IN_REPEAT(qn)) {
+ IS_QUANTIFIER_IN_REPEAT(qn)) {
r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG);
}
else {
@@ -696,7 +708,7 @@ compile_range_repeat_node(QualifierNode* qn, int target_len, int empty_info,
}
static int
-is_anychar_star_qualifier(QualifierNode* qn)
+is_anychar_star_quantifier(QuantifierNode* qn)
{
if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
NTYPE(qn->target) == N_ANYCHAR)
@@ -705,13 +717,13 @@ is_anychar_star_qualifier(QualifierNode* qn)
return 0;
}
-#define QUALIFIER_EXPAND_LIMIT_SIZE 50
+#define QUANTIFIER_EXPAND_LIMIT_SIZE 50
#define CKN_ON (ckn > 0)
#ifdef USE_COMBINATION_EXPLOSION_CHECK
static int
-compile_length_qualifier_node(QualifierNode* qn, regex_t* reg)
+compile_length_quantifier_node(QuantifierNode* qn, regex_t* reg)
{
int len, mod_tlen, cklen;
int ckn;
@@ -791,7 +803,7 @@ compile_length_qualifier_node(QualifierNode* qn, regex_t* reg)
}
static int
-compile_qualifier_node(QualifierNode* qn, regex_t* reg)
+compile_quantifier_node(QuantifierNode* qn, regex_t* reg)
{
int r, mod_tlen;
int ckn;
@@ -803,7 +815,7 @@ compile_qualifier_node(QualifierNode* qn, regex_t* reg)
ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
- if (is_anychar_star_qualifier(qn)) {
+ if (is_anychar_star_quantifier(qn)) {
r = compile_tree_n_times(qn->target, qn->lower, reg);
if (r) return r;
if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) {
@@ -945,7 +957,7 @@ compile_qualifier_node(QualifierNode* qn, regex_t* reg)
#else /* USE_COMBINATION_EXPLOSION_CHECK */
static int
-compile_length_qualifier_node(QualifierNode* qn, regex_t* reg)
+compile_length_quantifier_node(QuantifierNode* qn, regex_t* reg)
{
int len, mod_tlen;
int infinite = IS_REPEAT_INFINITE(qn->upper);
@@ -970,8 +982,8 @@ compile_length_qualifier_node(QualifierNode* qn, regex_t* reg)
mod_tlen = tlen;
if (infinite &&
- (qn->lower <= 1 || tlen * qn->lower <= QUALIFIER_EXPAND_LIMIT_SIZE)) {
- if (qn->lower == 1 && tlen > QUALIFIER_EXPAND_LIMIT_SIZE) {
+ (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
+ if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
len = SIZE_OP_JUMP;
}
else {
@@ -994,7 +1006,7 @@ compile_length_qualifier_node(QualifierNode* qn, regex_t* reg)
}
else if (!infinite && qn->greedy &&
(qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
- <= QUALIFIER_EXPAND_LIMIT_SIZE)) {
+ <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
len = tlen * qn->lower;
len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);
}
@@ -1010,7 +1022,7 @@ compile_length_qualifier_node(QualifierNode* qn, regex_t* reg)
}
static int
-compile_qualifier_node(QualifierNode* qn, regex_t* reg)
+compile_quantifier_node(QuantifierNode* qn, regex_t* reg)
{
int i, r, mod_tlen;
int infinite = IS_REPEAT_INFINITE(qn->upper);
@@ -1019,7 +1031,7 @@ compile_qualifier_node(QualifierNode* qn, regex_t* reg)
if (tlen < 0) return tlen;
- if (is_anychar_star_qualifier(qn)) {
+ if (is_anychar_star_quantifier(qn)) {
r = compile_tree_n_times(qn->target, qn->lower, reg);
if (r) return r;
if (IS_NOT_NULL(qn->next_head_exact)) {
@@ -1044,8 +1056,8 @@ compile_qualifier_node(QualifierNode* qn, regex_t* reg)
mod_tlen = tlen;
if (infinite &&
- (qn->lower <= 1 || tlen * qn->lower <= QUALIFIER_EXPAND_LIMIT_SIZE)) {
- if (qn->lower == 1 && tlen > QUALIFIER_EXPAND_LIMIT_SIZE) {
+ (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
+ if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
if (qn->greedy) {
if (IS_NOT_NULL(qn->head_exact))
r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1);
@@ -1109,7 +1121,7 @@ compile_qualifier_node(QualifierNode* qn, regex_t* reg)
}
else if (!infinite && qn->greedy &&
(qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
- <= QUALIFIER_EXPAND_LIMIT_SIZE)) {
+ <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
int n = qn->upper - qn->lower;
r = compile_tree_n_times(qn->target, qn->lower, reg);
@@ -1227,7 +1239,7 @@ compile_length_effect_node(EffectNode* node, regex_t* reg)
case EFFECT_STOP_BACKTRACK:
if (IS_EFFECT_STOP_BT_SIMPLE_REPEAT(node)) {
- QualifierNode* qn = &NQUALIFIER(node->target);
+ QuantifierNode* qn = &NQUANTIFIER(node->target);
tlen = compile_length_tree(qn->target, reg);
if (tlen < 0) return tlen;
@@ -1317,7 +1329,7 @@ compile_effect_node(EffectNode* node, regex_t* reg)
case EFFECT_STOP_BACKTRACK:
if (IS_EFFECT_STOP_BT_SIMPLE_REPEAT(node)) {
- QualifierNode* qn = &NQUALIFIER(node->target);
+ QuantifierNode* qn = &NQUANTIFIER(node->target);
r = compile_tree_n_times(qn->target, qn->lower, reg);
if (r) return r;
@@ -1540,8 +1552,8 @@ compile_length_tree(Node* node, regex_t* reg)
break;
#endif
- case N_QUALIFIER:
- r = compile_length_qualifier_node(&(NQUALIFIER(node)), reg);
+ case N_QUANTIFIER:
+ r = compile_length_quantifier_node(&(NQUANTIFIER(node)), reg);
break;
case N_EFFECT:
@@ -1617,9 +1629,11 @@ compile_tree(Node* node, regex_t* reg)
{
int op;
- switch (NCTYPE(node).type) {
- case CTYPE_WORD: op = OP_WORD; break;
- case CTYPE_NOT_WORD: op = OP_NOT_WORD; break;
+ switch (NCTYPE(node).ctype) {
+ case ONIGENC_CTYPE_WORD:
+ if (NCTYPE(node).not != 0) op = OP_NOT_WORD;
+ else op = OP_WORD;
+ break;
default:
return ONIGERR_TYPE_BUG;
break;
@@ -1703,8 +1717,8 @@ compile_tree(Node* node, regex_t* reg)
break;
#endif
- case N_QUALIFIER:
- r = compile_qualifier_node(&(NQUALIFIER(node)), reg);
+ case N_QUANTIFIER:
+ r = compile_quantifier_node(&(NQUANTIFIER(node)), reg);
break;
case N_EFFECT:
@@ -1741,13 +1755,13 @@ noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
} while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
break;
- case N_QUALIFIER:
+ case N_QUANTIFIER:
{
- Node** ptarget = &(NQUALIFIER(node).target);
+ Node** ptarget = &(NQUANTIFIER(node).target);
Node* old = *ptarget;
r = noname_disable_map(ptarget, map, counter);
- if (*ptarget != old && NTYPE(*ptarget) == N_QUALIFIER) {
- onig_reduce_nested_qualifier(node, *ptarget);
+ if (*ptarget != old && NTYPE(*ptarget) == N_QUANTIFIER) {
+ onig_reduce_nested_quantifier(node, *ptarget);
}
}
break;
@@ -1821,8 +1835,8 @@ renumber_by_map(Node* node, GroupNumRemap* map)
r = renumber_by_map(NCONS(node).left, map);
} while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
break;
- case N_QUALIFIER:
- r = renumber_by_map(NQUALIFIER(node).target, map);
+ case N_QUANTIFIER:
+ r = renumber_by_map(NQUANTIFIER(node).target, map);
break;
case N_EFFECT:
r = renumber_by_map(NEFFECT(node).target, map);
@@ -1851,8 +1865,8 @@ numbered_ref_check(Node* node)
r = numbered_ref_check(NCONS(node).left);
} while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
break;
- case N_QUALIFIER:
- r = numbered_ref_check(NQUALIFIER(node).target);
+ case N_QUANTIFIER:
+ r = numbered_ref_check(NQUANTIFIER(node).target);
break;
case N_EFFECT:
r = numbered_ref_check(NEFFECT(node).target);
@@ -1933,7 +1947,7 @@ unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg)
#ifdef USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK
static int
-qualifiers_memory_node_info(Node* node)
+quantifiers_memory_node_info(Node* node)
{
int r = 0;
@@ -1943,7 +1957,7 @@ qualifiers_memory_node_info(Node* node)
{
int v;
do {
- v = qualifiers_memory_node_info(NCONS(node).left);
+ v = quantifiers_memory_node_info(NCONS(node).left);
if (v > r) r = v;
} while (v >= 0 && IS_NOT_NULL(node = NCONS(node).right));
}
@@ -1955,15 +1969,15 @@ qualifiers_memory_node_info(Node* node)
return NQ_TARGET_IS_EMPTY_REC; /* tiny version */
}
else
- r = qualifiers_memory_node_info(NCALL(node).target);
+ r = quantifiers_memory_node_info(NCALL(node).target);
break;
#endif
- case N_QUALIFIER:
+ case N_QUANTIFIER:
{
- QualifierNode* qn = &(NQUALIFIER(node));
+ QuantifierNode* qn = &(NQUANTIFIER(node));
if (qn->upper != 0) {
- r = qualifiers_memory_node_info(qn->target);
+ r = quantifiers_memory_node_info(qn->target);
}
}
break;
@@ -1978,7 +1992,7 @@ qualifiers_memory_node_info(Node* node)
case EFFECT_OPTION:
case EFFECT_STOP_BACKTRACK:
- r = qualifiers_memory_node_info(en->target);
+ r = quantifiers_memory_node_info(en->target);
break;
default:
break;
@@ -2070,12 +2084,7 @@ get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env)
break;
case N_CTYPE:
- switch (NCTYPE(node).type) {
- case CTYPE_WORD: *min = 1; break;
- case CTYPE_NOT_WORD: *min = 1; break;
- default:
- break;
- }
+ *min = 1;
break;
case N_CCLASS:
@@ -2083,9 +2092,9 @@ get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env)
*min = 1;
break;
- case N_QUALIFIER:
+ case N_QUANTIFIER:
{
- QualifierNode* qn = &(NQUALIFIER(node));
+ QuantifierNode* qn = &(NQUANTIFIER(node));
if (qn->lower > 0) {
r = get_min_match_length(qn->target, min, env);
@@ -2159,15 +2168,7 @@ get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env)
break;
case N_CTYPE:
- switch (NCTYPE(node).type) {
- case CTYPE_WORD:
- case CTYPE_NOT_WORD:
- *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
- break;
-
- default:
- break;
- }
+ *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
break;
case N_CCLASS:
@@ -2204,9 +2205,9 @@ get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env)
break;
#endif
- case N_QUALIFIER:
+ case N_QUANTIFIER:
{
- QualifierNode* qn = &(NQUALIFIER(node));
+ QuantifierNode* qn = &(NQUANTIFIER(node));
if (qn->upper != 0) {
r = get_max_match_length(qn->target, max, env);
@@ -2311,9 +2312,9 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
}
break;
- case N_QUALIFIER:
+ case N_QUANTIFIER:
{
- QualifierNode* qn = &(NQUALIFIER(node));
+ QuantifierNode* qn = &(NQUANTIFIER(node));
if (qn->lower == qn->upper) {
r = get_char_length_tree1(qn->target, reg, &tlen, level);
if (r == 0)
@@ -2334,12 +2335,7 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
#endif
case N_CTYPE:
- switch (NCTYPE(node).type) {
- case CTYPE_WORD:
- case CTYPE_NOT_WORD:
- *len = 1;
- break;
- }
+ *len = 1;
break;
case N_CCLASS:
@@ -2407,22 +2403,11 @@ is_not_included(Node* x, Node* y, regex_t* reg)
{
switch (ytype) {
case N_CTYPE:
- switch (NCTYPE(x).type) {
- case CTYPE_WORD:
- if (NCTYPE(y).type == CTYPE_NOT_WORD)
- return 1;
- else
- return 0;
- break;
- case CTYPE_NOT_WORD:
- if (NCTYPE(y).type == CTYPE_WORD)
- return 1;
- else
- return 0;
- break;
- default:
- break;
- }
+ if (NCTYPE(y).ctype == NCTYPE(x).ctype &&
+ NCTYPE(y).not != NCTYPE(x).not)
+ return 1;
+ else
+ return 0;
break;
case N_CCLASS:
@@ -2449,32 +2434,34 @@ is_not_included(Node* x, Node* y, regex_t* reg)
CClassNode* xc = &(NCCLASS(x));
switch (ytype) {
case N_CTYPE:
- switch (NCTYPE(y).type) {
- case CTYPE_WORD:
- if (IS_NULL(xc->mbuf) && !IS_CCLASS_NOT(xc)) {
- for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
- if (BITSET_AT(xc->bs, i)) {
- if (ONIGENC_IS_CODE_SB_WORD(reg->enc, i)) return 0;
+ switch (NCTYPE(y).ctype) {
+ case ONIGENC_CTYPE_WORD:
+ if (NCTYPE(y).not == 0) {
+ if (IS_NULL(xc->mbuf) && !IS_CCLASS_NOT(xc)) {
+ for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
+ if (BITSET_AT(xc->bs, i)) {
+ if (IS_CODE_SB_WORD(reg->enc, i)) return 0;
+ }
}
+ return 1;
}
- return 1;
+ return 0;
}
- return 0;
- break;
- case CTYPE_NOT_WORD:
- for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
- if (! ONIGENC_IS_CODE_SB_WORD(reg->enc, i)) {
- if (!IS_CCLASS_NOT(xc)) {
- if (BITSET_AT(xc->bs, i))
- return 0;
- }
- else {
- if (! BITSET_AT(xc->bs, i))
- return 0;
+ else {
+ for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
+ if (! IS_CODE_SB_WORD(reg->enc, i)) {
+ if (!IS_CCLASS_NOT(xc)) {
+ if (BITSET_AT(xc->bs, i))
+ return 0;
+ }
+ else {
+ if (! BITSET_AT(xc->bs, i))
+ return 0;
+ }
}
}
+ return 1;
}
- return 1;
break;
default:
@@ -2523,12 +2510,12 @@ is_not_included(Node* x, Node* y, regex_t* reg)
c = *(xs->s);
switch (ytype) {
case N_CTYPE:
- switch (NCTYPE(y).type) {
- case CTYPE_WORD:
- return (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end) ? 0 : 1);
- break;
- case CTYPE_NOT_WORD:
- return (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end) ? 1 : 0);
+ switch (NCTYPE(y).ctype) {
+ case ONIGENC_CTYPE_WORD:
+ if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end))
+ return NCTYPE(y).not;
+ else
+ return !(NCTYPE(y).not);
break;
default:
break;
@@ -2610,12 +2597,6 @@ get_head_value_node(Node* node, int exact, regex_t* reg)
if (exact != 0 &&
!NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) {
-#if 0
- UChar* tmp = sn->s;
- if (! ONIGENC_IS_MBC_AMBIGUOUS(reg->enc, reg->ambig_flag,
- &tmp, sn->end))
- n = node;
-#endif
}
else {
n = node;
@@ -2623,9 +2604,9 @@ get_head_value_node(Node* node, int exact, regex_t* reg)
}
break;
- case N_QUALIFIER:
+ case N_QUANTIFIER:
{
- QualifierNode* qn = &(NQUALIFIER(node));
+ QuantifierNode* qn = &(NQUANTIFIER(node));
if (qn->lower > 0) {
if (IS_NOT_NULL(qn->head_exact))
n = qn->head_exact;
@@ -2686,8 +2667,8 @@ check_type_tree(Node* node, int type_mask, int effect_mask, int anchor_mask)
} while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
break;
- case N_QUALIFIER:
- r = check_type_tree(NQUALIFIER(node).target, type_mask, effect_mask,
+ case N_QUANTIFIER:
+ r = check_type_tree(NQUANTIFIER(node).target, type_mask, effect_mask,
anchor_mask);
break;
@@ -2762,10 +2743,10 @@ subexp_inf_recursive_check(Node* node, ScanEnv* env, int head)
}
break;
- case N_QUALIFIER:
- r = subexp_inf_recursive_check(NQUALIFIER(node).target, env, head);
+ case N_QUANTIFIER:
+ r = subexp_inf_recursive_check(NQUANTIFIER(node).target, env, head);
if (r == RECURSION_EXIST) {
- if (NQUALIFIER(node).lower == 0) r = 0;
+ if (NQUANTIFIER(node).lower == 0) r = 0;
}
break;
@@ -2821,8 +2802,8 @@ subexp_inf_recursive_check_trav(Node* node, ScanEnv* env)
} while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
break;
- case N_QUALIFIER:
- r = subexp_inf_recursive_check_trav(NQUALIFIER(node).target, env);
+ case N_QUANTIFIER:
+ r = subexp_inf_recursive_check_trav(NQUANTIFIER(node).target, env);
break;
case N_ANCHOR:
@@ -2876,8 +2857,8 @@ subexp_recursive_check(Node* node)
} while (IS_NOT_NULL(node = NCONS(node).right));
break;
- case N_QUALIFIER:
- r = subexp_recursive_check(NQUALIFIER(node).target);
+ case N_QUANTIFIER:
+ r = subexp_recursive_check(NQUANTIFIER(node).target);
break;
case N_ANCHOR:
@@ -2941,11 +2922,11 @@ subexp_recursive_check_trav(Node* node, ScanEnv* env)
}
break;
- case N_QUALIFIER:
- r = subexp_recursive_check_trav(NQUALIFIER(node).target, env);
- if (NQUALIFIER(node).upper == 0) {
+ case N_QUANTIFIER:
+ r = subexp_recursive_check_trav(NQUANTIFIER(node).target, env);
+ if (NQUANTIFIER(node).upper == 0) {
if (r == FOUND_CALLED_NODE)
- NQUALIFIER(node).is_refered = 1;
+ NQUANTIFIER(node).is_refered = 1;
}
break;
@@ -3008,8 +2989,8 @@ setup_subexp_call(Node* node, ScanEnv* env)
} while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
break;
- case N_QUALIFIER:
- r = setup_subexp_call(NQUALIFIER(node).target, env);
+ case N_QUANTIFIER:
+ r = setup_subexp_call(NQUANTIFIER(node).target, env);
break;
case N_EFFECT:
r = setup_subexp_call(NEFFECT(node).target, env);
@@ -3158,10 +3139,10 @@ next_setup(Node* node, Node* next_node, regex_t* reg)
retry:
type = NTYPE(node);
- if (type == N_QUALIFIER) {
- QualifierNode* qn = &(NQUALIFIER(node));
+ if (type == N_QUANTIFIER) {
+ QuantifierNode* qn = &(NQUANTIFIER(node));
if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) {
-#ifdef USE_QUALIFIER_PEEK_NEXT
+#ifdef USE_QUANTIFIER_PEEK_NEXT
qn->next_head_exact = get_head_value_node(next_node, 1, reg);
#endif
/* automatic posseivation a*b ==> (?>a*)b */
@@ -3196,100 +3177,317 @@ next_setup(Node* node, Node* next_node, regex_t* reg)
static int
-divide_ambig_string_node_sub(regex_t* reg, int prev_ambig,
- UChar* prev_start, UChar* prev,
- UChar* end, Node*** tailp, Node** root)
+update_string_node_case_fold(regex_t* reg, Node *node)
{
- UChar *tmp, *wp;
- Node* snode;
+ UChar *p, *q, *end, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
+ UChar *sbuf, *ebuf, *sp;
+ int r, i, len, sbuf_size;
+ StrNode* sn = &NSTRING(node);
+
+ end = sn->end;
+ sbuf_size = (end - sn->s) * 2;
+ sbuf = (UChar* )xmalloc(sbuf_size);
+ CHECK_NULL_RETURN_VAL(sbuf, ONIGERR_MEMORY);
+ ebuf = sbuf + sbuf_size;
- if (prev_ambig != 0) {
- tmp = prev_start;
- wp = prev_start;
- while (tmp < prev) {
- wp += ONIGENC_MBC_TO_NORMALIZE(reg->enc, reg->ambig_flag,
- &tmp, end, wp);
+ sp = sbuf;
+ p = sn->s;
+ while (p < end) {
+ len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf);
+ q = buf;
+ for (i = 0; i < len; i++) {
+ if (sp >= ebuf) {
+ sbuf = (UChar* )xrealloc(sbuf, sbuf_size * 2);
+ CHECK_NULL_RETURN_VAL(sbuf, ONIGERR_MEMORY);
+ sp = sbuf + sbuf_size;
+ sbuf_size *= 2;
+ ebuf = sbuf + sbuf_size;
+ }
+
+ *sp++ = buf[i];
}
- snode = onig_node_new_str(prev_start, wp);
- CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY);
- NSTRING_SET_AMBIG(snode);
- if (wp != prev) NSTRING_SET_AMBIG_REDUCE(snode);
}
- else {
- snode = onig_node_new_str(prev_start, prev);
- CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY);
+
+ r = onig_node_str_set(node, sbuf, sp);
+ if (r != 0) {
+ xfree(sbuf);
+ return r;
+ }
+
+ xfree(sbuf);
+ return 0;
+}
+
+static int
+expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end,
+ regex_t* reg)
+{
+ int r;
+ Node *node;
+
+ node = onig_node_new_str(s, end);
+ if (IS_NULL(node)) return ONIGERR_MEMORY;
+
+ r = update_string_node_case_fold(reg, node);
+ if (r != 0) {
+ onig_node_free(node);
+ return r;
}
- if (*tailp == (Node** )0) {
- *root = onig_node_new_list(snode, NULL);
- CHECK_NULL_RETURN_VAL(*root, ONIGERR_MEMORY);
- *tailp = &(NCONS(*root).right);
+ NSTRING_SET_AMBIG(node);
+ NSTRING_SET_DONT_GET_OPT_INFO(node);
+ *rnode = node;
+ return 0;
+}
+
+static int
+expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[],
+ UChar *p, int slen, UChar *end,
+ regex_t* reg, Node **rnode)
+{
+ int r, i, j, len, varlen;
+ Node *anode, *var_anode, *snode, *xnode, *an;
+ UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
+
+ *rnode = var_anode = NULL_NODE;
+
+ varlen = 0;
+ for (i = 0; i < item_num; i++) {
+ if (items[i].byte_len != slen) {
+ varlen = 1;
+ break;
+ }
+ }
+
+ if (varlen != 0) {
+ *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
+ if (IS_NULL(var_anode)) return ONIGERR_MEMORY;
+
+ xnode = onig_node_new_list(NULL, NULL);
+ if (IS_NULL(xnode)) goto mem_err;
+ NCONS(var_anode).left = xnode;
+
+ anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
+ if (IS_NULL(anode)) goto mem_err;
+ NCONS(xnode).left = anode;
}
else {
- **tailp = onig_node_new_list(snode, NULL);
- CHECK_NULL_RETURN_VAL(**tailp, ONIGERR_MEMORY);
- *tailp = &(NCONS(**tailp).right);
+ *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
+ if (IS_NULL(anode)) return ONIGERR_MEMORY;
}
- return 0;
+ snode = onig_node_new_str(p, p + slen);
+ if (IS_NULL(snode)) goto mem_err;
+
+ NCONS(anode).left = snode;
+
+ for (i = 0; i < item_num; i++) {
+ snode = onig_node_new_str(NULL, NULL);
+ if (IS_NULL(snode)) goto mem_err;
+
+ for (j = 0; j < items[i].code_len; j++) {
+ len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf);
+ if (len < 0) {
+ r = len;
+ goto mem_err2;
+ }
+
+ r = onig_node_str_cat(snode, buf, buf + len);
+ if (r != 0) goto mem_err2;
+ }
+
+ an = onig_node_new_alt(NULL_NODE, NULL_NODE);
+ if (IS_NULL(an)) {
+ goto mem_err2;
+ }
+
+ if (items[i].byte_len != slen) {
+ Node *rem;
+ UChar *q = p + items[i].byte_len;
+
+ if (q < end) {
+ r = expand_case_fold_make_rem_string(&rem, q, end, reg);
+ if (r != 0) {
+ onig_node_free(an);
+ goto mem_err2;
+ }
+
+ xnode = onig_node_list_add(NULL_NODE, snode);
+ if (IS_NULL(xnode)) {
+ onig_node_free(an);
+ onig_node_free(rem);
+ goto mem_err2;
+ }
+ if (IS_NULL(onig_node_list_add(xnode, rem))) {
+ onig_node_free(an);
+ onig_node_free(xnode);
+ onig_node_free(rem);
+ goto mem_err;
+ }
+
+ NCONS(an).left = xnode;
+ }
+ else {
+ NCONS(an).left = snode;
+ }
+
+ NCONS(var_anode).right = an;
+ var_anode = an;
+ }
+ else {
+ NCONS(an).left = snode;
+ NCONS(anode).right = an;
+ anode = an;
+ }
+ }
+
+ return varlen;
+
+ mem_err2:
+ onig_node_free(snode);
+
+ mem_err:
+ if (IS_NOT_NULL(*rnode))
+ onig_node_free(*rnode);
+
+ return ONIGERR_MEMORY;
}
static int
-divide_ambig_string_node(Node* node, regex_t* reg)
+expand_case_fold_string(Node* node, regex_t* reg)
{
+#define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8
+
+ int r, n, len, alt_num;
+ UChar *start, *end, *p;
+ Node *top_root, *root, *snode, *prev_node;
+ OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
StrNode* sn = &NSTRING(node);
- int ambig, prev_ambig;
- UChar *prev, *p, *end, *prev_start, *start, *tmp, *wp;
- Node *root = NULL_NODE;
- Node **tailp = (Node** )0;
- int r;
- start = prev_start = p = sn->s;
- end = sn->end;
- if (p >= end) return 0;
+ if (NSTRING_IS_AMBIG(node)) return 0;
- prev_ambig = ONIGENC_IS_MBC_AMBIGUOUS(reg->enc, reg->ambig_flag, &p, end);
+ start = sn->s;
+ end = sn->end;
+ if (start >= end) return 0;
+ r = 0;
+ top_root = root = prev_node = snode = NULL_NODE;
+ alt_num = 1;
+ p = start;
while (p < end) {
- prev = p;
- if (prev_ambig != (ambig = ONIGENC_IS_MBC_AMBIGUOUS(reg->enc,
- reg->ambig_flag, &p, end))) {
+ n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag,
+ p, end, items);
+ if (n < 0) goto err;
+
+ len = enc_len(reg->enc, p);
+
+ if (n == 0) {
+ if (IS_NULL(snode)) {
+ if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
+ top_root = root = onig_node_list_add(NULL_NODE, prev_node);
+ if (IS_NULL(root)) {
+ onig_node_free(prev_node);
+ goto mem_err;
+ }
+ }
- r = divide_ambig_string_node_sub(reg, prev_ambig, prev_start, prev,
- end, &tailp, &root);
- if (r != 0) return r;
+ prev_node = snode = onig_node_new_str(NULL, NULL);
+ if (IS_NULL(snode)) goto mem_err;
+ if (IS_NOT_NULL(root)) {
+ if (IS_NULL(onig_node_list_add(root, snode))) {
+ onig_node_free(snode);
+ goto mem_err;
+ }
+ }
+ }
- prev_ambig = ambig;
- prev_start = prev;
+ r = onig_node_str_cat(snode, p, p + len);
+ if (r != 0) goto err;
}
- }
+ else {
+ alt_num *= (n + 1);
+ if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break;
+
+ if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
+ top_root = root = onig_node_list_add(NULL_NODE, prev_node);
+ if (IS_NULL(root)) {
+ onig_node_free(prev_node);
+ goto mem_err;
+ }
+ }
+
+ r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node);
+ if (r < 0) goto mem_err;
+ if (r == 1) {
+ if (IS_NULL(root)) {
+ top_root = prev_node;
+ }
+ else {
+ if (IS_NULL(onig_node_list_add(root, prev_node))) {
+ onig_node_free(prev_node);
+ goto mem_err;
+ }
+ }
- if (prev_start == start) {
- if (prev_ambig != 0) {
- NSTRING_SET_AMBIG(node);
- tmp = start;
- wp = start;
- while (tmp < end) {
- wp += ONIGENC_MBC_TO_NORMALIZE(reg->enc, reg->ambig_flag,
- &tmp, end, wp);
+ root = NCONS(prev_node).left;
+ }
+ else { /* r == 0 */
+ if (IS_NOT_NULL(root)) {
+ if (IS_NULL(onig_node_list_add(root, prev_node))) {
+ onig_node_free(prev_node);
+ goto mem_err;
+ }
+ }
}
- if (wp != sn->end) NSTRING_SET_AMBIG_REDUCE(node);
- sn->end = wp;
+
+ snode = NULL_NODE;
}
+
+ p += len;
}
- else {
- r = divide_ambig_string_node_sub(reg, prev_ambig, prev_start, end,
- end, &tailp, &root);
- if (r != 0) return r;
- swap_node(node, root);
- onig_node_str_clear(root); /* should be after swap! */
- onig_node_free(root); /* free original string node */
+ if (p < end) {
+ Node *srem;
+
+ r = expand_case_fold_make_rem_string(&srem, p, end, reg);
+ if (r != 0) goto mem_err;
+
+ if (IS_NOT_NULL(prev_node) && IS_NULL(root)) {
+ top_root = root = onig_node_list_add(NULL_NODE, prev_node);
+ if (IS_NULL(root)) {
+ onig_node_free(srem);
+ onig_node_free(prev_node);
+ goto mem_err;
+ }
+ }
+
+ if (IS_NULL(root)) {
+ prev_node = srem;
+ }
+ else {
+ if (IS_NULL(onig_node_list_add(root, srem))) {
+ onig_node_free(srem);
+ goto mem_err;
+ }
+ }
}
+ /* ending */
+ top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node);
+ swap_node(node, top_root);
+ onig_node_free(top_root);
return 0;
+
+ mem_err:
+ r = ONIGERR_MEMORY;
+
+ err:
+ if (IS_NOT_NULL(top_root))
+ onig_node_free(top_root);
+ return r;
}
+
#ifdef USE_COMBINATION_EXPLOSION_CHECK
#define CEC_THRES_NUM_BIG_REPEAT 512
@@ -3327,11 +3525,11 @@ setup_comb_exp_check(Node* node, int state, ScanEnv* env)
}
break;
- case N_QUALIFIER:
+ case N_QUANTIFIER:
{
int child_state = state;
int add_state = 0;
- QualifierNode* qn = &(NQUALIFIER(node));
+ QuantifierNode* qn = &(NQUANTIFIER(node));
Node* target = qn->target;
int var_num;
@@ -3345,8 +3543,8 @@ setup_comb_exp_check(Node* node, int state, ScanEnv* env)
if (NTYPE(qn->target) == N_EFFECT) {
EffectNode* en = &(NEFFECT(qn->target));
if (en->type == EFFECT_MEMORY) {
- if (NTYPE(en->target) == N_QUALIFIER) {
- QualifierNode* q = &(NQUALIFIER(en->target));
+ if (NTYPE(en->target) == N_QUANTIFIER) {
+ QuantifierNode* q = &(NQUANTIFIER(en->target));
if (IS_REPEAT_INFINITE(q->upper)
&& q->greedy == qn->greedy) {
qn->upper = (qn->lower == 0 ? 1 : qn->lower);
@@ -3475,7 +3673,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
case N_STRING:
if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) {
- r = divide_ambig_string_node(node, reg);
+ r = expand_case_fold_string(node, reg);
}
break;
@@ -3509,10 +3707,10 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
}
break;
- case N_QUALIFIER:
+ case N_QUANTIFIER:
{
OnigDistance d;
- QualifierNode* qn = &(NQUALIFIER(node));
+ QuantifierNode* qn = &(NQUANTIFIER(node));
Node* target = qn->target;
if ((state & IN_REPEAT) != 0) {
@@ -3525,7 +3723,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
if (d == 0) {
qn->target_empty_info = NQ_TARGET_IS_EMPTY;
#ifdef USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK
- r = qualifiers_memory_node_info(target);
+ r = quantifiers_memory_node_info(target);
if (r < 0) break;
if (r > 0) {
qn->target_empty_info = r;
@@ -3567,15 +3765,15 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
if (r) break;
}
onig_node_free(target);
- break; /* break case N_QUALIFIER: */
+ break; /* break case N_QUANTIFIER: */
}
}
}
#ifdef USE_OP_PUSH_OR_JUMP_EXACT
if (qn->greedy && (qn->target_empty_info != 0)) {
- if (NTYPE(target) == N_QUALIFIER) {
- QualifierNode* tqn = &(NQUALIFIER(target));
+ if (NTYPE(target) == N_QUANTIFIER) {
+ QuantifierNode* tqn = &(NQUANTIFIER(target));
if (IS_NOT_NULL(tqn->head_exact)) {
qn->head_exact = tqn->head_exact;
tqn->head_exact = NULL;
@@ -3615,8 +3813,8 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
{
Node* target = en->target;
r = setup_tree(target, reg, state, env);
- if (NTYPE(target) == N_QUALIFIER) {
- QualifierNode* tqn = &(NQUALIFIER(target));
+ if (NTYPE(target) == N_QUANTIFIER) {
+ QuantifierNode* tqn = &(NQUANTIFIER(target));
if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 &&
tqn->greedy != 0) { /* (?>a*), a*+ etc... */
int qtype = NTYPE(tqn->target);
@@ -3645,7 +3843,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
/* allowed node types in look-behind */
#define ALLOWED_TYPE_IN_LB \
( N_LIST | N_ALT | N_STRING | N_CCLASS | N_CTYPE | \
- N_ANYCHAR | N_ANCHOR | N_EFFECT | N_QUALIFIER | N_CALL )
+ N_ANYCHAR | N_ANCHOR | N_EFFECT | N_QUANTIFIER | N_CALL )
#define ALLOWED_EFFECT_IN_LB ( EFFECT_MEMORY )
#define ALLOWED_EFFECT_IN_LB_NOT 0
@@ -3724,11 +3922,11 @@ typedef struct {
} MinMaxLen;
typedef struct {
- MinMaxLen mmd;
- OnigEncoding enc;
- OnigOptionType options;
- OnigAmbigType ambig_flag;
- ScanEnv* scan_env;
+ MinMaxLen mmd;
+ OnigEncoding enc;
+ OnigOptionType options;
+ OnigCaseFoldType case_fold_flag;
+ ScanEnv* scan_env;
} OptEnv;
typedef struct {
@@ -4080,7 +4278,14 @@ select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt)
v1 = now->len;
v2 = alt->len;
- if (v1 <= 2 && v2 <= 2) {
+ if (v2 == 0) {
+ return ;
+ }
+ else if (v1 == 0) {
+ copy_opt_exact_info(now, alt);
+ return ;
+ }
+ else if (v1 <= 2 && v2 <= 2) {
/* ByteValTable[x] is big value --> low price */
v2 = map_position_value(enc, now->s[0]);
v1 = map_position_value(enc, alt->s[0]);
@@ -4141,45 +4346,23 @@ add_char_opt_map_info(OptMapInfo* map, UChar c, OnigEncoding enc)
static int
add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end,
- OnigEncoding enc, OnigAmbigType ambig_flag)
+ OnigEncoding enc, OnigCaseFoldType case_fold_flag)
{
- int i, j, n, len;
- UChar buf[ONIGENC_MBC_NORMALIZE_MAXLEN];
- OnigCodePoint code, ccode;
- const OnigCompAmbigCodes* ccs;
- const OnigPairAmbigCodes* pccs;
- OnigAmbigType amb;
+ OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
+ UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
+ int i, n;
add_char_opt_map_info(map, p[0], enc);
- code = ONIGENC_MBC_TO_CODE(enc, p, end);
- for (amb = 0x01; amb <= ONIGENC_AMBIGUOUS_MATCH_LIMIT; amb <<= 1) {
- if ((amb & ambig_flag) == 0) continue;
+ case_fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag);
+ n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, items);
+ if (n < 0) return n;
- n = ONIGENC_GET_ALL_PAIR_AMBIG_CODES(enc, amb, &pccs);
- for (i = 0; i < n; i++) {
- if (pccs[i].from == code) {
- len = ONIGENC_CODE_TO_MBC(enc, pccs[i].to, buf);
- if (len < 0) return len;
- add_char_opt_map_info(map, buf[0], enc);
- }
- }
-
- if ((ambig_flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
- n = ONIGENC_GET_ALL_COMP_AMBIG_CODES(enc, amb, &ccs);
- for (i = 0; i < n; i++) {
- if (ccs[i].code == code) {
- for (j = 0; j < ccs[i].n; j++) {
- ccode = ccs[i].items[j].code[0];
- len = ONIGENC_CODE_TO_MBC(enc, ccode, buf);
- if (len < 0) return len;
- add_char_opt_map_info(map, buf[0], enc);
- }
- break;
- }
- }
- }
+ for (i = 0; i < n; i++) {
+ ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf);
+ add_char_opt_map_info(map, buf[0], enc);
}
+
return 0;
}
@@ -4399,25 +4582,26 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
set_mml(&opt->len, slen, slen);
}
else {
- int n, max;
+ int max;
- concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
- is_raw, env->enc);
- opt->exb.ignore_case = 1;
+ if (NSTRING_IS_DONT_GET_OPT_INFO(node)) {
+ int n = onigenc_strlen(env->enc, sn->s, sn->end);
+ max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * n;
+ }
+ else {
+ concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
+ is_raw, env->enc);
+ opt->exb.ignore_case = 1;
+
+ if (slen > 0) {
+ r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end,
+ env->enc, env->case_fold_flag);
+ if (r != 0) break;
+ }
- if (slen > 0) {
- r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end,
- env->enc, env->ambig_flag);
- if (r != 0) break;
+ max = slen;
}
- if (NSTRING_IS_AMBIG_REDUCE(node)) {
- n = onigenc_strlen(env->enc, sn->s, sn->end);
- max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * n;
- }
- else {
- max = slen;
- }
set_mml(&opt->len, slen, max);
}
@@ -4460,21 +4644,22 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
if (max == 1) {
min = 1;
- switch (NCTYPE(node).type) {
- case CTYPE_NOT_WORD:
- for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
- if (! ONIGENC_IS_CODE_WORD(env->enc, i)) {
- add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
- }
- }
- break;
-
- case CTYPE_WORD:
- for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
- if (ONIGENC_IS_CODE_WORD(env->enc, i)) {
- add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
- }
- }
+ switch (NCTYPE(node).ctype) {
+ case ONIGENC_CTYPE_WORD:
+ if (NCTYPE(node).not != 0) {
+ for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
+ if (! ONIGENC_IS_CODE_WORD(env->enc, i)) {
+ add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
+ }
+ }
+ }
+ else {
+ for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
+ if (ONIGENC_IS_CODE_WORD(env->enc, i)) {
+ add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
+ }
+ }
+ }
break;
}
}
@@ -4572,12 +4757,12 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
break;
#endif
- case N_QUALIFIER:
+ case N_QUANTIFIER:
{
int i;
OnigDistance min, max;
NodeOptInfo nopt;
- QualifierNode* qn = &(NQUALIFIER(node));
+ QuantifierNode* qn = &(NQUANTIFIER(node));
r = optimize_node_left(qn->target, &nopt, env);
if (r) break;
@@ -4700,7 +4885,7 @@ set_optimize_exact_info(regex_t* reg, OptExactInfo* e)
else {
int allow_reverse;
- reg->exact = k_strdup(e->s, e->s + e->len);
+ reg->exact = str_dup(e->s, e->s + e->len);
CHECK_NULL_RETURN_VAL(reg->exact, ONIGERR_MEMORY);
reg->exact_end = reg->exact + e->len;
@@ -4766,9 +4951,9 @@ set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
NodeOptInfo opt;
OptEnv env;
- env.enc = reg->enc;
- env.options = reg->options;
- env.ambig_flag = reg->ambig_flag;
+ env.enc = reg->enc;
+ env.options = reg->options;
+ env.case_fold_flag = reg->case_fold_flag;
env.scan_env = scan_env;
clear_mml(&env.mmd);
@@ -4831,6 +5016,38 @@ clear_optimize_info(regex_t* reg)
#ifdef ONIG_DEBUG
+static void print_enc_string(FILE* fp, OnigEncoding enc,
+ const UChar *s, const UChar *end)
+{
+ fprintf(fp, "\nPATTERN: /");
+
+ if (ONIGENC_MBC_MINLEN(enc) > 1) {
+ const UChar *p;
+ OnigCodePoint code;
+
+ p = s;
+ while (p < end) {
+ code = ONIGENC_MBC_TO_CODE(enc, p, end);
+ if (code >= 0x80) {
+ fprintf(fp, " 0x%04x ", (int )code);
+ }
+ else {
+ fputc((int )code, fp);
+ }
+
+ p += enc_len(enc, p);
+ }
+ }
+ else {
+ while (s < end) {
+ fputc((int )*s, fp);
+ s++;
+ }
+ }
+
+ fprintf(fp, "/\n");
+}
+
static void
print_distance_range(FILE* f, OnigDistance a, OnigDistance b)
{
@@ -5048,7 +5265,7 @@ onig_clone(regex_t** to, regex_t* from)
}
#endif /* USE_MULTI_THREAD_SYSTEM */
- r = onig_alloc_init(&reg, ONIG_OPTION_NONE, ONIGENC_AMBIGUOUS_MATCH_DEFAULT,
+ r = onig_alloc_init(&reg, ONIG_OPTION_NONE, ONIGENC_CASE_FOLD_DEFAULT,
from->enc, ONIG_SYNTAX_DEFAULT);
if (r != 0) {
ONIG_STATE_DEC(from);
@@ -5122,6 +5339,10 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
reg->state = ONIG_STATE_COMPILING;
+#ifdef ONIG_DEBUG
+ print_enc_string(stderr, reg->enc, pattern, pattern_end);
+#endif
+
if (reg->alloc == 0) {
init_size = (pattern_end - pattern) * 2;
if (init_size <= 0) init_size = COMPILE_INIT_SIZE;
@@ -5157,10 +5378,6 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
}
#endif
-#ifdef ONIG_DEBUG_PARSE_TREE
- print_tree(stderr, root);
-#endif
-
#ifdef USE_SUBEXP_CALL
if (scan_env.num_call > 0) {
r = unset_addr_list_init(&uslist, scan_env.num_call);
@@ -5182,6 +5399,10 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
r = setup_tree(root, reg, 0, &scan_env);
if (r != 0) goto err_unset;
+#ifdef ONIG_DEBUG_PARSE_TREE
+ print_tree(stderr, root);
+#endif
+
reg->capture_history = scan_env.capture_history;
reg->bt_mem_start = scan_env.bt_mem_start;
reg->bt_mem_start |= reg->capture_history;
@@ -5313,7 +5534,8 @@ onig_recompile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
static int onig_inited = 0;
extern int
-onig_alloc_init(regex_t** reg, OnigOptionType option, OnigAmbigType ambig_flag,
+onig_alloc_init(regex_t** reg, OnigOptionType option,
+ OnigCaseFoldType case_fold_flag,
OnigEncoding enc, OnigSyntaxType* syntax)
{
if (! onig_inited)
@@ -5352,9 +5574,7 @@ onig_alloc_init(regex_t** reg, OnigOptionType option, OnigAmbigType ambig_flag,
(*reg)->used = 0;
(*reg)->name_table = (void* )NULL;
- (*reg)->ambig_flag = ambig_flag;
- (*reg)->ambig_flag &= ONIGENC_SUPPORT_AMBIG_FLAG(enc);
-
+ (*reg)->case_fold_flag = case_fold_flag;
return 0;
}
@@ -5367,7 +5587,7 @@ onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;
- r = onig_alloc_init(reg, option, ONIGENC_AMBIGUOUS_MATCH_DEFAULT,
+ r = onig_alloc_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT,
enc, syntax);
if (r) return r;
@@ -5380,13 +5600,14 @@ onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
}
extern int
-onig_init()
+onig_init(void)
{
if (onig_inited != 0)
return 0;
onig_inited = 1;
+ THREAD_SYSTEM_INIT;
THREAD_ATOMIC_START;
onigenc_init();
@@ -5423,6 +5644,7 @@ onig_end(void)
onig_inited = 0;
THREAD_ATOMIC_END;
+ THREAD_SYSTEM_END;
return 0;
}
@@ -5905,9 +6127,14 @@ print_indent_tree(FILE* f, Node* node, int indent)
case N_CTYPE:
fprintf(f, "<ctype:%x> ", (int )node);
- switch (NCTYPE(node).type) {
- case CTYPE_WORD: fputs("word", f); break;
- case CTYPE_NOT_WORD: fputs("not word", f); break;
+ switch (NCTYPE(node).ctype) {
+ case ONIGENC_CTYPE_WORD:
+ if (NCTYPE(node).not != 0)
+ fputs("not word", f);
+ else
+ fputs("word", f);
+ break;
+
default:
fprintf(f, "ERROR: undefined ctype.\n");
exit(0);
@@ -5968,11 +6195,11 @@ print_indent_tree(FILE* f, Node* node, int indent)
break;
#endif
- case N_QUALIFIER:
- fprintf(f, "<qualifier:%x>{%d,%d}%s\n", (int )node,
- NQUALIFIER(node).lower, NQUALIFIER(node).upper,
- (NQUALIFIER(node).greedy ? "" : "?"));
- print_indent_tree(f, NQUALIFIER(node).target, indent + add);
+ case N_QUANTIFIER:
+ fprintf(f, "<quantifier:%x>{%d,%d}%s\n", (int )node,
+ NQUANTIFIER(node).lower, NQUANTIFIER(node).upper,
+ (NQUANTIFIER(node).greedy ? "" : "?"));
+ print_indent_tree(f, NQUANTIFIER(node).target, indent + add);
break;
case N_EFFECT:
@@ -6001,7 +6228,7 @@ print_indent_tree(FILE* f, Node* node, int indent)
break;
}
- if (type != N_LIST && type != N_ALT && type != N_QUALIFIER &&
+ if (type != N_LIST && type != N_ALT && type != N_QUANTIFIER &&
type != N_EFFECT)
fprintf(f, "\n");
fflush(f);
diff --git a/regenc.c b/regenc.c
index e505a5fa34..9f50cb63db 100644
--- a/regenc.c
+++ b/regenc.c
@@ -2,7 +2,7 @@
regenc.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -169,51 +169,9 @@ onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s)
}
}
-#ifndef ONIG_RUBY_M17N
-
-#ifndef NOT_RUBY
-
-#define USE_APPLICATION_TO_LOWER_CASE_TABLE
-
-const unsigned short OnigEnc_Unicode_ISO_8859_1_CtypeTable[256] = {
- 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
- 0x2008, 0x228c, 0x2289, 0x2288, 0x2288, 0x2288, 0x2008, 0x2008,
- 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
- 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
- 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
- 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
- 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
- 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
- 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
- 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
- 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
- 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
- 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
- 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
- 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
- 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
- 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0288, 0x0008, 0x0008,
- 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
- 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
- 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
- 0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
- 0x00a0, 0x00a0, 0x10e2, 0x01a0, 0x00a0, 0x00a8, 0x00a0, 0x00a0,
- 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x10e2, 0x00a0, 0x01a0,
- 0x00a0, 0x10a0, 0x10e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0,
- 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
- 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
- 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0,
- 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2,
- 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
- 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
- 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0,
- 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2
-};
-#endif
-
const UChar* OnigEncAsciiToLowerCaseTable = (const UChar* )0;
-#ifndef USE_APPLICATION_TO_LOWER_CASE_TABLE
+#ifndef USE_EXTERNAL_LOWER_CASE_CONV_TABLE
static const UChar BuiltInAsciiToLowerCaseTable[] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
@@ -248,7 +206,7 @@ static const UChar BuiltInAsciiToLowerCaseTable[] = {
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
};
-#endif /* not USE_APPLICATION_TO_LOWER_CASE_TABLE */
+#endif /* not USE_EXTERNAL_LOWER_CASE_CONV_TABLE */
#ifdef USE_UPPER_CASE_TABLE
const UChar OnigEncAsciiToUpperCaseTable[256] = {
@@ -288,23 +246,22 @@ const UChar OnigEncAsciiToUpperCaseTable[256] = {
#endif
const unsigned short OnigEncAsciiCtypeTable[256] = {
- 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
- 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
- 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
- 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
- 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
- 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
- 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
- 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
- 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
- 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
- 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
- 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
- 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
- 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
- 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
- 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
-
+ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+ 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
+ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+ 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+ 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+ 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
+ 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+ 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
+ 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+ 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+ 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
+ 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
+ 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+ 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+ 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
@@ -399,7 +356,7 @@ extern void
onigenc_set_default_caseconv_table(const UChar* table)
{
if (table == (const UChar* )0) {
-#ifndef USE_APPLICATION_TO_LOWER_CASE_TABLE
+#ifndef USE_EXTERNAL_LOWER_CASE_CONV_TABLE
table = BuiltInAsciiToLowerCaseTable;
#else
return ;
@@ -417,7 +374,7 @@ onigenc_get_left_adjust_char_head(OnigEncoding enc, const UChar* start, const UC
return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
}
-const OnigPairAmbigCodes OnigAsciiPairAmbigCodes[] = {
+const OnigPairCaseFoldCodes OnigAsciiLowerMap[] = {
{ 0x41, 0x61 },
{ 0x42, 0x62 },
{ 0x43, 0x63 },
@@ -443,157 +400,172 @@ const OnigPairAmbigCodes OnigAsciiPairAmbigCodes[] = {
{ 0x57, 0x77 },
{ 0x58, 0x78 },
{ 0x59, 0x79 },
- { 0x5a, 0x7a },
-
- { 0x61, 0x41 },
- { 0x62, 0x42 },
- { 0x63, 0x43 },
- { 0x64, 0x44 },
- { 0x65, 0x45 },
- { 0x66, 0x46 },
- { 0x67, 0x47 },
- { 0x68, 0x48 },
- { 0x69, 0x49 },
- { 0x6a, 0x4a },
- { 0x6b, 0x4b },
- { 0x6c, 0x4c },
- { 0x6d, 0x4d },
- { 0x6e, 0x4e },
- { 0x6f, 0x4f },
- { 0x70, 0x50 },
- { 0x71, 0x51 },
- { 0x72, 0x52 },
- { 0x73, 0x53 },
- { 0x74, 0x54 },
- { 0x75, 0x55 },
- { 0x76, 0x56 },
- { 0x77, 0x57 },
- { 0x78, 0x58 },
- { 0x79, 0x59 },
- { 0x7a, 0x5a }
+ { 0x5a, 0x7a }
};
extern int
-onigenc_ascii_get_all_pair_ambig_codes(OnigAmbigType flag,
- const OnigPairAmbigCodes** ccs)
+onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag,
+ OnigApplyAllCaseFoldFunc f, void* arg)
{
- if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
- *ccs = OnigAsciiPairAmbigCodes;
- return (sizeof(OnigAsciiPairAmbigCodes) / sizeof(OnigPairAmbigCodes));
+ OnigCodePoint code;
+ int i, r;
+
+ for (i = 0; i < sizeof(OnigAsciiLowerMap)/sizeof(OnigPairCaseFoldCodes)
+ ; i++) {
+ code = OnigAsciiLowerMap[i].to;
+ r = (*f)(OnigAsciiLowerMap[i].from, &code, 1, arg);
+ if (r != 0) return r;
+
+ code = OnigAsciiLowerMap[i].from;
+ r = (*f)(OnigAsciiLowerMap[i].to, &code, 1, arg);
+ if (r != 0) return r;
}
- else {
- return 0;
+
+ return 0;
+}
+
+extern int
+onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag,
+ const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
+{
+ if (0x41 <= *p && *p <= 0x5a) {
+ items[0].byte_len = 1;
+ items[0].code_len = 1;
+ items[0].code[0] = (OnigCodePoint )(*p + 0x20);
+ return 1;
+ }
+ else if (0x61 <= *p && *p <= 0x7a) {
+ items[0].byte_len = 1;
+ items[0].code_len = 1;
+ items[0].code[0] = (OnigCodePoint )(*p - 0x20);
+ return 1;
}
+ else
+ return 0;
}
extern int
-onigenc_nothing_get_all_comp_ambig_codes(OnigAmbigType flag,
- const OnigCompAmbigCodes** ccs)
+ss_apply_all_case_fold(OnigCaseFoldType flag,
+ OnigApplyAllCaseFoldFunc f, void* arg)
{
- return 0;
+ static OnigCodePoint ss[] = { 0x73, 0x73 };
+
+ return (*f)((OnigCodePoint )0xdf, ss, 2, arg);
}
extern int
-onigenc_iso_8859_1_get_all_pair_ambig_codes(OnigAmbigType flag,
- const OnigPairAmbigCodes** ccs)
+onigenc_apply_all_case_fold_with_map(int map_size,
+ const OnigPairCaseFoldCodes map[],
+ int ess_tsett_flag, OnigCaseFoldType flag,
+ OnigApplyAllCaseFoldFunc f, void* arg)
{
- static const OnigPairAmbigCodes cc[] = {
- { 0xc0, 0xe0 },
- { 0xc1, 0xe1 },
- { 0xc2, 0xe2 },
- { 0xc3, 0xe3 },
- { 0xc4, 0xe4 },
- { 0xc5, 0xe5 },
- { 0xc6, 0xe6 },
- { 0xc7, 0xe7 },
- { 0xc8, 0xe8 },
- { 0xc9, 0xe9 },
- { 0xca, 0xea },
- { 0xcb, 0xeb },
- { 0xcc, 0xec },
- { 0xcd, 0xed },
- { 0xce, 0xee },
- { 0xcf, 0xef },
-
- { 0xd0, 0xf0 },
- { 0xd1, 0xf1 },
- { 0xd2, 0xf2 },
- { 0xd3, 0xf3 },
- { 0xd4, 0xf4 },
- { 0xd5, 0xf5 },
- { 0xd6, 0xf6 },
- { 0xd8, 0xf8 },
- { 0xd9, 0xf9 },
- { 0xda, 0xfa },
- { 0xdb, 0xfb },
- { 0xdc, 0xfc },
- { 0xdd, 0xfd },
- { 0xde, 0xfe },
-
- { 0xe0, 0xc0 },
- { 0xe1, 0xc1 },
- { 0xe2, 0xc2 },
- { 0xe3, 0xc3 },
- { 0xe4, 0xc4 },
- { 0xe5, 0xc5 },
- { 0xe6, 0xc6 },
- { 0xe7, 0xc7 },
- { 0xe8, 0xc8 },
- { 0xe9, 0xc9 },
- { 0xea, 0xca },
- { 0xeb, 0xcb },
- { 0xec, 0xcc },
- { 0xed, 0xcd },
- { 0xee, 0xce },
- { 0xef, 0xcf },
-
- { 0xf0, 0xd0 },
- { 0xf1, 0xd1 },
- { 0xf2, 0xd2 },
- { 0xf3, 0xd3 },
- { 0xf4, 0xd4 },
- { 0xf5, 0xd5 },
- { 0xf6, 0xd6 },
- { 0xf8, 0xd8 },
- { 0xf9, 0xd9 },
- { 0xfa, 0xda },
- { 0xfb, 0xdb },
- { 0xfc, 0xdc },
- { 0xfd, 0xdd },
- { 0xfe, 0xde }
- };
+ OnigCodePoint code;
+ int i, r;
- if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
- *ccs = OnigAsciiPairAmbigCodes;
- return (sizeof(OnigAsciiPairAmbigCodes) / sizeof(OnigPairAmbigCodes));
- }
- else if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) {
- *ccs = cc;
- return sizeof(cc) / sizeof(OnigPairAmbigCodes);
+ r = onigenc_ascii_apply_all_case_fold(flag, f, arg);
+ if (r != 0) return r;
+
+ for (i = 0; i < map_size; i++) {
+ code = map[i].to;
+ r = (*f)(map[i].from, &code, 1, arg);
+ if (r != 0) return r;
+
+ code = map[i].from;
+ r = (*f)(map[i].to, &code, 1, arg);
+ if (r != 0) return r;
}
- else
- return 0;
+
+ if (ess_tsett_flag != 0)
+ return ss_apply_all_case_fold(flag, f, arg);
+
+ return 0;
}
extern int
-onigenc_ess_tsett_get_all_comp_ambig_codes(OnigAmbigType flag,
- const OnigCompAmbigCodes** ccs)
+onigenc_get_case_fold_codes_by_str_with_map(int map_size,
+ const OnigPairCaseFoldCodes map[],
+ int ess_tsett_flag, OnigCaseFoldType flag,
+ const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
{
- static const OnigCompAmbigCodes folds[] = {
- { 2, 0xdf, {{ 2, { 0x53, 0x53 } }, { 2, { 0x73, 0x73} } } }
- };
+ if (0x41 <= *p && *p <= 0x5a) {
+ items[0].byte_len = 1;
+ items[0].code_len = 1;
+ items[0].code[0] = (OnigCodePoint )(*p + 0x20);
+ if (*p == 0x53 && ess_tsett_flag != 0 && end > p + 1
+ && (*(p+1) == 0x53 || *(p+1) == 0x73)) {
+ /* SS */
+ items[1].byte_len = 2;
+ items[1].code_len = 1;
+ items[1].code[0] = (OnigCodePoint )0xdf;
+ return 2;
+ }
+ else
+ return 1;
+ }
+ else if (0x61 <= *p && *p <= 0x7a) {
+ items[0].byte_len = 1;
+ items[0].code_len = 1;
+ items[0].code[0] = (OnigCodePoint )(*p - 0x20);
+ if (*p == 0x73 && ess_tsett_flag != 0 && end > p + 1
+ && (*(p+1) == 0x73 || *(p+1) == 0x53)) {
+ /* ss */
+ items[1].byte_len = 2;
+ items[1].code_len = 1;
+ items[1].code[0] = (OnigCodePoint )0xdf;
+ return 2;
+ }
+ else
+ return 1;
+ }
+ else if (*p == 0xdf && ess_tsett_flag != 0) {
+ items[0].byte_len = 1;
+ items[0].code_len = 2;
+ items[0].code[0] = (OnigCodePoint )'s';
+ items[0].code[1] = (OnigCodePoint )'s';
+
+ items[1].byte_len = 1;
+ items[1].code_len = 2;
+ items[1].code[0] = (OnigCodePoint )'S';
+ items[1].code[1] = (OnigCodePoint )'S';
+
+ items[2].byte_len = 1;
+ items[2].code_len = 2;
+ items[2].code[0] = (OnigCodePoint )'s';
+ items[2].code[1] = (OnigCodePoint )'S';
- if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) {
- *ccs = folds;
- return sizeof(folds) / sizeof(OnigCompAmbigCodes);
+ items[3].byte_len = 1;
+ items[3].code_len = 2;
+ items[3].code[0] = (OnigCodePoint )'S';
+ items[3].code[1] = (OnigCodePoint )'s';
+
+ return 4;
}
- else
- return 0;
+ else {
+ int i;
+
+ for (i = 0; i < map_size; i++) {
+ if (*p == map[i].from) {
+ items[0].byte_len = 1;
+ items[0].code_len = 1;
+ items[0].code[0] = map[i].to;
+ return 1;
+ }
+ else if (*p == map[i].to) {
+ items[0].byte_len = 1;
+ items[0].code_len = 1;
+ items[0].code[0] = map[i].from;
+ return 1;
+ }
+ }
+ }
+
+ return 0;
}
+
extern int
onigenc_not_support_get_ctype_code_range(int ctype,
- const OnigCodePoint* sbr[], const OnigCodePoint* mbr[])
+ OnigCodePoint* sb_out, const OnigCodePoint* ranges[])
{
return ONIG_NO_SUPPORT_CONFIG;
}
@@ -609,34 +581,26 @@ onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end)
/* for single byte encodings */
extern int
-onigenc_ascii_mbc_to_normalize(OnigAmbigType flag, const UChar** p, const UChar*end,
- UChar* lower)
+onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag, const UChar** p,
+ const UChar*end, UChar* lower)
{
- if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) {
- *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(**p);
- }
- else {
- *lower = **p;
- }
+ *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(**p);
(*p)++;
return 1; /* return byte length of converted char to lower */
}
+#if 0
extern int
-onigenc_ascii_is_mbc_ambiguous(OnigAmbigType flag,
+onigenc_ascii_is_mbc_ambiguous(OnigCaseFoldType flag,
const UChar** pp, const UChar* end)
{
const UChar* p = *pp;
(*pp)++;
- if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) {
- return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
- }
- else {
- return FALSE;
- }
+ return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
}
+#endif
extern int
onigenc_single_byte_mbc_enc_len(const UChar* p)
@@ -657,12 +621,6 @@ onigenc_single_byte_code_to_mbclen(OnigCodePoint code)
}
extern int
-onigenc_single_byte_code_to_mbc_first(OnigCodePoint code)
-{
- return (code & 0xff);
-}
-
-extern int
onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf)
{
*buf = (UChar )(code & 0xff);
@@ -706,54 +664,45 @@ onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar* p, const UChar* end)
}
extern int
-onigenc_mbn_mbc_to_normalize(OnigEncoding enc, OnigAmbigType flag,
- const UChar** pp, const UChar* end, UChar* lower)
+onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag,
+ const UChar** pp, const UChar* end, UChar* lower)
{
int len;
const UChar *p = *pp;
if (ONIGENC_IS_MBC_ASCII(p)) {
- if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) {
- *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
- }
- else {
- *lower = *p;
- }
+ *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
(*pp)++;
return 1;
}
else {
+ int i;
+
len = enc_len(enc, p);
- if (lower != p) {
- int i;
- for (i = 0; i < len; i++) {
- *lower++ = *p++;
- }
+ for (i = 0; i < len; i++) {
+ *lower++ = *p++;
}
(*pp) += len;
return len; /* return byte length of converted to lower char */
}
}
+#if 0
extern int
-onigenc_mbn_is_mbc_ambiguous(OnigEncoding enc, OnigAmbigType flag,
+onigenc_mbn_is_mbc_ambiguous(OnigEncoding enc, OnigCaseFoldType flag,
const UChar** pp, const UChar* end)
{
const UChar* p = *pp;
if (ONIGENC_IS_MBC_ASCII(p)) {
(*pp)++;
- if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) {
- return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
- }
- else {
- return FALSE;
- }
+ return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
}
(*pp) += enc_len(enc, p);
return FALSE;
}
+#endif
extern int
onigenc_mb2_code_to_mbclen(OnigCodePoint code)
@@ -772,40 +721,6 @@ onigenc_mb4_code_to_mbclen(OnigCodePoint code)
}
extern int
-onigenc_mb2_code_to_mbc_first(OnigCodePoint code)
-{
- int first;
-
- if ((code & 0xff00) != 0) {
- first = (code >> 8) & 0xff;
- }
- else {
- return (int )code;
- }
- return first;
-}
-
-extern int
-onigenc_mb4_code_to_mbc_first(OnigCodePoint code)
-{
- int first;
-
- if ((code & 0xff000000) != 0) {
- first = (code >> 24) & 0xff;
- }
- else if ((code & 0xff0000) != 0) {
- first = (code >> 16) & 0xff;
- }
- else if ((code & 0xff00) != 0) {
- first = (code >> 8) & 0xff;
- }
- else {
- return (int )code;
- }
- return first;
-}
-
-extern int
onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
{
UChar *p = buf;
@@ -846,14 +761,47 @@ onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
}
extern int
+onigenc_minimum_property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end)
+{
+ static PosixBracketEntryType PBS[] = {
+ { (UChar* )"Alnum", ONIGENC_CTYPE_ALNUM, 5 },
+ { (UChar* )"Alpha", ONIGENC_CTYPE_ALPHA, 5 },
+ { (UChar* )"Blank", ONIGENC_CTYPE_BLANK, 5 },
+ { (UChar* )"Cntrl", ONIGENC_CTYPE_CNTRL, 5 },
+ { (UChar* )"Digit", ONIGENC_CTYPE_DIGIT, 5 },
+ { (UChar* )"Graph", ONIGENC_CTYPE_GRAPH, 5 },
+ { (UChar* )"Lower", ONIGENC_CTYPE_LOWER, 5 },
+ { (UChar* )"Print", ONIGENC_CTYPE_PRINT, 5 },
+ { (UChar* )"Punct", ONIGENC_CTYPE_PUNCT, 5 },
+ { (UChar* )"Space", ONIGENC_CTYPE_SPACE, 5 },
+ { (UChar* )"Upper", ONIGENC_CTYPE_UPPER, 5 },
+ { (UChar* )"XDigit", ONIGENC_CTYPE_XDIGIT, 6 },
+ { (UChar* )"ASCII", ONIGENC_CTYPE_ASCII, 5 },
+ { (UChar* )"Word", ONIGENC_CTYPE_WORD, 4 },
+ { (UChar* )NULL, -1, 0 }
+ };
+
+ PosixBracketEntryType *pb;
+ int len;
+
+ len = onigenc_strlen(enc, p, end);
+ for (pb = PBS; IS_NOT_NULL(pb->name); pb++) {
+ if (len == pb->len &&
+ onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0)
+ return pb->ctype;
+ }
+
+ return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
+}
+
+extern int
onigenc_mb2_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
unsigned int ctype)
{
if (code < 128)
return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
else {
- if ((ctype & (ONIGENC_CTYPE_WORD |
- ONIGENC_CTYPE_GRAPH | ONIGENC_CTYPE_PRINT)) != 0) {
+ if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) {
return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);
}
}
@@ -868,8 +816,7 @@ onigenc_mb4_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
if (code < 128)
return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
else {
- if ((ctype & (ONIGENC_CTYPE_WORD |
- ONIGENC_CTYPE_GRAPH | ONIGENC_CTYPE_PRINT)) != 0) {
+ if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) {
return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);
}
}
@@ -896,133 +843,66 @@ onigenc_with_ascii_strncmp(OnigEncoding enc, const UChar* p, const UChar* end,
return 0;
}
-#else /* ONIG_RUBY_M17N */
-
-extern int
-onigenc_is_code_ctype(OnigEncoding enc, OnigCodePoint code, int ctype)
+/* Property management */
+static int
+resize_property_list(int new_size, const OnigCodePoint*** plist, int* psize)
{
- switch (ctype) {
- case ONIGENC_CTYPE_NEWLINE:
- if (code == 0x0a) return 1;
- break;
-
- case ONIGENC_CTYPE_ALPHA:
- return m17n_isalpha(enc, code);
- break;
- case ONIGENC_CTYPE_BLANK:
- return ONIGENC_IS_CODE_BLANK(enc, (int )(code));
- break;
- case ONIGENC_CTYPE_CNTRL:
- return m17n_iscntrl(enc, code);
- break;
- case ONIGENC_CTYPE_DIGIT:
- return m17n_isdigit(enc, code);
- break;
- case ONIGENC_CTYPE_GRAPH:
- return ONIGENC_IS_CODE_GRAPH(enc, (int )(code));
- break;
- case ONIGENC_CTYPE_LOWER:
- return m17n_islower(enc, code);
- break;
- case ONIGENC_CTYPE_PRINT:
- return m17n_isprint(enc, code);
- break;
- case ONIGENC_CTYPE_PUNCT:
- return m17n_ispunct(enc, code);
- break;
- case ONIGENC_CTYPE_SPACE:
- return m17n_isspace(enc, code);
- break;
- case ONIGENC_CTYPE_UPPER:
- return m17n_isupper(enc, code);
- break;
- case ONIGENC_CTYPE_XDIGIT:
- return m17n_isxdigit(enc, code);
- break;
- case ONIGENC_CTYPE_WORD:
- return m17n_iswchar(enc, code);
- break;
- case ONIGENC_CTYPE_ASCII:
- return (code < 128 ? TRUE : FALSE);
- break;
- case ONIGENC_CTYPE_ALNUM:
- return m17n_isalnum(enc, code);
- break;
- default:
- break;
+ int size;
+ const OnigCodePoint **list = *plist;
+
+ size = sizeof(OnigCodePoint*) * new_size;
+ if (IS_NULL(list)) {
+ list = (const OnigCodePoint** )xmalloc(size);
+ }
+ else {
+ list = (const OnigCodePoint** )xrealloc((void* )list, size);
}
- return 0;
-}
+ if (IS_NULL(list)) return ONIGERR_MEMORY;
-extern int
-onigenc_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
-{
- int c, len;
+ *plist = list;
+ *psize = new_size;
- m17n_mbcput(enc, code, buf);
- c = m17n_firstbyte(enc, code);
- len = enc_len(enc, c);
- return len;
+ return 0;
}
extern int
-onigenc_mbc_to_lower(OnigEncoding enc, UChar* p, UChar* buf)
+onigenc_property_list_add_property(UChar* name, const OnigCodePoint* prop,
+ hash_table_type **table, const OnigCodePoint*** plist, int *pnum,
+ int *psize)
{
- unsigned int c, low;
+#define PROP_INIT_SIZE 16
- c = m17n_codepoint(enc, p, p + enc_len(enc, *p));
- low = m17n_tolower(enc, c);
- m17n_mbcput(enc, low, buf);
+ int r;
- return m17n_codelen(enc, low);
-}
-
-extern int
-onigenc_is_mbc_ambiguous(OnigEncoding enc, OnigAmbigType flag,
- UChar** pp, UChar* end)
-{
- int len;
- unsigned int c;
- UChar* p = *pp;
+ if (*psize <= *pnum) {
+ int new_size = (*psize == 0 ? PROP_INIT_SIZE : *psize * 2);
+ r = resize_property_list(new_size, plist, psize);
+ if (r != 0) return r;
+ }
- len = enc_len(enc, *p);
- (*pp) += len;
- c = m17n_codepoint(enc, p, p + len);
+ (*plist)[*pnum] = prop;
- if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) {
- if (m17n_isupper(enc, c) || m17n_islower(enc, c))
- return TRUE;
+ if (ONIG_IS_NULL(*table)) {
+ *table = onig_st_init_strend_table_with_size(PROP_INIT_SIZE);
+ if (ONIG_IS_NULL(*table)) return ONIGERR_MEMORY;
}
- return FALSE;
+ *pnum = *pnum + 1;
+ onig_st_insert_strend(*table, name, name + strlen((char* )name),
+ (hash_data_type )(*pnum + ONIGENC_MAX_STD_CTYPE));
+ return 0;
}
-extern UChar*
-onigenc_get_left_adjust_char_head(OnigEncoding enc, UChar* start, UChar* s)
+extern int
+onigenc_property_list_init(int (*f)())
{
- UChar *p;
- int len;
+ int r;
- if (s <= start) return s;
- p = s;
+ THREAD_ATOMIC_START;
- while (!m17n_islead(enc, *p) && p > start) p--;
- while (p + (len = enc_len(enc, *p)) < s) {
- p += len;
- }
- if (p + len == s) return s;
- return p;
-}
+ r = f();
-extern int
-onigenc_is_allowed_reverse_match(OnigEncoding enc,
- const UChar* s, const UChar* end)
-{
- return ONIGENC_IS_SINGLEBYTE(enc);
+ THREAD_ATOMIC_END;
+ return r;
}
-
-extern void
-onigenc_set_default_caseconv_table(UChar* table) { }
-
-#endif /* ONIG_RUBY_M17N */
diff --git a/regenc.h b/regenc.h
index 58ee3e7f22..b1d6f22818 100644
--- a/regenc.h
+++ b/regenc.h
@@ -28,12 +28,20 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
-
-#ifndef RUBY_PLATFORM
+#ifndef REGINT_H
#include "config.h"
+#ifdef ONIG_ESCAPE_UCHAR_COLLISION
+#undef ONIG_ESCAPE_UCHAR_COLLISION
+#endif
#endif
#include "oniguruma.h"
+typedef struct {
+ OnigCodePoint from;
+ OnigCodePoint to;
+} OnigPairCaseFoldCodes;
+
+
#ifndef NULL
#define NULL ((void* )0)
#endif
@@ -57,34 +65,59 @@
#define ONIG_CHECK_NULL_RETURN(p) if (ONIG_IS_NULL(p)) return NULL
#define ONIG_CHECK_NULL_RETURN_VAL(p,val) if (ONIG_IS_NULL(p)) return (val)
+/* character types bit flag */
+#define BIT_CTYPE_NEWLINE (1<< ONIGENC_CTYPE_NEWLINE)
+#define BIT_CTYPE_ALPHA (1<< ONIGENC_CTYPE_ALPHA)
+#define BIT_CTYPE_BLANK (1<< ONIGENC_CTYPE_BLANK)
+#define BIT_CTYPE_CNTRL (1<< ONIGENC_CTYPE_CNTRL)
+#define BIT_CTYPE_DIGIT (1<< ONIGENC_CTYPE_DIGIT)
+#define BIT_CTYPE_GRAPH (1<< ONIGENC_CTYPE_GRAPH)
+#define BIT_CTYPE_LOWER (1<< ONIGENC_CTYPE_LOWER)
+#define BIT_CTYPE_PRINT (1<< ONIGENC_CTYPE_PRINT)
+#define BIT_CTYPE_PUNCT (1<< ONIGENC_CTYPE_PUNCT)
+#define BIT_CTYPE_SPACE (1<< ONIGENC_CTYPE_SPACE)
+#define BIT_CTYPE_UPPER (1<< ONIGENC_CTYPE_UPPER)
+#define BIT_CTYPE_XDIGIT (1<< ONIGENC_CTYPE_XDIGIT)
+#define BIT_CTYPE_WORD (1<< ONIGENC_CTYPE_WORD)
+#define BIT_CTYPE_ALNUM (1<< ONIGENC_CTYPE_ALNUM)
+#define BIT_CTYPE_ASCII (1<< ONIGENC_CTYPE_ASCII)
+
+#define CTYPE_TO_BIT(ctype) (1<<(ctype))
+#define CTYPE_IS_WORD_GRAPH_PRINT(ctype) \
+ ((ctype) == ONIGENC_CTYPE_WORD || (ctype) == ONIGENC_CTYPE_GRAPH ||\
+ (ctype) == ONIGENC_CTYPE_PRINT)
+
+
+typedef struct {
+ UChar *name;
+ int ctype;
+ short int len;
+} PosixBracketEntryType;
+
+
+#define USE_UNICODE_PROPERTIES
+/* #define USE_UNICODE_CASE_FOLD_TURKISH_AZERI */
-#ifdef ONIG_RUBY_M17N
-
-#define ONIG_ENCODING_INIT_DEFAULT ONIG_ENCODING_UNDEF
-
-#else /* ONIG_RUBY_M17N */
-
-#define USE_UNICODE_FULL_RANGE_CTYPE
/* following must not use with USE_CRNL_AS_LINE_TERMINATOR */
/* #define USE_UNICODE_ALL_LINE_TERMINATORS */ /* see Unicode.org UTF#18 */
+
#define ONIG_ENCODING_INIT_DEFAULT ONIG_ENCODING_ASCII
/* for encoding system implementation (internal) */
-ONIG_EXTERN int onigenc_ascii_get_all_pair_ambig_codes P_((OnigAmbigType flag, const OnigPairAmbigCodes** acs));
-ONIG_EXTERN int onigenc_nothing_get_all_comp_ambig_codes P_((OnigAmbigType flag, const OnigCompAmbigCodes** acs));
-ONIG_EXTERN int onigenc_iso_8859_1_get_all_pair_ambig_codes P_((OnigAmbigType flag, const OnigPairAmbigCodes** acs));
-ONIG_EXTERN int onigenc_ess_tsett_get_all_comp_ambig_codes P_((OnigAmbigType flag, const OnigCompAmbigCodes** acs));
-ONIG_EXTERN int onigenc_not_support_get_ctype_code_range P_((int ctype, const OnigCodePoint* sbr[], const OnigCodePoint* mbr[]));
+ONIG_EXTERN int onigenc_ascii_apply_all_case_fold P_((OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg));
+ONIG_EXTERN int onigenc_ascii_get_case_fold_codes_by_str P_((OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]));
+ONIG_EXTERN int onigenc_apply_all_case_fold_with_map P_((int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg));
+ONIG_EXTERN int onigenc_get_case_fold_codes_by_str_with_map P_((int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]));
+ONIG_EXTERN int onigenc_not_support_get_ctype_code_range P_((int ctype, OnigCodePoint* sb_out, const OnigCodePoint* ranges[]));
ONIG_EXTERN int onigenc_is_mbc_newline_0x0a P_((const UChar* p, const UChar* end));
+
/* methods for single byte encoding */
-ONIG_EXTERN int onigenc_ascii_mbc_to_normalize P_((OnigAmbigType flag, const UChar** p, const UChar* end, UChar* lower));
-ONIG_EXTERN int onigenc_ascii_is_mbc_ambiguous P_((OnigAmbigType flag, const UChar** p, const UChar* end));
+ONIG_EXTERN int onigenc_ascii_mbc_case_fold P_((OnigCaseFoldType flag, const UChar** p, const UChar* end, UChar* lower));
ONIG_EXTERN int onigenc_single_byte_mbc_enc_len P_((const UChar* p));
ONIG_EXTERN OnigCodePoint onigenc_single_byte_mbc_to_code P_((const UChar* p, const UChar* end));
ONIG_EXTERN int onigenc_single_byte_code_to_mbclen P_((OnigCodePoint code));
-ONIG_EXTERN int onigenc_single_byte_code_to_mbc_first P_((OnigCodePoint code));
ONIG_EXTERN int onigenc_single_byte_code_to_mbc P_((OnigCodePoint code, UChar *buf));
ONIG_EXTERN UChar* onigenc_single_byte_left_adjust_char_head P_((const UChar* start, const UChar* s));
ONIG_EXTERN int onigenc_always_true_is_allowed_reverse_match P_((const UChar* s, const UChar* end));
@@ -92,37 +125,33 @@ ONIG_EXTERN int onigenc_always_false_is_allowed_reverse_match P_((const UChar* s
/* methods for multi byte encoding */
ONIG_EXTERN OnigCodePoint onigenc_mbn_mbc_to_code P_((OnigEncoding enc, const UChar* p, const UChar* end));
-ONIG_EXTERN int onigenc_mbn_mbc_to_normalize P_((OnigEncoding enc, OnigAmbigType flag, const UChar** p, const UChar* end, UChar* lower));
-ONIG_EXTERN int onigenc_mbn_is_mbc_ambiguous P_((OnigEncoding enc, OnigAmbigType flag, const UChar** p, const UChar* end));
+ONIG_EXTERN int onigenc_mbn_mbc_case_fold P_((OnigEncoding enc, OnigCaseFoldType flag, const UChar** p, const UChar* end, UChar* lower));
ONIG_EXTERN int onigenc_mb2_code_to_mbclen P_((OnigCodePoint code));
-ONIG_EXTERN int onigenc_mb2_code_to_mbc_first P_((OnigCodePoint code));
ONIG_EXTERN int onigenc_mb2_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf));
+ONIG_EXTERN int onigenc_minimum_property_name_to_ctype P_((OnigEncoding enc, UChar* p, UChar* end));
+ONIG_EXTERN int onigenc_unicode_property_name_to_ctype P_((OnigEncoding enc, UChar* p, UChar* end));
ONIG_EXTERN int onigenc_mb2_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype));
ONIG_EXTERN int onigenc_mb4_code_to_mbclen P_((OnigCodePoint code));
-ONIG_EXTERN int onigenc_mb4_code_to_mbc_first P_((OnigCodePoint code));
ONIG_EXTERN int onigenc_mb4_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf));
ONIG_EXTERN int onigenc_mb4_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype));
-ONIG_EXTERN int onigenc_get_all_fold_match_code_ss_0xdf P_((OnigCodePoint** codes));
/* in enc/unicode.c */
ONIG_EXTERN int onigenc_unicode_is_code_ctype P_((OnigCodePoint code, unsigned int ctype));
-ONIG_EXTERN int onigenc_unicode_get_ctype_code_range P_((int ctype, const OnigCodePoint* sbr[], const OnigCodePoint* mbr[]));
+ONIG_EXTERN int onigenc_utf16_32_get_ctype_code_range P_((int ctype, OnigCodePoint *sb_out, const OnigCodePoint* ranges[]));
+ONIG_EXTERN int onigenc_unicode_ctype_code_range P_((int ctype, const OnigCodePoint* ranges[]));
+ONIG_EXTERN int onigenc_unicode_get_case_fold_codes_by_str P_((OnigEncoding enc, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]));
+ONIG_EXTERN int onigenc_unicode_mbc_case_fold P_((OnigEncoding enc, OnigCaseFoldType flag, const UChar** pp, const UChar* end, UChar* fold));
+ONIG_EXTERN int onigenc_unicode_apply_all_case_fold P_((OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg));
#define ONIGENC_ISO_8859_1_TO_LOWER_CASE(c) \
OnigEncISO_8859_1_ToLowerCaseTable[c]
#define ONIGENC_ISO_8859_1_TO_UPPER_CASE(c) \
OnigEncISO_8859_1_ToUpperCaseTable[c]
-#define ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code,ctype) \
- ((OnigEnc_Unicode_ISO_8859_1_CtypeTable[code] & ctype) != 0)
ONIG_EXTERN const UChar OnigEncISO_8859_1_ToLowerCaseTable[];
ONIG_EXTERN const UChar OnigEncISO_8859_1_ToUpperCaseTable[];
-ONIG_EXTERN const unsigned short OnigEnc_Unicode_ISO_8859_1_CtypeTable[];
-ONIG_EXTERN const OnigPairAmbigCodes OnigAsciiPairAmbigCodes[];
-
-#endif /* is not ONIG_RUBY_M17N */
ONIG_EXTERN int
onigenc_with_ascii_strncmp P_((OnigEncoding enc, const UChar* p, const UChar* end, const UChar* sascii /* ascii */, int n));
@@ -137,11 +166,14 @@ ONIG_EXTERN const UChar* OnigEncAsciiToLowerCaseTable;
ONIG_EXTERN const UChar OnigEncAsciiToUpperCaseTable[];
ONIG_EXTERN const unsigned short OnigEncAsciiCtypeTable[];
+#define ONIGENC_IS_ASCII_CODE(code) ((code) < 0x80)
#define ONIGENC_ASCII_CODE_TO_LOWER_CASE(c) OnigEncAsciiToLowerCaseTable[c]
#define ONIGENC_ASCII_CODE_TO_UPPER_CASE(c) OnigEncAsciiToUpperCaseTable[c]
#define ONIGENC_IS_ASCII_CODE_CTYPE(code,ctype) \
- ((OnigEncAsciiCtypeTable[code] & ctype) != 0)
+ ((OnigEncAsciiCtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
#define ONIGENC_IS_ASCII_CODE_CASE_AMBIG(code) \
- ONIGENC_IS_ASCII_CODE_CTYPE(code, (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER))
+ (ONIGENC_IS_ASCII_CODE_CTYPE(code, ONIGENC_CTYPE_UPPER) ||\
+ ONIGENC_IS_ASCII_CODE_CTYPE(code, ONIGENC_CTYPE_LOWER))
+
#endif /* REGENC_H */
diff --git a/regerror.c b/regerror.c
index d6ec91856d..d23b2b030d 100644
--- a/regerror.c
+++ b/regerror.c
@@ -198,7 +198,7 @@ static int to_ascii(OnigEncoding enc, UChar *s, UChar *end,
if (code >= 0x80) {
if (len + 5 <= buf_size) {
sprintf((char* )(&(buf[len])), "\\%03o",
- (unsigned int)(code & 0377));
+ (unsigned int )(code & 0377));
len += 5;
}
else {
@@ -328,13 +328,13 @@ onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist)
p = pat;
while (p < pat_end) {
- if (*p == MC_ESC(enc)) {
+ if (*p == '\\') {
*s++ = *p++;
len = enc_len(enc, p);
while (len-- > 0) *s++ = *p++;
}
else if (*p == '/') {
- *s++ = (unsigned char )MC_ESC(enc);
+ *s++ = (unsigned char )'\\';
*s++ = *p++;
}
else if (ONIGENC_IS_MBC_HEAD(enc, p)) {
diff --git a/regexec.c b/regexec.c
index ba2a1b1cd4..2a568fb1da 100644
--- a/regexec.c
+++ b/regexec.c
@@ -2,7 +2,7 @@
regexec.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -76,7 +76,7 @@ history_root_free(OnigRegion* r)
}
static OnigCaptureTreeNode*
-history_node_new()
+history_node_new(void)
{
OnigCaptureTreeNode* node;
@@ -233,7 +233,7 @@ onig_region_init(OnigRegion* region)
}
extern OnigRegion*
-onig_region_new()
+onig_region_new(void)
{
OnigRegion* r;
@@ -297,47 +297,6 @@ onig_region_copy(OnigRegion* to, OnigRegion* from)
/** stack **/
#define INVALID_STACK_INDEX -1
-typedef long StackIndex;
-
-typedef struct _StackType {
- unsigned int type;
- union {
- struct {
- UChar *pcode; /* byte code position */
- UChar *pstr; /* string position */
- UChar *pstr_prev; /* previous char position of pstr */
-#ifdef USE_COMBINATION_EXPLOSION_CHECK
- unsigned int state_check;
-#endif
- } state;
- struct {
- int count; /* for OP_REPEAT_INC, OP_REPEAT_INC_NG */
- UChar *pcode; /* byte code position (head of repeated target) */
- int num; /* repeat id */
- } repeat;
- struct {
- StackIndex si; /* index of stack */
- } repeat_inc;
- struct {
- int num; /* memory num */
- UChar *pstr; /* start/end position */
- /* Following information is setted, if this stack type is MEM-START */
- StackIndex start; /* prev. info (for backtrack "(...)*" ) */
- StackIndex end; /* prev. info (for backtrack "(...)*" ) */
- } mem;
- struct {
- int num; /* null check id */
- UChar *pstr; /* start position */
- } null_check;
-#ifdef USE_SUBEXP_CALL
- struct {
- UChar *ret_addr; /* byte code position */
- int num; /* null check id */
- UChar *pstr; /* string position */
- } call_frame;
-#endif
- } u;
-} StackType;
/* stack type */
/* used by normal-POP */
@@ -365,24 +324,22 @@ typedef struct _StackType {
#define STK_MASK_TO_VOID_TARGET 0x10ff
#define STK_MASK_MEM_END_OR_MARK 0x8000 /* MEM_END or MEM_END_MARK */
-typedef struct {
- void* stack_p;
- int stack_n;
- OnigOptionType options;
- OnigRegion* region;
- const UChar* start; /* search start position (for \G: BEGIN_POSITION) */
-#ifdef USE_COMBINATION_EXPLOSION_CHECK
- void* state_check_buff;
- int state_check_buff_size;
-#endif
-} MatchArg;
-
+#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
+#define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\
+ (msa).stack_p = (void* )0;\
+ (msa).options = (arg_option);\
+ (msa).region = (arg_region);\
+ (msa).start = (arg_start);\
+ (msa).best_len = ONIG_MISMATCH;\
+} while (0)
+#else
#define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\
- (msa).stack_p = (void* )0;\
- (msa).options = (arg_option);\
- (msa).region = (arg_region);\
- (msa).start = (arg_start);\
+ (msa).stack_p = (void* )0;\
+ (msa).options = (arg_option);\
+ (msa).region = (arg_region);\
+ (msa).start = (arg_start);\
} while (0)
+#endif
#ifdef USE_COMBINATION_EXPLOSION_CHECK
@@ -428,15 +385,15 @@ typedef struct {
#define STACK_INIT(alloc_addr, ptr_num, stack_num) do {\
if (msa->stack_p) {\
alloc_addr = (char* )xalloca(sizeof(char*) * (ptr_num));\
- stk_alloc = (StackType* )(msa->stack_p);\
+ stk_alloc = (OnigStackType* )(msa->stack_p);\
stk_base = stk_alloc;\
stk = stk_base;\
stk_end = stk_base + msa->stack_n;\
}\
else {\
alloc_addr = (char* )xalloca(sizeof(char*) * (ptr_num)\
- + sizeof(StackType) * (stack_num));\
- stk_alloc = (StackType* )(alloc_addr + sizeof(char*) * (ptr_num));\
+ + sizeof(OnigStackType) * (stack_num));\
+ stk_alloc = (OnigStackType* )(alloc_addr + sizeof(char*) * (ptr_num));\
stk_base = stk_alloc;\
stk = stk_base;\
stk_end = stk_base + (stack_num);\
@@ -466,11 +423,11 @@ onig_set_match_stack_limit_size(unsigned int size)
}
static int
-stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
- StackType** arg_stk, StackType* stk_alloc, MatchArg* msa)
+stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end,
+ OnigStackType** arg_stk, OnigStackType* stk_alloc, OnigMatchArg* msa)
{
unsigned int n;
- StackType *x, *stk_base, *stk_end, *stk;
+ OnigStackType *x, *stk_base, *stk_end, *stk;
stk_base = *arg_stk_base;
stk_end = *arg_stk_end;
@@ -478,12 +435,12 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
n = stk_end - stk_base;
if (stk_base == stk_alloc && IS_NULL(msa->stack_p)) {
- x = (StackType* )xmalloc(sizeof(StackType) * n * 2);
+ x = (OnigStackType* )xmalloc(sizeof(OnigStackType) * n * 2);
if (IS_NULL(x)) {
STACK_SAVE;
return ONIGERR_MEMORY;
}
- xmemcpy(x, stk_base, n * sizeof(StackType));
+ xmemcpy(x, stk_base, n * sizeof(OnigStackType));
n *= 2;
}
else {
@@ -494,7 +451,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
else
n = MatchStackLimitSize;
}
- x = (StackType* )xrealloc(stk_base, sizeof(StackType) * n);
+ x = (OnigStackType* )xrealloc(stk_base, sizeof(OnigStackType) * n);
if (IS_NULL(x)) {
STACK_SAVE;
return ONIGERR_MEMORY;
@@ -830,7 +787,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
} while(0)
#define STACK_STOP_BT_END do {\
- StackType *k = stk;\
+ OnigStackType *k = stk;\
while (1) {\
k--;\
STACK_BASE_CHECK(k, "STACK_STOP_BT_END"); \
@@ -845,7 +802,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
} while(0)
#define STACK_NULL_CHECK(isnull,id,s) do {\
- StackType* k = stk;\
+ OnigStackType* k = stk;\
while (1) {\
k--;\
STACK_BASE_CHECK(k, "STACK_NULL_CHECK"); \
@@ -860,7 +817,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
#define STACK_NULL_CHECK_REC(isnull,id,s) do {\
int level = 0;\
- StackType* k = stk;\
+ OnigStackType* k = stk;\
while (1) {\
k--;\
STACK_BASE_CHECK(k, "STACK_NULL_CHECK_REC"); \
@@ -880,7 +837,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
} while(0)
#define STACK_NULL_CHECK_MEMST(isnull,id,s,reg) do {\
- StackType* k = stk;\
+ OnigStackType* k = stk;\
while (1) {\
k--;\
STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST"); \
@@ -920,7 +877,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
#define STACK_NULL_CHECK_MEMST_REC(isnull,id,s,reg) do {\
int level = 0;\
- StackType* k = stk;\
+ OnigStackType* k = stk;\
while (1) {\
k--;\
STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST_REC"); \
@@ -986,7 +943,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
#define STACK_RETURN(addr) do {\
int level = 0;\
- StackType* k = stk;\
+ OnigStackType* k = stk;\
while (1) {\
k--;\
STACK_BASE_CHECK(k, "STACK_RETURN"); \
@@ -1009,25 +966,25 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
}\
} while(0)
-#define STRING_CMP_IC(ambig_flag,s1,ps2,len) do {\
- if (string_cmp_ic(encode, ambig_flag, s1, ps2, len) == 0) \
+#define STRING_CMP_IC(case_fold_flag,s1,ps2,len) do {\
+ if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len) == 0) \
goto fail; \
} while(0)
-static int string_cmp_ic(OnigEncoding enc, int ambig_flag,
+static int string_cmp_ic(OnigEncoding enc, int case_fold_flag,
UChar* s1, UChar** ps2, int mblen)
{
- UChar buf1[ONIGENC_MBC_NORMALIZE_MAXLEN];
- UChar buf2[ONIGENC_MBC_NORMALIZE_MAXLEN];
- UChar *p1, *p2, *end, *s2, *end2;
+ UChar buf1[ONIGENC_MBC_CASE_FOLD_MAXLEN];
+ UChar buf2[ONIGENC_MBC_CASE_FOLD_MAXLEN];
+ UChar *p1, *p2, *end1, *s2, *end2;
int len1, len2;
s2 = *ps2;
- end = s1 + mblen;
+ end1 = s1 + mblen;
end2 = s2 + mblen;
- while (s1 < end) {
- len1 = ONIGENC_MBC_TO_NORMALIZE(enc, ambig_flag, &s1, end, buf1);
- len2 = ONIGENC_MBC_TO_NORMALIZE(enc, ambig_flag, &s2, end2, buf2);
+ while (s1 < end1) {
+ len1 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s1, end1, buf1);
+ len2 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s2, end2, buf2);
if (len1 != len2) return 0;
p1 = buf1;
p2 = buf2;
@@ -1051,31 +1008,36 @@ static int string_cmp_ic(OnigEncoding enc, int ambig_flag,
}\
} while(0)
-#define STRING_CMP_VALUE_IC(ambig_flag,s1,ps2,len,is_fail) do {\
- if (string_cmp_ic(encode, ambig_flag, s1, ps2, len) == 0) \
+#define STRING_CMP_VALUE_IC(case_fold_flag,s1,ps2,len,is_fail) do {\
+ if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len) == 0) \
is_fail = 1; \
else \
is_fail = 0; \
} while(0)
-#define ON_STR_BEGIN(s) ((s) == str)
-#define ON_STR_END(s) ((s) == end)
-#define IS_EMPTY_STR (str == end)
-
-#define DATA_ENSURE(n) \
- if (s + (n) > end) goto fail
-
+#define IS_EMPTY_STR (str == end)
+#define ON_STR_BEGIN(s) ((s) == str)
+#define ON_STR_END(s) ((s) == end)
+#ifdef USE_MATCH_RANGE_IS_COMPLETE_RANGE
+#define DATA_ENSURE_CHECK1 (s < right_range)
+#define DATA_ENSURE_CHECK(n) (s + (n) <= right_range)
+#define DATA_ENSURE(n) if (s + (n) > right_range) goto fail
+#else
+#define DATA_ENSURE_CHECK1 (s < end)
#define DATA_ENSURE_CHECK(n) (s + (n) <= end)
+#define DATA_ENSURE(n) if (s + (n) > end) goto fail
+#endif /* USE_MATCH_RANGE_IS_COMPLETE_RANGE */
+
#ifdef USE_CAPTURE_HISTORY
static int
-make_capture_history_tree(OnigCaptureTreeNode* node, StackType** kp,
- StackType* stk_top, UChar* str, regex_t* reg)
+make_capture_history_tree(OnigCaptureTreeNode* node, OnigStackType** kp,
+ OnigStackType* stk_top, UChar* str, regex_t* reg)
{
int n, r;
OnigCaptureTreeNode* child;
- StackType* k = *kp;
+ OnigStackType* k = *kp;
while (k < stk_top) {
if (k->type == STK_MEM_START) {
@@ -1124,13 +1086,13 @@ static int mem_is_in_memp(int mem, int num, UChar* memp)
}
static int backref_match_at_nested_level(regex_t* reg
- , StackType* top, StackType* stk_base
- , int ignore_case, int ambig_flag
+ , OnigStackType* top, OnigStackType* stk_base
+ , int ignore_case, int case_fold_flag
, int nest, int mem_num, UChar* memp, UChar** s, const UChar* send)
{
UChar *ss, *p, *pstart, *pend = NULL_UCHARP;
int level;
- StackType* k;
+ OnigStackType* k;
level = 0;
k = top;
@@ -1152,7 +1114,7 @@ static int backref_match_at_nested_level(regex_t* reg
ss = *s;
if (ignore_case != 0) {
- if (string_cmp_ic(reg->enc, ambig_flag,
+ if (string_cmp_ic(reg->enc, case_fold_flag,
pstart, &ss, (int )(pend - pstart)) == 0)
return 0; /* or goto next_mem; */
}
@@ -1181,68 +1143,6 @@ static int backref_match_at_nested_level(regex_t* reg
#endif /* USE_BACKREF_AT_LEVEL */
-#ifdef RUBY_PLATFORM
-
-typedef struct {
- int state;
- regex_t* reg;
- MatchArg* msa;
- StackType* stk_base;
-} TrapEnsureArg;
-
-static VALUE
-trap_ensure(VALUE arg)
-{
- TrapEnsureArg* ta = (TrapEnsureArg* )arg;
-
- if (ta->state == 0) { /* trap_exec() is not normal return */
- ONIG_STATE_DEC_THREAD(ta->reg);
- if (! IS_NULL(ta->msa->stack_p) && ta->stk_base != ta->msa->stack_p)
- xfree(ta->stk_base);
-
- MATCH_ARG_FREE(*(ta->msa));
- }
-
- return Qnil;
-}
-
-static VALUE
-trap_exec(VALUE arg)
-{
- TrapEnsureArg* ta;
-
- rb_trap_exec();
-
- ta = (TrapEnsureArg* )arg;
- ta->state = 1; /* normal return */
- return Qnil;
-}
-
-extern void
-onig_exec_trap(regex_t* reg, MatchArg* msa, StackType* stk_base)
-{
- VALUE arg;
- TrapEnsureArg ta;
-
- ta.state = 0;
- ta.reg = reg;
- ta.msa = msa;
- ta.stk_base = stk_base;
- arg = (VALUE )(&ta);
- rb_ensure(trap_exec, arg, trap_ensure, arg);
-}
-
-#define CHECK_INTERRUPT_IN_MATCH_AT do {\
- if (rb_trap_pending) {\
- if (! rb_prohibit_interrupt) {\
- onig_exec_trap(reg, msa, stk_base);\
- }\
- }\
-} while (0)
-#else
-#define CHECK_INTERRUPT_IN_MATCH_AT
-#endif /* RUBY_PLATFORM */
-
#ifdef ONIG_DEBUG_STATISTICS
#define USE_TIMEOFDAY
@@ -1286,29 +1186,14 @@ static int MaxStackDepth = 0;
OpTime[OpCurr] += TIMEDIFF(te, ts);\
} while (0)
-#ifdef RUBY_PLATFORM
-
-/*
- * :nodoc:
- */
-static VALUE onig_stat_print()
-{
- onig_print_statistics(stderr);
- return Qnil;
-}
-#endif
-
-extern void onig_statistics_init()
+extern void
+onig_statistics_init(void)
{
int i;
for (i = 0; i < 256; i++) {
OpCounter[i] = OpPrevCounter[i] = 0; OpTime[i] = 0;
}
MaxStackDepth = 0;
-
-#ifdef RUBY_PLATFORM
- rb_define_global_function("onig_stat_print", onig_stat_print, 0);
-#endif
}
extern void
@@ -1406,8 +1291,11 @@ typedef struct {
/* match data(str - end) from position (sstart). */
/* if sstart == str then set sprev to NULL. */
static int
-match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
- UChar* sprev, MatchArg* msa)
+match_at(regex_t* reg, const UChar* str, const UChar* end,
+#ifdef USE_MATCH_RANGE_IS_COMPLETE_RANGE
+ const UChar* right_range,
+#endif
+ const UChar* sstart, UChar* sprev, OnigMatchArg* msa)
{
static UChar FinishCode[] = { OP_FINISH };
@@ -1417,15 +1305,15 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
RelAddrType addr;
OnigOptionType option = reg->options;
OnigEncoding encode = reg->enc;
- OnigAmbigType ambig_flag = reg->ambig_flag;
+ OnigCaseFoldType case_fold_flag = reg->case_fold_flag;
UChar *s, *q, *sbegin;
UChar *p = reg->p;
char *alloca_base;
- StackType *stk_alloc, *stk_base, *stk, *stk_end;
- StackType *stkp; /* used as any purpose. */
- StackIndex si;
- StackIndex *repeat_stk;
- StackIndex *mem_start_stk, *mem_end_stk;
+ OnigStackType *stk_alloc, *stk_base, *stk, *stk_end;
+ OnigStackType *stkp; /* used as any purpose. */
+ OnigStackIndex si;
+ OnigStackIndex *repeat_stk;
+ OnigStackIndex *mem_start_stk, *mem_end_stk;
#ifdef USE_COMBINATION_EXPLOSION_CHECK
int scv;
unsigned char* state_check_buff = msa->state_check_buff;
@@ -1436,9 +1324,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
STACK_INIT(alloca_base, n, INIT_MATCH_STACK_SIZE);
pop_level = reg->stack_pop_level;
num_mem = reg->num_mem;
- repeat_stk = (StackIndex* )alloca_base;
+ repeat_stk = (OnigStackIndex* )alloca_base;
- mem_start_stk = (StackIndex* )(repeat_stk + reg->num_repeat);
+ mem_start_stk = (OnigStackIndex* )(repeat_stk + reg->num_repeat);
mem_end_stk = mem_start_stk + num_mem;
mem_start_stk--; /* for index start from 1,
mem_start_stk[1]..mem_start_stk[num_mem] */
@@ -1472,7 +1360,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
if (q < end) { xmemcpy(bp, "...\"", 4); bp += 4; }
else { xmemcpy(bp, "\"", 1); bp += 1; }
*bp = 0;
- fputs(buf, stderr);
+ fputs((char* )buf, stderr);
for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr);
onig_print_compiled_byte_code(stderr, p, NULL, encode);
fprintf(stderr, "\n");
@@ -1484,8 +1372,19 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
case OP_END: STAT_OP_IN(OP_END);
n = s - sstart;
if (n > best_len) {
- OnigRegion* region = msa->region;
+ OnigRegion* region;
+#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
+ if (IS_FIND_LONGEST(option)) {
+ if (n > msa->best_len) {
+ msa->best_len = n;
+ msa->best_s = (UChar* )sstart;
+ }
+ else
+ goto end_best_len;
+ }
+#endif
best_len = n;
+ region = msa->region;
if (region) {
#ifdef USE_POSIX_REGION_OPTION
if (IS_POSIX_REGION(msa->options)) {
@@ -1561,6 +1460,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
#endif
} /* if (region) */
} /* n > best_len */
+
+#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
+ end_best_len:
+#endif
STAT_OP_OUT;
if (IS_FIND_CONDITION(option)) {
@@ -1568,7 +1471,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
best_len = ONIG_MISMATCH;
goto fail; /* for retry */
}
- if (IS_FIND_LONGEST(option) && s < end) {
+ if (IS_FIND_LONGEST(option) && DATA_ENSURE_CHECK1) {
goto fail; /* for retry */
}
}
@@ -1592,30 +1495,21 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
case OP_EXACT1_IC: STAT_OP_IN(OP_EXACT1_IC);
{
int len;
- UChar *q, *ss, *sp, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN];
+ UChar *q, *ss, *sp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
DATA_ENSURE(1);
ss = s;
sp = p;
- exact1_ic_retry:
- len = ONIGENC_MBC_TO_NORMALIZE(encode, ambig_flag, &s, end, lowbuf);
+ len = ONIGENC_MBC_CASE_FOLD(encode,
+ /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
+ case_fold_flag,
+ &s, end, lowbuf);
DATA_ENSURE(0);
q = lowbuf;
while (len-- > 0) {
if (*p != *q) {
-#if 1
- if ((ambig_flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
- ambig_flag &= ~ONIGENC_AMBIGUOUS_MATCH_COMPOUND;
- s = ss;
- p = sp;
- goto exact1_ic_retry;
- }
- else
- goto fail;
-#else
goto fail;
-#endif
}
p++; q++;
}
@@ -1693,7 +1587,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
case OP_EXACTN_IC: STAT_OP_IN(OP_EXACTN_IC);
{
int len;
- UChar *ss, *sp, *q, *endp, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN];
+ UChar *ss, *sp, *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
GET_LENGTH_INC(tlen, p);
endp = p + tlen;
@@ -1704,25 +1598,14 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
ss = s;
sp = p;
- exactn_ic_retry:
- len = ONIGENC_MBC_TO_NORMALIZE(encode, ambig_flag, &s, end, lowbuf);
+ len = ONIGENC_MBC_CASE_FOLD(encode,
+ /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
+ case_fold_flag,
+ &s, end, lowbuf);
DATA_ENSURE(0);
q = lowbuf;
while (len-- > 0) {
- if (*p != *q) {
-#if 1
- if ((ambig_flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
- ambig_flag &= ~ONIGENC_AMBIGUOUS_MATCH_COMPOUND;
- s = ss;
- p = sp;
- goto exactn_ic_retry;
- }
- else
- goto fail;
-#else
- goto fail;
-#endif
- }
+ if (*p != *q) goto fail;
p++; q++;
}
}
@@ -1898,7 +1781,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
UChar *ss;
int mb_len = enc_len(encode, s);
- if (s + mb_len > end) {
+ if (! DATA_ENSURE_CHECK(mb_len)) {
DATA_ENSURE(1);
s = (UChar* )end;
p += tlen;
@@ -1978,7 +1861,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
break;
case OP_ANYCHAR_STAR: STAT_OP_IN(OP_ANYCHAR_STAR);
- while (s < end) {
+ while (DATA_ENSURE_CHECK1) {
STACK_PUSH_ALT(p, s, sprev);
n = enc_len(encode, s);
DATA_ENSURE(n);
@@ -1990,7 +1873,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
break;
case OP_ANYCHAR_ML_STAR: STAT_OP_IN(OP_ANYCHAR_ML_STAR);
- while (s < end) {
+ while (DATA_ENSURE_CHECK1) {
STACK_PUSH_ALT(p, s, sprev);
n = enc_len(encode, s);
if (n > 1) {
@@ -2007,7 +1890,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
break;
case OP_ANYCHAR_STAR_PEEK_NEXT: STAT_OP_IN(OP_ANYCHAR_STAR_PEEK_NEXT);
- while (s < end) {
+ while (DATA_ENSURE_CHECK1) {
if (*p == *s) {
STACK_PUSH_ALT(p + 1, s, sprev);
}
@@ -2022,7 +1905,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
break;
case OP_ANYCHAR_ML_STAR_PEEK_NEXT:STAT_OP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT);
- while (s < end) {
+ while (DATA_ENSURE_CHECK1) {
if (*p == *s) {
STACK_PUSH_ALT(p + 1, s, sprev);
}
@@ -2044,7 +1927,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
#ifdef USE_COMBINATION_EXPLOSION_CHECK
case OP_STATE_CHECK_ANYCHAR_STAR: STAT_OP_IN(OP_STATE_CHECK_ANYCHAR_STAR);
GET_STATE_CHECK_NUM_INC(mem, p);
- while (s < end) {
+ while (DATA_ENSURE_CHECK1) {
STATE_CHECK_VAL(scv, mem);
if (scv) goto fail;
@@ -2062,7 +1945,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
STAT_OP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR);
GET_STATE_CHECK_NUM_INC(mem, p);
- while (s < end) {
+ while (DATA_ENSURE_CHECK1) {
STATE_CHECK_VAL(scv, mem);
if (scv) goto fail;
@@ -2121,7 +2004,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
case OP_NOT_WORD_BOUND: STAT_OP_IN(OP_NOT_WORD_BOUND);
if (ON_STR_BEGIN(s)) {
- if (DATA_ENSURE_CHECK(1) && ONIGENC_IS_MBC_WORD(encode, s, end))
+ if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end))
goto fail;
}
else if (ON_STR_END(s)) {
@@ -2139,7 +2022,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
#ifdef USE_WORD_BEGIN_END
case OP_WORD_BEGIN: STAT_OP_IN(OP_WORD_BEGIN);
- if (DATA_ENSURE_CHECK(1) && ONIGENC_IS_MBC_WORD(encode, s, end)) {
+ if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) {
if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
STAT_OP_OUT;
continue;
@@ -2257,7 +2140,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
case OP_MEMORY_START: STAT_OP_IN(OP_MEMORY_START);
GET_MEMNUM_INC(mem, p);
- mem_start_stk[mem] = (StackIndex )((void* )s);
+ mem_start_stk[mem] = (OnigStackIndex )((void* )s);
STAT_OP_OUT;
continue;
break;
@@ -2271,7 +2154,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
case OP_MEMORY_END: STAT_OP_IN(OP_MEMORY_END);
GET_MEMNUM_INC(mem, p);
- mem_end_stk[mem] = (StackIndex )((void* )s);
+ mem_end_stk[mem] = (OnigStackIndex )((void* )s);
STAT_OP_OUT;
continue;
break;
@@ -2288,13 +2171,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
case OP_MEMORY_END_REC: STAT_OP_IN(OP_MEMORY_END_REC);
GET_MEMNUM_INC(mem, p);
- mem_end_stk[mem] = (StackIndex )((void* )s);
+ mem_end_stk[mem] = (OnigStackIndex )((void* )s);
STACK_GET_MEM_START(mem, stkp);
if (BIT_STATUS_AT(reg->bt_mem_start, mem))
mem_start_stk[mem] = GET_STACK_INDEX(stkp);
else
- mem_start_stk[mem] = (StackIndex )((void* )stkp->u.mem.pstr);
+ mem_start_stk[mem] = (OnigStackIndex )((void* )stkp->u.mem.pstr);
STACK_PUSH_MEM_END_MARK(mem);
STAT_OP_OUT;
@@ -2368,7 +2251,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
n = pend - pstart;
DATA_ENSURE(n);
sprev = s;
- STRING_CMP_IC(ambig_flag, pstart, &s, n);
+ STRING_CMP_IC(case_fold_flag, pstart, &s, n);
while (sprev + (len = enc_len(encode, sprev)) < s)
sprev += len;
@@ -2440,7 +2323,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
DATA_ENSURE(n);
sprev = s;
swork = s;
- STRING_CMP_VALUE_IC(ambig_flag, pstart, &swork, n, is_fail);
+ STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, is_fail);
if (is_fail) continue;
s = swork;
while (sprev + (len = enc_len(encode, sprev)) < s)
@@ -2467,8 +2350,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
GET_LENGTH_INC(tlen, p);
sprev = s;
- if (backref_match_at_nested_level(reg, stk, stk_base, ic, ambig_flag
- , (int )level, (int )tlen, p, &s, end)) {
+ if (backref_match_at_nested_level(reg, stk, stk_base, ic
+ , case_fold_flag, (int )level, (int )tlen, p, &s, end)) {
while (sprev + (len = enc_len(encode, sprev)) < s)
sprev += len;
@@ -2649,7 +2532,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
case OP_PUSH_OR_JUMP_EXACT1: STAT_OP_IN(OP_PUSH_OR_JUMP_EXACT1);
GET_RELADDR_INC(addr, p);
- if (*p == *s && DATA_ENSURE_CHECK(1)) {
+ if (*p == *s && DATA_ENSURE_CHECK1) {
p++;
STACK_PUSH_ALT(p + addr, s, sprev);
STAT_OP_OUT;
@@ -2945,33 +2828,18 @@ slow_search(OnigEncoding enc, UChar* target, UChar* target_end,
}
static int
-str_lower_case_match(OnigEncoding enc, int ambig_flag,
+str_lower_case_match(OnigEncoding enc, int case_fold_flag,
const UChar* t, const UChar* tend,
const UChar* p, const UChar* end)
{
int lowlen;
- UChar *q, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN];
- const UChar* tsave;
- const UChar* psave;
-
- tsave = t;
- psave = p;
+ UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
- retry:
while (t < tend) {
- lowlen = ONIGENC_MBC_TO_NORMALIZE(enc, ambig_flag, &p, end, lowbuf);
+ lowlen = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &p, end, lowbuf);
q = lowbuf;
while (lowlen > 0) {
- if (*t++ != *q++) {
- if ((ambig_flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
- ambig_flag &= ~ONIGENC_AMBIGUOUS_MATCH_COMPOUND;
- t = tsave;
- p = psave;
- goto retry;
- }
- else
- return 0;
- }
+ if (*t++ != *q++) return 0;
lowlen--;
}
}
@@ -2980,7 +2848,7 @@ str_lower_case_match(OnigEncoding enc, int ambig_flag,
}
static UChar*
-slow_search_ic(OnigEncoding enc, int ambig_flag,
+slow_search_ic(OnigEncoding enc, int case_fold_flag,
UChar* target, UChar* target_end,
const UChar* text, const UChar* text_end, UChar* text_range)
{
@@ -2994,7 +2862,8 @@ slow_search_ic(OnigEncoding enc, int ambig_flag,
s = (UChar* )text;
while (s < end) {
- if (str_lower_case_match(enc, ambig_flag, target, target_end, s, text_end))
+ if (str_lower_case_match(enc, case_fold_flag, target, target_end,
+ s, text_end))
return s;
s += enc_len(enc, s);
@@ -3036,7 +2905,7 @@ slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end,
}
static UChar*
-slow_search_backward_ic(OnigEncoding enc, int ambig_flag,
+slow_search_backward_ic(OnigEncoding enc, int case_fold_flag,
UChar* target, UChar* target_end,
const UChar* text, const UChar* adjust_text,
const UChar* text_end, const UChar* text_start)
@@ -3051,7 +2920,7 @@ slow_search_backward_ic(OnigEncoding enc, int ambig_flag,
s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s);
while (s >= text) {
- if (str_lower_case_match(enc, ambig_flag,
+ if (str_lower_case_match(enc, case_fold_flag,
target, target_end, s, text_end))
return s;
@@ -3242,7 +3111,7 @@ onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, On
{
int r;
UChar *prev;
- MatchArg msa;
+ OnigMatchArg msa;
#if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM)
start:
@@ -3289,7 +3158,11 @@ onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, On
if (r == 0) {
prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at);
- r = match_at(reg, str, end, at, prev, &msa);
+ r = match_at(reg, str, end,
+#ifdef USE_MATCH_RANGE_IS_COMPLETE_RANGE
+ end,
+#endif
+ at, prev, &msa);
}
MATCH_ARG_FREE(msa);
@@ -3325,7 +3198,7 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range);
break;
case ONIG_OPTIMIZE_EXACT_IC:
- p = slow_search_ic(reg->enc, reg->ambig_flag,
+ p = slow_search_ic(reg->enc, reg->case_fold_flag,
reg->exact, reg->exact_end, p, end, range);
break;
@@ -3446,7 +3319,7 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
break;
case ONIG_OPTIMIZE_EXACT_IC:
- p = slow_search_backward_ic(reg->enc, reg->ambig_flag,
+ p = slow_search_backward_ic(reg->enc, reg->case_fold_flag,
reg->exact, reg->exact_end,
range, adjrange, end, p);
break;
@@ -3535,8 +3408,11 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
{
int r;
UChar *s, *prev;
- MatchArg msa;
+ OnigMatchArg msa;
const UChar *orig_start = start;
+#ifdef USE_MATCH_RANGE_IS_COMPLETE_RANGE
+ const UChar *orig_range = range;
+#endif
#if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM)
start:
@@ -3580,16 +3456,57 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
if (start > end || start < str) goto mismatch_no_msa;
-#define MATCH_AND_RETURN_CHECK \
+
+#ifdef USE_MATCH_RANGE_IS_COMPLETE_RANGE
+#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
+#define MATCH_AND_RETURN_CHECK(upper_range) \
+ r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
+ if (r != ONIG_MISMATCH) {\
+ if (r >= 0) {\
+ if (! IS_FIND_LONGEST(reg->options)) {\
+ goto match;\
+ }\
+ }\
+ else goto finish; /* error */ \
+ }
+#else
+#define MATCH_AND_RETURN_CHECK(upper_range) \
+ r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
+ if (r != ONIG_MISMATCH) {\
+ if (r >= 0) {\
+ goto match;\
+ }\
+ else goto finish; /* error */ \
+ }
+#endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
+#else
+#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
+#define MATCH_AND_RETURN_CHECK(none) \
+ r = match_at(reg, str, end, s, prev, &msa);\
+ if (r != ONIG_MISMATCH) {\
+ if (r >= 0) {\
+ if (! IS_FIND_LONGEST(reg->options)) {\
+ goto match;\
+ }\
+ }\
+ else goto finish; /* error */ \
+ }
+#else
+#define MATCH_AND_RETURN_CHECK(none) \
r = match_at(reg, str, end, s, prev, &msa);\
if (r != ONIG_MISMATCH) {\
- if (r >= 0) goto match;\
- goto finish; /* error */ \
+ if (r >= 0) {\
+ goto match;\
+ }\
+ else goto finish; /* error */ \
}
+#endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
+#endif /* USE_MATCH_RANGE_IS_COMPLETE_RANGE */
+
/* anchor optimize: resume search range */
if (reg->anchor != 0 && str < end) {
- UChar* semi_end;
+ UChar *min_semi_end, *max_semi_end;
if (reg->anchor & ANCHOR_BEGIN_POSITION) {
/* search start-position only */
@@ -3615,49 +3532,50 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
}
}
else if (reg->anchor & ANCHOR_END_BUF) {
- semi_end = (UChar* )end;
+ min_semi_end = max_semi_end = (UChar* )end;
end_buf:
- if ((OnigDistance )(semi_end - str) < reg->anchor_dmin)
+ if ((OnigDistance )(max_semi_end - str) < reg->anchor_dmin)
goto mismatch_no_msa;
if (range > start) {
- if ((OnigDistance )(semi_end - start) > reg->anchor_dmax) {
- start = semi_end - reg->anchor_dmax;
+ if ((OnigDistance )(min_semi_end - start) > reg->anchor_dmax) {
+ start = min_semi_end - reg->anchor_dmax;
if (start < end)
start = onigenc_get_right_adjust_char_head(reg->enc, str, start);
else { /* match with empty at end */
start = onigenc_get_prev_char_head(reg->enc, str, end);
}
}
- if ((OnigDistance )(semi_end - (range - 1)) < reg->anchor_dmin) {
- range = semi_end - reg->anchor_dmin + 1;
+ if ((OnigDistance )(max_semi_end - (range - 1)) < reg->anchor_dmin) {
+ range = max_semi_end - reg->anchor_dmin + 1;
}
if (start >= range) goto mismatch_no_msa;
}
else {
- if ((OnigDistance )(semi_end - range) > reg->anchor_dmax) {
- range = semi_end - reg->anchor_dmax;
+ if ((OnigDistance )(min_semi_end - range) > reg->anchor_dmax) {
+ range = min_semi_end - reg->anchor_dmax;
}
- if ((OnigDistance )(semi_end - start) < reg->anchor_dmin) {
- start = semi_end - reg->anchor_dmin;
+ if ((OnigDistance )(max_semi_end - start) < reg->anchor_dmin) {
+ start = max_semi_end - reg->anchor_dmin;
start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start);
- if (range > start) goto mismatch_no_msa;
}
+ if (range > start) goto mismatch_no_msa;
}
}
else if (reg->anchor & ANCHOR_SEMI_END_BUF) {
UChar* pre_end = ONIGENC_STEP_BACK(reg->enc, str, end, 1);
+ max_semi_end = (UChar* )end;
if (ONIGENC_IS_MBC_NEWLINE(reg->enc, pre_end, end)) {
- semi_end = pre_end;
- if (semi_end > str && start <= semi_end) {
+ min_semi_end = pre_end;
+ if (min_semi_end > str && start <= min_semi_end) {
goto end_buf;
}
}
else {
- semi_end = (UChar* )end;
+ min_semi_end = (UChar* )end;
goto end_buf;
}
}
@@ -3679,10 +3597,10 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
MATCH_ARG_INIT(msa, option, region, start);
#ifdef USE_COMBINATION_EXPLOSION_CHECK
- msa.state_check_buff = (void* )0;
- msa.state_check_buff_size = 0;
+ msa.state_check_buff = (void* )0;
+ msa.state_check_buff_size = 0; /* NO NEED, for valgrind */
#endif
- MATCH_AND_RETURN_CHECK;
+ MATCH_AND_RETURN_CHECK(end);
goto mismatch;
}
goto mismatch_no_msa;
@@ -3733,7 +3651,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
prev = low_prev;
}
while (s <= high) {
- MATCH_AND_RETURN_CHECK;
+ MATCH_AND_RETURN_CHECK(orig_range);
prev = s;
s += enc_len(reg->enc, s);
}
@@ -3746,7 +3664,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) {
do {
- MATCH_AND_RETURN_CHECK;
+ MATCH_AND_RETURN_CHECK(orig_range);
prev = s;
s += enc_len(reg->enc, s);
@@ -3761,16 +3679,21 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
}
do {
- MATCH_AND_RETURN_CHECK;
+ MATCH_AND_RETURN_CHECK(orig_range);
prev = s;
s += enc_len(reg->enc, s);
} while (s < range);
if (s == range) { /* because empty match with /$/. */
- MATCH_AND_RETURN_CHECK;
+ MATCH_AND_RETURN_CHECK(orig_range);
}
}
else { /* backward search */
+#ifdef USE_MATCH_RANGE_IS_COMPLETE_RANGE
+ if (orig_start < end)
+ orig_start += enc_len(reg->enc, orig_start); /* is upper range */
+#endif
+
if (reg->optimize != ONIG_OPTIMIZE_NONE) {
UChar *low, *high, *adjrange, *sch_start;
@@ -3793,7 +3716,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
while (s >= low) {
prev = onigenc_get_prev_char_head(reg->enc, str, s);
- MATCH_AND_RETURN_CHECK;
+ MATCH_AND_RETURN_CHECK(orig_start);
s = prev;
}
} while (s >= range);
@@ -3821,12 +3744,20 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
do {
prev = onigenc_get_prev_char_head(reg->enc, str, s);
- MATCH_AND_RETURN_CHECK;
+ MATCH_AND_RETURN_CHECK(orig_start);
s = prev;
} while (s >= range);
}
mismatch:
+#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
+ if (IS_FIND_LONGEST(reg->options)) {
+ if (msa.best_len >= 0) {
+ s = msa.best_s;
+ goto match;
+ }
+ }
+#endif
r = ONIG_MISMATCH;
finish:
@@ -3877,10 +3808,10 @@ onig_get_options(regex_t* reg)
return reg->options;
}
-extern OnigAmbigType
-onig_get_ambig_flag(regex_t* reg)
+extern OnigCaseFoldType
+onig_get_case_fold_flag(regex_t* reg)
{
- return reg->ambig_flag;
+ return reg->case_fold_flag;
}
extern OnigSyntaxType*
diff --git a/regint.h b/regint.h
index e364e951ed..927ae572b1 100644
--- a/regint.h
+++ b/regint.h
@@ -4,7 +4,7 @@
regint.h - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -56,9 +56,9 @@
/* config */
/* spec. config */
-/* #define USE_UNICODE_FULL_RANGE_CTYPE */ /* --> move to regenc.h */
#define USE_NAMED_GROUP
#define USE_SUBEXP_CALL
+#define USE_BACKREF_AT_LEVEL /* \k<name+n>, \k<name-n> */
#define USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK /* /(?:()|())*\2/ */
#define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE /* /\n$/ =~ "\n" */
#define USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
@@ -71,56 +71,93 @@
/* internal config */
#define USE_RECYCLE_NODE
#define USE_OP_PUSH_OR_JUMP_EXACT
-#define USE_QUALIFIER_PEEK_NEXT
+#define USE_QUANTIFIER_PEEK_NEXT
#define USE_ST_HASH_TABLE
#define USE_SHARED_CCLASS_TABLE
#define INIT_MATCH_STACK_SIZE 160
#define DEFAULT_MATCH_STACK_LIMIT_SIZE 0 /* unlimited */
-/* interface to external system */
-#ifdef NOT_RUBY /* given from Makefile */
+/* */
+/* escape other system UChar definition */
#include "config.h"
-#define USE_BACKREF_AT_LEVEL
+#include "defines.h"
+#ifdef ONIG_ESCAPE_UCHAR_COLLISION
+#undef ONIG_ESCAPE_UCHAR_COLLISION
+#endif
+#define USE_MATCH_RANGE_IS_COMPLETE_RANGE
#define USE_CAPTURE_HISTORY
#define USE_VARIABLE_META_CHARS
#define USE_WORD_BEGIN_END /* "\<": word-begin, "\>": word-end */
#define USE_POSIX_REGION_OPTION /* needed for POSIX API support */
+#define USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
/* #define USE_COMBINATION_EXPLOSION_CHECK */ /* (X*)* */
/* #define USE_MULTI_THREAD_SYSTEM */
-#define THREAD_ATOMIC_START /* depend on thread system */
-#define THREAD_ATOMIC_END /* depend on thread system */
-#define THREAD_PASS /* depend on thread system */
-#define CHECK_INTERRUPT /* depend on application */
+#define THREAD_SYSTEM_INIT /* depend on thread system */
+#define THREAD_SYSTEM_END /* depend on thread system */
+#define THREAD_ATOMIC_START /* depend on thread system */
+#define THREAD_ATOMIC_END /* depend on thread system */
+#define THREAD_PASS /* depend on thread system */
+#ifndef xmalloc
#define xmalloc malloc
#define xrealloc realloc
#define xcalloc calloc
#define xfree free
+#endif
+
+#define CHECK_INTERRUPT_IN_MATCH_AT
+
+#if defined(_WIN32) && !defined(__GNUC__)
+#define vsnprintf _vsnprintf
+#endif
+
+#ifdef RUBY
+
+#define onig_st_init_table st_init_table
+#define onig_st_init_table_with_size st_init_table_with_size
+#define onig_st_init_numtable st_init_numtable
+#define onig_st_init_numtable_with_size st_init_numtable_with_size
+#define onig_st_init_strtable st_init_strtable
+#define onig_st_init_strtable_with_size st_init_strtable_with_size
+#define onig_st_delete st_delete
+#define onig_st_delete_safe st_delete_safe
+#define onig_st_insert st_insert
+#define onig_st_lookup st_lookup
+#define onig_st_foreach st_foreach
+#define onig_st_add_direct st_add_direct
+#define onig_st_free_table st_free_table
+#define onig_st_cleanup_safe st_cleanup_safe
+#define onig_st_copy st_copy
+#define onig_st_nothing_key_clone st_nothing_key_clone
+#define onig_st_nothing_key_free st_nothing_key_free
+#define onig_st_is_member st_is_member
+
#else
-#include "ruby.h"
-#include "rubysig.h" /* for DEFER_INTS, ENABLE_INTS */
-
-#define USE_COMBINATION_EXPLOSION_CHECK /* (X*)* */
-#define USE_MULTI_THREAD_SYSTEM
-
-#define THREAD_ATOMIC_START DEFER_INTS
-#define THREAD_ATOMIC_END ENABLE_INTS
-#define THREAD_PASS rb_thread_schedule()
-#define CHECK_INTERRUPT do {\
- if (rb_trap_pending) {\
- if (! rb_prohibit_interrupt) {\
- rb_trap_exec();\
- }\
- }\
-} while (0)
-#define DEFAULT_WARN_FUNCTION onig_rb_warn
-#define DEFAULT_VERB_WARN_FUNCTION onig_rb_warning
+#define st_init_table onig_st_init_table
+#define st_init_table_with_size onig_st_init_table_with_size
+#define st_init_numtable onig_st_init_numtable
+#define st_init_numtable_with_size onig_st_init_numtable_with_size
+#define st_init_strtable onig_st_init_strtable
+#define st_init_strtable_with_size onig_st_init_strtable_with_size
+#define st_delete onig_st_delete
+#define st_delete_safe onig_st_delete_safe
+#define st_insert onig_st_insert
+#define st_lookup onig_st_lookup
+#define st_foreach onig_st_foreach
+#define st_add_direct onig_st_add_direct
+#define st_free_table onig_st_free_table
+#define st_cleanup_safe onig_st_cleanup_safe
+#define st_copy onig_st_copy
+#define st_nothing_key_clone onig_st_nothing_key_clone
+#define st_nothing_key_free onig_st_nothing_key_free
+/* */
+#define onig_st_is_member st_is_member
-#endif /* else NOT_RUBY */
+#endif
#define STATE_CHECK_STRING_THRESHOLD_LEN 7
-#define STATE_CHECK_BUFF_MAX_SIZE 0x8000
+#define STATE_CHECK_BUFF_MAX_SIZE 0x4000
#define THREAD_PASS_LIMIT_COUNT 8
#define xmemset memset
@@ -128,14 +165,10 @@
#define xmemmove memmove
#if defined(_WIN32) && !defined(__GNUC__)
#define xalloca _alloca
-#ifdef NOT_RUBY
-#define vsnprintf _vsnprintf
-#endif
#else
#define xalloca alloca
#endif
-
#if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM)
#define ONIG_STATE_INC(reg) (reg)->state++
#define ONIG_STATE_DEC(reg) (reg)->state--
@@ -157,60 +190,6 @@
#define ONIG_STATE_DEC_THREAD(reg) /* Nothing */
#endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */
-
-#define onig_st_is_member st_is_member
-
-#ifdef NOT_RUBY
-
-#define st_init_table onig_st_init_table
-#define st_init_table_with_size onig_st_init_table_with_size
-#define st_init_numtable onig_st_init_numtable
-#define st_init_numtable_with_size onig_st_init_numtable_with_size
-#define st_init_strtable onig_st_init_strtable
-#define st_init_strtable_with_size onig_st_init_strtable_with_size
-#define st_init_strend_table_with_size onig_st_init_strend_table_with_size
-#define st_delete onig_st_delete
-#define st_delete_safe onig_st_delete_safe
-#define st_insert onig_st_insert
-#define st_insert_strend onig_st_insert_strend
-#define st_lookup onig_st_lookup
-#define st_lookup_strend onig_st_lookup_strend
-#define st_foreach onig_st_foreach
-#define st_add_direct onig_st_add_direct
-#define st_add_direct_strend onig_st_add_direct_strend
-#define st_free_table onig_st_free_table
-#define st_cleanup_safe onig_st_cleanup_safe
-#define st_copy onig_st_copy
-#define st_nothing_key_clone onig_st_nothing_key_clone
-#define st_nothing_key_free onig_st_nothing_key_free
-
-#else /* NOT_RUBY */
-
-#define onig_st_init_table st_init_table
-#define onig_st_init_table_with_size st_init_table_with_size
-#define onig_st_init_numtable st_init_numtable
-#define onig_st_init_numtable_with_size st_init_numtable_with_size
-#define onig_st_init_strtable st_init_strtable
-#define onig_st_init_strtable_with_size st_init_strtable_with_size
-#define onig_st_init_strend_table_with_size st_init_strend_table_with_size
-#define onig_st_delete st_delete
-#define onig_st_delete_safe st_delete_safe
-#define onig_st_insert st_insert
-#define onig_st_insert_strend st_insert_strend
-#define onig_st_lookup st_lookup
-#define onig_st_lookup_strend st_lookup_strend
-#define onig_st_foreach st_foreach
-#define onig_st_add_direct st_add_direct
-#define onig_st_add_direct_strend st_add_direct_strend
-#define onig_st_free_table st_free_table
-#define onig_st_cleanup_safe st_cleanup_safe
-#define onig_st_copy st_copy
-#define onig_st_nothing_key_clone st_nothing_key_clone
-#define onig_st_nothing_key_free st_nothing_key_free
-
-#endif /* NOT_RUBY */
-
-
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
@@ -232,12 +211,15 @@
#endif
#endif
+#ifdef __BORLANDC__
+#include <malloc.h>
+#endif
+
#ifdef ONIG_DEBUG
# include <stdio.h>
#endif
#include "regenc.h"
-#include "oniguruma.h"
#ifdef MIN
#undef MIN
@@ -413,6 +395,9 @@ typedef unsigned int BitStatusType;
/* ignore-case and multibyte status are included in compiled code. */
#define IS_DYNAMIC_OPTION(option) 0
+#define DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag) \
+ ((case_fold_flag) & ~INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR)
+
#define REPEAT_INFINITE -1
#define IS_REPEAT_INFINITE(n) ((n) == REPEAT_INFINITE)
@@ -743,15 +728,15 @@ typedef void* PointerType;
#define SIZE_OP_STATE_CHECK_ANYCHAR_STAR (SIZE_OPCODE + SIZE_STATE_CHECK_NUM)
#endif
-#define MC_ESC(enc) (enc)->meta_char_table.esc
-#define MC_ANYCHAR(enc) (enc)->meta_char_table.anychar
-#define MC_ANYTIME(enc) (enc)->meta_char_table.anytime
-#define MC_ZERO_OR_ONE_TIME(enc) (enc)->meta_char_table.zero_or_one_time
-#define MC_ONE_OR_MORE_TIME(enc) (enc)->meta_char_table.one_or_more_time
-#define MC_ANYCHAR_ANYTIME(enc) (enc)->meta_char_table.anychar_anytime
+#define MC_ESC(syn) (syn)->meta_char_table.esc
+#define MC_ANYCHAR(syn) (syn)->meta_char_table.anychar
+#define MC_ANYTIME(syn) (syn)->meta_char_table.anytime
+#define MC_ZERO_OR_ONE_TIME(syn) (syn)->meta_char_table.zero_or_one_time
+#define MC_ONE_OR_MORE_TIME(syn) (syn)->meta_char_table.one_or_more_time
+#define MC_ANYCHAR_ANYTIME(syn) (syn)->meta_char_table.anychar_anytime
-#define IS_MC_ESC_CODE(code, enc, syn) \
- ((code) == MC_ESC(enc) && \
+#define IS_MC_ESC_CODE(code, syn) \
+ ((code) == MC_ESC(syn) && \
!IS_SYNTAX_OP2((syn), ONIG_SYN_OP2_INEFFECTIVE_ESCAPE))
@@ -781,14 +766,15 @@ typedef void* PointerType;
ONIG_SYN_BACKSLASH_ESCAPE_IN_CC | ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC )
/* cclass node */
-#define FLAG_CCLASS_NOT 1
-#define FLAG_CCLASS_SHARE (1<<1)
+#define CCLASS_FLAG_NOT 1
+#define CCLASS_FLAG_SHARE (1<<1)
-#define CCLASS_SET_NOT(cc) (cc)->flags |= FLAG_CCLASS_NOT
-#define CCLASS_CLEAR_NOT(cc) (cc)->flags &= ~FLAG_CCLASS_NOT
-#define CCLASS_SET_SHARE(cc) (cc)->flags |= FLAG_CCLASS_SHARE
-#define IS_CCLASS_NOT(cc) (((cc)->flags & FLAG_CCLASS_NOT) != 0)
-#define IS_CCLASS_SHARE(cc) (((cc)->flags & FLAG_CCLASS_SHARE) != 0)
+#define CCLASS_SET_NOT(cc) (cc)->flags |= CCLASS_FLAG_NOT
+#define CCLASS_SET_SHARE(cc) (cc)->flags |= CCLASS_FLAG_SHARE
+#define CCLASS_CLEAR_NOT(cc) (cc)->flags &= ~CCLASS_FLAG_NOT
+
+#define IS_CCLASS_NOT(cc) (((cc)->flags & CCLASS_FLAG_NOT) != 0)
+#define IS_CCLASS_SHARE(cc) (((cc)->flags & CCLASS_FLAG_SHARE) != 0)
typedef struct {
int flags;
@@ -796,6 +782,67 @@ typedef struct {
BBuf* mbuf; /* multi-byte info or NULL */
} CClassNode;
+typedef long OnigStackIndex;
+
+typedef struct _OnigStackType {
+ unsigned int type;
+ union {
+ struct {
+ UChar *pcode; /* byte code position */
+ UChar *pstr; /* string position */
+ UChar *pstr_prev; /* previous char position of pstr */
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+ unsigned int state_check;
+#endif
+ } state;
+ struct {
+ int count; /* for OP_REPEAT_INC, OP_REPEAT_INC_NG */
+ UChar *pcode; /* byte code position (head of repeated target) */
+ int num; /* repeat id */
+ } repeat;
+ struct {
+ OnigStackIndex si; /* index of stack */
+ } repeat_inc;
+ struct {
+ int num; /* memory num */
+ UChar *pstr; /* start/end position */
+ /* Following information is setted, if this stack type is MEM-START */
+ OnigStackIndex start; /* prev. info (for backtrack "(...)*" ) */
+ OnigStackIndex end; /* prev. info (for backtrack "(...)*" ) */
+ } mem;
+ struct {
+ int num; /* null check id */
+ UChar *pstr; /* start position */
+ } null_check;
+#ifdef USE_SUBEXP_CALL
+ struct {
+ UChar *ret_addr; /* byte code position */
+ int num; /* null check id */
+ UChar *pstr; /* string position */
+ } call_frame;
+#endif
+ } u;
+} OnigStackType;
+
+typedef struct {
+ void* stack_p;
+ int stack_n;
+ OnigOptionType options;
+ OnigRegion* region;
+ const UChar* start; /* search start position (for \G: BEGIN_POSITION) */
+#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
+ int best_len; /* for ONIG_OPTION_FIND_LONGEST */
+ UChar* best_s;
+#endif
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+ void* state_check_buff;
+ int state_check_buff_size;
+#endif
+} OnigMatchArg;
+
+
+#define IS_CODE_SB_WORD(enc,code) \
+ (ONIGENC_IS_CODE_ASCII(code) && ONIGENC_IS_CODE_WORD(enc,code))
#ifdef ONIG_DEBUG
@@ -818,11 +865,35 @@ extern void onig_print_statistics P_((FILE* f));
extern UChar* onig_error_code_to_format P_((int code));
extern void onig_snprintf_with_pattern PV_((UChar buf[], int bufsize, OnigEncoding enc, UChar* pat, UChar* pat_end, const UChar *fmt, ...));
extern int onig_bbuf_init P_((BBuf* buf, int size));
-extern int onig_alloc_init P_((regex_t** reg, OnigOptionType option, OnigAmbigType ambig_flag, OnigEncoding enc, OnigSyntaxType* syntax));
+extern int onig_alloc_init P_((regex_t** reg, OnigOptionType option, OnigCaseFoldType case_fold_flag, OnigEncoding enc, OnigSyntaxType* syntax));
extern int onig_compile P_((regex_t* reg, const UChar* pattern, const UChar* pattern_end, OnigErrorInfo* einfo));
extern void onig_chain_reduce P_((regex_t* reg));
extern void onig_chain_link_add P_((regex_t* to, regex_t* add));
extern void onig_transfer P_((regex_t* to, regex_t* from));
extern int onig_is_code_in_cc P_((OnigEncoding enc, OnigCodePoint code, CClassNode* cc));
+/* strend hash */
+typedef void hash_table_type;
+typedef unsigned long hash_data_type;
+
+extern hash_table_type* onig_st_init_strend_table_with_size P_((int size));
+extern int onig_st_lookup_strend P_((hash_table_type* table, const UChar* str_key, const UChar* end_key, hash_data_type *value));
+extern int onig_st_insert_strend P_((hash_table_type* table, const UChar* str_key, const UChar* end_key, hash_data_type value));
+
+/* encoding property management */
+#define PROPERTY_LIST_ADD_PROP(Name, CR) \
+ r = onigenc_property_list_add_property((UChar* )Name, CR,\
+ &PropertyNameTable, &PropertyList, &PropertyListNum,\
+ &PropertyListSize);\
+ if (r != 0) goto end
+
+#define PROPERTY_LIST_INIT_CHECK \
+ if (PropertyInited == 0) {\
+ int r = onigenc_property_list_init(init_property_list);\
+ if (r != 0) return r;\
+ }
+
+extern int onigenc_property_list_add_property P_((UChar* name, const OnigCodePoint* prop, hash_table_type **table, const OnigCodePoint*** plist, int *pnum, int *psize));
+extern int onigenc_property_list_init P_((int (*f)()));
+
#endif /* REGINT_H */
diff --git a/regparse.c b/regparse.c
index b9a9452a0f..a255644b34 100644
--- a/regparse.c
+++ b/regparse.c
@@ -2,7 +2,7 @@
regparse.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -41,6 +41,8 @@ OnigSyntaxType OnigSyntaxRuby = {
ONIG_SYN_OP2_OPTION_RUBY |
ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF |
ONIG_SYN_OP2_ESC_G_SUBEXP_CALL |
+ ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |
+ ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |
ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL |
ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB |
@@ -54,26 +56,21 @@ OnigSyntaxType OnigSyntaxRuby = {
ONIG_SYN_WARN_CC_OP_NOT_ESCAPED |
ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT )
, ONIG_OPTION_NONE
+ ,
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ }
};
OnigSyntaxType* OnigDefaultSyntax = ONIG_SYNTAX_RUBY;
extern void onig_null_warn(const char* s) { }
-#ifdef RUBY_PLATFORM
-extern void
-onig_rb_warn(const char* s)
-{
- rb_warn("%s", s);
-}
-
-extern void
-onig_rb_warning(const char* s)
-{
- rb_warning("%s", s);
-}
-#endif
-
#ifdef DEFAULT_WARN_FUNCTION
static OnigWarnFunc onig_warn = (OnigWarnFunc )DEFAULT_WARN_FUNCTION;
#else
@@ -160,9 +157,7 @@ static void
bitset_set_all(BitSetRef bs)
{
int i;
- for (i = 0; i < BITSET_SIZE; i++) {
- bs[i] = ~((Bits )0);
- }
+ for (i = 0; i < BITSET_SIZE; i++) { bs[i] = ~((Bits )0); }
}
#endif
@@ -170,45 +165,35 @@ static void
bitset_invert(BitSetRef bs)
{
int i;
- for (i = 0; i < BITSET_SIZE; i++) {
- bs[i] = ~(bs[i]);
- }
+ for (i = 0; i < BITSET_SIZE; i++) { bs[i] = ~(bs[i]); }
}
static void
bitset_invert_to(BitSetRef from, BitSetRef to)
{
int i;
- for (i = 0; i < BITSET_SIZE; i++) {
- to[i] = ~(from[i]);
- }
+ for (i = 0; i < BITSET_SIZE; i++) { to[i] = ~(from[i]); }
}
static void
bitset_and(BitSetRef dest, BitSetRef bs)
{
int i;
- for (i = 0; i < BITSET_SIZE; i++) {
- dest[i] &= bs[i];
- }
+ for (i = 0; i < BITSET_SIZE; i++) { dest[i] &= bs[i]; }
}
static void
bitset_or(BitSetRef dest, BitSetRef bs)
{
int i;
- for (i = 0; i < BITSET_SIZE; i++) {
- dest[i] |= bs[i];
- }
+ for (i = 0; i < BITSET_SIZE; i++) { dest[i] |= bs[i]; }
}
static void
bitset_copy(BitSetRef dest, BitSetRef bs)
{
int i;
- for (i = 0; i < BITSET_SIZE; i++) {
- dest[i] = bs[i];
- }
+ for (i = 0; i < BITSET_SIZE; i++) { dest[i] = bs[i]; }
}
extern int
@@ -223,8 +208,8 @@ onig_strncmp(const UChar* s1, const UChar* s2, int n)
return 0;
}
-static void
-k_strcpy(UChar* dest, const UChar* src, const UChar* end)
+extern void
+onig_strcpy(UChar* dest, const UChar* src, const UChar* end)
{
int len = end - src;
if (len > 0) {
@@ -273,7 +258,7 @@ strdup_with_null(OnigEncoding enc, UChar* s, UChar* end)
#define PPEEK_IS(c) (PPEEK == (OnigCodePoint )c)
static UChar*
-k_strcat_capa(UChar* dest, UChar* dest_end, const UChar* src, const UChar* src_end,
+strcat_capa(UChar* dest, UChar* dest_end, const UChar* src, const UChar* src_end,
int capa)
{
UChar* r;
@@ -284,7 +269,7 @@ k_strcat_capa(UChar* dest, UChar* dest_end, const UChar* src, const UChar* src_e
r = (UChar* )xmalloc(capa + 1);
CHECK_NULL_RETURN(r);
- k_strcpy(r + (dest_end - dest), src, src_end);
+ onig_strcpy(r + (dest_end - dest), src, src_end);
return r;
}
@@ -297,8 +282,8 @@ strcat_capa_from_static(UChar* dest, UChar* dest_end,
r = (UChar* )xmalloc(capa + 1);
CHECK_NULL_RETURN(r);
- k_strcpy(r, dest, dest_end);
- k_strcpy(r + (dest_end - dest), src, src_end);
+ onig_strcpy(r, dest, dest_end);
+ onig_strcpy(r + (dest_end - dest), src, src_end);
return r;
}
@@ -332,25 +317,28 @@ static struct st_hash_type type_strend_hash = {
strend_hash,
};
-static st_table*
+extern hash_table_type*
onig_st_init_strend_table_with_size(int size)
{
- return onig_st_init_table_with_size(&type_strend_hash, size);
+ return (hash_table_type* )onig_st_init_table_with_size(&type_strend_hash,
+ size);
}
-static int
-onig_st_lookup_strend(st_table *table, const UChar* str_key, const UChar* end_key, st_data_t *value)
+extern int
+onig_st_lookup_strend(hash_table_type* table, const UChar* str_key,
+ const UChar* end_key, hash_data_type *value)
{
- st_strend_key key;
+ st_strend_key key;
- key.s = (unsigned char* )str_key;
- key.end = (unsigned char* )end_key;
+ key.s = (unsigned char* )str_key;
+ key.end = (unsigned char* )end_key;
- return onig_st_lookup(table, (st_data_t )(&key), value);
+ return onig_st_lookup(table, (st_data_t )(&key), value);
}
-static int
-onig_st_insert_strend(st_table *table, const UChar* str_key, const UChar* end_key, st_data_t value)
+extern int
+onig_st_insert_strend(hash_table_type* table, const UChar* str_key,
+ const UChar* end_key, hash_data_type value)
{
st_strend_key* key;
int result;
@@ -504,7 +492,6 @@ static int
i_names(UChar* key, NameEntry* e, INamesArg* arg)
{
int r = (*(arg->func))(e->name,
- /*e->name + onigenc_str_bytelen_null(arg->enc, e->name), */
e->name + e->name_len,
e->back_num,
(e->back_num > 1 ? e->back_refs : &(e->back_ref1)),
@@ -518,8 +505,7 @@ i_names(UChar* key, NameEntry* e, INamesArg* arg)
extern int
onig_foreach_name(regex_t* reg,
- int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*),
- void* arg)
+ int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)
{
INamesArg narg;
NameTable* t = (NameTable* )reg->name_table;
@@ -585,7 +571,6 @@ typedef struct {
int alloc;
} NameTable;
-
#ifdef ONIG_DEBUG
extern int
onig_print_names(FILE* fp, regex_t* reg)
@@ -683,8 +668,7 @@ name_find(regex_t* reg, UChar* name, UChar* name_end)
extern int
onig_foreach_name(regex_t* reg,
- int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*),
- void* arg)
+ int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)
{
int i, r;
NameEntry* e;
@@ -826,9 +810,8 @@ extern int
onig_name_to_group_numbers(regex_t* reg, const UChar* name,
const UChar* name_end, int** nums)
{
- NameEntry* e;
+ NameEntry* e = name_find(reg, name, name_end);
- e = name_find(reg, name, name_end);
if (IS_NULL(e)) return ONIGERR_UNDEFINED_NAME_REFERENCE;
switch (e->back_num) {
@@ -886,8 +869,7 @@ onig_name_to_backref_number(regex_t* reg, const UChar* name,
extern int
onig_foreach_name(regex_t* reg,
- int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*),
- void* arg)
+ int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)
{
return ONIG_NO_SUPPORT_CONFIG;
}
@@ -928,12 +910,12 @@ scan_env_clear(ScanEnv* env)
BIT_STATUS_CLEAR(env->bt_mem_start);
BIT_STATUS_CLEAR(env->bt_mem_end);
BIT_STATUS_CLEAR(env->backrefed_mem);
- env->error = (UChar* )NULL;
- env->error_end = (UChar* )NULL;
- env->num_call = 0;
- env->num_mem = 0;
+ env->error = (UChar* )NULL;
+ env->error_end = (UChar* )NULL;
+ env->num_call = 0;
+ env->num_mem = 0;
#ifdef USE_NAMED_GROUP
- env->num_named = 0;
+ env->num_named = 0;
#endif
env->mem_alloc = 0;
env->mem_nodes_dynamic = (Node** )NULL;
@@ -1009,7 +991,8 @@ onig_node_free(Node* node)
switch (NTYPE(node)) {
case N_STRING:
- if (IS_NOT_NULL(NSTRING(node).s) && NSTRING(node).s != NSTRING(node).buf) {
+ if (NSTRING(node).capa != 0 &&
+ IS_NOT_NULL(NSTRING(node).s) && NSTRING(node).s != NSTRING(node).buf) {
xfree(NSTRING(node).s);
}
break;
@@ -1033,7 +1016,6 @@ onig_node_free(Node* node)
#else
xfree(node);
#endif
-
node = next_node;
goto start;
}
@@ -1043,17 +1025,15 @@ onig_node_free(Node* node)
{
CClassNode* cc = &(NCCLASS(node));
- if (IS_CCLASS_SHARE(cc))
- return ;
-
+ if (IS_CCLASS_SHARE(cc)) return ;
if (cc->mbuf)
bbuf_free(cc->mbuf);
}
break;
- case N_QUALIFIER:
- if (NQUALIFIER(node).target)
- onig_node_free(NQUALIFIER(node).target);
+ case N_QUANTIFIER:
+ if (NQUANTIFIER(node).target)
+ onig_node_free(NQUANTIFIER(node).target);
break;
case N_EFFECT:
@@ -1144,11 +1124,12 @@ node_new_cclass(void)
}
static Node*
-node_new_cclass_by_codepoint_range(int not,
- const OnigCodePoint sbr[], const OnigCodePoint mbr[])
+node_new_cclass_by_codepoint_range(int not, OnigCodePoint sb_out,
+ const OnigCodePoint ranges[])
{
+ int n, i;
CClassNode* cc;
- int n, i, j;
+ OnigCodePoint j;
Node* node = node_new();
CHECK_NULL_RETURN(node);
@@ -1159,31 +1140,34 @@ node_new_cclass_by_codepoint_range(int not,
if (not != 0) CCLASS_SET_NOT(cc);
BITSET_CLEAR(cc->bs);
- if (IS_NOT_NULL(sbr)) {
- n = ONIGENC_CODE_RANGE_NUM(sbr);
+ if (sb_out > 0 && IS_NOT_NULL(ranges)) {
+ n = ONIGENC_CODE_RANGE_NUM(ranges);
for (i = 0; i < n; i++) {
- for (j = ONIGENC_CODE_RANGE_FROM(sbr, i);
- j <= (int )ONIGENC_CODE_RANGE_TO(sbr, i); j++) {
+ for (j = ONIGENC_CODE_RANGE_FROM(ranges, i);
+ j <= (int )ONIGENC_CODE_RANGE_TO(ranges, i); j++) {
+ if (j >= sb_out) goto sb_end;
+
BITSET_SET_BIT(cc->bs, j);
}
}
}
- if (IS_NULL(mbr)) {
+ sb_end:
+ if (IS_NULL(ranges)) {
is_null:
cc->mbuf = NULL;
}
else {
BBuf* bbuf;
- n = ONIGENC_CODE_RANGE_NUM(mbr);
+ n = ONIGENC_CODE_RANGE_NUM(ranges);
if (n == 0) goto is_null;
bbuf = (BBuf* )xmalloc(sizeof(BBuf));
CHECK_NULL_RETURN_VAL(bbuf, NULL);
bbuf->alloc = n + 1;
bbuf->used = n + 1;
- bbuf->p = (UChar* )((void* )mbr);
+ bbuf->p = (UChar* )((void* )ranges);
cc->mbuf = bbuf;
}
@@ -1192,12 +1176,13 @@ node_new_cclass_by_codepoint_range(int not,
}
static Node*
-node_new_ctype(int type)
+node_new_ctype(int type, int not)
{
Node* node = node_new();
CHECK_NULL_RETURN(node);
node->type = N_CTYPE;
- NCTYPE(node).type = type;
+ NCTYPE(node).ctype = type;
+ NCTYPE(node).not = not;
return node;
}
@@ -1227,8 +1212,26 @@ onig_node_new_list(Node* left, Node* right)
return node_new_list(left, right);
}
-static Node*
-node_new_alt(Node* left, Node* right)
+extern Node*
+onig_node_list_add(Node* list, Node* x)
+{
+ Node *n;
+
+ n = onig_node_new_list(x, NULL);
+ if (IS_NULL(n)) return NULL_NODE;
+
+ if (IS_NOT_NULL(list)) {
+ while (IS_NOT_NULL(NCONS(list).right))
+ list = NCONS(list).right;
+
+ NCONS(list).right = n;
+ }
+
+ return n;
+}
+
+extern Node*
+onig_node_new_alt(Node* left, Node* right)
{
Node* node = node_new();
CHECK_NULL_RETURN(node);
@@ -1318,25 +1321,25 @@ node_new_call(UChar* name, UChar* name_end)
#endif
static Node*
-node_new_qualifier(int lower, int upper, int by_number)
+node_new_quantifier(int lower, int upper, int by_number)
{
Node* node = node_new();
CHECK_NULL_RETURN(node);
- node->type = N_QUALIFIER;
- NQUALIFIER(node).state = 0;
- NQUALIFIER(node).target = NULL;
- NQUALIFIER(node).lower = lower;
- NQUALIFIER(node).upper = upper;
- NQUALIFIER(node).greedy = 1;
- NQUALIFIER(node).target_empty_info = NQ_TARGET_ISNOT_EMPTY;
- NQUALIFIER(node).head_exact = NULL_NODE;
- NQUALIFIER(node).next_head_exact = NULL_NODE;
- NQUALIFIER(node).is_refered = 0;
+ node->type = N_QUANTIFIER;
+ NQUANTIFIER(node).state = 0;
+ NQUANTIFIER(node).target = NULL;
+ NQUANTIFIER(node).lower = lower;
+ NQUANTIFIER(node).upper = upper;
+ NQUANTIFIER(node).greedy = 1;
+ NQUANTIFIER(node).target_empty_info = NQ_TARGET_ISNOT_EMPTY;
+ NQUANTIFIER(node).head_exact = NULL_NODE;
+ NQUANTIFIER(node).next_head_exact = NULL_NODE;
+ NQUANTIFIER(node).is_refered = 0;
if (by_number != 0)
- NQUALIFIER(node).state |= NST_BY_NUMBER;
+ NQUANTIFIER(node).state |= NST_BY_NUMBER;
#ifdef USE_COMBINATION_EXPLOSION_CHECK
- NQUALIFIER(node).comb_exp_check_num = 0;
+ NQUANTIFIER(node).comb_exp_check_num = 0;
#endif
return node;
@@ -1400,14 +1403,14 @@ onig_node_str_cat(Node* node, const UChar* s, const UChar* end)
int capa = len + addlen + NODE_STR_MARGIN;
if (capa <= NSTRING(node).capa) {
- k_strcpy(NSTRING(node).s + len, s, end);
+ onig_strcpy(NSTRING(node).s + len, s, end);
}
else {
if (NSTRING(node).s == NSTRING(node).buf)
p = strcat_capa_from_static(NSTRING(node).s, NSTRING(node).end,
s, end, capa);
else
- p = k_strcat_capa(NSTRING(node).s, NSTRING(node).end, s, end, capa);
+ p = strcat_capa(NSTRING(node).s, NSTRING(node).end, s, end, capa);
CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY);
NSTRING(node).s = p;
@@ -1415,7 +1418,7 @@ onig_node_str_cat(Node* node, const UChar* s, const UChar* end)
}
}
else {
- k_strcpy(NSTRING(node).s + len, s, end);
+ onig_strcpy(NSTRING(node).s + len, s, end);
}
NSTRING(node).end = NSTRING(node).s + len + addlen;
}
@@ -1423,6 +1426,13 @@ onig_node_str_cat(Node* node, const UChar* s, const UChar* end)
return 0;
}
+extern int
+onig_node_str_set(Node* node, const UChar* s, const UChar* end)
+{
+ onig_node_str_clear(node);
+ return onig_node_str_cat(node, s, end);
+}
+
static int
node_str_cat_char(Node* node, UChar c)
{
@@ -1531,6 +1541,24 @@ str_node_can_be_split(StrNode* sn, OnigEncoding enc)
return 0;
}
+#ifdef USE_PAD_TO_SHORT_BYTE_CHAR
+static int
+node_str_head_pad(StrNode* sn, int num, UChar val)
+{
+ UChar buf[NODE_STR_BUF_SIZE];
+ int i, len;
+
+ len = sn->end - sn->s;
+ onig_strcpy(buf, sn->s, sn->end);
+ onig_strcpy(&(sn->s[num]), buf, buf + len);
+ sn->end += num;
+
+ for (i = 0; i < num; i++) {
+ sn->s[i] = val;
+ }
+}
+#endif
+
extern int
onig_scan_unsigned_number(UChar** src, const UChar* end, OnigEncoding enc)
{
@@ -2069,13 +2097,13 @@ conv_backslash_value(int c, ScanEnv* env)
{
if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_CONTROL_CHARS)) {
switch (c) {
- case 'n': return '\n';
- case 't': return '\t';
- case 'r': return '\r';
- case 'f': return '\f';
- case 'a': return '\007';
- case 'b': return '\010';
- case 'e': return '\033';
+ case 'n': return '\n';
+ case 't': return '\t';
+ case 'r': return '\r';
+ case 'f': return '\f';
+ case 'a': return '\007';
+ case 'b': return '\010';
+ case 'e': return '\033';
case 'v':
if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_V_VTAB))
return '\v';
@@ -2089,7 +2117,7 @@ conv_backslash_value(int c, ScanEnv* env)
}
static int
-is_invalid_qualifier_target(Node* node)
+is_invalid_quantifier_target(Node* node)
{
switch (NTYPE(node)) {
case N_ANCHOR:
@@ -2098,19 +2126,19 @@ is_invalid_qualifier_target(Node* node)
case N_EFFECT:
if (NEFFECT(node).type == EFFECT_OPTION)
- return is_invalid_qualifier_target(NEFFECT(node).target);
+ return is_invalid_quantifier_target(NEFFECT(node).target);
break;
case N_LIST: /* ex. (?:\G\A)* */
do {
- if (! is_invalid_qualifier_target(NCONS(node).left)) return 0;
+ if (! is_invalid_quantifier_target(NCONS(node).left)) return 0;
} while (IS_NOT_NULL(node = NCONS(node).right));
return 0;
break;
case N_ALT: /* ex. (?:abc|\A)* */
do {
- if (is_invalid_qualifier_target(NCONS(node).left)) return 1;
+ if (is_invalid_quantifier_target(NCONS(node).left)) return 1;
} while (IS_NOT_NULL(node = NCONS(node).right));
break;
@@ -2122,24 +2150,24 @@ is_invalid_qualifier_target(Node* node)
/* ?:0, *:1, +:2, ??:3, *?:4, +?:5 */
static int
-popular_qualifier_num(QualifierNode* qf)
+popular_quantifier_num(QuantifierNode* q)
{
- if (qf->greedy) {
- if (qf->lower == 0) {
- if (qf->upper == 1) return 0;
- else if (IS_REPEAT_INFINITE(qf->upper)) return 1;
+ if (q->greedy) {
+ if (q->lower == 0) {
+ if (q->upper == 1) return 0;
+ else if (IS_REPEAT_INFINITE(q->upper)) return 1;
}
- else if (qf->lower == 1) {
- if (IS_REPEAT_INFINITE(qf->upper)) return 2;
+ else if (q->lower == 1) {
+ if (IS_REPEAT_INFINITE(q->upper)) return 2;
}
}
else {
- if (qf->lower == 0) {
- if (qf->upper == 1) return 3;
- else if (IS_REPEAT_INFINITE(qf->upper)) return 4;
+ if (q->lower == 0) {
+ if (q->upper == 1) return 3;
+ else if (IS_REPEAT_INFINITE(q->upper)) return 4;
}
- else if (qf->lower == 1) {
- if (IS_REPEAT_INFINITE(qf->upper)) return 5;
+ else if (q->lower == 1) {
+ if (IS_REPEAT_INFINITE(q->upper)) return 5;
}
}
return -1;
@@ -2166,15 +2194,15 @@ static enum ReduceType ReduceTypeTable[6][6] = {
};
extern void
-onig_reduce_nested_qualifier(Node* pnode, Node* cnode)
+onig_reduce_nested_quantifier(Node* pnode, Node* cnode)
{
int pnum, cnum;
- QualifierNode *p, *c;
+ QuantifierNode *p, *c;
- p = &(NQUALIFIER(pnode));
- c = &(NQUALIFIER(cnode));
- pnum = popular_qualifier_num(p);
- cnum = popular_qualifier_num(c);
+ p = &(NQUANTIFIER(pnode));
+ c = &(NQUANTIFIER(cnode));
+ pnum = popular_quantifier_num(p);
+ cnum = popular_quantifier_num(c);
switch(ReduceTypeTable[cnum][pnum]) {
case RQ_DEL:
@@ -2275,6 +2303,7 @@ typedef struct {
UChar* name_end;
} call;
struct {
+ int ctype;
int not;
} prop;
} u;
@@ -2282,7 +2311,7 @@ typedef struct {
static int
-fetch_range_qualifier(UChar** src, UChar* end, OnigToken* tok, ScanEnv* env)
+fetch_range_quantifier(UChar** src, UChar* end, OnigToken* tok, ScanEnv* env)
{
int low, up, syn_allow, non_low = 0;
int r = 0;
@@ -2349,7 +2378,7 @@ fetch_range_qualifier(UChar** src, UChar* end, OnigToken* tok, ScanEnv* env)
if (PEND) goto invalid;
PFETCH(c);
if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) {
- if (c != MC_ESC(enc)) goto invalid;
+ if (c != MC_ESC(env->syntax)) goto invalid;
PFETCH(c);
}
if (c != '}') goto invalid;
@@ -2392,7 +2421,7 @@ fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env)
if (c != '-') return ONIGERR_META_CODE_SYNTAX;
if (PEND) return ONIGERR_END_PATTERN_AT_META;
PFETCH(c);
- if (c == MC_ESC(enc)) {
+ if (c == MC_ESC(env->syntax)) {
v = fetch_escaped_value(&p, end, env);
if (v < 0) return v;
c = (OnigCodePoint )v;
@@ -2422,7 +2451,7 @@ fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env)
c = 0177;
}
else {
- if (c == MC_ESC(enc)) {
+ if (c == MC_ESC(env->syntax)) {
v = fetch_escaped_value(&p, end, env);
if (v < 0) return v;
c = (OnigCodePoint )v;
@@ -2447,23 +2476,38 @@ fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env)
static int fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env);
+static OnigCodePoint
+get_name_end_code_point(OnigCodePoint start)
+{
+ switch (start) {
+ case '<': return (OnigCodePoint )'>'; break;
+ case '\'': return (OnigCodePoint )'\''; break;
+ default:
+ break;
+ }
+
+ return (OnigCodePoint )0;
+}
+
#ifdef USE_NAMED_GROUP
#ifdef USE_BACKREF_AT_LEVEL
/*
\k<name+n>, \k<name-n>
*/
static int
-fetch_name_with_level(UChar** src, UChar* end, UChar** rname_end
- , ScanEnv* env, int* level)
+fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end,
+ UChar** rname_end, ScanEnv* env, int* level)
{
int r, exist_level = 0;
+ OnigCodePoint end_code;
OnigCodePoint c = 0;
- OnigCodePoint first_code;
OnigEncoding enc = env->enc;
UChar *name_end;
UChar *p = *src;
PFETCH_READY;
+ end_code = get_name_end_code_point(start_code);
+
name_end = end;
r = 0;
if (PEND) {
@@ -2471,8 +2515,7 @@ fetch_name_with_level(UChar** src, UChar* end, UChar** rname_end
}
else {
PFETCH(c);
- first_code = c;
- if (c == '>')
+ if (c == end_code)
return ONIGERR_EMPTY_GROUP_NAME;
if (!ONIGENC_IS_CODE_WORD(enc, c)) {
@@ -2483,14 +2526,14 @@ fetch_name_with_level(UChar** src, UChar* end, UChar** rname_end
while (!PEND) {
name_end = p;
PFETCH(c);
- if (c == '>' || c == ')' || c == '+' || c == '-') break;
+ if (c == end_code || c == ')' || c == '+' || c == '-') break;
if (!ONIGENC_IS_CODE_WORD(enc, c)) {
r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
}
}
- if (c != '>') {
+ if (c != end_code) {
if (c == '+' || c == '-') {
int num;
int flag = (c == '-' ? -1 : 1);
@@ -2504,21 +2547,16 @@ fetch_name_with_level(UChar** src, UChar* end, UChar** rname_end
exist_level = 1;
PFETCH(c);
- if (c == '>')
- goto first_check;
+ if (c == end_code)
+ goto end;
}
err:
r = ONIGERR_INVALID_GROUP_NAME;
name_end = end;
}
- else {
- first_check:
- if (ONIGENC_IS_CODE_ASCII(first_code) &&
- ONIGENC_IS_CODE_UPPER(enc, first_code))
- r = ONIGERR_INVALID_GROUP_NAME;
- }
+ end:
if (r == 0) {
*rname_end = name_end;
*src = p;
@@ -2536,16 +2574,19 @@ fetch_name_with_level(UChar** src, UChar* end, UChar** rname_end
1 -> reference name (allow number name)
*/
static int
-fetch_name(UChar** src, UChar* end, UChar** rname_end, ScanEnv* env, int ref)
+fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
+ UChar** rname_end, ScanEnv* env, int ref)
{
int r, is_num;
+ OnigCodePoint end_code;
OnigCodePoint c = 0;
- OnigCodePoint first_code;
OnigEncoding enc = env->enc;
UChar *name_end;
UChar *p = *src;
PFETCH_READY;
+ end_code = get_name_end_code_point(start_code);
+
name_end = end;
r = 0;
is_num = 0;
@@ -2554,8 +2595,7 @@ fetch_name(UChar** src, UChar* end, UChar** rname_end, ScanEnv* env, int ref)
}
else {
PFETCH(c);
- first_code = c;
- if (c == '>')
+ if (c == end_code)
return ONIGERR_EMPTY_GROUP_NAME;
if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
@@ -2573,7 +2613,7 @@ fetch_name(UChar** src, UChar* end, UChar** rname_end, ScanEnv* env, int ref)
while (!PEND) {
name_end = p;
PFETCH(c);
- if (c == '>' || c == ')') break;
+ if (c == end_code || c == ')') break;
if (is_num == 1) {
if (! ONIGENC_IS_CODE_DIGIT(enc, c)) {
@@ -2590,15 +2630,10 @@ fetch_name(UChar** src, UChar* end, UChar** rname_end, ScanEnv* env, int ref)
}
}
- if (c != '>') {
+ if (c != end_code) {
r = ONIGERR_INVALID_GROUP_NAME;
name_end = end;
}
- else {
- if (ONIGENC_IS_CODE_ASCII(first_code) &&
- ONIGENC_IS_CODE_UPPER(enc, first_code))
- r = ONIGERR_INVALID_GROUP_NAME;
- }
if (r == 0) {
*rname_end = name_end;
@@ -2612,15 +2647,19 @@ fetch_name(UChar** src, UChar* end, UChar** rname_end, ScanEnv* env, int ref)
}
#else
static int
-fetch_name(UChar** src, UChar* end, UChar** rname_end, ScanEnv* env, int ref)
+fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
+ UChar** rname_end, ScanEnv* env, int ref)
{
int r, len;
+ OnigCodePoint end_code;
OnigCodePoint c = 0;
UChar *name_end;
OnigEncoding enc = env->enc;
UChar *p = *src;
PFETCH_READY;
+ end_code = get_name_end_code_point(start_code);
+
r = 0;
while (!PEND) {
name_end = p;
@@ -2628,11 +2667,11 @@ fetch_name(UChar** src, UChar* end, UChar** rname_end, ScanEnv* env, int ref)
r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
PFETCH(c);
- if (c == '>' || c == ')') break;
+ if (c == end_code || c == ')') break;
if (! ONIGENC_IS_CODE_DIGIT(enc, c))
r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
}
- if (c != '>') {
+ if (c != end_code) {
r = ONIGERR_INVALID_GROUP_NAME;
name_end = end;
}
@@ -2710,7 +2749,7 @@ find_str_position(OnigCodePoint s[], int n, UChar* from, UChar* to,
static int
str_exist_check_with_esc(OnigCodePoint s[], int n, UChar* from, UChar* to,
- OnigCodePoint bad, OnigEncoding enc)
+ OnigCodePoint bad, OnigEncoding enc, OnigSyntaxType* syn)
{
int i, in_esc;
OnigCodePoint x;
@@ -2738,7 +2777,7 @@ str_exist_check_with_esc(OnigCodePoint s[], int n, UChar* from, UChar* to,
else {
x = ONIGENC_MBC_TO_CODE(enc, p, to);
if (x == bad) return 0;
- else if (x == MC_ESC(enc)) in_esc = 1;
+ else if (x == MC_ESC(syn)) in_esc = 1;
p = q;
}
}
@@ -2774,7 +2813,7 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
else if (c == '-') {
tok->type = TK_CC_RANGE;
}
- else if (c == MC_ESC(enc)) {
+ else if (c == MC_ESC(syn)) {
if (! IS_SYNTAX_BV(syn, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC))
goto end;
@@ -2786,37 +2825,45 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
switch (c) {
case 'w':
tok->type = TK_CHAR_TYPE;
- tok->u.subtype = CTYPE_WORD;
+ tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
+ tok->u.prop.not = 0;
break;
case 'W':
tok->type = TK_CHAR_TYPE;
- tok->u.subtype = CTYPE_NOT_WORD;
+ tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
+ tok->u.prop.not = 1;
break;
case 'd':
tok->type = TK_CHAR_TYPE;
- tok->u.subtype = CTYPE_DIGIT;
+ tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
+ tok->u.prop.not = 0;
break;
case 'D':
tok->type = TK_CHAR_TYPE;
- tok->u.subtype = CTYPE_NOT_DIGIT;
+ tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
+ tok->u.prop.not = 1;
break;
case 's':
tok->type = TK_CHAR_TYPE;
- tok->u.subtype = CTYPE_WHITE_SPACE;
+ tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
+ tok->u.prop.not = 0;
break;
case 'S':
tok->type = TK_CHAR_TYPE;
- tok->u.subtype = CTYPE_NOT_WHITE_SPACE;
+ tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
+ tok->u.prop.not = 1;
break;
case 'h':
if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
tok->type = TK_CHAR_TYPE;
- tok->u.subtype = CTYPE_XDIGIT;
+ tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
+ tok->u.prop.not = 0;
break;
case 'H':
if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
tok->type = TK_CHAR_TYPE;
- tok->u.subtype = CTYPE_NOT_XDIGIT;
+ tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
+ tok->u.prop.not = 1;
break;
case 'p':
@@ -2925,7 +2972,7 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
tok->backp = p; /* point at '[' is readed */
PINC;
if (str_exist_check_with_esc(send, 2, p, end,
- (OnigCodePoint )']', enc)) {
+ (OnigCodePoint )']', enc, syn)) {
tok->type = TK_POSIX_BRACKET_OPEN;
}
else {
@@ -2978,7 +3025,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
tok->backp = p;
PFETCH(c);
- if (IS_MC_ESC_CODE(c, enc, syn)) {
+ if (IS_MC_ESC_CODE(c, syn)) {
if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
tok->backp = p;
@@ -3035,7 +3082,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
case '{':
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) break;
- r = fetch_range_qualifier(&p, end, tok, env);
+ r = fetch_range_quantifier(&p, end, tok, env);
if (r < 0) return r; /* error */
if (r == 0) goto greedy_check;
else if (r == 2) { /* {n} */
@@ -3065,13 +3112,15 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
case 'w':
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;
tok->type = TK_CHAR_TYPE;
- tok->u.subtype = CTYPE_WORD;
+ tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
+ tok->u.prop.not = 0;
break;
case 'W':
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;
tok->type = TK_CHAR_TYPE;
- tok->u.subtype = CTYPE_NOT_WORD;
+ tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
+ tok->u.prop.not = 1;
break;
case 'b':
@@ -3103,37 +3152,43 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
case 's':
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break;
tok->type = TK_CHAR_TYPE;
- tok->u.subtype = CTYPE_WHITE_SPACE;
+ tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
+ tok->u.prop.not = 0;
break;
case 'S':
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break;
tok->type = TK_CHAR_TYPE;
- tok->u.subtype = CTYPE_NOT_WHITE_SPACE;
+ tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
+ tok->u.prop.not = 1;
break;
case 'd':
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;
tok->type = TK_CHAR_TYPE;
- tok->u.subtype = CTYPE_DIGIT;
+ tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
+ tok->u.prop.not = 0;
break;
case 'D':
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;
tok->type = TK_CHAR_TYPE;
- tok->u.subtype = CTYPE_NOT_DIGIT;
+ tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
+ tok->u.prop.not = 1;
break;
case 'h':
if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
tok->type = TK_CHAR_TYPE;
- tok->u.subtype = CTYPE_XDIGIT;
+ tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
+ tok->u.prop.not = 0;
break;
case 'H':
if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
tok->type = TK_CHAR_TYPE;
- tok->u.subtype = CTYPE_NOT_XDIGIT;
+ tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
+ tok->u.prop.not = 1;
break;
case 'A':
@@ -3279,7 +3334,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
case 'k':
if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_K_NAMED_BACKREF)) {
PFETCH(c);
- if (c == '<') {
+ if (c == '<' || c == '\'') {
UChar* name_end;
int* backs;
@@ -3287,7 +3342,8 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
#ifdef USE_BACKREF_AT_LEVEL
name_end = NULL_UCHARP; /* no need. escape gcc warning. */
- r = fetch_name_with_level(&p, end, &name_end, env, &tok->u.backref.level);
+ r = fetch_name_with_level((OnigCodePoint )c, &p, end, &name_end,
+ env, &tok->u.backref.level);
if (r == 1) tok->u.backref.exist_level = 1;
else tok->u.backref.exist_level = 0;
#else
@@ -3331,11 +3387,11 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
case 'g':
if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_G_SUBEXP_CALL)) {
PFETCH(c);
- if (c == '<') {
+ if (c == '<' || c == '\'') {
UChar* name_end;
prev = p;
- r = fetch_name(&p, end, &name_end, env, 1);
+ r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, 1);
if (r < 0) return r;
tok->type = TK_CALL;
@@ -3395,15 +3451,15 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
#ifdef USE_VARIABLE_META_CHARS
if ((c != ONIG_INEFFECTIVE_META_CHAR) &&
IS_SYNTAX_OP(syn, ONIG_SYN_OP_VARIABLE_META_CHARACTERS)) {
- if (c == MC_ANYCHAR(enc))
+ if (c == MC_ANYCHAR(syn))
goto any_char;
- else if (c == MC_ANYTIME(enc))
+ else if (c == MC_ANYTIME(syn))
goto anytime;
- else if (c == MC_ZERO_OR_ONE_TIME(enc))
+ else if (c == MC_ZERO_OR_ONE_TIME(syn))
goto zero_or_one_time;
- else if (c == MC_ONE_OR_MORE_TIME(enc))
+ else if (c == MC_ONE_OR_MORE_TIME(syn))
goto one_or_more_time;
- else if (c == MC_ANYCHAR_ANYTIME(enc)) {
+ else if (c == MC_ANYCHAR_ANYTIME(syn)) {
tok->type = TK_ANYCHAR_ANYTIME;
goto out;
}
@@ -3454,7 +3510,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
case '{':
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACE_INTERVAL)) break;
- r = fetch_range_qualifier(&p, end, tok, env);
+ r = fetch_range_quantifier(&p, end, tok, env);
if (r < 0) return r; /* error */
if (r == 0) goto greedy_check;
else if (r == 2) { /* {n} */
@@ -3480,7 +3536,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
while (1) {
if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
PFETCH(c);
- if (c == MC_ESC(enc)) {
+ if (c == MC_ESC(syn)) {
if (!PEND) PFETCH(c);
}
else {
@@ -3557,23 +3613,34 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
static int
add_ctype_to_cc_by_range(CClassNode* cc, int ctype, int not, OnigEncoding enc,
- const OnigCodePoint sbr[], const OnigCodePoint mbr[])
+ OnigCodePoint sb_out, const OnigCodePoint mbr[])
{
int i, r;
OnigCodePoint j;
- int nsb = ONIGENC_CODE_RANGE_NUM(sbr);
- int nmb = ONIGENC_CODE_RANGE_NUM(mbr);
+ int n = ONIGENC_CODE_RANGE_NUM(mbr);
if (not == 0) {
- for (i = 0; i < nsb; i++) {
- for (j = ONIGENC_CODE_RANGE_FROM(sbr, i);
- j <= ONIGENC_CODE_RANGE_TO(sbr, i); j++) {
+ for (i = 0; i < n; i++) {
+ for (j = ONIGENC_CODE_RANGE_FROM(mbr, i);
+ j <= ONIGENC_CODE_RANGE_TO(mbr, i); j++) {
+ if (j >= sb_out) {
+ if (j == ONIGENC_CODE_RANGE_TO(mbr, i)) i++;
+ else if (j > ONIGENC_CODE_RANGE_FROM(mbr, i)) {
+ r = add_code_range_to_buf(&(cc->mbuf), j,
+ ONIGENC_CODE_RANGE_TO(mbr, i));
+ if (r != 0) return r;
+ i++;
+ }
+
+ goto sb_end;
+ }
BITSET_SET_BIT(cc->bs, j);
}
}
- for (i = 0; i < nmb; i++) {
+ sb_end:
+ for ( ; i < n; i++) {
r = add_code_range_to_buf(&(cc->mbuf),
ONIGENC_CODE_RANGE_FROM(mbr, i),
ONIGENC_CODE_RANGE_TO(mbr, i));
@@ -3583,24 +3650,24 @@ add_ctype_to_cc_by_range(CClassNode* cc, int ctype, int not, OnigEncoding enc,
else {
OnigCodePoint prev = 0;
- if (ONIGENC_MBC_MINLEN(enc) == 1) {
- for (i = 0; i < nsb; i++) {
- for (j = prev;
- j < ONIGENC_CODE_RANGE_FROM(sbr, i); j++) {
- BITSET_SET_BIT(cc->bs, j);
- }
- prev = ONIGENC_CODE_RANGE_TO(sbr, i) + 1;
- }
- if (prev < 0x7f) {
- for (j = prev; j < 0x7f; j++) {
- BITSET_SET_BIT(cc->bs, j);
- }
+ for (i = 0; i < n; i++) {
+ for (j = prev;
+ j < ONIGENC_CODE_RANGE_FROM(mbr, i); j++) {
+ if (j >= sb_out) {
+ goto sb_end2;
+ }
+ BITSET_SET_BIT(cc->bs, j);
}
-
- prev = 0x80;
+ prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;
}
+ for (j = prev; j < sb_out; j++) {
+ BITSET_SET_BIT(cc->bs, j);
+ }
+
+ sb_end2:
+ prev = sb_out;
- for (i = 0; i < nmb; i++) {
+ for (i = 0; i < n; i++) {
if (prev < ONIGENC_CODE_RANGE_FROM(mbr, i)) {
r = add_code_range_to_buf(&(cc->mbuf), prev,
ONIGENC_CODE_RANGE_FROM(mbr, i) - 1);
@@ -3621,12 +3688,13 @@ static int
add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env)
{
int c, r;
- const OnigCodePoint *sbr, *mbr;
+ const OnigCodePoint *ranges;
+ OnigCodePoint sb_out;
OnigEncoding enc = env->enc;
- r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sbr, &mbr);
+ r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sb_out, &ranges);
if (r == 0) {
- return add_ctype_to_cc_by_range(cc, ctype, not, env->enc, sbr, mbr);
+ return add_ctype_to_cc_by_range(cc, ctype, not, env->enc, sb_out, ranges);
}
else if (r != ONIG_NO_SUPPORT_CONFIG) {
return r;
@@ -3680,7 +3748,7 @@ add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env)
case ONIGENC_CTYPE_WORD:
if (not == 0) {
for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
- if (ONIGENC_IS_CODE_SB_WORD(enc, c)) BITSET_SET_BIT(cc->bs, c);
+ if (IS_CODE_SB_WORD(enc, c)) BITSET_SET_BIT(cc->bs, c);
}
ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
}
@@ -3702,61 +3770,10 @@ add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env)
}
static int
-parse_ctype_to_enc_ctype(int pctype, int* not)
-{
- int ctype;
-
- switch (pctype) {
- case CTYPE_WORD:
- ctype = ONIGENC_CTYPE_WORD;
- *not = 0;
- break;
- case CTYPE_NOT_WORD:
- ctype = ONIGENC_CTYPE_WORD;
- *not = 1;
- break;
- case CTYPE_WHITE_SPACE:
- ctype = ONIGENC_CTYPE_SPACE;
- *not = 0;
- break;
- case CTYPE_NOT_WHITE_SPACE:
- ctype = ONIGENC_CTYPE_SPACE;
- *not = 1;
- break;
- case CTYPE_DIGIT:
- ctype = ONIGENC_CTYPE_DIGIT;
- *not = 0;
- break;
- case CTYPE_NOT_DIGIT:
- ctype = ONIGENC_CTYPE_DIGIT;
- *not = 1;
- break;
- case CTYPE_XDIGIT:
- ctype = ONIGENC_CTYPE_XDIGIT;
- *not = 0;
- break;
- case CTYPE_NOT_XDIGIT:
- ctype = ONIGENC_CTYPE_XDIGIT;
- *not = 1;
- break;
- default:
- return ONIGERR_PARSER_BUG;
- break;
- }
- return ctype;
-}
-
-typedef struct {
- UChar *name;
- int ctype;
- short int len;
-} PosixBracketEntryType;
-
-static int
parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env)
{
#define POSIX_BRACKET_CHECK_LIMIT_LENGTH 20
-#define POSIX_BRACKET_NAME_MAX_LEN 6
+#define POSIX_BRACKET_NAME_MIN_LEN 4
static PosixBracketEntryType PBS[] = {
{ (UChar* )"alnum", ONIGENC_CTYPE_ALNUM, 5 },
@@ -3772,7 +3789,8 @@ parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env)
{ (UChar* )"upper", ONIGENC_CTYPE_UPPER, 5 },
{ (UChar* )"xdigit", ONIGENC_CTYPE_XDIGIT, 6 },
{ (UChar* )"ascii", ONIGENC_CTYPE_ASCII, 5 },
- { (UChar* )NULL, -1, 0 }
+ { (UChar* )"word", ONIGENC_CTYPE_WORD, 4 },
+ { (UChar* )NULL, -1, 0 }
};
PosixBracketEntryType *pb;
@@ -3789,7 +3807,7 @@ parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env)
else
not = 0;
- if (onigenc_strlen(enc, p, end) < POSIX_BRACKET_NAME_MAX_LEN + 2)
+ if (onigenc_strlen(enc, p, end) < POSIX_BRACKET_NAME_MIN_LEN + 3)
goto not_posix_bracket;
for (pb = PBS; IS_NOT_NULL(pb->name); pb++) {
@@ -3823,86 +3841,39 @@ parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env)
}
}
- return 1; /* 1: is not POSIX bracket, but no error. */
-}
-
-static int
-property_name_to_ctype(UChar* p, UChar* end, OnigEncoding enc)
-{
- static PosixBracketEntryType PBS[] = {
- { (UChar* )"Alnum", ONIGENC_CTYPE_ALNUM, 5 },
- { (UChar* )"Alpha", ONIGENC_CTYPE_ALPHA, 5 },
- { (UChar* )"Blank", ONIGENC_CTYPE_BLANK, 5 },
- { (UChar* )"Cntrl", ONIGENC_CTYPE_CNTRL, 5 },
- { (UChar* )"Digit", ONIGENC_CTYPE_DIGIT, 5 },
- { (UChar* )"Graph", ONIGENC_CTYPE_GRAPH, 5 },
- { (UChar* )"Lower", ONIGENC_CTYPE_LOWER, 5 },
- { (UChar* )"Print", ONIGENC_CTYPE_PRINT, 5 },
- { (UChar* )"Punct", ONIGENC_CTYPE_PUNCT, 5 },
- { (UChar* )"Space", ONIGENC_CTYPE_SPACE, 5 },
- { (UChar* )"Upper", ONIGENC_CTYPE_UPPER, 5 },
- { (UChar* )"XDigit", ONIGENC_CTYPE_XDIGIT, 6 },
- { (UChar* )"ASCII", ONIGENC_CTYPE_ASCII, 5 },
- { (UChar* )NULL, -1, 0 }
- };
-
- PosixBracketEntryType *pb;
- int len;
-
- len = onigenc_strlen(enc, p, end);
- for (pb = PBS; IS_NOT_NULL(pb->name); pb++) {
- if (len == pb->len &&
- onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0)
- return pb->ctype;
- }
-
- return -1;
+ return 1; /* 1: is not POSIX bracket, but no error. */
}
static int
fetch_char_property_to_ctype(UChar** src, UChar* end, ScanEnv* env)
{
- int ctype;
+ int r;
OnigCodePoint c;
OnigEncoding enc = env->enc;
UChar *prev, *start, *p = *src;
PFETCH_READY;
- /* 'IsXXXX' => 'XXXX' */
- if (!PEND &&
- IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS)) {
- c = PPEEK;
- if (c == 'I') {
- PINC;
- if (! PEND) {
- c = PPEEK;
- if (c == 's')
- PINC;
- else
- PUNFETCH;
- }
- }
- }
-
+ r = 0;
start = prev = p;
while (!PEND) {
prev = p;
PFETCH(c);
if (c == '}') {
- ctype = property_name_to_ctype(start, prev, enc);
- if (ctype < 0) break;
+ r = ONIGENC_PROPERTY_NAME_TO_CTYPE(enc, start, prev);
+ if (r < 0) break;
*src = p;
- return ctype;
+ return r;
}
- else if (c == '(' || c == ')' || c == '{' || c == '|')
+ else if (c == '(' || c == ')' || c == '{' || c == '|') {
+ r = ONIGERR_INVALID_CHAR_PROPERTY_NAME;
break;
+ }
}
- onig_scan_env_set_error_string(env, ONIGERR_INVALID_CHAR_PROPERTY_NAME,
- *src, prev);
- return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
+ onig_scan_env_set_error_string(env, r, *src, prev);
+ return r;
}
static int
@@ -4039,10 +4010,11 @@ next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v,
static int
code_exist_check(OnigCodePoint c, UChar* from, UChar* end, int ignore_escaped,
- OnigEncoding enc)
+ ScanEnv* env)
{
int in_esc;
OnigCodePoint code;
+ OnigEncoding enc = env->enc;
UChar* p = from;
PFETCH_READY;
@@ -4054,7 +4026,7 @@ code_exist_check(OnigCodePoint c, UChar* from, UChar* end, int ignore_escaped,
else {
PFETCH(code);
if (code == c) return 1;
- if (code == MC_ESC(enc)) in_esc = 1;
+ if (code == MC_ESC(env->syntax)) in_esc = 1;
}
}
return 0;
@@ -4089,7 +4061,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
if (r < 0) return r;
if (r == TK_CC_CLOSE) {
if (! code_exist_check((OnigCodePoint )']',
- *src, env->pattern_end, 1, env->enc))
+ *src, env->pattern_end, 1, env))
return ONIGERR_EMPTY_CHAR_CLASS;
CC_ESC_WARN(env, (UChar* )"]");
@@ -4205,12 +4177,8 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
break;
case TK_CHAR_TYPE:
- {
- int ctype, not;
- ctype = parse_ctype_to_enc_ctype(tok->u.subtype, &not);
- r = add_ctype_to_cc(cc, ctype, not, env);
- if (r != 0) return r;
- }
+ r = add_ctype_to_cc(cc, tok->u.prop.ctype, tok->u.prop.not, env);
+ if (r != 0) return r;
next_class:
r = next_state_class(cc, &vs, &val_type, &state, env);
@@ -4434,6 +4402,14 @@ parse_effect(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
*np = node_new_effect(EFFECT_STOP_BACKTRACK);
break;
+ case '\'':
+ if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
+ goto named_group1;
+ }
+ else
+ return ONIGERR_UNDEFINED_GROUP_OPTION;
+ break;
+
case '<': /* look behind (?<=...), (?<!...) */
PFETCH(c);
if (c == '=')
@@ -4441,35 +4417,45 @@ parse_effect(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
else if (c == '!')
*np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND_NOT);
#ifdef USE_NAMED_GROUP
- else if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
- UChar *name;
- UChar *name_end;
+ else {
+ if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
+ UChar *name;
+ UChar *name_end;
- PUNFETCH;
- list_capture = 0;
+ PUNFETCH;
+ c = '<';
- named_group:
- name = p;
- r = fetch_name(&p, end, &name_end, env, 0);
- if (r < 0) return r;
+ named_group1:
+ list_capture = 0;
- num = scan_env_add_mem_entry(env);
- if (num < 0) return num;
- if (list_capture != 0 && num >= BIT_STATUS_BITS_NUM)
- return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;
+ named_group2:
+ name = p;
+ r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, 0);
+ if (r < 0) return r;
- r = name_add(env->reg, name, name_end, num, env);
- if (r != 0) return r;
- *np = node_new_effect_memory(env->option, 1);
- CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
- NEFFECT(*np).regnum = num;
- if (list_capture != 0)
- BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num);
- env->num_named++;
+ num = scan_env_add_mem_entry(env);
+ if (num < 0) return num;
+ if (list_capture != 0 && num >= BIT_STATUS_BITS_NUM)
+ return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;
+
+ r = name_add(env->reg, name, name_end, num, env);
+ if (r != 0) return r;
+ *np = node_new_effect_memory(env->option, 1);
+ CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
+ NEFFECT(*np).regnum = num;
+ if (list_capture != 0)
+ BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num);
+ env->num_named++;
+ }
+ else {
+ return ONIGERR_UNDEFINED_GROUP_OPTION;
+ }
}
-#endif
- else
+#else
+ else {
return ONIGERR_UNDEFINED_GROUP_OPTION;
+ }
+#endif
break;
case '@':
@@ -4477,9 +4463,9 @@ parse_effect(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
#ifdef USE_NAMED_GROUP
if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
PFETCH(c);
- if (c == '<') {
+ if (c == '<' || c == '\'') {
list_capture = 1;
- goto named_group; /* (?@<name>...) */
+ goto named_group2; /* (?@<name>...) */
}
PUNFETCH;
}
@@ -4619,11 +4605,11 @@ static const char* ReduceQStr[] = {
};
static int
-set_qualifier(Node* qnode, Node* target, int group, ScanEnv* env)
+set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env)
{
- QualifierNode* qn;
+ QuantifierNode* qn;
- qn = &(NQUALIFIER(qnode));
+ qn = &(NQUANTIFIER(qnode));
if (qn->lower == 1 && qn->upper == 1) {
return 1;
}
@@ -4642,15 +4628,15 @@ set_qualifier(Node* qnode, Node* target, int group, ScanEnv* env)
}
break;
- case N_QUALIFIER:
+ case N_QUANTIFIER:
{ /* check redundant double repeat. */
/* verbose warn (?:.?)? etc... but not warn (.?)? etc... */
- QualifierNode* qnt = &(NQUALIFIER(target));
- int nestq_num = popular_qualifier_num(qn);
- int targetq_num = popular_qualifier_num(qnt);
+ QuantifierNode* qnt = &(NQUANTIFIER(target));
+ int nestq_num = popular_quantifier_num(qn);
+ int targetq_num = popular_quantifier_num(qnt);
#ifdef USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
- if (!IS_QUALIFIER_BY_NUMBER(qn) && !IS_QUALIFIER_BY_NUMBER(qnt) &&
+ if (!IS_QUANTIFIER_BY_NUMBER(qn) && !IS_QUANTIFIER_BY_NUMBER(qnt) &&
IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT)) {
UChar buf[WARN_BUFSIZE];
@@ -4686,7 +4672,7 @@ set_qualifier(Node* qnode, Node* target, int group, ScanEnv* env)
#endif
if (targetq_num >= 0) {
if (nestq_num >= 0) {
- onig_reduce_nested_qualifier(qnode, target);
+ onig_reduce_nested_quantifier(qnode, target);
goto q_exit;
}
else if (targetq_num == 1 || targetq_num == 2) { /* * or + */
@@ -4708,60 +4694,6 @@ set_qualifier(Node* qnode, Node* target, int group, ScanEnv* env)
return 0;
}
-static int
-make_compound_alt_node_from_cc(OnigAmbigType ambig_flag, OnigEncoding enc,
- CClassNode* cc, Node** root)
-{
- int r, i, j, k, clen, len, ncode, n;
- UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
- Node **ptail, *snode = NULL_NODE;
- const OnigCompAmbigCodes* ccs;
- const OnigCompAmbigCodeItem* ci;
- OnigAmbigType amb;
-
- n = 0;
- *root = NULL_NODE;
- ptail = root;
-
-
- for (amb = 0x01; amb <= ONIGENC_AMBIGUOUS_MATCH_LIMIT; amb <<= 1) {
- if ((amb & ambig_flag) == 0) continue;
-
- ncode = ONIGENC_GET_ALL_COMP_AMBIG_CODES(enc, amb, &ccs);
- for (i = 0; i < ncode; i++) {
- if (onig_is_code_in_cc(enc, ccs[i].code, cc)) {
- for (j = 0; j < ccs[i].n; j++) {
- ci = &(ccs[i].items[j]);
- if (ci->len > 1) { /* compound only */
- if (IS_CCLASS_NOT(cc)) clear_not_flag_cclass(cc, enc);
-
- clen = ci->len;
- for (k = 0; k < clen; k++) {
- len = ONIGENC_CODE_TO_MBC(enc, ci->code[k], buf);
-
- if (k == 0) {
- snode = node_new_str_raw(buf, buf + len);
- CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY);
- }
- else {
- r = onig_node_str_cat(snode, buf, buf + len);
- if (r < 0) return r;
- }
- }
-
- *ptail = node_new_alt(snode, NULL_NODE);
- CHECK_NULL_RETURN_VAL(*ptail, ONIGERR_MEMORY);
- ptail = &(NCONS(*ptail).right);
- n++;
- }
- }
- }
- }
- }
-
- return n;
-}
-
#ifdef USE_SHARED_CCLASS_TABLE
@@ -4840,6 +4772,78 @@ onig_free_shared_cclass_table(void)
#endif /* USE_SHARED_CCLASS_TABLE */
+typedef struct {
+ ScanEnv* env;
+ CClassNode* cc;
+ Node* alt_root;
+ Node** ptail;
+} ICaseFoldArgType;
+
+static int
+i_case_fold(OnigCodePoint from, OnigCodePoint to[], int to_len, void* arg)
+{
+ ICaseFoldArgType* iarg;
+ ScanEnv* env;
+ CClassNode* cc;
+ BitSetRef bs;
+
+ iarg = (ICaseFoldArgType* )arg;
+ env = iarg->env;
+ cc = iarg->cc;
+ bs = cc->bs;
+
+ if (to_len == 1) {
+ int in_cc;
+ in_cc = onig_is_code_in_cc(env->enc, from, cc);
+ if ((in_cc != 0 && !IS_CCLASS_NOT(cc)) ||
+ (in_cc == 0 && IS_CCLASS_NOT(cc))) {
+ if (ONIGENC_MBC_MINLEN(env->enc) > 1 || from >= SINGLE_BYTE_SIZE) {
+ add_code_range(&(cc->mbuf), env, *to, *to);
+ }
+ else {
+ if (BITSET_AT(bs, from)) {
+ /* /(?i:[^A-C])/.match("a") ==> fail. */
+ BITSET_SET_BIT(bs, *to);
+ }
+ }
+ }
+ }
+ else {
+ int r, i, len;
+ UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
+ Node *snode = NULL_NODE;
+
+ if (onig_is_code_in_cc(env->enc, from, cc)) {
+ if (IS_CCLASS_NOT(cc)) clear_not_flag_cclass(cc, env->enc);
+
+ for (i = 0; i < to_len; i++) {
+ len = ONIGENC_CODE_TO_MBC(env->enc, to[i], buf);
+ if (i == 0) {
+ snode = onig_node_new_str(buf, buf + len);
+ CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY);
+
+ /* char-class expanded multi-char only
+ compare with string folded at match time. */
+ NSTRING_SET_AMBIG(snode);
+ }
+ else {
+ r = onig_node_str_cat(snode, buf, buf + len);
+ if (r < 0) {
+ onig_node_free(snode);
+ return r;
+ }
+ }
+ }
+
+ *(iarg->ptail) = onig_node_new_alt(snode, NULL_NODE);
+ CHECK_NULL_RETURN_VAL(*(iarg->ptail), ONIGERR_MEMORY);
+ iarg->ptail = &(NCONS((*(iarg->ptail))).right);
+ }
+ }
+
+ return 0;
+}
+
static int
parse_exp(Node** np, OnigToken* tok, int term,
UChar** src, UChar* end, ScanEnv* env)
@@ -4915,19 +4919,35 @@ parse_exp(Node** np, OnigToken* tok, int term,
CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
len = 1;
while (1) {
+ if (len >= ONIGENC_MBC_MINLEN(env->enc)) {
+ if (len == enc_len(env->enc, NSTRING(*np).s)) {
+ r = fetch_token(tok, src, end, env);
+ NSTRING_CLEAR_RAW(*np);
+ goto string_end;
+ }
+ }
+
r = fetch_token(tok, src, end, env);
if (r < 0) return r;
if (r != TK_RAW_BYTE) {
-#ifndef NUMBERED_CHAR_IS_NOT_CASE_AMBIG
- if (len >= enc_len(env->enc, NSTRING(*np).s)) {
- NSTRING_CLEAR_RAW(*np);
+ /* Don't use this, it is wrong for little endian encodings. */
+#ifdef USE_PAD_TO_SHORT_BYTE_CHAR
+ int rem;
+ if (len < ONIGENC_MBC_MINLEN(env->enc)) {
+ rem = ONIGENC_MBC_MINLEN(env->enc) - len;
+ (void )node_str_head_pad(&NSTRING(*np), rem, (UChar )0);
+ if (len + rem == enc_len(env->enc, NSTRING(*np).s)) {
+ NSTRING_CLEAR_RAW(*np);
+ goto string_end;
+ }
}
#endif
- goto string_end;
+ return ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
}
r = node_str_cat_char(*np, (UChar )tok->u.c);
if (r < 0) return r;
+
len++;
}
}
@@ -4952,7 +4972,7 @@ parse_exp(Node** np, OnigToken* tok, int term,
OnigCodePoint end_op[2];
UChar *qstart, *qend, *nextp;
- end_op[0] = (OnigCodePoint )MC_ESC(env->enc);
+ end_op[0] = (OnigCodePoint )MC_ESC(env->syntax);
end_op[1] = (OnigCodePoint )'E';
qstart = *src;
qend = find_str_position(end_op, 2, qstart, end, &nextp, env->enc);
@@ -4967,28 +4987,24 @@ parse_exp(Node** np, OnigToken* tok, int term,
case TK_CHAR_TYPE:
{
- switch (tok->u.subtype) {
- case CTYPE_WORD:
- case CTYPE_NOT_WORD:
- *np = node_new_ctype(tok->u.subtype);
+ switch (tok->u.prop.ctype) {
+ case ONIGENC_CTYPE_WORD:
+ *np = node_new_ctype(tok->u.prop.ctype, tok->u.prop.not);
CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
break;
- case CTYPE_WHITE_SPACE:
- case CTYPE_NOT_WHITE_SPACE:
- case CTYPE_DIGIT:
- case CTYPE_NOT_DIGIT:
- case CTYPE_XDIGIT:
- case CTYPE_NOT_XDIGIT:
+ case ONIGENC_CTYPE_SPACE:
+ case ONIGENC_CTYPE_DIGIT:
+ case ONIGENC_CTYPE_XDIGIT:
{
CClassNode* cc;
- int ctype, not;
#ifdef USE_SHARED_CCLASS_TABLE
- const OnigCodePoint *sbr, *mbr;
+ const OnigCodePoint *mbr;
+ OnigCodePoint sb_out;
- ctype = parse_ctype_to_enc_ctype(tok->u.subtype, &not);
- r = ONIGENC_GET_CTYPE_CODE_RANGE(env->enc, ctype, &sbr, &mbr);
+ r = ONIGENC_GET_CTYPE_CODE_RANGE(env->enc, tok->u.prop.ctype,
+ &sb_out, &mbr);
if (r == 0 &&
ONIGENC_CODE_RANGE_NUM(mbr)
>= THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS) {
@@ -4996,8 +5012,8 @@ parse_exp(Node** np, OnigToken* tok, int term,
type_cclass_key* new_key;
key.enc = env->enc;
- key.not = not;
- key.type = ctype;
+ key.not = tok->u.prop.not;
+ key.type = tok->u.prop.ctype;
THREAD_ATOMIC_START;
@@ -5017,13 +5033,15 @@ parse_exp(Node** np, OnigToken* tok, int term,
}
}
- *np = node_new_cclass_by_codepoint_range(not, sbr, mbr);
+ *np = node_new_cclass_by_codepoint_range(tok->u.prop.not,
+ sb_out, mbr);
if (IS_NULL(*np)) {
THREAD_ATOMIC_END;
return ONIGERR_MEMORY;
}
- CCLASS_SET_SHARE(&(NCCLASS(*np)));
+ cc = &(NCCLASS(*np));
+ CCLASS_SET_SHARE(cc);
new_key = (type_cclass_key* )xmalloc(sizeof(type_cclass_key));
onig_st_add_direct(OnigTypeCClassTable, (st_data_t )new_key,
(st_data_t )*np);
@@ -5032,12 +5050,11 @@ parse_exp(Node** np, OnigToken* tok, int term,
}
else {
#endif
- ctype = parse_ctype_to_enc_ctype(tok->u.subtype, &not);
*np = node_new_cclass();
CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
cc = &(NCCLASS(*np));
- add_ctype_to_cc(cc, ctype, 0, env);
- if (not != 0) CCLASS_SET_NOT(cc);
+ add_ctype_to_cc(cc, tok->u.prop.ctype, 0, env);
+ if (tok->u.prop.not != 0) CCLASS_SET_NOT(cc);
#ifdef USE_SHARED_CCLASS_TABLE
}
#endif
@@ -5066,55 +5083,28 @@ parse_exp(Node** np, OnigToken* tok, int term,
cc = &(NCCLASS(*np));
if (IS_IGNORECASE(env->option)) {
- int i, n, in_cc;
- const OnigPairAmbigCodes* ccs;
- BitSetRef bs = cc->bs;
- OnigAmbigType amb;
-
- for (amb = 0x01; amb <= ONIGENC_AMBIGUOUS_MATCH_LIMIT; amb <<= 1) {
- if ((amb & env->ambig_flag) == 0) continue;
-
- n = ONIGENC_GET_ALL_PAIR_AMBIG_CODES(env->enc, amb, &ccs);
- for (i = 0; i < n; i++) {
- in_cc = onig_is_code_in_cc(env->enc, ccs[i].from, cc);
-
- if ((in_cc != 0 && !IS_CCLASS_NOT(cc)) ||
- (in_cc == 0 && IS_CCLASS_NOT(cc))) {
- if (ONIGENC_MBC_MINLEN(env->enc) > 1 ||
- ccs[i].from >= SINGLE_BYTE_SIZE) {
- /* if (cc->not) clear_not_flag_cclass(cc, env->enc); */
- add_code_range(&(cc->mbuf), env, ccs[i].to, ccs[i].to);
- }
- else {
- if (BITSET_AT(bs, ccs[i].from)) {
- /* /(?i:[^A-C])/.match("a") ==> fail. */
- BITSET_SET_BIT(bs, ccs[i].to);
- }
- if (BITSET_AT(bs, ccs[i].to)) {
- BITSET_SET_BIT(bs, ccs[i].from);
- }
- }
- }
- }
- }
- }
+ ICaseFoldArgType iarg;
- if (IS_IGNORECASE(env->option) &&
- (env->ambig_flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
- int res;
- Node *alt_root, *work;
+ iarg.env = env;
+ iarg.cc = cc;
+ iarg.alt_root = NULL_NODE;
+ iarg.ptail = &(iarg.alt_root);
- res = make_compound_alt_node_from_cc(env->ambig_flag, env->enc,
- cc, &alt_root);
- if (res < 0) return res;
- if (res > 0) {
- work = node_new_alt(*np, alt_root);
+ r = ONIGENC_APPLY_ALL_CASE_FOLD(env->enc, env->case_fold_flag,
+ i_case_fold, &iarg);
+ if (r != 0) {
+ if (IS_NOT_NULL(iarg.alt_root))
+ onig_node_free(iarg.alt_root);
+ return r;
+ }
+ if (IS_NOT_NULL(iarg.alt_root)) {
+ Node* work = onig_node_new_alt(*np, iarg.alt_root);
if (IS_NULL(work)) {
- onig_node_free(alt_root);
+ onig_node_free(iarg.alt_root);
return ONIGERR_MEMORY;
}
*np = work;
- }
+ }
}
}
break;
@@ -5127,9 +5117,9 @@ parse_exp(Node** np, OnigToken* tok, int term,
case TK_ANYCHAR_ANYTIME:
*np = node_new_anychar();
CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
- qn = node_new_qualifier(0, REPEAT_INFINITE, 0);
+ qn = node_new_quantifier(0, REPEAT_INFINITE, 0);
CHECK_NULL_RETURN_VAL(qn, ONIGERR_MEMORY);
- NQUALIFIER(qn).target = *np;
+ NQUANTIFIER(qn).target = *np;
*np = qn;
break;
@@ -5185,14 +5175,14 @@ parse_exp(Node** np, OnigToken* tok, int term,
repeat:
if (r == TK_OP_REPEAT || r == TK_INTERVAL) {
- if (is_invalid_qualifier_target(*targetp))
+ if (is_invalid_quantifier_target(*targetp))
return ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID;
- qn = node_new_qualifier(tok->u.repeat.lower, tok->u.repeat.upper,
+ qn = node_new_quantifier(tok->u.repeat.lower, tok->u.repeat.upper,
(r == TK_INTERVAL ? 1 : 0));
CHECK_NULL_RETURN_VAL(qn, ONIGERR_MEMORY);
- NQUALIFIER(qn).greedy = tok->u.repeat.greedy;
- r = set_qualifier(qn, *targetp, group, env);
+ NQUANTIFIER(qn).greedy = tok->u.repeat.greedy;
+ r = set_quantifier(qn, *targetp, group, env);
if (r < 0) return r;
if (tok->u.repeat.possessive != 0) {
@@ -5277,7 +5267,7 @@ parse_subexp(Node** top, OnigToken* tok, int term,
*top = node;
}
else if (r == TK_ALT) {
- *top = node_new_alt(node, NULL);
+ *top = onig_node_new_alt(node, NULL);
headp = &(NCONS(*top).right);
while (r == TK_ALT) {
r = fetch_token(tok, src, end, env);
@@ -5285,7 +5275,7 @@ parse_subexp(Node** top, OnigToken* tok, int term,
r = parse_branch(&node, tok, term, src, end, env);
if (r < 0) return r;
- *headp = node_new_alt(node, NULL);
+ *headp = onig_node_new_alt(node, NULL);
headp = &(NCONS(*headp).right);
}
@@ -5328,13 +5318,13 @@ onig_parse_make_tree(Node** root, const UChar* pattern, const UChar* end, regex_
#endif
scan_env_clear(env);
- env->option = reg->options;
- env->ambig_flag = reg->ambig_flag;
- env->enc = reg->enc;
- env->syntax = reg->syntax;
- env->pattern = (UChar* )pattern;
- env->pattern_end = (UChar* )end;
- env->reg = reg;
+ env->option = reg->options;
+ env->case_fold_flag = reg->case_fold_flag;
+ env->enc = reg->enc;
+ env->syntax = reg->syntax;
+ env->pattern = (UChar* )pattern;
+ env->pattern_end = (UChar* )end;
+ env->reg = reg;
*root = NULL;
p = (UChar* )pattern;
diff --git a/regparse.h b/regparse.h
index 80f5283ab2..de99032574 100644
--- a/regparse.h
+++ b/regparse.h
@@ -4,7 +4,7 @@
regparse.h - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -37,7 +37,7 @@
#define N_CTYPE (1<< 2)
#define N_ANYCHAR (1<< 3)
#define N_BACKREF (1<< 4)
-#define N_QUALIFIER (1<< 5)
+#define N_QUANTIFIER (1<< 5)
#define N_EFFECT (1<< 6)
#define N_ANCHOR (1<< 7)
#define N_LIST (1<< 8)
@@ -52,21 +52,12 @@
#define NSTRING(node) ((node)->u.str)
#define NCCLASS(node) ((node)->u.cclass)
#define NCTYPE(node) ((node)->u.ctype)
-#define NQUALIFIER(node) ((node)->u.qualifier)
+#define NQUANTIFIER(node) ((node)->u.quant)
#define NANCHOR(node) ((node)->u.anchor)
#define NBACKREF(node) ((node)->u.backref)
#define NEFFECT(node) ((node)->u.effect)
#define NCALL(node) ((node)->u.call)
-#define CTYPE_WORD (1<<0)
-#define CTYPE_NOT_WORD (1<<1)
-#define CTYPE_WHITE_SPACE (1<<2)
-#define CTYPE_NOT_WHITE_SPACE (1<<3)
-#define CTYPE_DIGIT (1<<4)
-#define CTYPE_NOT_DIGIT (1<<5)
-#define CTYPE_XDIGIT (1<<6)
-#define CTYPE_NOT_XDIGIT (1<<7)
-
#define ANCHOR_ANYCHAR_STAR_MASK (ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML)
#define ANCHOR_END_BUF_MASK (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)
@@ -80,17 +71,18 @@
#define NSTR_RAW (1<<0) /* by backslashed number */
#define NSTR_AMBIG (1<<1)
-#define NSTR_AMBIG_REDUCE (1<<2)
+#define NSTR_DONT_GET_OPT_INFO (1<<2)
#define NSTRING_LEN(node) ((node)->u.str.end - (node)->u.str.s)
#define NSTRING_SET_RAW(node) (node)->u.str.flag |= NSTR_RAW
#define NSTRING_CLEAR_RAW(node) (node)->u.str.flag &= ~NSTR_RAW
#define NSTRING_SET_AMBIG(node) (node)->u.str.flag |= NSTR_AMBIG
-#define NSTRING_SET_AMBIG_REDUCE(node) (node)->u.str.flag |= NSTR_AMBIG_REDUCE
+#define NSTRING_SET_DONT_GET_OPT_INFO(node) \
+ (node)->u.str.flag |= NSTR_DONT_GET_OPT_INFO
#define NSTRING_IS_RAW(node) (((node)->u.str.flag & NSTR_RAW) != 0)
#define NSTRING_IS_AMBIG(node) (((node)->u.str.flag & NSTR_AMBIG) != 0)
-#define NSTRING_IS_AMBIG_REDUCE(node) \
- (((node)->u.str.flag & NSTR_AMBIG_REDUCE) != 0)
+#define NSTRING_IS_DONT_GET_OPT_INFO(node) \
+ (((node)->u.str.flag & NSTR_DONT_GET_OPT_INFO) != 0)
#define BACKREFS_P(br) \
(IS_NOT_NULL((br)->back_dynamic) ? (br)->back_dynamic : (br)->back_static);
@@ -109,15 +101,6 @@ typedef struct {
UChar buf[NODE_STR_BUF_SIZE];
} StrNode;
-/* move to regint.h */
-#if 0
-typedef struct {
- int flags;
- BitSet bs;
- BBuf* mbuf; /* multi-byte info or NULL */
-} CClassNode;
-#endif
-
typedef struct {
int state;
struct _Node* target;
@@ -131,7 +114,7 @@ typedef struct {
#ifdef USE_COMBINATION_EXPLOSION_CHECK
int comb_exp_check_num; /* 1,2,3...: check, 0: no check */
#endif
-} QualifierNode;
+} QuantifierNode;
/* status bits */
#define NST_MIN_FIXED (1<<0)
@@ -170,21 +153,21 @@ typedef struct {
#define IS_CALL_NAME_REF(cn) (((cn)->state & NST_NAME_REF) != 0)
#define IS_BACKREF_NAME_REF(bn) (((bn)->state & NST_NAME_REF) != 0)
#define IS_BACKREF_NEST_LEVEL(bn) (((bn)->state & NST_NEST_LEVEL) != 0)
-#define IS_QUALIFIER_IN_REPEAT(qn) (((qn)->state & NST_IN_REPEAT) != 0)
-#define IS_QUALIFIER_BY_NUMBER(qn) (((qn)->state & NST_BY_NUMBER) != 0)
+#define IS_QUANTIFIER_IN_REPEAT(qn) (((qn)->state & NST_IN_REPEAT) != 0)
+#define IS_QUANTIFIER_BY_NUMBER(qn) (((qn)->state & NST_BY_NUMBER) != 0)
typedef struct {
int state;
int type;
int regnum;
OnigOptionType option;
- struct _Node* target;
- AbsAddrType call_addr;
+ struct _Node* target;
+ AbsAddrType call_addr;
/* for multiple call reference */
OnigDistance min_len; /* min length (byte) */
OnigDistance max_len; /* max length (byte) */
- int char_len; /* character length */
- int opt_count; /* referenced count in optimize_node_left() */
+ int char_len; /* character length */
+ int opt_count; /* referenced count in optimize_node_left() */
} EffectNode;
#define CALLNODE_REFNUM_UNDEF -1
@@ -192,13 +175,13 @@ typedef struct {
#ifdef USE_SUBEXP_CALL
typedef struct {
- int offset;
+ int offset;
struct _Node* target;
} UnsetAddr;
typedef struct {
- int num;
- int alloc;
+ int num;
+ int alloc;
UnsetAddr* us;
} UnsetAddrList;
@@ -207,18 +190,18 @@ typedef struct {
int ref_num;
UChar* name;
UChar* name_end;
- struct _Node* target; /* EffectNode : EFFECT_MEMORY */
+ struct _Node* target; /* EffectNode : EFFECT_MEMORY */
UnsetAddrList* unset_addr_list;
} CallNode;
#endif
typedef struct {
- int state;
- int back_num;
- int back_static[NODE_BACKREFS_SIZE];
- int* back_dynamic;
- int nest_level;
+ int state;
+ int back_num;
+ int back_static[NODE_BACKREFS_SIZE];
+ int* back_dynamic;
+ int nest_level;
} BackrefNode;
typedef struct {
@@ -230,21 +213,22 @@ typedef struct {
typedef struct _Node {
int type;
union {
- StrNode str;
- CClassNode cclass;
- QualifierNode qualifier;
- EffectNode effect;
+ StrNode str;
+ CClassNode cclass;
+ QuantifierNode quant;
+ EffectNode effect;
#ifdef USE_SUBEXP_CALL
- CallNode call;
+ CallNode call;
#endif
- BackrefNode backref;
- AnchorNode anchor;
+ BackrefNode backref;
+ AnchorNode anchor;
struct {
struct _Node* left;
struct _Node* right;
} cons;
struct {
- int type;
+ int ctype;
+ int not;
} ctype;
} u;
} Node;
@@ -257,30 +241,30 @@ typedef struct _Node {
(senv)->mem_nodes_dynamic : (senv)->mem_nodes_static)
typedef struct {
- OnigOptionType option;
- OnigAmbigType ambig_flag;
- OnigEncoding enc;
- OnigSyntaxType* syntax;
- BitStatusType capture_history;
- BitStatusType bt_mem_start;
- BitStatusType bt_mem_end;
- BitStatusType backrefed_mem;
- UChar* pattern;
- UChar* pattern_end;
- UChar* error;
- UChar* error_end;
- regex_t* reg; /* for reg->names only */
- int num_call;
+ OnigOptionType option;
+ OnigCaseFoldType case_fold_flag;
+ OnigEncoding enc;
+ OnigSyntaxType* syntax;
+ BitStatusType capture_history;
+ BitStatusType bt_mem_start;
+ BitStatusType bt_mem_end;
+ BitStatusType backrefed_mem;
+ UChar* pattern;
+ UChar* pattern_end;
+ UChar* error;
+ UChar* error_end;
+ regex_t* reg; /* for reg->names only */
+ int num_call;
#ifdef USE_SUBEXP_CALL
- UnsetAddrList* unset_addr_list;
+ UnsetAddrList* unset_addr_list;
#endif
- int num_mem;
+ int num_mem;
#ifdef USE_NAMED_GROUP
- int num_named;
+ int num_named;
#endif
- int mem_alloc;
- Node* mem_nodes_static[SCANENV_MEMNODES_SIZE];
- Node** mem_nodes_dynamic;
+ int mem_alloc;
+ Node* mem_nodes_static[SCANENV_MEMNODES_SIZE];
+ Node** mem_nodes_dynamic;
#ifdef USE_COMBINATION_EXPLOSION_CHECK
int num_comb_exp_check;
int comb_exp_max_regnum;
@@ -294,7 +278,6 @@ typedef struct {
#define IS_SYNTAX_OP2(syn, opm) (((syn)->op2 & (opm)) != 0)
#define IS_SYNTAX_BV(syn, bvm) (((syn)->behavior & (bvm)) != 0)
-
#ifdef USE_NAMED_GROUP
typedef struct {
int new_val;
@@ -304,18 +287,22 @@ extern int onig_renumber_name_table P_((regex_t* reg, GroupNumRemap* map));
#endif
extern int onig_strncmp P_((const UChar* s1, const UChar* s2, int n));
+extern void onig_strcpy P_((UChar* dest, const UChar* src, const UChar* end));
extern void onig_scan_env_set_error_string P_((ScanEnv* env, int ecode, UChar* arg, UChar* arg_end));
extern int onig_scan_unsigned_number P_((UChar** src, const UChar* end, OnigEncoding enc));
-extern void onig_reduce_nested_qualifier P_((Node* pnode, Node* cnode));
+extern void onig_reduce_nested_quantifier P_((Node* pnode, Node* cnode));
extern void onig_node_conv_to_str_node P_((Node* node, int raw));
extern int onig_node_str_cat P_((Node* node, const UChar* s, const UChar* end));
+extern int onig_node_str_set P_((Node* node, const UChar* s, const UChar* end));
extern void onig_node_free P_((Node* node));
extern Node* onig_node_new_effect P_((int type));
extern Node* onig_node_new_anchor P_((int type));
extern Node* onig_node_new_str P_((const UChar* s, const UChar* end));
extern Node* onig_node_new_list P_((Node* left, Node* right));
+extern Node* onig_node_list_add P_((Node* list, Node* x));
+extern Node* onig_node_new_alt P_((Node* left, Node* right));
extern void onig_node_str_clear P_((Node* node));
-extern int onig_free_node_list(void);
+extern int onig_free_node_list P_((void));
extern int onig_names_free P_((regex_t* reg));
extern int onig_parse_make_tree P_((Node** root, const UChar* pattern, const UChar* end, regex_t* reg, ScanEnv* env));
diff --git a/regsyntax.c b/regsyntax.c
new file mode 100644
index 0000000000..ade5b55f77
--- /dev/null
+++ b/regsyntax.c
@@ -0,0 +1,315 @@
+/**********************************************************************
+ regsyntax.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regint.h"
+
+OnigSyntaxType OnigSyntaxASIS = {
+ 0
+ , ONIG_SYN_OP2_INEFFECTIVE_ESCAPE
+ , 0
+ , ONIG_OPTION_NONE
+ ,
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ }
+};
+
+OnigSyntaxType OnigSyntaxPosixBasic = {
+ ( SYN_POSIX_COMMON_OP | ONIG_SYN_OP_ESC_LPAREN_SUBEXP |
+ ONIG_SYN_OP_ESC_BRACE_INTERVAL )
+ , 0
+ , 0
+ , ( ONIG_OPTION_SINGLELINE | ONIG_OPTION_MULTILINE )
+ ,
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ }
+};
+
+OnigSyntaxType OnigSyntaxPosixExtended = {
+ ( SYN_POSIX_COMMON_OP | ONIG_SYN_OP_LPAREN_SUBEXP |
+ ONIG_SYN_OP_BRACE_INTERVAL |
+ ONIG_SYN_OP_PLUS_ONE_INF | ONIG_SYN_OP_QMARK_ZERO_ONE | ONIG_SYN_OP_VBAR_ALT )
+ , 0
+ , ( ONIG_SYN_CONTEXT_INDEP_ANCHORS |
+ ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS | ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS |
+ ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP |
+ ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC )
+ , ( ONIG_OPTION_SINGLELINE | ONIG_OPTION_MULTILINE )
+ ,
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ }
+};
+
+OnigSyntaxType OnigSyntaxEmacs = {
+ ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC |
+ ONIG_SYN_OP_ESC_BRACE_INTERVAL |
+ ONIG_SYN_OP_ESC_LPAREN_SUBEXP | ONIG_SYN_OP_ESC_VBAR_ALT |
+ ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_PLUS_ONE_INF |
+ ONIG_SYN_OP_QMARK_ZERO_ONE | ONIG_SYN_OP_DECIMAL_BACKREF |
+ ONIG_SYN_OP_LINE_ANCHOR | ONIG_SYN_OP_ESC_CONTROL_CHARS )
+ , ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR
+ , ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC
+ , ONIG_OPTION_NONE
+ ,
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ }
+};
+
+OnigSyntaxType OnigSyntaxGrep = {
+ ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC | ONIG_SYN_OP_POSIX_BRACKET |
+ ONIG_SYN_OP_ESC_BRACE_INTERVAL | ONIG_SYN_OP_ESC_LPAREN_SUBEXP |
+ ONIG_SYN_OP_ESC_VBAR_ALT |
+ ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_ESC_PLUS_ONE_INF |
+ ONIG_SYN_OP_ESC_QMARK_ZERO_ONE | ONIG_SYN_OP_LINE_ANCHOR |
+ ONIG_SYN_OP_ESC_W_WORD | ONIG_SYN_OP_ESC_B_WORD_BOUND |
+ ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END | ONIG_SYN_OP_DECIMAL_BACKREF )
+ , 0
+ , ( ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC | ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC )
+ , ONIG_OPTION_NONE
+ ,
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ }
+};
+
+OnigSyntaxType OnigSyntaxGnuRegex = {
+ SYN_GNU_REGEX_OP
+ , 0
+ , SYN_GNU_REGEX_BV
+ , ONIG_OPTION_NONE
+ ,
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ }
+};
+
+OnigSyntaxType OnigSyntaxJava = {
+ (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
+ ONIG_SYN_OP_ESC_CONTROL_CHARS | ONIG_SYN_OP_ESC_C_CONTROL |
+ ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 )
+ & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
+ , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE | ONIG_SYN_OP2_QMARK_GROUP_EFFECT |
+ ONIG_SYN_OP2_OPTION_PERL | ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |
+ ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL | ONIG_SYN_OP2_CCLASS_SET_OP |
+ ONIG_SYN_OP2_ESC_V_VTAB | ONIG_SYN_OP2_ESC_U_HEX4 |
+ ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY )
+ , ( SYN_GNU_REGEX_BV | ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND )
+ , ONIG_OPTION_SINGLELINE
+ ,
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ }
+};
+
+OnigSyntaxType OnigSyntaxPerl = {
+ (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
+ ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
+ ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS |
+ ONIG_SYN_OP_ESC_C_CONTROL )
+ & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
+ , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE |
+ ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL |
+ ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |
+ ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT )
+ , SYN_GNU_REGEX_BV
+ , ONIG_OPTION_SINGLELINE
+ ,
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ }
+};
+
+/* Perl + named group */
+OnigSyntaxType OnigSyntaxPerl_NG = {
+ (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
+ ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
+ ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS |
+ ONIG_SYN_OP_ESC_C_CONTROL )
+ & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
+ , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE |
+ ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL |
+ ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |
+ ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
+ ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP |
+ ONIG_SYN_OP2_ESC_K_NAMED_BACKREF |
+ ONIG_SYN_OP2_ESC_G_SUBEXP_CALL )
+ , ( SYN_GNU_REGEX_BV |
+ ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |
+ ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME )
+ , ONIG_OPTION_SINGLELINE
+ ,
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ }
+};
+
+
+
+extern int
+onig_set_default_syntax(OnigSyntaxType* syntax)
+{
+ if (IS_NULL(syntax))
+ syntax = ONIG_SYNTAX_RUBY;
+
+ OnigDefaultSyntax = syntax;
+ return 0;
+}
+
+extern void
+onig_copy_syntax(OnigSyntaxType* to, OnigSyntaxType* from)
+{
+ *to = *from;
+}
+
+extern void
+onig_set_syntax_op(OnigSyntaxType* syntax, unsigned int op)
+{
+ syntax->op = op;
+}
+
+extern void
+onig_set_syntax_op2(OnigSyntaxType* syntax, unsigned int op2)
+{
+ syntax->op2 = op2;
+}
+
+extern void
+onig_set_syntax_behavior(OnigSyntaxType* syntax, unsigned int behavior)
+{
+ syntax->behavior = behavior;
+}
+
+extern void
+onig_set_syntax_options(OnigSyntaxType* syntax, OnigOptionType options)
+{
+ syntax->options = options;
+}
+
+extern unsigned int
+onig_get_syntax_op(OnigSyntaxType* syntax)
+{
+ return syntax->op;
+}
+
+extern unsigned int
+onig_get_syntax_op2(OnigSyntaxType* syntax)
+{
+ return syntax->op2;
+}
+
+extern unsigned int
+onig_get_syntax_behavior(OnigSyntaxType* syntax)
+{
+ return syntax->behavior;
+}
+
+extern OnigOptionType
+onig_get_syntax_options(OnigSyntaxType* syntax)
+{
+ return syntax->options;
+}
+
+#ifdef USE_VARIABLE_META_CHARS
+extern int onig_set_meta_char(OnigSyntaxType* enc,
+ unsigned int what, OnigCodePoint code)
+{
+ switch (what) {
+ case ONIG_META_CHAR_ESCAPE:
+ enc->meta_char_table.esc = code;
+ break;
+ case ONIG_META_CHAR_ANYCHAR:
+ enc->meta_char_table.anychar = code;
+ break;
+ case ONIG_META_CHAR_ANYTIME:
+ enc->meta_char_table.anytime = code;
+ break;
+ case ONIG_META_CHAR_ZERO_OR_ONE_TIME:
+ enc->meta_char_table.zero_or_one_time = code;
+ break;
+ case ONIG_META_CHAR_ONE_OR_MORE_TIME:
+ enc->meta_char_table.one_or_more_time = code;
+ break;
+ case ONIG_META_CHAR_ANYCHAR_ANYTIME:
+ enc->meta_char_table.anychar_anytime = code;
+ break;
+ default:
+ return ONIGERR_INVALID_ARGUMENT;
+ break;
+ }
+ return 0;
+}
+#endif /* USE_VARIABLE_META_CHARS */
diff --git a/sjis.c b/sjis.c
index f7d7d52265..f6b78eba13 100644
--- a/sjis.c
+++ b/sjis.c
@@ -2,7 +2,7 @@
sjis.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -27,7 +27,7 @@
* SUCH DAMAGE.
*/
-#include "regenc.h"
+#include "regint.h"
static const int EncLen_SJIS[] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -71,13 +71,13 @@ static const char SJIS_CAN_BE_TRAIL_TABLE[256] = {
#define SJIS_ISMB_TRAIL(byte) SJIS_CAN_BE_TRAIL_TABLE[(byte)]
static int
-sjis_mbc_enc_len(const UChar* p)
+mbc_enc_len(const UChar* p)
{
return EncLen_SJIS[*p];
}
static int
-sjis_code_to_mbclen(OnigCodePoint code)
+code_to_mbclen(OnigCodePoint code)
{
if (code < 256) {
if (EncLen_SJIS[(int )code] == 1)
@@ -93,7 +93,7 @@ sjis_code_to_mbclen(OnigCodePoint code)
}
static OnigCodePoint
-sjis_mbc_to_code(const UChar* p, const UChar* end)
+mbc_to_code(const UChar* p, const UChar* end)
{
int c, i, len;
OnigCodePoint n;
@@ -112,7 +112,7 @@ sjis_mbc_to_code(const UChar* p, const UChar* end)
}
static int
-sjis_code_to_mbc(OnigCodePoint code, UChar *buf)
+code_to_mbc(OnigCodePoint code, UChar *buf)
{
UChar *p = buf;
@@ -127,60 +127,56 @@ sjis_code_to_mbc(OnigCodePoint code, UChar *buf)
}
static int
-sjis_mbc_to_normalize(OnigAmbigType flag,
- const UChar** pp, const UChar* end, UChar* lower)
+mbc_case_fold(OnigCaseFoldType flag,
+ const UChar** pp, const UChar* end, UChar* lower)
{
const UChar* p = *pp;
if (ONIGENC_IS_MBC_ASCII(p)) {
- if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) {
- *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
- }
- else {
- *lower = *p;
- }
-
+ *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
(*pp)++;
return 1;
}
else {
+ int i;
int len = enc_len(ONIG_ENCODING_SJIS, p);
- if (lower != p) {
- int i;
- for (i = 0; i < len; i++) {
- *lower++ = *p++;
- }
+ for (i = 0; i < len; i++) {
+ *lower++ = *p++;
}
(*pp) += len;
return len; /* return byte length of converted char to lower */
}
}
+#if 0
static int
-sjis_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
+is_mbc_ambiguous(OnigCaseFoldType flag,
+ const UChar** pp, const UChar* end)
{
return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_SJIS, flag, pp, end);
}
+#endif
+#if 0
static int
-sjis_is_code_ctype(OnigCodePoint code, unsigned int ctype)
+is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
if (code < 128)
return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
else {
- if ((ctype & (ONIGENC_CTYPE_WORD |
- ONIGENC_CTYPE_GRAPH | ONIGENC_CTYPE_PRINT)) != 0) {
- return (sjis_code_to_mbclen(code) > 1 ? TRUE : FALSE);
+ if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) {
+ return (code_to_mbclen(code) > 1 ? TRUE : FALSE);
}
}
return FALSE;
}
+#endif
static UChar*
-sjis_left_adjust_char_head(const UChar* start, const UChar* s)
+left_adjust_char_head(const UChar* start, const UChar* s)
{
const UChar *p;
int len;
@@ -203,36 +199,120 @@ sjis_left_adjust_char_head(const UChar* start, const UChar* s)
}
static int
-sjis_is_allowed_reverse_match(const UChar* s, const UChar* end)
+is_allowed_reverse_match(const UChar* s, const UChar* end)
{
const UChar c = *s;
return (SJIS_ISMB_TRAIL(c) ? FALSE : TRUE);
}
+
+static int PropertyInited = 0;
+static const OnigCodePoint** PropertyList;
+static int PropertyListNum;
+static int PropertyListSize;
+static hash_table_type* PropertyNameTable;
+
+static const OnigCodePoint CR_Hiragana[] = {
+ 1,
+ 0x829f, 0x82f1
+}; /* CR_Hiragana */
+
+static const OnigCodePoint CR_Katakana[] = {
+ 4,
+ 0x00a6, 0x00af,
+ 0x00b1, 0x00dd,
+ 0x8340, 0x837e,
+ 0x8380, 0x8396,
+}; /* CR_Katakana */
+
+static int
+init_property_list(void)
+{
+ int r;
+
+ PROPERTY_LIST_ADD_PROP("Hiragana", CR_Hiragana);
+ PROPERTY_LIST_ADD_PROP("Katakana", CR_Katakana);
+ PropertyInited = 1;
+
+ end:
+ return r;
+}
+
+static int
+property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end)
+{
+ int ctype;
+
+ PROPERTY_LIST_INIT_CHECK;
+
+ if (onig_st_lookup_strend(PropertyNameTable, p, end, (void*)&ctype) == 0) {
+ return onigenc_minimum_property_name_to_ctype(enc, p, end);
+ }
+
+ return ctype;
+}
+
+static int
+is_code_ctype(OnigCodePoint code, unsigned int ctype)
+{
+ if (ctype <= ONIGENC_MAX_STD_CTYPE) {
+ if (code < 128)
+ return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
+ else {
+ if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) {
+ return (code_to_mbclen(code) > 1 ? TRUE : FALSE);
+ }
+ }
+ }
+ else {
+ PROPERTY_LIST_INIT_CHECK;
+
+ ctype -= (ONIGENC_MAX_STD_CTYPE + 1);
+ if (ctype >= (unsigned int )PropertyListNum)
+ return ONIGENCERR_TYPE_BUG;
+
+ return onig_is_in_code_range((UChar* )PropertyList[ctype], code);
+ }
+
+ return FALSE;
+}
+
+static int
+get_ctype_code_range(int ctype, OnigCodePoint* sb_out,
+ const OnigCodePoint* ranges[])
+{
+ if (ctype <= ONIGENC_MAX_STD_CTYPE) {
+ return ONIG_NO_SUPPORT_CONFIG;
+ }
+ else {
+ *sb_out = 0x80;
+
+ PROPERTY_LIST_INIT_CHECK;
+
+ ctype -= (ONIGENC_MAX_STD_CTYPE + 1);
+ if (ctype >= PropertyListNum)
+ return ONIGENCERR_TYPE_BUG;
+
+ *ranges = PropertyList[ctype];
+ return 0;
+ }
+}
+
OnigEncodingType OnigEncodingSJIS = {
- sjis_mbc_enc_len,
+ mbc_enc_len,
"Shift_JIS", /* name */
2, /* max byte length */
1, /* min byte length */
- ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE,
- {
- (OnigCodePoint )'\\' /* esc */
- , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
- , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
- , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
- , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
- , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
- },
onigenc_is_mbc_newline_0x0a,
- sjis_mbc_to_code,
- sjis_code_to_mbclen,
- sjis_code_to_mbc,
- sjis_mbc_to_normalize,
- sjis_is_mbc_ambiguous,
- onigenc_ascii_get_all_pair_ambig_codes,
- onigenc_nothing_get_all_comp_ambig_codes,
- sjis_is_code_ctype,
- onigenc_not_support_get_ctype_code_range,
- sjis_left_adjust_char_head,
- sjis_is_allowed_reverse_match
+ mbc_to_code,
+ code_to_mbclen,
+ code_to_mbc,
+ mbc_case_fold,
+ onigenc_ascii_apply_all_case_fold,
+ onigenc_ascii_get_case_fold_codes_by_str,
+ property_name_to_ctype,
+ is_code_ctype,
+ get_ctype_code_range,
+ left_adjust_char_head,
+ is_allowed_reverse_match
};
diff --git a/unicode.c b/unicode.c
new file mode 100644
index 0000000000..c81f73006f
--- /dev/null
+++ b/unicode.c
@@ -0,0 +1,11344 @@
+/**********************************************************************
+ unicode.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regint.h"
+
+#define ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code,ctype) \
+ ((EncUNICODE_ISO_8859_1_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
+#if 0
+#define ONIGENC_IS_UNICODE_ISO_8859_1_BIT_CTYPE(code,cbit) \
+ ((EncUNICODE_ISO_8859_1_CtypeTable[code] & (cbit)) != 0)
+#endif
+
+static const unsigned short EncUNICODE_ISO_8859_1_CtypeTable[256] = {
+ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+ 0x4008, 0x428c, 0x4289, 0x4288, 0x4288, 0x4288, 0x4008, 0x4008,
+ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+ 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+ 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+ 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
+ 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+ 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
+ 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+ 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+ 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
+ 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
+ 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+ 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+ 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0288, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
+ 0x00a0, 0x00a0, 0x30e2, 0x01a0, 0x00a0, 0x00a8, 0x00a0, 0x00a0,
+ 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x30e2, 0x00a0, 0x01a0,
+ 0x00a0, 0x10a0, 0x30e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2
+};
+
+/* 'NEWLINE' */
+static const OnigCodePoint CR_NEWLINE[] = {
+ 1,
+ 0x000a, 0x000a
+}; /* CR_NEWLINE */
+
+/* 'Alpha': [[:Alpha:]] */
+static const OnigCodePoint CR_Alpha[] = {
+ 418,
+ 0x0041, 0x005a,
+ 0x0061, 0x007a,
+ 0x00aa, 0x00aa,
+ 0x00b5, 0x00b5,
+ 0x00ba, 0x00ba,
+ 0x00c0, 0x00d6,
+ 0x00d8, 0x00f6,
+ 0x00f8, 0x0241,
+ 0x0250, 0x02c1,
+ 0x02c6, 0x02d1,
+ 0x02e0, 0x02e4,
+ 0x02ee, 0x02ee,
+ 0x0300, 0x036f,
+ 0x037a, 0x037a,
+ 0x0386, 0x0386,
+ 0x0388, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x03a1,
+ 0x03a3, 0x03ce,
+ 0x03d0, 0x03f5,
+ 0x03f7, 0x0481,
+ 0x0483, 0x0486,
+ 0x0488, 0x04ce,
+ 0x04d0, 0x04f9,
+ 0x0500, 0x050f,
+ 0x0531, 0x0556,
+ 0x0559, 0x0559,
+ 0x0561, 0x0587,
+ 0x0591, 0x05b9,
+ 0x05bb, 0x05bd,
+ 0x05bf, 0x05bf,
+ 0x05c1, 0x05c2,
+ 0x05c4, 0x05c5,
+ 0x05c7, 0x05c7,
+ 0x05d0, 0x05ea,
+ 0x05f0, 0x05f2,
+ 0x0610, 0x0615,
+ 0x0621, 0x063a,
+ 0x0640, 0x065e,
+ 0x066e, 0x06d3,
+ 0x06d5, 0x06dc,
+ 0x06de, 0x06e8,
+ 0x06ea, 0x06ef,
+ 0x06fa, 0x06fc,
+ 0x06ff, 0x06ff,
+ 0x0710, 0x074a,
+ 0x074d, 0x076d,
+ 0x0780, 0x07b1,
+ 0x0901, 0x0939,
+ 0x093c, 0x094d,
+ 0x0950, 0x0954,
+ 0x0958, 0x0963,
+ 0x097d, 0x097d,
+ 0x0981, 0x0983,
+ 0x0985, 0x098c,
+ 0x098f, 0x0990,
+ 0x0993, 0x09a8,
+ 0x09aa, 0x09b0,
+ 0x09b2, 0x09b2,
+ 0x09b6, 0x09b9,
+ 0x09bc, 0x09c4,
+ 0x09c7, 0x09c8,
+ 0x09cb, 0x09ce,
+ 0x09d7, 0x09d7,
+ 0x09dc, 0x09dd,
+ 0x09df, 0x09e3,
+ 0x09f0, 0x09f1,
+ 0x0a01, 0x0a03,
+ 0x0a05, 0x0a0a,
+ 0x0a0f, 0x0a10,
+ 0x0a13, 0x0a28,
+ 0x0a2a, 0x0a30,
+ 0x0a32, 0x0a33,
+ 0x0a35, 0x0a36,
+ 0x0a38, 0x0a39,
+ 0x0a3c, 0x0a3c,
+ 0x0a3e, 0x0a42,
+ 0x0a47, 0x0a48,
+ 0x0a4b, 0x0a4d,
+ 0x0a59, 0x0a5c,
+ 0x0a5e, 0x0a5e,
+ 0x0a70, 0x0a74,
+ 0x0a81, 0x0a83,
+ 0x0a85, 0x0a8d,
+ 0x0a8f, 0x0a91,
+ 0x0a93, 0x0aa8,
+ 0x0aaa, 0x0ab0,
+ 0x0ab2, 0x0ab3,
+ 0x0ab5, 0x0ab9,
+ 0x0abc, 0x0ac5,
+ 0x0ac7, 0x0ac9,
+ 0x0acb, 0x0acd,
+ 0x0ad0, 0x0ad0,
+ 0x0ae0, 0x0ae3,
+ 0x0b01, 0x0b03,
+ 0x0b05, 0x0b0c,
+ 0x0b0f, 0x0b10,
+ 0x0b13, 0x0b28,
+ 0x0b2a, 0x0b30,
+ 0x0b32, 0x0b33,
+ 0x0b35, 0x0b39,
+ 0x0b3c, 0x0b43,
+ 0x0b47, 0x0b48,
+ 0x0b4b, 0x0b4d,
+ 0x0b56, 0x0b57,
+ 0x0b5c, 0x0b5d,
+ 0x0b5f, 0x0b61,
+ 0x0b71, 0x0b71,
+ 0x0b82, 0x0b83,
+ 0x0b85, 0x0b8a,
+ 0x0b8e, 0x0b90,
+ 0x0b92, 0x0b95,
+ 0x0b99, 0x0b9a,
+ 0x0b9c, 0x0b9c,
+ 0x0b9e, 0x0b9f,
+ 0x0ba3, 0x0ba4,
+ 0x0ba8, 0x0baa,
+ 0x0bae, 0x0bb9,
+ 0x0bbe, 0x0bc2,
+ 0x0bc6, 0x0bc8,
+ 0x0bca, 0x0bcd,
+ 0x0bd7, 0x0bd7,
+ 0x0c01, 0x0c03,
+ 0x0c05, 0x0c0c,
+ 0x0c0e, 0x0c10,
+ 0x0c12, 0x0c28,
+ 0x0c2a, 0x0c33,
+ 0x0c35, 0x0c39,
+ 0x0c3e, 0x0c44,
+ 0x0c46, 0x0c48,
+ 0x0c4a, 0x0c4d,
+ 0x0c55, 0x0c56,
+ 0x0c60, 0x0c61,
+ 0x0c82, 0x0c83,
+ 0x0c85, 0x0c8c,
+ 0x0c8e, 0x0c90,
+ 0x0c92, 0x0ca8,
+ 0x0caa, 0x0cb3,
+ 0x0cb5, 0x0cb9,
+ 0x0cbc, 0x0cc4,
+ 0x0cc6, 0x0cc8,
+ 0x0cca, 0x0ccd,
+ 0x0cd5, 0x0cd6,
+ 0x0cde, 0x0cde,
+ 0x0ce0, 0x0ce1,
+ 0x0d02, 0x0d03,
+ 0x0d05, 0x0d0c,
+ 0x0d0e, 0x0d10,
+ 0x0d12, 0x0d28,
+ 0x0d2a, 0x0d39,
+ 0x0d3e, 0x0d43,
+ 0x0d46, 0x0d48,
+ 0x0d4a, 0x0d4d,
+ 0x0d57, 0x0d57,
+ 0x0d60, 0x0d61,
+ 0x0d82, 0x0d83,
+ 0x0d85, 0x0d96,
+ 0x0d9a, 0x0db1,
+ 0x0db3, 0x0dbb,
+ 0x0dbd, 0x0dbd,
+ 0x0dc0, 0x0dc6,
+ 0x0dca, 0x0dca,
+ 0x0dcf, 0x0dd4,
+ 0x0dd6, 0x0dd6,
+ 0x0dd8, 0x0ddf,
+ 0x0df2, 0x0df3,
+ 0x0e01, 0x0e3a,
+ 0x0e40, 0x0e4e,
+ 0x0e81, 0x0e82,
+ 0x0e84, 0x0e84,
+ 0x0e87, 0x0e88,
+ 0x0e8a, 0x0e8a,
+ 0x0e8d, 0x0e8d,
+ 0x0e94, 0x0e97,
+ 0x0e99, 0x0e9f,
+ 0x0ea1, 0x0ea3,
+ 0x0ea5, 0x0ea5,
+ 0x0ea7, 0x0ea7,
+ 0x0eaa, 0x0eab,
+ 0x0ead, 0x0eb9,
+ 0x0ebb, 0x0ebd,
+ 0x0ec0, 0x0ec4,
+ 0x0ec6, 0x0ec6,
+ 0x0ec8, 0x0ecd,
+ 0x0edc, 0x0edd,
+ 0x0f00, 0x0f00,
+ 0x0f18, 0x0f19,
+ 0x0f35, 0x0f35,
+ 0x0f37, 0x0f37,
+ 0x0f39, 0x0f39,
+ 0x0f3e, 0x0f47,
+ 0x0f49, 0x0f6a,
+ 0x0f71, 0x0f84,
+ 0x0f86, 0x0f8b,
+ 0x0f90, 0x0f97,
+ 0x0f99, 0x0fbc,
+ 0x0fc6, 0x0fc6,
+ 0x1000, 0x1021,
+ 0x1023, 0x1027,
+ 0x1029, 0x102a,
+ 0x102c, 0x1032,
+ 0x1036, 0x1039,
+ 0x1050, 0x1059,
+ 0x10a0, 0x10c5,
+ 0x10d0, 0x10fa,
+ 0x10fc, 0x10fc,
+ 0x1100, 0x1159,
+ 0x115f, 0x11a2,
+ 0x11a8, 0x11f9,
+ 0x1200, 0x1248,
+ 0x124a, 0x124d,
+ 0x1250, 0x1256,
+ 0x1258, 0x1258,
+ 0x125a, 0x125d,
+ 0x1260, 0x1288,
+ 0x128a, 0x128d,
+ 0x1290, 0x12b0,
+ 0x12b2, 0x12b5,
+ 0x12b8, 0x12be,
+ 0x12c0, 0x12c0,
+ 0x12c2, 0x12c5,
+ 0x12c8, 0x12d6,
+ 0x12d8, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x135a,
+ 0x135f, 0x135f,
+ 0x1380, 0x138f,
+ 0x13a0, 0x13f4,
+ 0x1401, 0x166c,
+ 0x166f, 0x1676,
+ 0x1681, 0x169a,
+ 0x16a0, 0x16ea,
+ 0x1700, 0x170c,
+ 0x170e, 0x1714,
+ 0x1720, 0x1734,
+ 0x1740, 0x1753,
+ 0x1760, 0x176c,
+ 0x176e, 0x1770,
+ 0x1772, 0x1773,
+ 0x1780, 0x17b3,
+ 0x17b6, 0x17d3,
+ 0x17d7, 0x17d7,
+ 0x17dc, 0x17dd,
+ 0x180b, 0x180d,
+ 0x1820, 0x1877,
+ 0x1880, 0x18a9,
+ 0x1900, 0x191c,
+ 0x1920, 0x192b,
+ 0x1930, 0x193b,
+ 0x1950, 0x196d,
+ 0x1970, 0x1974,
+ 0x1980, 0x19a9,
+ 0x19b0, 0x19c9,
+ 0x1a00, 0x1a1b,
+ 0x1d00, 0x1dc3,
+ 0x1e00, 0x1e9b,
+ 0x1ea0, 0x1ef9,
+ 0x1f00, 0x1f15,
+ 0x1f18, 0x1f1d,
+ 0x1f20, 0x1f45,
+ 0x1f48, 0x1f4d,
+ 0x1f50, 0x1f57,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f7d,
+ 0x1f80, 0x1fb4,
+ 0x1fb6, 0x1fbc,
+ 0x1fbe, 0x1fbe,
+ 0x1fc2, 0x1fc4,
+ 0x1fc6, 0x1fcc,
+ 0x1fd0, 0x1fd3,
+ 0x1fd6, 0x1fdb,
+ 0x1fe0, 0x1fec,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ffc,
+ 0x2071, 0x2071,
+ 0x207f, 0x207f,
+ 0x2090, 0x2094,
+ 0x20d0, 0x20eb,
+ 0x2102, 0x2102,
+ 0x2107, 0x2107,
+ 0x210a, 0x2113,
+ 0x2115, 0x2115,
+ 0x2119, 0x211d,
+ 0x2124, 0x2124,
+ 0x2126, 0x2126,
+ 0x2128, 0x2128,
+ 0x212a, 0x212d,
+ 0x212f, 0x2131,
+ 0x2133, 0x2139,
+ 0x213c, 0x213f,
+ 0x2145, 0x2149,
+ 0x2c00, 0x2c2e,
+ 0x2c30, 0x2c5e,
+ 0x2c80, 0x2ce4,
+ 0x2d00, 0x2d25,
+ 0x2d30, 0x2d65,
+ 0x2d6f, 0x2d6f,
+ 0x2d80, 0x2d96,
+ 0x2da0, 0x2da6,
+ 0x2da8, 0x2dae,
+ 0x2db0, 0x2db6,
+ 0x2db8, 0x2dbe,
+ 0x2dc0, 0x2dc6,
+ 0x2dc8, 0x2dce,
+ 0x2dd0, 0x2dd6,
+ 0x2dd8, 0x2dde,
+ 0x3005, 0x3006,
+ 0x302a, 0x302f,
+ 0x3031, 0x3035,
+ 0x303b, 0x303c,
+ 0x3041, 0x3096,
+ 0x3099, 0x309a,
+ 0x309d, 0x309f,
+ 0x30a1, 0x30fa,
+ 0x30fc, 0x30ff,
+ 0x3105, 0x312c,
+ 0x3131, 0x318e,
+ 0x31a0, 0x31b7,
+ 0x31f0, 0x31ff,
+ 0x3400, 0x4db5,
+ 0x4e00, 0x9fbb,
+ 0xa000, 0xa48c,
+ 0xa800, 0xa827,
+ 0xac00, 0xd7a3,
+ 0xf900, 0xfa2d,
+ 0xfa30, 0xfa6a,
+ 0xfa70, 0xfad9,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xfb1d, 0xfb28,
+ 0xfb2a, 0xfb36,
+ 0xfb38, 0xfb3c,
+ 0xfb3e, 0xfb3e,
+ 0xfb40, 0xfb41,
+ 0xfb43, 0xfb44,
+ 0xfb46, 0xfbb1,
+ 0xfbd3, 0xfd3d,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdfb,
+ 0xfe00, 0xfe0f,
+ 0xfe20, 0xfe23,
+ 0xfe70, 0xfe74,
+ 0xfe76, 0xfefc,
+ 0xff21, 0xff3a,
+ 0xff41, 0xff5a,
+ 0xff66, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc,
+ 0x10000, 0x1000b,
+ 0x1000d, 0x10026,
+ 0x10028, 0x1003a,
+ 0x1003c, 0x1003d,
+ 0x1003f, 0x1004d,
+ 0x10050, 0x1005d,
+ 0x10080, 0x100fa,
+ 0x10300, 0x1031e,
+ 0x10330, 0x10349,
+ 0x10380, 0x1039d,
+ 0x103a0, 0x103c3,
+ 0x103c8, 0x103cf,
+ 0x10400, 0x1049d,
+ 0x10800, 0x10805,
+ 0x10808, 0x10808,
+ 0x1080a, 0x10835,
+ 0x10837, 0x10838,
+ 0x1083c, 0x1083c,
+ 0x1083f, 0x1083f,
+ 0x10a00, 0x10a03,
+ 0x10a05, 0x10a06,
+ 0x10a0c, 0x10a13,
+ 0x10a15, 0x10a17,
+ 0x10a19, 0x10a33,
+ 0x10a38, 0x10a3a,
+ 0x10a3f, 0x10a3f,
+ 0x1d165, 0x1d169,
+ 0x1d16d, 0x1d172,
+ 0x1d17b, 0x1d182,
+ 0x1d185, 0x1d18b,
+ 0x1d1aa, 0x1d1ad,
+ 0x1d242, 0x1d244,
+ 0x1d400, 0x1d454,
+ 0x1d456, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d51e, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d552, 0x1d6a5,
+ 0x1d6a8, 0x1d6c0,
+ 0x1d6c2, 0x1d6da,
+ 0x1d6dc, 0x1d6fa,
+ 0x1d6fc, 0x1d714,
+ 0x1d716, 0x1d734,
+ 0x1d736, 0x1d74e,
+ 0x1d750, 0x1d76e,
+ 0x1d770, 0x1d788,
+ 0x1d78a, 0x1d7a8,
+ 0x1d7aa, 0x1d7c2,
+ 0x1d7c4, 0x1d7c9,
+ 0x20000, 0x2a6d6,
+ 0x2f800, 0x2fa1d,
+ 0xe0100, 0xe01ef
+}; /* CR_Alpha */
+
+/* 'Blank': [[:Blank:]] */
+static const OnigCodePoint CR_Blank[] = {
+ 9,
+ 0x0009, 0x0009,
+ 0x0020, 0x0020,
+ 0x00a0, 0x00a0,
+ 0x1680, 0x1680,
+ 0x180e, 0x180e,
+ 0x2000, 0x200a,
+ 0x202f, 0x202f,
+ 0x205f, 0x205f,
+ 0x3000, 0x3000
+}; /* CR_Blank */
+
+/* 'Cntrl': [[:Cntrl:]] */
+static const OnigCodePoint CR_Cntrl[] = {
+ 19,
+ 0x0000, 0x001f,
+ 0x007f, 0x009f,
+ 0x00ad, 0x00ad,
+ 0x0600, 0x0603,
+ 0x06dd, 0x06dd,
+ 0x070f, 0x070f,
+ 0x17b4, 0x17b5,
+ 0x200b, 0x200f,
+ 0x202a, 0x202e,
+ 0x2060, 0x2063,
+ 0x206a, 0x206f,
+ 0xd800, 0xf8ff,
+ 0xfeff, 0xfeff,
+ 0xfff9, 0xfffb,
+ 0x1d173, 0x1d17a,
+ 0xe0001, 0xe0001,
+ 0xe0020, 0xe007f,
+ 0xf0000, 0xffffd,
+ 0x100000, 0x10fffd
+}; /* CR_Cntrl */
+
+/* 'Digit': [[:Digit:]] */
+static const OnigCodePoint CR_Digit[] = {
+ 23,
+ 0x0030, 0x0039,
+ 0x0660, 0x0669,
+ 0x06f0, 0x06f9,
+ 0x0966, 0x096f,
+ 0x09e6, 0x09ef,
+ 0x0a66, 0x0a6f,
+ 0x0ae6, 0x0aef,
+ 0x0b66, 0x0b6f,
+ 0x0be6, 0x0bef,
+ 0x0c66, 0x0c6f,
+ 0x0ce6, 0x0cef,
+ 0x0d66, 0x0d6f,
+ 0x0e50, 0x0e59,
+ 0x0ed0, 0x0ed9,
+ 0x0f20, 0x0f29,
+ 0x1040, 0x1049,
+ 0x17e0, 0x17e9,
+ 0x1810, 0x1819,
+ 0x1946, 0x194f,
+ 0x19d0, 0x19d9,
+ 0xff10, 0xff19,
+ 0x104a0, 0x104a9,
+ 0x1d7ce, 0x1d7ff
+}; /* CR_Digit */
+
+/* 'Graph': [[:Graph:]] */
+static const OnigCodePoint CR_Graph[] = {
+ 424,
+ 0x0021, 0x007e,
+ 0x00a1, 0x0241,
+ 0x0250, 0x036f,
+ 0x0374, 0x0375,
+ 0x037a, 0x037a,
+ 0x037e, 0x037e,
+ 0x0384, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x03a1,
+ 0x03a3, 0x03ce,
+ 0x03d0, 0x0486,
+ 0x0488, 0x04ce,
+ 0x04d0, 0x04f9,
+ 0x0500, 0x050f,
+ 0x0531, 0x0556,
+ 0x0559, 0x055f,
+ 0x0561, 0x0587,
+ 0x0589, 0x058a,
+ 0x0591, 0x05b9,
+ 0x05bb, 0x05c7,
+ 0x05d0, 0x05ea,
+ 0x05f0, 0x05f4,
+ 0x0600, 0x0603,
+ 0x060b, 0x0615,
+ 0x061b, 0x061b,
+ 0x061e, 0x061f,
+ 0x0621, 0x063a,
+ 0x0640, 0x065e,
+ 0x0660, 0x070d,
+ 0x070f, 0x074a,
+ 0x074d, 0x076d,
+ 0x0780, 0x07b1,
+ 0x0901, 0x0939,
+ 0x093c, 0x094d,
+ 0x0950, 0x0954,
+ 0x0958, 0x0970,
+ 0x097d, 0x097d,
+ 0x0981, 0x0983,
+ 0x0985, 0x098c,
+ 0x098f, 0x0990,
+ 0x0993, 0x09a8,
+ 0x09aa, 0x09b0,
+ 0x09b2, 0x09b2,
+ 0x09b6, 0x09b9,
+ 0x09bc, 0x09c4,
+ 0x09c7, 0x09c8,
+ 0x09cb, 0x09ce,
+ 0x09d7, 0x09d7,
+ 0x09dc, 0x09dd,
+ 0x09df, 0x09e3,
+ 0x09e6, 0x09fa,
+ 0x0a01, 0x0a03,
+ 0x0a05, 0x0a0a,
+ 0x0a0f, 0x0a10,
+ 0x0a13, 0x0a28,
+ 0x0a2a, 0x0a30,
+ 0x0a32, 0x0a33,
+ 0x0a35, 0x0a36,
+ 0x0a38, 0x0a39,
+ 0x0a3c, 0x0a3c,
+ 0x0a3e, 0x0a42,
+ 0x0a47, 0x0a48,
+ 0x0a4b, 0x0a4d,
+ 0x0a59, 0x0a5c,
+ 0x0a5e, 0x0a5e,
+ 0x0a66, 0x0a74,
+ 0x0a81, 0x0a83,
+ 0x0a85, 0x0a8d,
+ 0x0a8f, 0x0a91,
+ 0x0a93, 0x0aa8,
+ 0x0aaa, 0x0ab0,
+ 0x0ab2, 0x0ab3,
+ 0x0ab5, 0x0ab9,
+ 0x0abc, 0x0ac5,
+ 0x0ac7, 0x0ac9,
+ 0x0acb, 0x0acd,
+ 0x0ad0, 0x0ad0,
+ 0x0ae0, 0x0ae3,
+ 0x0ae6, 0x0aef,
+ 0x0af1, 0x0af1,
+ 0x0b01, 0x0b03,
+ 0x0b05, 0x0b0c,
+ 0x0b0f, 0x0b10,
+ 0x0b13, 0x0b28,
+ 0x0b2a, 0x0b30,
+ 0x0b32, 0x0b33,
+ 0x0b35, 0x0b39,
+ 0x0b3c, 0x0b43,
+ 0x0b47, 0x0b48,
+ 0x0b4b, 0x0b4d,
+ 0x0b56, 0x0b57,
+ 0x0b5c, 0x0b5d,
+ 0x0b5f, 0x0b61,
+ 0x0b66, 0x0b71,
+ 0x0b82, 0x0b83,
+ 0x0b85, 0x0b8a,
+ 0x0b8e, 0x0b90,
+ 0x0b92, 0x0b95,
+ 0x0b99, 0x0b9a,
+ 0x0b9c, 0x0b9c,
+ 0x0b9e, 0x0b9f,
+ 0x0ba3, 0x0ba4,
+ 0x0ba8, 0x0baa,
+ 0x0bae, 0x0bb9,
+ 0x0bbe, 0x0bc2,
+ 0x0bc6, 0x0bc8,
+ 0x0bca, 0x0bcd,
+ 0x0bd7, 0x0bd7,
+ 0x0be6, 0x0bfa,
+ 0x0c01, 0x0c03,
+ 0x0c05, 0x0c0c,
+ 0x0c0e, 0x0c10,
+ 0x0c12, 0x0c28,
+ 0x0c2a, 0x0c33,
+ 0x0c35, 0x0c39,
+ 0x0c3e, 0x0c44,
+ 0x0c46, 0x0c48,
+ 0x0c4a, 0x0c4d,
+ 0x0c55, 0x0c56,
+ 0x0c60, 0x0c61,
+ 0x0c66, 0x0c6f,
+ 0x0c82, 0x0c83,
+ 0x0c85, 0x0c8c,
+ 0x0c8e, 0x0c90,
+ 0x0c92, 0x0ca8,
+ 0x0caa, 0x0cb3,
+ 0x0cb5, 0x0cb9,
+ 0x0cbc, 0x0cc4,
+ 0x0cc6, 0x0cc8,
+ 0x0cca, 0x0ccd,
+ 0x0cd5, 0x0cd6,
+ 0x0cde, 0x0cde,
+ 0x0ce0, 0x0ce1,
+ 0x0ce6, 0x0cef,
+ 0x0d02, 0x0d03,
+ 0x0d05, 0x0d0c,
+ 0x0d0e, 0x0d10,
+ 0x0d12, 0x0d28,
+ 0x0d2a, 0x0d39,
+ 0x0d3e, 0x0d43,
+ 0x0d46, 0x0d48,
+ 0x0d4a, 0x0d4d,
+ 0x0d57, 0x0d57,
+ 0x0d60, 0x0d61,
+ 0x0d66, 0x0d6f,
+ 0x0d82, 0x0d83,
+ 0x0d85, 0x0d96,
+ 0x0d9a, 0x0db1,
+ 0x0db3, 0x0dbb,
+ 0x0dbd, 0x0dbd,
+ 0x0dc0, 0x0dc6,
+ 0x0dca, 0x0dca,
+ 0x0dcf, 0x0dd4,
+ 0x0dd6, 0x0dd6,
+ 0x0dd8, 0x0ddf,
+ 0x0df2, 0x0df4,
+ 0x0e01, 0x0e3a,
+ 0x0e3f, 0x0e5b,
+ 0x0e81, 0x0e82,
+ 0x0e84, 0x0e84,
+ 0x0e87, 0x0e88,
+ 0x0e8a, 0x0e8a,
+ 0x0e8d, 0x0e8d,
+ 0x0e94, 0x0e97,
+ 0x0e99, 0x0e9f,
+ 0x0ea1, 0x0ea3,
+ 0x0ea5, 0x0ea5,
+ 0x0ea7, 0x0ea7,
+ 0x0eaa, 0x0eab,
+ 0x0ead, 0x0eb9,
+ 0x0ebb, 0x0ebd,
+ 0x0ec0, 0x0ec4,
+ 0x0ec6, 0x0ec6,
+ 0x0ec8, 0x0ecd,
+ 0x0ed0, 0x0ed9,
+ 0x0edc, 0x0edd,
+ 0x0f00, 0x0f47,
+ 0x0f49, 0x0f6a,
+ 0x0f71, 0x0f8b,
+ 0x0f90, 0x0f97,
+ 0x0f99, 0x0fbc,
+ 0x0fbe, 0x0fcc,
+ 0x0fcf, 0x0fd1,
+ 0x1000, 0x1021,
+ 0x1023, 0x1027,
+ 0x1029, 0x102a,
+ 0x102c, 0x1032,
+ 0x1036, 0x1039,
+ 0x1040, 0x1059,
+ 0x10a0, 0x10c5,
+ 0x10d0, 0x10fc,
+ 0x1100, 0x1159,
+ 0x115f, 0x11a2,
+ 0x11a8, 0x11f9,
+ 0x1200, 0x1248,
+ 0x124a, 0x124d,
+ 0x1250, 0x1256,
+ 0x1258, 0x1258,
+ 0x125a, 0x125d,
+ 0x1260, 0x1288,
+ 0x128a, 0x128d,
+ 0x1290, 0x12b0,
+ 0x12b2, 0x12b5,
+ 0x12b8, 0x12be,
+ 0x12c0, 0x12c0,
+ 0x12c2, 0x12c5,
+ 0x12c8, 0x12d6,
+ 0x12d8, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x135a,
+ 0x135f, 0x137c,
+ 0x1380, 0x1399,
+ 0x13a0, 0x13f4,
+ 0x1401, 0x1676,
+ 0x1681, 0x169c,
+ 0x16a0, 0x16f0,
+ 0x1700, 0x170c,
+ 0x170e, 0x1714,
+ 0x1720, 0x1736,
+ 0x1740, 0x1753,
+ 0x1760, 0x176c,
+ 0x176e, 0x1770,
+ 0x1772, 0x1773,
+ 0x1780, 0x17dd,
+ 0x17e0, 0x17e9,
+ 0x17f0, 0x17f9,
+ 0x1800, 0x180d,
+ 0x1810, 0x1819,
+ 0x1820, 0x1877,
+ 0x1880, 0x18a9,
+ 0x1900, 0x191c,
+ 0x1920, 0x192b,
+ 0x1930, 0x193b,
+ 0x1940, 0x1940,
+ 0x1944, 0x196d,
+ 0x1970, 0x1974,
+ 0x1980, 0x19a9,
+ 0x19b0, 0x19c9,
+ 0x19d0, 0x19d9,
+ 0x19de, 0x1a1b,
+ 0x1a1e, 0x1a1f,
+ 0x1d00, 0x1dc3,
+ 0x1e00, 0x1e9b,
+ 0x1ea0, 0x1ef9,
+ 0x1f00, 0x1f15,
+ 0x1f18, 0x1f1d,
+ 0x1f20, 0x1f45,
+ 0x1f48, 0x1f4d,
+ 0x1f50, 0x1f57,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f7d,
+ 0x1f80, 0x1fb4,
+ 0x1fb6, 0x1fc4,
+ 0x1fc6, 0x1fd3,
+ 0x1fd6, 0x1fdb,
+ 0x1fdd, 0x1fef,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ffe,
+ 0x200b, 0x2027,
+ 0x202a, 0x202e,
+ 0x2030, 0x205e,
+ 0x2060, 0x2063,
+ 0x206a, 0x2071,
+ 0x2074, 0x208e,
+ 0x2090, 0x2094,
+ 0x20a0, 0x20b5,
+ 0x20d0, 0x20eb,
+ 0x2100, 0x214c,
+ 0x2153, 0x2183,
+ 0x2190, 0x23db,
+ 0x2400, 0x2426,
+ 0x2440, 0x244a,
+ 0x2460, 0x269c,
+ 0x26a0, 0x26b1,
+ 0x2701, 0x2704,
+ 0x2706, 0x2709,
+ 0x270c, 0x2727,
+ 0x2729, 0x274b,
+ 0x274d, 0x274d,
+ 0x274f, 0x2752,
+ 0x2756, 0x2756,
+ 0x2758, 0x275e,
+ 0x2761, 0x2794,
+ 0x2798, 0x27af,
+ 0x27b1, 0x27be,
+ 0x27c0, 0x27c6,
+ 0x27d0, 0x27eb,
+ 0x27f0, 0x2b13,
+ 0x2c00, 0x2c2e,
+ 0x2c30, 0x2c5e,
+ 0x2c80, 0x2cea,
+ 0x2cf9, 0x2d25,
+ 0x2d30, 0x2d65,
+ 0x2d6f, 0x2d6f,
+ 0x2d80, 0x2d96,
+ 0x2da0, 0x2da6,
+ 0x2da8, 0x2dae,
+ 0x2db0, 0x2db6,
+ 0x2db8, 0x2dbe,
+ 0x2dc0, 0x2dc6,
+ 0x2dc8, 0x2dce,
+ 0x2dd0, 0x2dd6,
+ 0x2dd8, 0x2dde,
+ 0x2e00, 0x2e17,
+ 0x2e1c, 0x2e1d,
+ 0x2e80, 0x2e99,
+ 0x2e9b, 0x2ef3,
+ 0x2f00, 0x2fd5,
+ 0x2ff0, 0x2ffb,
+ 0x3001, 0x303f,
+ 0x3041, 0x3096,
+ 0x3099, 0x30ff,
+ 0x3105, 0x312c,
+ 0x3131, 0x318e,
+ 0x3190, 0x31b7,
+ 0x31c0, 0x31cf,
+ 0x31f0, 0x321e,
+ 0x3220, 0x3243,
+ 0x3250, 0x32fe,
+ 0x3300, 0x4db5,
+ 0x4dc0, 0x9fbb,
+ 0xa000, 0xa48c,
+ 0xa490, 0xa4c6,
+ 0xa700, 0xa716,
+ 0xa800, 0xa82b,
+ 0xac00, 0xd7a3,
+ 0xe000, 0xfa2d,
+ 0xfa30, 0xfa6a,
+ 0xfa70, 0xfad9,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xfb1d, 0xfb36,
+ 0xfb38, 0xfb3c,
+ 0xfb3e, 0xfb3e,
+ 0xfb40, 0xfb41,
+ 0xfb43, 0xfb44,
+ 0xfb46, 0xfbb1,
+ 0xfbd3, 0xfd3f,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdfd,
+ 0xfe00, 0xfe19,
+ 0xfe20, 0xfe23,
+ 0xfe30, 0xfe52,
+ 0xfe54, 0xfe66,
+ 0xfe68, 0xfe6b,
+ 0xfe70, 0xfe74,
+ 0xfe76, 0xfefc,
+ 0xfeff, 0xfeff,
+ 0xff01, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc,
+ 0xffe0, 0xffe6,
+ 0xffe8, 0xffee,
+ 0xfff9, 0xfffd,
+ 0x10000, 0x1000b,
+ 0x1000d, 0x10026,
+ 0x10028, 0x1003a,
+ 0x1003c, 0x1003d,
+ 0x1003f, 0x1004d,
+ 0x10050, 0x1005d,
+ 0x10080, 0x100fa,
+ 0x10100, 0x10102,
+ 0x10107, 0x10133,
+ 0x10137, 0x1018a,
+ 0x10300, 0x1031e,
+ 0x10320, 0x10323,
+ 0x10330, 0x1034a,
+ 0x10380, 0x1039d,
+ 0x1039f, 0x103c3,
+ 0x103c8, 0x103d5,
+ 0x10400, 0x1049d,
+ 0x104a0, 0x104a9,
+ 0x10800, 0x10805,
+ 0x10808, 0x10808,
+ 0x1080a, 0x10835,
+ 0x10837, 0x10838,
+ 0x1083c, 0x1083c,
+ 0x1083f, 0x1083f,
+ 0x10a00, 0x10a03,
+ 0x10a05, 0x10a06,
+ 0x10a0c, 0x10a13,
+ 0x10a15, 0x10a17,
+ 0x10a19, 0x10a33,
+ 0x10a38, 0x10a3a,
+ 0x10a3f, 0x10a47,
+ 0x10a50, 0x10a58,
+ 0x1d000, 0x1d0f5,
+ 0x1d100, 0x1d126,
+ 0x1d12a, 0x1d1dd,
+ 0x1d200, 0x1d245,
+ 0x1d300, 0x1d356,
+ 0x1d400, 0x1d454,
+ 0x1d456, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d51e, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d552, 0x1d6a5,
+ 0x1d6a8, 0x1d7c9,
+ 0x1d7ce, 0x1d7ff,
+ 0x20000, 0x2a6d6,
+ 0x2f800, 0x2fa1d,
+ 0xe0001, 0xe0001,
+ 0xe0020, 0xe007f,
+ 0xe0100, 0xe01ef,
+ 0xf0000, 0xffffd,
+ 0x100000, 0x10fffd
+}; /* CR_Graph */
+
+/* 'Lower': [[:Lower:]] */
+static const OnigCodePoint CR_Lower[] = {
+ 480,
+ 0x0061, 0x007a,
+ 0x00aa, 0x00aa,
+ 0x00b5, 0x00b5,
+ 0x00ba, 0x00ba,
+ 0x00df, 0x00f6,
+ 0x00f8, 0x00ff,
+ 0x0101, 0x0101,
+ 0x0103, 0x0103,
+ 0x0105, 0x0105,
+ 0x0107, 0x0107,
+ 0x0109, 0x0109,
+ 0x010b, 0x010b,
+ 0x010d, 0x010d,
+ 0x010f, 0x010f,
+ 0x0111, 0x0111,
+ 0x0113, 0x0113,
+ 0x0115, 0x0115,
+ 0x0117, 0x0117,
+ 0x0119, 0x0119,
+ 0x011b, 0x011b,
+ 0x011d, 0x011d,
+ 0x011f, 0x011f,
+ 0x0121, 0x0121,
+ 0x0123, 0x0123,
+ 0x0125, 0x0125,
+ 0x0127, 0x0127,
+ 0x0129, 0x0129,
+ 0x012b, 0x012b,
+ 0x012d, 0x012d,
+ 0x012f, 0x012f,
+ 0x0131, 0x0131,
+ 0x0133, 0x0133,
+ 0x0135, 0x0135,
+ 0x0137, 0x0138,
+ 0x013a, 0x013a,
+ 0x013c, 0x013c,
+ 0x013e, 0x013e,
+ 0x0140, 0x0140,
+ 0x0142, 0x0142,
+ 0x0144, 0x0144,
+ 0x0146, 0x0146,
+ 0x0148, 0x0149,
+ 0x014b, 0x014b,
+ 0x014d, 0x014d,
+ 0x014f, 0x014f,
+ 0x0151, 0x0151,
+ 0x0153, 0x0153,
+ 0x0155, 0x0155,
+ 0x0157, 0x0157,
+ 0x0159, 0x0159,
+ 0x015b, 0x015b,
+ 0x015d, 0x015d,
+ 0x015f, 0x015f,
+ 0x0161, 0x0161,
+ 0x0163, 0x0163,
+ 0x0165, 0x0165,
+ 0x0167, 0x0167,
+ 0x0169, 0x0169,
+ 0x016b, 0x016b,
+ 0x016d, 0x016d,
+ 0x016f, 0x016f,
+ 0x0171, 0x0171,
+ 0x0173, 0x0173,
+ 0x0175, 0x0175,
+ 0x0177, 0x0177,
+ 0x017a, 0x017a,
+ 0x017c, 0x017c,
+ 0x017e, 0x0180,
+ 0x0183, 0x0183,
+ 0x0185, 0x0185,
+ 0x0188, 0x0188,
+ 0x018c, 0x018d,
+ 0x0192, 0x0192,
+ 0x0195, 0x0195,
+ 0x0199, 0x019b,
+ 0x019e, 0x019e,
+ 0x01a1, 0x01a1,
+ 0x01a3, 0x01a3,
+ 0x01a5, 0x01a5,
+ 0x01a8, 0x01a8,
+ 0x01aa, 0x01ab,
+ 0x01ad, 0x01ad,
+ 0x01b0, 0x01b0,
+ 0x01b4, 0x01b4,
+ 0x01b6, 0x01b6,
+ 0x01b9, 0x01ba,
+ 0x01bd, 0x01bf,
+ 0x01c6, 0x01c6,
+ 0x01c9, 0x01c9,
+ 0x01cc, 0x01cc,
+ 0x01ce, 0x01ce,
+ 0x01d0, 0x01d0,
+ 0x01d2, 0x01d2,
+ 0x01d4, 0x01d4,
+ 0x01d6, 0x01d6,
+ 0x01d8, 0x01d8,
+ 0x01da, 0x01da,
+ 0x01dc, 0x01dd,
+ 0x01df, 0x01df,
+ 0x01e1, 0x01e1,
+ 0x01e3, 0x01e3,
+ 0x01e5, 0x01e5,
+ 0x01e7, 0x01e7,
+ 0x01e9, 0x01e9,
+ 0x01eb, 0x01eb,
+ 0x01ed, 0x01ed,
+ 0x01ef, 0x01f0,
+ 0x01f3, 0x01f3,
+ 0x01f5, 0x01f5,
+ 0x01f9, 0x01f9,
+ 0x01fb, 0x01fb,
+ 0x01fd, 0x01fd,
+ 0x01ff, 0x01ff,
+ 0x0201, 0x0201,
+ 0x0203, 0x0203,
+ 0x0205, 0x0205,
+ 0x0207, 0x0207,
+ 0x0209, 0x0209,
+ 0x020b, 0x020b,
+ 0x020d, 0x020d,
+ 0x020f, 0x020f,
+ 0x0211, 0x0211,
+ 0x0213, 0x0213,
+ 0x0215, 0x0215,
+ 0x0217, 0x0217,
+ 0x0219, 0x0219,
+ 0x021b, 0x021b,
+ 0x021d, 0x021d,
+ 0x021f, 0x021f,
+ 0x0221, 0x0221,
+ 0x0223, 0x0223,
+ 0x0225, 0x0225,
+ 0x0227, 0x0227,
+ 0x0229, 0x0229,
+ 0x022b, 0x022b,
+ 0x022d, 0x022d,
+ 0x022f, 0x022f,
+ 0x0231, 0x0231,
+ 0x0233, 0x0239,
+ 0x023c, 0x023c,
+ 0x023f, 0x0240,
+ 0x0250, 0x02af,
+ 0x0390, 0x0390,
+ 0x03ac, 0x03ce,
+ 0x03d0, 0x03d1,
+ 0x03d5, 0x03d7,
+ 0x03d9, 0x03d9,
+ 0x03db, 0x03db,
+ 0x03dd, 0x03dd,
+ 0x03df, 0x03df,
+ 0x03e1, 0x03e1,
+ 0x03e3, 0x03e3,
+ 0x03e5, 0x03e5,
+ 0x03e7, 0x03e7,
+ 0x03e9, 0x03e9,
+ 0x03eb, 0x03eb,
+ 0x03ed, 0x03ed,
+ 0x03ef, 0x03f3,
+ 0x03f5, 0x03f5,
+ 0x03f8, 0x03f8,
+ 0x03fb, 0x03fc,
+ 0x0430, 0x045f,
+ 0x0461, 0x0461,
+ 0x0463, 0x0463,
+ 0x0465, 0x0465,
+ 0x0467, 0x0467,
+ 0x0469, 0x0469,
+ 0x046b, 0x046b,
+ 0x046d, 0x046d,
+ 0x046f, 0x046f,
+ 0x0471, 0x0471,
+ 0x0473, 0x0473,
+ 0x0475, 0x0475,
+ 0x0477, 0x0477,
+ 0x0479, 0x0479,
+ 0x047b, 0x047b,
+ 0x047d, 0x047d,
+ 0x047f, 0x047f,
+ 0x0481, 0x0481,
+ 0x048b, 0x048b,
+ 0x048d, 0x048d,
+ 0x048f, 0x048f,
+ 0x0491, 0x0491,
+ 0x0493, 0x0493,
+ 0x0495, 0x0495,
+ 0x0497, 0x0497,
+ 0x0499, 0x0499,
+ 0x049b, 0x049b,
+ 0x049d, 0x049d,
+ 0x049f, 0x049f,
+ 0x04a1, 0x04a1,
+ 0x04a3, 0x04a3,
+ 0x04a5, 0x04a5,
+ 0x04a7, 0x04a7,
+ 0x04a9, 0x04a9,
+ 0x04ab, 0x04ab,
+ 0x04ad, 0x04ad,
+ 0x04af, 0x04af,
+ 0x04b1, 0x04b1,
+ 0x04b3, 0x04b3,
+ 0x04b5, 0x04b5,
+ 0x04b7, 0x04b7,
+ 0x04b9, 0x04b9,
+ 0x04bb, 0x04bb,
+ 0x04bd, 0x04bd,
+ 0x04bf, 0x04bf,
+ 0x04c2, 0x04c2,
+ 0x04c4, 0x04c4,
+ 0x04c6, 0x04c6,
+ 0x04c8, 0x04c8,
+ 0x04ca, 0x04ca,
+ 0x04cc, 0x04cc,
+ 0x04ce, 0x04ce,
+ 0x04d1, 0x04d1,
+ 0x04d3, 0x04d3,
+ 0x04d5, 0x04d5,
+ 0x04d7, 0x04d7,
+ 0x04d9, 0x04d9,
+ 0x04db, 0x04db,
+ 0x04dd, 0x04dd,
+ 0x04df, 0x04df,
+ 0x04e1, 0x04e1,
+ 0x04e3, 0x04e3,
+ 0x04e5, 0x04e5,
+ 0x04e7, 0x04e7,
+ 0x04e9, 0x04e9,
+ 0x04eb, 0x04eb,
+ 0x04ed, 0x04ed,
+ 0x04ef, 0x04ef,
+ 0x04f1, 0x04f1,
+ 0x04f3, 0x04f3,
+ 0x04f5, 0x04f5,
+ 0x04f7, 0x04f7,
+ 0x04f9, 0x04f9,
+ 0x0501, 0x0501,
+ 0x0503, 0x0503,
+ 0x0505, 0x0505,
+ 0x0507, 0x0507,
+ 0x0509, 0x0509,
+ 0x050b, 0x050b,
+ 0x050d, 0x050d,
+ 0x050f, 0x050f,
+ 0x0561, 0x0587,
+ 0x1d00, 0x1d2b,
+ 0x1d62, 0x1d77,
+ 0x1d79, 0x1d9a,
+ 0x1e01, 0x1e01,
+ 0x1e03, 0x1e03,
+ 0x1e05, 0x1e05,
+ 0x1e07, 0x1e07,
+ 0x1e09, 0x1e09,
+ 0x1e0b, 0x1e0b,
+ 0x1e0d, 0x1e0d,
+ 0x1e0f, 0x1e0f,
+ 0x1e11, 0x1e11,
+ 0x1e13, 0x1e13,
+ 0x1e15, 0x1e15,
+ 0x1e17, 0x1e17,
+ 0x1e19, 0x1e19,
+ 0x1e1b, 0x1e1b,
+ 0x1e1d, 0x1e1d,
+ 0x1e1f, 0x1e1f,
+ 0x1e21, 0x1e21,
+ 0x1e23, 0x1e23,
+ 0x1e25, 0x1e25,
+ 0x1e27, 0x1e27,
+ 0x1e29, 0x1e29,
+ 0x1e2b, 0x1e2b,
+ 0x1e2d, 0x1e2d,
+ 0x1e2f, 0x1e2f,
+ 0x1e31, 0x1e31,
+ 0x1e33, 0x1e33,
+ 0x1e35, 0x1e35,
+ 0x1e37, 0x1e37,
+ 0x1e39, 0x1e39,
+ 0x1e3b, 0x1e3b,
+ 0x1e3d, 0x1e3d,
+ 0x1e3f, 0x1e3f,
+ 0x1e41, 0x1e41,
+ 0x1e43, 0x1e43,
+ 0x1e45, 0x1e45,
+ 0x1e47, 0x1e47,
+ 0x1e49, 0x1e49,
+ 0x1e4b, 0x1e4b,
+ 0x1e4d, 0x1e4d,
+ 0x1e4f, 0x1e4f,
+ 0x1e51, 0x1e51,
+ 0x1e53, 0x1e53,
+ 0x1e55, 0x1e55,
+ 0x1e57, 0x1e57,
+ 0x1e59, 0x1e59,
+ 0x1e5b, 0x1e5b,
+ 0x1e5d, 0x1e5d,
+ 0x1e5f, 0x1e5f,
+ 0x1e61, 0x1e61,
+ 0x1e63, 0x1e63,
+ 0x1e65, 0x1e65,
+ 0x1e67, 0x1e67,
+ 0x1e69, 0x1e69,
+ 0x1e6b, 0x1e6b,
+ 0x1e6d, 0x1e6d,
+ 0x1e6f, 0x1e6f,
+ 0x1e71, 0x1e71,
+ 0x1e73, 0x1e73,
+ 0x1e75, 0x1e75,
+ 0x1e77, 0x1e77,
+ 0x1e79, 0x1e79,
+ 0x1e7b, 0x1e7b,
+ 0x1e7d, 0x1e7d,
+ 0x1e7f, 0x1e7f,
+ 0x1e81, 0x1e81,
+ 0x1e83, 0x1e83,
+ 0x1e85, 0x1e85,
+ 0x1e87, 0x1e87,
+ 0x1e89, 0x1e89,
+ 0x1e8b, 0x1e8b,
+ 0x1e8d, 0x1e8d,
+ 0x1e8f, 0x1e8f,
+ 0x1e91, 0x1e91,
+ 0x1e93, 0x1e93,
+ 0x1e95, 0x1e9b,
+ 0x1ea1, 0x1ea1,
+ 0x1ea3, 0x1ea3,
+ 0x1ea5, 0x1ea5,
+ 0x1ea7, 0x1ea7,
+ 0x1ea9, 0x1ea9,
+ 0x1eab, 0x1eab,
+ 0x1ead, 0x1ead,
+ 0x1eaf, 0x1eaf,
+ 0x1eb1, 0x1eb1,
+ 0x1eb3, 0x1eb3,
+ 0x1eb5, 0x1eb5,
+ 0x1eb7, 0x1eb7,
+ 0x1eb9, 0x1eb9,
+ 0x1ebb, 0x1ebb,
+ 0x1ebd, 0x1ebd,
+ 0x1ebf, 0x1ebf,
+ 0x1ec1, 0x1ec1,
+ 0x1ec3, 0x1ec3,
+ 0x1ec5, 0x1ec5,
+ 0x1ec7, 0x1ec7,
+ 0x1ec9, 0x1ec9,
+ 0x1ecb, 0x1ecb,
+ 0x1ecd, 0x1ecd,
+ 0x1ecf, 0x1ecf,
+ 0x1ed1, 0x1ed1,
+ 0x1ed3, 0x1ed3,
+ 0x1ed5, 0x1ed5,
+ 0x1ed7, 0x1ed7,
+ 0x1ed9, 0x1ed9,
+ 0x1edb, 0x1edb,
+ 0x1edd, 0x1edd,
+ 0x1edf, 0x1edf,
+ 0x1ee1, 0x1ee1,
+ 0x1ee3, 0x1ee3,
+ 0x1ee5, 0x1ee5,
+ 0x1ee7, 0x1ee7,
+ 0x1ee9, 0x1ee9,
+ 0x1eeb, 0x1eeb,
+ 0x1eed, 0x1eed,
+ 0x1eef, 0x1eef,
+ 0x1ef1, 0x1ef1,
+ 0x1ef3, 0x1ef3,
+ 0x1ef5, 0x1ef5,
+ 0x1ef7, 0x1ef7,
+ 0x1ef9, 0x1ef9,
+ 0x1f00, 0x1f07,
+ 0x1f10, 0x1f15,
+ 0x1f20, 0x1f27,
+ 0x1f30, 0x1f37,
+ 0x1f40, 0x1f45,
+ 0x1f50, 0x1f57,
+ 0x1f60, 0x1f67,
+ 0x1f70, 0x1f7d,
+ 0x1f80, 0x1f87,
+ 0x1f90, 0x1f97,
+ 0x1fa0, 0x1fa7,
+ 0x1fb0, 0x1fb4,
+ 0x1fb6, 0x1fb7,
+ 0x1fbe, 0x1fbe,
+ 0x1fc2, 0x1fc4,
+ 0x1fc6, 0x1fc7,
+ 0x1fd0, 0x1fd3,
+ 0x1fd6, 0x1fd7,
+ 0x1fe0, 0x1fe7,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ff7,
+ 0x2071, 0x2071,
+ 0x207f, 0x207f,
+ 0x210a, 0x210a,
+ 0x210e, 0x210f,
+ 0x2113, 0x2113,
+ 0x212f, 0x212f,
+ 0x2134, 0x2134,
+ 0x2139, 0x2139,
+ 0x213c, 0x213d,
+ 0x2146, 0x2149,
+ 0x2c30, 0x2c5e,
+ 0x2c81, 0x2c81,
+ 0x2c83, 0x2c83,
+ 0x2c85, 0x2c85,
+ 0x2c87, 0x2c87,
+ 0x2c89, 0x2c89,
+ 0x2c8b, 0x2c8b,
+ 0x2c8d, 0x2c8d,
+ 0x2c8f, 0x2c8f,
+ 0x2c91, 0x2c91,
+ 0x2c93, 0x2c93,
+ 0x2c95, 0x2c95,
+ 0x2c97, 0x2c97,
+ 0x2c99, 0x2c99,
+ 0x2c9b, 0x2c9b,
+ 0x2c9d, 0x2c9d,
+ 0x2c9f, 0x2c9f,
+ 0x2ca1, 0x2ca1,
+ 0x2ca3, 0x2ca3,
+ 0x2ca5, 0x2ca5,
+ 0x2ca7, 0x2ca7,
+ 0x2ca9, 0x2ca9,
+ 0x2cab, 0x2cab,
+ 0x2cad, 0x2cad,
+ 0x2caf, 0x2caf,
+ 0x2cb1, 0x2cb1,
+ 0x2cb3, 0x2cb3,
+ 0x2cb5, 0x2cb5,
+ 0x2cb7, 0x2cb7,
+ 0x2cb9, 0x2cb9,
+ 0x2cbb, 0x2cbb,
+ 0x2cbd, 0x2cbd,
+ 0x2cbf, 0x2cbf,
+ 0x2cc1, 0x2cc1,
+ 0x2cc3, 0x2cc3,
+ 0x2cc5, 0x2cc5,
+ 0x2cc7, 0x2cc7,
+ 0x2cc9, 0x2cc9,
+ 0x2ccb, 0x2ccb,
+ 0x2ccd, 0x2ccd,
+ 0x2ccf, 0x2ccf,
+ 0x2cd1, 0x2cd1,
+ 0x2cd3, 0x2cd3,
+ 0x2cd5, 0x2cd5,
+ 0x2cd7, 0x2cd7,
+ 0x2cd9, 0x2cd9,
+ 0x2cdb, 0x2cdb,
+ 0x2cdd, 0x2cdd,
+ 0x2cdf, 0x2cdf,
+ 0x2ce1, 0x2ce1,
+ 0x2ce3, 0x2ce4,
+ 0x2d00, 0x2d25,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xff41, 0xff5a,
+ 0x10428, 0x1044f,
+ 0x1d41a, 0x1d433,
+ 0x1d44e, 0x1d454,
+ 0x1d456, 0x1d467,
+ 0x1d482, 0x1d49b,
+ 0x1d4b6, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d4cf,
+ 0x1d4ea, 0x1d503,
+ 0x1d51e, 0x1d537,
+ 0x1d552, 0x1d56b,
+ 0x1d586, 0x1d59f,
+ 0x1d5ba, 0x1d5d3,
+ 0x1d5ee, 0x1d607,
+ 0x1d622, 0x1d63b,
+ 0x1d656, 0x1d66f,
+ 0x1d68a, 0x1d6a5,
+ 0x1d6c2, 0x1d6da,
+ 0x1d6dc, 0x1d6e1,
+ 0x1d6fc, 0x1d714,
+ 0x1d716, 0x1d71b,
+ 0x1d736, 0x1d74e,
+ 0x1d750, 0x1d755,
+ 0x1d770, 0x1d788,
+ 0x1d78a, 0x1d78f,
+ 0x1d7aa, 0x1d7c2,
+ 0x1d7c4, 0x1d7c9
+}; /* CR_Lower */
+
+/* 'Print': [[:Print:]] */
+static const OnigCodePoint CR_Print[] = {
+ 423,
+ 0x0009, 0x000d,
+ 0x0020, 0x007e,
+ 0x0085, 0x0085,
+ 0x00a0, 0x0241,
+ 0x0250, 0x036f,
+ 0x0374, 0x0375,
+ 0x037a, 0x037a,
+ 0x037e, 0x037e,
+ 0x0384, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x03a1,
+ 0x03a3, 0x03ce,
+ 0x03d0, 0x0486,
+ 0x0488, 0x04ce,
+ 0x04d0, 0x04f9,
+ 0x0500, 0x050f,
+ 0x0531, 0x0556,
+ 0x0559, 0x055f,
+ 0x0561, 0x0587,
+ 0x0589, 0x058a,
+ 0x0591, 0x05b9,
+ 0x05bb, 0x05c7,
+ 0x05d0, 0x05ea,
+ 0x05f0, 0x05f4,
+ 0x0600, 0x0603,
+ 0x060b, 0x0615,
+ 0x061b, 0x061b,
+ 0x061e, 0x061f,
+ 0x0621, 0x063a,
+ 0x0640, 0x065e,
+ 0x0660, 0x070d,
+ 0x070f, 0x074a,
+ 0x074d, 0x076d,
+ 0x0780, 0x07b1,
+ 0x0901, 0x0939,
+ 0x093c, 0x094d,
+ 0x0950, 0x0954,
+ 0x0958, 0x0970,
+ 0x097d, 0x097d,
+ 0x0981, 0x0983,
+ 0x0985, 0x098c,
+ 0x098f, 0x0990,
+ 0x0993, 0x09a8,
+ 0x09aa, 0x09b0,
+ 0x09b2, 0x09b2,
+ 0x09b6, 0x09b9,
+ 0x09bc, 0x09c4,
+ 0x09c7, 0x09c8,
+ 0x09cb, 0x09ce,
+ 0x09d7, 0x09d7,
+ 0x09dc, 0x09dd,
+ 0x09df, 0x09e3,
+ 0x09e6, 0x09fa,
+ 0x0a01, 0x0a03,
+ 0x0a05, 0x0a0a,
+ 0x0a0f, 0x0a10,
+ 0x0a13, 0x0a28,
+ 0x0a2a, 0x0a30,
+ 0x0a32, 0x0a33,
+ 0x0a35, 0x0a36,
+ 0x0a38, 0x0a39,
+ 0x0a3c, 0x0a3c,
+ 0x0a3e, 0x0a42,
+ 0x0a47, 0x0a48,
+ 0x0a4b, 0x0a4d,
+ 0x0a59, 0x0a5c,
+ 0x0a5e, 0x0a5e,
+ 0x0a66, 0x0a74,
+ 0x0a81, 0x0a83,
+ 0x0a85, 0x0a8d,
+ 0x0a8f, 0x0a91,
+ 0x0a93, 0x0aa8,
+ 0x0aaa, 0x0ab0,
+ 0x0ab2, 0x0ab3,
+ 0x0ab5, 0x0ab9,
+ 0x0abc, 0x0ac5,
+ 0x0ac7, 0x0ac9,
+ 0x0acb, 0x0acd,
+ 0x0ad0, 0x0ad0,
+ 0x0ae0, 0x0ae3,
+ 0x0ae6, 0x0aef,
+ 0x0af1, 0x0af1,
+ 0x0b01, 0x0b03,
+ 0x0b05, 0x0b0c,
+ 0x0b0f, 0x0b10,
+ 0x0b13, 0x0b28,
+ 0x0b2a, 0x0b30,
+ 0x0b32, 0x0b33,
+ 0x0b35, 0x0b39,
+ 0x0b3c, 0x0b43,
+ 0x0b47, 0x0b48,
+ 0x0b4b, 0x0b4d,
+ 0x0b56, 0x0b57,
+ 0x0b5c, 0x0b5d,
+ 0x0b5f, 0x0b61,
+ 0x0b66, 0x0b71,
+ 0x0b82, 0x0b83,
+ 0x0b85, 0x0b8a,
+ 0x0b8e, 0x0b90,
+ 0x0b92, 0x0b95,
+ 0x0b99, 0x0b9a,
+ 0x0b9c, 0x0b9c,
+ 0x0b9e, 0x0b9f,
+ 0x0ba3, 0x0ba4,
+ 0x0ba8, 0x0baa,
+ 0x0bae, 0x0bb9,
+ 0x0bbe, 0x0bc2,
+ 0x0bc6, 0x0bc8,
+ 0x0bca, 0x0bcd,
+ 0x0bd7, 0x0bd7,
+ 0x0be6, 0x0bfa,
+ 0x0c01, 0x0c03,
+ 0x0c05, 0x0c0c,
+ 0x0c0e, 0x0c10,
+ 0x0c12, 0x0c28,
+ 0x0c2a, 0x0c33,
+ 0x0c35, 0x0c39,
+ 0x0c3e, 0x0c44,
+ 0x0c46, 0x0c48,
+ 0x0c4a, 0x0c4d,
+ 0x0c55, 0x0c56,
+ 0x0c60, 0x0c61,
+ 0x0c66, 0x0c6f,
+ 0x0c82, 0x0c83,
+ 0x0c85, 0x0c8c,
+ 0x0c8e, 0x0c90,
+ 0x0c92, 0x0ca8,
+ 0x0caa, 0x0cb3,
+ 0x0cb5, 0x0cb9,
+ 0x0cbc, 0x0cc4,
+ 0x0cc6, 0x0cc8,
+ 0x0cca, 0x0ccd,
+ 0x0cd5, 0x0cd6,
+ 0x0cde, 0x0cde,
+ 0x0ce0, 0x0ce1,
+ 0x0ce6, 0x0cef,
+ 0x0d02, 0x0d03,
+ 0x0d05, 0x0d0c,
+ 0x0d0e, 0x0d10,
+ 0x0d12, 0x0d28,
+ 0x0d2a, 0x0d39,
+ 0x0d3e, 0x0d43,
+ 0x0d46, 0x0d48,
+ 0x0d4a, 0x0d4d,
+ 0x0d57, 0x0d57,
+ 0x0d60, 0x0d61,
+ 0x0d66, 0x0d6f,
+ 0x0d82, 0x0d83,
+ 0x0d85, 0x0d96,
+ 0x0d9a, 0x0db1,
+ 0x0db3, 0x0dbb,
+ 0x0dbd, 0x0dbd,
+ 0x0dc0, 0x0dc6,
+ 0x0dca, 0x0dca,
+ 0x0dcf, 0x0dd4,
+ 0x0dd6, 0x0dd6,
+ 0x0dd8, 0x0ddf,
+ 0x0df2, 0x0df4,
+ 0x0e01, 0x0e3a,
+ 0x0e3f, 0x0e5b,
+ 0x0e81, 0x0e82,
+ 0x0e84, 0x0e84,
+ 0x0e87, 0x0e88,
+ 0x0e8a, 0x0e8a,
+ 0x0e8d, 0x0e8d,
+ 0x0e94, 0x0e97,
+ 0x0e99, 0x0e9f,
+ 0x0ea1, 0x0ea3,
+ 0x0ea5, 0x0ea5,
+ 0x0ea7, 0x0ea7,
+ 0x0eaa, 0x0eab,
+ 0x0ead, 0x0eb9,
+ 0x0ebb, 0x0ebd,
+ 0x0ec0, 0x0ec4,
+ 0x0ec6, 0x0ec6,
+ 0x0ec8, 0x0ecd,
+ 0x0ed0, 0x0ed9,
+ 0x0edc, 0x0edd,
+ 0x0f00, 0x0f47,
+ 0x0f49, 0x0f6a,
+ 0x0f71, 0x0f8b,
+ 0x0f90, 0x0f97,
+ 0x0f99, 0x0fbc,
+ 0x0fbe, 0x0fcc,
+ 0x0fcf, 0x0fd1,
+ 0x1000, 0x1021,
+ 0x1023, 0x1027,
+ 0x1029, 0x102a,
+ 0x102c, 0x1032,
+ 0x1036, 0x1039,
+ 0x1040, 0x1059,
+ 0x10a0, 0x10c5,
+ 0x10d0, 0x10fc,
+ 0x1100, 0x1159,
+ 0x115f, 0x11a2,
+ 0x11a8, 0x11f9,
+ 0x1200, 0x1248,
+ 0x124a, 0x124d,
+ 0x1250, 0x1256,
+ 0x1258, 0x1258,
+ 0x125a, 0x125d,
+ 0x1260, 0x1288,
+ 0x128a, 0x128d,
+ 0x1290, 0x12b0,
+ 0x12b2, 0x12b5,
+ 0x12b8, 0x12be,
+ 0x12c0, 0x12c0,
+ 0x12c2, 0x12c5,
+ 0x12c8, 0x12d6,
+ 0x12d8, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x135a,
+ 0x135f, 0x137c,
+ 0x1380, 0x1399,
+ 0x13a0, 0x13f4,
+ 0x1401, 0x1676,
+ 0x1680, 0x169c,
+ 0x16a0, 0x16f0,
+ 0x1700, 0x170c,
+ 0x170e, 0x1714,
+ 0x1720, 0x1736,
+ 0x1740, 0x1753,
+ 0x1760, 0x176c,
+ 0x176e, 0x1770,
+ 0x1772, 0x1773,
+ 0x1780, 0x17dd,
+ 0x17e0, 0x17e9,
+ 0x17f0, 0x17f9,
+ 0x1800, 0x180e,
+ 0x1810, 0x1819,
+ 0x1820, 0x1877,
+ 0x1880, 0x18a9,
+ 0x1900, 0x191c,
+ 0x1920, 0x192b,
+ 0x1930, 0x193b,
+ 0x1940, 0x1940,
+ 0x1944, 0x196d,
+ 0x1970, 0x1974,
+ 0x1980, 0x19a9,
+ 0x19b0, 0x19c9,
+ 0x19d0, 0x19d9,
+ 0x19de, 0x1a1b,
+ 0x1a1e, 0x1a1f,
+ 0x1d00, 0x1dc3,
+ 0x1e00, 0x1e9b,
+ 0x1ea0, 0x1ef9,
+ 0x1f00, 0x1f15,
+ 0x1f18, 0x1f1d,
+ 0x1f20, 0x1f45,
+ 0x1f48, 0x1f4d,
+ 0x1f50, 0x1f57,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f7d,
+ 0x1f80, 0x1fb4,
+ 0x1fb6, 0x1fc4,
+ 0x1fc6, 0x1fd3,
+ 0x1fd6, 0x1fdb,
+ 0x1fdd, 0x1fef,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ffe,
+ 0x2000, 0x2063,
+ 0x206a, 0x2071,
+ 0x2074, 0x208e,
+ 0x2090, 0x2094,
+ 0x20a0, 0x20b5,
+ 0x20d0, 0x20eb,
+ 0x2100, 0x214c,
+ 0x2153, 0x2183,
+ 0x2190, 0x23db,
+ 0x2400, 0x2426,
+ 0x2440, 0x244a,
+ 0x2460, 0x269c,
+ 0x26a0, 0x26b1,
+ 0x2701, 0x2704,
+ 0x2706, 0x2709,
+ 0x270c, 0x2727,
+ 0x2729, 0x274b,
+ 0x274d, 0x274d,
+ 0x274f, 0x2752,
+ 0x2756, 0x2756,
+ 0x2758, 0x275e,
+ 0x2761, 0x2794,
+ 0x2798, 0x27af,
+ 0x27b1, 0x27be,
+ 0x27c0, 0x27c6,
+ 0x27d0, 0x27eb,
+ 0x27f0, 0x2b13,
+ 0x2c00, 0x2c2e,
+ 0x2c30, 0x2c5e,
+ 0x2c80, 0x2cea,
+ 0x2cf9, 0x2d25,
+ 0x2d30, 0x2d65,
+ 0x2d6f, 0x2d6f,
+ 0x2d80, 0x2d96,
+ 0x2da0, 0x2da6,
+ 0x2da8, 0x2dae,
+ 0x2db0, 0x2db6,
+ 0x2db8, 0x2dbe,
+ 0x2dc0, 0x2dc6,
+ 0x2dc8, 0x2dce,
+ 0x2dd0, 0x2dd6,
+ 0x2dd8, 0x2dde,
+ 0x2e00, 0x2e17,
+ 0x2e1c, 0x2e1d,
+ 0x2e80, 0x2e99,
+ 0x2e9b, 0x2ef3,
+ 0x2f00, 0x2fd5,
+ 0x2ff0, 0x2ffb,
+ 0x3000, 0x303f,
+ 0x3041, 0x3096,
+ 0x3099, 0x30ff,
+ 0x3105, 0x312c,
+ 0x3131, 0x318e,
+ 0x3190, 0x31b7,
+ 0x31c0, 0x31cf,
+ 0x31f0, 0x321e,
+ 0x3220, 0x3243,
+ 0x3250, 0x32fe,
+ 0x3300, 0x4db5,
+ 0x4dc0, 0x9fbb,
+ 0xa000, 0xa48c,
+ 0xa490, 0xa4c6,
+ 0xa700, 0xa716,
+ 0xa800, 0xa82b,
+ 0xac00, 0xd7a3,
+ 0xe000, 0xfa2d,
+ 0xfa30, 0xfa6a,
+ 0xfa70, 0xfad9,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xfb1d, 0xfb36,
+ 0xfb38, 0xfb3c,
+ 0xfb3e, 0xfb3e,
+ 0xfb40, 0xfb41,
+ 0xfb43, 0xfb44,
+ 0xfb46, 0xfbb1,
+ 0xfbd3, 0xfd3f,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdfd,
+ 0xfe00, 0xfe19,
+ 0xfe20, 0xfe23,
+ 0xfe30, 0xfe52,
+ 0xfe54, 0xfe66,
+ 0xfe68, 0xfe6b,
+ 0xfe70, 0xfe74,
+ 0xfe76, 0xfefc,
+ 0xfeff, 0xfeff,
+ 0xff01, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc,
+ 0xffe0, 0xffe6,
+ 0xffe8, 0xffee,
+ 0xfff9, 0xfffd,
+ 0x10000, 0x1000b,
+ 0x1000d, 0x10026,
+ 0x10028, 0x1003a,
+ 0x1003c, 0x1003d,
+ 0x1003f, 0x1004d,
+ 0x10050, 0x1005d,
+ 0x10080, 0x100fa,
+ 0x10100, 0x10102,
+ 0x10107, 0x10133,
+ 0x10137, 0x1018a,
+ 0x10300, 0x1031e,
+ 0x10320, 0x10323,
+ 0x10330, 0x1034a,
+ 0x10380, 0x1039d,
+ 0x1039f, 0x103c3,
+ 0x103c8, 0x103d5,
+ 0x10400, 0x1049d,
+ 0x104a0, 0x104a9,
+ 0x10800, 0x10805,
+ 0x10808, 0x10808,
+ 0x1080a, 0x10835,
+ 0x10837, 0x10838,
+ 0x1083c, 0x1083c,
+ 0x1083f, 0x1083f,
+ 0x10a00, 0x10a03,
+ 0x10a05, 0x10a06,
+ 0x10a0c, 0x10a13,
+ 0x10a15, 0x10a17,
+ 0x10a19, 0x10a33,
+ 0x10a38, 0x10a3a,
+ 0x10a3f, 0x10a47,
+ 0x10a50, 0x10a58,
+ 0x1d000, 0x1d0f5,
+ 0x1d100, 0x1d126,
+ 0x1d12a, 0x1d1dd,
+ 0x1d200, 0x1d245,
+ 0x1d300, 0x1d356,
+ 0x1d400, 0x1d454,
+ 0x1d456, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d51e, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d552, 0x1d6a5,
+ 0x1d6a8, 0x1d7c9,
+ 0x1d7ce, 0x1d7ff,
+ 0x20000, 0x2a6d6,
+ 0x2f800, 0x2fa1d,
+ 0xe0001, 0xe0001,
+ 0xe0020, 0xe007f,
+ 0xe0100, 0xe01ef,
+ 0xf0000, 0xffffd,
+ 0x100000, 0x10fffd
+}; /* CR_Print */
+
+/* 'Punct': [[:Punct:]] */
+static const OnigCodePoint CR_Punct[] = {
+ 96,
+ 0x0021, 0x0023,
+ 0x0025, 0x002a,
+ 0x002c, 0x002f,
+ 0x003a, 0x003b,
+ 0x003f, 0x0040,
+ 0x005b, 0x005d,
+ 0x005f, 0x005f,
+ 0x007b, 0x007b,
+ 0x007d, 0x007d,
+ 0x00a1, 0x00a1,
+ 0x00ab, 0x00ab,
+ 0x00b7, 0x00b7,
+ 0x00bb, 0x00bb,
+ 0x00bf, 0x00bf,
+ 0x037e, 0x037e,
+ 0x0387, 0x0387,
+ 0x055a, 0x055f,
+ 0x0589, 0x058a,
+ 0x05be, 0x05be,
+ 0x05c0, 0x05c0,
+ 0x05c3, 0x05c3,
+ 0x05c6, 0x05c6,
+ 0x05f3, 0x05f4,
+ 0x060c, 0x060d,
+ 0x061b, 0x061b,
+ 0x061e, 0x061f,
+ 0x066a, 0x066d,
+ 0x06d4, 0x06d4,
+ 0x0700, 0x070d,
+ 0x0964, 0x0965,
+ 0x0970, 0x0970,
+ 0x0df4, 0x0df4,
+ 0x0e4f, 0x0e4f,
+ 0x0e5a, 0x0e5b,
+ 0x0f04, 0x0f12,
+ 0x0f3a, 0x0f3d,
+ 0x0f85, 0x0f85,
+ 0x0fd0, 0x0fd1,
+ 0x104a, 0x104f,
+ 0x10fb, 0x10fb,
+ 0x1361, 0x1368,
+ 0x166d, 0x166e,
+ 0x169b, 0x169c,
+ 0x16eb, 0x16ed,
+ 0x1735, 0x1736,
+ 0x17d4, 0x17d6,
+ 0x17d8, 0x17da,
+ 0x1800, 0x180a,
+ 0x1944, 0x1945,
+ 0x19de, 0x19df,
+ 0x1a1e, 0x1a1f,
+ 0x2010, 0x2027,
+ 0x2030, 0x2043,
+ 0x2045, 0x2051,
+ 0x2053, 0x205e,
+ 0x207d, 0x207e,
+ 0x208d, 0x208e,
+ 0x2329, 0x232a,
+ 0x23b4, 0x23b6,
+ 0x2768, 0x2775,
+ 0x27c5, 0x27c6,
+ 0x27e6, 0x27eb,
+ 0x2983, 0x2998,
+ 0x29d8, 0x29db,
+ 0x29fc, 0x29fd,
+ 0x2cf9, 0x2cfc,
+ 0x2cfe, 0x2cff,
+ 0x2e00, 0x2e17,
+ 0x2e1c, 0x2e1d,
+ 0x3001, 0x3003,
+ 0x3008, 0x3011,
+ 0x3014, 0x301f,
+ 0x3030, 0x3030,
+ 0x303d, 0x303d,
+ 0x30a0, 0x30a0,
+ 0x30fb, 0x30fb,
+ 0xfd3e, 0xfd3f,
+ 0xfe10, 0xfe19,
+ 0xfe30, 0xfe52,
+ 0xfe54, 0xfe61,
+ 0xfe63, 0xfe63,
+ 0xfe68, 0xfe68,
+ 0xfe6a, 0xfe6b,
+ 0xff01, 0xff03,
+ 0xff05, 0xff0a,
+ 0xff0c, 0xff0f,
+ 0xff1a, 0xff1b,
+ 0xff1f, 0xff20,
+ 0xff3b, 0xff3d,
+ 0xff3f, 0xff3f,
+ 0xff5b, 0xff5b,
+ 0xff5d, 0xff5d,
+ 0xff5f, 0xff65,
+ 0x10100, 0x10101,
+ 0x1039f, 0x1039f,
+ 0x10a50, 0x10a58
+}; /* CR_Punct */
+
+/* 'Space': [[:Space:]] */
+static const OnigCodePoint CR_Space[] = {
+ 11,
+ 0x0009, 0x000d,
+ 0x0020, 0x0020,
+ 0x0085, 0x0085,
+ 0x00a0, 0x00a0,
+ 0x1680, 0x1680,
+ 0x180e, 0x180e,
+ 0x2000, 0x200a,
+ 0x2028, 0x2029,
+ 0x202f, 0x202f,
+ 0x205f, 0x205f,
+ 0x3000, 0x3000
+}; /* CR_Space */
+
+/* 'Upper': [[:Upper:]] */
+static const OnigCodePoint CR_Upper[] = {
+ 476,
+ 0x0041, 0x005a,
+ 0x00c0, 0x00d6,
+ 0x00d8, 0x00de,
+ 0x0100, 0x0100,
+ 0x0102, 0x0102,
+ 0x0104, 0x0104,
+ 0x0106, 0x0106,
+ 0x0108, 0x0108,
+ 0x010a, 0x010a,
+ 0x010c, 0x010c,
+ 0x010e, 0x010e,
+ 0x0110, 0x0110,
+ 0x0112, 0x0112,
+ 0x0114, 0x0114,
+ 0x0116, 0x0116,
+ 0x0118, 0x0118,
+ 0x011a, 0x011a,
+ 0x011c, 0x011c,
+ 0x011e, 0x011e,
+ 0x0120, 0x0120,
+ 0x0122, 0x0122,
+ 0x0124, 0x0124,
+ 0x0126, 0x0126,
+ 0x0128, 0x0128,
+ 0x012a, 0x012a,
+ 0x012c, 0x012c,
+ 0x012e, 0x012e,
+ 0x0130, 0x0130,
+ 0x0132, 0x0132,
+ 0x0134, 0x0134,
+ 0x0136, 0x0136,
+ 0x0139, 0x0139,
+ 0x013b, 0x013b,
+ 0x013d, 0x013d,
+ 0x013f, 0x013f,
+ 0x0141, 0x0141,
+ 0x0143, 0x0143,
+ 0x0145, 0x0145,
+ 0x0147, 0x0147,
+ 0x014a, 0x014a,
+ 0x014c, 0x014c,
+ 0x014e, 0x014e,
+ 0x0150, 0x0150,
+ 0x0152, 0x0152,
+ 0x0154, 0x0154,
+ 0x0156, 0x0156,
+ 0x0158, 0x0158,
+ 0x015a, 0x015a,
+ 0x015c, 0x015c,
+ 0x015e, 0x015e,
+ 0x0160, 0x0160,
+ 0x0162, 0x0162,
+ 0x0164, 0x0164,
+ 0x0166, 0x0166,
+ 0x0168, 0x0168,
+ 0x016a, 0x016a,
+ 0x016c, 0x016c,
+ 0x016e, 0x016e,
+ 0x0170, 0x0170,
+ 0x0172, 0x0172,
+ 0x0174, 0x0174,
+ 0x0176, 0x0176,
+ 0x0178, 0x0179,
+ 0x017b, 0x017b,
+ 0x017d, 0x017d,
+ 0x0181, 0x0182,
+ 0x0184, 0x0184,
+ 0x0186, 0x0187,
+ 0x0189, 0x018b,
+ 0x018e, 0x0191,
+ 0x0193, 0x0194,
+ 0x0196, 0x0198,
+ 0x019c, 0x019d,
+ 0x019f, 0x01a0,
+ 0x01a2, 0x01a2,
+ 0x01a4, 0x01a4,
+ 0x01a6, 0x01a7,
+ 0x01a9, 0x01a9,
+ 0x01ac, 0x01ac,
+ 0x01ae, 0x01af,
+ 0x01b1, 0x01b3,
+ 0x01b5, 0x01b5,
+ 0x01b7, 0x01b8,
+ 0x01bc, 0x01bc,
+ 0x01c4, 0x01c4,
+ 0x01c7, 0x01c7,
+ 0x01ca, 0x01ca,
+ 0x01cd, 0x01cd,
+ 0x01cf, 0x01cf,
+ 0x01d1, 0x01d1,
+ 0x01d3, 0x01d3,
+ 0x01d5, 0x01d5,
+ 0x01d7, 0x01d7,
+ 0x01d9, 0x01d9,
+ 0x01db, 0x01db,
+ 0x01de, 0x01de,
+ 0x01e0, 0x01e0,
+ 0x01e2, 0x01e2,
+ 0x01e4, 0x01e4,
+ 0x01e6, 0x01e6,
+ 0x01e8, 0x01e8,
+ 0x01ea, 0x01ea,
+ 0x01ec, 0x01ec,
+ 0x01ee, 0x01ee,
+ 0x01f1, 0x01f1,
+ 0x01f4, 0x01f4,
+ 0x01f6, 0x01f8,
+ 0x01fa, 0x01fa,
+ 0x01fc, 0x01fc,
+ 0x01fe, 0x01fe,
+ 0x0200, 0x0200,
+ 0x0202, 0x0202,
+ 0x0204, 0x0204,
+ 0x0206, 0x0206,
+ 0x0208, 0x0208,
+ 0x020a, 0x020a,
+ 0x020c, 0x020c,
+ 0x020e, 0x020e,
+ 0x0210, 0x0210,
+ 0x0212, 0x0212,
+ 0x0214, 0x0214,
+ 0x0216, 0x0216,
+ 0x0218, 0x0218,
+ 0x021a, 0x021a,
+ 0x021c, 0x021c,
+ 0x021e, 0x021e,
+ 0x0220, 0x0220,
+ 0x0222, 0x0222,
+ 0x0224, 0x0224,
+ 0x0226, 0x0226,
+ 0x0228, 0x0228,
+ 0x022a, 0x022a,
+ 0x022c, 0x022c,
+ 0x022e, 0x022e,
+ 0x0230, 0x0230,
+ 0x0232, 0x0232,
+ 0x023a, 0x023b,
+ 0x023d, 0x023e,
+ 0x0241, 0x0241,
+ 0x0386, 0x0386,
+ 0x0388, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x038f,
+ 0x0391, 0x03a1,
+ 0x03a3, 0x03ab,
+ 0x03d2, 0x03d4,
+ 0x03d8, 0x03d8,
+ 0x03da, 0x03da,
+ 0x03dc, 0x03dc,
+ 0x03de, 0x03de,
+ 0x03e0, 0x03e0,
+ 0x03e2, 0x03e2,
+ 0x03e4, 0x03e4,
+ 0x03e6, 0x03e6,
+ 0x03e8, 0x03e8,
+ 0x03ea, 0x03ea,
+ 0x03ec, 0x03ec,
+ 0x03ee, 0x03ee,
+ 0x03f4, 0x03f4,
+ 0x03f7, 0x03f7,
+ 0x03f9, 0x03fa,
+ 0x03fd, 0x042f,
+ 0x0460, 0x0460,
+ 0x0462, 0x0462,
+ 0x0464, 0x0464,
+ 0x0466, 0x0466,
+ 0x0468, 0x0468,
+ 0x046a, 0x046a,
+ 0x046c, 0x046c,
+ 0x046e, 0x046e,
+ 0x0470, 0x0470,
+ 0x0472, 0x0472,
+ 0x0474, 0x0474,
+ 0x0476, 0x0476,
+ 0x0478, 0x0478,
+ 0x047a, 0x047a,
+ 0x047c, 0x047c,
+ 0x047e, 0x047e,
+ 0x0480, 0x0480,
+ 0x048a, 0x048a,
+ 0x048c, 0x048c,
+ 0x048e, 0x048e,
+ 0x0490, 0x0490,
+ 0x0492, 0x0492,
+ 0x0494, 0x0494,
+ 0x0496, 0x0496,
+ 0x0498, 0x0498,
+ 0x049a, 0x049a,
+ 0x049c, 0x049c,
+ 0x049e, 0x049e,
+ 0x04a0, 0x04a0,
+ 0x04a2, 0x04a2,
+ 0x04a4, 0x04a4,
+ 0x04a6, 0x04a6,
+ 0x04a8, 0x04a8,
+ 0x04aa, 0x04aa,
+ 0x04ac, 0x04ac,
+ 0x04ae, 0x04ae,
+ 0x04b0, 0x04b0,
+ 0x04b2, 0x04b2,
+ 0x04b4, 0x04b4,
+ 0x04b6, 0x04b6,
+ 0x04b8, 0x04b8,
+ 0x04ba, 0x04ba,
+ 0x04bc, 0x04bc,
+ 0x04be, 0x04be,
+ 0x04c0, 0x04c1,
+ 0x04c3, 0x04c3,
+ 0x04c5, 0x04c5,
+ 0x04c7, 0x04c7,
+ 0x04c9, 0x04c9,
+ 0x04cb, 0x04cb,
+ 0x04cd, 0x04cd,
+ 0x04d0, 0x04d0,
+ 0x04d2, 0x04d2,
+ 0x04d4, 0x04d4,
+ 0x04d6, 0x04d6,
+ 0x04d8, 0x04d8,
+ 0x04da, 0x04da,
+ 0x04dc, 0x04dc,
+ 0x04de, 0x04de,
+ 0x04e0, 0x04e0,
+ 0x04e2, 0x04e2,
+ 0x04e4, 0x04e4,
+ 0x04e6, 0x04e6,
+ 0x04e8, 0x04e8,
+ 0x04ea, 0x04ea,
+ 0x04ec, 0x04ec,
+ 0x04ee, 0x04ee,
+ 0x04f0, 0x04f0,
+ 0x04f2, 0x04f2,
+ 0x04f4, 0x04f4,
+ 0x04f6, 0x04f6,
+ 0x04f8, 0x04f8,
+ 0x0500, 0x0500,
+ 0x0502, 0x0502,
+ 0x0504, 0x0504,
+ 0x0506, 0x0506,
+ 0x0508, 0x0508,
+ 0x050a, 0x050a,
+ 0x050c, 0x050c,
+ 0x050e, 0x050e,
+ 0x0531, 0x0556,
+ 0x10a0, 0x10c5,
+ 0x1e00, 0x1e00,
+ 0x1e02, 0x1e02,
+ 0x1e04, 0x1e04,
+ 0x1e06, 0x1e06,
+ 0x1e08, 0x1e08,
+ 0x1e0a, 0x1e0a,
+ 0x1e0c, 0x1e0c,
+ 0x1e0e, 0x1e0e,
+ 0x1e10, 0x1e10,
+ 0x1e12, 0x1e12,
+ 0x1e14, 0x1e14,
+ 0x1e16, 0x1e16,
+ 0x1e18, 0x1e18,
+ 0x1e1a, 0x1e1a,
+ 0x1e1c, 0x1e1c,
+ 0x1e1e, 0x1e1e,
+ 0x1e20, 0x1e20,
+ 0x1e22, 0x1e22,
+ 0x1e24, 0x1e24,
+ 0x1e26, 0x1e26,
+ 0x1e28, 0x1e28,
+ 0x1e2a, 0x1e2a,
+ 0x1e2c, 0x1e2c,
+ 0x1e2e, 0x1e2e,
+ 0x1e30, 0x1e30,
+ 0x1e32, 0x1e32,
+ 0x1e34, 0x1e34,
+ 0x1e36, 0x1e36,
+ 0x1e38, 0x1e38,
+ 0x1e3a, 0x1e3a,
+ 0x1e3c, 0x1e3c,
+ 0x1e3e, 0x1e3e,
+ 0x1e40, 0x1e40,
+ 0x1e42, 0x1e42,
+ 0x1e44, 0x1e44,
+ 0x1e46, 0x1e46,
+ 0x1e48, 0x1e48,
+ 0x1e4a, 0x1e4a,
+ 0x1e4c, 0x1e4c,
+ 0x1e4e, 0x1e4e,
+ 0x1e50, 0x1e50,
+ 0x1e52, 0x1e52,
+ 0x1e54, 0x1e54,
+ 0x1e56, 0x1e56,
+ 0x1e58, 0x1e58,
+ 0x1e5a, 0x1e5a,
+ 0x1e5c, 0x1e5c,
+ 0x1e5e, 0x1e5e,
+ 0x1e60, 0x1e60,
+ 0x1e62, 0x1e62,
+ 0x1e64, 0x1e64,
+ 0x1e66, 0x1e66,
+ 0x1e68, 0x1e68,
+ 0x1e6a, 0x1e6a,
+ 0x1e6c, 0x1e6c,
+ 0x1e6e, 0x1e6e,
+ 0x1e70, 0x1e70,
+ 0x1e72, 0x1e72,
+ 0x1e74, 0x1e74,
+ 0x1e76, 0x1e76,
+ 0x1e78, 0x1e78,
+ 0x1e7a, 0x1e7a,
+ 0x1e7c, 0x1e7c,
+ 0x1e7e, 0x1e7e,
+ 0x1e80, 0x1e80,
+ 0x1e82, 0x1e82,
+ 0x1e84, 0x1e84,
+ 0x1e86, 0x1e86,
+ 0x1e88, 0x1e88,
+ 0x1e8a, 0x1e8a,
+ 0x1e8c, 0x1e8c,
+ 0x1e8e, 0x1e8e,
+ 0x1e90, 0x1e90,
+ 0x1e92, 0x1e92,
+ 0x1e94, 0x1e94,
+ 0x1ea0, 0x1ea0,
+ 0x1ea2, 0x1ea2,
+ 0x1ea4, 0x1ea4,
+ 0x1ea6, 0x1ea6,
+ 0x1ea8, 0x1ea8,
+ 0x1eaa, 0x1eaa,
+ 0x1eac, 0x1eac,
+ 0x1eae, 0x1eae,
+ 0x1eb0, 0x1eb0,
+ 0x1eb2, 0x1eb2,
+ 0x1eb4, 0x1eb4,
+ 0x1eb6, 0x1eb6,
+ 0x1eb8, 0x1eb8,
+ 0x1eba, 0x1eba,
+ 0x1ebc, 0x1ebc,
+ 0x1ebe, 0x1ebe,
+ 0x1ec0, 0x1ec0,
+ 0x1ec2, 0x1ec2,
+ 0x1ec4, 0x1ec4,
+ 0x1ec6, 0x1ec6,
+ 0x1ec8, 0x1ec8,
+ 0x1eca, 0x1eca,
+ 0x1ecc, 0x1ecc,
+ 0x1ece, 0x1ece,
+ 0x1ed0, 0x1ed0,
+ 0x1ed2, 0x1ed2,
+ 0x1ed4, 0x1ed4,
+ 0x1ed6, 0x1ed6,
+ 0x1ed8, 0x1ed8,
+ 0x1eda, 0x1eda,
+ 0x1edc, 0x1edc,
+ 0x1ede, 0x1ede,
+ 0x1ee0, 0x1ee0,
+ 0x1ee2, 0x1ee2,
+ 0x1ee4, 0x1ee4,
+ 0x1ee6, 0x1ee6,
+ 0x1ee8, 0x1ee8,
+ 0x1eea, 0x1eea,
+ 0x1eec, 0x1eec,
+ 0x1eee, 0x1eee,
+ 0x1ef0, 0x1ef0,
+ 0x1ef2, 0x1ef2,
+ 0x1ef4, 0x1ef4,
+ 0x1ef6, 0x1ef6,
+ 0x1ef8, 0x1ef8,
+ 0x1f08, 0x1f0f,
+ 0x1f18, 0x1f1d,
+ 0x1f28, 0x1f2f,
+ 0x1f38, 0x1f3f,
+ 0x1f48, 0x1f4d,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f5f,
+ 0x1f68, 0x1f6f,
+ 0x1fb8, 0x1fbb,
+ 0x1fc8, 0x1fcb,
+ 0x1fd8, 0x1fdb,
+ 0x1fe8, 0x1fec,
+ 0x1ff8, 0x1ffb,
+ 0x2102, 0x2102,
+ 0x2107, 0x2107,
+ 0x210b, 0x210d,
+ 0x2110, 0x2112,
+ 0x2115, 0x2115,
+ 0x2119, 0x211d,
+ 0x2124, 0x2124,
+ 0x2126, 0x2126,
+ 0x2128, 0x2128,
+ 0x212a, 0x212d,
+ 0x2130, 0x2131,
+ 0x2133, 0x2133,
+ 0x213e, 0x213f,
+ 0x2145, 0x2145,
+ 0x2c00, 0x2c2e,
+ 0x2c80, 0x2c80,
+ 0x2c82, 0x2c82,
+ 0x2c84, 0x2c84,
+ 0x2c86, 0x2c86,
+ 0x2c88, 0x2c88,
+ 0x2c8a, 0x2c8a,
+ 0x2c8c, 0x2c8c,
+ 0x2c8e, 0x2c8e,
+ 0x2c90, 0x2c90,
+ 0x2c92, 0x2c92,
+ 0x2c94, 0x2c94,
+ 0x2c96, 0x2c96,
+ 0x2c98, 0x2c98,
+ 0x2c9a, 0x2c9a,
+ 0x2c9c, 0x2c9c,
+ 0x2c9e, 0x2c9e,
+ 0x2ca0, 0x2ca0,
+ 0x2ca2, 0x2ca2,
+ 0x2ca4, 0x2ca4,
+ 0x2ca6, 0x2ca6,
+ 0x2ca8, 0x2ca8,
+ 0x2caa, 0x2caa,
+ 0x2cac, 0x2cac,
+ 0x2cae, 0x2cae,
+ 0x2cb0, 0x2cb0,
+ 0x2cb2, 0x2cb2,
+ 0x2cb4, 0x2cb4,
+ 0x2cb6, 0x2cb6,
+ 0x2cb8, 0x2cb8,
+ 0x2cba, 0x2cba,
+ 0x2cbc, 0x2cbc,
+ 0x2cbe, 0x2cbe,
+ 0x2cc0, 0x2cc0,
+ 0x2cc2, 0x2cc2,
+ 0x2cc4, 0x2cc4,
+ 0x2cc6, 0x2cc6,
+ 0x2cc8, 0x2cc8,
+ 0x2cca, 0x2cca,
+ 0x2ccc, 0x2ccc,
+ 0x2cce, 0x2cce,
+ 0x2cd0, 0x2cd0,
+ 0x2cd2, 0x2cd2,
+ 0x2cd4, 0x2cd4,
+ 0x2cd6, 0x2cd6,
+ 0x2cd8, 0x2cd8,
+ 0x2cda, 0x2cda,
+ 0x2cdc, 0x2cdc,
+ 0x2cde, 0x2cde,
+ 0x2ce0, 0x2ce0,
+ 0x2ce2, 0x2ce2,
+ 0xff21, 0xff3a,
+ 0x10400, 0x10427,
+ 0x1d400, 0x1d419,
+ 0x1d434, 0x1d44d,
+ 0x1d468, 0x1d481,
+ 0x1d49c, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b5,
+ 0x1d4d0, 0x1d4e9,
+ 0x1d504, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d538, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d56c, 0x1d585,
+ 0x1d5a0, 0x1d5b9,
+ 0x1d5d4, 0x1d5ed,
+ 0x1d608, 0x1d621,
+ 0x1d63c, 0x1d655,
+ 0x1d670, 0x1d689,
+ 0x1d6a8, 0x1d6c0,
+ 0x1d6e2, 0x1d6fa,
+ 0x1d71c, 0x1d734,
+ 0x1d756, 0x1d76e,
+ 0x1d790, 0x1d7a8
+}; /* CR_Upper */
+
+/* 'XDigit': [[:XDigit:]] */
+static const OnigCodePoint CR_XDigit[] = {
+ 3,
+ 0x0030, 0x0039,
+ 0x0041, 0x0046,
+ 0x0061, 0x0066
+}; /* CR_XDigit */
+
+/* 'Word': [[:Word:]] */
+static const OnigCodePoint CR_Word[] = {
+ 464,
+ 0x0030, 0x0039,
+ 0x0041, 0x005a,
+ 0x005f, 0x005f,
+ 0x0061, 0x007a,
+ 0x00aa, 0x00aa,
+ 0x00b2, 0x00b3,
+ 0x00b5, 0x00b5,
+ 0x00b9, 0x00ba,
+ 0x00bc, 0x00be,
+ 0x00c0, 0x00d6,
+ 0x00d8, 0x00f6,
+ 0x00f8, 0x0241,
+ 0x0250, 0x02c1,
+ 0x02c6, 0x02d1,
+ 0x02e0, 0x02e4,
+ 0x02ee, 0x02ee,
+ 0x0300, 0x036f,
+ 0x037a, 0x037a,
+ 0x0386, 0x0386,
+ 0x0388, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x03a1,
+ 0x03a3, 0x03ce,
+ 0x03d0, 0x03f5,
+ 0x03f7, 0x0481,
+ 0x0483, 0x0486,
+ 0x0488, 0x04ce,
+ 0x04d0, 0x04f9,
+ 0x0500, 0x050f,
+ 0x0531, 0x0556,
+ 0x0559, 0x0559,
+ 0x0561, 0x0587,
+ 0x0591, 0x05b9,
+ 0x05bb, 0x05bd,
+ 0x05bf, 0x05bf,
+ 0x05c1, 0x05c2,
+ 0x05c4, 0x05c5,
+ 0x05c7, 0x05c7,
+ 0x05d0, 0x05ea,
+ 0x05f0, 0x05f2,
+ 0x0610, 0x0615,
+ 0x0621, 0x063a,
+ 0x0640, 0x065e,
+ 0x0660, 0x0669,
+ 0x066e, 0x06d3,
+ 0x06d5, 0x06dc,
+ 0x06de, 0x06e8,
+ 0x06ea, 0x06fc,
+ 0x06ff, 0x06ff,
+ 0x0710, 0x074a,
+ 0x074d, 0x076d,
+ 0x0780, 0x07b1,
+ 0x0901, 0x0939,
+ 0x093c, 0x094d,
+ 0x0950, 0x0954,
+ 0x0958, 0x0963,
+ 0x0966, 0x096f,
+ 0x097d, 0x097d,
+ 0x0981, 0x0983,
+ 0x0985, 0x098c,
+ 0x098f, 0x0990,
+ 0x0993, 0x09a8,
+ 0x09aa, 0x09b0,
+ 0x09b2, 0x09b2,
+ 0x09b6, 0x09b9,
+ 0x09bc, 0x09c4,
+ 0x09c7, 0x09c8,
+ 0x09cb, 0x09ce,
+ 0x09d7, 0x09d7,
+ 0x09dc, 0x09dd,
+ 0x09df, 0x09e3,
+ 0x09e6, 0x09f1,
+ 0x09f4, 0x09f9,
+ 0x0a01, 0x0a03,
+ 0x0a05, 0x0a0a,
+ 0x0a0f, 0x0a10,
+ 0x0a13, 0x0a28,
+ 0x0a2a, 0x0a30,
+ 0x0a32, 0x0a33,
+ 0x0a35, 0x0a36,
+ 0x0a38, 0x0a39,
+ 0x0a3c, 0x0a3c,
+ 0x0a3e, 0x0a42,
+ 0x0a47, 0x0a48,
+ 0x0a4b, 0x0a4d,
+ 0x0a59, 0x0a5c,
+ 0x0a5e, 0x0a5e,
+ 0x0a66, 0x0a74,
+ 0x0a81, 0x0a83,
+ 0x0a85, 0x0a8d,
+ 0x0a8f, 0x0a91,
+ 0x0a93, 0x0aa8,
+ 0x0aaa, 0x0ab0,
+ 0x0ab2, 0x0ab3,
+ 0x0ab5, 0x0ab9,
+ 0x0abc, 0x0ac5,
+ 0x0ac7, 0x0ac9,
+ 0x0acb, 0x0acd,
+ 0x0ad0, 0x0ad0,
+ 0x0ae0, 0x0ae3,
+ 0x0ae6, 0x0aef,
+ 0x0b01, 0x0b03,
+ 0x0b05, 0x0b0c,
+ 0x0b0f, 0x0b10,
+ 0x0b13, 0x0b28,
+ 0x0b2a, 0x0b30,
+ 0x0b32, 0x0b33,
+ 0x0b35, 0x0b39,
+ 0x0b3c, 0x0b43,
+ 0x0b47, 0x0b48,
+ 0x0b4b, 0x0b4d,
+ 0x0b56, 0x0b57,
+ 0x0b5c, 0x0b5d,
+ 0x0b5f, 0x0b61,
+ 0x0b66, 0x0b6f,
+ 0x0b71, 0x0b71,
+ 0x0b82, 0x0b83,
+ 0x0b85, 0x0b8a,
+ 0x0b8e, 0x0b90,
+ 0x0b92, 0x0b95,
+ 0x0b99, 0x0b9a,
+ 0x0b9c, 0x0b9c,
+ 0x0b9e, 0x0b9f,
+ 0x0ba3, 0x0ba4,
+ 0x0ba8, 0x0baa,
+ 0x0bae, 0x0bb9,
+ 0x0bbe, 0x0bc2,
+ 0x0bc6, 0x0bc8,
+ 0x0bca, 0x0bcd,
+ 0x0bd7, 0x0bd7,
+ 0x0be6, 0x0bf2,
+ 0x0c01, 0x0c03,
+ 0x0c05, 0x0c0c,
+ 0x0c0e, 0x0c10,
+ 0x0c12, 0x0c28,
+ 0x0c2a, 0x0c33,
+ 0x0c35, 0x0c39,
+ 0x0c3e, 0x0c44,
+ 0x0c46, 0x0c48,
+ 0x0c4a, 0x0c4d,
+ 0x0c55, 0x0c56,
+ 0x0c60, 0x0c61,
+ 0x0c66, 0x0c6f,
+ 0x0c82, 0x0c83,
+ 0x0c85, 0x0c8c,
+ 0x0c8e, 0x0c90,
+ 0x0c92, 0x0ca8,
+ 0x0caa, 0x0cb3,
+ 0x0cb5, 0x0cb9,
+ 0x0cbc, 0x0cc4,
+ 0x0cc6, 0x0cc8,
+ 0x0cca, 0x0ccd,
+ 0x0cd5, 0x0cd6,
+ 0x0cde, 0x0cde,
+ 0x0ce0, 0x0ce1,
+ 0x0ce6, 0x0cef,
+ 0x0d02, 0x0d03,
+ 0x0d05, 0x0d0c,
+ 0x0d0e, 0x0d10,
+ 0x0d12, 0x0d28,
+ 0x0d2a, 0x0d39,
+ 0x0d3e, 0x0d43,
+ 0x0d46, 0x0d48,
+ 0x0d4a, 0x0d4d,
+ 0x0d57, 0x0d57,
+ 0x0d60, 0x0d61,
+ 0x0d66, 0x0d6f,
+ 0x0d82, 0x0d83,
+ 0x0d85, 0x0d96,
+ 0x0d9a, 0x0db1,
+ 0x0db3, 0x0dbb,
+ 0x0dbd, 0x0dbd,
+ 0x0dc0, 0x0dc6,
+ 0x0dca, 0x0dca,
+ 0x0dcf, 0x0dd4,
+ 0x0dd6, 0x0dd6,
+ 0x0dd8, 0x0ddf,
+ 0x0df2, 0x0df3,
+ 0x0e01, 0x0e3a,
+ 0x0e40, 0x0e4e,
+ 0x0e50, 0x0e59,
+ 0x0e81, 0x0e82,
+ 0x0e84, 0x0e84,
+ 0x0e87, 0x0e88,
+ 0x0e8a, 0x0e8a,
+ 0x0e8d, 0x0e8d,
+ 0x0e94, 0x0e97,
+ 0x0e99, 0x0e9f,
+ 0x0ea1, 0x0ea3,
+ 0x0ea5, 0x0ea5,
+ 0x0ea7, 0x0ea7,
+ 0x0eaa, 0x0eab,
+ 0x0ead, 0x0eb9,
+ 0x0ebb, 0x0ebd,
+ 0x0ec0, 0x0ec4,
+ 0x0ec6, 0x0ec6,
+ 0x0ec8, 0x0ecd,
+ 0x0ed0, 0x0ed9,
+ 0x0edc, 0x0edd,
+ 0x0f00, 0x0f00,
+ 0x0f18, 0x0f19,
+ 0x0f20, 0x0f33,
+ 0x0f35, 0x0f35,
+ 0x0f37, 0x0f37,
+ 0x0f39, 0x0f39,
+ 0x0f3e, 0x0f47,
+ 0x0f49, 0x0f6a,
+ 0x0f71, 0x0f84,
+ 0x0f86, 0x0f8b,
+ 0x0f90, 0x0f97,
+ 0x0f99, 0x0fbc,
+ 0x0fc6, 0x0fc6,
+ 0x1000, 0x1021,
+ 0x1023, 0x1027,
+ 0x1029, 0x102a,
+ 0x102c, 0x1032,
+ 0x1036, 0x1039,
+ 0x1040, 0x1049,
+ 0x1050, 0x1059,
+ 0x10a0, 0x10c5,
+ 0x10d0, 0x10fa,
+ 0x10fc, 0x10fc,
+ 0x1100, 0x1159,
+ 0x115f, 0x11a2,
+ 0x11a8, 0x11f9,
+ 0x1200, 0x1248,
+ 0x124a, 0x124d,
+ 0x1250, 0x1256,
+ 0x1258, 0x1258,
+ 0x125a, 0x125d,
+ 0x1260, 0x1288,
+ 0x128a, 0x128d,
+ 0x1290, 0x12b0,
+ 0x12b2, 0x12b5,
+ 0x12b8, 0x12be,
+ 0x12c0, 0x12c0,
+ 0x12c2, 0x12c5,
+ 0x12c8, 0x12d6,
+ 0x12d8, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x135a,
+ 0x135f, 0x135f,
+ 0x1369, 0x137c,
+ 0x1380, 0x138f,
+ 0x13a0, 0x13f4,
+ 0x1401, 0x166c,
+ 0x166f, 0x1676,
+ 0x1681, 0x169a,
+ 0x16a0, 0x16ea,
+ 0x16ee, 0x16f0,
+ 0x1700, 0x170c,
+ 0x170e, 0x1714,
+ 0x1720, 0x1734,
+ 0x1740, 0x1753,
+ 0x1760, 0x176c,
+ 0x176e, 0x1770,
+ 0x1772, 0x1773,
+ 0x1780, 0x17b3,
+ 0x17b6, 0x17d3,
+ 0x17d7, 0x17d7,
+ 0x17dc, 0x17dd,
+ 0x17e0, 0x17e9,
+ 0x17f0, 0x17f9,
+ 0x180b, 0x180d,
+ 0x1810, 0x1819,
+ 0x1820, 0x1877,
+ 0x1880, 0x18a9,
+ 0x1900, 0x191c,
+ 0x1920, 0x192b,
+ 0x1930, 0x193b,
+ 0x1946, 0x196d,
+ 0x1970, 0x1974,
+ 0x1980, 0x19a9,
+ 0x19b0, 0x19c9,
+ 0x19d0, 0x19d9,
+ 0x1a00, 0x1a1b,
+ 0x1d00, 0x1dc3,
+ 0x1e00, 0x1e9b,
+ 0x1ea0, 0x1ef9,
+ 0x1f00, 0x1f15,
+ 0x1f18, 0x1f1d,
+ 0x1f20, 0x1f45,
+ 0x1f48, 0x1f4d,
+ 0x1f50, 0x1f57,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f7d,
+ 0x1f80, 0x1fb4,
+ 0x1fb6, 0x1fbc,
+ 0x1fbe, 0x1fbe,
+ 0x1fc2, 0x1fc4,
+ 0x1fc6, 0x1fcc,
+ 0x1fd0, 0x1fd3,
+ 0x1fd6, 0x1fdb,
+ 0x1fe0, 0x1fec,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ffc,
+ 0x203f, 0x2040,
+ 0x2054, 0x2054,
+ 0x2070, 0x2071,
+ 0x2074, 0x2079,
+ 0x207f, 0x2089,
+ 0x2090, 0x2094,
+ 0x20d0, 0x20eb,
+ 0x2102, 0x2102,
+ 0x2107, 0x2107,
+ 0x210a, 0x2113,
+ 0x2115, 0x2115,
+ 0x2119, 0x211d,
+ 0x2124, 0x2124,
+ 0x2126, 0x2126,
+ 0x2128, 0x2128,
+ 0x212a, 0x212d,
+ 0x212f, 0x2131,
+ 0x2133, 0x2139,
+ 0x213c, 0x213f,
+ 0x2145, 0x2149,
+ 0x2153, 0x2183,
+ 0x2460, 0x249b,
+ 0x24ea, 0x24ff,
+ 0x2776, 0x2793,
+ 0x2c00, 0x2c2e,
+ 0x2c30, 0x2c5e,
+ 0x2c80, 0x2ce4,
+ 0x2cfd, 0x2cfd,
+ 0x2d00, 0x2d25,
+ 0x2d30, 0x2d65,
+ 0x2d6f, 0x2d6f,
+ 0x2d80, 0x2d96,
+ 0x2da0, 0x2da6,
+ 0x2da8, 0x2dae,
+ 0x2db0, 0x2db6,
+ 0x2db8, 0x2dbe,
+ 0x2dc0, 0x2dc6,
+ 0x2dc8, 0x2dce,
+ 0x2dd0, 0x2dd6,
+ 0x2dd8, 0x2dde,
+ 0x3005, 0x3007,
+ 0x3021, 0x302f,
+ 0x3031, 0x3035,
+ 0x3038, 0x303c,
+ 0x3041, 0x3096,
+ 0x3099, 0x309a,
+ 0x309d, 0x309f,
+ 0x30a1, 0x30fa,
+ 0x30fc, 0x30ff,
+ 0x3105, 0x312c,
+ 0x3131, 0x318e,
+ 0x3192, 0x3195,
+ 0x31a0, 0x31b7,
+ 0x31f0, 0x31ff,
+ 0x3220, 0x3229,
+ 0x3251, 0x325f,
+ 0x3280, 0x3289,
+ 0x32b1, 0x32bf,
+ 0x3400, 0x4db5,
+ 0x4e00, 0x9fbb,
+ 0xa000, 0xa48c,
+ 0xa800, 0xa827,
+ 0xac00, 0xd7a3,
+ 0xf900, 0xfa2d,
+ 0xfa30, 0xfa6a,
+ 0xfa70, 0xfad9,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xfb1d, 0xfb28,
+ 0xfb2a, 0xfb36,
+ 0xfb38, 0xfb3c,
+ 0xfb3e, 0xfb3e,
+ 0xfb40, 0xfb41,
+ 0xfb43, 0xfb44,
+ 0xfb46, 0xfbb1,
+ 0xfbd3, 0xfd3d,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdfb,
+ 0xfe00, 0xfe0f,
+ 0xfe20, 0xfe23,
+ 0xfe33, 0xfe34,
+ 0xfe4d, 0xfe4f,
+ 0xfe70, 0xfe74,
+ 0xfe76, 0xfefc,
+ 0xff10, 0xff19,
+ 0xff21, 0xff3a,
+ 0xff3f, 0xff3f,
+ 0xff41, 0xff5a,
+ 0xff66, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc,
+ 0x10000, 0x1000b,
+ 0x1000d, 0x10026,
+ 0x10028, 0x1003a,
+ 0x1003c, 0x1003d,
+ 0x1003f, 0x1004d,
+ 0x10050, 0x1005d,
+ 0x10080, 0x100fa,
+ 0x10107, 0x10133,
+ 0x10140, 0x10178,
+ 0x1018a, 0x1018a,
+ 0x10300, 0x1031e,
+ 0x10320, 0x10323,
+ 0x10330, 0x1034a,
+ 0x10380, 0x1039d,
+ 0x103a0, 0x103c3,
+ 0x103c8, 0x103cf,
+ 0x103d1, 0x103d5,
+ 0x10400, 0x1049d,
+ 0x104a0, 0x104a9,
+ 0x10800, 0x10805,
+ 0x10808, 0x10808,
+ 0x1080a, 0x10835,
+ 0x10837, 0x10838,
+ 0x1083c, 0x1083c,
+ 0x1083f, 0x1083f,
+ 0x10a00, 0x10a03,
+ 0x10a05, 0x10a06,
+ 0x10a0c, 0x10a13,
+ 0x10a15, 0x10a17,
+ 0x10a19, 0x10a33,
+ 0x10a38, 0x10a3a,
+ 0x10a3f, 0x10a47,
+ 0x1d165, 0x1d169,
+ 0x1d16d, 0x1d172,
+ 0x1d17b, 0x1d182,
+ 0x1d185, 0x1d18b,
+ 0x1d1aa, 0x1d1ad,
+ 0x1d242, 0x1d244,
+ 0x1d400, 0x1d454,
+ 0x1d456, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d51e, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d552, 0x1d6a5,
+ 0x1d6a8, 0x1d6c0,
+ 0x1d6c2, 0x1d6da,
+ 0x1d6dc, 0x1d6fa,
+ 0x1d6fc, 0x1d714,
+ 0x1d716, 0x1d734,
+ 0x1d736, 0x1d74e,
+ 0x1d750, 0x1d76e,
+ 0x1d770, 0x1d788,
+ 0x1d78a, 0x1d7a8,
+ 0x1d7aa, 0x1d7c2,
+ 0x1d7c4, 0x1d7c9,
+ 0x1d7ce, 0x1d7ff,
+ 0x20000, 0x2a6d6,
+ 0x2f800, 0x2fa1d,
+ 0xe0100, 0xe01ef
+}; /* CR_Word */
+
+/* 'Alnum': [[:Alnum:]] */
+static const OnigCodePoint CR_Alnum[] = {
+ 436,
+ 0x0030, 0x0039,
+ 0x0041, 0x005a,
+ 0x0061, 0x007a,
+ 0x00aa, 0x00aa,
+ 0x00b5, 0x00b5,
+ 0x00ba, 0x00ba,
+ 0x00c0, 0x00d6,
+ 0x00d8, 0x00f6,
+ 0x00f8, 0x0241,
+ 0x0250, 0x02c1,
+ 0x02c6, 0x02d1,
+ 0x02e0, 0x02e4,
+ 0x02ee, 0x02ee,
+ 0x0300, 0x036f,
+ 0x037a, 0x037a,
+ 0x0386, 0x0386,
+ 0x0388, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x03a1,
+ 0x03a3, 0x03ce,
+ 0x03d0, 0x03f5,
+ 0x03f7, 0x0481,
+ 0x0483, 0x0486,
+ 0x0488, 0x04ce,
+ 0x04d0, 0x04f9,
+ 0x0500, 0x050f,
+ 0x0531, 0x0556,
+ 0x0559, 0x0559,
+ 0x0561, 0x0587,
+ 0x0591, 0x05b9,
+ 0x05bb, 0x05bd,
+ 0x05bf, 0x05bf,
+ 0x05c1, 0x05c2,
+ 0x05c4, 0x05c5,
+ 0x05c7, 0x05c7,
+ 0x05d0, 0x05ea,
+ 0x05f0, 0x05f2,
+ 0x0610, 0x0615,
+ 0x0621, 0x063a,
+ 0x0640, 0x065e,
+ 0x0660, 0x0669,
+ 0x066e, 0x06d3,
+ 0x06d5, 0x06dc,
+ 0x06de, 0x06e8,
+ 0x06ea, 0x06fc,
+ 0x06ff, 0x06ff,
+ 0x0710, 0x074a,
+ 0x074d, 0x076d,
+ 0x0780, 0x07b1,
+ 0x0901, 0x0939,
+ 0x093c, 0x094d,
+ 0x0950, 0x0954,
+ 0x0958, 0x0963,
+ 0x0966, 0x096f,
+ 0x097d, 0x097d,
+ 0x0981, 0x0983,
+ 0x0985, 0x098c,
+ 0x098f, 0x0990,
+ 0x0993, 0x09a8,
+ 0x09aa, 0x09b0,
+ 0x09b2, 0x09b2,
+ 0x09b6, 0x09b9,
+ 0x09bc, 0x09c4,
+ 0x09c7, 0x09c8,
+ 0x09cb, 0x09ce,
+ 0x09d7, 0x09d7,
+ 0x09dc, 0x09dd,
+ 0x09df, 0x09e3,
+ 0x09e6, 0x09f1,
+ 0x0a01, 0x0a03,
+ 0x0a05, 0x0a0a,
+ 0x0a0f, 0x0a10,
+ 0x0a13, 0x0a28,
+ 0x0a2a, 0x0a30,
+ 0x0a32, 0x0a33,
+ 0x0a35, 0x0a36,
+ 0x0a38, 0x0a39,
+ 0x0a3c, 0x0a3c,
+ 0x0a3e, 0x0a42,
+ 0x0a47, 0x0a48,
+ 0x0a4b, 0x0a4d,
+ 0x0a59, 0x0a5c,
+ 0x0a5e, 0x0a5e,
+ 0x0a66, 0x0a74,
+ 0x0a81, 0x0a83,
+ 0x0a85, 0x0a8d,
+ 0x0a8f, 0x0a91,
+ 0x0a93, 0x0aa8,
+ 0x0aaa, 0x0ab0,
+ 0x0ab2, 0x0ab3,
+ 0x0ab5, 0x0ab9,
+ 0x0abc, 0x0ac5,
+ 0x0ac7, 0x0ac9,
+ 0x0acb, 0x0acd,
+ 0x0ad0, 0x0ad0,
+ 0x0ae0, 0x0ae3,
+ 0x0ae6, 0x0aef,
+ 0x0b01, 0x0b03,
+ 0x0b05, 0x0b0c,
+ 0x0b0f, 0x0b10,
+ 0x0b13, 0x0b28,
+ 0x0b2a, 0x0b30,
+ 0x0b32, 0x0b33,
+ 0x0b35, 0x0b39,
+ 0x0b3c, 0x0b43,
+ 0x0b47, 0x0b48,
+ 0x0b4b, 0x0b4d,
+ 0x0b56, 0x0b57,
+ 0x0b5c, 0x0b5d,
+ 0x0b5f, 0x0b61,
+ 0x0b66, 0x0b6f,
+ 0x0b71, 0x0b71,
+ 0x0b82, 0x0b83,
+ 0x0b85, 0x0b8a,
+ 0x0b8e, 0x0b90,
+ 0x0b92, 0x0b95,
+ 0x0b99, 0x0b9a,
+ 0x0b9c, 0x0b9c,
+ 0x0b9e, 0x0b9f,
+ 0x0ba3, 0x0ba4,
+ 0x0ba8, 0x0baa,
+ 0x0bae, 0x0bb9,
+ 0x0bbe, 0x0bc2,
+ 0x0bc6, 0x0bc8,
+ 0x0bca, 0x0bcd,
+ 0x0bd7, 0x0bd7,
+ 0x0be6, 0x0bef,
+ 0x0c01, 0x0c03,
+ 0x0c05, 0x0c0c,
+ 0x0c0e, 0x0c10,
+ 0x0c12, 0x0c28,
+ 0x0c2a, 0x0c33,
+ 0x0c35, 0x0c39,
+ 0x0c3e, 0x0c44,
+ 0x0c46, 0x0c48,
+ 0x0c4a, 0x0c4d,
+ 0x0c55, 0x0c56,
+ 0x0c60, 0x0c61,
+ 0x0c66, 0x0c6f,
+ 0x0c82, 0x0c83,
+ 0x0c85, 0x0c8c,
+ 0x0c8e, 0x0c90,
+ 0x0c92, 0x0ca8,
+ 0x0caa, 0x0cb3,
+ 0x0cb5, 0x0cb9,
+ 0x0cbc, 0x0cc4,
+ 0x0cc6, 0x0cc8,
+ 0x0cca, 0x0ccd,
+ 0x0cd5, 0x0cd6,
+ 0x0cde, 0x0cde,
+ 0x0ce0, 0x0ce1,
+ 0x0ce6, 0x0cef,
+ 0x0d02, 0x0d03,
+ 0x0d05, 0x0d0c,
+ 0x0d0e, 0x0d10,
+ 0x0d12, 0x0d28,
+ 0x0d2a, 0x0d39,
+ 0x0d3e, 0x0d43,
+ 0x0d46, 0x0d48,
+ 0x0d4a, 0x0d4d,
+ 0x0d57, 0x0d57,
+ 0x0d60, 0x0d61,
+ 0x0d66, 0x0d6f,
+ 0x0d82, 0x0d83,
+ 0x0d85, 0x0d96,
+ 0x0d9a, 0x0db1,
+ 0x0db3, 0x0dbb,
+ 0x0dbd, 0x0dbd,
+ 0x0dc0, 0x0dc6,
+ 0x0dca, 0x0dca,
+ 0x0dcf, 0x0dd4,
+ 0x0dd6, 0x0dd6,
+ 0x0dd8, 0x0ddf,
+ 0x0df2, 0x0df3,
+ 0x0e01, 0x0e3a,
+ 0x0e40, 0x0e4e,
+ 0x0e50, 0x0e59,
+ 0x0e81, 0x0e82,
+ 0x0e84, 0x0e84,
+ 0x0e87, 0x0e88,
+ 0x0e8a, 0x0e8a,
+ 0x0e8d, 0x0e8d,
+ 0x0e94, 0x0e97,
+ 0x0e99, 0x0e9f,
+ 0x0ea1, 0x0ea3,
+ 0x0ea5, 0x0ea5,
+ 0x0ea7, 0x0ea7,
+ 0x0eaa, 0x0eab,
+ 0x0ead, 0x0eb9,
+ 0x0ebb, 0x0ebd,
+ 0x0ec0, 0x0ec4,
+ 0x0ec6, 0x0ec6,
+ 0x0ec8, 0x0ecd,
+ 0x0ed0, 0x0ed9,
+ 0x0edc, 0x0edd,
+ 0x0f00, 0x0f00,
+ 0x0f18, 0x0f19,
+ 0x0f20, 0x0f29,
+ 0x0f35, 0x0f35,
+ 0x0f37, 0x0f37,
+ 0x0f39, 0x0f39,
+ 0x0f3e, 0x0f47,
+ 0x0f49, 0x0f6a,
+ 0x0f71, 0x0f84,
+ 0x0f86, 0x0f8b,
+ 0x0f90, 0x0f97,
+ 0x0f99, 0x0fbc,
+ 0x0fc6, 0x0fc6,
+ 0x1000, 0x1021,
+ 0x1023, 0x1027,
+ 0x1029, 0x102a,
+ 0x102c, 0x1032,
+ 0x1036, 0x1039,
+ 0x1040, 0x1049,
+ 0x1050, 0x1059,
+ 0x10a0, 0x10c5,
+ 0x10d0, 0x10fa,
+ 0x10fc, 0x10fc,
+ 0x1100, 0x1159,
+ 0x115f, 0x11a2,
+ 0x11a8, 0x11f9,
+ 0x1200, 0x1248,
+ 0x124a, 0x124d,
+ 0x1250, 0x1256,
+ 0x1258, 0x1258,
+ 0x125a, 0x125d,
+ 0x1260, 0x1288,
+ 0x128a, 0x128d,
+ 0x1290, 0x12b0,
+ 0x12b2, 0x12b5,
+ 0x12b8, 0x12be,
+ 0x12c0, 0x12c0,
+ 0x12c2, 0x12c5,
+ 0x12c8, 0x12d6,
+ 0x12d8, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x135a,
+ 0x135f, 0x135f,
+ 0x1380, 0x138f,
+ 0x13a0, 0x13f4,
+ 0x1401, 0x166c,
+ 0x166f, 0x1676,
+ 0x1681, 0x169a,
+ 0x16a0, 0x16ea,
+ 0x1700, 0x170c,
+ 0x170e, 0x1714,
+ 0x1720, 0x1734,
+ 0x1740, 0x1753,
+ 0x1760, 0x176c,
+ 0x176e, 0x1770,
+ 0x1772, 0x1773,
+ 0x1780, 0x17b3,
+ 0x17b6, 0x17d3,
+ 0x17d7, 0x17d7,
+ 0x17dc, 0x17dd,
+ 0x17e0, 0x17e9,
+ 0x180b, 0x180d,
+ 0x1810, 0x1819,
+ 0x1820, 0x1877,
+ 0x1880, 0x18a9,
+ 0x1900, 0x191c,
+ 0x1920, 0x192b,
+ 0x1930, 0x193b,
+ 0x1946, 0x196d,
+ 0x1970, 0x1974,
+ 0x1980, 0x19a9,
+ 0x19b0, 0x19c9,
+ 0x19d0, 0x19d9,
+ 0x1a00, 0x1a1b,
+ 0x1d00, 0x1dc3,
+ 0x1e00, 0x1e9b,
+ 0x1ea0, 0x1ef9,
+ 0x1f00, 0x1f15,
+ 0x1f18, 0x1f1d,
+ 0x1f20, 0x1f45,
+ 0x1f48, 0x1f4d,
+ 0x1f50, 0x1f57,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f7d,
+ 0x1f80, 0x1fb4,
+ 0x1fb6, 0x1fbc,
+ 0x1fbe, 0x1fbe,
+ 0x1fc2, 0x1fc4,
+ 0x1fc6, 0x1fcc,
+ 0x1fd0, 0x1fd3,
+ 0x1fd6, 0x1fdb,
+ 0x1fe0, 0x1fec,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ffc,
+ 0x2071, 0x2071,
+ 0x207f, 0x207f,
+ 0x2090, 0x2094,
+ 0x20d0, 0x20eb,
+ 0x2102, 0x2102,
+ 0x2107, 0x2107,
+ 0x210a, 0x2113,
+ 0x2115, 0x2115,
+ 0x2119, 0x211d,
+ 0x2124, 0x2124,
+ 0x2126, 0x2126,
+ 0x2128, 0x2128,
+ 0x212a, 0x212d,
+ 0x212f, 0x2131,
+ 0x2133, 0x2139,
+ 0x213c, 0x213f,
+ 0x2145, 0x2149,
+ 0x2c00, 0x2c2e,
+ 0x2c30, 0x2c5e,
+ 0x2c80, 0x2ce4,
+ 0x2d00, 0x2d25,
+ 0x2d30, 0x2d65,
+ 0x2d6f, 0x2d6f,
+ 0x2d80, 0x2d96,
+ 0x2da0, 0x2da6,
+ 0x2da8, 0x2dae,
+ 0x2db0, 0x2db6,
+ 0x2db8, 0x2dbe,
+ 0x2dc0, 0x2dc6,
+ 0x2dc8, 0x2dce,
+ 0x2dd0, 0x2dd6,
+ 0x2dd8, 0x2dde,
+ 0x3005, 0x3006,
+ 0x302a, 0x302f,
+ 0x3031, 0x3035,
+ 0x303b, 0x303c,
+ 0x3041, 0x3096,
+ 0x3099, 0x309a,
+ 0x309d, 0x309f,
+ 0x30a1, 0x30fa,
+ 0x30fc, 0x30ff,
+ 0x3105, 0x312c,
+ 0x3131, 0x318e,
+ 0x31a0, 0x31b7,
+ 0x31f0, 0x31ff,
+ 0x3400, 0x4db5,
+ 0x4e00, 0x9fbb,
+ 0xa000, 0xa48c,
+ 0xa800, 0xa827,
+ 0xac00, 0xd7a3,
+ 0xf900, 0xfa2d,
+ 0xfa30, 0xfa6a,
+ 0xfa70, 0xfad9,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xfb1d, 0xfb28,
+ 0xfb2a, 0xfb36,
+ 0xfb38, 0xfb3c,
+ 0xfb3e, 0xfb3e,
+ 0xfb40, 0xfb41,
+ 0xfb43, 0xfb44,
+ 0xfb46, 0xfbb1,
+ 0xfbd3, 0xfd3d,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdfb,
+ 0xfe00, 0xfe0f,
+ 0xfe20, 0xfe23,
+ 0xfe70, 0xfe74,
+ 0xfe76, 0xfefc,
+ 0xff10, 0xff19,
+ 0xff21, 0xff3a,
+ 0xff41, 0xff5a,
+ 0xff66, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc,
+ 0x10000, 0x1000b,
+ 0x1000d, 0x10026,
+ 0x10028, 0x1003a,
+ 0x1003c, 0x1003d,
+ 0x1003f, 0x1004d,
+ 0x10050, 0x1005d,
+ 0x10080, 0x100fa,
+ 0x10300, 0x1031e,
+ 0x10330, 0x10349,
+ 0x10380, 0x1039d,
+ 0x103a0, 0x103c3,
+ 0x103c8, 0x103cf,
+ 0x10400, 0x1049d,
+ 0x104a0, 0x104a9,
+ 0x10800, 0x10805,
+ 0x10808, 0x10808,
+ 0x1080a, 0x10835,
+ 0x10837, 0x10838,
+ 0x1083c, 0x1083c,
+ 0x1083f, 0x1083f,
+ 0x10a00, 0x10a03,
+ 0x10a05, 0x10a06,
+ 0x10a0c, 0x10a13,
+ 0x10a15, 0x10a17,
+ 0x10a19, 0x10a33,
+ 0x10a38, 0x10a3a,
+ 0x10a3f, 0x10a3f,
+ 0x1d165, 0x1d169,
+ 0x1d16d, 0x1d172,
+ 0x1d17b, 0x1d182,
+ 0x1d185, 0x1d18b,
+ 0x1d1aa, 0x1d1ad,
+ 0x1d242, 0x1d244,
+ 0x1d400, 0x1d454,
+ 0x1d456, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d51e, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d552, 0x1d6a5,
+ 0x1d6a8, 0x1d6c0,
+ 0x1d6c2, 0x1d6da,
+ 0x1d6dc, 0x1d6fa,
+ 0x1d6fc, 0x1d714,
+ 0x1d716, 0x1d734,
+ 0x1d736, 0x1d74e,
+ 0x1d750, 0x1d76e,
+ 0x1d770, 0x1d788,
+ 0x1d78a, 0x1d7a8,
+ 0x1d7aa, 0x1d7c2,
+ 0x1d7c4, 0x1d7c9,
+ 0x1d7ce, 0x1d7ff,
+ 0x20000, 0x2a6d6,
+ 0x2f800, 0x2fa1d,
+ 0xe0100, 0xe01ef
+}; /* CR_Alnum */
+
+/* 'ASCII': [[:ASCII:]] */
+static const OnigCodePoint CR_ASCII[] = {
+ 1,
+ 0x0000, 0x007f
+}; /* CR_ASCII */
+
+#ifdef USE_UNICODE_PROPERTIES
+
+/* 'Any': - */
+static const OnigCodePoint CR_Any[] = {
+ 1,
+ 0x0000, 0x10ffff
+}; /* CR_Any */
+
+/* 'Assigned': - */
+static const OnigCodePoint CR_Assigned[] = {
+ 420,
+ 0x0000, 0x0241,
+ 0x0250, 0x036f,
+ 0x0374, 0x0375,
+ 0x037a, 0x037a,
+ 0x037e, 0x037e,
+ 0x0384, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x03a1,
+ 0x03a3, 0x03ce,
+ 0x03d0, 0x0486,
+ 0x0488, 0x04ce,
+ 0x04d0, 0x04f9,
+ 0x0500, 0x050f,
+ 0x0531, 0x0556,
+ 0x0559, 0x055f,
+ 0x0561, 0x0587,
+ 0x0589, 0x058a,
+ 0x0591, 0x05b9,
+ 0x05bb, 0x05c7,
+ 0x05d0, 0x05ea,
+ 0x05f0, 0x05f4,
+ 0x0600, 0x0603,
+ 0x060b, 0x0615,
+ 0x061b, 0x061b,
+ 0x061e, 0x061f,
+ 0x0621, 0x063a,
+ 0x0640, 0x065e,
+ 0x0660, 0x070d,
+ 0x070f, 0x074a,
+ 0x074d, 0x076d,
+ 0x0780, 0x07b1,
+ 0x0901, 0x0939,
+ 0x093c, 0x094d,
+ 0x0950, 0x0954,
+ 0x0958, 0x0970,
+ 0x097d, 0x097d,
+ 0x0981, 0x0983,
+ 0x0985, 0x098c,
+ 0x098f, 0x0990,
+ 0x0993, 0x09a8,
+ 0x09aa, 0x09b0,
+ 0x09b2, 0x09b2,
+ 0x09b6, 0x09b9,
+ 0x09bc, 0x09c4,
+ 0x09c7, 0x09c8,
+ 0x09cb, 0x09ce,
+ 0x09d7, 0x09d7,
+ 0x09dc, 0x09dd,
+ 0x09df, 0x09e3,
+ 0x09e6, 0x09fa,
+ 0x0a01, 0x0a03,
+ 0x0a05, 0x0a0a,
+ 0x0a0f, 0x0a10,
+ 0x0a13, 0x0a28,
+ 0x0a2a, 0x0a30,
+ 0x0a32, 0x0a33,
+ 0x0a35, 0x0a36,
+ 0x0a38, 0x0a39,
+ 0x0a3c, 0x0a3c,
+ 0x0a3e, 0x0a42,
+ 0x0a47, 0x0a48,
+ 0x0a4b, 0x0a4d,
+ 0x0a59, 0x0a5c,
+ 0x0a5e, 0x0a5e,
+ 0x0a66, 0x0a74,
+ 0x0a81, 0x0a83,
+ 0x0a85, 0x0a8d,
+ 0x0a8f, 0x0a91,
+ 0x0a93, 0x0aa8,
+ 0x0aaa, 0x0ab0,
+ 0x0ab2, 0x0ab3,
+ 0x0ab5, 0x0ab9,
+ 0x0abc, 0x0ac5,
+ 0x0ac7, 0x0ac9,
+ 0x0acb, 0x0acd,
+ 0x0ad0, 0x0ad0,
+ 0x0ae0, 0x0ae3,
+ 0x0ae6, 0x0aef,
+ 0x0af1, 0x0af1,
+ 0x0b01, 0x0b03,
+ 0x0b05, 0x0b0c,
+ 0x0b0f, 0x0b10,
+ 0x0b13, 0x0b28,
+ 0x0b2a, 0x0b30,
+ 0x0b32, 0x0b33,
+ 0x0b35, 0x0b39,
+ 0x0b3c, 0x0b43,
+ 0x0b47, 0x0b48,
+ 0x0b4b, 0x0b4d,
+ 0x0b56, 0x0b57,
+ 0x0b5c, 0x0b5d,
+ 0x0b5f, 0x0b61,
+ 0x0b66, 0x0b71,
+ 0x0b82, 0x0b83,
+ 0x0b85, 0x0b8a,
+ 0x0b8e, 0x0b90,
+ 0x0b92, 0x0b95,
+ 0x0b99, 0x0b9a,
+ 0x0b9c, 0x0b9c,
+ 0x0b9e, 0x0b9f,
+ 0x0ba3, 0x0ba4,
+ 0x0ba8, 0x0baa,
+ 0x0bae, 0x0bb9,
+ 0x0bbe, 0x0bc2,
+ 0x0bc6, 0x0bc8,
+ 0x0bca, 0x0bcd,
+ 0x0bd7, 0x0bd7,
+ 0x0be6, 0x0bfa,
+ 0x0c01, 0x0c03,
+ 0x0c05, 0x0c0c,
+ 0x0c0e, 0x0c10,
+ 0x0c12, 0x0c28,
+ 0x0c2a, 0x0c33,
+ 0x0c35, 0x0c39,
+ 0x0c3e, 0x0c44,
+ 0x0c46, 0x0c48,
+ 0x0c4a, 0x0c4d,
+ 0x0c55, 0x0c56,
+ 0x0c60, 0x0c61,
+ 0x0c66, 0x0c6f,
+ 0x0c82, 0x0c83,
+ 0x0c85, 0x0c8c,
+ 0x0c8e, 0x0c90,
+ 0x0c92, 0x0ca8,
+ 0x0caa, 0x0cb3,
+ 0x0cb5, 0x0cb9,
+ 0x0cbc, 0x0cc4,
+ 0x0cc6, 0x0cc8,
+ 0x0cca, 0x0ccd,
+ 0x0cd5, 0x0cd6,
+ 0x0cde, 0x0cde,
+ 0x0ce0, 0x0ce1,
+ 0x0ce6, 0x0cef,
+ 0x0d02, 0x0d03,
+ 0x0d05, 0x0d0c,
+ 0x0d0e, 0x0d10,
+ 0x0d12, 0x0d28,
+ 0x0d2a, 0x0d39,
+ 0x0d3e, 0x0d43,
+ 0x0d46, 0x0d48,
+ 0x0d4a, 0x0d4d,
+ 0x0d57, 0x0d57,
+ 0x0d60, 0x0d61,
+ 0x0d66, 0x0d6f,
+ 0x0d82, 0x0d83,
+ 0x0d85, 0x0d96,
+ 0x0d9a, 0x0db1,
+ 0x0db3, 0x0dbb,
+ 0x0dbd, 0x0dbd,
+ 0x0dc0, 0x0dc6,
+ 0x0dca, 0x0dca,
+ 0x0dcf, 0x0dd4,
+ 0x0dd6, 0x0dd6,
+ 0x0dd8, 0x0ddf,
+ 0x0df2, 0x0df4,
+ 0x0e01, 0x0e3a,
+ 0x0e3f, 0x0e5b,
+ 0x0e81, 0x0e82,
+ 0x0e84, 0x0e84,
+ 0x0e87, 0x0e88,
+ 0x0e8a, 0x0e8a,
+ 0x0e8d, 0x0e8d,
+ 0x0e94, 0x0e97,
+ 0x0e99, 0x0e9f,
+ 0x0ea1, 0x0ea3,
+ 0x0ea5, 0x0ea5,
+ 0x0ea7, 0x0ea7,
+ 0x0eaa, 0x0eab,
+ 0x0ead, 0x0eb9,
+ 0x0ebb, 0x0ebd,
+ 0x0ec0, 0x0ec4,
+ 0x0ec6, 0x0ec6,
+ 0x0ec8, 0x0ecd,
+ 0x0ed0, 0x0ed9,
+ 0x0edc, 0x0edd,
+ 0x0f00, 0x0f47,
+ 0x0f49, 0x0f6a,
+ 0x0f71, 0x0f8b,
+ 0x0f90, 0x0f97,
+ 0x0f99, 0x0fbc,
+ 0x0fbe, 0x0fcc,
+ 0x0fcf, 0x0fd1,
+ 0x1000, 0x1021,
+ 0x1023, 0x1027,
+ 0x1029, 0x102a,
+ 0x102c, 0x1032,
+ 0x1036, 0x1039,
+ 0x1040, 0x1059,
+ 0x10a0, 0x10c5,
+ 0x10d0, 0x10fc,
+ 0x1100, 0x1159,
+ 0x115f, 0x11a2,
+ 0x11a8, 0x11f9,
+ 0x1200, 0x1248,
+ 0x124a, 0x124d,
+ 0x1250, 0x1256,
+ 0x1258, 0x1258,
+ 0x125a, 0x125d,
+ 0x1260, 0x1288,
+ 0x128a, 0x128d,
+ 0x1290, 0x12b0,
+ 0x12b2, 0x12b5,
+ 0x12b8, 0x12be,
+ 0x12c0, 0x12c0,
+ 0x12c2, 0x12c5,
+ 0x12c8, 0x12d6,
+ 0x12d8, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x135a,
+ 0x135f, 0x137c,
+ 0x1380, 0x1399,
+ 0x13a0, 0x13f4,
+ 0x1401, 0x1676,
+ 0x1680, 0x169c,
+ 0x16a0, 0x16f0,
+ 0x1700, 0x170c,
+ 0x170e, 0x1714,
+ 0x1720, 0x1736,
+ 0x1740, 0x1753,
+ 0x1760, 0x176c,
+ 0x176e, 0x1770,
+ 0x1772, 0x1773,
+ 0x1780, 0x17dd,
+ 0x17e0, 0x17e9,
+ 0x17f0, 0x17f9,
+ 0x1800, 0x180e,
+ 0x1810, 0x1819,
+ 0x1820, 0x1877,
+ 0x1880, 0x18a9,
+ 0x1900, 0x191c,
+ 0x1920, 0x192b,
+ 0x1930, 0x193b,
+ 0x1940, 0x1940,
+ 0x1944, 0x196d,
+ 0x1970, 0x1974,
+ 0x1980, 0x19a9,
+ 0x19b0, 0x19c9,
+ 0x19d0, 0x19d9,
+ 0x19de, 0x1a1b,
+ 0x1a1e, 0x1a1f,
+ 0x1d00, 0x1dc3,
+ 0x1e00, 0x1e9b,
+ 0x1ea0, 0x1ef9,
+ 0x1f00, 0x1f15,
+ 0x1f18, 0x1f1d,
+ 0x1f20, 0x1f45,
+ 0x1f48, 0x1f4d,
+ 0x1f50, 0x1f57,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f7d,
+ 0x1f80, 0x1fb4,
+ 0x1fb6, 0x1fc4,
+ 0x1fc6, 0x1fd3,
+ 0x1fd6, 0x1fdb,
+ 0x1fdd, 0x1fef,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ffe,
+ 0x2000, 0x2063,
+ 0x206a, 0x2071,
+ 0x2074, 0x208e,
+ 0x2090, 0x2094,
+ 0x20a0, 0x20b5,
+ 0x20d0, 0x20eb,
+ 0x2100, 0x214c,
+ 0x2153, 0x2183,
+ 0x2190, 0x23db,
+ 0x2400, 0x2426,
+ 0x2440, 0x244a,
+ 0x2460, 0x269c,
+ 0x26a0, 0x26b1,
+ 0x2701, 0x2704,
+ 0x2706, 0x2709,
+ 0x270c, 0x2727,
+ 0x2729, 0x274b,
+ 0x274d, 0x274d,
+ 0x274f, 0x2752,
+ 0x2756, 0x2756,
+ 0x2758, 0x275e,
+ 0x2761, 0x2794,
+ 0x2798, 0x27af,
+ 0x27b1, 0x27be,
+ 0x27c0, 0x27c6,
+ 0x27d0, 0x27eb,
+ 0x27f0, 0x2b13,
+ 0x2c00, 0x2c2e,
+ 0x2c30, 0x2c5e,
+ 0x2c80, 0x2cea,
+ 0x2cf9, 0x2d25,
+ 0x2d30, 0x2d65,
+ 0x2d6f, 0x2d6f,
+ 0x2d80, 0x2d96,
+ 0x2da0, 0x2da6,
+ 0x2da8, 0x2dae,
+ 0x2db0, 0x2db6,
+ 0x2db8, 0x2dbe,
+ 0x2dc0, 0x2dc6,
+ 0x2dc8, 0x2dce,
+ 0x2dd0, 0x2dd6,
+ 0x2dd8, 0x2dde,
+ 0x2e00, 0x2e17,
+ 0x2e1c, 0x2e1d,
+ 0x2e80, 0x2e99,
+ 0x2e9b, 0x2ef3,
+ 0x2f00, 0x2fd5,
+ 0x2ff0, 0x2ffb,
+ 0x3000, 0x303f,
+ 0x3041, 0x3096,
+ 0x3099, 0x30ff,
+ 0x3105, 0x312c,
+ 0x3131, 0x318e,
+ 0x3190, 0x31b7,
+ 0x31c0, 0x31cf,
+ 0x31f0, 0x321e,
+ 0x3220, 0x3243,
+ 0x3250, 0x32fe,
+ 0x3300, 0x4db5,
+ 0x4dc0, 0x9fbb,
+ 0xa000, 0xa48c,
+ 0xa490, 0xa4c6,
+ 0xa700, 0xa716,
+ 0xa800, 0xa82b,
+ 0xac00, 0xd7a3,
+ 0xd800, 0xfa2d,
+ 0xfa30, 0xfa6a,
+ 0xfa70, 0xfad9,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xfb1d, 0xfb36,
+ 0xfb38, 0xfb3c,
+ 0xfb3e, 0xfb3e,
+ 0xfb40, 0xfb41,
+ 0xfb43, 0xfb44,
+ 0xfb46, 0xfbb1,
+ 0xfbd3, 0xfd3f,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdfd,
+ 0xfe00, 0xfe19,
+ 0xfe20, 0xfe23,
+ 0xfe30, 0xfe52,
+ 0xfe54, 0xfe66,
+ 0xfe68, 0xfe6b,
+ 0xfe70, 0xfe74,
+ 0xfe76, 0xfefc,
+ 0xfeff, 0xfeff,
+ 0xff01, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc,
+ 0xffe0, 0xffe6,
+ 0xffe8, 0xffee,
+ 0xfff9, 0xfffd,
+ 0x10000, 0x1000b,
+ 0x1000d, 0x10026,
+ 0x10028, 0x1003a,
+ 0x1003c, 0x1003d,
+ 0x1003f, 0x1004d,
+ 0x10050, 0x1005d,
+ 0x10080, 0x100fa,
+ 0x10100, 0x10102,
+ 0x10107, 0x10133,
+ 0x10137, 0x1018a,
+ 0x10300, 0x1031e,
+ 0x10320, 0x10323,
+ 0x10330, 0x1034a,
+ 0x10380, 0x1039d,
+ 0x1039f, 0x103c3,
+ 0x103c8, 0x103d5,
+ 0x10400, 0x1049d,
+ 0x104a0, 0x104a9,
+ 0x10800, 0x10805,
+ 0x10808, 0x10808,
+ 0x1080a, 0x10835,
+ 0x10837, 0x10838,
+ 0x1083c, 0x1083c,
+ 0x1083f, 0x1083f,
+ 0x10a00, 0x10a03,
+ 0x10a05, 0x10a06,
+ 0x10a0c, 0x10a13,
+ 0x10a15, 0x10a17,
+ 0x10a19, 0x10a33,
+ 0x10a38, 0x10a3a,
+ 0x10a3f, 0x10a47,
+ 0x10a50, 0x10a58,
+ 0x1d000, 0x1d0f5,
+ 0x1d100, 0x1d126,
+ 0x1d12a, 0x1d1dd,
+ 0x1d200, 0x1d245,
+ 0x1d300, 0x1d356,
+ 0x1d400, 0x1d454,
+ 0x1d456, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d51e, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d552, 0x1d6a5,
+ 0x1d6a8, 0x1d7c9,
+ 0x1d7ce, 0x1d7ff,
+ 0x20000, 0x2a6d6,
+ 0x2f800, 0x2fa1d,
+ 0xe0001, 0xe0001,
+ 0xe0020, 0xe007f,
+ 0xe0100, 0xe01ef,
+ 0xf0000, 0xffffd,
+ 0x100000, 0x10fffd
+}; /* CR_Assigned */
+
+/* 'C': Major Category */
+static const OnigCodePoint CR_C[] = {
+ 422,
+ 0x0000, 0x001f,
+ 0x007f, 0x009f,
+ 0x00ad, 0x00ad,
+ 0x0242, 0x024f,
+ 0x0370, 0x0373,
+ 0x0376, 0x0379,
+ 0x037b, 0x037d,
+ 0x037f, 0x0383,
+ 0x038b, 0x038b,
+ 0x038d, 0x038d,
+ 0x03a2, 0x03a2,
+ 0x03cf, 0x03cf,
+ 0x0487, 0x0487,
+ 0x04cf, 0x04cf,
+ 0x04fa, 0x04ff,
+ 0x0510, 0x0530,
+ 0x0557, 0x0558,
+ 0x0560, 0x0560,
+ 0x0588, 0x0588,
+ 0x058b, 0x0590,
+ 0x05ba, 0x05ba,
+ 0x05c8, 0x05cf,
+ 0x05eb, 0x05ef,
+ 0x05f5, 0x060a,
+ 0x0616, 0x061a,
+ 0x061c, 0x061d,
+ 0x0620, 0x0620,
+ 0x063b, 0x063f,
+ 0x065f, 0x065f,
+ 0x06dd, 0x06dd,
+ 0x070e, 0x070f,
+ 0x074b, 0x074c,
+ 0x076e, 0x077f,
+ 0x07b2, 0x0900,
+ 0x093a, 0x093b,
+ 0x094e, 0x094f,
+ 0x0955, 0x0957,
+ 0x0971, 0x097c,
+ 0x097e, 0x0980,
+ 0x0984, 0x0984,
+ 0x098d, 0x098e,
+ 0x0991, 0x0992,
+ 0x09a9, 0x09a9,
+ 0x09b1, 0x09b1,
+ 0x09b3, 0x09b5,
+ 0x09ba, 0x09bb,
+ 0x09c5, 0x09c6,
+ 0x09c9, 0x09ca,
+ 0x09cf, 0x09d6,
+ 0x09d8, 0x09db,
+ 0x09de, 0x09de,
+ 0x09e4, 0x09e5,
+ 0x09fb, 0x0a00,
+ 0x0a04, 0x0a04,
+ 0x0a0b, 0x0a0e,
+ 0x0a11, 0x0a12,
+ 0x0a29, 0x0a29,
+ 0x0a31, 0x0a31,
+ 0x0a34, 0x0a34,
+ 0x0a37, 0x0a37,
+ 0x0a3a, 0x0a3b,
+ 0x0a3d, 0x0a3d,
+ 0x0a43, 0x0a46,
+ 0x0a49, 0x0a4a,
+ 0x0a4e, 0x0a58,
+ 0x0a5d, 0x0a5d,
+ 0x0a5f, 0x0a65,
+ 0x0a75, 0x0a80,
+ 0x0a84, 0x0a84,
+ 0x0a8e, 0x0a8e,
+ 0x0a92, 0x0a92,
+ 0x0aa9, 0x0aa9,
+ 0x0ab1, 0x0ab1,
+ 0x0ab4, 0x0ab4,
+ 0x0aba, 0x0abb,
+ 0x0ac6, 0x0ac6,
+ 0x0aca, 0x0aca,
+ 0x0ace, 0x0acf,
+ 0x0ad1, 0x0adf,
+ 0x0ae4, 0x0ae5,
+ 0x0af0, 0x0af0,
+ 0x0af2, 0x0b00,
+ 0x0b04, 0x0b04,
+ 0x0b0d, 0x0b0e,
+ 0x0b11, 0x0b12,
+ 0x0b29, 0x0b29,
+ 0x0b31, 0x0b31,
+ 0x0b34, 0x0b34,
+ 0x0b3a, 0x0b3b,
+ 0x0b44, 0x0b46,
+ 0x0b49, 0x0b4a,
+ 0x0b4e, 0x0b55,
+ 0x0b58, 0x0b5b,
+ 0x0b5e, 0x0b5e,
+ 0x0b62, 0x0b65,
+ 0x0b72, 0x0b81,
+ 0x0b84, 0x0b84,
+ 0x0b8b, 0x0b8d,
+ 0x0b91, 0x0b91,
+ 0x0b96, 0x0b98,
+ 0x0b9b, 0x0b9b,
+ 0x0b9d, 0x0b9d,
+ 0x0ba0, 0x0ba2,
+ 0x0ba5, 0x0ba7,
+ 0x0bab, 0x0bad,
+ 0x0bba, 0x0bbd,
+ 0x0bc3, 0x0bc5,
+ 0x0bc9, 0x0bc9,
+ 0x0bce, 0x0bd6,
+ 0x0bd8, 0x0be5,
+ 0x0bfb, 0x0c00,
+ 0x0c04, 0x0c04,
+ 0x0c0d, 0x0c0d,
+ 0x0c11, 0x0c11,
+ 0x0c29, 0x0c29,
+ 0x0c34, 0x0c34,
+ 0x0c3a, 0x0c3d,
+ 0x0c45, 0x0c45,
+ 0x0c49, 0x0c49,
+ 0x0c4e, 0x0c54,
+ 0x0c57, 0x0c5f,
+ 0x0c62, 0x0c65,
+ 0x0c70, 0x0c81,
+ 0x0c84, 0x0c84,
+ 0x0c8d, 0x0c8d,
+ 0x0c91, 0x0c91,
+ 0x0ca9, 0x0ca9,
+ 0x0cb4, 0x0cb4,
+ 0x0cba, 0x0cbb,
+ 0x0cc5, 0x0cc5,
+ 0x0cc9, 0x0cc9,
+ 0x0cce, 0x0cd4,
+ 0x0cd7, 0x0cdd,
+ 0x0cdf, 0x0cdf,
+ 0x0ce2, 0x0ce5,
+ 0x0cf0, 0x0d01,
+ 0x0d04, 0x0d04,
+ 0x0d0d, 0x0d0d,
+ 0x0d11, 0x0d11,
+ 0x0d29, 0x0d29,
+ 0x0d3a, 0x0d3d,
+ 0x0d44, 0x0d45,
+ 0x0d49, 0x0d49,
+ 0x0d4e, 0x0d56,
+ 0x0d58, 0x0d5f,
+ 0x0d62, 0x0d65,
+ 0x0d70, 0x0d81,
+ 0x0d84, 0x0d84,
+ 0x0d97, 0x0d99,
+ 0x0db2, 0x0db2,
+ 0x0dbc, 0x0dbc,
+ 0x0dbe, 0x0dbf,
+ 0x0dc7, 0x0dc9,
+ 0x0dcb, 0x0dce,
+ 0x0dd5, 0x0dd5,
+ 0x0dd7, 0x0dd7,
+ 0x0de0, 0x0df1,
+ 0x0df5, 0x0e00,
+ 0x0e3b, 0x0e3e,
+ 0x0e5c, 0x0e80,
+ 0x0e83, 0x0e83,
+ 0x0e85, 0x0e86,
+ 0x0e89, 0x0e89,
+ 0x0e8b, 0x0e8c,
+ 0x0e8e, 0x0e93,
+ 0x0e98, 0x0e98,
+ 0x0ea0, 0x0ea0,
+ 0x0ea4, 0x0ea4,
+ 0x0ea6, 0x0ea6,
+ 0x0ea8, 0x0ea9,
+ 0x0eac, 0x0eac,
+ 0x0eba, 0x0eba,
+ 0x0ebe, 0x0ebf,
+ 0x0ec5, 0x0ec5,
+ 0x0ec7, 0x0ec7,
+ 0x0ece, 0x0ecf,
+ 0x0eda, 0x0edb,
+ 0x0ede, 0x0eff,
+ 0x0f48, 0x0f48,
+ 0x0f6b, 0x0f70,
+ 0x0f8c, 0x0f8f,
+ 0x0f98, 0x0f98,
+ 0x0fbd, 0x0fbd,
+ 0x0fcd, 0x0fce,
+ 0x0fd2, 0x0fff,
+ 0x1022, 0x1022,
+ 0x1028, 0x1028,
+ 0x102b, 0x102b,
+ 0x1033, 0x1035,
+ 0x103a, 0x103f,
+ 0x105a, 0x109f,
+ 0x10c6, 0x10cf,
+ 0x10fd, 0x10ff,
+ 0x115a, 0x115e,
+ 0x11a3, 0x11a7,
+ 0x11fa, 0x11ff,
+ 0x1249, 0x1249,
+ 0x124e, 0x124f,
+ 0x1257, 0x1257,
+ 0x1259, 0x1259,
+ 0x125e, 0x125f,
+ 0x1289, 0x1289,
+ 0x128e, 0x128f,
+ 0x12b1, 0x12b1,
+ 0x12b6, 0x12b7,
+ 0x12bf, 0x12bf,
+ 0x12c1, 0x12c1,
+ 0x12c6, 0x12c7,
+ 0x12d7, 0x12d7,
+ 0x1311, 0x1311,
+ 0x1316, 0x1317,
+ 0x135b, 0x135e,
+ 0x137d, 0x137f,
+ 0x139a, 0x139f,
+ 0x13f5, 0x1400,
+ 0x1677, 0x167f,
+ 0x169d, 0x169f,
+ 0x16f1, 0x16ff,
+ 0x170d, 0x170d,
+ 0x1715, 0x171f,
+ 0x1737, 0x173f,
+ 0x1754, 0x175f,
+ 0x176d, 0x176d,
+ 0x1771, 0x1771,
+ 0x1774, 0x177f,
+ 0x17b4, 0x17b5,
+ 0x17de, 0x17df,
+ 0x17ea, 0x17ef,
+ 0x17fa, 0x17ff,
+ 0x180f, 0x180f,
+ 0x181a, 0x181f,
+ 0x1878, 0x187f,
+ 0x18aa, 0x18ff,
+ 0x191d, 0x191f,
+ 0x192c, 0x192f,
+ 0x193c, 0x193f,
+ 0x1941, 0x1943,
+ 0x196e, 0x196f,
+ 0x1975, 0x197f,
+ 0x19aa, 0x19af,
+ 0x19ca, 0x19cf,
+ 0x19da, 0x19dd,
+ 0x1a1c, 0x1a1d,
+ 0x1a20, 0x1cff,
+ 0x1dc4, 0x1dff,
+ 0x1e9c, 0x1e9f,
+ 0x1efa, 0x1eff,
+ 0x1f16, 0x1f17,
+ 0x1f1e, 0x1f1f,
+ 0x1f46, 0x1f47,
+ 0x1f4e, 0x1f4f,
+ 0x1f58, 0x1f58,
+ 0x1f5a, 0x1f5a,
+ 0x1f5c, 0x1f5c,
+ 0x1f5e, 0x1f5e,
+ 0x1f7e, 0x1f7f,
+ 0x1fb5, 0x1fb5,
+ 0x1fc5, 0x1fc5,
+ 0x1fd4, 0x1fd5,
+ 0x1fdc, 0x1fdc,
+ 0x1ff0, 0x1ff1,
+ 0x1ff5, 0x1ff5,
+ 0x1fff, 0x1fff,
+ 0x200b, 0x200f,
+ 0x202a, 0x202e,
+ 0x2060, 0x206f,
+ 0x2072, 0x2073,
+ 0x208f, 0x208f,
+ 0x2095, 0x209f,
+ 0x20b6, 0x20cf,
+ 0x20ec, 0x20ff,
+ 0x214d, 0x2152,
+ 0x2184, 0x218f,
+ 0x23dc, 0x23ff,
+ 0x2427, 0x243f,
+ 0x244b, 0x245f,
+ 0x269d, 0x269f,
+ 0x26b2, 0x2700,
+ 0x2705, 0x2705,
+ 0x270a, 0x270b,
+ 0x2728, 0x2728,
+ 0x274c, 0x274c,
+ 0x274e, 0x274e,
+ 0x2753, 0x2755,
+ 0x2757, 0x2757,
+ 0x275f, 0x2760,
+ 0x2795, 0x2797,
+ 0x27b0, 0x27b0,
+ 0x27bf, 0x27bf,
+ 0x27c7, 0x27cf,
+ 0x27ec, 0x27ef,
+ 0x2b14, 0x2bff,
+ 0x2c2f, 0x2c2f,
+ 0x2c5f, 0x2c7f,
+ 0x2ceb, 0x2cf8,
+ 0x2d26, 0x2d2f,
+ 0x2d66, 0x2d6e,
+ 0x2d70, 0x2d7f,
+ 0x2d97, 0x2d9f,
+ 0x2da7, 0x2da7,
+ 0x2daf, 0x2daf,
+ 0x2db7, 0x2db7,
+ 0x2dbf, 0x2dbf,
+ 0x2dc7, 0x2dc7,
+ 0x2dcf, 0x2dcf,
+ 0x2dd7, 0x2dd7,
+ 0x2ddf, 0x2dff,
+ 0x2e18, 0x2e1b,
+ 0x2e1e, 0x2e7f,
+ 0x2e9a, 0x2e9a,
+ 0x2ef4, 0x2eff,
+ 0x2fd6, 0x2fef,
+ 0x2ffc, 0x2fff,
+ 0x3040, 0x3040,
+ 0x3097, 0x3098,
+ 0x3100, 0x3104,
+ 0x312d, 0x3130,
+ 0x318f, 0x318f,
+ 0x31b8, 0x31bf,
+ 0x31d0, 0x31ef,
+ 0x321f, 0x321f,
+ 0x3244, 0x324f,
+ 0x32ff, 0x32ff,
+ 0x4db6, 0x4dbf,
+ 0x9fbc, 0x9fff,
+ 0xa48d, 0xa48f,
+ 0xa4c7, 0xa6ff,
+ 0xa717, 0xa7ff,
+ 0xa82c, 0xabff,
+ 0xd7a4, 0xf8ff,
+ 0xfa2e, 0xfa2f,
+ 0xfa6b, 0xfa6f,
+ 0xfada, 0xfaff,
+ 0xfb07, 0xfb12,
+ 0xfb18, 0xfb1c,
+ 0xfb37, 0xfb37,
+ 0xfb3d, 0xfb3d,
+ 0xfb3f, 0xfb3f,
+ 0xfb42, 0xfb42,
+ 0xfb45, 0xfb45,
+ 0xfbb2, 0xfbd2,
+ 0xfd40, 0xfd4f,
+ 0xfd90, 0xfd91,
+ 0xfdc8, 0xfdef,
+ 0xfdfe, 0xfdff,
+ 0xfe1a, 0xfe1f,
+ 0xfe24, 0xfe2f,
+ 0xfe53, 0xfe53,
+ 0xfe67, 0xfe67,
+ 0xfe6c, 0xfe6f,
+ 0xfe75, 0xfe75,
+ 0xfefd, 0xff00,
+ 0xffbf, 0xffc1,
+ 0xffc8, 0xffc9,
+ 0xffd0, 0xffd1,
+ 0xffd8, 0xffd9,
+ 0xffdd, 0xffdf,
+ 0xffe7, 0xffe7,
+ 0xffef, 0xfffb,
+ 0xfffe, 0xffff,
+ 0x1000c, 0x1000c,
+ 0x10027, 0x10027,
+ 0x1003b, 0x1003b,
+ 0x1003e, 0x1003e,
+ 0x1004e, 0x1004f,
+ 0x1005e, 0x1007f,
+ 0x100fb, 0x100ff,
+ 0x10103, 0x10106,
+ 0x10134, 0x10136,
+ 0x1018b, 0x102ff,
+ 0x1031f, 0x1031f,
+ 0x10324, 0x1032f,
+ 0x1034b, 0x1037f,
+ 0x1039e, 0x1039e,
+ 0x103c4, 0x103c7,
+ 0x103d6, 0x103ff,
+ 0x1049e, 0x1049f,
+ 0x104aa, 0x107ff,
+ 0x10806, 0x10807,
+ 0x10809, 0x10809,
+ 0x10836, 0x10836,
+ 0x10839, 0x1083b,
+ 0x1083d, 0x1083e,
+ 0x10840, 0x109ff,
+ 0x10a04, 0x10a04,
+ 0x10a07, 0x10a0b,
+ 0x10a14, 0x10a14,
+ 0x10a18, 0x10a18,
+ 0x10a34, 0x10a37,
+ 0x10a3b, 0x10a3e,
+ 0x10a48, 0x10a4f,
+ 0x10a59, 0x1cfff,
+ 0x1d0f6, 0x1d0ff,
+ 0x1d127, 0x1d129,
+ 0x1d173, 0x1d17a,
+ 0x1d1de, 0x1d1ff,
+ 0x1d246, 0x1d2ff,
+ 0x1d357, 0x1d3ff,
+ 0x1d455, 0x1d455,
+ 0x1d49d, 0x1d49d,
+ 0x1d4a0, 0x1d4a1,
+ 0x1d4a3, 0x1d4a4,
+ 0x1d4a7, 0x1d4a8,
+ 0x1d4ad, 0x1d4ad,
+ 0x1d4ba, 0x1d4ba,
+ 0x1d4bc, 0x1d4bc,
+ 0x1d4c4, 0x1d4c4,
+ 0x1d506, 0x1d506,
+ 0x1d50b, 0x1d50c,
+ 0x1d515, 0x1d515,
+ 0x1d51d, 0x1d51d,
+ 0x1d53a, 0x1d53a,
+ 0x1d53f, 0x1d53f,
+ 0x1d545, 0x1d545,
+ 0x1d547, 0x1d549,
+ 0x1d551, 0x1d551,
+ 0x1d6a6, 0x1d6a7,
+ 0x1d7ca, 0x1d7cd,
+ 0x1d800, 0x1ffff,
+ 0x2a6d7, 0x2f7ff,
+ 0x2fa1e, 0xe00ff,
+ 0xe01f0, 0x10ffff
+}; /* CR_C */
+
+/* 'Cc': General Category */
+static const OnigCodePoint CR_Cc[] = {
+ 2,
+ 0x0000, 0x001f,
+ 0x007f, 0x009f
+}; /* CR_Cc */
+
+/* 'Cf': General Category */
+static const OnigCodePoint CR_Cf[] = {
+ 14,
+ 0x00ad, 0x00ad,
+ 0x0600, 0x0603,
+ 0x06dd, 0x06dd,
+ 0x070f, 0x070f,
+ 0x17b4, 0x17b5,
+ 0x200b, 0x200f,
+ 0x202a, 0x202e,
+ 0x2060, 0x2063,
+ 0x206a, 0x206f,
+ 0xfeff, 0xfeff,
+ 0xfff9, 0xfffb,
+ 0x1d173, 0x1d17a,
+ 0xe0001, 0xe0001,
+ 0xe0020, 0xe007f
+}; /* CR_Cf */
+
+/* 'Cn': General Category */
+static const OnigCodePoint CR_Cn[] = {
+ 420,
+ 0x0242, 0x024f,
+ 0x0370, 0x0373,
+ 0x0376, 0x0379,
+ 0x037b, 0x037d,
+ 0x037f, 0x0383,
+ 0x038b, 0x038b,
+ 0x038d, 0x038d,
+ 0x03a2, 0x03a2,
+ 0x03cf, 0x03cf,
+ 0x0487, 0x0487,
+ 0x04cf, 0x04cf,
+ 0x04fa, 0x04ff,
+ 0x0510, 0x0530,
+ 0x0557, 0x0558,
+ 0x0560, 0x0560,
+ 0x0588, 0x0588,
+ 0x058b, 0x0590,
+ 0x05ba, 0x05ba,
+ 0x05c8, 0x05cf,
+ 0x05eb, 0x05ef,
+ 0x05f5, 0x05ff,
+ 0x0604, 0x060a,
+ 0x0616, 0x061a,
+ 0x061c, 0x061d,
+ 0x0620, 0x0620,
+ 0x063b, 0x063f,
+ 0x065f, 0x065f,
+ 0x070e, 0x070e,
+ 0x074b, 0x074c,
+ 0x076e, 0x077f,
+ 0x07b2, 0x0900,
+ 0x093a, 0x093b,
+ 0x094e, 0x094f,
+ 0x0955, 0x0957,
+ 0x0971, 0x097c,
+ 0x097e, 0x0980,
+ 0x0984, 0x0984,
+ 0x098d, 0x098e,
+ 0x0991, 0x0992,
+ 0x09a9, 0x09a9,
+ 0x09b1, 0x09b1,
+ 0x09b3, 0x09b5,
+ 0x09ba, 0x09bb,
+ 0x09c5, 0x09c6,
+ 0x09c9, 0x09ca,
+ 0x09cf, 0x09d6,
+ 0x09d8, 0x09db,
+ 0x09de, 0x09de,
+ 0x09e4, 0x09e5,
+ 0x09fb, 0x0a00,
+ 0x0a04, 0x0a04,
+ 0x0a0b, 0x0a0e,
+ 0x0a11, 0x0a12,
+ 0x0a29, 0x0a29,
+ 0x0a31, 0x0a31,
+ 0x0a34, 0x0a34,
+ 0x0a37, 0x0a37,
+ 0x0a3a, 0x0a3b,
+ 0x0a3d, 0x0a3d,
+ 0x0a43, 0x0a46,
+ 0x0a49, 0x0a4a,
+ 0x0a4e, 0x0a58,
+ 0x0a5d, 0x0a5d,
+ 0x0a5f, 0x0a65,
+ 0x0a75, 0x0a80,
+ 0x0a84, 0x0a84,
+ 0x0a8e, 0x0a8e,
+ 0x0a92, 0x0a92,
+ 0x0aa9, 0x0aa9,
+ 0x0ab1, 0x0ab1,
+ 0x0ab4, 0x0ab4,
+ 0x0aba, 0x0abb,
+ 0x0ac6, 0x0ac6,
+ 0x0aca, 0x0aca,
+ 0x0ace, 0x0acf,
+ 0x0ad1, 0x0adf,
+ 0x0ae4, 0x0ae5,
+ 0x0af0, 0x0af0,
+ 0x0af2, 0x0b00,
+ 0x0b04, 0x0b04,
+ 0x0b0d, 0x0b0e,
+ 0x0b11, 0x0b12,
+ 0x0b29, 0x0b29,
+ 0x0b31, 0x0b31,
+ 0x0b34, 0x0b34,
+ 0x0b3a, 0x0b3b,
+ 0x0b44, 0x0b46,
+ 0x0b49, 0x0b4a,
+ 0x0b4e, 0x0b55,
+ 0x0b58, 0x0b5b,
+ 0x0b5e, 0x0b5e,
+ 0x0b62, 0x0b65,
+ 0x0b72, 0x0b81,
+ 0x0b84, 0x0b84,
+ 0x0b8b, 0x0b8d,
+ 0x0b91, 0x0b91,
+ 0x0b96, 0x0b98,
+ 0x0b9b, 0x0b9b,
+ 0x0b9d, 0x0b9d,
+ 0x0ba0, 0x0ba2,
+ 0x0ba5, 0x0ba7,
+ 0x0bab, 0x0bad,
+ 0x0bba, 0x0bbd,
+ 0x0bc3, 0x0bc5,
+ 0x0bc9, 0x0bc9,
+ 0x0bce, 0x0bd6,
+ 0x0bd8, 0x0be5,
+ 0x0bfb, 0x0c00,
+ 0x0c04, 0x0c04,
+ 0x0c0d, 0x0c0d,
+ 0x0c11, 0x0c11,
+ 0x0c29, 0x0c29,
+ 0x0c34, 0x0c34,
+ 0x0c3a, 0x0c3d,
+ 0x0c45, 0x0c45,
+ 0x0c49, 0x0c49,
+ 0x0c4e, 0x0c54,
+ 0x0c57, 0x0c5f,
+ 0x0c62, 0x0c65,
+ 0x0c70, 0x0c81,
+ 0x0c84, 0x0c84,
+ 0x0c8d, 0x0c8d,
+ 0x0c91, 0x0c91,
+ 0x0ca9, 0x0ca9,
+ 0x0cb4, 0x0cb4,
+ 0x0cba, 0x0cbb,
+ 0x0cc5, 0x0cc5,
+ 0x0cc9, 0x0cc9,
+ 0x0cce, 0x0cd4,
+ 0x0cd7, 0x0cdd,
+ 0x0cdf, 0x0cdf,
+ 0x0ce2, 0x0ce5,
+ 0x0cf0, 0x0d01,
+ 0x0d04, 0x0d04,
+ 0x0d0d, 0x0d0d,
+ 0x0d11, 0x0d11,
+ 0x0d29, 0x0d29,
+ 0x0d3a, 0x0d3d,
+ 0x0d44, 0x0d45,
+ 0x0d49, 0x0d49,
+ 0x0d4e, 0x0d56,
+ 0x0d58, 0x0d5f,
+ 0x0d62, 0x0d65,
+ 0x0d70, 0x0d81,
+ 0x0d84, 0x0d84,
+ 0x0d97, 0x0d99,
+ 0x0db2, 0x0db2,
+ 0x0dbc, 0x0dbc,
+ 0x0dbe, 0x0dbf,
+ 0x0dc7, 0x0dc9,
+ 0x0dcb, 0x0dce,
+ 0x0dd5, 0x0dd5,
+ 0x0dd7, 0x0dd7,
+ 0x0de0, 0x0df1,
+ 0x0df5, 0x0e00,
+ 0x0e3b, 0x0e3e,
+ 0x0e5c, 0x0e80,
+ 0x0e83, 0x0e83,
+ 0x0e85, 0x0e86,
+ 0x0e89, 0x0e89,
+ 0x0e8b, 0x0e8c,
+ 0x0e8e, 0x0e93,
+ 0x0e98, 0x0e98,
+ 0x0ea0, 0x0ea0,
+ 0x0ea4, 0x0ea4,
+ 0x0ea6, 0x0ea6,
+ 0x0ea8, 0x0ea9,
+ 0x0eac, 0x0eac,
+ 0x0eba, 0x0eba,
+ 0x0ebe, 0x0ebf,
+ 0x0ec5, 0x0ec5,
+ 0x0ec7, 0x0ec7,
+ 0x0ece, 0x0ecf,
+ 0x0eda, 0x0edb,
+ 0x0ede, 0x0eff,
+ 0x0f48, 0x0f48,
+ 0x0f6b, 0x0f70,
+ 0x0f8c, 0x0f8f,
+ 0x0f98, 0x0f98,
+ 0x0fbd, 0x0fbd,
+ 0x0fcd, 0x0fce,
+ 0x0fd2, 0x0fff,
+ 0x1022, 0x1022,
+ 0x1028, 0x1028,
+ 0x102b, 0x102b,
+ 0x1033, 0x1035,
+ 0x103a, 0x103f,
+ 0x105a, 0x109f,
+ 0x10c6, 0x10cf,
+ 0x10fd, 0x10ff,
+ 0x115a, 0x115e,
+ 0x11a3, 0x11a7,
+ 0x11fa, 0x11ff,
+ 0x1249, 0x1249,
+ 0x124e, 0x124f,
+ 0x1257, 0x1257,
+ 0x1259, 0x1259,
+ 0x125e, 0x125f,
+ 0x1289, 0x1289,
+ 0x128e, 0x128f,
+ 0x12b1, 0x12b1,
+ 0x12b6, 0x12b7,
+ 0x12bf, 0x12bf,
+ 0x12c1, 0x12c1,
+ 0x12c6, 0x12c7,
+ 0x12d7, 0x12d7,
+ 0x1311, 0x1311,
+ 0x1316, 0x1317,
+ 0x135b, 0x135e,
+ 0x137d, 0x137f,
+ 0x139a, 0x139f,
+ 0x13f5, 0x1400,
+ 0x1677, 0x167f,
+ 0x169d, 0x169f,
+ 0x16f1, 0x16ff,
+ 0x170d, 0x170d,
+ 0x1715, 0x171f,
+ 0x1737, 0x173f,
+ 0x1754, 0x175f,
+ 0x176d, 0x176d,
+ 0x1771, 0x1771,
+ 0x1774, 0x177f,
+ 0x17de, 0x17df,
+ 0x17ea, 0x17ef,
+ 0x17fa, 0x17ff,
+ 0x180f, 0x180f,
+ 0x181a, 0x181f,
+ 0x1878, 0x187f,
+ 0x18aa, 0x18ff,
+ 0x191d, 0x191f,
+ 0x192c, 0x192f,
+ 0x193c, 0x193f,
+ 0x1941, 0x1943,
+ 0x196e, 0x196f,
+ 0x1975, 0x197f,
+ 0x19aa, 0x19af,
+ 0x19ca, 0x19cf,
+ 0x19da, 0x19dd,
+ 0x1a1c, 0x1a1d,
+ 0x1a20, 0x1cff,
+ 0x1dc4, 0x1dff,
+ 0x1e9c, 0x1e9f,
+ 0x1efa, 0x1eff,
+ 0x1f16, 0x1f17,
+ 0x1f1e, 0x1f1f,
+ 0x1f46, 0x1f47,
+ 0x1f4e, 0x1f4f,
+ 0x1f58, 0x1f58,
+ 0x1f5a, 0x1f5a,
+ 0x1f5c, 0x1f5c,
+ 0x1f5e, 0x1f5e,
+ 0x1f7e, 0x1f7f,
+ 0x1fb5, 0x1fb5,
+ 0x1fc5, 0x1fc5,
+ 0x1fd4, 0x1fd5,
+ 0x1fdc, 0x1fdc,
+ 0x1ff0, 0x1ff1,
+ 0x1ff5, 0x1ff5,
+ 0x1fff, 0x1fff,
+ 0x2064, 0x2069,
+ 0x2072, 0x2073,
+ 0x208f, 0x208f,
+ 0x2095, 0x209f,
+ 0x20b6, 0x20cf,
+ 0x20ec, 0x20ff,
+ 0x214d, 0x2152,
+ 0x2184, 0x218f,
+ 0x23dc, 0x23ff,
+ 0x2427, 0x243f,
+ 0x244b, 0x245f,
+ 0x269d, 0x269f,
+ 0x26b2, 0x2700,
+ 0x2705, 0x2705,
+ 0x270a, 0x270b,
+ 0x2728, 0x2728,
+ 0x274c, 0x274c,
+ 0x274e, 0x274e,
+ 0x2753, 0x2755,
+ 0x2757, 0x2757,
+ 0x275f, 0x2760,
+ 0x2795, 0x2797,
+ 0x27b0, 0x27b0,
+ 0x27bf, 0x27bf,
+ 0x27c7, 0x27cf,
+ 0x27ec, 0x27ef,
+ 0x2b14, 0x2bff,
+ 0x2c2f, 0x2c2f,
+ 0x2c5f, 0x2c7f,
+ 0x2ceb, 0x2cf8,
+ 0x2d26, 0x2d2f,
+ 0x2d66, 0x2d6e,
+ 0x2d70, 0x2d7f,
+ 0x2d97, 0x2d9f,
+ 0x2da7, 0x2da7,
+ 0x2daf, 0x2daf,
+ 0x2db7, 0x2db7,
+ 0x2dbf, 0x2dbf,
+ 0x2dc7, 0x2dc7,
+ 0x2dcf, 0x2dcf,
+ 0x2dd7, 0x2dd7,
+ 0x2ddf, 0x2dff,
+ 0x2e18, 0x2e1b,
+ 0x2e1e, 0x2e7f,
+ 0x2e9a, 0x2e9a,
+ 0x2ef4, 0x2eff,
+ 0x2fd6, 0x2fef,
+ 0x2ffc, 0x2fff,
+ 0x3040, 0x3040,
+ 0x3097, 0x3098,
+ 0x3100, 0x3104,
+ 0x312d, 0x3130,
+ 0x318f, 0x318f,
+ 0x31b8, 0x31bf,
+ 0x31d0, 0x31ef,
+ 0x321f, 0x321f,
+ 0x3244, 0x324f,
+ 0x32ff, 0x32ff,
+ 0x4db6, 0x4dbf,
+ 0x9fbc, 0x9fff,
+ 0xa48d, 0xa48f,
+ 0xa4c7, 0xa6ff,
+ 0xa717, 0xa7ff,
+ 0xa82c, 0xabff,
+ 0xd7a4, 0xd7ff,
+ 0xfa2e, 0xfa2f,
+ 0xfa6b, 0xfa6f,
+ 0xfada, 0xfaff,
+ 0xfb07, 0xfb12,
+ 0xfb18, 0xfb1c,
+ 0xfb37, 0xfb37,
+ 0xfb3d, 0xfb3d,
+ 0xfb3f, 0xfb3f,
+ 0xfb42, 0xfb42,
+ 0xfb45, 0xfb45,
+ 0xfbb2, 0xfbd2,
+ 0xfd40, 0xfd4f,
+ 0xfd90, 0xfd91,
+ 0xfdc8, 0xfdef,
+ 0xfdfe, 0xfdff,
+ 0xfe1a, 0xfe1f,
+ 0xfe24, 0xfe2f,
+ 0xfe53, 0xfe53,
+ 0xfe67, 0xfe67,
+ 0xfe6c, 0xfe6f,
+ 0xfe75, 0xfe75,
+ 0xfefd, 0xfefe,
+ 0xff00, 0xff00,
+ 0xffbf, 0xffc1,
+ 0xffc8, 0xffc9,
+ 0xffd0, 0xffd1,
+ 0xffd8, 0xffd9,
+ 0xffdd, 0xffdf,
+ 0xffe7, 0xffe7,
+ 0xffef, 0xfff8,
+ 0xfffe, 0xffff,
+ 0x1000c, 0x1000c,
+ 0x10027, 0x10027,
+ 0x1003b, 0x1003b,
+ 0x1003e, 0x1003e,
+ 0x1004e, 0x1004f,
+ 0x1005e, 0x1007f,
+ 0x100fb, 0x100ff,
+ 0x10103, 0x10106,
+ 0x10134, 0x10136,
+ 0x1018b, 0x102ff,
+ 0x1031f, 0x1031f,
+ 0x10324, 0x1032f,
+ 0x1034b, 0x1037f,
+ 0x1039e, 0x1039e,
+ 0x103c4, 0x103c7,
+ 0x103d6, 0x103ff,
+ 0x1049e, 0x1049f,
+ 0x104aa, 0x107ff,
+ 0x10806, 0x10807,
+ 0x10809, 0x10809,
+ 0x10836, 0x10836,
+ 0x10839, 0x1083b,
+ 0x1083d, 0x1083e,
+ 0x10840, 0x109ff,
+ 0x10a04, 0x10a04,
+ 0x10a07, 0x10a0b,
+ 0x10a14, 0x10a14,
+ 0x10a18, 0x10a18,
+ 0x10a34, 0x10a37,
+ 0x10a3b, 0x10a3e,
+ 0x10a48, 0x10a4f,
+ 0x10a59, 0x1cfff,
+ 0x1d0f6, 0x1d0ff,
+ 0x1d127, 0x1d129,
+ 0x1d1de, 0x1d1ff,
+ 0x1d246, 0x1d2ff,
+ 0x1d357, 0x1d3ff,
+ 0x1d455, 0x1d455,
+ 0x1d49d, 0x1d49d,
+ 0x1d4a0, 0x1d4a1,
+ 0x1d4a3, 0x1d4a4,
+ 0x1d4a7, 0x1d4a8,
+ 0x1d4ad, 0x1d4ad,
+ 0x1d4ba, 0x1d4ba,
+ 0x1d4bc, 0x1d4bc,
+ 0x1d4c4, 0x1d4c4,
+ 0x1d506, 0x1d506,
+ 0x1d50b, 0x1d50c,
+ 0x1d515, 0x1d515,
+ 0x1d51d, 0x1d51d,
+ 0x1d53a, 0x1d53a,
+ 0x1d53f, 0x1d53f,
+ 0x1d545, 0x1d545,
+ 0x1d547, 0x1d549,
+ 0x1d551, 0x1d551,
+ 0x1d6a6, 0x1d6a7,
+ 0x1d7ca, 0x1d7cd,
+ 0x1d800, 0x1ffff,
+ 0x2a6d7, 0x2f7ff,
+ 0x2fa1e, 0xe0000,
+ 0xe0002, 0xe001f,
+ 0xe0080, 0xe00ff,
+ 0xe01f0, 0xeffff,
+ 0xffffe, 0xfffff,
+ 0x10fffe, 0x10ffff
+}; /* CR_Cn */
+
+/* 'Co': General Category */
+static const OnigCodePoint CR_Co[] = {
+ 3,
+ 0xe000, 0xf8ff,
+ 0xf0000, 0xffffd,
+ 0x100000, 0x10fffd
+}; /* CR_Co */
+
+/* 'Cs': General Category */
+static const OnigCodePoint CR_Cs[] = {
+ 1,
+ 0xd800, 0xdfff
+}; /* CR_Cs */
+
+/* 'L': Major Category */
+static const OnigCodePoint CR_L[] = {
+ 347,
+ 0x0041, 0x005a,
+ 0x0061, 0x007a,
+ 0x00aa, 0x00aa,
+ 0x00b5, 0x00b5,
+ 0x00ba, 0x00ba,
+ 0x00c0, 0x00d6,
+ 0x00d8, 0x00f6,
+ 0x00f8, 0x0241,
+ 0x0250, 0x02c1,
+ 0x02c6, 0x02d1,
+ 0x02e0, 0x02e4,
+ 0x02ee, 0x02ee,
+ 0x037a, 0x037a,
+ 0x0386, 0x0386,
+ 0x0388, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x03a1,
+ 0x03a3, 0x03ce,
+ 0x03d0, 0x03f5,
+ 0x03f7, 0x0481,
+ 0x048a, 0x04ce,
+ 0x04d0, 0x04f9,
+ 0x0500, 0x050f,
+ 0x0531, 0x0556,
+ 0x0559, 0x0559,
+ 0x0561, 0x0587,
+ 0x05d0, 0x05ea,
+ 0x05f0, 0x05f2,
+ 0x0621, 0x063a,
+ 0x0640, 0x064a,
+ 0x066e, 0x066f,
+ 0x0671, 0x06d3,
+ 0x06d5, 0x06d5,
+ 0x06e5, 0x06e6,
+ 0x06ee, 0x06ef,
+ 0x06fa, 0x06fc,
+ 0x06ff, 0x06ff,
+ 0x0710, 0x0710,
+ 0x0712, 0x072f,
+ 0x074d, 0x076d,
+ 0x0780, 0x07a5,
+ 0x07b1, 0x07b1,
+ 0x0904, 0x0939,
+ 0x093d, 0x093d,
+ 0x0950, 0x0950,
+ 0x0958, 0x0961,
+ 0x097d, 0x097d,
+ 0x0985, 0x098c,
+ 0x098f, 0x0990,
+ 0x0993, 0x09a8,
+ 0x09aa, 0x09b0,
+ 0x09b2, 0x09b2,
+ 0x09b6, 0x09b9,
+ 0x09bd, 0x09bd,
+ 0x09ce, 0x09ce,
+ 0x09dc, 0x09dd,
+ 0x09df, 0x09e1,
+ 0x09f0, 0x09f1,
+ 0x0a05, 0x0a0a,
+ 0x0a0f, 0x0a10,
+ 0x0a13, 0x0a28,
+ 0x0a2a, 0x0a30,
+ 0x0a32, 0x0a33,
+ 0x0a35, 0x0a36,
+ 0x0a38, 0x0a39,
+ 0x0a59, 0x0a5c,
+ 0x0a5e, 0x0a5e,
+ 0x0a72, 0x0a74,
+ 0x0a85, 0x0a8d,
+ 0x0a8f, 0x0a91,
+ 0x0a93, 0x0aa8,
+ 0x0aaa, 0x0ab0,
+ 0x0ab2, 0x0ab3,
+ 0x0ab5, 0x0ab9,
+ 0x0abd, 0x0abd,
+ 0x0ad0, 0x0ad0,
+ 0x0ae0, 0x0ae1,
+ 0x0b05, 0x0b0c,
+ 0x0b0f, 0x0b10,
+ 0x0b13, 0x0b28,
+ 0x0b2a, 0x0b30,
+ 0x0b32, 0x0b33,
+ 0x0b35, 0x0b39,
+ 0x0b3d, 0x0b3d,
+ 0x0b5c, 0x0b5d,
+ 0x0b5f, 0x0b61,
+ 0x0b71, 0x0b71,
+ 0x0b83, 0x0b83,
+ 0x0b85, 0x0b8a,
+ 0x0b8e, 0x0b90,
+ 0x0b92, 0x0b95,
+ 0x0b99, 0x0b9a,
+ 0x0b9c, 0x0b9c,
+ 0x0b9e, 0x0b9f,
+ 0x0ba3, 0x0ba4,
+ 0x0ba8, 0x0baa,
+ 0x0bae, 0x0bb9,
+ 0x0c05, 0x0c0c,
+ 0x0c0e, 0x0c10,
+ 0x0c12, 0x0c28,
+ 0x0c2a, 0x0c33,
+ 0x0c35, 0x0c39,
+ 0x0c60, 0x0c61,
+ 0x0c85, 0x0c8c,
+ 0x0c8e, 0x0c90,
+ 0x0c92, 0x0ca8,
+ 0x0caa, 0x0cb3,
+ 0x0cb5, 0x0cb9,
+ 0x0cbd, 0x0cbd,
+ 0x0cde, 0x0cde,
+ 0x0ce0, 0x0ce1,
+ 0x0d05, 0x0d0c,
+ 0x0d0e, 0x0d10,
+ 0x0d12, 0x0d28,
+ 0x0d2a, 0x0d39,
+ 0x0d60, 0x0d61,
+ 0x0d85, 0x0d96,
+ 0x0d9a, 0x0db1,
+ 0x0db3, 0x0dbb,
+ 0x0dbd, 0x0dbd,
+ 0x0dc0, 0x0dc6,
+ 0x0e01, 0x0e30,
+ 0x0e32, 0x0e33,
+ 0x0e40, 0x0e46,
+ 0x0e81, 0x0e82,
+ 0x0e84, 0x0e84,
+ 0x0e87, 0x0e88,
+ 0x0e8a, 0x0e8a,
+ 0x0e8d, 0x0e8d,
+ 0x0e94, 0x0e97,
+ 0x0e99, 0x0e9f,
+ 0x0ea1, 0x0ea3,
+ 0x0ea5, 0x0ea5,
+ 0x0ea7, 0x0ea7,
+ 0x0eaa, 0x0eab,
+ 0x0ead, 0x0eb0,
+ 0x0eb2, 0x0eb3,
+ 0x0ebd, 0x0ebd,
+ 0x0ec0, 0x0ec4,
+ 0x0ec6, 0x0ec6,
+ 0x0edc, 0x0edd,
+ 0x0f00, 0x0f00,
+ 0x0f40, 0x0f47,
+ 0x0f49, 0x0f6a,
+ 0x0f88, 0x0f8b,
+ 0x1000, 0x1021,
+ 0x1023, 0x1027,
+ 0x1029, 0x102a,
+ 0x1050, 0x1055,
+ 0x10a0, 0x10c5,
+ 0x10d0, 0x10fa,
+ 0x10fc, 0x10fc,
+ 0x1100, 0x1159,
+ 0x115f, 0x11a2,
+ 0x11a8, 0x11f9,
+ 0x1200, 0x1248,
+ 0x124a, 0x124d,
+ 0x1250, 0x1256,
+ 0x1258, 0x1258,
+ 0x125a, 0x125d,
+ 0x1260, 0x1288,
+ 0x128a, 0x128d,
+ 0x1290, 0x12b0,
+ 0x12b2, 0x12b5,
+ 0x12b8, 0x12be,
+ 0x12c0, 0x12c0,
+ 0x12c2, 0x12c5,
+ 0x12c8, 0x12d6,
+ 0x12d8, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x135a,
+ 0x1380, 0x138f,
+ 0x13a0, 0x13f4,
+ 0x1401, 0x166c,
+ 0x166f, 0x1676,
+ 0x1681, 0x169a,
+ 0x16a0, 0x16ea,
+ 0x1700, 0x170c,
+ 0x170e, 0x1711,
+ 0x1720, 0x1731,
+ 0x1740, 0x1751,
+ 0x1760, 0x176c,
+ 0x176e, 0x1770,
+ 0x1780, 0x17b3,
+ 0x17d7, 0x17d7,
+ 0x17dc, 0x17dc,
+ 0x1820, 0x1877,
+ 0x1880, 0x18a8,
+ 0x1900, 0x191c,
+ 0x1950, 0x196d,
+ 0x1970, 0x1974,
+ 0x1980, 0x19a9,
+ 0x19c1, 0x19c7,
+ 0x1a00, 0x1a16,
+ 0x1d00, 0x1dbf,
+ 0x1e00, 0x1e9b,
+ 0x1ea0, 0x1ef9,
+ 0x1f00, 0x1f15,
+ 0x1f18, 0x1f1d,
+ 0x1f20, 0x1f45,
+ 0x1f48, 0x1f4d,
+ 0x1f50, 0x1f57,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f7d,
+ 0x1f80, 0x1fb4,
+ 0x1fb6, 0x1fbc,
+ 0x1fbe, 0x1fbe,
+ 0x1fc2, 0x1fc4,
+ 0x1fc6, 0x1fcc,
+ 0x1fd0, 0x1fd3,
+ 0x1fd6, 0x1fdb,
+ 0x1fe0, 0x1fec,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ffc,
+ 0x2071, 0x2071,
+ 0x207f, 0x207f,
+ 0x2090, 0x2094,
+ 0x2102, 0x2102,
+ 0x2107, 0x2107,
+ 0x210a, 0x2113,
+ 0x2115, 0x2115,
+ 0x2119, 0x211d,
+ 0x2124, 0x2124,
+ 0x2126, 0x2126,
+ 0x2128, 0x2128,
+ 0x212a, 0x212d,
+ 0x212f, 0x2131,
+ 0x2133, 0x2139,
+ 0x213c, 0x213f,
+ 0x2145, 0x2149,
+ 0x2c00, 0x2c2e,
+ 0x2c30, 0x2c5e,
+ 0x2c80, 0x2ce4,
+ 0x2d00, 0x2d25,
+ 0x2d30, 0x2d65,
+ 0x2d6f, 0x2d6f,
+ 0x2d80, 0x2d96,
+ 0x2da0, 0x2da6,
+ 0x2da8, 0x2dae,
+ 0x2db0, 0x2db6,
+ 0x2db8, 0x2dbe,
+ 0x2dc0, 0x2dc6,
+ 0x2dc8, 0x2dce,
+ 0x2dd0, 0x2dd6,
+ 0x2dd8, 0x2dde,
+ 0x3005, 0x3006,
+ 0x3031, 0x3035,
+ 0x303b, 0x303c,
+ 0x3041, 0x3096,
+ 0x309d, 0x309f,
+ 0x30a1, 0x30fa,
+ 0x30fc, 0x30ff,
+ 0x3105, 0x312c,
+ 0x3131, 0x318e,
+ 0x31a0, 0x31b7,
+ 0x31f0, 0x31ff,
+ 0x3400, 0x4db5,
+ 0x4e00, 0x9fbb,
+ 0xa000, 0xa48c,
+ 0xa800, 0xa801,
+ 0xa803, 0xa805,
+ 0xa807, 0xa80a,
+ 0xa80c, 0xa822,
+ 0xac00, 0xd7a3,
+ 0xf900, 0xfa2d,
+ 0xfa30, 0xfa6a,
+ 0xfa70, 0xfad9,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xfb1d, 0xfb1d,
+ 0xfb1f, 0xfb28,
+ 0xfb2a, 0xfb36,
+ 0xfb38, 0xfb3c,
+ 0xfb3e, 0xfb3e,
+ 0xfb40, 0xfb41,
+ 0xfb43, 0xfb44,
+ 0xfb46, 0xfbb1,
+ 0xfbd3, 0xfd3d,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdfb,
+ 0xfe70, 0xfe74,
+ 0xfe76, 0xfefc,
+ 0xff21, 0xff3a,
+ 0xff41, 0xff5a,
+ 0xff66, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc,
+ 0x10000, 0x1000b,
+ 0x1000d, 0x10026,
+ 0x10028, 0x1003a,
+ 0x1003c, 0x1003d,
+ 0x1003f, 0x1004d,
+ 0x10050, 0x1005d,
+ 0x10080, 0x100fa,
+ 0x10300, 0x1031e,
+ 0x10330, 0x10349,
+ 0x10380, 0x1039d,
+ 0x103a0, 0x103c3,
+ 0x103c8, 0x103cf,
+ 0x10400, 0x1049d,
+ 0x10800, 0x10805,
+ 0x10808, 0x10808,
+ 0x1080a, 0x10835,
+ 0x10837, 0x10838,
+ 0x1083c, 0x1083c,
+ 0x1083f, 0x1083f,
+ 0x10a00, 0x10a00,
+ 0x10a10, 0x10a13,
+ 0x10a15, 0x10a17,
+ 0x10a19, 0x10a33,
+ 0x1d400, 0x1d454,
+ 0x1d456, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d51e, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d552, 0x1d6a5,
+ 0x1d6a8, 0x1d6c0,
+ 0x1d6c2, 0x1d6da,
+ 0x1d6dc, 0x1d6fa,
+ 0x1d6fc, 0x1d714,
+ 0x1d716, 0x1d734,
+ 0x1d736, 0x1d74e,
+ 0x1d750, 0x1d76e,
+ 0x1d770, 0x1d788,
+ 0x1d78a, 0x1d7a8,
+ 0x1d7aa, 0x1d7c2,
+ 0x1d7c4, 0x1d7c9,
+ 0x20000, 0x2a6d6,
+ 0x2f800, 0x2fa1d
+}; /* CR_L */
+
+/* 'Ll': General Category */
+static const OnigCodePoint CR_Ll[] = {
+ 480,
+ 0x0061, 0x007a,
+ 0x00aa, 0x00aa,
+ 0x00b5, 0x00b5,
+ 0x00ba, 0x00ba,
+ 0x00df, 0x00f6,
+ 0x00f8, 0x00ff,
+ 0x0101, 0x0101,
+ 0x0103, 0x0103,
+ 0x0105, 0x0105,
+ 0x0107, 0x0107,
+ 0x0109, 0x0109,
+ 0x010b, 0x010b,
+ 0x010d, 0x010d,
+ 0x010f, 0x010f,
+ 0x0111, 0x0111,
+ 0x0113, 0x0113,
+ 0x0115, 0x0115,
+ 0x0117, 0x0117,
+ 0x0119, 0x0119,
+ 0x011b, 0x011b,
+ 0x011d, 0x011d,
+ 0x011f, 0x011f,
+ 0x0121, 0x0121,
+ 0x0123, 0x0123,
+ 0x0125, 0x0125,
+ 0x0127, 0x0127,
+ 0x0129, 0x0129,
+ 0x012b, 0x012b,
+ 0x012d, 0x012d,
+ 0x012f, 0x012f,
+ 0x0131, 0x0131,
+ 0x0133, 0x0133,
+ 0x0135, 0x0135,
+ 0x0137, 0x0138,
+ 0x013a, 0x013a,
+ 0x013c, 0x013c,
+ 0x013e, 0x013e,
+ 0x0140, 0x0140,
+ 0x0142, 0x0142,
+ 0x0144, 0x0144,
+ 0x0146, 0x0146,
+ 0x0148, 0x0149,
+ 0x014b, 0x014b,
+ 0x014d, 0x014d,
+ 0x014f, 0x014f,
+ 0x0151, 0x0151,
+ 0x0153, 0x0153,
+ 0x0155, 0x0155,
+ 0x0157, 0x0157,
+ 0x0159, 0x0159,
+ 0x015b, 0x015b,
+ 0x015d, 0x015d,
+ 0x015f, 0x015f,
+ 0x0161, 0x0161,
+ 0x0163, 0x0163,
+ 0x0165, 0x0165,
+ 0x0167, 0x0167,
+ 0x0169, 0x0169,
+ 0x016b, 0x016b,
+ 0x016d, 0x016d,
+ 0x016f, 0x016f,
+ 0x0171, 0x0171,
+ 0x0173, 0x0173,
+ 0x0175, 0x0175,
+ 0x0177, 0x0177,
+ 0x017a, 0x017a,
+ 0x017c, 0x017c,
+ 0x017e, 0x0180,
+ 0x0183, 0x0183,
+ 0x0185, 0x0185,
+ 0x0188, 0x0188,
+ 0x018c, 0x018d,
+ 0x0192, 0x0192,
+ 0x0195, 0x0195,
+ 0x0199, 0x019b,
+ 0x019e, 0x019e,
+ 0x01a1, 0x01a1,
+ 0x01a3, 0x01a3,
+ 0x01a5, 0x01a5,
+ 0x01a8, 0x01a8,
+ 0x01aa, 0x01ab,
+ 0x01ad, 0x01ad,
+ 0x01b0, 0x01b0,
+ 0x01b4, 0x01b4,
+ 0x01b6, 0x01b6,
+ 0x01b9, 0x01ba,
+ 0x01bd, 0x01bf,
+ 0x01c6, 0x01c6,
+ 0x01c9, 0x01c9,
+ 0x01cc, 0x01cc,
+ 0x01ce, 0x01ce,
+ 0x01d0, 0x01d0,
+ 0x01d2, 0x01d2,
+ 0x01d4, 0x01d4,
+ 0x01d6, 0x01d6,
+ 0x01d8, 0x01d8,
+ 0x01da, 0x01da,
+ 0x01dc, 0x01dd,
+ 0x01df, 0x01df,
+ 0x01e1, 0x01e1,
+ 0x01e3, 0x01e3,
+ 0x01e5, 0x01e5,
+ 0x01e7, 0x01e7,
+ 0x01e9, 0x01e9,
+ 0x01eb, 0x01eb,
+ 0x01ed, 0x01ed,
+ 0x01ef, 0x01f0,
+ 0x01f3, 0x01f3,
+ 0x01f5, 0x01f5,
+ 0x01f9, 0x01f9,
+ 0x01fb, 0x01fb,
+ 0x01fd, 0x01fd,
+ 0x01ff, 0x01ff,
+ 0x0201, 0x0201,
+ 0x0203, 0x0203,
+ 0x0205, 0x0205,
+ 0x0207, 0x0207,
+ 0x0209, 0x0209,
+ 0x020b, 0x020b,
+ 0x020d, 0x020d,
+ 0x020f, 0x020f,
+ 0x0211, 0x0211,
+ 0x0213, 0x0213,
+ 0x0215, 0x0215,
+ 0x0217, 0x0217,
+ 0x0219, 0x0219,
+ 0x021b, 0x021b,
+ 0x021d, 0x021d,
+ 0x021f, 0x021f,
+ 0x0221, 0x0221,
+ 0x0223, 0x0223,
+ 0x0225, 0x0225,
+ 0x0227, 0x0227,
+ 0x0229, 0x0229,
+ 0x022b, 0x022b,
+ 0x022d, 0x022d,
+ 0x022f, 0x022f,
+ 0x0231, 0x0231,
+ 0x0233, 0x0239,
+ 0x023c, 0x023c,
+ 0x023f, 0x0240,
+ 0x0250, 0x02af,
+ 0x0390, 0x0390,
+ 0x03ac, 0x03ce,
+ 0x03d0, 0x03d1,
+ 0x03d5, 0x03d7,
+ 0x03d9, 0x03d9,
+ 0x03db, 0x03db,
+ 0x03dd, 0x03dd,
+ 0x03df, 0x03df,
+ 0x03e1, 0x03e1,
+ 0x03e3, 0x03e3,
+ 0x03e5, 0x03e5,
+ 0x03e7, 0x03e7,
+ 0x03e9, 0x03e9,
+ 0x03eb, 0x03eb,
+ 0x03ed, 0x03ed,
+ 0x03ef, 0x03f3,
+ 0x03f5, 0x03f5,
+ 0x03f8, 0x03f8,
+ 0x03fb, 0x03fc,
+ 0x0430, 0x045f,
+ 0x0461, 0x0461,
+ 0x0463, 0x0463,
+ 0x0465, 0x0465,
+ 0x0467, 0x0467,
+ 0x0469, 0x0469,
+ 0x046b, 0x046b,
+ 0x046d, 0x046d,
+ 0x046f, 0x046f,
+ 0x0471, 0x0471,
+ 0x0473, 0x0473,
+ 0x0475, 0x0475,
+ 0x0477, 0x0477,
+ 0x0479, 0x0479,
+ 0x047b, 0x047b,
+ 0x047d, 0x047d,
+ 0x047f, 0x047f,
+ 0x0481, 0x0481,
+ 0x048b, 0x048b,
+ 0x048d, 0x048d,
+ 0x048f, 0x048f,
+ 0x0491, 0x0491,
+ 0x0493, 0x0493,
+ 0x0495, 0x0495,
+ 0x0497, 0x0497,
+ 0x0499, 0x0499,
+ 0x049b, 0x049b,
+ 0x049d, 0x049d,
+ 0x049f, 0x049f,
+ 0x04a1, 0x04a1,
+ 0x04a3, 0x04a3,
+ 0x04a5, 0x04a5,
+ 0x04a7, 0x04a7,
+ 0x04a9, 0x04a9,
+ 0x04ab, 0x04ab,
+ 0x04ad, 0x04ad,
+ 0x04af, 0x04af,
+ 0x04b1, 0x04b1,
+ 0x04b3, 0x04b3,
+ 0x04b5, 0x04b5,
+ 0x04b7, 0x04b7,
+ 0x04b9, 0x04b9,
+ 0x04bb, 0x04bb,
+ 0x04bd, 0x04bd,
+ 0x04bf, 0x04bf,
+ 0x04c2, 0x04c2,
+ 0x04c4, 0x04c4,
+ 0x04c6, 0x04c6,
+ 0x04c8, 0x04c8,
+ 0x04ca, 0x04ca,
+ 0x04cc, 0x04cc,
+ 0x04ce, 0x04ce,
+ 0x04d1, 0x04d1,
+ 0x04d3, 0x04d3,
+ 0x04d5, 0x04d5,
+ 0x04d7, 0x04d7,
+ 0x04d9, 0x04d9,
+ 0x04db, 0x04db,
+ 0x04dd, 0x04dd,
+ 0x04df, 0x04df,
+ 0x04e1, 0x04e1,
+ 0x04e3, 0x04e3,
+ 0x04e5, 0x04e5,
+ 0x04e7, 0x04e7,
+ 0x04e9, 0x04e9,
+ 0x04eb, 0x04eb,
+ 0x04ed, 0x04ed,
+ 0x04ef, 0x04ef,
+ 0x04f1, 0x04f1,
+ 0x04f3, 0x04f3,
+ 0x04f5, 0x04f5,
+ 0x04f7, 0x04f7,
+ 0x04f9, 0x04f9,
+ 0x0501, 0x0501,
+ 0x0503, 0x0503,
+ 0x0505, 0x0505,
+ 0x0507, 0x0507,
+ 0x0509, 0x0509,
+ 0x050b, 0x050b,
+ 0x050d, 0x050d,
+ 0x050f, 0x050f,
+ 0x0561, 0x0587,
+ 0x1d00, 0x1d2b,
+ 0x1d62, 0x1d77,
+ 0x1d79, 0x1d9a,
+ 0x1e01, 0x1e01,
+ 0x1e03, 0x1e03,
+ 0x1e05, 0x1e05,
+ 0x1e07, 0x1e07,
+ 0x1e09, 0x1e09,
+ 0x1e0b, 0x1e0b,
+ 0x1e0d, 0x1e0d,
+ 0x1e0f, 0x1e0f,
+ 0x1e11, 0x1e11,
+ 0x1e13, 0x1e13,
+ 0x1e15, 0x1e15,
+ 0x1e17, 0x1e17,
+ 0x1e19, 0x1e19,
+ 0x1e1b, 0x1e1b,
+ 0x1e1d, 0x1e1d,
+ 0x1e1f, 0x1e1f,
+ 0x1e21, 0x1e21,
+ 0x1e23, 0x1e23,
+ 0x1e25, 0x1e25,
+ 0x1e27, 0x1e27,
+ 0x1e29, 0x1e29,
+ 0x1e2b, 0x1e2b,
+ 0x1e2d, 0x1e2d,
+ 0x1e2f, 0x1e2f,
+ 0x1e31, 0x1e31,
+ 0x1e33, 0x1e33,
+ 0x1e35, 0x1e35,
+ 0x1e37, 0x1e37,
+ 0x1e39, 0x1e39,
+ 0x1e3b, 0x1e3b,
+ 0x1e3d, 0x1e3d,
+ 0x1e3f, 0x1e3f,
+ 0x1e41, 0x1e41,
+ 0x1e43, 0x1e43,
+ 0x1e45, 0x1e45,
+ 0x1e47, 0x1e47,
+ 0x1e49, 0x1e49,
+ 0x1e4b, 0x1e4b,
+ 0x1e4d, 0x1e4d,
+ 0x1e4f, 0x1e4f,
+ 0x1e51, 0x1e51,
+ 0x1e53, 0x1e53,
+ 0x1e55, 0x1e55,
+ 0x1e57, 0x1e57,
+ 0x1e59, 0x1e59,
+ 0x1e5b, 0x1e5b,
+ 0x1e5d, 0x1e5d,
+ 0x1e5f, 0x1e5f,
+ 0x1e61, 0x1e61,
+ 0x1e63, 0x1e63,
+ 0x1e65, 0x1e65,
+ 0x1e67, 0x1e67,
+ 0x1e69, 0x1e69,
+ 0x1e6b, 0x1e6b,
+ 0x1e6d, 0x1e6d,
+ 0x1e6f, 0x1e6f,
+ 0x1e71, 0x1e71,
+ 0x1e73, 0x1e73,
+ 0x1e75, 0x1e75,
+ 0x1e77, 0x1e77,
+ 0x1e79, 0x1e79,
+ 0x1e7b, 0x1e7b,
+ 0x1e7d, 0x1e7d,
+ 0x1e7f, 0x1e7f,
+ 0x1e81, 0x1e81,
+ 0x1e83, 0x1e83,
+ 0x1e85, 0x1e85,
+ 0x1e87, 0x1e87,
+ 0x1e89, 0x1e89,
+ 0x1e8b, 0x1e8b,
+ 0x1e8d, 0x1e8d,
+ 0x1e8f, 0x1e8f,
+ 0x1e91, 0x1e91,
+ 0x1e93, 0x1e93,
+ 0x1e95, 0x1e9b,
+ 0x1ea1, 0x1ea1,
+ 0x1ea3, 0x1ea3,
+ 0x1ea5, 0x1ea5,
+ 0x1ea7, 0x1ea7,
+ 0x1ea9, 0x1ea9,
+ 0x1eab, 0x1eab,
+ 0x1ead, 0x1ead,
+ 0x1eaf, 0x1eaf,
+ 0x1eb1, 0x1eb1,
+ 0x1eb3, 0x1eb3,
+ 0x1eb5, 0x1eb5,
+ 0x1eb7, 0x1eb7,
+ 0x1eb9, 0x1eb9,
+ 0x1ebb, 0x1ebb,
+ 0x1ebd, 0x1ebd,
+ 0x1ebf, 0x1ebf,
+ 0x1ec1, 0x1ec1,
+ 0x1ec3, 0x1ec3,
+ 0x1ec5, 0x1ec5,
+ 0x1ec7, 0x1ec7,
+ 0x1ec9, 0x1ec9,
+ 0x1ecb, 0x1ecb,
+ 0x1ecd, 0x1ecd,
+ 0x1ecf, 0x1ecf,
+ 0x1ed1, 0x1ed1,
+ 0x1ed3, 0x1ed3,
+ 0x1ed5, 0x1ed5,
+ 0x1ed7, 0x1ed7,
+ 0x1ed9, 0x1ed9,
+ 0x1edb, 0x1edb,
+ 0x1edd, 0x1edd,
+ 0x1edf, 0x1edf,
+ 0x1ee1, 0x1ee1,
+ 0x1ee3, 0x1ee3,
+ 0x1ee5, 0x1ee5,
+ 0x1ee7, 0x1ee7,
+ 0x1ee9, 0x1ee9,
+ 0x1eeb, 0x1eeb,
+ 0x1eed, 0x1eed,
+ 0x1eef, 0x1eef,
+ 0x1ef1, 0x1ef1,
+ 0x1ef3, 0x1ef3,
+ 0x1ef5, 0x1ef5,
+ 0x1ef7, 0x1ef7,
+ 0x1ef9, 0x1ef9,
+ 0x1f00, 0x1f07,
+ 0x1f10, 0x1f15,
+ 0x1f20, 0x1f27,
+ 0x1f30, 0x1f37,
+ 0x1f40, 0x1f45,
+ 0x1f50, 0x1f57,
+ 0x1f60, 0x1f67,
+ 0x1f70, 0x1f7d,
+ 0x1f80, 0x1f87,
+ 0x1f90, 0x1f97,
+ 0x1fa0, 0x1fa7,
+ 0x1fb0, 0x1fb4,
+ 0x1fb6, 0x1fb7,
+ 0x1fbe, 0x1fbe,
+ 0x1fc2, 0x1fc4,
+ 0x1fc6, 0x1fc7,
+ 0x1fd0, 0x1fd3,
+ 0x1fd6, 0x1fd7,
+ 0x1fe0, 0x1fe7,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ff7,
+ 0x2071, 0x2071,
+ 0x207f, 0x207f,
+ 0x210a, 0x210a,
+ 0x210e, 0x210f,
+ 0x2113, 0x2113,
+ 0x212f, 0x212f,
+ 0x2134, 0x2134,
+ 0x2139, 0x2139,
+ 0x213c, 0x213d,
+ 0x2146, 0x2149,
+ 0x2c30, 0x2c5e,
+ 0x2c81, 0x2c81,
+ 0x2c83, 0x2c83,
+ 0x2c85, 0x2c85,
+ 0x2c87, 0x2c87,
+ 0x2c89, 0x2c89,
+ 0x2c8b, 0x2c8b,
+ 0x2c8d, 0x2c8d,
+ 0x2c8f, 0x2c8f,
+ 0x2c91, 0x2c91,
+ 0x2c93, 0x2c93,
+ 0x2c95, 0x2c95,
+ 0x2c97, 0x2c97,
+ 0x2c99, 0x2c99,
+ 0x2c9b, 0x2c9b,
+ 0x2c9d, 0x2c9d,
+ 0x2c9f, 0x2c9f,
+ 0x2ca1, 0x2ca1,
+ 0x2ca3, 0x2ca3,
+ 0x2ca5, 0x2ca5,
+ 0x2ca7, 0x2ca7,
+ 0x2ca9, 0x2ca9,
+ 0x2cab, 0x2cab,
+ 0x2cad, 0x2cad,
+ 0x2caf, 0x2caf,
+ 0x2cb1, 0x2cb1,
+ 0x2cb3, 0x2cb3,
+ 0x2cb5, 0x2cb5,
+ 0x2cb7, 0x2cb7,
+ 0x2cb9, 0x2cb9,
+ 0x2cbb, 0x2cbb,
+ 0x2cbd, 0x2cbd,
+ 0x2cbf, 0x2cbf,
+ 0x2cc1, 0x2cc1,
+ 0x2cc3, 0x2cc3,
+ 0x2cc5, 0x2cc5,
+ 0x2cc7, 0x2cc7,
+ 0x2cc9, 0x2cc9,
+ 0x2ccb, 0x2ccb,
+ 0x2ccd, 0x2ccd,
+ 0x2ccf, 0x2ccf,
+ 0x2cd1, 0x2cd1,
+ 0x2cd3, 0x2cd3,
+ 0x2cd5, 0x2cd5,
+ 0x2cd7, 0x2cd7,
+ 0x2cd9, 0x2cd9,
+ 0x2cdb, 0x2cdb,
+ 0x2cdd, 0x2cdd,
+ 0x2cdf, 0x2cdf,
+ 0x2ce1, 0x2ce1,
+ 0x2ce3, 0x2ce4,
+ 0x2d00, 0x2d25,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xff41, 0xff5a,
+ 0x10428, 0x1044f,
+ 0x1d41a, 0x1d433,
+ 0x1d44e, 0x1d454,
+ 0x1d456, 0x1d467,
+ 0x1d482, 0x1d49b,
+ 0x1d4b6, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d4cf,
+ 0x1d4ea, 0x1d503,
+ 0x1d51e, 0x1d537,
+ 0x1d552, 0x1d56b,
+ 0x1d586, 0x1d59f,
+ 0x1d5ba, 0x1d5d3,
+ 0x1d5ee, 0x1d607,
+ 0x1d622, 0x1d63b,
+ 0x1d656, 0x1d66f,
+ 0x1d68a, 0x1d6a5,
+ 0x1d6c2, 0x1d6da,
+ 0x1d6dc, 0x1d6e1,
+ 0x1d6fc, 0x1d714,
+ 0x1d716, 0x1d71b,
+ 0x1d736, 0x1d74e,
+ 0x1d750, 0x1d755,
+ 0x1d770, 0x1d788,
+ 0x1d78a, 0x1d78f,
+ 0x1d7aa, 0x1d7c2,
+ 0x1d7c4, 0x1d7c9
+}; /* CR_Ll */
+
+/* 'Lm': General Category */
+static const OnigCodePoint CR_Lm[] = {
+ 26,
+ 0x02b0, 0x02c1,
+ 0x02c6, 0x02d1,
+ 0x02e0, 0x02e4,
+ 0x02ee, 0x02ee,
+ 0x037a, 0x037a,
+ 0x0559, 0x0559,
+ 0x0640, 0x0640,
+ 0x06e5, 0x06e6,
+ 0x0e46, 0x0e46,
+ 0x0ec6, 0x0ec6,
+ 0x10fc, 0x10fc,
+ 0x17d7, 0x17d7,
+ 0x1843, 0x1843,
+ 0x1d2c, 0x1d61,
+ 0x1d78, 0x1d78,
+ 0x1d9b, 0x1dbf,
+ 0x2090, 0x2094,
+ 0x2d6f, 0x2d6f,
+ 0x3005, 0x3005,
+ 0x3031, 0x3035,
+ 0x303b, 0x303b,
+ 0x309d, 0x309e,
+ 0x30fc, 0x30fe,
+ 0xa015, 0xa015,
+ 0xff70, 0xff70,
+ 0xff9e, 0xff9f
+}; /* CR_Lm */
+
+/* 'Lo': General Category */
+static const OnigCodePoint CR_Lo[] = {
+ 245,
+ 0x01bb, 0x01bb,
+ 0x01c0, 0x01c3,
+ 0x05d0, 0x05ea,
+ 0x05f0, 0x05f2,
+ 0x0621, 0x063a,
+ 0x0641, 0x064a,
+ 0x066e, 0x066f,
+ 0x0671, 0x06d3,
+ 0x06d5, 0x06d5,
+ 0x06ee, 0x06ef,
+ 0x06fa, 0x06fc,
+ 0x06ff, 0x06ff,
+ 0x0710, 0x0710,
+ 0x0712, 0x072f,
+ 0x074d, 0x076d,
+ 0x0780, 0x07a5,
+ 0x07b1, 0x07b1,
+ 0x0904, 0x0939,
+ 0x093d, 0x093d,
+ 0x0950, 0x0950,
+ 0x0958, 0x0961,
+ 0x097d, 0x097d,
+ 0x0985, 0x098c,
+ 0x098f, 0x0990,
+ 0x0993, 0x09a8,
+ 0x09aa, 0x09b0,
+ 0x09b2, 0x09b2,
+ 0x09b6, 0x09b9,
+ 0x09bd, 0x09bd,
+ 0x09ce, 0x09ce,
+ 0x09dc, 0x09dd,
+ 0x09df, 0x09e1,
+ 0x09f0, 0x09f1,
+ 0x0a05, 0x0a0a,
+ 0x0a0f, 0x0a10,
+ 0x0a13, 0x0a28,
+ 0x0a2a, 0x0a30,
+ 0x0a32, 0x0a33,
+ 0x0a35, 0x0a36,
+ 0x0a38, 0x0a39,
+ 0x0a59, 0x0a5c,
+ 0x0a5e, 0x0a5e,
+ 0x0a72, 0x0a74,
+ 0x0a85, 0x0a8d,
+ 0x0a8f, 0x0a91,
+ 0x0a93, 0x0aa8,
+ 0x0aaa, 0x0ab0,
+ 0x0ab2, 0x0ab3,
+ 0x0ab5, 0x0ab9,
+ 0x0abd, 0x0abd,
+ 0x0ad0, 0x0ad0,
+ 0x0ae0, 0x0ae1,
+ 0x0b05, 0x0b0c,
+ 0x0b0f, 0x0b10,
+ 0x0b13, 0x0b28,
+ 0x0b2a, 0x0b30,
+ 0x0b32, 0x0b33,
+ 0x0b35, 0x0b39,
+ 0x0b3d, 0x0b3d,
+ 0x0b5c, 0x0b5d,
+ 0x0b5f, 0x0b61,
+ 0x0b71, 0x0b71,
+ 0x0b83, 0x0b83,
+ 0x0b85, 0x0b8a,
+ 0x0b8e, 0x0b90,
+ 0x0b92, 0x0b95,
+ 0x0b99, 0x0b9a,
+ 0x0b9c, 0x0b9c,
+ 0x0b9e, 0x0b9f,
+ 0x0ba3, 0x0ba4,
+ 0x0ba8, 0x0baa,
+ 0x0bae, 0x0bb9,
+ 0x0c05, 0x0c0c,
+ 0x0c0e, 0x0c10,
+ 0x0c12, 0x0c28,
+ 0x0c2a, 0x0c33,
+ 0x0c35, 0x0c39,
+ 0x0c60, 0x0c61,
+ 0x0c85, 0x0c8c,
+ 0x0c8e, 0x0c90,
+ 0x0c92, 0x0ca8,
+ 0x0caa, 0x0cb3,
+ 0x0cb5, 0x0cb9,
+ 0x0cbd, 0x0cbd,
+ 0x0cde, 0x0cde,
+ 0x0ce0, 0x0ce1,
+ 0x0d05, 0x0d0c,
+ 0x0d0e, 0x0d10,
+ 0x0d12, 0x0d28,
+ 0x0d2a, 0x0d39,
+ 0x0d60, 0x0d61,
+ 0x0d85, 0x0d96,
+ 0x0d9a, 0x0db1,
+ 0x0db3, 0x0dbb,
+ 0x0dbd, 0x0dbd,
+ 0x0dc0, 0x0dc6,
+ 0x0e01, 0x0e30,
+ 0x0e32, 0x0e33,
+ 0x0e40, 0x0e45,
+ 0x0e81, 0x0e82,
+ 0x0e84, 0x0e84,
+ 0x0e87, 0x0e88,
+ 0x0e8a, 0x0e8a,
+ 0x0e8d, 0x0e8d,
+ 0x0e94, 0x0e97,
+ 0x0e99, 0x0e9f,
+ 0x0ea1, 0x0ea3,
+ 0x0ea5, 0x0ea5,
+ 0x0ea7, 0x0ea7,
+ 0x0eaa, 0x0eab,
+ 0x0ead, 0x0eb0,
+ 0x0eb2, 0x0eb3,
+ 0x0ebd, 0x0ebd,
+ 0x0ec0, 0x0ec4,
+ 0x0edc, 0x0edd,
+ 0x0f00, 0x0f00,
+ 0x0f40, 0x0f47,
+ 0x0f49, 0x0f6a,
+ 0x0f88, 0x0f8b,
+ 0x1000, 0x1021,
+ 0x1023, 0x1027,
+ 0x1029, 0x102a,
+ 0x1050, 0x1055,
+ 0x10d0, 0x10fa,
+ 0x1100, 0x1159,
+ 0x115f, 0x11a2,
+ 0x11a8, 0x11f9,
+ 0x1200, 0x1248,
+ 0x124a, 0x124d,
+ 0x1250, 0x1256,
+ 0x1258, 0x1258,
+ 0x125a, 0x125d,
+ 0x1260, 0x1288,
+ 0x128a, 0x128d,
+ 0x1290, 0x12b0,
+ 0x12b2, 0x12b5,
+ 0x12b8, 0x12be,
+ 0x12c0, 0x12c0,
+ 0x12c2, 0x12c5,
+ 0x12c8, 0x12d6,
+ 0x12d8, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x135a,
+ 0x1380, 0x138f,
+ 0x13a0, 0x13f4,
+ 0x1401, 0x166c,
+ 0x166f, 0x1676,
+ 0x1681, 0x169a,
+ 0x16a0, 0x16ea,
+ 0x1700, 0x170c,
+ 0x170e, 0x1711,
+ 0x1720, 0x1731,
+ 0x1740, 0x1751,
+ 0x1760, 0x176c,
+ 0x176e, 0x1770,
+ 0x1780, 0x17b3,
+ 0x17dc, 0x17dc,
+ 0x1820, 0x1842,
+ 0x1844, 0x1877,
+ 0x1880, 0x18a8,
+ 0x1900, 0x191c,
+ 0x1950, 0x196d,
+ 0x1970, 0x1974,
+ 0x1980, 0x19a9,
+ 0x19c1, 0x19c7,
+ 0x1a00, 0x1a16,
+ 0x2135, 0x2138,
+ 0x2d30, 0x2d65,
+ 0x2d80, 0x2d96,
+ 0x2da0, 0x2da6,
+ 0x2da8, 0x2dae,
+ 0x2db0, 0x2db6,
+ 0x2db8, 0x2dbe,
+ 0x2dc0, 0x2dc6,
+ 0x2dc8, 0x2dce,
+ 0x2dd0, 0x2dd6,
+ 0x2dd8, 0x2dde,
+ 0x3006, 0x3006,
+ 0x303c, 0x303c,
+ 0x3041, 0x3096,
+ 0x309f, 0x309f,
+ 0x30a1, 0x30fa,
+ 0x30ff, 0x30ff,
+ 0x3105, 0x312c,
+ 0x3131, 0x318e,
+ 0x31a0, 0x31b7,
+ 0x31f0, 0x31ff,
+ 0x3400, 0x4db5,
+ 0x4e00, 0x9fbb,
+ 0xa000, 0xa014,
+ 0xa016, 0xa48c,
+ 0xa800, 0xa801,
+ 0xa803, 0xa805,
+ 0xa807, 0xa80a,
+ 0xa80c, 0xa822,
+ 0xac00, 0xd7a3,
+ 0xf900, 0xfa2d,
+ 0xfa30, 0xfa6a,
+ 0xfa70, 0xfad9,
+ 0xfb1d, 0xfb1d,
+ 0xfb1f, 0xfb28,
+ 0xfb2a, 0xfb36,
+ 0xfb38, 0xfb3c,
+ 0xfb3e, 0xfb3e,
+ 0xfb40, 0xfb41,
+ 0xfb43, 0xfb44,
+ 0xfb46, 0xfbb1,
+ 0xfbd3, 0xfd3d,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdfb,
+ 0xfe70, 0xfe74,
+ 0xfe76, 0xfefc,
+ 0xff66, 0xff6f,
+ 0xff71, 0xff9d,
+ 0xffa0, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc,
+ 0x10000, 0x1000b,
+ 0x1000d, 0x10026,
+ 0x10028, 0x1003a,
+ 0x1003c, 0x1003d,
+ 0x1003f, 0x1004d,
+ 0x10050, 0x1005d,
+ 0x10080, 0x100fa,
+ 0x10300, 0x1031e,
+ 0x10330, 0x10349,
+ 0x10380, 0x1039d,
+ 0x103a0, 0x103c3,
+ 0x103c8, 0x103cf,
+ 0x10450, 0x1049d,
+ 0x10800, 0x10805,
+ 0x10808, 0x10808,
+ 0x1080a, 0x10835,
+ 0x10837, 0x10838,
+ 0x1083c, 0x1083c,
+ 0x1083f, 0x1083f,
+ 0x10a00, 0x10a00,
+ 0x10a10, 0x10a13,
+ 0x10a15, 0x10a17,
+ 0x10a19, 0x10a33,
+ 0x20000, 0x2a6d6,
+ 0x2f800, 0x2fa1d
+}; /* CR_Lo */
+
+/* 'Lt': General Category */
+static const OnigCodePoint CR_Lt[] = {
+ 10,
+ 0x01c5, 0x01c5,
+ 0x01c8, 0x01c8,
+ 0x01cb, 0x01cb,
+ 0x01f2, 0x01f2,
+ 0x1f88, 0x1f8f,
+ 0x1f98, 0x1f9f,
+ 0x1fa8, 0x1faf,
+ 0x1fbc, 0x1fbc,
+ 0x1fcc, 0x1fcc,
+ 0x1ffc, 0x1ffc
+}; /* CR_Lt */
+
+/* 'Lu': General Category */
+static const OnigCodePoint CR_Lu[] = {
+ 476,
+ 0x0041, 0x005a,
+ 0x00c0, 0x00d6,
+ 0x00d8, 0x00de,
+ 0x0100, 0x0100,
+ 0x0102, 0x0102,
+ 0x0104, 0x0104,
+ 0x0106, 0x0106,
+ 0x0108, 0x0108,
+ 0x010a, 0x010a,
+ 0x010c, 0x010c,
+ 0x010e, 0x010e,
+ 0x0110, 0x0110,
+ 0x0112, 0x0112,
+ 0x0114, 0x0114,
+ 0x0116, 0x0116,
+ 0x0118, 0x0118,
+ 0x011a, 0x011a,
+ 0x011c, 0x011c,
+ 0x011e, 0x011e,
+ 0x0120, 0x0120,
+ 0x0122, 0x0122,
+ 0x0124, 0x0124,
+ 0x0126, 0x0126,
+ 0x0128, 0x0128,
+ 0x012a, 0x012a,
+ 0x012c, 0x012c,
+ 0x012e, 0x012e,
+ 0x0130, 0x0130,
+ 0x0132, 0x0132,
+ 0x0134, 0x0134,
+ 0x0136, 0x0136,
+ 0x0139, 0x0139,
+ 0x013b, 0x013b,
+ 0x013d, 0x013d,
+ 0x013f, 0x013f,
+ 0x0141, 0x0141,
+ 0x0143, 0x0143,
+ 0x0145, 0x0145,
+ 0x0147, 0x0147,
+ 0x014a, 0x014a,
+ 0x014c, 0x014c,
+ 0x014e, 0x014e,
+ 0x0150, 0x0150,
+ 0x0152, 0x0152,
+ 0x0154, 0x0154,
+ 0x0156, 0x0156,
+ 0x0158, 0x0158,
+ 0x015a, 0x015a,
+ 0x015c, 0x015c,
+ 0x015e, 0x015e,
+ 0x0160, 0x0160,
+ 0x0162, 0x0162,
+ 0x0164, 0x0164,
+ 0x0166, 0x0166,
+ 0x0168, 0x0168,
+ 0x016a, 0x016a,
+ 0x016c, 0x016c,
+ 0x016e, 0x016e,
+ 0x0170, 0x0170,
+ 0x0172, 0x0172,
+ 0x0174, 0x0174,
+ 0x0176, 0x0176,
+ 0x0178, 0x0179,
+ 0x017b, 0x017b,
+ 0x017d, 0x017d,
+ 0x0181, 0x0182,
+ 0x0184, 0x0184,
+ 0x0186, 0x0187,
+ 0x0189, 0x018b,
+ 0x018e, 0x0191,
+ 0x0193, 0x0194,
+ 0x0196, 0x0198,
+ 0x019c, 0x019d,
+ 0x019f, 0x01a0,
+ 0x01a2, 0x01a2,
+ 0x01a4, 0x01a4,
+ 0x01a6, 0x01a7,
+ 0x01a9, 0x01a9,
+ 0x01ac, 0x01ac,
+ 0x01ae, 0x01af,
+ 0x01b1, 0x01b3,
+ 0x01b5, 0x01b5,
+ 0x01b7, 0x01b8,
+ 0x01bc, 0x01bc,
+ 0x01c4, 0x01c4,
+ 0x01c7, 0x01c7,
+ 0x01ca, 0x01ca,
+ 0x01cd, 0x01cd,
+ 0x01cf, 0x01cf,
+ 0x01d1, 0x01d1,
+ 0x01d3, 0x01d3,
+ 0x01d5, 0x01d5,
+ 0x01d7, 0x01d7,
+ 0x01d9, 0x01d9,
+ 0x01db, 0x01db,
+ 0x01de, 0x01de,
+ 0x01e0, 0x01e0,
+ 0x01e2, 0x01e2,
+ 0x01e4, 0x01e4,
+ 0x01e6, 0x01e6,
+ 0x01e8, 0x01e8,
+ 0x01ea, 0x01ea,
+ 0x01ec, 0x01ec,
+ 0x01ee, 0x01ee,
+ 0x01f1, 0x01f1,
+ 0x01f4, 0x01f4,
+ 0x01f6, 0x01f8,
+ 0x01fa, 0x01fa,
+ 0x01fc, 0x01fc,
+ 0x01fe, 0x01fe,
+ 0x0200, 0x0200,
+ 0x0202, 0x0202,
+ 0x0204, 0x0204,
+ 0x0206, 0x0206,
+ 0x0208, 0x0208,
+ 0x020a, 0x020a,
+ 0x020c, 0x020c,
+ 0x020e, 0x020e,
+ 0x0210, 0x0210,
+ 0x0212, 0x0212,
+ 0x0214, 0x0214,
+ 0x0216, 0x0216,
+ 0x0218, 0x0218,
+ 0x021a, 0x021a,
+ 0x021c, 0x021c,
+ 0x021e, 0x021e,
+ 0x0220, 0x0220,
+ 0x0222, 0x0222,
+ 0x0224, 0x0224,
+ 0x0226, 0x0226,
+ 0x0228, 0x0228,
+ 0x022a, 0x022a,
+ 0x022c, 0x022c,
+ 0x022e, 0x022e,
+ 0x0230, 0x0230,
+ 0x0232, 0x0232,
+ 0x023a, 0x023b,
+ 0x023d, 0x023e,
+ 0x0241, 0x0241,
+ 0x0386, 0x0386,
+ 0x0388, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x038f,
+ 0x0391, 0x03a1,
+ 0x03a3, 0x03ab,
+ 0x03d2, 0x03d4,
+ 0x03d8, 0x03d8,
+ 0x03da, 0x03da,
+ 0x03dc, 0x03dc,
+ 0x03de, 0x03de,
+ 0x03e0, 0x03e0,
+ 0x03e2, 0x03e2,
+ 0x03e4, 0x03e4,
+ 0x03e6, 0x03e6,
+ 0x03e8, 0x03e8,
+ 0x03ea, 0x03ea,
+ 0x03ec, 0x03ec,
+ 0x03ee, 0x03ee,
+ 0x03f4, 0x03f4,
+ 0x03f7, 0x03f7,
+ 0x03f9, 0x03fa,
+ 0x03fd, 0x042f,
+ 0x0460, 0x0460,
+ 0x0462, 0x0462,
+ 0x0464, 0x0464,
+ 0x0466, 0x0466,
+ 0x0468, 0x0468,
+ 0x046a, 0x046a,
+ 0x046c, 0x046c,
+ 0x046e, 0x046e,
+ 0x0470, 0x0470,
+ 0x0472, 0x0472,
+ 0x0474, 0x0474,
+ 0x0476, 0x0476,
+ 0x0478, 0x0478,
+ 0x047a, 0x047a,
+ 0x047c, 0x047c,
+ 0x047e, 0x047e,
+ 0x0480, 0x0480,
+ 0x048a, 0x048a,
+ 0x048c, 0x048c,
+ 0x048e, 0x048e,
+ 0x0490, 0x0490,
+ 0x0492, 0x0492,
+ 0x0494, 0x0494,
+ 0x0496, 0x0496,
+ 0x0498, 0x0498,
+ 0x049a, 0x049a,
+ 0x049c, 0x049c,
+ 0x049e, 0x049e,
+ 0x04a0, 0x04a0,
+ 0x04a2, 0x04a2,
+ 0x04a4, 0x04a4,
+ 0x04a6, 0x04a6,
+ 0x04a8, 0x04a8,
+ 0x04aa, 0x04aa,
+ 0x04ac, 0x04ac,
+ 0x04ae, 0x04ae,
+ 0x04b0, 0x04b0,
+ 0x04b2, 0x04b2,
+ 0x04b4, 0x04b4,
+ 0x04b6, 0x04b6,
+ 0x04b8, 0x04b8,
+ 0x04ba, 0x04ba,
+ 0x04bc, 0x04bc,
+ 0x04be, 0x04be,
+ 0x04c0, 0x04c1,
+ 0x04c3, 0x04c3,
+ 0x04c5, 0x04c5,
+ 0x04c7, 0x04c7,
+ 0x04c9, 0x04c9,
+ 0x04cb, 0x04cb,
+ 0x04cd, 0x04cd,
+ 0x04d0, 0x04d0,
+ 0x04d2, 0x04d2,
+ 0x04d4, 0x04d4,
+ 0x04d6, 0x04d6,
+ 0x04d8, 0x04d8,
+ 0x04da, 0x04da,
+ 0x04dc, 0x04dc,
+ 0x04de, 0x04de,
+ 0x04e0, 0x04e0,
+ 0x04e2, 0x04e2,
+ 0x04e4, 0x04e4,
+ 0x04e6, 0x04e6,
+ 0x04e8, 0x04e8,
+ 0x04ea, 0x04ea,
+ 0x04ec, 0x04ec,
+ 0x04ee, 0x04ee,
+ 0x04f0, 0x04f0,
+ 0x04f2, 0x04f2,
+ 0x04f4, 0x04f4,
+ 0x04f6, 0x04f6,
+ 0x04f8, 0x04f8,
+ 0x0500, 0x0500,
+ 0x0502, 0x0502,
+ 0x0504, 0x0504,
+ 0x0506, 0x0506,
+ 0x0508, 0x0508,
+ 0x050a, 0x050a,
+ 0x050c, 0x050c,
+ 0x050e, 0x050e,
+ 0x0531, 0x0556,
+ 0x10a0, 0x10c5,
+ 0x1e00, 0x1e00,
+ 0x1e02, 0x1e02,
+ 0x1e04, 0x1e04,
+ 0x1e06, 0x1e06,
+ 0x1e08, 0x1e08,
+ 0x1e0a, 0x1e0a,
+ 0x1e0c, 0x1e0c,
+ 0x1e0e, 0x1e0e,
+ 0x1e10, 0x1e10,
+ 0x1e12, 0x1e12,
+ 0x1e14, 0x1e14,
+ 0x1e16, 0x1e16,
+ 0x1e18, 0x1e18,
+ 0x1e1a, 0x1e1a,
+ 0x1e1c, 0x1e1c,
+ 0x1e1e, 0x1e1e,
+ 0x1e20, 0x1e20,
+ 0x1e22, 0x1e22,
+ 0x1e24, 0x1e24,
+ 0x1e26, 0x1e26,
+ 0x1e28, 0x1e28,
+ 0x1e2a, 0x1e2a,
+ 0x1e2c, 0x1e2c,
+ 0x1e2e, 0x1e2e,
+ 0x1e30, 0x1e30,
+ 0x1e32, 0x1e32,
+ 0x1e34, 0x1e34,
+ 0x1e36, 0x1e36,
+ 0x1e38, 0x1e38,
+ 0x1e3a, 0x1e3a,
+ 0x1e3c, 0x1e3c,
+ 0x1e3e, 0x1e3e,
+ 0x1e40, 0x1e40,
+ 0x1e42, 0x1e42,
+ 0x1e44, 0x1e44,
+ 0x1e46, 0x1e46,
+ 0x1e48, 0x1e48,
+ 0x1e4a, 0x1e4a,
+ 0x1e4c, 0x1e4c,
+ 0x1e4e, 0x1e4e,
+ 0x1e50, 0x1e50,
+ 0x1e52, 0x1e52,
+ 0x1e54, 0x1e54,
+ 0x1e56, 0x1e56,
+ 0x1e58, 0x1e58,
+ 0x1e5a, 0x1e5a,
+ 0x1e5c, 0x1e5c,
+ 0x1e5e, 0x1e5e,
+ 0x1e60, 0x1e60,
+ 0x1e62, 0x1e62,
+ 0x1e64, 0x1e64,
+ 0x1e66, 0x1e66,
+ 0x1e68, 0x1e68,
+ 0x1e6a, 0x1e6a,
+ 0x1e6c, 0x1e6c,
+ 0x1e6e, 0x1e6e,
+ 0x1e70, 0x1e70,
+ 0x1e72, 0x1e72,
+ 0x1e74, 0x1e74,
+ 0x1e76, 0x1e76,
+ 0x1e78, 0x1e78,
+ 0x1e7a, 0x1e7a,
+ 0x1e7c, 0x1e7c,
+ 0x1e7e, 0x1e7e,
+ 0x1e80, 0x1e80,
+ 0x1e82, 0x1e82,
+ 0x1e84, 0x1e84,
+ 0x1e86, 0x1e86,
+ 0x1e88, 0x1e88,
+ 0x1e8a, 0x1e8a,
+ 0x1e8c, 0x1e8c,
+ 0x1e8e, 0x1e8e,
+ 0x1e90, 0x1e90,
+ 0x1e92, 0x1e92,
+ 0x1e94, 0x1e94,
+ 0x1ea0, 0x1ea0,
+ 0x1ea2, 0x1ea2,
+ 0x1ea4, 0x1ea4,
+ 0x1ea6, 0x1ea6,
+ 0x1ea8, 0x1ea8,
+ 0x1eaa, 0x1eaa,
+ 0x1eac, 0x1eac,
+ 0x1eae, 0x1eae,
+ 0x1eb0, 0x1eb0,
+ 0x1eb2, 0x1eb2,
+ 0x1eb4, 0x1eb4,
+ 0x1eb6, 0x1eb6,
+ 0x1eb8, 0x1eb8,
+ 0x1eba, 0x1eba,
+ 0x1ebc, 0x1ebc,
+ 0x1ebe, 0x1ebe,
+ 0x1ec0, 0x1ec0,
+ 0x1ec2, 0x1ec2,
+ 0x1ec4, 0x1ec4,
+ 0x1ec6, 0x1ec6,
+ 0x1ec8, 0x1ec8,
+ 0x1eca, 0x1eca,
+ 0x1ecc, 0x1ecc,
+ 0x1ece, 0x1ece,
+ 0x1ed0, 0x1ed0,
+ 0x1ed2, 0x1ed2,
+ 0x1ed4, 0x1ed4,
+ 0x1ed6, 0x1ed6,
+ 0x1ed8, 0x1ed8,
+ 0x1eda, 0x1eda,
+ 0x1edc, 0x1edc,
+ 0x1ede, 0x1ede,
+ 0x1ee0, 0x1ee0,
+ 0x1ee2, 0x1ee2,
+ 0x1ee4, 0x1ee4,
+ 0x1ee6, 0x1ee6,
+ 0x1ee8, 0x1ee8,
+ 0x1eea, 0x1eea,
+ 0x1eec, 0x1eec,
+ 0x1eee, 0x1eee,
+ 0x1ef0, 0x1ef0,
+ 0x1ef2, 0x1ef2,
+ 0x1ef4, 0x1ef4,
+ 0x1ef6, 0x1ef6,
+ 0x1ef8, 0x1ef8,
+ 0x1f08, 0x1f0f,
+ 0x1f18, 0x1f1d,
+ 0x1f28, 0x1f2f,
+ 0x1f38, 0x1f3f,
+ 0x1f48, 0x1f4d,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f5f,
+ 0x1f68, 0x1f6f,
+ 0x1fb8, 0x1fbb,
+ 0x1fc8, 0x1fcb,
+ 0x1fd8, 0x1fdb,
+ 0x1fe8, 0x1fec,
+ 0x1ff8, 0x1ffb,
+ 0x2102, 0x2102,
+ 0x2107, 0x2107,
+ 0x210b, 0x210d,
+ 0x2110, 0x2112,
+ 0x2115, 0x2115,
+ 0x2119, 0x211d,
+ 0x2124, 0x2124,
+ 0x2126, 0x2126,
+ 0x2128, 0x2128,
+ 0x212a, 0x212d,
+ 0x2130, 0x2131,
+ 0x2133, 0x2133,
+ 0x213e, 0x213f,
+ 0x2145, 0x2145,
+ 0x2c00, 0x2c2e,
+ 0x2c80, 0x2c80,
+ 0x2c82, 0x2c82,
+ 0x2c84, 0x2c84,
+ 0x2c86, 0x2c86,
+ 0x2c88, 0x2c88,
+ 0x2c8a, 0x2c8a,
+ 0x2c8c, 0x2c8c,
+ 0x2c8e, 0x2c8e,
+ 0x2c90, 0x2c90,
+ 0x2c92, 0x2c92,
+ 0x2c94, 0x2c94,
+ 0x2c96, 0x2c96,
+ 0x2c98, 0x2c98,
+ 0x2c9a, 0x2c9a,
+ 0x2c9c, 0x2c9c,
+ 0x2c9e, 0x2c9e,
+ 0x2ca0, 0x2ca0,
+ 0x2ca2, 0x2ca2,
+ 0x2ca4, 0x2ca4,
+ 0x2ca6, 0x2ca6,
+ 0x2ca8, 0x2ca8,
+ 0x2caa, 0x2caa,
+ 0x2cac, 0x2cac,
+ 0x2cae, 0x2cae,
+ 0x2cb0, 0x2cb0,
+ 0x2cb2, 0x2cb2,
+ 0x2cb4, 0x2cb4,
+ 0x2cb6, 0x2cb6,
+ 0x2cb8, 0x2cb8,
+ 0x2cba, 0x2cba,
+ 0x2cbc, 0x2cbc,
+ 0x2cbe, 0x2cbe,
+ 0x2cc0, 0x2cc0,
+ 0x2cc2, 0x2cc2,
+ 0x2cc4, 0x2cc4,
+ 0x2cc6, 0x2cc6,
+ 0x2cc8, 0x2cc8,
+ 0x2cca, 0x2cca,
+ 0x2ccc, 0x2ccc,
+ 0x2cce, 0x2cce,
+ 0x2cd0, 0x2cd0,
+ 0x2cd2, 0x2cd2,
+ 0x2cd4, 0x2cd4,
+ 0x2cd6, 0x2cd6,
+ 0x2cd8, 0x2cd8,
+ 0x2cda, 0x2cda,
+ 0x2cdc, 0x2cdc,
+ 0x2cde, 0x2cde,
+ 0x2ce0, 0x2ce0,
+ 0x2ce2, 0x2ce2,
+ 0xff21, 0xff3a,
+ 0x10400, 0x10427,
+ 0x1d400, 0x1d419,
+ 0x1d434, 0x1d44d,
+ 0x1d468, 0x1d481,
+ 0x1d49c, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b5,
+ 0x1d4d0, 0x1d4e9,
+ 0x1d504, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d538, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d56c, 0x1d585,
+ 0x1d5a0, 0x1d5b9,
+ 0x1d5d4, 0x1d5ed,
+ 0x1d608, 0x1d621,
+ 0x1d63c, 0x1d655,
+ 0x1d670, 0x1d689,
+ 0x1d6a8, 0x1d6c0,
+ 0x1d6e2, 0x1d6fa,
+ 0x1d71c, 0x1d734,
+ 0x1d756, 0x1d76e,
+ 0x1d790, 0x1d7a8
+}; /* CR_Lu */
+
+/* 'M': Major Category */
+static const OnigCodePoint CR_M[] = {
+ 133,
+ 0x0300, 0x036f,
+ 0x0483, 0x0486,
+ 0x0488, 0x0489,
+ 0x0591, 0x05b9,
+ 0x05bb, 0x05bd,
+ 0x05bf, 0x05bf,
+ 0x05c1, 0x05c2,
+ 0x05c4, 0x05c5,
+ 0x05c7, 0x05c7,
+ 0x0610, 0x0615,
+ 0x064b, 0x065e,
+ 0x0670, 0x0670,
+ 0x06d6, 0x06dc,
+ 0x06de, 0x06e4,
+ 0x06e7, 0x06e8,
+ 0x06ea, 0x06ed,
+ 0x0711, 0x0711,
+ 0x0730, 0x074a,
+ 0x07a6, 0x07b0,
+ 0x0901, 0x0903,
+ 0x093c, 0x093c,
+ 0x093e, 0x094d,
+ 0x0951, 0x0954,
+ 0x0962, 0x0963,
+ 0x0981, 0x0983,
+ 0x09bc, 0x09bc,
+ 0x09be, 0x09c4,
+ 0x09c7, 0x09c8,
+ 0x09cb, 0x09cd,
+ 0x09d7, 0x09d7,
+ 0x09e2, 0x09e3,
+ 0x0a01, 0x0a03,
+ 0x0a3c, 0x0a3c,
+ 0x0a3e, 0x0a42,
+ 0x0a47, 0x0a48,
+ 0x0a4b, 0x0a4d,
+ 0x0a70, 0x0a71,
+ 0x0a81, 0x0a83,
+ 0x0abc, 0x0abc,
+ 0x0abe, 0x0ac5,
+ 0x0ac7, 0x0ac9,
+ 0x0acb, 0x0acd,
+ 0x0ae2, 0x0ae3,
+ 0x0b01, 0x0b03,
+ 0x0b3c, 0x0b3c,
+ 0x0b3e, 0x0b43,
+ 0x0b47, 0x0b48,
+ 0x0b4b, 0x0b4d,
+ 0x0b56, 0x0b57,
+ 0x0b82, 0x0b82,
+ 0x0bbe, 0x0bc2,
+ 0x0bc6, 0x0bc8,
+ 0x0bca, 0x0bcd,
+ 0x0bd7, 0x0bd7,
+ 0x0c01, 0x0c03,
+ 0x0c3e, 0x0c44,
+ 0x0c46, 0x0c48,
+ 0x0c4a, 0x0c4d,
+ 0x0c55, 0x0c56,
+ 0x0c82, 0x0c83,
+ 0x0cbc, 0x0cbc,
+ 0x0cbe, 0x0cc4,
+ 0x0cc6, 0x0cc8,
+ 0x0cca, 0x0ccd,
+ 0x0cd5, 0x0cd6,
+ 0x0d02, 0x0d03,
+ 0x0d3e, 0x0d43,
+ 0x0d46, 0x0d48,
+ 0x0d4a, 0x0d4d,
+ 0x0d57, 0x0d57,
+ 0x0d82, 0x0d83,
+ 0x0dca, 0x0dca,
+ 0x0dcf, 0x0dd4,
+ 0x0dd6, 0x0dd6,
+ 0x0dd8, 0x0ddf,
+ 0x0df2, 0x0df3,
+ 0x0e31, 0x0e31,
+ 0x0e34, 0x0e3a,
+ 0x0e47, 0x0e4e,
+ 0x0eb1, 0x0eb1,
+ 0x0eb4, 0x0eb9,
+ 0x0ebb, 0x0ebc,
+ 0x0ec8, 0x0ecd,
+ 0x0f18, 0x0f19,
+ 0x0f35, 0x0f35,
+ 0x0f37, 0x0f37,
+ 0x0f39, 0x0f39,
+ 0x0f3e, 0x0f3f,
+ 0x0f71, 0x0f84,
+ 0x0f86, 0x0f87,
+ 0x0f90, 0x0f97,
+ 0x0f99, 0x0fbc,
+ 0x0fc6, 0x0fc6,
+ 0x102c, 0x1032,
+ 0x1036, 0x1039,
+ 0x1056, 0x1059,
+ 0x135f, 0x135f,
+ 0x1712, 0x1714,
+ 0x1732, 0x1734,
+ 0x1752, 0x1753,
+ 0x1772, 0x1773,
+ 0x17b6, 0x17d3,
+ 0x17dd, 0x17dd,
+ 0x180b, 0x180d,
+ 0x18a9, 0x18a9,
+ 0x1920, 0x192b,
+ 0x1930, 0x193b,
+ 0x19b0, 0x19c0,
+ 0x19c8, 0x19c9,
+ 0x1a17, 0x1a1b,
+ 0x1dc0, 0x1dc3,
+ 0x20d0, 0x20eb,
+ 0x302a, 0x302f,
+ 0x3099, 0x309a,
+ 0xa802, 0xa802,
+ 0xa806, 0xa806,
+ 0xa80b, 0xa80b,
+ 0xa823, 0xa827,
+ 0xfb1e, 0xfb1e,
+ 0xfe00, 0xfe0f,
+ 0xfe20, 0xfe23,
+ 0x10a01, 0x10a03,
+ 0x10a05, 0x10a06,
+ 0x10a0c, 0x10a0f,
+ 0x10a38, 0x10a3a,
+ 0x10a3f, 0x10a3f,
+ 0x1d165, 0x1d169,
+ 0x1d16d, 0x1d172,
+ 0x1d17b, 0x1d182,
+ 0x1d185, 0x1d18b,
+ 0x1d1aa, 0x1d1ad,
+ 0x1d242, 0x1d244,
+ 0xe0100, 0xe01ef
+}; /* CR_M */
+
+/* 'Mc': General Category */
+static const OnigCodePoint CR_Mc[] = {
+ 63,
+ 0x0903, 0x0903,
+ 0x093e, 0x0940,
+ 0x0949, 0x094c,
+ 0x0982, 0x0983,
+ 0x09be, 0x09c0,
+ 0x09c7, 0x09c8,
+ 0x09cb, 0x09cc,
+ 0x09d7, 0x09d7,
+ 0x0a03, 0x0a03,
+ 0x0a3e, 0x0a40,
+ 0x0a83, 0x0a83,
+ 0x0abe, 0x0ac0,
+ 0x0ac9, 0x0ac9,
+ 0x0acb, 0x0acc,
+ 0x0b02, 0x0b03,
+ 0x0b3e, 0x0b3e,
+ 0x0b40, 0x0b40,
+ 0x0b47, 0x0b48,
+ 0x0b4b, 0x0b4c,
+ 0x0b57, 0x0b57,
+ 0x0bbe, 0x0bbf,
+ 0x0bc1, 0x0bc2,
+ 0x0bc6, 0x0bc8,
+ 0x0bca, 0x0bcc,
+ 0x0bd7, 0x0bd7,
+ 0x0c01, 0x0c03,
+ 0x0c41, 0x0c44,
+ 0x0c82, 0x0c83,
+ 0x0cbe, 0x0cbe,
+ 0x0cc0, 0x0cc4,
+ 0x0cc7, 0x0cc8,
+ 0x0cca, 0x0ccb,
+ 0x0cd5, 0x0cd6,
+ 0x0d02, 0x0d03,
+ 0x0d3e, 0x0d40,
+ 0x0d46, 0x0d48,
+ 0x0d4a, 0x0d4c,
+ 0x0d57, 0x0d57,
+ 0x0d82, 0x0d83,
+ 0x0dcf, 0x0dd1,
+ 0x0dd8, 0x0ddf,
+ 0x0df2, 0x0df3,
+ 0x0f3e, 0x0f3f,
+ 0x0f7f, 0x0f7f,
+ 0x102c, 0x102c,
+ 0x1031, 0x1031,
+ 0x1038, 0x1038,
+ 0x1056, 0x1057,
+ 0x17b6, 0x17b6,
+ 0x17be, 0x17c5,
+ 0x17c7, 0x17c8,
+ 0x1923, 0x1926,
+ 0x1929, 0x192b,
+ 0x1930, 0x1931,
+ 0x1933, 0x1938,
+ 0x19b0, 0x19c0,
+ 0x19c8, 0x19c9,
+ 0x1a19, 0x1a1b,
+ 0xa802, 0xa802,
+ 0xa823, 0xa824,
+ 0xa827, 0xa827,
+ 0x1d165, 0x1d166,
+ 0x1d16d, 0x1d172
+}; /* CR_Mc */
+
+/* 'Me': General Category */
+static const OnigCodePoint CR_Me[] = {
+ 4,
+ 0x0488, 0x0489,
+ 0x06de, 0x06de,
+ 0x20dd, 0x20e0,
+ 0x20e2, 0x20e4
+}; /* CR_Me */
+
+/* 'Mn': General Category */
+static const OnigCodePoint CR_Mn[] = {
+ 124,
+ 0x0300, 0x036f,
+ 0x0483, 0x0486,
+ 0x0591, 0x05b9,
+ 0x05bb, 0x05bd,
+ 0x05bf, 0x05bf,
+ 0x05c1, 0x05c2,
+ 0x05c4, 0x05c5,
+ 0x05c7, 0x05c7,
+ 0x0610, 0x0615,
+ 0x064b, 0x065e,
+ 0x0670, 0x0670,
+ 0x06d6, 0x06dc,
+ 0x06df, 0x06e4,
+ 0x06e7, 0x06e8,
+ 0x06ea, 0x06ed,
+ 0x0711, 0x0711,
+ 0x0730, 0x074a,
+ 0x07a6, 0x07b0,
+ 0x0901, 0x0902,
+ 0x093c, 0x093c,
+ 0x0941, 0x0948,
+ 0x094d, 0x094d,
+ 0x0951, 0x0954,
+ 0x0962, 0x0963,
+ 0x0981, 0x0981,
+ 0x09bc, 0x09bc,
+ 0x09c1, 0x09c4,
+ 0x09cd, 0x09cd,
+ 0x09e2, 0x09e3,
+ 0x0a01, 0x0a02,
+ 0x0a3c, 0x0a3c,
+ 0x0a41, 0x0a42,
+ 0x0a47, 0x0a48,
+ 0x0a4b, 0x0a4d,
+ 0x0a70, 0x0a71,
+ 0x0a81, 0x0a82,
+ 0x0abc, 0x0abc,
+ 0x0ac1, 0x0ac5,
+ 0x0ac7, 0x0ac8,
+ 0x0acd, 0x0acd,
+ 0x0ae2, 0x0ae3,
+ 0x0b01, 0x0b01,
+ 0x0b3c, 0x0b3c,
+ 0x0b3f, 0x0b3f,
+ 0x0b41, 0x0b43,
+ 0x0b4d, 0x0b4d,
+ 0x0b56, 0x0b56,
+ 0x0b82, 0x0b82,
+ 0x0bc0, 0x0bc0,
+ 0x0bcd, 0x0bcd,
+ 0x0c3e, 0x0c40,
+ 0x0c46, 0x0c48,
+ 0x0c4a, 0x0c4d,
+ 0x0c55, 0x0c56,
+ 0x0cbc, 0x0cbc,
+ 0x0cbf, 0x0cbf,
+ 0x0cc6, 0x0cc6,
+ 0x0ccc, 0x0ccd,
+ 0x0d41, 0x0d43,
+ 0x0d4d, 0x0d4d,
+ 0x0dca, 0x0dca,
+ 0x0dd2, 0x0dd4,
+ 0x0dd6, 0x0dd6,
+ 0x0e31, 0x0e31,
+ 0x0e34, 0x0e3a,
+ 0x0e47, 0x0e4e,
+ 0x0eb1, 0x0eb1,
+ 0x0eb4, 0x0eb9,
+ 0x0ebb, 0x0ebc,
+ 0x0ec8, 0x0ecd,
+ 0x0f18, 0x0f19,
+ 0x0f35, 0x0f35,
+ 0x0f37, 0x0f37,
+ 0x0f39, 0x0f39,
+ 0x0f71, 0x0f7e,
+ 0x0f80, 0x0f84,
+ 0x0f86, 0x0f87,
+ 0x0f90, 0x0f97,
+ 0x0f99, 0x0fbc,
+ 0x0fc6, 0x0fc6,
+ 0x102d, 0x1030,
+ 0x1032, 0x1032,
+ 0x1036, 0x1037,
+ 0x1039, 0x1039,
+ 0x1058, 0x1059,
+ 0x135f, 0x135f,
+ 0x1712, 0x1714,
+ 0x1732, 0x1734,
+ 0x1752, 0x1753,
+ 0x1772, 0x1773,
+ 0x17b7, 0x17bd,
+ 0x17c6, 0x17c6,
+ 0x17c9, 0x17d3,
+ 0x17dd, 0x17dd,
+ 0x180b, 0x180d,
+ 0x18a9, 0x18a9,
+ 0x1920, 0x1922,
+ 0x1927, 0x1928,
+ 0x1932, 0x1932,
+ 0x1939, 0x193b,
+ 0x1a17, 0x1a18,
+ 0x1dc0, 0x1dc3,
+ 0x20d0, 0x20dc,
+ 0x20e1, 0x20e1,
+ 0x20e5, 0x20eb,
+ 0x302a, 0x302f,
+ 0x3099, 0x309a,
+ 0xa806, 0xa806,
+ 0xa80b, 0xa80b,
+ 0xa825, 0xa826,
+ 0xfb1e, 0xfb1e,
+ 0xfe00, 0xfe0f,
+ 0xfe20, 0xfe23,
+ 0x10a01, 0x10a03,
+ 0x10a05, 0x10a06,
+ 0x10a0c, 0x10a0f,
+ 0x10a38, 0x10a3a,
+ 0x10a3f, 0x10a3f,
+ 0x1d167, 0x1d169,
+ 0x1d17b, 0x1d182,
+ 0x1d185, 0x1d18b,
+ 0x1d1aa, 0x1d1ad,
+ 0x1d242, 0x1d244,
+ 0xe0100, 0xe01ef
+}; /* CR_Mn */
+
+/* 'N': Major Category */
+static const OnigCodePoint CR_N[] = {
+ 53,
+ 0x0030, 0x0039,
+ 0x00b2, 0x00b3,
+ 0x00b9, 0x00b9,
+ 0x00bc, 0x00be,
+ 0x0660, 0x0669,
+ 0x06f0, 0x06f9,
+ 0x0966, 0x096f,
+ 0x09e6, 0x09ef,
+ 0x09f4, 0x09f9,
+ 0x0a66, 0x0a6f,
+ 0x0ae6, 0x0aef,
+ 0x0b66, 0x0b6f,
+ 0x0be6, 0x0bf2,
+ 0x0c66, 0x0c6f,
+ 0x0ce6, 0x0cef,
+ 0x0d66, 0x0d6f,
+ 0x0e50, 0x0e59,
+ 0x0ed0, 0x0ed9,
+ 0x0f20, 0x0f33,
+ 0x1040, 0x1049,
+ 0x1369, 0x137c,
+ 0x16ee, 0x16f0,
+ 0x17e0, 0x17e9,
+ 0x17f0, 0x17f9,
+ 0x1810, 0x1819,
+ 0x1946, 0x194f,
+ 0x19d0, 0x19d9,
+ 0x2070, 0x2070,
+ 0x2074, 0x2079,
+ 0x2080, 0x2089,
+ 0x2153, 0x2183,
+ 0x2460, 0x249b,
+ 0x24ea, 0x24ff,
+ 0x2776, 0x2793,
+ 0x2cfd, 0x2cfd,
+ 0x3007, 0x3007,
+ 0x3021, 0x3029,
+ 0x3038, 0x303a,
+ 0x3192, 0x3195,
+ 0x3220, 0x3229,
+ 0x3251, 0x325f,
+ 0x3280, 0x3289,
+ 0x32b1, 0x32bf,
+ 0xff10, 0xff19,
+ 0x10107, 0x10133,
+ 0x10140, 0x10178,
+ 0x1018a, 0x1018a,
+ 0x10320, 0x10323,
+ 0x1034a, 0x1034a,
+ 0x103d1, 0x103d5,
+ 0x104a0, 0x104a9,
+ 0x10a40, 0x10a47,
+ 0x1d7ce, 0x1d7ff
+}; /* CR_N */
+
+/* 'Nd': General Category */
+static const OnigCodePoint CR_Nd[] = {
+ 23,
+ 0x0030, 0x0039,
+ 0x0660, 0x0669,
+ 0x06f0, 0x06f9,
+ 0x0966, 0x096f,
+ 0x09e6, 0x09ef,
+ 0x0a66, 0x0a6f,
+ 0x0ae6, 0x0aef,
+ 0x0b66, 0x0b6f,
+ 0x0be6, 0x0bef,
+ 0x0c66, 0x0c6f,
+ 0x0ce6, 0x0cef,
+ 0x0d66, 0x0d6f,
+ 0x0e50, 0x0e59,
+ 0x0ed0, 0x0ed9,
+ 0x0f20, 0x0f29,
+ 0x1040, 0x1049,
+ 0x17e0, 0x17e9,
+ 0x1810, 0x1819,
+ 0x1946, 0x194f,
+ 0x19d0, 0x19d9,
+ 0xff10, 0xff19,
+ 0x104a0, 0x104a9,
+ 0x1d7ce, 0x1d7ff
+}; /* CR_Nd */
+
+/* 'Nl': General Category */
+static const OnigCodePoint CR_Nl[] = {
+ 8,
+ 0x16ee, 0x16f0,
+ 0x2160, 0x2183,
+ 0x3007, 0x3007,
+ 0x3021, 0x3029,
+ 0x3038, 0x303a,
+ 0x10140, 0x10174,
+ 0x1034a, 0x1034a,
+ 0x103d1, 0x103d5
+}; /* CR_Nl */
+
+/* 'No': General Category */
+static const OnigCodePoint CR_No[] = {
+ 26,
+ 0x00b2, 0x00b3,
+ 0x00b9, 0x00b9,
+ 0x00bc, 0x00be,
+ 0x09f4, 0x09f9,
+ 0x0bf0, 0x0bf2,
+ 0x0f2a, 0x0f33,
+ 0x1369, 0x137c,
+ 0x17f0, 0x17f9,
+ 0x2070, 0x2070,
+ 0x2074, 0x2079,
+ 0x2080, 0x2089,
+ 0x2153, 0x215f,
+ 0x2460, 0x249b,
+ 0x24ea, 0x24ff,
+ 0x2776, 0x2793,
+ 0x2cfd, 0x2cfd,
+ 0x3192, 0x3195,
+ 0x3220, 0x3229,
+ 0x3251, 0x325f,
+ 0x3280, 0x3289,
+ 0x32b1, 0x32bf,
+ 0x10107, 0x10133,
+ 0x10175, 0x10178,
+ 0x1018a, 0x1018a,
+ 0x10320, 0x10323,
+ 0x10a40, 0x10a47
+}; /* CR_No */
+
+/* 'P': Major Category */
+static const OnigCodePoint CR_P[] = {
+ 96,
+ 0x0021, 0x0023,
+ 0x0025, 0x002a,
+ 0x002c, 0x002f,
+ 0x003a, 0x003b,
+ 0x003f, 0x0040,
+ 0x005b, 0x005d,
+ 0x005f, 0x005f,
+ 0x007b, 0x007b,
+ 0x007d, 0x007d,
+ 0x00a1, 0x00a1,
+ 0x00ab, 0x00ab,
+ 0x00b7, 0x00b7,
+ 0x00bb, 0x00bb,
+ 0x00bf, 0x00bf,
+ 0x037e, 0x037e,
+ 0x0387, 0x0387,
+ 0x055a, 0x055f,
+ 0x0589, 0x058a,
+ 0x05be, 0x05be,
+ 0x05c0, 0x05c0,
+ 0x05c3, 0x05c3,
+ 0x05c6, 0x05c6,
+ 0x05f3, 0x05f4,
+ 0x060c, 0x060d,
+ 0x061b, 0x061b,
+ 0x061e, 0x061f,
+ 0x066a, 0x066d,
+ 0x06d4, 0x06d4,
+ 0x0700, 0x070d,
+ 0x0964, 0x0965,
+ 0x0970, 0x0970,
+ 0x0df4, 0x0df4,
+ 0x0e4f, 0x0e4f,
+ 0x0e5a, 0x0e5b,
+ 0x0f04, 0x0f12,
+ 0x0f3a, 0x0f3d,
+ 0x0f85, 0x0f85,
+ 0x0fd0, 0x0fd1,
+ 0x104a, 0x104f,
+ 0x10fb, 0x10fb,
+ 0x1361, 0x1368,
+ 0x166d, 0x166e,
+ 0x169b, 0x169c,
+ 0x16eb, 0x16ed,
+ 0x1735, 0x1736,
+ 0x17d4, 0x17d6,
+ 0x17d8, 0x17da,
+ 0x1800, 0x180a,
+ 0x1944, 0x1945,
+ 0x19de, 0x19df,
+ 0x1a1e, 0x1a1f,
+ 0x2010, 0x2027,
+ 0x2030, 0x2043,
+ 0x2045, 0x2051,
+ 0x2053, 0x205e,
+ 0x207d, 0x207e,
+ 0x208d, 0x208e,
+ 0x2329, 0x232a,
+ 0x23b4, 0x23b6,
+ 0x2768, 0x2775,
+ 0x27c5, 0x27c6,
+ 0x27e6, 0x27eb,
+ 0x2983, 0x2998,
+ 0x29d8, 0x29db,
+ 0x29fc, 0x29fd,
+ 0x2cf9, 0x2cfc,
+ 0x2cfe, 0x2cff,
+ 0x2e00, 0x2e17,
+ 0x2e1c, 0x2e1d,
+ 0x3001, 0x3003,
+ 0x3008, 0x3011,
+ 0x3014, 0x301f,
+ 0x3030, 0x3030,
+ 0x303d, 0x303d,
+ 0x30a0, 0x30a0,
+ 0x30fb, 0x30fb,
+ 0xfd3e, 0xfd3f,
+ 0xfe10, 0xfe19,
+ 0xfe30, 0xfe52,
+ 0xfe54, 0xfe61,
+ 0xfe63, 0xfe63,
+ 0xfe68, 0xfe68,
+ 0xfe6a, 0xfe6b,
+ 0xff01, 0xff03,
+ 0xff05, 0xff0a,
+ 0xff0c, 0xff0f,
+ 0xff1a, 0xff1b,
+ 0xff1f, 0xff20,
+ 0xff3b, 0xff3d,
+ 0xff3f, 0xff3f,
+ 0xff5b, 0xff5b,
+ 0xff5d, 0xff5d,
+ 0xff5f, 0xff65,
+ 0x10100, 0x10101,
+ 0x1039f, 0x1039f,
+ 0x10a50, 0x10a58
+}; /* CR_P */
+
+/* 'Pc': General Category */
+static const OnigCodePoint CR_Pc[] = {
+ 6,
+ 0x005f, 0x005f,
+ 0x203f, 0x2040,
+ 0x2054, 0x2054,
+ 0xfe33, 0xfe34,
+ 0xfe4d, 0xfe4f,
+ 0xff3f, 0xff3f
+}; /* CR_Pc */
+
+/* 'Pd': General Category */
+static const OnigCodePoint CR_Pd[] = {
+ 12,
+ 0x002d, 0x002d,
+ 0x058a, 0x058a,
+ 0x1806, 0x1806,
+ 0x2010, 0x2015,
+ 0x2e17, 0x2e17,
+ 0x301c, 0x301c,
+ 0x3030, 0x3030,
+ 0x30a0, 0x30a0,
+ 0xfe31, 0xfe32,
+ 0xfe58, 0xfe58,
+ 0xfe63, 0xfe63,
+ 0xff0d, 0xff0d
+}; /* CR_Pd */
+
+/* 'Pe': General Category */
+static const OnigCodePoint CR_Pe[] = {
+ 65,
+ 0x0029, 0x0029,
+ 0x005d, 0x005d,
+ 0x007d, 0x007d,
+ 0x0f3b, 0x0f3b,
+ 0x0f3d, 0x0f3d,
+ 0x169c, 0x169c,
+ 0x2046, 0x2046,
+ 0x207e, 0x207e,
+ 0x208e, 0x208e,
+ 0x232a, 0x232a,
+ 0x23b5, 0x23b5,
+ 0x2769, 0x2769,
+ 0x276b, 0x276b,
+ 0x276d, 0x276d,
+ 0x276f, 0x276f,
+ 0x2771, 0x2771,
+ 0x2773, 0x2773,
+ 0x2775, 0x2775,
+ 0x27c6, 0x27c6,
+ 0x27e7, 0x27e7,
+ 0x27e9, 0x27e9,
+ 0x27eb, 0x27eb,
+ 0x2984, 0x2984,
+ 0x2986, 0x2986,
+ 0x2988, 0x2988,
+ 0x298a, 0x298a,
+ 0x298c, 0x298c,
+ 0x298e, 0x298e,
+ 0x2990, 0x2990,
+ 0x2992, 0x2992,
+ 0x2994, 0x2994,
+ 0x2996, 0x2996,
+ 0x2998, 0x2998,
+ 0x29d9, 0x29d9,
+ 0x29db, 0x29db,
+ 0x29fd, 0x29fd,
+ 0x3009, 0x3009,
+ 0x300b, 0x300b,
+ 0x300d, 0x300d,
+ 0x300f, 0x300f,
+ 0x3011, 0x3011,
+ 0x3015, 0x3015,
+ 0x3017, 0x3017,
+ 0x3019, 0x3019,
+ 0x301b, 0x301b,
+ 0x301e, 0x301f,
+ 0xfd3f, 0xfd3f,
+ 0xfe18, 0xfe18,
+ 0xfe36, 0xfe36,
+ 0xfe38, 0xfe38,
+ 0xfe3a, 0xfe3a,
+ 0xfe3c, 0xfe3c,
+ 0xfe3e, 0xfe3e,
+ 0xfe40, 0xfe40,
+ 0xfe42, 0xfe42,
+ 0xfe44, 0xfe44,
+ 0xfe48, 0xfe48,
+ 0xfe5a, 0xfe5a,
+ 0xfe5c, 0xfe5c,
+ 0xfe5e, 0xfe5e,
+ 0xff09, 0xff09,
+ 0xff3d, 0xff3d,
+ 0xff5d, 0xff5d,
+ 0xff60, 0xff60,
+ 0xff63, 0xff63
+}; /* CR_Pe */
+
+/* 'Pf': General Category */
+static const OnigCodePoint CR_Pf[] = {
+ 9,
+ 0x00bb, 0x00bb,
+ 0x2019, 0x2019,
+ 0x201d, 0x201d,
+ 0x203a, 0x203a,
+ 0x2e03, 0x2e03,
+ 0x2e05, 0x2e05,
+ 0x2e0a, 0x2e0a,
+ 0x2e0d, 0x2e0d,
+ 0x2e1d, 0x2e1d
+}; /* CR_Pf */
+
+/* 'Pi': General Category */
+static const OnigCodePoint CR_Pi[] = {
+ 10,
+ 0x00ab, 0x00ab,
+ 0x2018, 0x2018,
+ 0x201b, 0x201c,
+ 0x201f, 0x201f,
+ 0x2039, 0x2039,
+ 0x2e02, 0x2e02,
+ 0x2e04, 0x2e04,
+ 0x2e09, 0x2e09,
+ 0x2e0c, 0x2e0c,
+ 0x2e1c, 0x2e1c
+}; /* CR_Pi */
+
+/* 'Po': General Category */
+static const OnigCodePoint CR_Po[] = {
+ 88,
+ 0x0021, 0x0023,
+ 0x0025, 0x0027,
+ 0x002a, 0x002a,
+ 0x002c, 0x002c,
+ 0x002e, 0x002f,
+ 0x003a, 0x003b,
+ 0x003f, 0x0040,
+ 0x005c, 0x005c,
+ 0x00a1, 0x00a1,
+ 0x00b7, 0x00b7,
+ 0x00bf, 0x00bf,
+ 0x037e, 0x037e,
+ 0x0387, 0x0387,
+ 0x055a, 0x055f,
+ 0x0589, 0x0589,
+ 0x05be, 0x05be,
+ 0x05c0, 0x05c0,
+ 0x05c3, 0x05c3,
+ 0x05c6, 0x05c6,
+ 0x05f3, 0x05f4,
+ 0x060c, 0x060d,
+ 0x061b, 0x061b,
+ 0x061e, 0x061f,
+ 0x066a, 0x066d,
+ 0x06d4, 0x06d4,
+ 0x0700, 0x070d,
+ 0x0964, 0x0965,
+ 0x0970, 0x0970,
+ 0x0df4, 0x0df4,
+ 0x0e4f, 0x0e4f,
+ 0x0e5a, 0x0e5b,
+ 0x0f04, 0x0f12,
+ 0x0f85, 0x0f85,
+ 0x0fd0, 0x0fd1,
+ 0x104a, 0x104f,
+ 0x10fb, 0x10fb,
+ 0x1361, 0x1368,
+ 0x166d, 0x166e,
+ 0x16eb, 0x16ed,
+ 0x1735, 0x1736,
+ 0x17d4, 0x17d6,
+ 0x17d8, 0x17da,
+ 0x1800, 0x1805,
+ 0x1807, 0x180a,
+ 0x1944, 0x1945,
+ 0x19de, 0x19df,
+ 0x1a1e, 0x1a1f,
+ 0x2016, 0x2017,
+ 0x2020, 0x2027,
+ 0x2030, 0x2038,
+ 0x203b, 0x203e,
+ 0x2041, 0x2043,
+ 0x2047, 0x2051,
+ 0x2053, 0x2053,
+ 0x2055, 0x205e,
+ 0x23b6, 0x23b6,
+ 0x2cf9, 0x2cfc,
+ 0x2cfe, 0x2cff,
+ 0x2e00, 0x2e01,
+ 0x2e06, 0x2e08,
+ 0x2e0b, 0x2e0b,
+ 0x2e0e, 0x2e16,
+ 0x3001, 0x3003,
+ 0x303d, 0x303d,
+ 0x30fb, 0x30fb,
+ 0xfe10, 0xfe16,
+ 0xfe19, 0xfe19,
+ 0xfe30, 0xfe30,
+ 0xfe45, 0xfe46,
+ 0xfe49, 0xfe4c,
+ 0xfe50, 0xfe52,
+ 0xfe54, 0xfe57,
+ 0xfe5f, 0xfe61,
+ 0xfe68, 0xfe68,
+ 0xfe6a, 0xfe6b,
+ 0xff01, 0xff03,
+ 0xff05, 0xff07,
+ 0xff0a, 0xff0a,
+ 0xff0c, 0xff0c,
+ 0xff0e, 0xff0f,
+ 0xff1a, 0xff1b,
+ 0xff1f, 0xff20,
+ 0xff3c, 0xff3c,
+ 0xff61, 0xff61,
+ 0xff64, 0xff65,
+ 0x10100, 0x10101,
+ 0x1039f, 0x1039f,
+ 0x10a50, 0x10a58
+}; /* CR_Po */
+
+/* 'Ps': General Category */
+static const OnigCodePoint CR_Ps[] = {
+ 67,
+ 0x0028, 0x0028,
+ 0x005b, 0x005b,
+ 0x007b, 0x007b,
+ 0x0f3a, 0x0f3a,
+ 0x0f3c, 0x0f3c,
+ 0x169b, 0x169b,
+ 0x201a, 0x201a,
+ 0x201e, 0x201e,
+ 0x2045, 0x2045,
+ 0x207d, 0x207d,
+ 0x208d, 0x208d,
+ 0x2329, 0x2329,
+ 0x23b4, 0x23b4,
+ 0x2768, 0x2768,
+ 0x276a, 0x276a,
+ 0x276c, 0x276c,
+ 0x276e, 0x276e,
+ 0x2770, 0x2770,
+ 0x2772, 0x2772,
+ 0x2774, 0x2774,
+ 0x27c5, 0x27c5,
+ 0x27e6, 0x27e6,
+ 0x27e8, 0x27e8,
+ 0x27ea, 0x27ea,
+ 0x2983, 0x2983,
+ 0x2985, 0x2985,
+ 0x2987, 0x2987,
+ 0x2989, 0x2989,
+ 0x298b, 0x298b,
+ 0x298d, 0x298d,
+ 0x298f, 0x298f,
+ 0x2991, 0x2991,
+ 0x2993, 0x2993,
+ 0x2995, 0x2995,
+ 0x2997, 0x2997,
+ 0x29d8, 0x29d8,
+ 0x29da, 0x29da,
+ 0x29fc, 0x29fc,
+ 0x3008, 0x3008,
+ 0x300a, 0x300a,
+ 0x300c, 0x300c,
+ 0x300e, 0x300e,
+ 0x3010, 0x3010,
+ 0x3014, 0x3014,
+ 0x3016, 0x3016,
+ 0x3018, 0x3018,
+ 0x301a, 0x301a,
+ 0x301d, 0x301d,
+ 0xfd3e, 0xfd3e,
+ 0xfe17, 0xfe17,
+ 0xfe35, 0xfe35,
+ 0xfe37, 0xfe37,
+ 0xfe39, 0xfe39,
+ 0xfe3b, 0xfe3b,
+ 0xfe3d, 0xfe3d,
+ 0xfe3f, 0xfe3f,
+ 0xfe41, 0xfe41,
+ 0xfe43, 0xfe43,
+ 0xfe47, 0xfe47,
+ 0xfe59, 0xfe59,
+ 0xfe5b, 0xfe5b,
+ 0xfe5d, 0xfe5d,
+ 0xff08, 0xff08,
+ 0xff3b, 0xff3b,
+ 0xff5b, 0xff5b,
+ 0xff5f, 0xff5f,
+ 0xff62, 0xff62
+}; /* CR_Ps */
+
+/* 'S': Major Category */
+static const OnigCodePoint CR_S[] = {
+ 162,
+ 0x0024, 0x0024,
+ 0x002b, 0x002b,
+ 0x003c, 0x003e,
+ 0x005e, 0x005e,
+ 0x0060, 0x0060,
+ 0x007c, 0x007c,
+ 0x007e, 0x007e,
+ 0x00a2, 0x00a9,
+ 0x00ac, 0x00ac,
+ 0x00ae, 0x00b1,
+ 0x00b4, 0x00b4,
+ 0x00b6, 0x00b6,
+ 0x00b8, 0x00b8,
+ 0x00d7, 0x00d7,
+ 0x00f7, 0x00f7,
+ 0x02c2, 0x02c5,
+ 0x02d2, 0x02df,
+ 0x02e5, 0x02ed,
+ 0x02ef, 0x02ff,
+ 0x0374, 0x0375,
+ 0x0384, 0x0385,
+ 0x03f6, 0x03f6,
+ 0x0482, 0x0482,
+ 0x060b, 0x060b,
+ 0x060e, 0x060f,
+ 0x06e9, 0x06e9,
+ 0x06fd, 0x06fe,
+ 0x09f2, 0x09f3,
+ 0x09fa, 0x09fa,
+ 0x0af1, 0x0af1,
+ 0x0b70, 0x0b70,
+ 0x0bf3, 0x0bfa,
+ 0x0e3f, 0x0e3f,
+ 0x0f01, 0x0f03,
+ 0x0f13, 0x0f17,
+ 0x0f1a, 0x0f1f,
+ 0x0f34, 0x0f34,
+ 0x0f36, 0x0f36,
+ 0x0f38, 0x0f38,
+ 0x0fbe, 0x0fc5,
+ 0x0fc7, 0x0fcc,
+ 0x0fcf, 0x0fcf,
+ 0x1360, 0x1360,
+ 0x1390, 0x1399,
+ 0x17db, 0x17db,
+ 0x1940, 0x1940,
+ 0x19e0, 0x19ff,
+ 0x1fbd, 0x1fbd,
+ 0x1fbf, 0x1fc1,
+ 0x1fcd, 0x1fcf,
+ 0x1fdd, 0x1fdf,
+ 0x1fed, 0x1fef,
+ 0x1ffd, 0x1ffe,
+ 0x2044, 0x2044,
+ 0x2052, 0x2052,
+ 0x207a, 0x207c,
+ 0x208a, 0x208c,
+ 0x20a0, 0x20b5,
+ 0x2100, 0x2101,
+ 0x2103, 0x2106,
+ 0x2108, 0x2109,
+ 0x2114, 0x2114,
+ 0x2116, 0x2118,
+ 0x211e, 0x2123,
+ 0x2125, 0x2125,
+ 0x2127, 0x2127,
+ 0x2129, 0x2129,
+ 0x212e, 0x212e,
+ 0x2132, 0x2132,
+ 0x213a, 0x213b,
+ 0x2140, 0x2144,
+ 0x214a, 0x214c,
+ 0x2190, 0x2328,
+ 0x232b, 0x23b3,
+ 0x23b7, 0x23db,
+ 0x2400, 0x2426,
+ 0x2440, 0x244a,
+ 0x249c, 0x24e9,
+ 0x2500, 0x269c,
+ 0x26a0, 0x26b1,
+ 0x2701, 0x2704,
+ 0x2706, 0x2709,
+ 0x270c, 0x2727,
+ 0x2729, 0x274b,
+ 0x274d, 0x274d,
+ 0x274f, 0x2752,
+ 0x2756, 0x2756,
+ 0x2758, 0x275e,
+ 0x2761, 0x2767,
+ 0x2794, 0x2794,
+ 0x2798, 0x27af,
+ 0x27b1, 0x27be,
+ 0x27c0, 0x27c4,
+ 0x27d0, 0x27e5,
+ 0x27f0, 0x2982,
+ 0x2999, 0x29d7,
+ 0x29dc, 0x29fb,
+ 0x29fe, 0x2b13,
+ 0x2ce5, 0x2cea,
+ 0x2e80, 0x2e99,
+ 0x2e9b, 0x2ef3,
+ 0x2f00, 0x2fd5,
+ 0x2ff0, 0x2ffb,
+ 0x3004, 0x3004,
+ 0x3012, 0x3013,
+ 0x3020, 0x3020,
+ 0x3036, 0x3037,
+ 0x303e, 0x303f,
+ 0x309b, 0x309c,
+ 0x3190, 0x3191,
+ 0x3196, 0x319f,
+ 0x31c0, 0x31cf,
+ 0x3200, 0x321e,
+ 0x322a, 0x3243,
+ 0x3250, 0x3250,
+ 0x3260, 0x327f,
+ 0x328a, 0x32b0,
+ 0x32c0, 0x32fe,
+ 0x3300, 0x33ff,
+ 0x4dc0, 0x4dff,
+ 0xa490, 0xa4c6,
+ 0xa700, 0xa716,
+ 0xa828, 0xa82b,
+ 0xfb29, 0xfb29,
+ 0xfdfc, 0xfdfd,
+ 0xfe62, 0xfe62,
+ 0xfe64, 0xfe66,
+ 0xfe69, 0xfe69,
+ 0xff04, 0xff04,
+ 0xff0b, 0xff0b,
+ 0xff1c, 0xff1e,
+ 0xff3e, 0xff3e,
+ 0xff40, 0xff40,
+ 0xff5c, 0xff5c,
+ 0xff5e, 0xff5e,
+ 0xffe0, 0xffe6,
+ 0xffe8, 0xffee,
+ 0xfffc, 0xfffd,
+ 0x10102, 0x10102,
+ 0x10137, 0x1013f,
+ 0x10179, 0x10189,
+ 0x103d0, 0x103d0,
+ 0x1d000, 0x1d0f5,
+ 0x1d100, 0x1d126,
+ 0x1d12a, 0x1d164,
+ 0x1d16a, 0x1d16c,
+ 0x1d183, 0x1d184,
+ 0x1d18c, 0x1d1a9,
+ 0x1d1ae, 0x1d1dd,
+ 0x1d200, 0x1d241,
+ 0x1d245, 0x1d245,
+ 0x1d300, 0x1d356,
+ 0x1d6c1, 0x1d6c1,
+ 0x1d6db, 0x1d6db,
+ 0x1d6fb, 0x1d6fb,
+ 0x1d715, 0x1d715,
+ 0x1d735, 0x1d735,
+ 0x1d74f, 0x1d74f,
+ 0x1d76f, 0x1d76f,
+ 0x1d789, 0x1d789,
+ 0x1d7a9, 0x1d7a9,
+ 0x1d7c3, 0x1d7c3
+}; /* CR_S */
+
+/* 'Sc': General Category */
+static const OnigCodePoint CR_Sc[] = {
+ 14,
+ 0x0024, 0x0024,
+ 0x00a2, 0x00a5,
+ 0x060b, 0x060b,
+ 0x09f2, 0x09f3,
+ 0x0af1, 0x0af1,
+ 0x0bf9, 0x0bf9,
+ 0x0e3f, 0x0e3f,
+ 0x17db, 0x17db,
+ 0x20a0, 0x20b5,
+ 0xfdfc, 0xfdfc,
+ 0xfe69, 0xfe69,
+ 0xff04, 0xff04,
+ 0xffe0, 0xffe1,
+ 0xffe5, 0xffe6
+}; /* CR_Sc */
+
+/* 'Sk': General Category */
+static const OnigCodePoint CR_Sk[] = {
+ 23,
+ 0x005e, 0x005e,
+ 0x0060, 0x0060,
+ 0x00a8, 0x00a8,
+ 0x00af, 0x00af,
+ 0x00b4, 0x00b4,
+ 0x00b8, 0x00b8,
+ 0x02c2, 0x02c5,
+ 0x02d2, 0x02df,
+ 0x02e5, 0x02ed,
+ 0x02ef, 0x02ff,
+ 0x0374, 0x0375,
+ 0x0384, 0x0385,
+ 0x1fbd, 0x1fbd,
+ 0x1fbf, 0x1fc1,
+ 0x1fcd, 0x1fcf,
+ 0x1fdd, 0x1fdf,
+ 0x1fed, 0x1fef,
+ 0x1ffd, 0x1ffe,
+ 0x309b, 0x309c,
+ 0xa700, 0xa716,
+ 0xff3e, 0xff3e,
+ 0xff40, 0xff40,
+ 0xffe3, 0xffe3
+}; /* CR_Sk */
+
+/* 'Sm': General Category */
+static const OnigCodePoint CR_Sm[] = {
+ 59,
+ 0x002b, 0x002b,
+ 0x003c, 0x003e,
+ 0x007c, 0x007c,
+ 0x007e, 0x007e,
+ 0x00ac, 0x00ac,
+ 0x00b1, 0x00b1,
+ 0x00d7, 0x00d7,
+ 0x00f7, 0x00f7,
+ 0x03f6, 0x03f6,
+ 0x2044, 0x2044,
+ 0x2052, 0x2052,
+ 0x207a, 0x207c,
+ 0x208a, 0x208c,
+ 0x2140, 0x2144,
+ 0x214b, 0x214b,
+ 0x2190, 0x2194,
+ 0x219a, 0x219b,
+ 0x21a0, 0x21a0,
+ 0x21a3, 0x21a3,
+ 0x21a6, 0x21a6,
+ 0x21ae, 0x21ae,
+ 0x21ce, 0x21cf,
+ 0x21d2, 0x21d2,
+ 0x21d4, 0x21d4,
+ 0x21f4, 0x22ff,
+ 0x2308, 0x230b,
+ 0x2320, 0x2321,
+ 0x237c, 0x237c,
+ 0x239b, 0x23b3,
+ 0x25b7, 0x25b7,
+ 0x25c1, 0x25c1,
+ 0x25f8, 0x25ff,
+ 0x266f, 0x266f,
+ 0x27c0, 0x27c4,
+ 0x27d0, 0x27e5,
+ 0x27f0, 0x27ff,
+ 0x2900, 0x2982,
+ 0x2999, 0x29d7,
+ 0x29dc, 0x29fb,
+ 0x29fe, 0x2aff,
+ 0xfb29, 0xfb29,
+ 0xfe62, 0xfe62,
+ 0xfe64, 0xfe66,
+ 0xff0b, 0xff0b,
+ 0xff1c, 0xff1e,
+ 0xff5c, 0xff5c,
+ 0xff5e, 0xff5e,
+ 0xffe2, 0xffe2,
+ 0xffe9, 0xffec,
+ 0x1d6c1, 0x1d6c1,
+ 0x1d6db, 0x1d6db,
+ 0x1d6fb, 0x1d6fb,
+ 0x1d715, 0x1d715,
+ 0x1d735, 0x1d735,
+ 0x1d74f, 0x1d74f,
+ 0x1d76f, 0x1d76f,
+ 0x1d789, 0x1d789,
+ 0x1d7a9, 0x1d7a9,
+ 0x1d7c3, 0x1d7c3
+}; /* CR_Sm */
+
+/* 'So': General Category */
+static const OnigCodePoint CR_So[] = {
+ 120,
+ 0x00a6, 0x00a7,
+ 0x00a9, 0x00a9,
+ 0x00ae, 0x00ae,
+ 0x00b0, 0x00b0,
+ 0x00b6, 0x00b6,
+ 0x0482, 0x0482,
+ 0x060e, 0x060f,
+ 0x06e9, 0x06e9,
+ 0x06fd, 0x06fe,
+ 0x09fa, 0x09fa,
+ 0x0b70, 0x0b70,
+ 0x0bf3, 0x0bf8,
+ 0x0bfa, 0x0bfa,
+ 0x0f01, 0x0f03,
+ 0x0f13, 0x0f17,
+ 0x0f1a, 0x0f1f,
+ 0x0f34, 0x0f34,
+ 0x0f36, 0x0f36,
+ 0x0f38, 0x0f38,
+ 0x0fbe, 0x0fc5,
+ 0x0fc7, 0x0fcc,
+ 0x0fcf, 0x0fcf,
+ 0x1360, 0x1360,
+ 0x1390, 0x1399,
+ 0x1940, 0x1940,
+ 0x19e0, 0x19ff,
+ 0x2100, 0x2101,
+ 0x2103, 0x2106,
+ 0x2108, 0x2109,
+ 0x2114, 0x2114,
+ 0x2116, 0x2118,
+ 0x211e, 0x2123,
+ 0x2125, 0x2125,
+ 0x2127, 0x2127,
+ 0x2129, 0x2129,
+ 0x212e, 0x212e,
+ 0x2132, 0x2132,
+ 0x213a, 0x213b,
+ 0x214a, 0x214a,
+ 0x214c, 0x214c,
+ 0x2195, 0x2199,
+ 0x219c, 0x219f,
+ 0x21a1, 0x21a2,
+ 0x21a4, 0x21a5,
+ 0x21a7, 0x21ad,
+ 0x21af, 0x21cd,
+ 0x21d0, 0x21d1,
+ 0x21d3, 0x21d3,
+ 0x21d5, 0x21f3,
+ 0x2300, 0x2307,
+ 0x230c, 0x231f,
+ 0x2322, 0x2328,
+ 0x232b, 0x237b,
+ 0x237d, 0x239a,
+ 0x23b7, 0x23db,
+ 0x2400, 0x2426,
+ 0x2440, 0x244a,
+ 0x249c, 0x24e9,
+ 0x2500, 0x25b6,
+ 0x25b8, 0x25c0,
+ 0x25c2, 0x25f7,
+ 0x2600, 0x266e,
+ 0x2670, 0x269c,
+ 0x26a0, 0x26b1,
+ 0x2701, 0x2704,
+ 0x2706, 0x2709,
+ 0x270c, 0x2727,
+ 0x2729, 0x274b,
+ 0x274d, 0x274d,
+ 0x274f, 0x2752,
+ 0x2756, 0x2756,
+ 0x2758, 0x275e,
+ 0x2761, 0x2767,
+ 0x2794, 0x2794,
+ 0x2798, 0x27af,
+ 0x27b1, 0x27be,
+ 0x2800, 0x28ff,
+ 0x2b00, 0x2b13,
+ 0x2ce5, 0x2cea,
+ 0x2e80, 0x2e99,
+ 0x2e9b, 0x2ef3,
+ 0x2f00, 0x2fd5,
+ 0x2ff0, 0x2ffb,
+ 0x3004, 0x3004,
+ 0x3012, 0x3013,
+ 0x3020, 0x3020,
+ 0x3036, 0x3037,
+ 0x303e, 0x303f,
+ 0x3190, 0x3191,
+ 0x3196, 0x319f,
+ 0x31c0, 0x31cf,
+ 0x3200, 0x321e,
+ 0x322a, 0x3243,
+ 0x3250, 0x3250,
+ 0x3260, 0x327f,
+ 0x328a, 0x32b0,
+ 0x32c0, 0x32fe,
+ 0x3300, 0x33ff,
+ 0x4dc0, 0x4dff,
+ 0xa490, 0xa4c6,
+ 0xa828, 0xa82b,
+ 0xfdfd, 0xfdfd,
+ 0xffe4, 0xffe4,
+ 0xffe8, 0xffe8,
+ 0xffed, 0xffee,
+ 0xfffc, 0xfffd,
+ 0x10102, 0x10102,
+ 0x10137, 0x1013f,
+ 0x10179, 0x10189,
+ 0x103d0, 0x103d0,
+ 0x1d000, 0x1d0f5,
+ 0x1d100, 0x1d126,
+ 0x1d12a, 0x1d164,
+ 0x1d16a, 0x1d16c,
+ 0x1d183, 0x1d184,
+ 0x1d18c, 0x1d1a9,
+ 0x1d1ae, 0x1d1dd,
+ 0x1d200, 0x1d241,
+ 0x1d245, 0x1d245,
+ 0x1d300, 0x1d356
+}; /* CR_So */
+
+/* 'Z': Major Category */
+static const OnigCodePoint CR_Z[] = {
+ 9,
+ 0x0020, 0x0020,
+ 0x00a0, 0x00a0,
+ 0x1680, 0x1680,
+ 0x180e, 0x180e,
+ 0x2000, 0x200a,
+ 0x2028, 0x2029,
+ 0x202f, 0x202f,
+ 0x205f, 0x205f,
+ 0x3000, 0x3000
+}; /* CR_Z */
+
+/* 'Zl': General Category */
+static const OnigCodePoint CR_Zl[] = {
+ 1,
+ 0x2028, 0x2028
+}; /* CR_Zl */
+
+/* 'Zp': General Category */
+static const OnigCodePoint CR_Zp[] = {
+ 1,
+ 0x2029, 0x2029
+}; /* CR_Zp */
+
+/* 'Zs': General Category */
+static const OnigCodePoint CR_Zs[] = {
+ 8,
+ 0x0020, 0x0020,
+ 0x00a0, 0x00a0,
+ 0x1680, 0x1680,
+ 0x180e, 0x180e,
+ 0x2000, 0x200a,
+ 0x202f, 0x202f,
+ 0x205f, 0x205f,
+ 0x3000, 0x3000
+}; /* CR_Zs */
+
+/* 'Arabic': Script */
+static const OnigCodePoint CR_Arabic[] = {
+ 17,
+ 0x060b, 0x060b,
+ 0x060d, 0x0615,
+ 0x061e, 0x061e,
+ 0x0621, 0x063a,
+ 0x0641, 0x064a,
+ 0x0656, 0x065e,
+ 0x066a, 0x066f,
+ 0x0671, 0x06dc,
+ 0x06de, 0x06ff,
+ 0x0750, 0x076d,
+ 0xfb50, 0xfbb1,
+ 0xfbd3, 0xfd3d,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdfc,
+ 0xfe70, 0xfe74,
+ 0xfe76, 0xfefc
+}; /* CR_Arabic */
+
+/* 'Armenian': Script */
+static const OnigCodePoint CR_Armenian[] = {
+ 5,
+ 0x0531, 0x0556,
+ 0x0559, 0x055f,
+ 0x0561, 0x0587,
+ 0x058a, 0x058a,
+ 0xfb13, 0xfb17
+}; /* CR_Armenian */
+
+/* 'Bengali': Script */
+static const OnigCodePoint CR_Bengali[] = {
+ 14,
+ 0x0981, 0x0983,
+ 0x0985, 0x098c,
+ 0x098f, 0x0990,
+ 0x0993, 0x09a8,
+ 0x09aa, 0x09b0,
+ 0x09b2, 0x09b2,
+ 0x09b6, 0x09b9,
+ 0x09bc, 0x09c4,
+ 0x09c7, 0x09c8,
+ 0x09cb, 0x09ce,
+ 0x09d7, 0x09d7,
+ 0x09dc, 0x09dd,
+ 0x09df, 0x09e3,
+ 0x09e6, 0x09fa
+}; /* CR_Bengali */
+
+/* 'Bopomofo': Script */
+static const OnigCodePoint CR_Bopomofo[] = {
+ 2,
+ 0x3105, 0x312c,
+ 0x31a0, 0x31b7
+}; /* CR_Bopomofo */
+
+/* 'Braille': Script */
+static const OnigCodePoint CR_Braille[] = {
+ 1,
+ 0x2800, 0x28ff
+}; /* CR_Braille */
+
+/* 'Buginese': Script */
+static const OnigCodePoint CR_Buginese[] = {
+ 2,
+ 0x1a00, 0x1a1b,
+ 0x1a1e, 0x1a1f
+}; /* CR_Buginese */
+
+/* 'Buhid': Script */
+static const OnigCodePoint CR_Buhid[] = {
+ 1,
+ 0x1740, 0x1753
+}; /* CR_Buhid */
+
+/* 'Canadian_Aboriginal': Script */
+static const OnigCodePoint CR_Canadian_Aboriginal[] = {
+ 1,
+ 0x1401, 0x1676
+}; /* CR_Canadian_Aboriginal */
+
+/* 'Cherokee': Script */
+static const OnigCodePoint CR_Cherokee[] = {
+ 1,
+ 0x13a0, 0x13f4
+}; /* CR_Cherokee */
+
+/* 'Common': Script */
+static const OnigCodePoint CR_Common[] = {
+ 126,
+ 0x0000, 0x0040,
+ 0x005b, 0x0060,
+ 0x007b, 0x00a9,
+ 0x00ab, 0x00b9,
+ 0x00bb, 0x00bf,
+ 0x00d7, 0x00d7,
+ 0x00f7, 0x00f7,
+ 0x02b9, 0x02df,
+ 0x02e5, 0x02ff,
+ 0x037e, 0x037e,
+ 0x0387, 0x0387,
+ 0x0589, 0x0589,
+ 0x0600, 0x0603,
+ 0x060c, 0x060c,
+ 0x061b, 0x061b,
+ 0x061f, 0x061f,
+ 0x0640, 0x0640,
+ 0x0660, 0x0669,
+ 0x06dd, 0x06dd,
+ 0x0964, 0x0965,
+ 0x0970, 0x0970,
+ 0x0e3f, 0x0e3f,
+ 0x10fb, 0x10fb,
+ 0x16eb, 0x16ed,
+ 0x1735, 0x1736,
+ 0x2000, 0x200b,
+ 0x200e, 0x2063,
+ 0x206a, 0x2070,
+ 0x2074, 0x207e,
+ 0x2080, 0x208e,
+ 0x20a0, 0x20b5,
+ 0x2100, 0x2125,
+ 0x2127, 0x2129,
+ 0x212c, 0x214c,
+ 0x2153, 0x2183,
+ 0x2190, 0x23db,
+ 0x2400, 0x2426,
+ 0x2440, 0x244a,
+ 0x2460, 0x269c,
+ 0x26a0, 0x26b1,
+ 0x2701, 0x2704,
+ 0x2706, 0x2709,
+ 0x270c, 0x2727,
+ 0x2729, 0x274b,
+ 0x274d, 0x274d,
+ 0x274f, 0x2752,
+ 0x2756, 0x2756,
+ 0x2758, 0x275e,
+ 0x2761, 0x2794,
+ 0x2798, 0x27af,
+ 0x27b1, 0x27be,
+ 0x27c0, 0x27c6,
+ 0x27d0, 0x27eb,
+ 0x27f0, 0x27ff,
+ 0x2900, 0x2b13,
+ 0x2e00, 0x2e17,
+ 0x2e1c, 0x2e1d,
+ 0x2ff0, 0x2ffb,
+ 0x3000, 0x3004,
+ 0x3006, 0x3006,
+ 0x3008, 0x3020,
+ 0x3030, 0x3037,
+ 0x303c, 0x303f,
+ 0x309b, 0x309c,
+ 0x30a0, 0x30a0,
+ 0x30fb, 0x30fc,
+ 0x3190, 0x319f,
+ 0x31c0, 0x31cf,
+ 0x3220, 0x3243,
+ 0x3250, 0x325f,
+ 0x327e, 0x32fe,
+ 0x3300, 0x33ff,
+ 0x4dc0, 0x4dff,
+ 0xa700, 0xa716,
+ 0xe000, 0xf8ff,
+ 0xfd3e, 0xfd3f,
+ 0xfdfd, 0xfdfd,
+ 0xfe10, 0xfe19,
+ 0xfe30, 0xfe52,
+ 0xfe54, 0xfe66,
+ 0xfe68, 0xfe6b,
+ 0xfeff, 0xfeff,
+ 0xff01, 0xff20,
+ 0xff3b, 0xff40,
+ 0xff5b, 0xff65,
+ 0xff70, 0xff70,
+ 0xff9e, 0xff9f,
+ 0xffe0, 0xffe6,
+ 0xffe8, 0xffee,
+ 0xfff9, 0xfffd,
+ 0x10100, 0x10102,
+ 0x10107, 0x10133,
+ 0x10137, 0x1013f,
+ 0x1d000, 0x1d0f5,
+ 0x1d100, 0x1d126,
+ 0x1d12a, 0x1d166,
+ 0x1d16a, 0x1d17a,
+ 0x1d183, 0x1d184,
+ 0x1d18c, 0x1d1a9,
+ 0x1d1ae, 0x1d1dd,
+ 0x1d300, 0x1d356,
+ 0x1d400, 0x1d454,
+ 0x1d456, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d51e, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d552, 0x1d6a5,
+ 0x1d6a8, 0x1d7c9,
+ 0x1d7ce, 0x1d7ff,
+ 0xe0001, 0xe0001,
+ 0xe0020, 0xe007f,
+ 0xf0000, 0xffffd,
+ 0x100000, 0x10fffd
+}; /* CR_Common */
+
+/* 'Coptic': Script */
+static const OnigCodePoint CR_Coptic[] = {
+ 3,
+ 0x03e2, 0x03ef,
+ 0x2c80, 0x2cea,
+ 0x2cf9, 0x2cff
+}; /* CR_Coptic */
+
+/* 'Cypriot': Script */
+static const OnigCodePoint CR_Cypriot[] = {
+ 6,
+ 0x10800, 0x10805,
+ 0x10808, 0x10808,
+ 0x1080a, 0x10835,
+ 0x10837, 0x10838,
+ 0x1083c, 0x1083c,
+ 0x1083f, 0x1083f
+}; /* CR_Cypriot */
+
+/* 'Cyrillic': Script */
+static const OnigCodePoint CR_Cyrillic[] = {
+ 6,
+ 0x0400, 0x0486,
+ 0x0488, 0x04ce,
+ 0x04d0, 0x04f9,
+ 0x0500, 0x050f,
+ 0x1d2b, 0x1d2b,
+ 0x1d78, 0x1d78
+}; /* CR_Cyrillic */
+
+/* 'Deseret': Script */
+static const OnigCodePoint CR_Deseret[] = {
+ 1,
+ 0x10400, 0x1044f
+}; /* CR_Deseret */
+
+/* 'Devanagari': Script */
+static const OnigCodePoint CR_Devanagari[] = {
+ 6,
+ 0x0901, 0x0939,
+ 0x093c, 0x094d,
+ 0x0950, 0x0954,
+ 0x0958, 0x0963,
+ 0x0966, 0x096f,
+ 0x097d, 0x097d
+}; /* CR_Devanagari */
+
+/* 'Ethiopic': Script */
+static const OnigCodePoint CR_Ethiopic[] = {
+ 27,
+ 0x1200, 0x1248,
+ 0x124a, 0x124d,
+ 0x1250, 0x1256,
+ 0x1258, 0x1258,
+ 0x125a, 0x125d,
+ 0x1260, 0x1288,
+ 0x128a, 0x128d,
+ 0x1290, 0x12b0,
+ 0x12b2, 0x12b5,
+ 0x12b8, 0x12be,
+ 0x12c0, 0x12c0,
+ 0x12c2, 0x12c5,
+ 0x12c8, 0x12d6,
+ 0x12d8, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x135a,
+ 0x135f, 0x137c,
+ 0x1380, 0x1399,
+ 0x2d80, 0x2d96,
+ 0x2da0, 0x2da6,
+ 0x2da8, 0x2dae,
+ 0x2db0, 0x2db6,
+ 0x2db8, 0x2dbe,
+ 0x2dc0, 0x2dc6,
+ 0x2dc8, 0x2dce,
+ 0x2dd0, 0x2dd6,
+ 0x2dd8, 0x2dde
+}; /* CR_Ethiopic */
+
+/* 'Georgian': Script */
+static const OnigCodePoint CR_Georgian[] = {
+ 4,
+ 0x10a0, 0x10c5,
+ 0x10d0, 0x10fa,
+ 0x10fc, 0x10fc,
+ 0x2d00, 0x2d25
+}; /* CR_Georgian */
+
+/* 'Glagolitic': Script */
+static const OnigCodePoint CR_Glagolitic[] = {
+ 2,
+ 0x2c00, 0x2c2e,
+ 0x2c30, 0x2c5e
+}; /* CR_Glagolitic */
+
+/* 'Gothic': Script */
+static const OnigCodePoint CR_Gothic[] = {
+ 1,
+ 0x10330, 0x1034a
+}; /* CR_Gothic */
+
+/* 'Greek': Script */
+static const OnigCodePoint CR_Greek[] = {
+ 31,
+ 0x0374, 0x0375,
+ 0x037a, 0x037a,
+ 0x0384, 0x0386,
+ 0x0388, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x03a1,
+ 0x03a3, 0x03ce,
+ 0x03d0, 0x03e1,
+ 0x03f0, 0x03ff,
+ 0x1d26, 0x1d2a,
+ 0x1d5d, 0x1d61,
+ 0x1d66, 0x1d6a,
+ 0x1f00, 0x1f15,
+ 0x1f18, 0x1f1d,
+ 0x1f20, 0x1f45,
+ 0x1f48, 0x1f4d,
+ 0x1f50, 0x1f57,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f7d,
+ 0x1f80, 0x1fb4,
+ 0x1fb6, 0x1fc4,
+ 0x1fc6, 0x1fd3,
+ 0x1fd6, 0x1fdb,
+ 0x1fdd, 0x1fef,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ffe,
+ 0x2126, 0x2126,
+ 0x10140, 0x1018a,
+ 0x1d200, 0x1d245
+}; /* CR_Greek */
+
+/* 'Gujarati': Script */
+static const OnigCodePoint CR_Gujarati[] = {
+ 14,
+ 0x0a81, 0x0a83,
+ 0x0a85, 0x0a8d,
+ 0x0a8f, 0x0a91,
+ 0x0a93, 0x0aa8,
+ 0x0aaa, 0x0ab0,
+ 0x0ab2, 0x0ab3,
+ 0x0ab5, 0x0ab9,
+ 0x0abc, 0x0ac5,
+ 0x0ac7, 0x0ac9,
+ 0x0acb, 0x0acd,
+ 0x0ad0, 0x0ad0,
+ 0x0ae0, 0x0ae3,
+ 0x0ae6, 0x0aef,
+ 0x0af1, 0x0af1
+}; /* CR_Gujarati */
+
+/* 'Gurmukhi': Script */
+static const OnigCodePoint CR_Gurmukhi[] = {
+ 15,
+ 0x0a01, 0x0a03,
+ 0x0a05, 0x0a0a,
+ 0x0a0f, 0x0a10,
+ 0x0a13, 0x0a28,
+ 0x0a2a, 0x0a30,
+ 0x0a32, 0x0a33,
+ 0x0a35, 0x0a36,
+ 0x0a38, 0x0a39,
+ 0x0a3c, 0x0a3c,
+ 0x0a3e, 0x0a42,
+ 0x0a47, 0x0a48,
+ 0x0a4b, 0x0a4d,
+ 0x0a59, 0x0a5c,
+ 0x0a5e, 0x0a5e,
+ 0x0a66, 0x0a74
+}; /* CR_Gurmukhi */
+
+/* 'Han': Script */
+static const OnigCodePoint CR_Han[] = {
+ 14,
+ 0x2e80, 0x2e99,
+ 0x2e9b, 0x2ef3,
+ 0x2f00, 0x2fd5,
+ 0x3005, 0x3005,
+ 0x3007, 0x3007,
+ 0x3021, 0x3029,
+ 0x3038, 0x303b,
+ 0x3400, 0x4db5,
+ 0x4e00, 0x9fbb,
+ 0xf900, 0xfa2d,
+ 0xfa30, 0xfa6a,
+ 0xfa70, 0xfad9,
+ 0x20000, 0x2a6d6,
+ 0x2f800, 0x2fa1d
+}; /* CR_Han */
+
+/* 'Hangul': Script */
+static const OnigCodePoint CR_Hangul[] = {
+ 12,
+ 0x1100, 0x1159,
+ 0x115f, 0x11a2,
+ 0x11a8, 0x11f9,
+ 0x3131, 0x318e,
+ 0x3200, 0x321e,
+ 0x3260, 0x327d,
+ 0xac00, 0xd7a3,
+ 0xffa0, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc
+}; /* CR_Hangul */
+
+/* 'Hanunoo': Script */
+static const OnigCodePoint CR_Hanunoo[] = {
+ 1,
+ 0x1720, 0x1734
+}; /* CR_Hanunoo */
+
+/* 'Hebrew': Script */
+static const OnigCodePoint CR_Hebrew[] = {
+ 10,
+ 0x0591, 0x05b9,
+ 0x05bb, 0x05c7,
+ 0x05d0, 0x05ea,
+ 0x05f0, 0x05f4,
+ 0xfb1d, 0xfb36,
+ 0xfb38, 0xfb3c,
+ 0xfb3e, 0xfb3e,
+ 0xfb40, 0xfb41,
+ 0xfb43, 0xfb44,
+ 0xfb46, 0xfb4f
+}; /* CR_Hebrew */
+
+/* 'Hiragana': Script */
+static const OnigCodePoint CR_Hiragana[] = {
+ 2,
+ 0x3041, 0x3096,
+ 0x309d, 0x309f
+}; /* CR_Hiragana */
+
+/* 'Inherited': Script */
+static const OnigCodePoint CR_Inherited[] = {
+ 15,
+ 0x0300, 0x036f,
+ 0x064b, 0x0655,
+ 0x0670, 0x0670,
+ 0x1dc0, 0x1dc3,
+ 0x200c, 0x200d,
+ 0x20d0, 0x20eb,
+ 0x302a, 0x302f,
+ 0x3099, 0x309a,
+ 0xfe00, 0xfe0f,
+ 0xfe20, 0xfe23,
+ 0x1d167, 0x1d169,
+ 0x1d17b, 0x1d182,
+ 0x1d185, 0x1d18b,
+ 0x1d1aa, 0x1d1ad,
+ 0xe0100, 0xe01ef
+}; /* CR_Inherited */
+
+/* 'Kannada': Script */
+static const OnigCodePoint CR_Kannada[] = {
+ 13,
+ 0x0c82, 0x0c83,
+ 0x0c85, 0x0c8c,
+ 0x0c8e, 0x0c90,
+ 0x0c92, 0x0ca8,
+ 0x0caa, 0x0cb3,
+ 0x0cb5, 0x0cb9,
+ 0x0cbc, 0x0cc4,
+ 0x0cc6, 0x0cc8,
+ 0x0cca, 0x0ccd,
+ 0x0cd5, 0x0cd6,
+ 0x0cde, 0x0cde,
+ 0x0ce0, 0x0ce1,
+ 0x0ce6, 0x0cef
+}; /* CR_Kannada */
+
+/* 'Katakana': Script */
+static const OnigCodePoint CR_Katakana[] = {
+ 5,
+ 0x30a1, 0x30fa,
+ 0x30fd, 0x30ff,
+ 0x31f0, 0x31ff,
+ 0xff66, 0xff6f,
+ 0xff71, 0xff9d
+}; /* CR_Katakana */
+
+/* 'Kharoshthi': Script */
+static const OnigCodePoint CR_Kharoshthi[] = {
+ 8,
+ 0x10a00, 0x10a03,
+ 0x10a05, 0x10a06,
+ 0x10a0c, 0x10a13,
+ 0x10a15, 0x10a17,
+ 0x10a19, 0x10a33,
+ 0x10a38, 0x10a3a,
+ 0x10a3f, 0x10a47,
+ 0x10a50, 0x10a58
+}; /* CR_Kharoshthi */
+
+/* 'Khmer': Script */
+static const OnigCodePoint CR_Khmer[] = {
+ 4,
+ 0x1780, 0x17dd,
+ 0x17e0, 0x17e9,
+ 0x17f0, 0x17f9,
+ 0x19e0, 0x19ff
+}; /* CR_Khmer */
+
+/* 'Lao': Script */
+static const OnigCodePoint CR_Lao[] = {
+ 18,
+ 0x0e81, 0x0e82,
+ 0x0e84, 0x0e84,
+ 0x0e87, 0x0e88,
+ 0x0e8a, 0x0e8a,
+ 0x0e8d, 0x0e8d,
+ 0x0e94, 0x0e97,
+ 0x0e99, 0x0e9f,
+ 0x0ea1, 0x0ea3,
+ 0x0ea5, 0x0ea5,
+ 0x0ea7, 0x0ea7,
+ 0x0eaa, 0x0eab,
+ 0x0ead, 0x0eb9,
+ 0x0ebb, 0x0ebd,
+ 0x0ec0, 0x0ec4,
+ 0x0ec6, 0x0ec6,
+ 0x0ec8, 0x0ecd,
+ 0x0ed0, 0x0ed9,
+ 0x0edc, 0x0edd
+}; /* CR_Lao */
+
+/* 'Latin': Script */
+static const OnigCodePoint CR_Latin[] = {
+ 23,
+ 0x0041, 0x005a,
+ 0x0061, 0x007a,
+ 0x00aa, 0x00aa,
+ 0x00ba, 0x00ba,
+ 0x00c0, 0x00d6,
+ 0x00d8, 0x00f6,
+ 0x00f8, 0x0241,
+ 0x0250, 0x02b8,
+ 0x02e0, 0x02e4,
+ 0x1d00, 0x1d25,
+ 0x1d2c, 0x1d5c,
+ 0x1d62, 0x1d65,
+ 0x1d6b, 0x1d77,
+ 0x1d79, 0x1dbf,
+ 0x1e00, 0x1e9b,
+ 0x1ea0, 0x1ef9,
+ 0x2071, 0x2071,
+ 0x207f, 0x207f,
+ 0x2090, 0x2094,
+ 0x212a, 0x212b,
+ 0xfb00, 0xfb06,
+ 0xff21, 0xff3a,
+ 0xff41, 0xff5a
+}; /* CR_Latin */
+
+/* 'Limbu': Script */
+static const OnigCodePoint CR_Limbu[] = {
+ 5,
+ 0x1900, 0x191c,
+ 0x1920, 0x192b,
+ 0x1930, 0x193b,
+ 0x1940, 0x1940,
+ 0x1944, 0x194f
+}; /* CR_Limbu */
+
+/* 'Linear_B': Script */
+static const OnigCodePoint CR_Linear_B[] = {
+ 7,
+ 0x10000, 0x1000b,
+ 0x1000d, 0x10026,
+ 0x10028, 0x1003a,
+ 0x1003c, 0x1003d,
+ 0x1003f, 0x1004d,
+ 0x10050, 0x1005d,
+ 0x10080, 0x100fa
+}; /* CR_Linear_B */
+
+/* 'Malayalam': Script */
+static const OnigCodePoint CR_Malayalam[] = {
+ 11,
+ 0x0d02, 0x0d03,
+ 0x0d05, 0x0d0c,
+ 0x0d0e, 0x0d10,
+ 0x0d12, 0x0d28,
+ 0x0d2a, 0x0d39,
+ 0x0d3e, 0x0d43,
+ 0x0d46, 0x0d48,
+ 0x0d4a, 0x0d4d,
+ 0x0d57, 0x0d57,
+ 0x0d60, 0x0d61,
+ 0x0d66, 0x0d6f
+}; /* CR_Malayalam */
+
+/* 'Mongolian': Script */
+static const OnigCodePoint CR_Mongolian[] = {
+ 4,
+ 0x1800, 0x180e,
+ 0x1810, 0x1819,
+ 0x1820, 0x1877,
+ 0x1880, 0x18a9
+}; /* CR_Mongolian */
+
+/* 'Myanmar': Script */
+static const OnigCodePoint CR_Myanmar[] = {
+ 6,
+ 0x1000, 0x1021,
+ 0x1023, 0x1027,
+ 0x1029, 0x102a,
+ 0x102c, 0x1032,
+ 0x1036, 0x1039,
+ 0x1040, 0x1059
+}; /* CR_Myanmar */
+
+/* 'New_Tai_Lue': Script */
+static const OnigCodePoint CR_New_Tai_Lue[] = {
+ 4,
+ 0x1980, 0x19a9,
+ 0x19b0, 0x19c9,
+ 0x19d0, 0x19d9,
+ 0x19de, 0x19df
+}; /* CR_New_Tai_Lue */
+
+/* 'Ogham': Script */
+static const OnigCodePoint CR_Ogham[] = {
+ 1,
+ 0x1680, 0x169c
+}; /* CR_Ogham */
+
+/* 'Old_Italic': Script */
+static const OnigCodePoint CR_Old_Italic[] = {
+ 2,
+ 0x10300, 0x1031e,
+ 0x10320, 0x10323
+}; /* CR_Old_Italic */
+
+/* 'Old_Persian': Script */
+static const OnigCodePoint CR_Old_Persian[] = {
+ 2,
+ 0x103a0, 0x103c3,
+ 0x103c8, 0x103d5
+}; /* CR_Old_Persian */
+
+/* 'Oriya': Script */
+static const OnigCodePoint CR_Oriya[] = {
+ 14,
+ 0x0b01, 0x0b03,
+ 0x0b05, 0x0b0c,
+ 0x0b0f, 0x0b10,
+ 0x0b13, 0x0b28,
+ 0x0b2a, 0x0b30,
+ 0x0b32, 0x0b33,
+ 0x0b35, 0x0b39,
+ 0x0b3c, 0x0b43,
+ 0x0b47, 0x0b48,
+ 0x0b4b, 0x0b4d,
+ 0x0b56, 0x0b57,
+ 0x0b5c, 0x0b5d,
+ 0x0b5f, 0x0b61,
+ 0x0b66, 0x0b71
+}; /* CR_Oriya */
+
+/* 'Osmanya': Script */
+static const OnigCodePoint CR_Osmanya[] = {
+ 2,
+ 0x10480, 0x1049d,
+ 0x104a0, 0x104a9
+}; /* CR_Osmanya */
+
+/* 'Runic': Script */
+static const OnigCodePoint CR_Runic[] = {
+ 2,
+ 0x16a0, 0x16ea,
+ 0x16ee, 0x16f0
+}; /* CR_Runic */
+
+/* 'Shavian': Script */
+static const OnigCodePoint CR_Shavian[] = {
+ 1,
+ 0x10450, 0x1047f
+}; /* CR_Shavian */
+
+/* 'Sinhala': Script */
+static const OnigCodePoint CR_Sinhala[] = {
+ 11,
+ 0x0d82, 0x0d83,
+ 0x0d85, 0x0d96,
+ 0x0d9a, 0x0db1,
+ 0x0db3, 0x0dbb,
+ 0x0dbd, 0x0dbd,
+ 0x0dc0, 0x0dc6,
+ 0x0dca, 0x0dca,
+ 0x0dcf, 0x0dd4,
+ 0x0dd6, 0x0dd6,
+ 0x0dd8, 0x0ddf,
+ 0x0df2, 0x0df4
+}; /* CR_Sinhala */
+
+/* 'Syloti_Nagri': Script */
+static const OnigCodePoint CR_Syloti_Nagri[] = {
+ 1,
+ 0xa800, 0xa82b
+}; /* CR_Syloti_Nagri */
+
+/* 'Syriac': Script */
+static const OnigCodePoint CR_Syriac[] = {
+ 3,
+ 0x0700, 0x070d,
+ 0x070f, 0x074a,
+ 0x074d, 0x074f
+}; /* CR_Syriac */
+
+/* 'Tagalog': Script */
+static const OnigCodePoint CR_Tagalog[] = {
+ 2,
+ 0x1700, 0x170c,
+ 0x170e, 0x1714
+}; /* CR_Tagalog */
+
+/* 'Tagbanwa': Script */
+static const OnigCodePoint CR_Tagbanwa[] = {
+ 3,
+ 0x1760, 0x176c,
+ 0x176e, 0x1770,
+ 0x1772, 0x1773
+}; /* CR_Tagbanwa */
+
+/* 'Tai_Le': Script */
+static const OnigCodePoint CR_Tai_Le[] = {
+ 2,
+ 0x1950, 0x196d,
+ 0x1970, 0x1974
+}; /* CR_Tai_Le */
+
+/* 'Tamil': Script */
+static const OnigCodePoint CR_Tamil[] = {
+ 15,
+ 0x0b82, 0x0b83,
+ 0x0b85, 0x0b8a,
+ 0x0b8e, 0x0b90,
+ 0x0b92, 0x0b95,
+ 0x0b99, 0x0b9a,
+ 0x0b9c, 0x0b9c,
+ 0x0b9e, 0x0b9f,
+ 0x0ba3, 0x0ba4,
+ 0x0ba8, 0x0baa,
+ 0x0bae, 0x0bb9,
+ 0x0bbe, 0x0bc2,
+ 0x0bc6, 0x0bc8,
+ 0x0bca, 0x0bcd,
+ 0x0bd7, 0x0bd7,
+ 0x0be6, 0x0bfa
+}; /* CR_Tamil */
+
+/* 'Telugu': Script */
+static const OnigCodePoint CR_Telugu[] = {
+ 12,
+ 0x0c01, 0x0c03,
+ 0x0c05, 0x0c0c,
+ 0x0c0e, 0x0c10,
+ 0x0c12, 0x0c28,
+ 0x0c2a, 0x0c33,
+ 0x0c35, 0x0c39,
+ 0x0c3e, 0x0c44,
+ 0x0c46, 0x0c48,
+ 0x0c4a, 0x0c4d,
+ 0x0c55, 0x0c56,
+ 0x0c60, 0x0c61,
+ 0x0c66, 0x0c6f
+}; /* CR_Telugu */
+
+/* 'Thaana': Script */
+static const OnigCodePoint CR_Thaana[] = {
+ 1,
+ 0x0780, 0x07b1
+}; /* CR_Thaana */
+
+/* 'Thai': Script */
+static const OnigCodePoint CR_Thai[] = {
+ 2,
+ 0x0e01, 0x0e3a,
+ 0x0e40, 0x0e5b
+}; /* CR_Thai */
+
+/* 'Tibetan': Script */
+static const OnigCodePoint CR_Tibetan[] = {
+ 7,
+ 0x0f00, 0x0f47,
+ 0x0f49, 0x0f6a,
+ 0x0f71, 0x0f8b,
+ 0x0f90, 0x0f97,
+ 0x0f99, 0x0fbc,
+ 0x0fbe, 0x0fcc,
+ 0x0fcf, 0x0fd1
+}; /* CR_Tibetan */
+
+/* 'Tifinagh': Script */
+static const OnigCodePoint CR_Tifinagh[] = {
+ 2,
+ 0x2d30, 0x2d65,
+ 0x2d6f, 0x2d6f
+}; /* CR_Tifinagh */
+
+/* 'Ugaritic': Script */
+static const OnigCodePoint CR_Ugaritic[] = {
+ 2,
+ 0x10380, 0x1039d,
+ 0x1039f, 0x1039f
+}; /* CR_Ugaritic */
+
+/* 'Yi': Script */
+static const OnigCodePoint CR_Yi[] = {
+ 2,
+ 0xa000, 0xa48c,
+ 0xa490, 0xa4c6
+}; /* CR_Yi */
+
+
+#endif /* USE_UNICODE_PROPERTIES */
+
+
+typedef struct {
+ int n;
+ OnigCodePoint code[3];
+} CodePointList3;
+
+typedef struct {
+ OnigCodePoint from;
+ CodePointList3 to;
+} CaseFold_11_Type;
+
+typedef struct {
+ OnigCodePoint from;
+ CodePointList3 to;
+} CaseUnfold_11_Type;
+
+typedef struct {
+ int n;
+ OnigCodePoint code[2];
+} CodePointList2;
+
+typedef struct {
+ OnigCodePoint from[2];
+ CodePointList2 to;
+} CaseUnfold_12_Type;
+
+typedef struct {
+ OnigCodePoint from[3];
+ CodePointList2 to;
+} CaseUnfold_13_Type;
+
+static const CaseFold_11_Type CaseFold[] = {
+ { 0x0041, {1, {0x0061}}},
+ { 0x0042, {1, {0x0062}}},
+ { 0x0043, {1, {0x0063}}},
+ { 0x0044, {1, {0x0064}}},
+ { 0x0045, {1, {0x0065}}},
+ { 0x0046, {1, {0x0066}}},
+ { 0x0047, {1, {0x0067}}},
+ { 0x0048, {1, {0x0068}}},
+ { 0x004a, {1, {0x006a}}},
+ { 0x004b, {1, {0x006b}}},
+ { 0x004c, {1, {0x006c}}},
+ { 0x004d, {1, {0x006d}}},
+ { 0x004e, {1, {0x006e}}},
+ { 0x004f, {1, {0x006f}}},
+ { 0x0050, {1, {0x0070}}},
+ { 0x0051, {1, {0x0071}}},
+ { 0x0052, {1, {0x0072}}},
+ { 0x0053, {1, {0x0073}}},
+ { 0x0054, {1, {0x0074}}},
+ { 0x0055, {1, {0x0075}}},
+ { 0x0056, {1, {0x0076}}},
+ { 0x0057, {1, {0x0077}}},
+ { 0x0058, {1, {0x0078}}},
+ { 0x0059, {1, {0x0079}}},
+ { 0x005a, {1, {0x007a}}},
+ { 0x00b5, {1, {0x03bc}}},
+ { 0x00c0, {1, {0x00e0}}},
+ { 0x00c1, {1, {0x00e1}}},
+ { 0x00c2, {1, {0x00e2}}},
+ { 0x00c3, {1, {0x00e3}}},
+ { 0x00c4, {1, {0x00e4}}},
+ { 0x00c5, {1, {0x00e5}}},
+ { 0x00c6, {1, {0x00e6}}},
+ { 0x00c7, {1, {0x00e7}}},
+ { 0x00c8, {1, {0x00e8}}},
+ { 0x00c9, {1, {0x00e9}}},
+ { 0x00ca, {1, {0x00ea}}},
+ { 0x00cb, {1, {0x00eb}}},
+ { 0x00cc, {1, {0x00ec}}},
+ { 0x00cd, {1, {0x00ed}}},
+ { 0x00ce, {1, {0x00ee}}},
+ { 0x00cf, {1, {0x00ef}}},
+ { 0x00d0, {1, {0x00f0}}},
+ { 0x00d1, {1, {0x00f1}}},
+ { 0x00d2, {1, {0x00f2}}},
+ { 0x00d3, {1, {0x00f3}}},
+ { 0x00d4, {1, {0x00f4}}},
+ { 0x00d5, {1, {0x00f5}}},
+ { 0x00d6, {1, {0x00f6}}},
+ { 0x00d8, {1, {0x00f8}}},
+ { 0x00d9, {1, {0x00f9}}},
+ { 0x00da, {1, {0x00fa}}},
+ { 0x00db, {1, {0x00fb}}},
+ { 0x00dc, {1, {0x00fc}}},
+ { 0x00dd, {1, {0x00fd}}},
+ { 0x00de, {1, {0x00fe}}},
+ { 0x00df, {2, {0x0073, 0x0073}}},
+ { 0x0100, {1, {0x0101}}},
+ { 0x0102, {1, {0x0103}}},
+ { 0x0104, {1, {0x0105}}},
+ { 0x0106, {1, {0x0107}}},
+ { 0x0108, {1, {0x0109}}},
+ { 0x010a, {1, {0x010b}}},
+ { 0x010c, {1, {0x010d}}},
+ { 0x010e, {1, {0x010f}}},
+ { 0x0110, {1, {0x0111}}},
+ { 0x0112, {1, {0x0113}}},
+ { 0x0114, {1, {0x0115}}},
+ { 0x0116, {1, {0x0117}}},
+ { 0x0118, {1, {0x0119}}},
+ { 0x011a, {1, {0x011b}}},
+ { 0x011c, {1, {0x011d}}},
+ { 0x011e, {1, {0x011f}}},
+ { 0x0120, {1, {0x0121}}},
+ { 0x0122, {1, {0x0123}}},
+ { 0x0124, {1, {0x0125}}},
+ { 0x0126, {1, {0x0127}}},
+ { 0x0128, {1, {0x0129}}},
+ { 0x012a, {1, {0x012b}}},
+ { 0x012c, {1, {0x012d}}},
+ { 0x012e, {1, {0x012f}}},
+ { 0x0132, {1, {0x0133}}},
+ { 0x0134, {1, {0x0135}}},
+ { 0x0136, {1, {0x0137}}},
+ { 0x0139, {1, {0x013a}}},
+ { 0x013b, {1, {0x013c}}},
+ { 0x013d, {1, {0x013e}}},
+ { 0x013f, {1, {0x0140}}},
+ { 0x0141, {1, {0x0142}}},
+ { 0x0143, {1, {0x0144}}},
+ { 0x0145, {1, {0x0146}}},
+ { 0x0147, {1, {0x0148}}},
+ { 0x0149, {2, {0x02bc, 0x006e}}},
+ { 0x014a, {1, {0x014b}}},
+ { 0x014c, {1, {0x014d}}},
+ { 0x014e, {1, {0x014f}}},
+ { 0x0150, {1, {0x0151}}},
+ { 0x0152, {1, {0x0153}}},
+ { 0x0154, {1, {0x0155}}},
+ { 0x0156, {1, {0x0157}}},
+ { 0x0158, {1, {0x0159}}},
+ { 0x015a, {1, {0x015b}}},
+ { 0x015c, {1, {0x015d}}},
+ { 0x015e, {1, {0x015f}}},
+ { 0x0160, {1, {0x0161}}},
+ { 0x0162, {1, {0x0163}}},
+ { 0x0164, {1, {0x0165}}},
+ { 0x0166, {1, {0x0167}}},
+ { 0x0168, {1, {0x0169}}},
+ { 0x016a, {1, {0x016b}}},
+ { 0x016c, {1, {0x016d}}},
+ { 0x016e, {1, {0x016f}}},
+ { 0x0170, {1, {0x0171}}},
+ { 0x0172, {1, {0x0173}}},
+ { 0x0174, {1, {0x0175}}},
+ { 0x0176, {1, {0x0177}}},
+ { 0x0178, {1, {0x00ff}}},
+ { 0x0179, {1, {0x017a}}},
+ { 0x017b, {1, {0x017c}}},
+ { 0x017d, {1, {0x017e}}},
+ { 0x017f, {1, {0x0073}}},
+ { 0x0181, {1, {0x0253}}},
+ { 0x0182, {1, {0x0183}}},
+ { 0x0184, {1, {0x0185}}},
+ { 0x0186, {1, {0x0254}}},
+ { 0x0187, {1, {0x0188}}},
+ { 0x0189, {1, {0x0256}}},
+ { 0x018a, {1, {0x0257}}},
+ { 0x018b, {1, {0x018c}}},
+ { 0x018e, {1, {0x01dd}}},
+ { 0x018f, {1, {0x0259}}},
+ { 0x0190, {1, {0x025b}}},
+ { 0x0191, {1, {0x0192}}},
+ { 0x0193, {1, {0x0260}}},
+ { 0x0194, {1, {0x0263}}},
+ { 0x0196, {1, {0x0269}}},
+ { 0x0197, {1, {0x0268}}},
+ { 0x0198, {1, {0x0199}}},
+ { 0x019c, {1, {0x026f}}},
+ { 0x019d, {1, {0x0272}}},
+ { 0x019f, {1, {0x0275}}},
+ { 0x01a0, {1, {0x01a1}}},
+ { 0x01a2, {1, {0x01a3}}},
+ { 0x01a4, {1, {0x01a5}}},
+ { 0x01a6, {1, {0x0280}}},
+ { 0x01a7, {1, {0x01a8}}},
+ { 0x01a9, {1, {0x0283}}},
+ { 0x01ac, {1, {0x01ad}}},
+ { 0x01ae, {1, {0x0288}}},
+ { 0x01af, {1, {0x01b0}}},
+ { 0x01b1, {1, {0x028a}}},
+ { 0x01b2, {1, {0x028b}}},
+ { 0x01b3, {1, {0x01b4}}},
+ { 0x01b5, {1, {0x01b6}}},
+ { 0x01b7, {1, {0x0292}}},
+ { 0x01b8, {1, {0x01b9}}},
+ { 0x01bc, {1, {0x01bd}}},
+ { 0x01c4, {1, {0x01c6}}},
+ { 0x01c5, {1, {0x01c6}}},
+ { 0x01c7, {1, {0x01c9}}},
+ { 0x01c8, {1, {0x01c9}}},
+ { 0x01ca, {1, {0x01cc}}},
+ { 0x01cb, {1, {0x01cc}}},
+ { 0x01cd, {1, {0x01ce}}},
+ { 0x01cf, {1, {0x01d0}}},
+ { 0x01d1, {1, {0x01d2}}},
+ { 0x01d3, {1, {0x01d4}}},
+ { 0x01d5, {1, {0x01d6}}},
+ { 0x01d7, {1, {0x01d8}}},
+ { 0x01d9, {1, {0x01da}}},
+ { 0x01db, {1, {0x01dc}}},
+ { 0x01de, {1, {0x01df}}},
+ { 0x01e0, {1, {0x01e1}}},
+ { 0x01e2, {1, {0x01e3}}},
+ { 0x01e4, {1, {0x01e5}}},
+ { 0x01e6, {1, {0x01e7}}},
+ { 0x01e8, {1, {0x01e9}}},
+ { 0x01ea, {1, {0x01eb}}},
+ { 0x01ec, {1, {0x01ed}}},
+ { 0x01ee, {1, {0x01ef}}},
+ { 0x01f0, {2, {0x006a, 0x030c}}},
+ { 0x01f1, {1, {0x01f3}}},
+ { 0x01f2, {1, {0x01f3}}},
+ { 0x01f4, {1, {0x01f5}}},
+ { 0x01f6, {1, {0x0195}}},
+ { 0x01f7, {1, {0x01bf}}},
+ { 0x01f8, {1, {0x01f9}}},
+ { 0x01fa, {1, {0x01fb}}},
+ { 0x01fc, {1, {0x01fd}}},
+ { 0x01fe, {1, {0x01ff}}},
+ { 0x0200, {1, {0x0201}}},
+ { 0x0202, {1, {0x0203}}},
+ { 0x0204, {1, {0x0205}}},
+ { 0x0206, {1, {0x0207}}},
+ { 0x0208, {1, {0x0209}}},
+ { 0x020a, {1, {0x020b}}},
+ { 0x020c, {1, {0x020d}}},
+ { 0x020e, {1, {0x020f}}},
+ { 0x0210, {1, {0x0211}}},
+ { 0x0212, {1, {0x0213}}},
+ { 0x0214, {1, {0x0215}}},
+ { 0x0216, {1, {0x0217}}},
+ { 0x0218, {1, {0x0219}}},
+ { 0x021a, {1, {0x021b}}},
+ { 0x021c, {1, {0x021d}}},
+ { 0x021e, {1, {0x021f}}},
+ { 0x0220, {1, {0x019e}}},
+ { 0x0222, {1, {0x0223}}},
+ { 0x0224, {1, {0x0225}}},
+ { 0x0226, {1, {0x0227}}},
+ { 0x0228, {1, {0x0229}}},
+ { 0x022a, {1, {0x022b}}},
+ { 0x022c, {1, {0x022d}}},
+ { 0x022e, {1, {0x022f}}},
+ { 0x0230, {1, {0x0231}}},
+ { 0x0232, {1, {0x0233}}},
+ { 0x023b, {1, {0x023c}}},
+ { 0x023d, {1, {0x019a}}},
+ { 0x0241, {1, {0x0294}}},
+ { 0x0345, {1, {0x03b9}}},
+ { 0x0386, {1, {0x03ac}}},
+ { 0x0388, {1, {0x03ad}}},
+ { 0x0389, {1, {0x03ae}}},
+ { 0x038a, {1, {0x03af}}},
+ { 0x038c, {1, {0x03cc}}},
+ { 0x038e, {1, {0x03cd}}},
+ { 0x038f, {1, {0x03ce}}},
+ { 0x0390, {3, {0x03b9, 0x0308, 0x0301}}},
+ { 0x0391, {1, {0x03b1}}},
+ { 0x0392, {1, {0x03b2}}},
+ { 0x0393, {1, {0x03b3}}},
+ { 0x0394, {1, {0x03b4}}},
+ { 0x0395, {1, {0x03b5}}},
+ { 0x0396, {1, {0x03b6}}},
+ { 0x0397, {1, {0x03b7}}},
+ { 0x0398, {1, {0x03b8}}},
+ { 0x0399, {1, {0x03b9}}},
+ { 0x039a, {1, {0x03ba}}},
+ { 0x039b, {1, {0x03bb}}},
+ { 0x039c, {1, {0x03bc}}},
+ { 0x039d, {1, {0x03bd}}},
+ { 0x039e, {1, {0x03be}}},
+ { 0x039f, {1, {0x03bf}}},
+ { 0x03a0, {1, {0x03c0}}},
+ { 0x03a1, {1, {0x03c1}}},
+ { 0x03a3, {1, {0x03c3}}},
+ { 0x03a4, {1, {0x03c4}}},
+ { 0x03a5, {1, {0x03c5}}},
+ { 0x03a6, {1, {0x03c6}}},
+ { 0x03a7, {1, {0x03c7}}},
+ { 0x03a8, {1, {0x03c8}}},
+ { 0x03a9, {1, {0x03c9}}},
+ { 0x03aa, {1, {0x03ca}}},
+ { 0x03ab, {1, {0x03cb}}},
+ { 0x03b0, {3, {0x03c5, 0x0308, 0x0301}}},
+ { 0x03c2, {1, {0x03c3}}},
+ { 0x03d0, {1, {0x03b2}}},
+ { 0x03d1, {1, {0x03b8}}},
+ { 0x03d5, {1, {0x03c6}}},
+ { 0x03d6, {1, {0x03c0}}},
+ { 0x03d8, {1, {0x03d9}}},
+ { 0x03da, {1, {0x03db}}},
+ { 0x03dc, {1, {0x03dd}}},
+ { 0x03de, {1, {0x03df}}},
+ { 0x03e0, {1, {0x03e1}}},
+ { 0x03e2, {1, {0x03e3}}},
+ { 0x03e4, {1, {0x03e5}}},
+ { 0x03e6, {1, {0x03e7}}},
+ { 0x03e8, {1, {0x03e9}}},
+ { 0x03ea, {1, {0x03eb}}},
+ { 0x03ec, {1, {0x03ed}}},
+ { 0x03ee, {1, {0x03ef}}},
+ { 0x03f0, {1, {0x03ba}}},
+ { 0x03f1, {1, {0x03c1}}},
+ { 0x03f4, {1, {0x03b8}}},
+ { 0x03f5, {1, {0x03b5}}},
+ { 0x03f7, {1, {0x03f8}}},
+ { 0x03f9, {1, {0x03f2}}},
+ { 0x03fa, {1, {0x03fb}}},
+ { 0x0400, {1, {0x0450}}},
+ { 0x0401, {1, {0x0451}}},
+ { 0x0402, {1, {0x0452}}},
+ { 0x0403, {1, {0x0453}}},
+ { 0x0404, {1, {0x0454}}},
+ { 0x0405, {1, {0x0455}}},
+ { 0x0406, {1, {0x0456}}},
+ { 0x0407, {1, {0x0457}}},
+ { 0x0408, {1, {0x0458}}},
+ { 0x0409, {1, {0x0459}}},
+ { 0x040a, {1, {0x045a}}},
+ { 0x040b, {1, {0x045b}}},
+ { 0x040c, {1, {0x045c}}},
+ { 0x040d, {1, {0x045d}}},
+ { 0x040e, {1, {0x045e}}},
+ { 0x040f, {1, {0x045f}}},
+ { 0x0410, {1, {0x0430}}},
+ { 0x0411, {1, {0x0431}}},
+ { 0x0412, {1, {0x0432}}},
+ { 0x0413, {1, {0x0433}}},
+ { 0x0414, {1, {0x0434}}},
+ { 0x0415, {1, {0x0435}}},
+ { 0x0416, {1, {0x0436}}},
+ { 0x0417, {1, {0x0437}}},
+ { 0x0418, {1, {0x0438}}},
+ { 0x0419, {1, {0x0439}}},
+ { 0x041a, {1, {0x043a}}},
+ { 0x041b, {1, {0x043b}}},
+ { 0x041c, {1, {0x043c}}},
+ { 0x041d, {1, {0x043d}}},
+ { 0x041e, {1, {0x043e}}},
+ { 0x041f, {1, {0x043f}}},
+ { 0x0420, {1, {0x0440}}},
+ { 0x0421, {1, {0x0441}}},
+ { 0x0422, {1, {0x0442}}},
+ { 0x0423, {1, {0x0443}}},
+ { 0x0424, {1, {0x0444}}},
+ { 0x0425, {1, {0x0445}}},
+ { 0x0426, {1, {0x0446}}},
+ { 0x0427, {1, {0x0447}}},
+ { 0x0428, {1, {0x0448}}},
+ { 0x0429, {1, {0x0449}}},
+ { 0x042a, {1, {0x044a}}},
+ { 0x042b, {1, {0x044b}}},
+ { 0x042c, {1, {0x044c}}},
+ { 0x042d, {1, {0x044d}}},
+ { 0x042e, {1, {0x044e}}},
+ { 0x042f, {1, {0x044f}}},
+ { 0x0460, {1, {0x0461}}},
+ { 0x0462, {1, {0x0463}}},
+ { 0x0464, {1, {0x0465}}},
+ { 0x0466, {1, {0x0467}}},
+ { 0x0468, {1, {0x0469}}},
+ { 0x046a, {1, {0x046b}}},
+ { 0x046c, {1, {0x046d}}},
+ { 0x046e, {1, {0x046f}}},
+ { 0x0470, {1, {0x0471}}},
+ { 0x0472, {1, {0x0473}}},
+ { 0x0474, {1, {0x0475}}},
+ { 0x0476, {1, {0x0477}}},
+ { 0x0478, {1, {0x0479}}},
+ { 0x047a, {1, {0x047b}}},
+ { 0x047c, {1, {0x047d}}},
+ { 0x047e, {1, {0x047f}}},
+ { 0x0480, {1, {0x0481}}},
+ { 0x048a, {1, {0x048b}}},
+ { 0x048c, {1, {0x048d}}},
+ { 0x048e, {1, {0x048f}}},
+ { 0x0490, {1, {0x0491}}},
+ { 0x0492, {1, {0x0493}}},
+ { 0x0494, {1, {0x0495}}},
+ { 0x0496, {1, {0x0497}}},
+ { 0x0498, {1, {0x0499}}},
+ { 0x049a, {1, {0x049b}}},
+ { 0x049c, {1, {0x049d}}},
+ { 0x049e, {1, {0x049f}}},
+ { 0x04a0, {1, {0x04a1}}},
+ { 0x04a2, {1, {0x04a3}}},
+ { 0x04a4, {1, {0x04a5}}},
+ { 0x04a6, {1, {0x04a7}}},
+ { 0x04a8, {1, {0x04a9}}},
+ { 0x04aa, {1, {0x04ab}}},
+ { 0x04ac, {1, {0x04ad}}},
+ { 0x04ae, {1, {0x04af}}},
+ { 0x04b0, {1, {0x04b1}}},
+ { 0x04b2, {1, {0x04b3}}},
+ { 0x04b4, {1, {0x04b5}}},
+ { 0x04b6, {1, {0x04b7}}},
+ { 0x04b8, {1, {0x04b9}}},
+ { 0x04ba, {1, {0x04bb}}},
+ { 0x04bc, {1, {0x04bd}}},
+ { 0x04be, {1, {0x04bf}}},
+ { 0x04c1, {1, {0x04c2}}},
+ { 0x04c3, {1, {0x04c4}}},
+ { 0x04c5, {1, {0x04c6}}},
+ { 0x04c7, {1, {0x04c8}}},
+ { 0x04c9, {1, {0x04ca}}},
+ { 0x04cb, {1, {0x04cc}}},
+ { 0x04cd, {1, {0x04ce}}},
+ { 0x04d0, {1, {0x04d1}}},
+ { 0x04d2, {1, {0x04d3}}},
+ { 0x04d4, {1, {0x04d5}}},
+ { 0x04d6, {1, {0x04d7}}},
+ { 0x04d8, {1, {0x04d9}}},
+ { 0x04da, {1, {0x04db}}},
+ { 0x04dc, {1, {0x04dd}}},
+ { 0x04de, {1, {0x04df}}},
+ { 0x04e0, {1, {0x04e1}}},
+ { 0x04e2, {1, {0x04e3}}},
+ { 0x04e4, {1, {0x04e5}}},
+ { 0x04e6, {1, {0x04e7}}},
+ { 0x04e8, {1, {0x04e9}}},
+ { 0x04ea, {1, {0x04eb}}},
+ { 0x04ec, {1, {0x04ed}}},
+ { 0x04ee, {1, {0x04ef}}},
+ { 0x04f0, {1, {0x04f1}}},
+ { 0x04f2, {1, {0x04f3}}},
+ { 0x04f4, {1, {0x04f5}}},
+ { 0x04f6, {1, {0x04f7}}},
+ { 0x04f8, {1, {0x04f9}}},
+ { 0x0500, {1, {0x0501}}},
+ { 0x0502, {1, {0x0503}}},
+ { 0x0504, {1, {0x0505}}},
+ { 0x0506, {1, {0x0507}}},
+ { 0x0508, {1, {0x0509}}},
+ { 0x050a, {1, {0x050b}}},
+ { 0x050c, {1, {0x050d}}},
+ { 0x050e, {1, {0x050f}}},
+ { 0x0531, {1, {0x0561}}},
+ { 0x0532, {1, {0x0562}}},
+ { 0x0533, {1, {0x0563}}},
+ { 0x0534, {1, {0x0564}}},
+ { 0x0535, {1, {0x0565}}},
+ { 0x0536, {1, {0x0566}}},
+ { 0x0537, {1, {0x0567}}},
+ { 0x0538, {1, {0x0568}}},
+ { 0x0539, {1, {0x0569}}},
+ { 0x053a, {1, {0x056a}}},
+ { 0x053b, {1, {0x056b}}},
+ { 0x053c, {1, {0x056c}}},
+ { 0x053d, {1, {0x056d}}},
+ { 0x053e, {1, {0x056e}}},
+ { 0x053f, {1, {0x056f}}},
+ { 0x0540, {1, {0x0570}}},
+ { 0x0541, {1, {0x0571}}},
+ { 0x0542, {1, {0x0572}}},
+ { 0x0543, {1, {0x0573}}},
+ { 0x0544, {1, {0x0574}}},
+ { 0x0545, {1, {0x0575}}},
+ { 0x0546, {1, {0x0576}}},
+ { 0x0547, {1, {0x0577}}},
+ { 0x0548, {1, {0x0578}}},
+ { 0x0549, {1, {0x0579}}},
+ { 0x054a, {1, {0x057a}}},
+ { 0x054b, {1, {0x057b}}},
+ { 0x054c, {1, {0x057c}}},
+ { 0x054d, {1, {0x057d}}},
+ { 0x054e, {1, {0x057e}}},
+ { 0x054f, {1, {0x057f}}},
+ { 0x0550, {1, {0x0580}}},
+ { 0x0551, {1, {0x0581}}},
+ { 0x0552, {1, {0x0582}}},
+ { 0x0553, {1, {0x0583}}},
+ { 0x0554, {1, {0x0584}}},
+ { 0x0555, {1, {0x0585}}},
+ { 0x0556, {1, {0x0586}}},
+ { 0x0587, {2, {0x0565, 0x0582}}},
+ { 0x10a0, {1, {0x2d00}}},
+ { 0x10a1, {1, {0x2d01}}},
+ { 0x10a2, {1, {0x2d02}}},
+ { 0x10a3, {1, {0x2d03}}},
+ { 0x10a4, {1, {0x2d04}}},
+ { 0x10a5, {1, {0x2d05}}},
+ { 0x10a6, {1, {0x2d06}}},
+ { 0x10a7, {1, {0x2d07}}},
+ { 0x10a8, {1, {0x2d08}}},
+ { 0x10a9, {1, {0x2d09}}},
+ { 0x10aa, {1, {0x2d0a}}},
+ { 0x10ab, {1, {0x2d0b}}},
+ { 0x10ac, {1, {0x2d0c}}},
+ { 0x10ad, {1, {0x2d0d}}},
+ { 0x10ae, {1, {0x2d0e}}},
+ { 0x10af, {1, {0x2d0f}}},
+ { 0x10b0, {1, {0x2d10}}},
+ { 0x10b1, {1, {0x2d11}}},
+ { 0x10b2, {1, {0x2d12}}},
+ { 0x10b3, {1, {0x2d13}}},
+ { 0x10b4, {1, {0x2d14}}},
+ { 0x10b5, {1, {0x2d15}}},
+ { 0x10b6, {1, {0x2d16}}},
+ { 0x10b7, {1, {0x2d17}}},
+ { 0x10b8, {1, {0x2d18}}},
+ { 0x10b9, {1, {0x2d19}}},
+ { 0x10ba, {1, {0x2d1a}}},
+ { 0x10bb, {1, {0x2d1b}}},
+ { 0x10bc, {1, {0x2d1c}}},
+ { 0x10bd, {1, {0x2d1d}}},
+ { 0x10be, {1, {0x2d1e}}},
+ { 0x10bf, {1, {0x2d1f}}},
+ { 0x10c0, {1, {0x2d20}}},
+ { 0x10c1, {1, {0x2d21}}},
+ { 0x10c2, {1, {0x2d22}}},
+ { 0x10c3, {1, {0x2d23}}},
+ { 0x10c4, {1, {0x2d24}}},
+ { 0x10c5, {1, {0x2d25}}},
+ { 0x1e00, {1, {0x1e01}}},
+ { 0x1e02, {1, {0x1e03}}},
+ { 0x1e04, {1, {0x1e05}}},
+ { 0x1e06, {1, {0x1e07}}},
+ { 0x1e08, {1, {0x1e09}}},
+ { 0x1e0a, {1, {0x1e0b}}},
+ { 0x1e0c, {1, {0x1e0d}}},
+ { 0x1e0e, {1, {0x1e0f}}},
+ { 0x1e10, {1, {0x1e11}}},
+ { 0x1e12, {1, {0x1e13}}},
+ { 0x1e14, {1, {0x1e15}}},
+ { 0x1e16, {1, {0x1e17}}},
+ { 0x1e18, {1, {0x1e19}}},
+ { 0x1e1a, {1, {0x1e1b}}},
+ { 0x1e1c, {1, {0x1e1d}}},
+ { 0x1e1e, {1, {0x1e1f}}},
+ { 0x1e20, {1, {0x1e21}}},
+ { 0x1e22, {1, {0x1e23}}},
+ { 0x1e24, {1, {0x1e25}}},
+ { 0x1e26, {1, {0x1e27}}},
+ { 0x1e28, {1, {0x1e29}}},
+ { 0x1e2a, {1, {0x1e2b}}},
+ { 0x1e2c, {1, {0x1e2d}}},
+ { 0x1e2e, {1, {0x1e2f}}},
+ { 0x1e30, {1, {0x1e31}}},
+ { 0x1e32, {1, {0x1e33}}},
+ { 0x1e34, {1, {0x1e35}}},
+ { 0x1e36, {1, {0x1e37}}},
+ { 0x1e38, {1, {0x1e39}}},
+ { 0x1e3a, {1, {0x1e3b}}},
+ { 0x1e3c, {1, {0x1e3d}}},
+ { 0x1e3e, {1, {0x1e3f}}},
+ { 0x1e40, {1, {0x1e41}}},
+ { 0x1e42, {1, {0x1e43}}},
+ { 0x1e44, {1, {0x1e45}}},
+ { 0x1e46, {1, {0x1e47}}},
+ { 0x1e48, {1, {0x1e49}}},
+ { 0x1e4a, {1, {0x1e4b}}},
+ { 0x1e4c, {1, {0x1e4d}}},
+ { 0x1e4e, {1, {0x1e4f}}},
+ { 0x1e50, {1, {0x1e51}}},
+ { 0x1e52, {1, {0x1e53}}},
+ { 0x1e54, {1, {0x1e55}}},
+ { 0x1e56, {1, {0x1e57}}},
+ { 0x1e58, {1, {0x1e59}}},
+ { 0x1e5a, {1, {0x1e5b}}},
+ { 0x1e5c, {1, {0x1e5d}}},
+ { 0x1e5e, {1, {0x1e5f}}},
+ { 0x1e60, {1, {0x1e61}}},
+ { 0x1e62, {1, {0x1e63}}},
+ { 0x1e64, {1, {0x1e65}}},
+ { 0x1e66, {1, {0x1e67}}},
+ { 0x1e68, {1, {0x1e69}}},
+ { 0x1e6a, {1, {0x1e6b}}},
+ { 0x1e6c, {1, {0x1e6d}}},
+ { 0x1e6e, {1, {0x1e6f}}},
+ { 0x1e70, {1, {0x1e71}}},
+ { 0x1e72, {1, {0x1e73}}},
+ { 0x1e74, {1, {0x1e75}}},
+ { 0x1e76, {1, {0x1e77}}},
+ { 0x1e78, {1, {0x1e79}}},
+ { 0x1e7a, {1, {0x1e7b}}},
+ { 0x1e7c, {1, {0x1e7d}}},
+ { 0x1e7e, {1, {0x1e7f}}},
+ { 0x1e80, {1, {0x1e81}}},
+ { 0x1e82, {1, {0x1e83}}},
+ { 0x1e84, {1, {0x1e85}}},
+ { 0x1e86, {1, {0x1e87}}},
+ { 0x1e88, {1, {0x1e89}}},
+ { 0x1e8a, {1, {0x1e8b}}},
+ { 0x1e8c, {1, {0x1e8d}}},
+ { 0x1e8e, {1, {0x1e8f}}},
+ { 0x1e90, {1, {0x1e91}}},
+ { 0x1e92, {1, {0x1e93}}},
+ { 0x1e94, {1, {0x1e95}}},
+ { 0x1e96, {2, {0x0068, 0x0331}}},
+ { 0x1e97, {2, {0x0074, 0x0308}}},
+ { 0x1e98, {2, {0x0077, 0x030a}}},
+ { 0x1e99, {2, {0x0079, 0x030a}}},
+ { 0x1e9a, {2, {0x0061, 0x02be}}},
+ { 0x1e9b, {1, {0x1e61}}},
+ { 0x1ea0, {1, {0x1ea1}}},
+ { 0x1ea2, {1, {0x1ea3}}},
+ { 0x1ea4, {1, {0x1ea5}}},
+ { 0x1ea6, {1, {0x1ea7}}},
+ { 0x1ea8, {1, {0x1ea9}}},
+ { 0x1eaa, {1, {0x1eab}}},
+ { 0x1eac, {1, {0x1ead}}},
+ { 0x1eae, {1, {0x1eaf}}},
+ { 0x1eb0, {1, {0x1eb1}}},
+ { 0x1eb2, {1, {0x1eb3}}},
+ { 0x1eb4, {1, {0x1eb5}}},
+ { 0x1eb6, {1, {0x1eb7}}},
+ { 0x1eb8, {1, {0x1eb9}}},
+ { 0x1eba, {1, {0x1ebb}}},
+ { 0x1ebc, {1, {0x1ebd}}},
+ { 0x1ebe, {1, {0x1ebf}}},
+ { 0x1ec0, {1, {0x1ec1}}},
+ { 0x1ec2, {1, {0x1ec3}}},
+ { 0x1ec4, {1, {0x1ec5}}},
+ { 0x1ec6, {1, {0x1ec7}}},
+ { 0x1ec8, {1, {0x1ec9}}},
+ { 0x1eca, {1, {0x1ecb}}},
+ { 0x1ecc, {1, {0x1ecd}}},
+ { 0x1ece, {1, {0x1ecf}}},
+ { 0x1ed0, {1, {0x1ed1}}},
+ { 0x1ed2, {1, {0x1ed3}}},
+ { 0x1ed4, {1, {0x1ed5}}},
+ { 0x1ed6, {1, {0x1ed7}}},
+ { 0x1ed8, {1, {0x1ed9}}},
+ { 0x1eda, {1, {0x1edb}}},
+ { 0x1edc, {1, {0x1edd}}},
+ { 0x1ede, {1, {0x1edf}}},
+ { 0x1ee0, {1, {0x1ee1}}},
+ { 0x1ee2, {1, {0x1ee3}}},
+ { 0x1ee4, {1, {0x1ee5}}},
+ { 0x1ee6, {1, {0x1ee7}}},
+ { 0x1ee8, {1, {0x1ee9}}},
+ { 0x1eea, {1, {0x1eeb}}},
+ { 0x1eec, {1, {0x1eed}}},
+ { 0x1eee, {1, {0x1eef}}},
+ { 0x1ef0, {1, {0x1ef1}}},
+ { 0x1ef2, {1, {0x1ef3}}},
+ { 0x1ef4, {1, {0x1ef5}}},
+ { 0x1ef6, {1, {0x1ef7}}},
+ { 0x1ef8, {1, {0x1ef9}}},
+ { 0x1f08, {1, {0x1f00}}},
+ { 0x1f09, {1, {0x1f01}}},
+ { 0x1f0a, {1, {0x1f02}}},
+ { 0x1f0b, {1, {0x1f03}}},
+ { 0x1f0c, {1, {0x1f04}}},
+ { 0x1f0d, {1, {0x1f05}}},
+ { 0x1f0e, {1, {0x1f06}}},
+ { 0x1f0f, {1, {0x1f07}}},
+ { 0x1f18, {1, {0x1f10}}},
+ { 0x1f19, {1, {0x1f11}}},
+ { 0x1f1a, {1, {0x1f12}}},
+ { 0x1f1b, {1, {0x1f13}}},
+ { 0x1f1c, {1, {0x1f14}}},
+ { 0x1f1d, {1, {0x1f15}}},
+ { 0x1f28, {1, {0x1f20}}},
+ { 0x1f29, {1, {0x1f21}}},
+ { 0x1f2a, {1, {0x1f22}}},
+ { 0x1f2b, {1, {0x1f23}}},
+ { 0x1f2c, {1, {0x1f24}}},
+ { 0x1f2d, {1, {0x1f25}}},
+ { 0x1f2e, {1, {0x1f26}}},
+ { 0x1f2f, {1, {0x1f27}}},
+ { 0x1f38, {1, {0x1f30}}},
+ { 0x1f39, {1, {0x1f31}}},
+ { 0x1f3a, {1, {0x1f32}}},
+ { 0x1f3b, {1, {0x1f33}}},
+ { 0x1f3c, {1, {0x1f34}}},
+ { 0x1f3d, {1, {0x1f35}}},
+ { 0x1f3e, {1, {0x1f36}}},
+ { 0x1f3f, {1, {0x1f37}}},
+ { 0x1f48, {1, {0x1f40}}},
+ { 0x1f49, {1, {0x1f41}}},
+ { 0x1f4a, {1, {0x1f42}}},
+ { 0x1f4b, {1, {0x1f43}}},
+ { 0x1f4c, {1, {0x1f44}}},
+ { 0x1f4d, {1, {0x1f45}}},
+ { 0x1f50, {2, {0x03c5, 0x0313}}},
+ { 0x1f52, {3, {0x03c5, 0x0313, 0x0300}}},
+ { 0x1f54, {3, {0x03c5, 0x0313, 0x0301}}},
+ { 0x1f56, {3, {0x03c5, 0x0313, 0x0342}}},
+ { 0x1f59, {1, {0x1f51}}},
+ { 0x1f5b, {1, {0x1f53}}},
+ { 0x1f5d, {1, {0x1f55}}},
+ { 0x1f5f, {1, {0x1f57}}},
+ { 0x1f68, {1, {0x1f60}}},
+ { 0x1f69, {1, {0x1f61}}},
+ { 0x1f6a, {1, {0x1f62}}},
+ { 0x1f6b, {1, {0x1f63}}},
+ { 0x1f6c, {1, {0x1f64}}},
+ { 0x1f6d, {1, {0x1f65}}},
+ { 0x1f6e, {1, {0x1f66}}},
+ { 0x1f6f, {1, {0x1f67}}},
+ { 0x1f80, {2, {0x1f00, 0x03b9}}},
+ { 0x1f81, {2, {0x1f01, 0x03b9}}},
+ { 0x1f82, {2, {0x1f02, 0x03b9}}},
+ { 0x1f83, {2, {0x1f03, 0x03b9}}},
+ { 0x1f84, {2, {0x1f04, 0x03b9}}},
+ { 0x1f85, {2, {0x1f05, 0x03b9}}},
+ { 0x1f86, {2, {0x1f06, 0x03b9}}},
+ { 0x1f87, {2, {0x1f07, 0x03b9}}},
+ { 0x1f88, {2, {0x1f00, 0x03b9}}},
+ { 0x1f89, {2, {0x1f01, 0x03b9}}},
+ { 0x1f8a, {2, {0x1f02, 0x03b9}}},
+ { 0x1f8b, {2, {0x1f03, 0x03b9}}},
+ { 0x1f8c, {2, {0x1f04, 0x03b9}}},
+ { 0x1f8d, {2, {0x1f05, 0x03b9}}},
+ { 0x1f8e, {2, {0x1f06, 0x03b9}}},
+ { 0x1f8f, {2, {0x1f07, 0x03b9}}},
+ { 0x1f90, {2, {0x1f20, 0x03b9}}},
+ { 0x1f91, {2, {0x1f21, 0x03b9}}},
+ { 0x1f92, {2, {0x1f22, 0x03b9}}},
+ { 0x1f93, {2, {0x1f23, 0x03b9}}},
+ { 0x1f94, {2, {0x1f24, 0x03b9}}},
+ { 0x1f95, {2, {0x1f25, 0x03b9}}},
+ { 0x1f96, {2, {0x1f26, 0x03b9}}},
+ { 0x1f97, {2, {0x1f27, 0x03b9}}},
+ { 0x1f98, {2, {0x1f20, 0x03b9}}},
+ { 0x1f99, {2, {0x1f21, 0x03b9}}},
+ { 0x1f9a, {2, {0x1f22, 0x03b9}}},
+ { 0x1f9b, {2, {0x1f23, 0x03b9}}},
+ { 0x1f9c, {2, {0x1f24, 0x03b9}}},
+ { 0x1f9d, {2, {0x1f25, 0x03b9}}},
+ { 0x1f9e, {2, {0x1f26, 0x03b9}}},
+ { 0x1f9f, {2, {0x1f27, 0x03b9}}},
+ { 0x1fa0, {2, {0x1f60, 0x03b9}}},
+ { 0x1fa1, {2, {0x1f61, 0x03b9}}},
+ { 0x1fa2, {2, {0x1f62, 0x03b9}}},
+ { 0x1fa3, {2, {0x1f63, 0x03b9}}},
+ { 0x1fa4, {2, {0x1f64, 0x03b9}}},
+ { 0x1fa5, {2, {0x1f65, 0x03b9}}},
+ { 0x1fa6, {2, {0x1f66, 0x03b9}}},
+ { 0x1fa7, {2, {0x1f67, 0x03b9}}},
+ { 0x1fa8, {2, {0x1f60, 0x03b9}}},
+ { 0x1fa9, {2, {0x1f61, 0x03b9}}},
+ { 0x1faa, {2, {0x1f62, 0x03b9}}},
+ { 0x1fab, {2, {0x1f63, 0x03b9}}},
+ { 0x1fac, {2, {0x1f64, 0x03b9}}},
+ { 0x1fad, {2, {0x1f65, 0x03b9}}},
+ { 0x1fae, {2, {0x1f66, 0x03b9}}},
+ { 0x1faf, {2, {0x1f67, 0x03b9}}},
+ { 0x1fb2, {2, {0x1f70, 0x03b9}}},
+ { 0x1fb3, {2, {0x03b1, 0x03b9}}},
+ { 0x1fb4, {2, {0x03ac, 0x03b9}}},
+ { 0x1fb6, {2, {0x03b1, 0x0342}}},
+ { 0x1fb7, {3, {0x03b1, 0x0342, 0x03b9}}},
+ { 0x1fb8, {1, {0x1fb0}}},
+ { 0x1fb9, {1, {0x1fb1}}},
+ { 0x1fba, {1, {0x1f70}}},
+ { 0x1fbb, {1, {0x1f71}}},
+ { 0x1fbc, {2, {0x03b1, 0x03b9}}},
+ { 0x1fbe, {1, {0x03b9}}},
+ { 0x1fc2, {2, {0x1f74, 0x03b9}}},
+ { 0x1fc3, {2, {0x03b7, 0x03b9}}},
+ { 0x1fc4, {2, {0x03ae, 0x03b9}}},
+ { 0x1fc6, {2, {0x03b7, 0x0342}}},
+ { 0x1fc7, {3, {0x03b7, 0x0342, 0x03b9}}},
+ { 0x1fc8, {1, {0x1f72}}},
+ { 0x1fc9, {1, {0x1f73}}},
+ { 0x1fca, {1, {0x1f74}}},
+ { 0x1fcb, {1, {0x1f75}}},
+ { 0x1fcc, {2, {0x03b7, 0x03b9}}},
+ { 0x1fd2, {3, {0x03b9, 0x0308, 0x0300}}},
+ { 0x1fd3, {3, {0x03b9, 0x0308, 0x0301}}},
+ { 0x1fd6, {2, {0x03b9, 0x0342}}},
+ { 0x1fd7, {3, {0x03b9, 0x0308, 0x0342}}},
+ { 0x1fd8, {1, {0x1fd0}}},
+ { 0x1fd9, {1, {0x1fd1}}},
+ { 0x1fda, {1, {0x1f76}}},
+ { 0x1fdb, {1, {0x1f77}}},
+ { 0x1fe2, {3, {0x03c5, 0x0308, 0x0300}}},
+ { 0x1fe3, {3, {0x03c5, 0x0308, 0x0301}}},
+ { 0x1fe4, {2, {0x03c1, 0x0313}}},
+ { 0x1fe6, {2, {0x03c5, 0x0342}}},
+ { 0x1fe7, {3, {0x03c5, 0x0308, 0x0342}}},
+ { 0x1fe8, {1, {0x1fe0}}},
+ { 0x1fe9, {1, {0x1fe1}}},
+ { 0x1fea, {1, {0x1f7a}}},
+ { 0x1feb, {1, {0x1f7b}}},
+ { 0x1fec, {1, {0x1fe5}}},
+ { 0x1ff2, {2, {0x1f7c, 0x03b9}}},
+ { 0x1ff3, {2, {0x03c9, 0x03b9}}},
+ { 0x1ff4, {2, {0x03ce, 0x03b9}}},
+ { 0x1ff6, {2, {0x03c9, 0x0342}}},
+ { 0x1ff7, {3, {0x03c9, 0x0342, 0x03b9}}},
+ { 0x1ff8, {1, {0x1f78}}},
+ { 0x1ff9, {1, {0x1f79}}},
+ { 0x1ffa, {1, {0x1f7c}}},
+ { 0x1ffb, {1, {0x1f7d}}},
+ { 0x1ffc, {2, {0x03c9, 0x03b9}}},
+ { 0x2126, {1, {0x03c9}}},
+ { 0x212a, {1, {0x006b}}},
+ { 0x212b, {1, {0x00e5}}},
+ { 0x2160, {1, {0x2170}}},
+ { 0x2161, {1, {0x2171}}},
+ { 0x2162, {1, {0x2172}}},
+ { 0x2163, {1, {0x2173}}},
+ { 0x2164, {1, {0x2174}}},
+ { 0x2165, {1, {0x2175}}},
+ { 0x2166, {1, {0x2176}}},
+ { 0x2167, {1, {0x2177}}},
+ { 0x2168, {1, {0x2178}}},
+ { 0x2169, {1, {0x2179}}},
+ { 0x216a, {1, {0x217a}}},
+ { 0x216b, {1, {0x217b}}},
+ { 0x216c, {1, {0x217c}}},
+ { 0x216d, {1, {0x217d}}},
+ { 0x216e, {1, {0x217e}}},
+ { 0x216f, {1, {0x217f}}},
+ { 0x24b6, {1, {0x24d0}}},
+ { 0x24b7, {1, {0x24d1}}},
+ { 0x24b8, {1, {0x24d2}}},
+ { 0x24b9, {1, {0x24d3}}},
+ { 0x24ba, {1, {0x24d4}}},
+ { 0x24bb, {1, {0x24d5}}},
+ { 0x24bc, {1, {0x24d6}}},
+ { 0x24bd, {1, {0x24d7}}},
+ { 0x24be, {1, {0x24d8}}},
+ { 0x24bf, {1, {0x24d9}}},
+ { 0x24c0, {1, {0x24da}}},
+ { 0x24c1, {1, {0x24db}}},
+ { 0x24c2, {1, {0x24dc}}},
+ { 0x24c3, {1, {0x24dd}}},
+ { 0x24c4, {1, {0x24de}}},
+ { 0x24c5, {1, {0x24df}}},
+ { 0x24c6, {1, {0x24e0}}},
+ { 0x24c7, {1, {0x24e1}}},
+ { 0x24c8, {1, {0x24e2}}},
+ { 0x24c9, {1, {0x24e3}}},
+ { 0x24ca, {1, {0x24e4}}},
+ { 0x24cb, {1, {0x24e5}}},
+ { 0x24cc, {1, {0x24e6}}},
+ { 0x24cd, {1, {0x24e7}}},
+ { 0x24ce, {1, {0x24e8}}},
+ { 0x24cf, {1, {0x24e9}}},
+ { 0x2c00, {1, {0x2c30}}},
+ { 0x2c01, {1, {0x2c31}}},
+ { 0x2c02, {1, {0x2c32}}},
+ { 0x2c03, {1, {0x2c33}}},
+ { 0x2c04, {1, {0x2c34}}},
+ { 0x2c05, {1, {0x2c35}}},
+ { 0x2c06, {1, {0x2c36}}},
+ { 0x2c07, {1, {0x2c37}}},
+ { 0x2c08, {1, {0x2c38}}},
+ { 0x2c09, {1, {0x2c39}}},
+ { 0x2c0a, {1, {0x2c3a}}},
+ { 0x2c0b, {1, {0x2c3b}}},
+ { 0x2c0c, {1, {0x2c3c}}},
+ { 0x2c0d, {1, {0x2c3d}}},
+ { 0x2c0e, {1, {0x2c3e}}},
+ { 0x2c0f, {1, {0x2c3f}}},
+ { 0x2c10, {1, {0x2c40}}},
+ { 0x2c11, {1, {0x2c41}}},
+ { 0x2c12, {1, {0x2c42}}},
+ { 0x2c13, {1, {0x2c43}}},
+ { 0x2c14, {1, {0x2c44}}},
+ { 0x2c15, {1, {0x2c45}}},
+ { 0x2c16, {1, {0x2c46}}},
+ { 0x2c17, {1, {0x2c47}}},
+ { 0x2c18, {1, {0x2c48}}},
+ { 0x2c19, {1, {0x2c49}}},
+ { 0x2c1a, {1, {0x2c4a}}},
+ { 0x2c1b, {1, {0x2c4b}}},
+ { 0x2c1c, {1, {0x2c4c}}},
+ { 0x2c1d, {1, {0x2c4d}}},
+ { 0x2c1e, {1, {0x2c4e}}},
+ { 0x2c1f, {1, {0x2c4f}}},
+ { 0x2c20, {1, {0x2c50}}},
+ { 0x2c21, {1, {0x2c51}}},
+ { 0x2c22, {1, {0x2c52}}},
+ { 0x2c23, {1, {0x2c53}}},
+ { 0x2c24, {1, {0x2c54}}},
+ { 0x2c25, {1, {0x2c55}}},
+ { 0x2c26, {1, {0x2c56}}},
+ { 0x2c27, {1, {0x2c57}}},
+ { 0x2c28, {1, {0x2c58}}},
+ { 0x2c29, {1, {0x2c59}}},
+ { 0x2c2a, {1, {0x2c5a}}},
+ { 0x2c2b, {1, {0x2c5b}}},
+ { 0x2c2c, {1, {0x2c5c}}},
+ { 0x2c2d, {1, {0x2c5d}}},
+ { 0x2c2e, {1, {0x2c5e}}},
+ { 0x2c80, {1, {0x2c81}}},
+ { 0x2c82, {1, {0x2c83}}},
+ { 0x2c84, {1, {0x2c85}}},
+ { 0x2c86, {1, {0x2c87}}},
+ { 0x2c88, {1, {0x2c89}}},
+ { 0x2c8a, {1, {0x2c8b}}},
+ { 0x2c8c, {1, {0x2c8d}}},
+ { 0x2c8e, {1, {0x2c8f}}},
+ { 0x2c90, {1, {0x2c91}}},
+ { 0x2c92, {1, {0x2c93}}},
+ { 0x2c94, {1, {0x2c95}}},
+ { 0x2c96, {1, {0x2c97}}},
+ { 0x2c98, {1, {0x2c99}}},
+ { 0x2c9a, {1, {0x2c9b}}},
+ { 0x2c9c, {1, {0x2c9d}}},
+ { 0x2c9e, {1, {0x2c9f}}},
+ { 0x2ca0, {1, {0x2ca1}}},
+ { 0x2ca2, {1, {0x2ca3}}},
+ { 0x2ca4, {1, {0x2ca5}}},
+ { 0x2ca6, {1, {0x2ca7}}},
+ { 0x2ca8, {1, {0x2ca9}}},
+ { 0x2caa, {1, {0x2cab}}},
+ { 0x2cac, {1, {0x2cad}}},
+ { 0x2cae, {1, {0x2caf}}},
+ { 0x2cb0, {1, {0x2cb1}}},
+ { 0x2cb2, {1, {0x2cb3}}},
+ { 0x2cb4, {1, {0x2cb5}}},
+ { 0x2cb6, {1, {0x2cb7}}},
+ { 0x2cb8, {1, {0x2cb9}}},
+ { 0x2cba, {1, {0x2cbb}}},
+ { 0x2cbc, {1, {0x2cbd}}},
+ { 0x2cbe, {1, {0x2cbf}}},
+ { 0x2cc0, {1, {0x2cc1}}},
+ { 0x2cc2, {1, {0x2cc3}}},
+ { 0x2cc4, {1, {0x2cc5}}},
+ { 0x2cc6, {1, {0x2cc7}}},
+ { 0x2cc8, {1, {0x2cc9}}},
+ { 0x2cca, {1, {0x2ccb}}},
+ { 0x2ccc, {1, {0x2ccd}}},
+ { 0x2cce, {1, {0x2ccf}}},
+ { 0x2cd0, {1, {0x2cd1}}},
+ { 0x2cd2, {1, {0x2cd3}}},
+ { 0x2cd4, {1, {0x2cd5}}},
+ { 0x2cd6, {1, {0x2cd7}}},
+ { 0x2cd8, {1, {0x2cd9}}},
+ { 0x2cda, {1, {0x2cdb}}},
+ { 0x2cdc, {1, {0x2cdd}}},
+ { 0x2cde, {1, {0x2cdf}}},
+ { 0x2ce0, {1, {0x2ce1}}},
+ { 0x2ce2, {1, {0x2ce3}}},
+ { 0xfb00, {2, {0x0066, 0x0066}}},
+ { 0xfb01, {2, {0x0066, 0x0069}}},
+ { 0xfb02, {2, {0x0066, 0x006c}}},
+ { 0xfb03, {3, {0x0066, 0x0066, 0x0069}}},
+ { 0xfb04, {3, {0x0066, 0x0066, 0x006c}}},
+ { 0xfb05, {2, {0x0073, 0x0074}}},
+ { 0xfb06, {2, {0x0073, 0x0074}}},
+ { 0xfb13, {2, {0x0574, 0x0576}}},
+ { 0xfb14, {2, {0x0574, 0x0565}}},
+ { 0xfb15, {2, {0x0574, 0x056b}}},
+ { 0xfb16, {2, {0x057e, 0x0576}}},
+ { 0xfb17, {2, {0x0574, 0x056d}}},
+ { 0xff21, {1, {0xff41}}},
+ { 0xff22, {1, {0xff42}}},
+ { 0xff23, {1, {0xff43}}},
+ { 0xff24, {1, {0xff44}}},
+ { 0xff25, {1, {0xff45}}},
+ { 0xff26, {1, {0xff46}}},
+ { 0xff27, {1, {0xff47}}},
+ { 0xff28, {1, {0xff48}}},
+ { 0xff29, {1, {0xff49}}},
+ { 0xff2a, {1, {0xff4a}}},
+ { 0xff2b, {1, {0xff4b}}},
+ { 0xff2c, {1, {0xff4c}}},
+ { 0xff2d, {1, {0xff4d}}},
+ { 0xff2e, {1, {0xff4e}}},
+ { 0xff2f, {1, {0xff4f}}},
+ { 0xff30, {1, {0xff50}}},
+ { 0xff31, {1, {0xff51}}},
+ { 0xff32, {1, {0xff52}}},
+ { 0xff33, {1, {0xff53}}},
+ { 0xff34, {1, {0xff54}}},
+ { 0xff35, {1, {0xff55}}},
+ { 0xff36, {1, {0xff56}}},
+ { 0xff37, {1, {0xff57}}},
+ { 0xff38, {1, {0xff58}}},
+ { 0xff39, {1, {0xff59}}},
+ { 0xff3a, {1, {0xff5a}}},
+ { 0x10400, {1, {0x10428}}},
+ { 0x10401, {1, {0x10429}}},
+ { 0x10402, {1, {0x1042a}}},
+ { 0x10403, {1, {0x1042b}}},
+ { 0x10404, {1, {0x1042c}}},
+ { 0x10405, {1, {0x1042d}}},
+ { 0x10406, {1, {0x1042e}}},
+ { 0x10407, {1, {0x1042f}}},
+ { 0x10408, {1, {0x10430}}},
+ { 0x10409, {1, {0x10431}}},
+ { 0x1040a, {1, {0x10432}}},
+ { 0x1040b, {1, {0x10433}}},
+ { 0x1040c, {1, {0x10434}}},
+ { 0x1040d, {1, {0x10435}}},
+ { 0x1040e, {1, {0x10436}}},
+ { 0x1040f, {1, {0x10437}}},
+ { 0x10410, {1, {0x10438}}},
+ { 0x10411, {1, {0x10439}}},
+ { 0x10412, {1, {0x1043a}}},
+ { 0x10413, {1, {0x1043b}}},
+ { 0x10414, {1, {0x1043c}}},
+ { 0x10415, {1, {0x1043d}}},
+ { 0x10416, {1, {0x1043e}}},
+ { 0x10417, {1, {0x1043f}}},
+ { 0x10418, {1, {0x10440}}},
+ { 0x10419, {1, {0x10441}}},
+ { 0x1041a, {1, {0x10442}}},
+ { 0x1041b, {1, {0x10443}}},
+ { 0x1041c, {1, {0x10444}}},
+ { 0x1041d, {1, {0x10445}}},
+ { 0x1041e, {1, {0x10446}}},
+ { 0x1041f, {1, {0x10447}}},
+ { 0x10420, {1, {0x10448}}},
+ { 0x10421, {1, {0x10449}}},
+ { 0x10422, {1, {0x1044a}}},
+ { 0x10423, {1, {0x1044b}}},
+ { 0x10424, {1, {0x1044c}}},
+ { 0x10425, {1, {0x1044d}}},
+ { 0x10426, {1, {0x1044e}}},
+ { 0x10427, {1, {0x1044f}}}
+};
+
+static const CaseFold_11_Type CaseFold_Locale[] = {
+ { 0x0049, {1, {0x0069}}},
+ { 0x0130, {2, {0x0069, 0x0307}}}
+};
+
+static const CaseUnfold_11_Type CaseUnfold_11[] = {
+ { 0x0061, {1, {0x0041 }}},
+ { 0x0062, {1, {0x0042 }}},
+ { 0x0063, {1, {0x0043 }}},
+ { 0x0064, {1, {0x0044 }}},
+ { 0x0065, {1, {0x0045 }}},
+ { 0x0066, {1, {0x0046 }}},
+ { 0x0067, {1, {0x0047 }}},
+ { 0x0068, {1, {0x0048 }}},
+ { 0x006a, {1, {0x004a }}},
+ { 0x006b, {2, {0x212a, 0x004b }}},
+ { 0x006c, {1, {0x004c }}},
+ { 0x006d, {1, {0x004d }}},
+ { 0x006e, {1, {0x004e }}},
+ { 0x006f, {1, {0x004f }}},
+ { 0x0070, {1, {0x0050 }}},
+ { 0x0071, {1, {0x0051 }}},
+ { 0x0072, {1, {0x0052 }}},
+ { 0x0073, {2, {0x0053, 0x017f }}},
+ { 0x0074, {1, {0x0054 }}},
+ { 0x0075, {1, {0x0055 }}},
+ { 0x0076, {1, {0x0056 }}},
+ { 0x0077, {1, {0x0057 }}},
+ { 0x0078, {1, {0x0058 }}},
+ { 0x0079, {1, {0x0059 }}},
+ { 0x007a, {1, {0x005a }}},
+ { 0x00e0, {1, {0x00c0 }}},
+ { 0x00e1, {1, {0x00c1 }}},
+ { 0x00e2, {1, {0x00c2 }}},
+ { 0x00e3, {1, {0x00c3 }}},
+ { 0x00e4, {1, {0x00c4 }}},
+ { 0x00e5, {2, {0x212b, 0x00c5 }}},
+ { 0x00e6, {1, {0x00c6 }}},
+ { 0x00e7, {1, {0x00c7 }}},
+ { 0x00e8, {1, {0x00c8 }}},
+ { 0x00e9, {1, {0x00c9 }}},
+ { 0x00ea, {1, {0x00ca }}},
+ { 0x00eb, {1, {0x00cb }}},
+ { 0x00ec, {1, {0x00cc }}},
+ { 0x00ed, {1, {0x00cd }}},
+ { 0x00ee, {1, {0x00ce }}},
+ { 0x00ef, {1, {0x00cf }}},
+ { 0x00f0, {1, {0x00d0 }}},
+ { 0x00f1, {1, {0x00d1 }}},
+ { 0x00f2, {1, {0x00d2 }}},
+ { 0x00f3, {1, {0x00d3 }}},
+ { 0x00f4, {1, {0x00d4 }}},
+ { 0x00f5, {1, {0x00d5 }}},
+ { 0x00f6, {1, {0x00d6 }}},
+ { 0x00f8, {1, {0x00d8 }}},
+ { 0x00f9, {1, {0x00d9 }}},
+ { 0x00fa, {1, {0x00da }}},
+ { 0x00fb, {1, {0x00db }}},
+ { 0x00fc, {1, {0x00dc }}},
+ { 0x00fd, {1, {0x00dd }}},
+ { 0x00fe, {1, {0x00de }}},
+ { 0x00ff, {1, {0x0178 }}},
+ { 0x0101, {1, {0x0100 }}},
+ { 0x0103, {1, {0x0102 }}},
+ { 0x0105, {1, {0x0104 }}},
+ { 0x0107, {1, {0x0106 }}},
+ { 0x0109, {1, {0x0108 }}},
+ { 0x010b, {1, {0x010a }}},
+ { 0x010d, {1, {0x010c }}},
+ { 0x010f, {1, {0x010e }}},
+ { 0x0111, {1, {0x0110 }}},
+ { 0x0113, {1, {0x0112 }}},
+ { 0x0115, {1, {0x0114 }}},
+ { 0x0117, {1, {0x0116 }}},
+ { 0x0119, {1, {0x0118 }}},
+ { 0x011b, {1, {0x011a }}},
+ { 0x011d, {1, {0x011c }}},
+ { 0x011f, {1, {0x011e }}},
+ { 0x0121, {1, {0x0120 }}},
+ { 0x0123, {1, {0x0122 }}},
+ { 0x0125, {1, {0x0124 }}},
+ { 0x0127, {1, {0x0126 }}},
+ { 0x0129, {1, {0x0128 }}},
+ { 0x012b, {1, {0x012a }}},
+ { 0x012d, {1, {0x012c }}},
+ { 0x012f, {1, {0x012e }}},
+ { 0x0133, {1, {0x0132 }}},
+ { 0x0135, {1, {0x0134 }}},
+ { 0x0137, {1, {0x0136 }}},
+ { 0x013a, {1, {0x0139 }}},
+ { 0x013c, {1, {0x013b }}},
+ { 0x013e, {1, {0x013d }}},
+ { 0x0140, {1, {0x013f }}},
+ { 0x0142, {1, {0x0141 }}},
+ { 0x0144, {1, {0x0143 }}},
+ { 0x0146, {1, {0x0145 }}},
+ { 0x0148, {1, {0x0147 }}},
+ { 0x014b, {1, {0x014a }}},
+ { 0x014d, {1, {0x014c }}},
+ { 0x014f, {1, {0x014e }}},
+ { 0x0151, {1, {0x0150 }}},
+ { 0x0153, {1, {0x0152 }}},
+ { 0x0155, {1, {0x0154 }}},
+ { 0x0157, {1, {0x0156 }}},
+ { 0x0159, {1, {0x0158 }}},
+ { 0x015b, {1, {0x015a }}},
+ { 0x015d, {1, {0x015c }}},
+ { 0x015f, {1, {0x015e }}},
+ { 0x0161, {1, {0x0160 }}},
+ { 0x0163, {1, {0x0162 }}},
+ { 0x0165, {1, {0x0164 }}},
+ { 0x0167, {1, {0x0166 }}},
+ { 0x0169, {1, {0x0168 }}},
+ { 0x016b, {1, {0x016a }}},
+ { 0x016d, {1, {0x016c }}},
+ { 0x016f, {1, {0x016e }}},
+ { 0x0171, {1, {0x0170 }}},
+ { 0x0173, {1, {0x0172 }}},
+ { 0x0175, {1, {0x0174 }}},
+ { 0x0177, {1, {0x0176 }}},
+ { 0x017a, {1, {0x0179 }}},
+ { 0x017c, {1, {0x017b }}},
+ { 0x017e, {1, {0x017d }}},
+ { 0x0183, {1, {0x0182 }}},
+ { 0x0185, {1, {0x0184 }}},
+ { 0x0188, {1, {0x0187 }}},
+ { 0x018c, {1, {0x018b }}},
+ { 0x0192, {1, {0x0191 }}},
+ { 0x0195, {1, {0x01f6 }}},
+ { 0x0199, {1, {0x0198 }}},
+ { 0x019a, {1, {0x023d }}},
+ { 0x019e, {1, {0x0220 }}},
+ { 0x01a1, {1, {0x01a0 }}},
+ { 0x01a3, {1, {0x01a2 }}},
+ { 0x01a5, {1, {0x01a4 }}},
+ { 0x01a8, {1, {0x01a7 }}},
+ { 0x01ad, {1, {0x01ac }}},
+ { 0x01b0, {1, {0x01af }}},
+ { 0x01b4, {1, {0x01b3 }}},
+ { 0x01b6, {1, {0x01b5 }}},
+ { 0x01b9, {1, {0x01b8 }}},
+ { 0x01bd, {1, {0x01bc }}},
+ { 0x01bf, {1, {0x01f7 }}},
+ { 0x01c6, {2, {0x01c4, 0x01c5 }}},
+ { 0x01c9, {2, {0x01c7, 0x01c8 }}},
+ { 0x01cc, {2, {0x01ca, 0x01cb }}},
+ { 0x01ce, {1, {0x01cd }}},
+ { 0x01d0, {1, {0x01cf }}},
+ { 0x01d2, {1, {0x01d1 }}},
+ { 0x01d4, {1, {0x01d3 }}},
+ { 0x01d6, {1, {0x01d5 }}},
+ { 0x01d8, {1, {0x01d7 }}},
+ { 0x01da, {1, {0x01d9 }}},
+ { 0x01dc, {1, {0x01db }}},
+ { 0x01dd, {1, {0x018e }}},
+ { 0x01df, {1, {0x01de }}},
+ { 0x01e1, {1, {0x01e0 }}},
+ { 0x01e3, {1, {0x01e2 }}},
+ { 0x01e5, {1, {0x01e4 }}},
+ { 0x01e7, {1, {0x01e6 }}},
+ { 0x01e9, {1, {0x01e8 }}},
+ { 0x01eb, {1, {0x01ea }}},
+ { 0x01ed, {1, {0x01ec }}},
+ { 0x01ef, {1, {0x01ee }}},
+ { 0x01f3, {2, {0x01f1, 0x01f2 }}},
+ { 0x01f5, {1, {0x01f4 }}},
+ { 0x01f9, {1, {0x01f8 }}},
+ { 0x01fb, {1, {0x01fa }}},
+ { 0x01fd, {1, {0x01fc }}},
+ { 0x01ff, {1, {0x01fe }}},
+ { 0x0201, {1, {0x0200 }}},
+ { 0x0203, {1, {0x0202 }}},
+ { 0x0205, {1, {0x0204 }}},
+ { 0x0207, {1, {0x0206 }}},
+ { 0x0209, {1, {0x0208 }}},
+ { 0x020b, {1, {0x020a }}},
+ { 0x020d, {1, {0x020c }}},
+ { 0x020f, {1, {0x020e }}},
+ { 0x0211, {1, {0x0210 }}},
+ { 0x0213, {1, {0x0212 }}},
+ { 0x0215, {1, {0x0214 }}},
+ { 0x0217, {1, {0x0216 }}},
+ { 0x0219, {1, {0x0218 }}},
+ { 0x021b, {1, {0x021a }}},
+ { 0x021d, {1, {0x021c }}},
+ { 0x021f, {1, {0x021e }}},
+ { 0x0223, {1, {0x0222 }}},
+ { 0x0225, {1, {0x0224 }}},
+ { 0x0227, {1, {0x0226 }}},
+ { 0x0229, {1, {0x0228 }}},
+ { 0x022b, {1, {0x022a }}},
+ { 0x022d, {1, {0x022c }}},
+ { 0x022f, {1, {0x022e }}},
+ { 0x0231, {1, {0x0230 }}},
+ { 0x0233, {1, {0x0232 }}},
+ { 0x023c, {1, {0x023b }}},
+ { 0x0253, {1, {0x0181 }}},
+ { 0x0254, {1, {0x0186 }}},
+ { 0x0256, {1, {0x0189 }}},
+ { 0x0257, {1, {0x018a }}},
+ { 0x0259, {1, {0x018f }}},
+ { 0x025b, {1, {0x0190 }}},
+ { 0x0260, {1, {0x0193 }}},
+ { 0x0263, {1, {0x0194 }}},
+ { 0x0268, {1, {0x0197 }}},
+ { 0x0269, {1, {0x0196 }}},
+ { 0x026f, {1, {0x019c }}},
+ { 0x0272, {1, {0x019d }}},
+ { 0x0275, {1, {0x019f }}},
+ { 0x0280, {1, {0x01a6 }}},
+ { 0x0283, {1, {0x01a9 }}},
+ { 0x0288, {1, {0x01ae }}},
+ { 0x028a, {1, {0x01b1 }}},
+ { 0x028b, {1, {0x01b2 }}},
+ { 0x0292, {1, {0x01b7 }}},
+ { 0x0294, {1, {0x0241 }}},
+ { 0x03ac, {1, {0x0386 }}},
+ { 0x03ad, {1, {0x0388 }}},
+ { 0x03ae, {1, {0x0389 }}},
+ { 0x03af, {1, {0x038a }}},
+ { 0x03b1, {1, {0x0391 }}},
+ { 0x03b2, {2, {0x0392, 0x03d0 }}},
+ { 0x03b3, {1, {0x0393 }}},
+ { 0x03b4, {1, {0x0394 }}},
+ { 0x03b5, {2, {0x03f5, 0x0395 }}},
+ { 0x03b6, {1, {0x0396 }}},
+ { 0x03b7, {1, {0x0397 }}},
+ { 0x03b8, {3, {0x03f4, 0x0398, 0x03d1 }}},
+ { 0x03b9, {3, {0x1fbe, 0x0399, 0x0345 }}},
+ { 0x03ba, {2, {0x03f0, 0x039a }}},
+ { 0x03bb, {1, {0x039b }}},
+ { 0x03bc, {2, {0x00b5, 0x039c }}},
+ { 0x03bd, {1, {0x039d }}},
+ { 0x03be, {1, {0x039e }}},
+ { 0x03bf, {1, {0x039f }}},
+ { 0x03c0, {2, {0x03a0, 0x03d6 }}},
+ { 0x03c1, {2, {0x03f1, 0x03a1 }}},
+ { 0x03c3, {2, {0x03a3, 0x03c2 }}},
+ { 0x03c4, {1, {0x03a4 }}},
+ { 0x03c5, {1, {0x03a5 }}},
+ { 0x03c6, {2, {0x03a6, 0x03d5 }}},
+ { 0x03c7, {1, {0x03a7 }}},
+ { 0x03c8, {1, {0x03a8 }}},
+ { 0x03c9, {2, {0x03a9, 0x2126 }}},
+ { 0x03ca, {1, {0x03aa }}},
+ { 0x03cb, {1, {0x03ab }}},
+ { 0x03cc, {1, {0x038c }}},
+ { 0x03cd, {1, {0x038e }}},
+ { 0x03ce, {1, {0x038f }}},
+ { 0x03d9, {1, {0x03d8 }}},
+ { 0x03db, {1, {0x03da }}},
+ { 0x03dd, {1, {0x03dc }}},
+ { 0x03df, {1, {0x03de }}},
+ { 0x03e1, {1, {0x03e0 }}},
+ { 0x03e3, {1, {0x03e2 }}},
+ { 0x03e5, {1, {0x03e4 }}},
+ { 0x03e7, {1, {0x03e6 }}},
+ { 0x03e9, {1, {0x03e8 }}},
+ { 0x03eb, {1, {0x03ea }}},
+ { 0x03ed, {1, {0x03ec }}},
+ { 0x03ef, {1, {0x03ee }}},
+ { 0x03f2, {1, {0x03f9 }}},
+ { 0x03f8, {1, {0x03f7 }}},
+ { 0x03fb, {1, {0x03fa }}},
+ { 0x0430, {1, {0x0410 }}},
+ { 0x0431, {1, {0x0411 }}},
+ { 0x0432, {1, {0x0412 }}},
+ { 0x0433, {1, {0x0413 }}},
+ { 0x0434, {1, {0x0414 }}},
+ { 0x0435, {1, {0x0415 }}},
+ { 0x0436, {1, {0x0416 }}},
+ { 0x0437, {1, {0x0417 }}},
+ { 0x0438, {1, {0x0418 }}},
+ { 0x0439, {1, {0x0419 }}},
+ { 0x043a, {1, {0x041a }}},
+ { 0x043b, {1, {0x041b }}},
+ { 0x043c, {1, {0x041c }}},
+ { 0x043d, {1, {0x041d }}},
+ { 0x043e, {1, {0x041e }}},
+ { 0x043f, {1, {0x041f }}},
+ { 0x0440, {1, {0x0420 }}},
+ { 0x0441, {1, {0x0421 }}},
+ { 0x0442, {1, {0x0422 }}},
+ { 0x0443, {1, {0x0423 }}},
+ { 0x0444, {1, {0x0424 }}},
+ { 0x0445, {1, {0x0425 }}},
+ { 0x0446, {1, {0x0426 }}},
+ { 0x0447, {1, {0x0427 }}},
+ { 0x0448, {1, {0x0428 }}},
+ { 0x0449, {1, {0x0429 }}},
+ { 0x044a, {1, {0x042a }}},
+ { 0x044b, {1, {0x042b }}},
+ { 0x044c, {1, {0x042c }}},
+ { 0x044d, {1, {0x042d }}},
+ { 0x044e, {1, {0x042e }}},
+ { 0x044f, {1, {0x042f }}},
+ { 0x0450, {1, {0x0400 }}},
+ { 0x0451, {1, {0x0401 }}},
+ { 0x0452, {1, {0x0402 }}},
+ { 0x0453, {1, {0x0403 }}},
+ { 0x0454, {1, {0x0404 }}},
+ { 0x0455, {1, {0x0405 }}},
+ { 0x0456, {1, {0x0406 }}},
+ { 0x0457, {1, {0x0407 }}},
+ { 0x0458, {1, {0x0408 }}},
+ { 0x0459, {1, {0x0409 }}},
+ { 0x045a, {1, {0x040a }}},
+ { 0x045b, {1, {0x040b }}},
+ { 0x045c, {1, {0x040c }}},
+ { 0x045d, {1, {0x040d }}},
+ { 0x045e, {1, {0x040e }}},
+ { 0x045f, {1, {0x040f }}},
+ { 0x0461, {1, {0x0460 }}},
+ { 0x0463, {1, {0x0462 }}},
+ { 0x0465, {1, {0x0464 }}},
+ { 0x0467, {1, {0x0466 }}},
+ { 0x0469, {1, {0x0468 }}},
+ { 0x046b, {1, {0x046a }}},
+ { 0x046d, {1, {0x046c }}},
+ { 0x046f, {1, {0x046e }}},
+ { 0x0471, {1, {0x0470 }}},
+ { 0x0473, {1, {0x0472 }}},
+ { 0x0475, {1, {0x0474 }}},
+ { 0x0477, {1, {0x0476 }}},
+ { 0x0479, {1, {0x0478 }}},
+ { 0x047b, {1, {0x047a }}},
+ { 0x047d, {1, {0x047c }}},
+ { 0x047f, {1, {0x047e }}},
+ { 0x0481, {1, {0x0480 }}},
+ { 0x048b, {1, {0x048a }}},
+ { 0x048d, {1, {0x048c }}},
+ { 0x048f, {1, {0x048e }}},
+ { 0x0491, {1, {0x0490 }}},
+ { 0x0493, {1, {0x0492 }}},
+ { 0x0495, {1, {0x0494 }}},
+ { 0x0497, {1, {0x0496 }}},
+ { 0x0499, {1, {0x0498 }}},
+ { 0x049b, {1, {0x049a }}},
+ { 0x049d, {1, {0x049c }}},
+ { 0x049f, {1, {0x049e }}},
+ { 0x04a1, {1, {0x04a0 }}},
+ { 0x04a3, {1, {0x04a2 }}},
+ { 0x04a5, {1, {0x04a4 }}},
+ { 0x04a7, {1, {0x04a6 }}},
+ { 0x04a9, {1, {0x04a8 }}},
+ { 0x04ab, {1, {0x04aa }}},
+ { 0x04ad, {1, {0x04ac }}},
+ { 0x04af, {1, {0x04ae }}},
+ { 0x04b1, {1, {0x04b0 }}},
+ { 0x04b3, {1, {0x04b2 }}},
+ { 0x04b5, {1, {0x04b4 }}},
+ { 0x04b7, {1, {0x04b6 }}},
+ { 0x04b9, {1, {0x04b8 }}},
+ { 0x04bb, {1, {0x04ba }}},
+ { 0x04bd, {1, {0x04bc }}},
+ { 0x04bf, {1, {0x04be }}},
+ { 0x04c2, {1, {0x04c1 }}},
+ { 0x04c4, {1, {0x04c3 }}},
+ { 0x04c6, {1, {0x04c5 }}},
+ { 0x04c8, {1, {0x04c7 }}},
+ { 0x04ca, {1, {0x04c9 }}},
+ { 0x04cc, {1, {0x04cb }}},
+ { 0x04ce, {1, {0x04cd }}},
+ { 0x04d1, {1, {0x04d0 }}},
+ { 0x04d3, {1, {0x04d2 }}},
+ { 0x04d5, {1, {0x04d4 }}},
+ { 0x04d7, {1, {0x04d6 }}},
+ { 0x04d9, {1, {0x04d8 }}},
+ { 0x04db, {1, {0x04da }}},
+ { 0x04dd, {1, {0x04dc }}},
+ { 0x04df, {1, {0x04de }}},
+ { 0x04e1, {1, {0x04e0 }}},
+ { 0x04e3, {1, {0x04e2 }}},
+ { 0x04e5, {1, {0x04e4 }}},
+ { 0x04e7, {1, {0x04e6 }}},
+ { 0x04e9, {1, {0x04e8 }}},
+ { 0x04eb, {1, {0x04ea }}},
+ { 0x04ed, {1, {0x04ec }}},
+ { 0x04ef, {1, {0x04ee }}},
+ { 0x04f1, {1, {0x04f0 }}},
+ { 0x04f3, {1, {0x04f2 }}},
+ { 0x04f5, {1, {0x04f4 }}},
+ { 0x04f7, {1, {0x04f6 }}},
+ { 0x04f9, {1, {0x04f8 }}},
+ { 0x0501, {1, {0x0500 }}},
+ { 0x0503, {1, {0x0502 }}},
+ { 0x0505, {1, {0x0504 }}},
+ { 0x0507, {1, {0x0506 }}},
+ { 0x0509, {1, {0x0508 }}},
+ { 0x050b, {1, {0x050a }}},
+ { 0x050d, {1, {0x050c }}},
+ { 0x050f, {1, {0x050e }}},
+ { 0x0561, {1, {0x0531 }}},
+ { 0x0562, {1, {0x0532 }}},
+ { 0x0563, {1, {0x0533 }}},
+ { 0x0564, {1, {0x0534 }}},
+ { 0x0565, {1, {0x0535 }}},
+ { 0x0566, {1, {0x0536 }}},
+ { 0x0567, {1, {0x0537 }}},
+ { 0x0568, {1, {0x0538 }}},
+ { 0x0569, {1, {0x0539 }}},
+ { 0x056a, {1, {0x053a }}},
+ { 0x056b, {1, {0x053b }}},
+ { 0x056c, {1, {0x053c }}},
+ { 0x056d, {1, {0x053d }}},
+ { 0x056e, {1, {0x053e }}},
+ { 0x056f, {1, {0x053f }}},
+ { 0x0570, {1, {0x0540 }}},
+ { 0x0571, {1, {0x0541 }}},
+ { 0x0572, {1, {0x0542 }}},
+ { 0x0573, {1, {0x0543 }}},
+ { 0x0574, {1, {0x0544 }}},
+ { 0x0575, {1, {0x0545 }}},
+ { 0x0576, {1, {0x0546 }}},
+ { 0x0577, {1, {0x0547 }}},
+ { 0x0578, {1, {0x0548 }}},
+ { 0x0579, {1, {0x0549 }}},
+ { 0x057a, {1, {0x054a }}},
+ { 0x057b, {1, {0x054b }}},
+ { 0x057c, {1, {0x054c }}},
+ { 0x057d, {1, {0x054d }}},
+ { 0x057e, {1, {0x054e }}},
+ { 0x057f, {1, {0x054f }}},
+ { 0x0580, {1, {0x0550 }}},
+ { 0x0581, {1, {0x0551 }}},
+ { 0x0582, {1, {0x0552 }}},
+ { 0x0583, {1, {0x0553 }}},
+ { 0x0584, {1, {0x0554 }}},
+ { 0x0585, {1, {0x0555 }}},
+ { 0x0586, {1, {0x0556 }}},
+ { 0x1e01, {1, {0x1e00 }}},
+ { 0x1e03, {1, {0x1e02 }}},
+ { 0x1e05, {1, {0x1e04 }}},
+ { 0x1e07, {1, {0x1e06 }}},
+ { 0x1e09, {1, {0x1e08 }}},
+ { 0x1e0b, {1, {0x1e0a }}},
+ { 0x1e0d, {1, {0x1e0c }}},
+ { 0x1e0f, {1, {0x1e0e }}},
+ { 0x1e11, {1, {0x1e10 }}},
+ { 0x1e13, {1, {0x1e12 }}},
+ { 0x1e15, {1, {0x1e14 }}},
+ { 0x1e17, {1, {0x1e16 }}},
+ { 0x1e19, {1, {0x1e18 }}},
+ { 0x1e1b, {1, {0x1e1a }}},
+ { 0x1e1d, {1, {0x1e1c }}},
+ { 0x1e1f, {1, {0x1e1e }}},
+ { 0x1e21, {1, {0x1e20 }}},
+ { 0x1e23, {1, {0x1e22 }}},
+ { 0x1e25, {1, {0x1e24 }}},
+ { 0x1e27, {1, {0x1e26 }}},
+ { 0x1e29, {1, {0x1e28 }}},
+ { 0x1e2b, {1, {0x1e2a }}},
+ { 0x1e2d, {1, {0x1e2c }}},
+ { 0x1e2f, {1, {0x1e2e }}},
+ { 0x1e31, {1, {0x1e30 }}},
+ { 0x1e33, {1, {0x1e32 }}},
+ { 0x1e35, {1, {0x1e34 }}},
+ { 0x1e37, {1, {0x1e36 }}},
+ { 0x1e39, {1, {0x1e38 }}},
+ { 0x1e3b, {1, {0x1e3a }}},
+ { 0x1e3d, {1, {0x1e3c }}},
+ { 0x1e3f, {1, {0x1e3e }}},
+ { 0x1e41, {1, {0x1e40 }}},
+ { 0x1e43, {1, {0x1e42 }}},
+ { 0x1e45, {1, {0x1e44 }}},
+ { 0x1e47, {1, {0x1e46 }}},
+ { 0x1e49, {1, {0x1e48 }}},
+ { 0x1e4b, {1, {0x1e4a }}},
+ { 0x1e4d, {1, {0x1e4c }}},
+ { 0x1e4f, {1, {0x1e4e }}},
+ { 0x1e51, {1, {0x1e50 }}},
+ { 0x1e53, {1, {0x1e52 }}},
+ { 0x1e55, {1, {0x1e54 }}},
+ { 0x1e57, {1, {0x1e56 }}},
+ { 0x1e59, {1, {0x1e58 }}},
+ { 0x1e5b, {1, {0x1e5a }}},
+ { 0x1e5d, {1, {0x1e5c }}},
+ { 0x1e5f, {1, {0x1e5e }}},
+ { 0x1e61, {2, {0x1e9b, 0x1e60 }}},
+ { 0x1e63, {1, {0x1e62 }}},
+ { 0x1e65, {1, {0x1e64 }}},
+ { 0x1e67, {1, {0x1e66 }}},
+ { 0x1e69, {1, {0x1e68 }}},
+ { 0x1e6b, {1, {0x1e6a }}},
+ { 0x1e6d, {1, {0x1e6c }}},
+ { 0x1e6f, {1, {0x1e6e }}},
+ { 0x1e71, {1, {0x1e70 }}},
+ { 0x1e73, {1, {0x1e72 }}},
+ { 0x1e75, {1, {0x1e74 }}},
+ { 0x1e77, {1, {0x1e76 }}},
+ { 0x1e79, {1, {0x1e78 }}},
+ { 0x1e7b, {1, {0x1e7a }}},
+ { 0x1e7d, {1, {0x1e7c }}},
+ { 0x1e7f, {1, {0x1e7e }}},
+ { 0x1e81, {1, {0x1e80 }}},
+ { 0x1e83, {1, {0x1e82 }}},
+ { 0x1e85, {1, {0x1e84 }}},
+ { 0x1e87, {1, {0x1e86 }}},
+ { 0x1e89, {1, {0x1e88 }}},
+ { 0x1e8b, {1, {0x1e8a }}},
+ { 0x1e8d, {1, {0x1e8c }}},
+ { 0x1e8f, {1, {0x1e8e }}},
+ { 0x1e91, {1, {0x1e90 }}},
+ { 0x1e93, {1, {0x1e92 }}},
+ { 0x1e95, {1, {0x1e94 }}},
+ { 0x1ea1, {1, {0x1ea0 }}},
+ { 0x1ea3, {1, {0x1ea2 }}},
+ { 0x1ea5, {1, {0x1ea4 }}},
+ { 0x1ea7, {1, {0x1ea6 }}},
+ { 0x1ea9, {1, {0x1ea8 }}},
+ { 0x1eab, {1, {0x1eaa }}},
+ { 0x1ead, {1, {0x1eac }}},
+ { 0x1eaf, {1, {0x1eae }}},
+ { 0x1eb1, {1, {0x1eb0 }}},
+ { 0x1eb3, {1, {0x1eb2 }}},
+ { 0x1eb5, {1, {0x1eb4 }}},
+ { 0x1eb7, {1, {0x1eb6 }}},
+ { 0x1eb9, {1, {0x1eb8 }}},
+ { 0x1ebb, {1, {0x1eba }}},
+ { 0x1ebd, {1, {0x1ebc }}},
+ { 0x1ebf, {1, {0x1ebe }}},
+ { 0x1ec1, {1, {0x1ec0 }}},
+ { 0x1ec3, {1, {0x1ec2 }}},
+ { 0x1ec5, {1, {0x1ec4 }}},
+ { 0x1ec7, {1, {0x1ec6 }}},
+ { 0x1ec9, {1, {0x1ec8 }}},
+ { 0x1ecb, {1, {0x1eca }}},
+ { 0x1ecd, {1, {0x1ecc }}},
+ { 0x1ecf, {1, {0x1ece }}},
+ { 0x1ed1, {1, {0x1ed0 }}},
+ { 0x1ed3, {1, {0x1ed2 }}},
+ { 0x1ed5, {1, {0x1ed4 }}},
+ { 0x1ed7, {1, {0x1ed6 }}},
+ { 0x1ed9, {1, {0x1ed8 }}},
+ { 0x1edb, {1, {0x1eda }}},
+ { 0x1edd, {1, {0x1edc }}},
+ { 0x1edf, {1, {0x1ede }}},
+ { 0x1ee1, {1, {0x1ee0 }}},
+ { 0x1ee3, {1, {0x1ee2 }}},
+ { 0x1ee5, {1, {0x1ee4 }}},
+ { 0x1ee7, {1, {0x1ee6 }}},
+ { 0x1ee9, {1, {0x1ee8 }}},
+ { 0x1eeb, {1, {0x1eea }}},
+ { 0x1eed, {1, {0x1eec }}},
+ { 0x1eef, {1, {0x1eee }}},
+ { 0x1ef1, {1, {0x1ef0 }}},
+ { 0x1ef3, {1, {0x1ef2 }}},
+ { 0x1ef5, {1, {0x1ef4 }}},
+ { 0x1ef7, {1, {0x1ef6 }}},
+ { 0x1ef9, {1, {0x1ef8 }}},
+ { 0x1f00, {1, {0x1f08 }}},
+ { 0x1f01, {1, {0x1f09 }}},
+ { 0x1f02, {1, {0x1f0a }}},
+ { 0x1f03, {1, {0x1f0b }}},
+ { 0x1f04, {1, {0x1f0c }}},
+ { 0x1f05, {1, {0x1f0d }}},
+ { 0x1f06, {1, {0x1f0e }}},
+ { 0x1f07, {1, {0x1f0f }}},
+ { 0x1f10, {1, {0x1f18 }}},
+ { 0x1f11, {1, {0x1f19 }}},
+ { 0x1f12, {1, {0x1f1a }}},
+ { 0x1f13, {1, {0x1f1b }}},
+ { 0x1f14, {1, {0x1f1c }}},
+ { 0x1f15, {1, {0x1f1d }}},
+ { 0x1f20, {1, {0x1f28 }}},
+ { 0x1f21, {1, {0x1f29 }}},
+ { 0x1f22, {1, {0x1f2a }}},
+ { 0x1f23, {1, {0x1f2b }}},
+ { 0x1f24, {1, {0x1f2c }}},
+ { 0x1f25, {1, {0x1f2d }}},
+ { 0x1f26, {1, {0x1f2e }}},
+ { 0x1f27, {1, {0x1f2f }}},
+ { 0x1f30, {1, {0x1f38 }}},
+ { 0x1f31, {1, {0x1f39 }}},
+ { 0x1f32, {1, {0x1f3a }}},
+ { 0x1f33, {1, {0x1f3b }}},
+ { 0x1f34, {1, {0x1f3c }}},
+ { 0x1f35, {1, {0x1f3d }}},
+ { 0x1f36, {1, {0x1f3e }}},
+ { 0x1f37, {1, {0x1f3f }}},
+ { 0x1f40, {1, {0x1f48 }}},
+ { 0x1f41, {1, {0x1f49 }}},
+ { 0x1f42, {1, {0x1f4a }}},
+ { 0x1f43, {1, {0x1f4b }}},
+ { 0x1f44, {1, {0x1f4c }}},
+ { 0x1f45, {1, {0x1f4d }}},
+ { 0x1f51, {1, {0x1f59 }}},
+ { 0x1f53, {1, {0x1f5b }}},
+ { 0x1f55, {1, {0x1f5d }}},
+ { 0x1f57, {1, {0x1f5f }}},
+ { 0x1f60, {1, {0x1f68 }}},
+ { 0x1f61, {1, {0x1f69 }}},
+ { 0x1f62, {1, {0x1f6a }}},
+ { 0x1f63, {1, {0x1f6b }}},
+ { 0x1f64, {1, {0x1f6c }}},
+ { 0x1f65, {1, {0x1f6d }}},
+ { 0x1f66, {1, {0x1f6e }}},
+ { 0x1f67, {1, {0x1f6f }}},
+ { 0x1f70, {1, {0x1fba }}},
+ { 0x1f71, {1, {0x1fbb }}},
+ { 0x1f72, {1, {0x1fc8 }}},
+ { 0x1f73, {1, {0x1fc9 }}},
+ { 0x1f74, {1, {0x1fca }}},
+ { 0x1f75, {1, {0x1fcb }}},
+ { 0x1f76, {1, {0x1fda }}},
+ { 0x1f77, {1, {0x1fdb }}},
+ { 0x1f78, {1, {0x1ff8 }}},
+ { 0x1f79, {1, {0x1ff9 }}},
+ { 0x1f7a, {1, {0x1fea }}},
+ { 0x1f7b, {1, {0x1feb }}},
+ { 0x1f7c, {1, {0x1ffa }}},
+ { 0x1f7d, {1, {0x1ffb }}},
+ { 0x1fb0, {1, {0x1fb8 }}},
+ { 0x1fb1, {1, {0x1fb9 }}},
+ { 0x1fd0, {1, {0x1fd8 }}},
+ { 0x1fd1, {1, {0x1fd9 }}},
+ { 0x1fe0, {1, {0x1fe8 }}},
+ { 0x1fe1, {1, {0x1fe9 }}},
+ { 0x1fe5, {1, {0x1fec }}},
+ { 0x2170, {1, {0x2160 }}},
+ { 0x2171, {1, {0x2161 }}},
+ { 0x2172, {1, {0x2162 }}},
+ { 0x2173, {1, {0x2163 }}},
+ { 0x2174, {1, {0x2164 }}},
+ { 0x2175, {1, {0x2165 }}},
+ { 0x2176, {1, {0x2166 }}},
+ { 0x2177, {1, {0x2167 }}},
+ { 0x2178, {1, {0x2168 }}},
+ { 0x2179, {1, {0x2169 }}},
+ { 0x217a, {1, {0x216a }}},
+ { 0x217b, {1, {0x216b }}},
+ { 0x217c, {1, {0x216c }}},
+ { 0x217d, {1, {0x216d }}},
+ { 0x217e, {1, {0x216e }}},
+ { 0x217f, {1, {0x216f }}},
+ { 0x24d0, {1, {0x24b6 }}},
+ { 0x24d1, {1, {0x24b7 }}},
+ { 0x24d2, {1, {0x24b8 }}},
+ { 0x24d3, {1, {0x24b9 }}},
+ { 0x24d4, {1, {0x24ba }}},
+ { 0x24d5, {1, {0x24bb }}},
+ { 0x24d6, {1, {0x24bc }}},
+ { 0x24d7, {1, {0x24bd }}},
+ { 0x24d8, {1, {0x24be }}},
+ { 0x24d9, {1, {0x24bf }}},
+ { 0x24da, {1, {0x24c0 }}},
+ { 0x24db, {1, {0x24c1 }}},
+ { 0x24dc, {1, {0x24c2 }}},
+ { 0x24dd, {1, {0x24c3 }}},
+ { 0x24de, {1, {0x24c4 }}},
+ { 0x24df, {1, {0x24c5 }}},
+ { 0x24e0, {1, {0x24c6 }}},
+ { 0x24e1, {1, {0x24c7 }}},
+ { 0x24e2, {1, {0x24c8 }}},
+ { 0x24e3, {1, {0x24c9 }}},
+ { 0x24e4, {1, {0x24ca }}},
+ { 0x24e5, {1, {0x24cb }}},
+ { 0x24e6, {1, {0x24cc }}},
+ { 0x24e7, {1, {0x24cd }}},
+ { 0x24e8, {1, {0x24ce }}},
+ { 0x24e9, {1, {0x24cf }}},
+ { 0x2c30, {1, {0x2c00 }}},
+ { 0x2c31, {1, {0x2c01 }}},
+ { 0x2c32, {1, {0x2c02 }}},
+ { 0x2c33, {1, {0x2c03 }}},
+ { 0x2c34, {1, {0x2c04 }}},
+ { 0x2c35, {1, {0x2c05 }}},
+ { 0x2c36, {1, {0x2c06 }}},
+ { 0x2c37, {1, {0x2c07 }}},
+ { 0x2c38, {1, {0x2c08 }}},
+ { 0x2c39, {1, {0x2c09 }}},
+ { 0x2c3a, {1, {0x2c0a }}},
+ { 0x2c3b, {1, {0x2c0b }}},
+ { 0x2c3c, {1, {0x2c0c }}},
+ { 0x2c3d, {1, {0x2c0d }}},
+ { 0x2c3e, {1, {0x2c0e }}},
+ { 0x2c3f, {1, {0x2c0f }}},
+ { 0x2c40, {1, {0x2c10 }}},
+ { 0x2c41, {1, {0x2c11 }}},
+ { 0x2c42, {1, {0x2c12 }}},
+ { 0x2c43, {1, {0x2c13 }}},
+ { 0x2c44, {1, {0x2c14 }}},
+ { 0x2c45, {1, {0x2c15 }}},
+ { 0x2c46, {1, {0x2c16 }}},
+ { 0x2c47, {1, {0x2c17 }}},
+ { 0x2c48, {1, {0x2c18 }}},
+ { 0x2c49, {1, {0x2c19 }}},
+ { 0x2c4a, {1, {0x2c1a }}},
+ { 0x2c4b, {1, {0x2c1b }}},
+ { 0x2c4c, {1, {0x2c1c }}},
+ { 0x2c4d, {1, {0x2c1d }}},
+ { 0x2c4e, {1, {0x2c1e }}},
+ { 0x2c4f, {1, {0x2c1f }}},
+ { 0x2c50, {1, {0x2c20 }}},
+ { 0x2c51, {1, {0x2c21 }}},
+ { 0x2c52, {1, {0x2c22 }}},
+ { 0x2c53, {1, {0x2c23 }}},
+ { 0x2c54, {1, {0x2c24 }}},
+ { 0x2c55, {1, {0x2c25 }}},
+ { 0x2c56, {1, {0x2c26 }}},
+ { 0x2c57, {1, {0x2c27 }}},
+ { 0x2c58, {1, {0x2c28 }}},
+ { 0x2c59, {1, {0x2c29 }}},
+ { 0x2c5a, {1, {0x2c2a }}},
+ { 0x2c5b, {1, {0x2c2b }}},
+ { 0x2c5c, {1, {0x2c2c }}},
+ { 0x2c5d, {1, {0x2c2d }}},
+ { 0x2c5e, {1, {0x2c2e }}},
+ { 0x2c81, {1, {0x2c80 }}},
+ { 0x2c83, {1, {0x2c82 }}},
+ { 0x2c85, {1, {0x2c84 }}},
+ { 0x2c87, {1, {0x2c86 }}},
+ { 0x2c89, {1, {0x2c88 }}},
+ { 0x2c8b, {1, {0x2c8a }}},
+ { 0x2c8d, {1, {0x2c8c }}},
+ { 0x2c8f, {1, {0x2c8e }}},
+ { 0x2c91, {1, {0x2c90 }}},
+ { 0x2c93, {1, {0x2c92 }}},
+ { 0x2c95, {1, {0x2c94 }}},
+ { 0x2c97, {1, {0x2c96 }}},
+ { 0x2c99, {1, {0x2c98 }}},
+ { 0x2c9b, {1, {0x2c9a }}},
+ { 0x2c9d, {1, {0x2c9c }}},
+ { 0x2c9f, {1, {0x2c9e }}},
+ { 0x2ca1, {1, {0x2ca0 }}},
+ { 0x2ca3, {1, {0x2ca2 }}},
+ { 0x2ca5, {1, {0x2ca4 }}},
+ { 0x2ca7, {1, {0x2ca6 }}},
+ { 0x2ca9, {1, {0x2ca8 }}},
+ { 0x2cab, {1, {0x2caa }}},
+ { 0x2cad, {1, {0x2cac }}},
+ { 0x2caf, {1, {0x2cae }}},
+ { 0x2cb1, {1, {0x2cb0 }}},
+ { 0x2cb3, {1, {0x2cb2 }}},
+ { 0x2cb5, {1, {0x2cb4 }}},
+ { 0x2cb7, {1, {0x2cb6 }}},
+ { 0x2cb9, {1, {0x2cb8 }}},
+ { 0x2cbb, {1, {0x2cba }}},
+ { 0x2cbd, {1, {0x2cbc }}},
+ { 0x2cbf, {1, {0x2cbe }}},
+ { 0x2cc1, {1, {0x2cc0 }}},
+ { 0x2cc3, {1, {0x2cc2 }}},
+ { 0x2cc5, {1, {0x2cc4 }}},
+ { 0x2cc7, {1, {0x2cc6 }}},
+ { 0x2cc9, {1, {0x2cc8 }}},
+ { 0x2ccb, {1, {0x2cca }}},
+ { 0x2ccd, {1, {0x2ccc }}},
+ { 0x2ccf, {1, {0x2cce }}},
+ { 0x2cd1, {1, {0x2cd0 }}},
+ { 0x2cd3, {1, {0x2cd2 }}},
+ { 0x2cd5, {1, {0x2cd4 }}},
+ { 0x2cd7, {1, {0x2cd6 }}},
+ { 0x2cd9, {1, {0x2cd8 }}},
+ { 0x2cdb, {1, {0x2cda }}},
+ { 0x2cdd, {1, {0x2cdc }}},
+ { 0x2cdf, {1, {0x2cde }}},
+ { 0x2ce1, {1, {0x2ce0 }}},
+ { 0x2ce3, {1, {0x2ce2 }}},
+ { 0x2d00, {1, {0x10a0 }}},
+ { 0x2d01, {1, {0x10a1 }}},
+ { 0x2d02, {1, {0x10a2 }}},
+ { 0x2d03, {1, {0x10a3 }}},
+ { 0x2d04, {1, {0x10a4 }}},
+ { 0x2d05, {1, {0x10a5 }}},
+ { 0x2d06, {1, {0x10a6 }}},
+ { 0x2d07, {1, {0x10a7 }}},
+ { 0x2d08, {1, {0x10a8 }}},
+ { 0x2d09, {1, {0x10a9 }}},
+ { 0x2d0a, {1, {0x10aa }}},
+ { 0x2d0b, {1, {0x10ab }}},
+ { 0x2d0c, {1, {0x10ac }}},
+ { 0x2d0d, {1, {0x10ad }}},
+ { 0x2d0e, {1, {0x10ae }}},
+ { 0x2d0f, {1, {0x10af }}},
+ { 0x2d10, {1, {0x10b0 }}},
+ { 0x2d11, {1, {0x10b1 }}},
+ { 0x2d12, {1, {0x10b2 }}},
+ { 0x2d13, {1, {0x10b3 }}},
+ { 0x2d14, {1, {0x10b4 }}},
+ { 0x2d15, {1, {0x10b5 }}},
+ { 0x2d16, {1, {0x10b6 }}},
+ { 0x2d17, {1, {0x10b7 }}},
+ { 0x2d18, {1, {0x10b8 }}},
+ { 0x2d19, {1, {0x10b9 }}},
+ { 0x2d1a, {1, {0x10ba }}},
+ { 0x2d1b, {1, {0x10bb }}},
+ { 0x2d1c, {1, {0x10bc }}},
+ { 0x2d1d, {1, {0x10bd }}},
+ { 0x2d1e, {1, {0x10be }}},
+ { 0x2d1f, {1, {0x10bf }}},
+ { 0x2d20, {1, {0x10c0 }}},
+ { 0x2d21, {1, {0x10c1 }}},
+ { 0x2d22, {1, {0x10c2 }}},
+ { 0x2d23, {1, {0x10c3 }}},
+ { 0x2d24, {1, {0x10c4 }}},
+ { 0x2d25, {1, {0x10c5 }}},
+ { 0xff41, {1, {0xff21 }}},
+ { 0xff42, {1, {0xff22 }}},
+ { 0xff43, {1, {0xff23 }}},
+ { 0xff44, {1, {0xff24 }}},
+ { 0xff45, {1, {0xff25 }}},
+ { 0xff46, {1, {0xff26 }}},
+ { 0xff47, {1, {0xff27 }}},
+ { 0xff48, {1, {0xff28 }}},
+ { 0xff49, {1, {0xff29 }}},
+ { 0xff4a, {1, {0xff2a }}},
+ { 0xff4b, {1, {0xff2b }}},
+ { 0xff4c, {1, {0xff2c }}},
+ { 0xff4d, {1, {0xff2d }}},
+ { 0xff4e, {1, {0xff2e }}},
+ { 0xff4f, {1, {0xff2f }}},
+ { 0xff50, {1, {0xff30 }}},
+ { 0xff51, {1, {0xff31 }}},
+ { 0xff52, {1, {0xff32 }}},
+ { 0xff53, {1, {0xff33 }}},
+ { 0xff54, {1, {0xff34 }}},
+ { 0xff55, {1, {0xff35 }}},
+ { 0xff56, {1, {0xff36 }}},
+ { 0xff57, {1, {0xff37 }}},
+ { 0xff58, {1, {0xff38 }}},
+ { 0xff59, {1, {0xff39 }}},
+ { 0xff5a, {1, {0xff3a }}},
+ { 0x10428, {1, {0x10400 }}},
+ { 0x10429, {1, {0x10401 }}},
+ { 0x1042a, {1, {0x10402 }}},
+ { 0x1042b, {1, {0x10403 }}},
+ { 0x1042c, {1, {0x10404 }}},
+ { 0x1042d, {1, {0x10405 }}},
+ { 0x1042e, {1, {0x10406 }}},
+ { 0x1042f, {1, {0x10407 }}},
+ { 0x10430, {1, {0x10408 }}},
+ { 0x10431, {1, {0x10409 }}},
+ { 0x10432, {1, {0x1040a }}},
+ { 0x10433, {1, {0x1040b }}},
+ { 0x10434, {1, {0x1040c }}},
+ { 0x10435, {1, {0x1040d }}},
+ { 0x10436, {1, {0x1040e }}},
+ { 0x10437, {1, {0x1040f }}},
+ { 0x10438, {1, {0x10410 }}},
+ { 0x10439, {1, {0x10411 }}},
+ { 0x1043a, {1, {0x10412 }}},
+ { 0x1043b, {1, {0x10413 }}},
+ { 0x1043c, {1, {0x10414 }}},
+ { 0x1043d, {1, {0x10415 }}},
+ { 0x1043e, {1, {0x10416 }}},
+ { 0x1043f, {1, {0x10417 }}},
+ { 0x10440, {1, {0x10418 }}},
+ { 0x10441, {1, {0x10419 }}},
+ { 0x10442, {1, {0x1041a }}},
+ { 0x10443, {1, {0x1041b }}},
+ { 0x10444, {1, {0x1041c }}},
+ { 0x10445, {1, {0x1041d }}},
+ { 0x10446, {1, {0x1041e }}},
+ { 0x10447, {1, {0x1041f }}},
+ { 0x10448, {1, {0x10420 }}},
+ { 0x10449, {1, {0x10421 }}},
+ { 0x1044a, {1, {0x10422 }}},
+ { 0x1044b, {1, {0x10423 }}},
+ { 0x1044c, {1, {0x10424 }}},
+ { 0x1044d, {1, {0x10425 }}},
+ { 0x1044e, {1, {0x10426 }}},
+ { 0x1044f, {1, {0x10427 }}}
+};
+
+static const CaseUnfold_11_Type CaseUnfold_11_Locale[] = {
+ { 0x0069, {1, {0x0049 }}}
+};
+
+static const CaseUnfold_12_Type CaseUnfold_12[] = {
+ { {0x0061, 0x02be}, {1, {0x1e9a }}},
+ { {0x0066, 0x0066}, {1, {0xfb00 }}},
+ { {0x0066, 0x0069}, {1, {0xfb01 }}},
+ { {0x0066, 0x006c}, {1, {0xfb02 }}},
+ { {0x0068, 0x0331}, {1, {0x1e96 }}},
+ { {0x006a, 0x030c}, {1, {0x01f0 }}},
+ { {0x0073, 0x0073}, {1, {0x00df }}},
+ { {0x0073, 0x0074}, {2, {0xfb05, 0xfb06 }}},
+ { {0x0074, 0x0308}, {1, {0x1e97 }}},
+ { {0x0077, 0x030a}, {1, {0x1e98 }}},
+ { {0x0079, 0x030a}, {1, {0x1e99 }}},
+ { {0x02bc, 0x006e}, {1, {0x0149 }}},
+ { {0x03ac, 0x03b9}, {1, {0x1fb4 }}},
+ { {0x03ae, 0x03b9}, {1, {0x1fc4 }}},
+ { {0x03b1, 0x0342}, {1, {0x1fb6 }}},
+ { {0x03b1, 0x03b9}, {2, {0x1fb3, 0x1fbc }}},
+ { {0x03b7, 0x0342}, {1, {0x1fc6 }}},
+ { {0x03b7, 0x03b9}, {2, {0x1fc3, 0x1fcc }}},
+ { {0x03b9, 0x0342}, {1, {0x1fd6 }}},
+ { {0x03c1, 0x0313}, {1, {0x1fe4 }}},
+ { {0x03c5, 0x0313}, {1, {0x1f50 }}},
+ { {0x03c5, 0x0342}, {1, {0x1fe6 }}},
+ { {0x03c9, 0x0342}, {1, {0x1ff6 }}},
+ { {0x03c9, 0x03b9}, {2, {0x1ff3, 0x1ffc }}},
+ { {0x03ce, 0x03b9}, {1, {0x1ff4 }}},
+ { {0x0565, 0x0582}, {1, {0x0587 }}},
+ { {0x0574, 0x0565}, {1, {0xfb14 }}},
+ { {0x0574, 0x056b}, {1, {0xfb15 }}},
+ { {0x0574, 0x056d}, {1, {0xfb17 }}},
+ { {0x0574, 0x0576}, {1, {0xfb13 }}},
+ { {0x057e, 0x0576}, {1, {0xfb16 }}},
+ { {0x1f00, 0x03b9}, {2, {0x1f88, 0x1f80 }}},
+ { {0x1f01, 0x03b9}, {2, {0x1f81, 0x1f89 }}},
+ { {0x1f02, 0x03b9}, {2, {0x1f82, 0x1f8a }}},
+ { {0x1f03, 0x03b9}, {2, {0x1f83, 0x1f8b }}},
+ { {0x1f04, 0x03b9}, {2, {0x1f84, 0x1f8c }}},
+ { {0x1f05, 0x03b9}, {2, {0x1f85, 0x1f8d }}},
+ { {0x1f06, 0x03b9}, {2, {0x1f86, 0x1f8e }}},
+ { {0x1f07, 0x03b9}, {2, {0x1f87, 0x1f8f }}},
+ { {0x1f20, 0x03b9}, {2, {0x1f90, 0x1f98 }}},
+ { {0x1f21, 0x03b9}, {2, {0x1f91, 0x1f99 }}},
+ { {0x1f22, 0x03b9}, {2, {0x1f92, 0x1f9a }}},
+ { {0x1f23, 0x03b9}, {2, {0x1f93, 0x1f9b }}},
+ { {0x1f24, 0x03b9}, {2, {0x1f94, 0x1f9c }}},
+ { {0x1f25, 0x03b9}, {2, {0x1f95, 0x1f9d }}},
+ { {0x1f26, 0x03b9}, {2, {0x1f96, 0x1f9e }}},
+ { {0x1f27, 0x03b9}, {2, {0x1f97, 0x1f9f }}},
+ { {0x1f60, 0x03b9}, {2, {0x1fa0, 0x1fa8 }}},
+ { {0x1f61, 0x03b9}, {2, {0x1fa1, 0x1fa9 }}},
+ { {0x1f62, 0x03b9}, {2, {0x1fa2, 0x1faa }}},
+ { {0x1f63, 0x03b9}, {2, {0x1fa3, 0x1fab }}},
+ { {0x1f64, 0x03b9}, {2, {0x1fa4, 0x1fac }}},
+ { {0x1f65, 0x03b9}, {2, {0x1fa5, 0x1fad }}},
+ { {0x1f66, 0x03b9}, {2, {0x1fa6, 0x1fae }}},
+ { {0x1f67, 0x03b9}, {2, {0x1fa7, 0x1faf }}},
+ { {0x1f70, 0x03b9}, {1, {0x1fb2 }}},
+ { {0x1f74, 0x03b9}, {1, {0x1fc2 }}},
+ { {0x1f7c, 0x03b9}, {1, {0x1ff2 }}}
+};
+
+static const CaseUnfold_12_Type CaseUnfold_12_Locale[] = {
+ { {0x0069, 0x0307}, {1, {0x0130 }}}
+};
+
+static const CaseUnfold_13_Type CaseUnfold_13[] = {
+ { {0x0066, 0x0066, 0x0069}, {1, {0xfb03 }}},
+ { {0x0066, 0x0066, 0x006c}, {1, {0xfb04 }}},
+ { {0x03b1, 0x0342, 0x03b9}, {1, {0x1fb7 }}},
+ { {0x03b7, 0x0342, 0x03b9}, {1, {0x1fc7 }}},
+ { {0x03b9, 0x0308, 0x0300}, {1, {0x1fd2 }}},
+ { {0x03b9, 0x0308, 0x0301}, {2, {0x0390, 0x1fd3 }}},
+ { {0x03b9, 0x0308, 0x0342}, {1, {0x1fd7 }}},
+ { {0x03c5, 0x0308, 0x0300}, {1, {0x1fe2 }}},
+ { {0x03c5, 0x0308, 0x0301}, {2, {0x03b0, 0x1fe3 }}},
+ { {0x03c5, 0x0308, 0x0342}, {1, {0x1fe7 }}},
+ { {0x03c5, 0x0313, 0x0300}, {1, {0x1f52 }}},
+ { {0x03c5, 0x0313, 0x0301}, {1, {0x1f54 }}},
+ { {0x03c5, 0x0313, 0x0342}, {1, {0x1f56 }}},
+ { {0x03c9, 0x0342, 0x03b9}, {1, {0x1ff7 }}}
+};
+
+
+static PosixBracketEntryType HashEntryData[] = {
+ { (UChar* )"NEWLINE", 0, 7 },
+ { (UChar* )"Alpha", 1, 5 },
+ { (UChar* )"Blank", 2, 5 },
+ { (UChar* )"Cntrl", 3, 5 },
+ { (UChar* )"Digit", 4, 5 },
+ { (UChar* )"Graph", 5, 5 },
+ { (UChar* )"Lower", 6, 5 },
+ { (UChar* )"Print", 7, 5 },
+ { (UChar* )"Punct", 8, 5 },
+ { (UChar* )"Space", 9, 5 },
+ { (UChar* )"Upper", 10, 5 },
+ { (UChar* )"XDigit", 11, 6 },
+ { (UChar* )"Word", 12, 4 },
+ { (UChar* )"Alnum", 13, 5 },
+ { (UChar* )"ASCII", 14, 5 },
+
+#ifdef USE_UNICODE_PROPERTIES
+ { (UChar* )"Any", 15, 3 },
+ { (UChar* )"Assigned", 16, 8 },
+ { (UChar* )"C", 17, 1 },
+ { (UChar* )"Cc", 18, 2 },
+ { (UChar* )"Cf", 19, 2 },
+ { (UChar* )"Cn", 20, 2 },
+ { (UChar* )"Co", 21, 2 },
+ { (UChar* )"Cs", 22, 2 },
+ { (UChar* )"L", 23, 1 },
+ { (UChar* )"Ll", 24, 2 },
+ { (UChar* )"Lm", 25, 2 },
+ { (UChar* )"Lo", 26, 2 },
+ { (UChar* )"Lt", 27, 2 },
+ { (UChar* )"Lu", 28, 2 },
+ { (UChar* )"M", 29, 1 },
+ { (UChar* )"Mc", 30, 2 },
+ { (UChar* )"Me", 31, 2 },
+ { (UChar* )"Mn", 32, 2 },
+ { (UChar* )"N", 33, 1 },
+ { (UChar* )"Nd", 34, 2 },
+ { (UChar* )"Nl", 35, 2 },
+ { (UChar* )"No", 36, 2 },
+ { (UChar* )"P", 37, 1 },
+ { (UChar* )"Pc", 38, 2 },
+ { (UChar* )"Pd", 39, 2 },
+ { (UChar* )"Pe", 40, 2 },
+ { (UChar* )"Pf", 41, 2 },
+ { (UChar* )"Pi", 42, 2 },
+ { (UChar* )"Po", 43, 2 },
+ { (UChar* )"Ps", 44, 2 },
+ { (UChar* )"S", 45, 1 },
+ { (UChar* )"Sc", 46, 2 },
+ { (UChar* )"Sk", 47, 2 },
+ { (UChar* )"Sm", 48, 2 },
+ { (UChar* )"So", 49, 2 },
+ { (UChar* )"Z", 50, 1 },
+ { (UChar* )"Zl", 51, 2 },
+ { (UChar* )"Zp", 52, 2 },
+ { (UChar* )"Zs", 53, 2 },
+ { (UChar* )"Arabic", 54, 6 },
+ { (UChar* )"Armenian", 55, 8 },
+ { (UChar* )"Bengali", 56, 7 },
+ { (UChar* )"Bopomofo", 57, 8 },
+ { (UChar* )"Braille", 58, 7 },
+ { (UChar* )"Buginese", 59, 8 },
+ { (UChar* )"Buhid", 60, 5 },
+ { (UChar* )"Canadian_Aboriginal", 61, 19 },
+ { (UChar* )"Cherokee", 62, 8 },
+ { (UChar* )"Common", 63, 6 },
+ { (UChar* )"Coptic", 64, 6 },
+ { (UChar* )"Cypriot", 65, 7 },
+ { (UChar* )"Cyrillic", 66, 8 },
+ { (UChar* )"Deseret", 67, 7 },
+ { (UChar* )"Devanagari", 68, 10 },
+ { (UChar* )"Ethiopic", 69, 8 },
+ { (UChar* )"Georgian", 70, 8 },
+ { (UChar* )"Glagolitic", 71, 10 },
+ { (UChar* )"Gothic", 72, 6 },
+ { (UChar* )"Greek", 73, 5 },
+ { (UChar* )"Gujarati", 74, 8 },
+ { (UChar* )"Gurmukhi", 75, 8 },
+ { (UChar* )"Han", 76, 3 },
+ { (UChar* )"Hangul", 77, 6 },
+ { (UChar* )"Hanunoo", 78, 7 },
+ { (UChar* )"Hebrew", 79, 6 },
+ { (UChar* )"Hiragana", 80, 8 },
+ { (UChar* )"Inherited", 81, 9 },
+ { (UChar* )"Kannada", 82, 7 },
+ { (UChar* )"Katakana", 83, 8 },
+ { (UChar* )"Kharoshthi", 84, 10 },
+ { (UChar* )"Khmer", 85, 5 },
+ { (UChar* )"Lao", 86, 3 },
+ { (UChar* )"Latin", 87, 5 },
+ { (UChar* )"Limbu", 88, 5 },
+ { (UChar* )"Linear_B", 89, 8 },
+ { (UChar* )"Malayalam", 90, 9 },
+ { (UChar* )"Mongolian", 91, 9 },
+ { (UChar* )"Myanmar", 92, 7 },
+ { (UChar* )"New_Tai_Lue", 93, 11 },
+ { (UChar* )"Ogham", 94, 5 },
+ { (UChar* )"Old_Italic", 95, 10 },
+ { (UChar* )"Old_Persian", 96, 11 },
+ { (UChar* )"Oriya", 97, 5 },
+ { (UChar* )"Osmanya", 98, 7 },
+ { (UChar* )"Runic", 99, 5 },
+ { (UChar* )"Shavian", 100, 7 },
+ { (UChar* )"Sinhala", 101, 7 },
+ { (UChar* )"Syloti_Nagri", 102, 12 },
+ { (UChar* )"Syriac", 103, 6 },
+ { (UChar* )"Tagalog", 104, 7 },
+ { (UChar* )"Tagbanwa", 105, 8 },
+ { (UChar* )"Tai_Le", 106, 6 },
+ { (UChar* )"Tamil", 107, 5 },
+ { (UChar* )"Telugu", 108, 6 },
+ { (UChar* )"Thaana", 109, 6 },
+ { (UChar* )"Thai", 110, 4 },
+ { (UChar* )"Tibetan", 111, 7 },
+ { (UChar* )"Tifinagh", 112, 8 },
+ { (UChar* )"Ugaritic", 113, 8 },
+ { (UChar* )"Yi", 114, 2 },
+#endif /* USE_UNICODE_PROPERTIES */
+ { (UChar* )NULL, -1, 0 }
+};
+
+#ifdef USE_UNICODE_PROPERTIES
+#define CODE_RANGES_NUM 115
+#else
+#define CODE_RANGES_NUM 15
+#endif
+
+static const OnigCodePoint* CodeRanges[CODE_RANGES_NUM];
+static int CodeRangeTableInited = 0;
+
+static void init_code_range_array() {
+ THREAD_ATOMIC_START;
+
+ CodeRanges[0] = CR_NEWLINE;
+ CodeRanges[1] = CR_Alpha;
+ CodeRanges[2] = CR_Blank;
+ CodeRanges[3] = CR_Cntrl;
+ CodeRanges[4] = CR_Digit;
+ CodeRanges[5] = CR_Graph;
+ CodeRanges[6] = CR_Lower;
+ CodeRanges[7] = CR_Print;
+ CodeRanges[8] = CR_Punct;
+ CodeRanges[9] = CR_Space;
+ CodeRanges[10] = CR_Upper;
+ CodeRanges[11] = CR_XDigit;
+ CodeRanges[12] = CR_Word;
+ CodeRanges[13] = CR_Alnum;
+ CodeRanges[14] = CR_ASCII;
+
+#ifdef USE_UNICODE_PROPERTIES
+ CodeRanges[15] = CR_Any;
+ CodeRanges[16] = CR_Assigned;
+ CodeRanges[17] = CR_C;
+ CodeRanges[18] = CR_Cc;
+ CodeRanges[19] = CR_Cf;
+ CodeRanges[20] = CR_Cn;
+ CodeRanges[21] = CR_Co;
+ CodeRanges[22] = CR_Cs;
+ CodeRanges[23] = CR_L;
+ CodeRanges[24] = CR_Ll;
+ CodeRanges[25] = CR_Lm;
+ CodeRanges[26] = CR_Lo;
+ CodeRanges[27] = CR_Lt;
+ CodeRanges[28] = CR_Lu;
+ CodeRanges[29] = CR_M;
+ CodeRanges[30] = CR_Mc;
+ CodeRanges[31] = CR_Me;
+ CodeRanges[32] = CR_Mn;
+ CodeRanges[33] = CR_N;
+ CodeRanges[34] = CR_Nd;
+ CodeRanges[35] = CR_Nl;
+ CodeRanges[36] = CR_No;
+ CodeRanges[37] = CR_P;
+ CodeRanges[38] = CR_Pc;
+ CodeRanges[39] = CR_Pd;
+ CodeRanges[40] = CR_Pe;
+ CodeRanges[41] = CR_Pf;
+ CodeRanges[42] = CR_Pi;
+ CodeRanges[43] = CR_Po;
+ CodeRanges[44] = CR_Ps;
+ CodeRanges[45] = CR_S;
+ CodeRanges[46] = CR_Sc;
+ CodeRanges[47] = CR_Sk;
+ CodeRanges[48] = CR_Sm;
+ CodeRanges[49] = CR_So;
+ CodeRanges[50] = CR_Z;
+ CodeRanges[51] = CR_Zl;
+ CodeRanges[52] = CR_Zp;
+ CodeRanges[53] = CR_Zs;
+ CodeRanges[54] = CR_Arabic;
+ CodeRanges[55] = CR_Armenian;
+ CodeRanges[56] = CR_Bengali;
+ CodeRanges[57] = CR_Bopomofo;
+ CodeRanges[58] = CR_Braille;
+ CodeRanges[59] = CR_Buginese;
+ CodeRanges[60] = CR_Buhid;
+ CodeRanges[61] = CR_Canadian_Aboriginal;
+ CodeRanges[62] = CR_Cherokee;
+ CodeRanges[63] = CR_Common;
+ CodeRanges[64] = CR_Coptic;
+ CodeRanges[65] = CR_Cypriot;
+ CodeRanges[66] = CR_Cyrillic;
+ CodeRanges[67] = CR_Deseret;
+ CodeRanges[68] = CR_Devanagari;
+ CodeRanges[69] = CR_Ethiopic;
+ CodeRanges[70] = CR_Georgian;
+ CodeRanges[71] = CR_Glagolitic;
+ CodeRanges[72] = CR_Gothic;
+ CodeRanges[73] = CR_Greek;
+ CodeRanges[74] = CR_Gujarati;
+ CodeRanges[75] = CR_Gurmukhi;
+ CodeRanges[76] = CR_Han;
+ CodeRanges[77] = CR_Hangul;
+ CodeRanges[78] = CR_Hanunoo;
+ CodeRanges[79] = CR_Hebrew;
+ CodeRanges[80] = CR_Hiragana;
+ CodeRanges[81] = CR_Inherited;
+ CodeRanges[82] = CR_Kannada;
+ CodeRanges[83] = CR_Katakana;
+ CodeRanges[84] = CR_Kharoshthi;
+ CodeRanges[85] = CR_Khmer;
+ CodeRanges[86] = CR_Lao;
+ CodeRanges[87] = CR_Latin;
+ CodeRanges[88] = CR_Limbu;
+ CodeRanges[89] = CR_Linear_B;
+ CodeRanges[90] = CR_Malayalam;
+ CodeRanges[91] = CR_Mongolian;
+ CodeRanges[92] = CR_Myanmar;
+ CodeRanges[93] = CR_New_Tai_Lue;
+ CodeRanges[94] = CR_Ogham;
+ CodeRanges[95] = CR_Old_Italic;
+ CodeRanges[96] = CR_Old_Persian;
+ CodeRanges[97] = CR_Oriya;
+ CodeRanges[98] = CR_Osmanya;
+ CodeRanges[99] = CR_Runic;
+ CodeRanges[100] = CR_Shavian;
+ CodeRanges[101] = CR_Sinhala;
+ CodeRanges[102] = CR_Syloti_Nagri;
+ CodeRanges[103] = CR_Syriac;
+ CodeRanges[104] = CR_Tagalog;
+ CodeRanges[105] = CR_Tagbanwa;
+ CodeRanges[106] = CR_Tai_Le;
+ CodeRanges[107] = CR_Tamil;
+ CodeRanges[108] = CR_Telugu;
+ CodeRanges[109] = CR_Thaana;
+ CodeRanges[110] = CR_Thai;
+ CodeRanges[111] = CR_Tibetan;
+ CodeRanges[112] = CR_Tifinagh;
+ CodeRanges[113] = CR_Ugaritic;
+ CodeRanges[114] = CR_Yi;
+#endif /* USE_UNICODE_PROPERTIES */
+
+ CodeRangeTableInited = 1;
+ THREAD_ATOMIC_END;
+}
+
+extern int
+onigenc_unicode_is_code_ctype(OnigCodePoint code, unsigned int ctype)
+{
+ if (
+#ifdef USE_UNICODE_PROPERTIES
+ ctype <= ONIGENC_MAX_STD_CTYPE &&
+#endif
+ code < 256) {
+ return ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code, ctype);
+ }
+
+ if (ctype >= CODE_RANGES_NUM) {
+ return ONIGENCERR_TYPE_BUG;
+ }
+
+ if (CodeRangeTableInited == 0) init_code_range_array();
+
+ return onig_is_in_code_range((UChar* )CodeRanges[ctype], code);
+}
+
+
+extern int
+onigenc_unicode_ctype_code_range(int ctype, const OnigCodePoint* ranges[])
+{
+ if (ctype >= CODE_RANGES_NUM) {
+ return ONIGENCERR_TYPE_BUG;
+ }
+
+ if (CodeRangeTableInited == 0) init_code_range_array();
+
+ *ranges = CodeRanges[ctype];
+
+ return 0;
+}
+
+extern int
+onigenc_utf16_32_get_ctype_code_range(int ctype, OnigCodePoint* sb_out,
+ const OnigCodePoint* ranges[])
+{
+ *sb_out = 0x00;
+ return onigenc_unicode_ctype_code_range(ctype, ranges);
+}
+
+#include "st.h"
+
+#define PROPERTY_NAME_MAX_SIZE 20
+
+static st_table* NameCtypeTable;
+static int NameTableInited = 0;
+
+static int init_name_ctype_table(void)
+{
+ PosixBracketEntryType *pb;
+
+ THREAD_ATOMIC_START;
+
+ NameCtypeTable = onig_st_init_strend_table_with_size(100);
+ if (ONIG_IS_NULL(NameCtypeTable)) return ONIGERR_MEMORY;
+
+ for (pb = HashEntryData; ONIG_IS_NOT_NULL(pb->name); pb++) {
+ onig_st_insert_strend(NameCtypeTable, pb->name, pb->name + pb->len,
+ (st_data_t )pb->ctype);
+ }
+
+ NameTableInited = 1;
+ THREAD_ATOMIC_END;
+ return 0;
+}
+
+extern int
+onigenc_unicode_property_name_to_ctype(OnigEncoding enc, UChar* name, UChar* end)
+{
+ int len, ctype;
+ UChar buf[PROPERTY_NAME_MAX_SIZE];
+ UChar *p;
+ OnigCodePoint code;
+
+ p = name;
+ len = 0;
+ while (p < end) {
+ code = ONIGENC_MBC_TO_CODE(enc, p, end);
+ if (code >= 0x80)
+ return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
+
+ buf[len++] = (UChar )code;
+ if (len >= PROPERTY_NAME_MAX_SIZE)
+ return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
+
+ p += enc_len(enc, p);
+ }
+
+ buf[len] = 0;
+
+ if (NameTableInited == 0) init_name_ctype_table();
+
+ if (onig_st_lookup_strend(NameCtypeTable, buf, buf + len,
+ (void*)&ctype) == 0) {
+ return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
+ }
+
+ return ctype;
+}
+
+
+static int
+code2_cmp(OnigCodePoint* x, OnigCodePoint* y)
+{
+ if (x[0] == y[0] && x[1] == y[1]) return 0;
+ return 1;
+}
+
+static int
+code2_hash(OnigCodePoint* x)
+{
+ return (int )(x[0] + x[1]);
+}
+
+static struct st_hash_type type_code2_hash = {
+ code2_cmp,
+ code2_hash,
+};
+
+static int
+code3_cmp(OnigCodePoint* x, OnigCodePoint* y)
+{
+ if (x[0] == y[0] && x[1] == y[1] && x[2] == y[2]) return 0;
+ return 1;
+}
+
+static int
+code3_hash(OnigCodePoint* x)
+{
+ return (int )(x[0] + x[1] + x[2]);
+}
+
+static struct st_hash_type type_code3_hash = {
+ code3_cmp,
+ code3_hash,
+};
+
+
+static st_table* FoldTable; /* fold-1, fold-2, fold-3 */
+static st_table* Unfold1Table;
+static st_table* Unfold2Table;
+static st_table* Unfold3Table;
+static int CaseFoldInited = 0;
+
+static int init_case_fold_table(void)
+{
+ const CaseFold_11_Type *p;
+ const CaseUnfold_11_Type *p1;
+ const CaseUnfold_12_Type *p2;
+ const CaseUnfold_13_Type *p3;
+ int i;
+
+ THREAD_ATOMIC_START;
+
+ FoldTable = st_init_numtable_with_size(1200);
+ if (ONIG_IS_NULL(FoldTable)) return ONIGERR_MEMORY;
+ for (i = 0; i < sizeof(CaseFold)/sizeof(CaseFold_11_Type); i++) {
+ p = &CaseFold[i];
+ st_add_direct(FoldTable, (st_data_t )p->from, (st_data_t )&(p->to));
+ }
+ for (i = 0; i < sizeof(CaseFold_Locale)/sizeof(CaseFold_11_Type); i++) {
+ p = &CaseFold_Locale[i];
+ st_add_direct(FoldTable, (st_data_t )p->from, (st_data_t )&(p->to));
+ }
+
+ Unfold1Table = st_init_numtable_with_size(1000);
+ if (ONIG_IS_NULL(Unfold1Table)) return ONIGERR_MEMORY;
+
+ for (i = 0; i < sizeof(CaseUnfold_11)/sizeof(CaseUnfold_11_Type); i++) {
+ p1 = &CaseUnfold_11[i];
+ st_add_direct(Unfold1Table, (st_data_t )p1->from, (st_data_t )&(p1->to));
+ }
+ for (i = 0; i < sizeof(CaseUnfold_11_Locale)/sizeof(CaseUnfold_11_Type);
+ i++) {
+ p1 = &CaseUnfold_11_Locale[i];
+ st_add_direct(Unfold1Table, (st_data_t )p1->from, (st_data_t )&(p1->to));
+ }
+
+ Unfold2Table = st_init_table_with_size(&type_code2_hash, 200);
+ if (ONIG_IS_NULL(Unfold2Table)) return ONIGERR_MEMORY;
+
+ for (i = 0; i < sizeof(CaseUnfold_12)/sizeof(CaseUnfold_12_Type); i++) {
+ p2 = &CaseUnfold_12[i];
+ st_add_direct(Unfold2Table, (st_data_t )p2->from, (st_data_t )(&p2->to));
+ }
+ for (i = 0; i < sizeof(CaseUnfold_12_Locale)/sizeof(CaseUnfold_12_Type);
+ i++) {
+ p2 = &CaseUnfold_12_Locale[i];
+ st_add_direct(Unfold2Table, (st_data_t )p2->from, (st_data_t )(&p2->to));
+ }
+
+ Unfold3Table = st_init_table_with_size(&type_code3_hash, 30);
+ if (ONIG_IS_NULL(Unfold3Table)) return ONIGERR_MEMORY;
+
+ for (i = 0; i < sizeof(CaseUnfold_13)/sizeof(CaseUnfold_13_Type); i++) {
+ p3 = &CaseUnfold_13[i];
+ st_add_direct(Unfold3Table, (st_data_t )p3->from, (st_data_t )(&p3->to));
+ }
+
+ CaseFoldInited = 1;
+ THREAD_ATOMIC_END;
+ return 0;
+}
+
+extern int
+onigenc_unicode_mbc_case_fold(OnigEncoding enc,
+ OnigCaseFoldType flag, const UChar** pp, const UChar* end, UChar* fold)
+{
+ CodePointList3 *to;
+ OnigCodePoint code;
+ int i, len, rlen;
+ const UChar *p = *pp;
+
+ if (CaseFoldInited == 0) init_case_fold_table();
+
+ code = ONIGENC_MBC_TO_CODE(enc, p, end);
+ len = enc_len(enc, p);
+ *pp += len;
+
+#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
+ if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {
+ if (code == 0x0049) {
+ return ONIGENC_CODE_TO_MBC(enc, 0x0131, fold);
+ }
+ else if (code == 0x0130) {
+ return ONIGENC_CODE_TO_MBC(enc, 0x0069, fold);
+ }
+ }
+#endif
+
+ if (onig_st_lookup(FoldTable, (st_data_t )code, (void* )&to) != 0) {
+ if (to->n == 1) {
+ return ONIGENC_CODE_TO_MBC(enc, to->code[0], fold);
+ }
+#if 0
+ /* NO NEEDS TO CHECK */
+ else if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
+#else
+ else {
+#endif
+ rlen = 0;
+ for (i = 0; i < to->n; i++) {
+ len = ONIGENC_CODE_TO_MBC(enc, to->code[i], fold);
+ fold += len;
+ rlen += len;
+ }
+ return rlen;
+ }
+ }
+
+ for (i = 0; i < len; i++) {
+ *fold++ = *p++;
+ }
+ return len;
+}
+
+extern int
+onigenc_unicode_apply_all_case_fold(OnigCaseFoldType flag,
+ OnigApplyAllCaseFoldFunc f, void* arg)
+{
+ const CaseUnfold_11_Type* p11;
+ OnigCodePoint code;
+ int i, j, k, r;
+
+ /* if (CaseFoldInited == 0) init_case_fold_table(); */
+
+ for (i = 0; i < sizeof(CaseUnfold_11)/sizeof(CaseUnfold_11_Type); i++) {
+ p11 = &CaseUnfold_11[i];
+ for (j = 0; j < p11->to.n; j++) {
+ code = p11->from;
+ r = (*f)(p11->to.code[j], &code, 1, arg);
+ if (r != 0) return r;
+
+ code = p11->to.code[j];
+ r = (*f)(p11->from, &code, 1, arg);
+ if (r != 0) return r;
+
+ for (k = 0; k < j; k++) {
+ r = (*f)(p11->to.code[j], (OnigCodePoint* )(&p11->to.code[k]), 1, arg);
+ if (r != 0) return r;
+
+ r = (*f)(p11->to.code[k], (OnigCodePoint* )(&p11->to.code[j]), 1, arg);
+ if (r != 0) return r;
+ }
+ }
+ }
+
+#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
+ if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {
+ code = 0x0131;
+ r = (*f)(0x0049, &code, 1, arg);
+ if (r != 0) return r;
+ code = 0x0049;
+ r = (*f)(0x0131, &code, 1, arg);
+ if (r != 0) return r;
+
+ code = 0x0130;
+ r = (*f)(0x0069, &code, 1, arg);
+ if (r != 0) return r;
+ code = 0x0069;
+ r = (*f)(0x0130, &code, 1, arg);
+ if (r != 0) return r;
+ }
+ else {
+#endif
+ for (i = 0; i < sizeof(CaseUnfold_11_Locale)/sizeof(CaseUnfold_11_Type);
+ i++) {
+ p11 = &CaseUnfold_11_Locale[i];
+ for (j = 0; j < p11->to.n; j++) {
+ code = p11->from;
+ r = (*f)(p11->to.code[j], &code, 1, arg);
+ if (r != 0) return r;
+
+ code = p11->to.code[j];
+ r = (*f)(p11->from, &code, 1, arg);
+ if (r != 0) return r;
+
+ for (k = 0; k < j; k++) {
+ r = (*f)(p11->to.code[j], (OnigCodePoint* )(&p11->to.code[k]),
+ 1, arg);
+ if (r != 0) return r;
+
+ r = (*f)(p11->to.code[k], (OnigCodePoint* )(&p11->to.code[j]),
+ 1, arg);
+ if (r != 0) return r;
+ }
+ }
+ }
+#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
+ }
+#endif
+
+ if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
+ for (i = 0; i < sizeof(CaseUnfold_12)/sizeof(CaseUnfold_12_Type); i++) {
+ for (j = 0; j < CaseUnfold_12[i].to.n; j++) {
+ r = (*f)(CaseUnfold_12[i].to.code[j],
+ (OnigCodePoint* )CaseUnfold_12[i].from, 2, arg);
+ if (r != 0) return r;
+
+ for (k = 0; k < CaseUnfold_12[i].to.n; k++) {
+ if (k == j) continue;
+
+ r = (*f)(CaseUnfold_12[i].to.code[j],
+ (OnigCodePoint* )(&CaseUnfold_12[i].to.code[k]), 1, arg);
+ if (r != 0) return r;
+ }
+ }
+ }
+
+#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
+ if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) == 0) {
+#endif
+ for (i = 0; i < sizeof(CaseUnfold_12_Locale)/sizeof(CaseUnfold_12_Type);
+ i++) {
+ for (j = 0; j < CaseUnfold_12_Locale[i].to.n; j++) {
+ r = (*f)(CaseUnfold_12_Locale[i].to.code[j],
+ (OnigCodePoint* )CaseUnfold_12_Locale[i].from, 2, arg);
+ if (r != 0) return r;
+
+ for (k = 0; k < CaseUnfold_12_Locale[i].to.n; k++) {
+ if (k == j) continue;
+
+ r = (*f)(CaseUnfold_12_Locale[i].to.code[j],
+ (OnigCodePoint* )(&CaseUnfold_12_Locale[i].to.code[k]),
+ 1, arg);
+ if (r != 0) return r;
+ }
+ }
+ }
+#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
+ }
+#endif
+
+ for (i = 0; i < sizeof(CaseUnfold_13)/sizeof(CaseUnfold_13_Type); i++) {
+ for (j = 0; j < CaseUnfold_13[i].to.n; j++) {
+ r = (*f)(CaseUnfold_13[i].to.code[j],
+ (OnigCodePoint* )CaseUnfold_13[i].from, 3, arg);
+ if (r != 0) return r;
+
+ for (k = 0; k < CaseUnfold_13[i].to.n; k++) {
+ if (k == j) continue;
+
+ r = (*f)(CaseUnfold_13[i].to.code[j],
+ (OnigCodePoint* )(&CaseUnfold_13[i].to.code[k]), 1, arg);
+ if (r != 0) return r;
+ }
+ }
+ }
+ }
+
+ return 0;
+}
+
+extern int
+onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
+ OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end,
+ OnigCaseFoldCodeItem items[])
+{
+ int n, i, j, k, len;
+ OnigCodePoint code, codes[3];
+ CodePointList3 *to, *z3;
+ CodePointList2 *z2;
+
+ if (CaseFoldInited == 0) init_case_fold_table();
+
+ n = 0;
+
+ code = ONIGENC_MBC_TO_CODE(enc, p, end);
+ len = enc_len(enc, p);
+
+#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
+ if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {
+ if (code == 0x0049) {
+ items[0].byte_len = len;
+ items[0].code_len = 1;
+ items[0].code[0] = 0x0131;
+ return 1;
+ }
+ else if (code == 0x0130) {
+ items[0].byte_len = len;
+ items[0].code_len = 1;
+ items[0].code[0] = 0x0069;
+ return 1;
+ }
+ else if (code == 0x0131) {
+ items[0].byte_len = len;
+ items[0].code_len = 1;
+ items[0].code[0] = 0x0049;
+ return 1;
+ }
+ else if (code == 0x0069) {
+ items[0].byte_len = len;
+ items[0].code_len = 1;
+ items[0].code[0] = 0x0130;
+ return 1;
+ }
+ }
+#endif
+
+ if (onig_st_lookup(FoldTable, (st_data_t )code, (void* )&to) != 0) {
+ if (to->n == 1) {
+ OnigCodePoint orig_code = code;
+
+ items[0].byte_len = len;
+ items[0].code_len = 1;
+ items[0].code[0] = to->code[0];
+ n++;
+
+ code = to->code[0];
+ if (onig_st_lookup(Unfold1Table, (st_data_t )code, (void* )&to) != 0) {
+ for (i = 0; i < to->n; i++) {
+ if (to->code[i] != orig_code) {
+ items[n].byte_len = len;
+ items[n].code_len = 1;
+ items[n].code[0] = to->code[i];
+ n++;
+ }
+ }
+ }
+ }
+ else if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
+ OnigCodePoint cs[3][4];
+ int fn, ncs[3];
+
+ for (fn = 0; fn < to->n; fn++) {
+ cs[fn][0] = to->code[fn];
+ if (onig_st_lookup(Unfold1Table, (st_data_t )cs[fn][0],
+ (void* )&z3) != 0) {
+ for (i = 0; i < z3->n; i++) {
+ cs[fn][i+1] = z3->code[i];
+ }
+ ncs[fn] = z3->n + 1;
+ }
+ else
+ ncs[fn] = 1;
+ }
+
+ if (fn == 2) {
+ for (i = 0; i < ncs[0]; i++) {
+ for (j = 0; j < ncs[1]; j++) {
+ items[n].byte_len = len;
+ items[n].code_len = 2;
+ items[n].code[0] = cs[0][i];
+ items[n].code[1] = cs[1][j];
+ n++;
+ }
+ }
+
+ if (onig_st_lookup(Unfold2Table, (st_data_t )to->code,
+ (void* )&z2) != 0) {
+ for (i = 0; i < z2->n; i++) {
+ if (z2->code[i] == code) continue;
+
+ items[n].byte_len = len;
+ items[n].code_len = 1;
+ items[n].code[0] = z2->code[i];
+ n++;
+ }
+ }
+ }
+ else {
+ for (i = 0; i < ncs[0]; i++) {
+ for (j = 0; j < ncs[1]; j++) {
+ for (k = 0; k < ncs[2]; k++) {
+ items[n].byte_len = len;
+ items[n].code_len = 3;
+ items[n].code[0] = cs[0][i];
+ items[n].code[1] = cs[1][j];
+ items[n].code[2] = cs[2][k];
+ n++;
+ }
+ }
+ }
+
+ if (onig_st_lookup(Unfold3Table, (st_data_t )to->code,
+ (void* )&z2) != 0) {
+ for (i = 0; i < z2->n; i++) {
+ if (z2->code[i] == code) continue;
+
+ items[n].byte_len = len;
+ items[n].code_len = 1;
+ items[n].code[0] = z2->code[i];
+ n++;
+ }
+ }
+ }
+
+ /* multi char folded code is not head of another folded multi char */
+ flag = 0; /* DISABLE_CASE_FOLD_MULTI_CHAR(flag); */
+ }
+ }
+ else {
+ if (onig_st_lookup(Unfold1Table, (st_data_t )code, (void* )&to) != 0) {
+ for (i = 0; i < to->n; i++) {
+ items[n].byte_len = len;
+ items[n].code_len = 1;
+ items[n].code[0] = to->code[i];
+ n++;
+ }
+ }
+ }
+
+
+ if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
+ p += len;
+ if (p < end) {
+ int clen;
+
+ codes[0] = code;
+ code = ONIGENC_MBC_TO_CODE(enc, p, end);
+ if (onig_st_lookup(FoldTable, (st_data_t )code, (void* )&to) != 0
+ && to->n == 1) {
+ codes[1] = to->code[0];
+ }
+ else
+ codes[1] = code;
+
+ clen = enc_len(enc, p);
+ len += clen;
+ if (onig_st_lookup(Unfold2Table, (st_data_t )codes, (void* )&z2) != 0) {
+ for (i = 0; i < z2->n; i++) {
+ items[n].byte_len = len;
+ items[n].code_len = 1;
+ items[n].code[0] = z2->code[i];
+ n++;
+ }
+ }
+
+ p += clen;
+ if (p < end) {
+ code = ONIGENC_MBC_TO_CODE(enc, p, end);
+ if (onig_st_lookup(FoldTable, (st_data_t )code, (void* )&to) != 0
+ && to->n == 1) {
+ codes[2] = to->code[0];
+ }
+ else
+ codes[2] = code;
+
+ clen = enc_len(enc, p);
+ len += clen;
+ if (onig_st_lookup(Unfold3Table, (st_data_t )codes,
+ (void* )&z2) != 0) {
+ for (i = 0; i < z2->n; i++) {
+ items[n].byte_len = len;
+ items[n].code_len = 1;
+ items[n].code[0] = z2->code[i];
+ n++;
+ }
+ }
+ }
+ }
+ }
+
+ return n;
+}
diff --git a/utf8.c b/utf8.c
index 0e816176ba..daeeea3bd8 100644
--- a/utf8.c
+++ b/utf8.c
@@ -218,108 +218,59 @@ utf8_code_to_mbc(OnigCodePoint code, UChar *buf)
}
static int
-utf8_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end, UChar* lower)
+utf8_mbc_case_fold(OnigCaseFoldType flag, const UChar** pp,
+ const UChar* end, UChar* fold)
{
const UChar* p = *pp;
if (ONIGENC_IS_MBC_ASCII(p)) {
- if (end > p + 1 &&
- (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0 &&
- ((*p == 's' && *(p+1) == 's') ||
- ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
- (*p == 'S' && *(p+1) == 'S')))) {
- *lower++ = '\303';
- *lower = '\237';
- (*pp) += 2;
- return 2;
+#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
+ if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {
+ if (*p == 0x49) {
+ *fold++ = 0xc4;
+ *fold = 0xb1;
+ (*pp)++;
+ return 2;
+ }
}
+#endif
- if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) {
- *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
- }
- else {
- *lower = *p;
- }
+ *fold = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
(*pp)++;
return 1; /* return byte length of converted char to lower */
}
else {
- int len;
-
- if (*p == 195) { /* 195 == '\303' */
- int c = *(p + 1);
- if (c >= 128) {
- if (c <= (UChar )'\236' && /* upper */
- (flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0) {
- if (c != (UChar )'\227') {
- *lower++ = *p;
- *lower = (UChar )(c + 32);
- (*pp) += 2;
- return 2;
- }
- }
-#if 0
- else if (c == (UChar )'\237' &&
- (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
- *lower++ = '\303';
- *lower = '\237';
- (*pp) += 2;
- return 2;
- }
-#endif
- }
- }
-
- len = enc_len(ONIG_ENCODING_UTF8, p);
- if (lower != p) {
- int i;
- for (i = 0; i < len; i++) {
- *lower++ = *p++;
- }
- }
- (*pp) += len;
- return len; /* return byte length of converted char to lower */
+ return onigenc_unicode_mbc_case_fold(ONIG_ENCODING_UTF8, flag,
+ pp, end, fold);
}
}
+#if 0
static int
-utf8_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
+utf8_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
{
const UChar* p = *pp;
if (ONIGENC_IS_MBC_ASCII(p)) {
- if (end > p + 1 &&
- (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0 &&
- ((*p == 's' && *(p+1) == 's') ||
- ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
- (*p == 'S' && *(p+1) == 'S')))) {
- (*pp) += 2;
- return TRUE;
- }
-
(*pp)++;
- if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) {
- return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
- }
+ return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
}
else {
(*pp) += enc_len(ONIG_ENCODING_UTF8, p);
- if (*p == 195) { /* 195 == '\303' */
+ if (*p == 0xc3) {
int c = *(p + 1);
- if (c >= 128) {
- if ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0) {
- if (c <= (UChar )'\236') { /* upper */
- if (c == (UChar )'\227') return FALSE;
- return TRUE;
- }
- else if (c >= (UChar )'\240' && c <= (UChar )'\276') { /* lower */
- if (c == (UChar )'\267') return FALSE;
- return TRUE;
- }
- }
- else if (c == (UChar )'\237' &&
- (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
+ if (c >= 0x80) {
+ if (c <= (UChar )0x9e) { /* upper */
+ if (c == (UChar )0x97) return FALSE;
+ return TRUE;
+ }
+ else if (c >= (UChar )0xa0 && c <= (UChar )0xbe) { /* lower */
+ if (c == (UChar )'\267') return FALSE;
+ return TRUE;
+ }
+ else if (c == (UChar )0x9f &&
+ (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
return TRUE;
}
}
@@ -328,3398 +279,17 @@ utf8_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
return FALSE;
}
-
-
-static const OnigCodePoint EmptyRange[] = { 0 };
-
-static const OnigCodePoint SBAlnum[] = {
- 3,
- 0x0030, 0x0039,
- 0x0041, 0x005a,
- 0x0061, 0x007a
-};
-
-static const OnigCodePoint MBAlnum[] = {
-#ifdef USE_UNICODE_FULL_RANGE_CTYPE
- 411,
-#else
- 6,
-#endif
- 0x00aa, 0x00aa,
- 0x00b5, 0x00b5,
- 0x00ba, 0x00ba,
- 0x00c0, 0x00d6,
- 0x00d8, 0x00f6,
- 0x00f8, 0x0236
-#ifdef USE_UNICODE_FULL_RANGE_CTYPE
- ,
- 0x0250, 0x02c1,
- 0x02c6, 0x02d1,
- 0x02e0, 0x02e4,
- 0x02ee, 0x02ee,
- 0x0300, 0x0357,
- 0x035d, 0x036f,
- 0x037a, 0x037a,
- 0x0386, 0x0386,
- 0x0388, 0x038a,
- 0x038c, 0x038c,
- 0x038e, 0x03a1,
- 0x03a3, 0x03ce,
- 0x03d0, 0x03f5,
- 0x03f7, 0x03fb,
- 0x0400, 0x0481,
- 0x0483, 0x0486,
- 0x0488, 0x04ce,
- 0x04d0, 0x04f5,
- 0x04f8, 0x04f9,
- 0x0500, 0x050f,
- 0x0531, 0x0556,
- 0x0559, 0x0559,
- 0x0561, 0x0587,
- 0x0591, 0x05a1,
- 0x05a3, 0x05b9,
- 0x05bb, 0x05bd,
- 0x05bf, 0x05bf,
- 0x05c1, 0x05c2,
- 0x05c4, 0x05c4,
- 0x05d0, 0x05ea,
- 0x05f0, 0x05f2,
- 0x0610, 0x0615,
- 0x0621, 0x063a,
- 0x0640, 0x0658,
- 0x0660, 0x0669,
- 0x066e, 0x06d3,
- 0x06d5, 0x06dc,
- 0x06de, 0x06e8,
- 0x06ea, 0x06fc,
- 0x06ff, 0x06ff,
- 0x0710, 0x074a,
- 0x074d, 0x074f,
- 0x0780, 0x07b1,
- 0x0901, 0x0939,
- 0x093c, 0x094d,
- 0x0950, 0x0954,
- 0x0958, 0x0963,
- 0x0966, 0x096f,
- 0x0981, 0x0983,
- 0x0985, 0x098c,
- 0x098f, 0x0990,
- 0x0993, 0x09a8,
- 0x09aa, 0x09b0,
- 0x09b2, 0x09b2,
- 0x09b6, 0x09b9,
- 0x09bc, 0x09c4,
- 0x09c7, 0x09c8,
- 0x09cb, 0x09cd,
- 0x09d7, 0x09d7,
- 0x09dc, 0x09dd,
- 0x09df, 0x09e3,
- 0x09e6, 0x09f1,
- 0x0a01, 0x0a03,
- 0x0a05, 0x0a0a,
- 0x0a0f, 0x0a10,
- 0x0a13, 0x0a28,
- 0x0a2a, 0x0a30,
- 0x0a32, 0x0a33,
- 0x0a35, 0x0a36,
- 0x0a38, 0x0a39,
- 0x0a3c, 0x0a3c,
- 0x0a3e, 0x0a42,
- 0x0a47, 0x0a48,
- 0x0a4b, 0x0a4d,
- 0x0a59, 0x0a5c,
- 0x0a5e, 0x0a5e,
- 0x0a66, 0x0a74,
- 0x0a81, 0x0a83,
- 0x0a85, 0x0a8d,
- 0x0a8f, 0x0a91,
- 0x0a93, 0x0aa8,
- 0x0aaa, 0x0ab0,
- 0x0ab2, 0x0ab3,
- 0x0ab5, 0x0ab9,
- 0x0abc, 0x0ac5,
- 0x0ac7, 0x0ac9,
- 0x0acb, 0x0acd,
- 0x0ad0, 0x0ad0,
- 0x0ae0, 0x0ae3,
- 0x0ae6, 0x0aef,
- 0x0b01, 0x0b03,
- 0x0b05, 0x0b0c,
- 0x0b0f, 0x0b10,
- 0x0b13, 0x0b28,
- 0x0b2a, 0x0b30,
- 0x0b32, 0x0b33,
- 0x0b35, 0x0b39,
- 0x0b3c, 0x0b43,
- 0x0b47, 0x0b48,
- 0x0b4b, 0x0b4d,
- 0x0b56, 0x0b57,
- 0x0b5c, 0x0b5d,
- 0x0b5f, 0x0b61,
- 0x0b66, 0x0b6f,
- 0x0b71, 0x0b71,
- 0x0b82, 0x0b83,
- 0x0b85, 0x0b8a,
- 0x0b8e, 0x0b90,
- 0x0b92, 0x0b95,
- 0x0b99, 0x0b9a,
- 0x0b9c, 0x0b9c,
- 0x0b9e, 0x0b9f,
- 0x0ba3, 0x0ba4,
- 0x0ba8, 0x0baa,
- 0x0bae, 0x0bb5,
- 0x0bb7, 0x0bb9,
- 0x0bbe, 0x0bc2,
- 0x0bc6, 0x0bc8,
- 0x0bca, 0x0bcd,
- 0x0bd7, 0x0bd7,
- 0x0be7, 0x0bef,
- 0x0c01, 0x0c03,
- 0x0c05, 0x0c0c,
- 0x0c0e, 0x0c10,
- 0x0c12, 0x0c28,
- 0x0c2a, 0x0c33,
- 0x0c35, 0x0c39,
- 0x0c3e, 0x0c44,
- 0x0c46, 0x0c48,
- 0x0c4a, 0x0c4d,
- 0x0c55, 0x0c56,
- 0x0c60, 0x0c61,
- 0x0c66, 0x0c6f,
- 0x0c82, 0x0c83,
- 0x0c85, 0x0c8c,
- 0x0c8e, 0x0c90,
- 0x0c92, 0x0ca8,
- 0x0caa, 0x0cb3,
- 0x0cb5, 0x0cb9,
- 0x0cbc, 0x0cc4,
- 0x0cc6, 0x0cc8,
- 0x0cca, 0x0ccd,
- 0x0cd5, 0x0cd6,
- 0x0cde, 0x0cde,
- 0x0ce0, 0x0ce1,
- 0x0ce6, 0x0cef,
- 0x0d02, 0x0d03,
- 0x0d05, 0x0d0c,
- 0x0d0e, 0x0d10,
- 0x0d12, 0x0d28,
- 0x0d2a, 0x0d39,
- 0x0d3e, 0x0d43,
- 0x0d46, 0x0d48,
- 0x0d4a, 0x0d4d,
- 0x0d57, 0x0d57,
- 0x0d60, 0x0d61,
- 0x0d66, 0x0d6f,
- 0x0d82, 0x0d83,
- 0x0d85, 0x0d96,
- 0x0d9a, 0x0db1,
- 0x0db3, 0x0dbb,
- 0x0dbd, 0x0dbd,
- 0x0dc0, 0x0dc6,
- 0x0dca, 0x0dca,
- 0x0dcf, 0x0dd4,
- 0x0dd6, 0x0dd6,
- 0x0dd8, 0x0ddf,
- 0x0df2, 0x0df3,
- 0x0e01, 0x0e3a,
- 0x0e40, 0x0e4e,
- 0x0e50, 0x0e59,
- 0x0e81, 0x0e82,
- 0x0e84, 0x0e84,
- 0x0e87, 0x0e88,
- 0x0e8a, 0x0e8a,
- 0x0e8d, 0x0e8d,
- 0x0e94, 0x0e97,
- 0x0e99, 0x0e9f,
- 0x0ea1, 0x0ea3,
- 0x0ea5, 0x0ea5,
- 0x0ea7, 0x0ea7,
- 0x0eaa, 0x0eab,
- 0x0ead, 0x0eb9,
- 0x0ebb, 0x0ebd,
- 0x0ec0, 0x0ec4,
- 0x0ec6, 0x0ec6,
- 0x0ec8, 0x0ecd,
- 0x0ed0, 0x0ed9,
- 0x0edc, 0x0edd,
- 0x0f00, 0x0f00,
- 0x0f18, 0x0f19,
- 0x0f20, 0x0f29,
- 0x0f35, 0x0f35,
- 0x0f37, 0x0f37,
- 0x0f39, 0x0f39,
- 0x0f3e, 0x0f47,
- 0x0f49, 0x0f6a,
- 0x0f71, 0x0f84,
- 0x0f86, 0x0f8b,
- 0x0f90, 0x0f97,
- 0x0f99, 0x0fbc,
- 0x0fc6, 0x0fc6,
- 0x1000, 0x1021,
- 0x1023, 0x1027,
- 0x1029, 0x102a,
- 0x102c, 0x1032,
- 0x1036, 0x1039,
- 0x1040, 0x1049,
- 0x1050, 0x1059,
- 0x10a0, 0x10c5,
- 0x10d0, 0x10f8,
- 0x1100, 0x1159,
- 0x115f, 0x11a2,
- 0x11a8, 0x11f9,
- 0x1200, 0x1206,
- 0x1208, 0x1246,
- 0x1248, 0x1248,
- 0x124a, 0x124d,
- 0x1250, 0x1256,
- 0x1258, 0x1258,
- 0x125a, 0x125d,
- 0x1260, 0x1286,
- 0x1288, 0x1288,
- 0x128a, 0x128d,
- 0x1290, 0x12ae,
- 0x12b0, 0x12b0,
- 0x12b2, 0x12b5,
- 0x12b8, 0x12be,
- 0x12c0, 0x12c0,
- 0x12c2, 0x12c5,
- 0x12c8, 0x12ce,
- 0x12d0, 0x12d6,
- 0x12d8, 0x12ee,
- 0x12f0, 0x130e,
- 0x1310, 0x1310,
- 0x1312, 0x1315,
- 0x1318, 0x131e,
- 0x1320, 0x1346,
- 0x1348, 0x135a,
- 0x1369, 0x1371,
- 0x13a0, 0x13f4,
- 0x1401, 0x166c,
- 0x166f, 0x1676,
- 0x1681, 0x169a,
- 0x16a0, 0x16ea,
- 0x1700, 0x170c,
- 0x170e, 0x1714,
- 0x1720, 0x1734,
- 0x1740, 0x1753,
- 0x1760, 0x176c,
- 0x176e, 0x1770,
- 0x1772, 0x1773,
- 0x1780, 0x17b3,
- 0x17b6, 0x17d3,
- 0x17d7, 0x17d7,
- 0x17dc, 0x17dd,
- 0x17e0, 0x17e9,
- 0x180b, 0x180d,
- 0x1810, 0x1819,
- 0x1820, 0x1877,
- 0x1880, 0x18a9,
- 0x1900, 0x191c,
- 0x1920, 0x192b,
- 0x1930, 0x193b,
- 0x1946, 0x196d,
- 0x1970, 0x1974,
- 0x1d00, 0x1d6b,
- 0x1e00, 0x1e9b,
- 0x1ea0, 0x1ef9,
- 0x1f00, 0x1f15,
- 0x1f18, 0x1f1d,
- 0x1f20, 0x1f45,
- 0x1f48, 0x1f4d,
- 0x1f50, 0x1f57,
- 0x1f59, 0x1f59,
- 0x1f5b, 0x1f5b,
- 0x1f5d, 0x1f5d,
- 0x1f5f, 0x1f7d,
- 0x1f80, 0x1fb4,
- 0x1fb6, 0x1fbc,
- 0x1fbe, 0x1fbe,
- 0x1fc2, 0x1fc4,
- 0x1fc6, 0x1fcc,
- 0x1fd0, 0x1fd3,
- 0x1fd6, 0x1fdb,
- 0x1fe0, 0x1fec,
- 0x1ff2, 0x1ff4,
- 0x1ff6, 0x1ffc,
- 0x2071, 0x2071,
- 0x207f, 0x207f,
- 0x20d0, 0x20ea,
- 0x2102, 0x2102,
- 0x2107, 0x2107,
- 0x210a, 0x2113,
- 0x2115, 0x2115,
- 0x2119, 0x211d,
- 0x2124, 0x2124,
- 0x2126, 0x2126,
- 0x2128, 0x2128,
- 0x212a, 0x212d,
- 0x212f, 0x2131,
- 0x2133, 0x2139,
- 0x213d, 0x213f,
- 0x2145, 0x2149,
- 0x3005, 0x3006,
- 0x302a, 0x302f,
- 0x3031, 0x3035,
- 0x303b, 0x303c,
- 0x3041, 0x3096,
- 0x3099, 0x309a,
- 0x309d, 0x309f,
- 0x30a1, 0x30fa,
- 0x30fc, 0x30ff,
- 0x3105, 0x312c,
- 0x3131, 0x318e,
- 0x31a0, 0x31b7,
- 0x31f0, 0x31ff,
- 0x3400, 0x4db5,
- 0x4e00, 0x9fa5,
- 0xa000, 0xa48c,
- 0xac00, 0xd7a3,
- 0xf900, 0xfa2d,
- 0xfa30, 0xfa6a,
- 0xfb00, 0xfb06,
- 0xfb13, 0xfb17,
- 0xfb1d, 0xfb28,
- 0xfb2a, 0xfb36,
- 0xfb38, 0xfb3c,
- 0xfb3e, 0xfb3e,
- 0xfb40, 0xfb41,
- 0xfb43, 0xfb44,
- 0xfb46, 0xfbb1,
- 0xfbd3, 0xfd3d,
- 0xfd50, 0xfd8f,
- 0xfd92, 0xfdc7,
- 0xfdf0, 0xfdfb,
- 0xfe00, 0xfe0f,
- 0xfe20, 0xfe23,
- 0xfe70, 0xfe74,
- 0xfe76, 0xfefc,
- 0xff10, 0xff19,
- 0xff21, 0xff3a,
- 0xff41, 0xff5a,
- 0xff66, 0xffbe,
- 0xffc2, 0xffc7,
- 0xffca, 0xffcf,
- 0xffd2, 0xffd7,
- 0xffda, 0xffdc,
- 0x10000, 0x1000b,
- 0x1000d, 0x10026,
- 0x10028, 0x1003a,
- 0x1003c, 0x1003d,
- 0x1003f, 0x1004d,
- 0x10050, 0x1005d,
- 0x10080, 0x100fa,
- 0x10300, 0x1031e,
- 0x10330, 0x10349,
- 0x10380, 0x1039d,
- 0x10400, 0x1049d,
- 0x104a0, 0x104a9,
- 0x10800, 0x10805,
- 0x10808, 0x10808,
- 0x1080a, 0x10835,
- 0x10837, 0x10838,
- 0x1083c, 0x1083c,
- 0x1083f, 0x1083f,
- 0x1d165, 0x1d169,
- 0x1d16d, 0x1d172,
- 0x1d17b, 0x1d182,
- 0x1d185, 0x1d18b,
- 0x1d1aa, 0x1d1ad,
- 0x1d400, 0x1d454,
- 0x1d456, 0x1d49c,
- 0x1d49e, 0x1d49f,
- 0x1d4a2, 0x1d4a2,
- 0x1d4a5, 0x1d4a6,
- 0x1d4a9, 0x1d4ac,
- 0x1d4ae, 0x1d4b9,
- 0x1d4bb, 0x1d4bb,
- 0x1d4bd, 0x1d4c3,
- 0x1d4c5, 0x1d505,
- 0x1d507, 0x1d50a,
- 0x1d50d, 0x1d514,
- 0x1d516, 0x1d51c,
- 0x1d51e, 0x1d539,
- 0x1d53b, 0x1d53e,
- 0x1d540, 0x1d544,
- 0x1d546, 0x1d546,
- 0x1d54a, 0x1d550,
- 0x1d552, 0x1d6a3,
- 0x1d6a8, 0x1d6c0,
- 0x1d6c2, 0x1d6da,
- 0x1d6dc, 0x1d6fa,
- 0x1d6fc, 0x1d714,
- 0x1d716, 0x1d734,
- 0x1d736, 0x1d74e,
- 0x1d750, 0x1d76e,
- 0x1d770, 0x1d788,
- 0x1d78a, 0x1d7a8,
- 0x1d7aa, 0x1d7c2,
- 0x1d7c4, 0x1d7c9,
- 0x1d7ce, 0x1d7ff,
- 0x20000, 0x2a6d6,
- 0x2f800, 0x2fa1d,
- 0xe0100, 0xe01ef
-#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
-}; /* end of MBAlnum */
-
-static const OnigCodePoint SBAlpha[] = {
- 2,
- 0x0041, 0x005a,
- 0x0061, 0x007a
-};
-
-static const OnigCodePoint MBAlpha[] = {
-#ifdef USE_UNICODE_FULL_RANGE_CTYPE
- 394,
-#else
- 6,
-#endif
- 0x00aa, 0x00aa,
- 0x00b5, 0x00b5,
- 0x00ba, 0x00ba,
- 0x00c0, 0x00d6,
- 0x00d8, 0x00f6,
- 0x00f8, 0x0236
-#ifdef USE_UNICODE_FULL_RANGE_CTYPE
- ,
- 0x0250, 0x02c1,
- 0x02c6, 0x02d1,
- 0x02e0, 0x02e4,
- 0x02ee, 0x02ee,
- 0x0300, 0x0357,
- 0x035d, 0x036f,
- 0x037a, 0x037a,
- 0x0386, 0x0386,
- 0x0388, 0x038a,
- 0x038c, 0x038c,
- 0x038e, 0x03a1,
- 0x03a3, 0x03ce,
- 0x03d0, 0x03f5,
- 0x03f7, 0x03fb,
- 0x0400, 0x0481,
- 0x0483, 0x0486,
- 0x0488, 0x04ce,
- 0x04d0, 0x04f5,
- 0x04f8, 0x04f9,
- 0x0500, 0x050f,
- 0x0531, 0x0556,
- 0x0559, 0x0559,
- 0x0561, 0x0587,
- 0x0591, 0x05a1,
- 0x05a3, 0x05b9,
- 0x05bb, 0x05bd,
- 0x05bf, 0x05bf,
- 0x05c1, 0x05c2,
- 0x05c4, 0x05c4,
- 0x05d0, 0x05ea,
- 0x05f0, 0x05f2,
- 0x0610, 0x0615,
- 0x0621, 0x063a,
- 0x0640, 0x0658,
- 0x066e, 0x06d3,
- 0x06d5, 0x06dc,
- 0x06de, 0x06e8,
- 0x06ea, 0x06ef,
- 0x06fa, 0x06fc,
- 0x06ff, 0x06ff,
- 0x0710, 0x074a,
- 0x074d, 0x074f,
- 0x0780, 0x07b1,
- 0x0901, 0x0939,
- 0x093c, 0x094d,
- 0x0950, 0x0954,
- 0x0958, 0x0963,
- 0x0981, 0x0983,
- 0x0985, 0x098c,
- 0x098f, 0x0990,
- 0x0993, 0x09a8,
- 0x09aa, 0x09b0,
- 0x09b2, 0x09b2,
- 0x09b6, 0x09b9,
- 0x09bc, 0x09c4,
- 0x09c7, 0x09c8,
- 0x09cb, 0x09cd,
- 0x09d7, 0x09d7,
- 0x09dc, 0x09dd,
- 0x09df, 0x09e3,
- 0x09f0, 0x09f1,
- 0x0a01, 0x0a03,
- 0x0a05, 0x0a0a,
- 0x0a0f, 0x0a10,
- 0x0a13, 0x0a28,
- 0x0a2a, 0x0a30,
- 0x0a32, 0x0a33,
- 0x0a35, 0x0a36,
- 0x0a38, 0x0a39,
- 0x0a3c, 0x0a3c,
- 0x0a3e, 0x0a42,
- 0x0a47, 0x0a48,
- 0x0a4b, 0x0a4d,
- 0x0a59, 0x0a5c,
- 0x0a5e, 0x0a5e,
- 0x0a70, 0x0a74,
- 0x0a81, 0x0a83,
- 0x0a85, 0x0a8d,
- 0x0a8f, 0x0a91,
- 0x0a93, 0x0aa8,
- 0x0aaa, 0x0ab0,
- 0x0ab2, 0x0ab3,
- 0x0ab5, 0x0ab9,
- 0x0abc, 0x0ac5,
- 0x0ac7, 0x0ac9,
- 0x0acb, 0x0acd,
- 0x0ad0, 0x0ad0,
- 0x0ae0, 0x0ae3,
- 0x0b01, 0x0b03,
- 0x0b05, 0x0b0c,
- 0x0b0f, 0x0b10,
- 0x0b13, 0x0b28,
- 0x0b2a, 0x0b30,
- 0x0b32, 0x0b33,
- 0x0b35, 0x0b39,
- 0x0b3c, 0x0b43,
- 0x0b47, 0x0b48,
- 0x0b4b, 0x0b4d,
- 0x0b56, 0x0b57,
- 0x0b5c, 0x0b5d,
- 0x0b5f, 0x0b61,
- 0x0b71, 0x0b71,
- 0x0b82, 0x0b83,
- 0x0b85, 0x0b8a,
- 0x0b8e, 0x0b90,
- 0x0b92, 0x0b95,
- 0x0b99, 0x0b9a,
- 0x0b9c, 0x0b9c,
- 0x0b9e, 0x0b9f,
- 0x0ba3, 0x0ba4,
- 0x0ba8, 0x0baa,
- 0x0bae, 0x0bb5,
- 0x0bb7, 0x0bb9,
- 0x0bbe, 0x0bc2,
- 0x0bc6, 0x0bc8,
- 0x0bca, 0x0bcd,
- 0x0bd7, 0x0bd7,
- 0x0c01, 0x0c03,
- 0x0c05, 0x0c0c,
- 0x0c0e, 0x0c10,
- 0x0c12, 0x0c28,
- 0x0c2a, 0x0c33,
- 0x0c35, 0x0c39,
- 0x0c3e, 0x0c44,
- 0x0c46, 0x0c48,
- 0x0c4a, 0x0c4d,
- 0x0c55, 0x0c56,
- 0x0c60, 0x0c61,
- 0x0c82, 0x0c83,
- 0x0c85, 0x0c8c,
- 0x0c8e, 0x0c90,
- 0x0c92, 0x0ca8,
- 0x0caa, 0x0cb3,
- 0x0cb5, 0x0cb9,
- 0x0cbc, 0x0cc4,
- 0x0cc6, 0x0cc8,
- 0x0cca, 0x0ccd,
- 0x0cd5, 0x0cd6,
- 0x0cde, 0x0cde,
- 0x0ce0, 0x0ce1,
- 0x0d02, 0x0d03,
- 0x0d05, 0x0d0c,
- 0x0d0e, 0x0d10,
- 0x0d12, 0x0d28,
- 0x0d2a, 0x0d39,
- 0x0d3e, 0x0d43,
- 0x0d46, 0x0d48,
- 0x0d4a, 0x0d4d,
- 0x0d57, 0x0d57,
- 0x0d60, 0x0d61,
- 0x0d82, 0x0d83,
- 0x0d85, 0x0d96,
- 0x0d9a, 0x0db1,
- 0x0db3, 0x0dbb,
- 0x0dbd, 0x0dbd,
- 0x0dc0, 0x0dc6,
- 0x0dca, 0x0dca,
- 0x0dcf, 0x0dd4,
- 0x0dd6, 0x0dd6,
- 0x0dd8, 0x0ddf,
- 0x0df2, 0x0df3,
- 0x0e01, 0x0e3a,
- 0x0e40, 0x0e4e,
- 0x0e81, 0x0e82,
- 0x0e84, 0x0e84,
- 0x0e87, 0x0e88,
- 0x0e8a, 0x0e8a,
- 0x0e8d, 0x0e8d,
- 0x0e94, 0x0e97,
- 0x0e99, 0x0e9f,
- 0x0ea1, 0x0ea3,
- 0x0ea5, 0x0ea5,
- 0x0ea7, 0x0ea7,
- 0x0eaa, 0x0eab,
- 0x0ead, 0x0eb9,
- 0x0ebb, 0x0ebd,
- 0x0ec0, 0x0ec4,
- 0x0ec6, 0x0ec6,
- 0x0ec8, 0x0ecd,
- 0x0edc, 0x0edd,
- 0x0f00, 0x0f00,
- 0x0f18, 0x0f19,
- 0x0f35, 0x0f35,
- 0x0f37, 0x0f37,
- 0x0f39, 0x0f39,
- 0x0f3e, 0x0f47,
- 0x0f49, 0x0f6a,
- 0x0f71, 0x0f84,
- 0x0f86, 0x0f8b,
- 0x0f90, 0x0f97,
- 0x0f99, 0x0fbc,
- 0x0fc6, 0x0fc6,
- 0x1000, 0x1021,
- 0x1023, 0x1027,
- 0x1029, 0x102a,
- 0x102c, 0x1032,
- 0x1036, 0x1039,
- 0x1050, 0x1059,
- 0x10a0, 0x10c5,
- 0x10d0, 0x10f8,
- 0x1100, 0x1159,
- 0x115f, 0x11a2,
- 0x11a8, 0x11f9,
- 0x1200, 0x1206,
- 0x1208, 0x1246,
- 0x1248, 0x1248,
- 0x124a, 0x124d,
- 0x1250, 0x1256,
- 0x1258, 0x1258,
- 0x125a, 0x125d,
- 0x1260, 0x1286,
- 0x1288, 0x1288,
- 0x128a, 0x128d,
- 0x1290, 0x12ae,
- 0x12b0, 0x12b0,
- 0x12b2, 0x12b5,
- 0x12b8, 0x12be,
- 0x12c0, 0x12c0,
- 0x12c2, 0x12c5,
- 0x12c8, 0x12ce,
- 0x12d0, 0x12d6,
- 0x12d8, 0x12ee,
- 0x12f0, 0x130e,
- 0x1310, 0x1310,
- 0x1312, 0x1315,
- 0x1318, 0x131e,
- 0x1320, 0x1346,
- 0x1348, 0x135a,
- 0x13a0, 0x13f4,
- 0x1401, 0x166c,
- 0x166f, 0x1676,
- 0x1681, 0x169a,
- 0x16a0, 0x16ea,
- 0x1700, 0x170c,
- 0x170e, 0x1714,
- 0x1720, 0x1734,
- 0x1740, 0x1753,
- 0x1760, 0x176c,
- 0x176e, 0x1770,
- 0x1772, 0x1773,
- 0x1780, 0x17b3,
- 0x17b6, 0x17d3,
- 0x17d7, 0x17d7,
- 0x17dc, 0x17dd,
- 0x180b, 0x180d,
- 0x1820, 0x1877,
- 0x1880, 0x18a9,
- 0x1900, 0x191c,
- 0x1920, 0x192b,
- 0x1930, 0x193b,
- 0x1950, 0x196d,
- 0x1970, 0x1974,
- 0x1d00, 0x1d6b,
- 0x1e00, 0x1e9b,
- 0x1ea0, 0x1ef9,
- 0x1f00, 0x1f15,
- 0x1f18, 0x1f1d,
- 0x1f20, 0x1f45,
- 0x1f48, 0x1f4d,
- 0x1f50, 0x1f57,
- 0x1f59, 0x1f59,
- 0x1f5b, 0x1f5b,
- 0x1f5d, 0x1f5d,
- 0x1f5f, 0x1f7d,
- 0x1f80, 0x1fb4,
- 0x1fb6, 0x1fbc,
- 0x1fbe, 0x1fbe,
- 0x1fc2, 0x1fc4,
- 0x1fc6, 0x1fcc,
- 0x1fd0, 0x1fd3,
- 0x1fd6, 0x1fdb,
- 0x1fe0, 0x1fec,
- 0x1ff2, 0x1ff4,
- 0x1ff6, 0x1ffc,
- 0x2071, 0x2071,
- 0x207f, 0x207f,
- 0x20d0, 0x20ea,
- 0x2102, 0x2102,
- 0x2107, 0x2107,
- 0x210a, 0x2113,
- 0x2115, 0x2115,
- 0x2119, 0x211d,
- 0x2124, 0x2124,
- 0x2126, 0x2126,
- 0x2128, 0x2128,
- 0x212a, 0x212d,
- 0x212f, 0x2131,
- 0x2133, 0x2139,
- 0x213d, 0x213f,
- 0x2145, 0x2149,
- 0x3005, 0x3006,
- 0x302a, 0x302f,
- 0x3031, 0x3035,
- 0x303b, 0x303c,
- 0x3041, 0x3096,
- 0x3099, 0x309a,
- 0x309d, 0x309f,
- 0x30a1, 0x30fa,
- 0x30fc, 0x30ff,
- 0x3105, 0x312c,
- 0x3131, 0x318e,
- 0x31a0, 0x31b7,
- 0x31f0, 0x31ff,
- 0x3400, 0x4db5,
- 0x4e00, 0x9fa5,
- 0xa000, 0xa48c,
- 0xac00, 0xd7a3,
- 0xf900, 0xfa2d,
- 0xfa30, 0xfa6a,
- 0xfb00, 0xfb06,
- 0xfb13, 0xfb17,
- 0xfb1d, 0xfb28,
- 0xfb2a, 0xfb36,
- 0xfb38, 0xfb3c,
- 0xfb3e, 0xfb3e,
- 0xfb40, 0xfb41,
- 0xfb43, 0xfb44,
- 0xfb46, 0xfbb1,
- 0xfbd3, 0xfd3d,
- 0xfd50, 0xfd8f,
- 0xfd92, 0xfdc7,
- 0xfdf0, 0xfdfb,
- 0xfe00, 0xfe0f,
- 0xfe20, 0xfe23,
- 0xfe70, 0xfe74,
- 0xfe76, 0xfefc,
- 0xff21, 0xff3a,
- 0xff41, 0xff5a,
- 0xff66, 0xffbe,
- 0xffc2, 0xffc7,
- 0xffca, 0xffcf,
- 0xffd2, 0xffd7,
- 0xffda, 0xffdc,
- 0x10000, 0x1000b,
- 0x1000d, 0x10026,
- 0x10028, 0x1003a,
- 0x1003c, 0x1003d,
- 0x1003f, 0x1004d,
- 0x10050, 0x1005d,
- 0x10080, 0x100fa,
- 0x10300, 0x1031e,
- 0x10330, 0x10349,
- 0x10380, 0x1039d,
- 0x10400, 0x1049d,
- 0x10800, 0x10805,
- 0x10808, 0x10808,
- 0x1080a, 0x10835,
- 0x10837, 0x10838,
- 0x1083c, 0x1083c,
- 0x1083f, 0x1083f,
- 0x1d165, 0x1d169,
- 0x1d16d, 0x1d172,
- 0x1d17b, 0x1d182,
- 0x1d185, 0x1d18b,
- 0x1d1aa, 0x1d1ad,
- 0x1d400, 0x1d454,
- 0x1d456, 0x1d49c,
- 0x1d49e, 0x1d49f,
- 0x1d4a2, 0x1d4a2,
- 0x1d4a5, 0x1d4a6,
- 0x1d4a9, 0x1d4ac,
- 0x1d4ae, 0x1d4b9,
- 0x1d4bb, 0x1d4bb,
- 0x1d4bd, 0x1d4c3,
- 0x1d4c5, 0x1d505,
- 0x1d507, 0x1d50a,
- 0x1d50d, 0x1d514,
- 0x1d516, 0x1d51c,
- 0x1d51e, 0x1d539,
- 0x1d53b, 0x1d53e,
- 0x1d540, 0x1d544,
- 0x1d546, 0x1d546,
- 0x1d54a, 0x1d550,
- 0x1d552, 0x1d6a3,
- 0x1d6a8, 0x1d6c0,
- 0x1d6c2, 0x1d6da,
- 0x1d6dc, 0x1d6fa,
- 0x1d6fc, 0x1d714,
- 0x1d716, 0x1d734,
- 0x1d736, 0x1d74e,
- 0x1d750, 0x1d76e,
- 0x1d770, 0x1d788,
- 0x1d78a, 0x1d7a8,
- 0x1d7aa, 0x1d7c2,
- 0x1d7c4, 0x1d7c9,
- 0x20000, 0x2a6d6,
- 0x2f800, 0x2fa1d,
- 0xe0100, 0xe01ef
-#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
-}; /* end of MBAlpha */
-
-static const OnigCodePoint SBBlank[] = {
- 2,
- 0x0009, 0x0009,
- 0x0020, 0x0020
-};
-
-static const OnigCodePoint MBBlank[] = {
-#ifdef USE_UNICODE_FULL_RANGE_CTYPE
- 7,
-#else
- 1,
-#endif
- 0x00a0, 0x00a0
-#ifdef USE_UNICODE_FULL_RANGE_CTYPE
- ,
- 0x1680, 0x1680,
- 0x180e, 0x180e,
- 0x2000, 0x200a,
- 0x202f, 0x202f,
- 0x205f, 0x205f,
- 0x3000, 0x3000
-#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
-}; /* end of MBBlank */
-
-static const OnigCodePoint SBCntrl[] = {
- 2,
- 0x0000, 0x001f,
- 0x007f, 0x007f
-};
-
-static const OnigCodePoint MBCntrl[] = {
-#ifdef USE_UNICODE_FULL_RANGE_CTYPE
- 18,
-#else
- 2,
#endif
- 0x0080, 0x009f,
- 0x00ad, 0x00ad
-#ifdef USE_UNICODE_FULL_RANGE_CTYPE
- ,
- 0x0600, 0x0603,
- 0x06dd, 0x06dd,
- 0x070f, 0x070f,
- 0x17b4, 0x17b5,
- 0x200b, 0x200f,
- 0x202a, 0x202e,
- 0x2060, 0x2063,
- 0x206a, 0x206f,
- 0xd800, 0xf8ff,
- 0xfeff, 0xfeff,
- 0xfff9, 0xfffb,
- 0x1d173, 0x1d17a,
- 0xe0001, 0xe0001,
- 0xe0020, 0xe007f,
- 0xf0000, 0xffffd,
- 0x100000, 0x10fffd
-#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
-}; /* end of MBCntrl */
-
-static const OnigCodePoint SBDigit[] = {
- 1,
- 0x0030, 0x0039
-};
-
-static const OnigCodePoint MBDigit[] = {
-#ifdef USE_UNICODE_FULL_RANGE_CTYPE
- 22,
-#else
- 0
-#endif
-#ifdef USE_UNICODE_FULL_RANGE_CTYPE
- 0x0660, 0x0669,
- 0x06f0, 0x06f9,
- 0x0966, 0x096f,
- 0x09e6, 0x09ef,
- 0x0a66, 0x0a6f,
- 0x0ae6, 0x0aef,
- 0x0b66, 0x0b6f,
- 0x0be7, 0x0bef,
- 0x0c66, 0x0c6f,
- 0x0ce6, 0x0cef,
- 0x0d66, 0x0d6f,
- 0x0e50, 0x0e59,
- 0x0ed0, 0x0ed9,
- 0x0f20, 0x0f29,
- 0x1040, 0x1049,
- 0x1369, 0x1371,
- 0x17e0, 0x17e9,
- 0x1810, 0x1819,
- 0x1946, 0x194f,
- 0xff10, 0xff19,
- 0x104a0, 0x104a9,
- 0x1d7ce, 0x1d7ff
-#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
-}; /* end of MBDigit */
-
-static const OnigCodePoint SBGraph[] = {
- 1,
- 0x0021, 0x007e
-};
-
-static const OnigCodePoint MBGraph[] = {
-#ifdef USE_UNICODE_FULL_RANGE_CTYPE
- 404,
-#else
- 1,
-#endif
- 0x00a1, 0x0236
-#ifdef USE_UNICODE_FULL_RANGE_CTYPE
- ,
- 0x0250, 0x0357,
- 0x035d, 0x036f,
- 0x0374, 0x0375,
- 0x037a, 0x037a,
- 0x037e, 0x037e,
- 0x0384, 0x038a,
- 0x038c, 0x038c,
- 0x038e, 0x03a1,
- 0x03a3, 0x03ce,
- 0x03d0, 0x03fb,
- 0x0400, 0x0486,
- 0x0488, 0x04ce,
- 0x04d0, 0x04f5,
- 0x04f8, 0x04f9,
- 0x0500, 0x050f,
- 0x0531, 0x0556,
- 0x0559, 0x055f,
- 0x0561, 0x0587,
- 0x0589, 0x058a,
- 0x0591, 0x05a1,
- 0x05a3, 0x05b9,
- 0x05bb, 0x05c4,
- 0x05d0, 0x05ea,
- 0x05f0, 0x05f4,
- 0x0600, 0x0603,
- 0x060c, 0x0615,
- 0x061b, 0x061b,
- 0x061f, 0x061f,
- 0x0621, 0x063a,
- 0x0640, 0x0658,
- 0x0660, 0x070d,
- 0x070f, 0x074a,
- 0x074d, 0x074f,
- 0x0780, 0x07b1,
- 0x0901, 0x0939,
- 0x093c, 0x094d,
- 0x0950, 0x0954,
- 0x0958, 0x0970,
- 0x0981, 0x0983,
- 0x0985, 0x098c,
- 0x098f, 0x0990,
- 0x0993, 0x09a8,
- 0x09aa, 0x09b0,
- 0x09b2, 0x09b2,
- 0x09b6, 0x09b9,
- 0x09bc, 0x09c4,
- 0x09c7, 0x09c8,
- 0x09cb, 0x09cd,
- 0x09d7, 0x09d7,
- 0x09dc, 0x09dd,
- 0x09df, 0x09e3,
- 0x09e6, 0x09fa,
- 0x0a01, 0x0a03,
- 0x0a05, 0x0a0a,
- 0x0a0f, 0x0a10,
- 0x0a13, 0x0a28,
- 0x0a2a, 0x0a30,
- 0x0a32, 0x0a33,
- 0x0a35, 0x0a36,
- 0x0a38, 0x0a39,
- 0x0a3c, 0x0a3c,
- 0x0a3e, 0x0a42,
- 0x0a47, 0x0a48,
- 0x0a4b, 0x0a4d,
- 0x0a59, 0x0a5c,
- 0x0a5e, 0x0a5e,
- 0x0a66, 0x0a74,
- 0x0a81, 0x0a83,
- 0x0a85, 0x0a8d,
- 0x0a8f, 0x0a91,
- 0x0a93, 0x0aa8,
- 0x0aaa, 0x0ab0,
- 0x0ab2, 0x0ab3,
- 0x0ab5, 0x0ab9,
- 0x0abc, 0x0ac5,
- 0x0ac7, 0x0ac9,
- 0x0acb, 0x0acd,
- 0x0ad0, 0x0ad0,
- 0x0ae0, 0x0ae3,
- 0x0ae6, 0x0aef,
- 0x0af1, 0x0af1,
- 0x0b01, 0x0b03,
- 0x0b05, 0x0b0c,
- 0x0b0f, 0x0b10,
- 0x0b13, 0x0b28,
- 0x0b2a, 0x0b30,
- 0x0b32, 0x0b33,
- 0x0b35, 0x0b39,
- 0x0b3c, 0x0b43,
- 0x0b47, 0x0b48,
- 0x0b4b, 0x0b4d,
- 0x0b56, 0x0b57,
- 0x0b5c, 0x0b5d,
- 0x0b5f, 0x0b61,
- 0x0b66, 0x0b71,
- 0x0b82, 0x0b83,
- 0x0b85, 0x0b8a,
- 0x0b8e, 0x0b90,
- 0x0b92, 0x0b95,
- 0x0b99, 0x0b9a,
- 0x0b9c, 0x0b9c,
- 0x0b9e, 0x0b9f,
- 0x0ba3, 0x0ba4,
- 0x0ba8, 0x0baa,
- 0x0bae, 0x0bb5,
- 0x0bb7, 0x0bb9,
- 0x0bbe, 0x0bc2,
- 0x0bc6, 0x0bc8,
- 0x0bca, 0x0bcd,
- 0x0bd7, 0x0bd7,
- 0x0be7, 0x0bfa,
- 0x0c01, 0x0c03,
- 0x0c05, 0x0c0c,
- 0x0c0e, 0x0c10,
- 0x0c12, 0x0c28,
- 0x0c2a, 0x0c33,
- 0x0c35, 0x0c39,
- 0x0c3e, 0x0c44,
- 0x0c46, 0x0c48,
- 0x0c4a, 0x0c4d,
- 0x0c55, 0x0c56,
- 0x0c60, 0x0c61,
- 0x0c66, 0x0c6f,
- 0x0c82, 0x0c83,
- 0x0c85, 0x0c8c,
- 0x0c8e, 0x0c90,
- 0x0c92, 0x0ca8,
- 0x0caa, 0x0cb3,
- 0x0cb5, 0x0cb9,
- 0x0cbc, 0x0cc4,
- 0x0cc6, 0x0cc8,
- 0x0cca, 0x0ccd,
- 0x0cd5, 0x0cd6,
- 0x0cde, 0x0cde,
- 0x0ce0, 0x0ce1,
- 0x0ce6, 0x0cef,
- 0x0d02, 0x0d03,
- 0x0d05, 0x0d0c,
- 0x0d0e, 0x0d10,
- 0x0d12, 0x0d28,
- 0x0d2a, 0x0d39,
- 0x0d3e, 0x0d43,
- 0x0d46, 0x0d48,
- 0x0d4a, 0x0d4d,
- 0x0d57, 0x0d57,
- 0x0d60, 0x0d61,
- 0x0d66, 0x0d6f,
- 0x0d82, 0x0d83,
- 0x0d85, 0x0d96,
- 0x0d9a, 0x0db1,
- 0x0db3, 0x0dbb,
- 0x0dbd, 0x0dbd,
- 0x0dc0, 0x0dc6,
- 0x0dca, 0x0dca,
- 0x0dcf, 0x0dd4,
- 0x0dd6, 0x0dd6,
- 0x0dd8, 0x0ddf,
- 0x0df2, 0x0df4,
- 0x0e01, 0x0e3a,
- 0x0e3f, 0x0e5b,
- 0x0e81, 0x0e82,
- 0x0e84, 0x0e84,
- 0x0e87, 0x0e88,
- 0x0e8a, 0x0e8a,
- 0x0e8d, 0x0e8d,
- 0x0e94, 0x0e97,
- 0x0e99, 0x0e9f,
- 0x0ea1, 0x0ea3,
- 0x0ea5, 0x0ea5,
- 0x0ea7, 0x0ea7,
- 0x0eaa, 0x0eab,
- 0x0ead, 0x0eb9,
- 0x0ebb, 0x0ebd,
- 0x0ec0, 0x0ec4,
- 0x0ec6, 0x0ec6,
- 0x0ec8, 0x0ecd,
- 0x0ed0, 0x0ed9,
- 0x0edc, 0x0edd,
- 0x0f00, 0x0f47,
- 0x0f49, 0x0f6a,
- 0x0f71, 0x0f8b,
- 0x0f90, 0x0f97,
- 0x0f99, 0x0fbc,
- 0x0fbe, 0x0fcc,
- 0x0fcf, 0x0fcf,
- 0x1000, 0x1021,
- 0x1023, 0x1027,
- 0x1029, 0x102a,
- 0x102c, 0x1032,
- 0x1036, 0x1039,
- 0x1040, 0x1059,
- 0x10a0, 0x10c5,
- 0x10d0, 0x10f8,
- 0x10fb, 0x10fb,
- 0x1100, 0x1159,
- 0x115f, 0x11a2,
- 0x11a8, 0x11f9,
- 0x1200, 0x1206,
- 0x1208, 0x1246,
- 0x1248, 0x1248,
- 0x124a, 0x124d,
- 0x1250, 0x1256,
- 0x1258, 0x1258,
- 0x125a, 0x125d,
- 0x1260, 0x1286,
- 0x1288, 0x1288,
- 0x128a, 0x128d,
- 0x1290, 0x12ae,
- 0x12b0, 0x12b0,
- 0x12b2, 0x12b5,
- 0x12b8, 0x12be,
- 0x12c0, 0x12c0,
- 0x12c2, 0x12c5,
- 0x12c8, 0x12ce,
- 0x12d0, 0x12d6,
- 0x12d8, 0x12ee,
- 0x12f0, 0x130e,
- 0x1310, 0x1310,
- 0x1312, 0x1315,
- 0x1318, 0x131e,
- 0x1320, 0x1346,
- 0x1348, 0x135a,
- 0x1361, 0x137c,
- 0x13a0, 0x13f4,
- 0x1401, 0x1676,
- 0x1681, 0x169c,
- 0x16a0, 0x16f0,
- 0x1700, 0x170c,
- 0x170e, 0x1714,
- 0x1720, 0x1736,
- 0x1740, 0x1753,
- 0x1760, 0x176c,
- 0x176e, 0x1770,
- 0x1772, 0x1773,
- 0x1780, 0x17dd,
- 0x17e0, 0x17e9,
- 0x17f0, 0x17f9,
- 0x1800, 0x180d,
- 0x1810, 0x1819,
- 0x1820, 0x1877,
- 0x1880, 0x18a9,
- 0x1900, 0x191c,
- 0x1920, 0x192b,
- 0x1930, 0x193b,
- 0x1940, 0x1940,
- 0x1944, 0x196d,
- 0x1970, 0x1974,
- 0x19e0, 0x19ff,
- 0x1d00, 0x1d6b,
- 0x1e00, 0x1e9b,
- 0x1ea0, 0x1ef9,
- 0x1f00, 0x1f15,
- 0x1f18, 0x1f1d,
- 0x1f20, 0x1f45,
- 0x1f48, 0x1f4d,
- 0x1f50, 0x1f57,
- 0x1f59, 0x1f59,
- 0x1f5b, 0x1f5b,
- 0x1f5d, 0x1f5d,
- 0x1f5f, 0x1f7d,
- 0x1f80, 0x1fb4,
- 0x1fb6, 0x1fc4,
- 0x1fc6, 0x1fd3,
- 0x1fd6, 0x1fdb,
- 0x1fdd, 0x1fef,
- 0x1ff2, 0x1ff4,
- 0x1ff6, 0x1ffe,
- 0x200b, 0x2027,
- 0x202a, 0x202e,
- 0x2030, 0x2054,
- 0x2057, 0x2057,
- 0x2060, 0x2063,
- 0x206a, 0x2071,
- 0x2074, 0x208e,
- 0x20a0, 0x20b1,
- 0x20d0, 0x20ea,
- 0x2100, 0x213b,
- 0x213d, 0x214b,
- 0x2153, 0x2183,
- 0x2190, 0x23d0,
- 0x2400, 0x2426,
- 0x2440, 0x244a,
- 0x2460, 0x2617,
- 0x2619, 0x267d,
- 0x2680, 0x2691,
- 0x26a0, 0x26a1,
- 0x2701, 0x2704,
- 0x2706, 0x2709,
- 0x270c, 0x2727,
- 0x2729, 0x274b,
- 0x274d, 0x274d,
- 0x274f, 0x2752,
- 0x2756, 0x2756,
- 0x2758, 0x275e,
- 0x2761, 0x2794,
- 0x2798, 0x27af,
- 0x27b1, 0x27be,
- 0x27d0, 0x27eb,
- 0x27f0, 0x2b0d,
- 0x2e80, 0x2e99,
- 0x2e9b, 0x2ef3,
- 0x2f00, 0x2fd5,
- 0x2ff0, 0x2ffb,
- 0x3001, 0x303f,
- 0x3041, 0x3096,
- 0x3099, 0x30ff,
- 0x3105, 0x312c,
- 0x3131, 0x318e,
- 0x3190, 0x31b7,
- 0x31f0, 0x321e,
- 0x3220, 0x3243,
- 0x3250, 0x327d,
- 0x327f, 0x32fe,
- 0x3300, 0x4db5,
- 0x4dc0, 0x9fa5,
- 0xa000, 0xa48c,
- 0xa490, 0xa4c6,
- 0xac00, 0xd7a3,
- 0xe000, 0xfa2d,
- 0xfa30, 0xfa6a,
- 0xfb00, 0xfb06,
- 0xfb13, 0xfb17,
- 0xfb1d, 0xfb36,
- 0xfb38, 0xfb3c,
- 0xfb3e, 0xfb3e,
- 0xfb40, 0xfb41,
- 0xfb43, 0xfb44,
- 0xfb46, 0xfbb1,
- 0xfbd3, 0xfd3f,
- 0xfd50, 0xfd8f,
- 0xfd92, 0xfdc7,
- 0xfdf0, 0xfdfd,
- 0xfe00, 0xfe0f,
- 0xfe20, 0xfe23,
- 0xfe30, 0xfe52,
- 0xfe54, 0xfe66,
- 0xfe68, 0xfe6b,
- 0xfe70, 0xfe74,
- 0xfe76, 0xfefc,
- 0xfeff, 0xfeff,
- 0xff01, 0xffbe,
- 0xffc2, 0xffc7,
- 0xffca, 0xffcf,
- 0xffd2, 0xffd7,
- 0xffda, 0xffdc,
- 0xffe0, 0xffe6,
- 0xffe8, 0xffee,
- 0xfff9, 0xfffd,
- 0x10000, 0x1000b,
- 0x1000d, 0x10026,
- 0x10028, 0x1003a,
- 0x1003c, 0x1003d,
- 0x1003f, 0x1004d,
- 0x10050, 0x1005d,
- 0x10080, 0x100fa,
- 0x10100, 0x10102,
- 0x10107, 0x10133,
- 0x10137, 0x1013f,
- 0x10300, 0x1031e,
- 0x10320, 0x10323,
- 0x10330, 0x1034a,
- 0x10380, 0x1039d,
- 0x1039f, 0x1039f,
- 0x10400, 0x1049d,
- 0x104a0, 0x104a9,
- 0x10800, 0x10805,
- 0x10808, 0x10808,
- 0x1080a, 0x10835,
- 0x10837, 0x10838,
- 0x1083c, 0x1083c,
- 0x1083f, 0x1083f,
- 0x1d000, 0x1d0f5,
- 0x1d100, 0x1d126,
- 0x1d12a, 0x1d1dd,
- 0x1d300, 0x1d356,
- 0x1d400, 0x1d454,
- 0x1d456, 0x1d49c,
- 0x1d49e, 0x1d49f,
- 0x1d4a2, 0x1d4a2,
- 0x1d4a5, 0x1d4a6,
- 0x1d4a9, 0x1d4ac,
- 0x1d4ae, 0x1d4b9,
- 0x1d4bb, 0x1d4bb,
- 0x1d4bd, 0x1d4c3,
- 0x1d4c5, 0x1d505,
- 0x1d507, 0x1d50a,
- 0x1d50d, 0x1d514,
- 0x1d516, 0x1d51c,
- 0x1d51e, 0x1d539,
- 0x1d53b, 0x1d53e,
- 0x1d540, 0x1d544,
- 0x1d546, 0x1d546,
- 0x1d54a, 0x1d550,
- 0x1d552, 0x1d6a3,
- 0x1d6a8, 0x1d7c9,
- 0x1d7ce, 0x1d7ff,
- 0x20000, 0x2a6d6,
- 0x2f800, 0x2fa1d,
- 0xe0001, 0xe0001,
- 0xe0020, 0xe007f,
- 0xe0100, 0xe01ef,
- 0xf0000, 0xffffd,
- 0x100000, 0x10fffd
-#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
-}; /* end of MBGraph */
-
-static const OnigCodePoint SBLower[] = {
- 1,
- 0x0061, 0x007a
-};
-
-static const OnigCodePoint MBLower[] = {
-#ifdef USE_UNICODE_FULL_RANGE_CTYPE
- 423,
-#else
- 5,
-#endif
- 0x00aa, 0x00aa,
- 0x00b5, 0x00b5,
- 0x00ba, 0x00ba,
- 0x00df, 0x00f6,
- 0x00f8, 0x00ff
-#ifdef USE_UNICODE_FULL_RANGE_CTYPE
- ,
- 0x0101, 0x0101,
- 0x0103, 0x0103,
- 0x0105, 0x0105,
- 0x0107, 0x0107,
- 0x0109, 0x0109,
- 0x010b, 0x010b,
- 0x010d, 0x010d,
- 0x010f, 0x010f,
- 0x0111, 0x0111,
- 0x0113, 0x0113,
- 0x0115, 0x0115,
- 0x0117, 0x0117,
- 0x0119, 0x0119,
- 0x011b, 0x011b,
- 0x011d, 0x011d,
- 0x011f, 0x011f,
- 0x0121, 0x0121,
- 0x0123, 0x0123,
- 0x0125, 0x0125,
- 0x0127, 0x0127,
- 0x0129, 0x0129,
- 0x012b, 0x012b,
- 0x012d, 0x012d,
- 0x012f, 0x012f,
- 0x0131, 0x0131,
- 0x0133, 0x0133,
- 0x0135, 0x0135,
- 0x0137, 0x0138,
- 0x013a, 0x013a,
- 0x013c, 0x013c,
- 0x013e, 0x013e,
- 0x0140, 0x0140,
- 0x0142, 0x0142,
- 0x0144, 0x0144,
- 0x0146, 0x0146,
- 0x0148, 0x0149,
- 0x014b, 0x014b,
- 0x014d, 0x014d,
- 0x014f, 0x014f,
- 0x0151, 0x0151,
- 0x0153, 0x0153,
- 0x0155, 0x0155,
- 0x0157, 0x0157,
- 0x0159, 0x0159,
- 0x015b, 0x015b,
- 0x015d, 0x015d,
- 0x015f, 0x015f,
- 0x0161, 0x0161,
- 0x0163, 0x0163,
- 0x0165, 0x0165,
- 0x0167, 0x0167,
- 0x0169, 0x0169,
- 0x016b, 0x016b,
- 0x016d, 0x016d,
- 0x016f, 0x016f,
- 0x0171, 0x0171,
- 0x0173, 0x0173,
- 0x0175, 0x0175,
- 0x0177, 0x0177,
- 0x017a, 0x017a,
- 0x017c, 0x017c,
- 0x017e, 0x0180,
- 0x0183, 0x0183,
- 0x0185, 0x0185,
- 0x0188, 0x0188,
- 0x018c, 0x018d,
- 0x0192, 0x0192,
- 0x0195, 0x0195,
- 0x0199, 0x019b,
- 0x019e, 0x019e,
- 0x01a1, 0x01a1,
- 0x01a3, 0x01a3,
- 0x01a5, 0x01a5,
- 0x01a8, 0x01a8,
- 0x01aa, 0x01ab,
- 0x01ad, 0x01ad,
- 0x01b0, 0x01b0,
- 0x01b4, 0x01b4,
- 0x01b6, 0x01b6,
- 0x01b9, 0x01ba,
- 0x01bd, 0x01bf,
- 0x01c6, 0x01c6,
- 0x01c9, 0x01c9,
- 0x01cc, 0x01cc,
- 0x01ce, 0x01ce,
- 0x01d0, 0x01d0,
- 0x01d2, 0x01d2,
- 0x01d4, 0x01d4,
- 0x01d6, 0x01d6,
- 0x01d8, 0x01d8,
- 0x01da, 0x01da,
- 0x01dc, 0x01dd,
- 0x01df, 0x01df,
- 0x01e1, 0x01e1,
- 0x01e3, 0x01e3,
- 0x01e5, 0x01e5,
- 0x01e7, 0x01e7,
- 0x01e9, 0x01e9,
- 0x01eb, 0x01eb,
- 0x01ed, 0x01ed,
- 0x01ef, 0x01f0,
- 0x01f3, 0x01f3,
- 0x01f5, 0x01f5,
- 0x01f9, 0x01f9,
- 0x01fb, 0x01fb,
- 0x01fd, 0x01fd,
- 0x01ff, 0x01ff,
- 0x0201, 0x0201,
- 0x0203, 0x0203,
- 0x0205, 0x0205,
- 0x0207, 0x0207,
- 0x0209, 0x0209,
- 0x020b, 0x020b,
- 0x020d, 0x020d,
- 0x020f, 0x020f,
- 0x0211, 0x0211,
- 0x0213, 0x0213,
- 0x0215, 0x0215,
- 0x0217, 0x0217,
- 0x0219, 0x0219,
- 0x021b, 0x021b,
- 0x021d, 0x021d,
- 0x021f, 0x021f,
- 0x0221, 0x0221,
- 0x0223, 0x0223,
- 0x0225, 0x0225,
- 0x0227, 0x0227,
- 0x0229, 0x0229,
- 0x022b, 0x022b,
- 0x022d, 0x022d,
- 0x022f, 0x022f,
- 0x0231, 0x0231,
- 0x0233, 0x0236,
- 0x0250, 0x02af,
- 0x0390, 0x0390,
- 0x03ac, 0x03ce,
- 0x03d0, 0x03d1,
- 0x03d5, 0x03d7,
- 0x03d9, 0x03d9,
- 0x03db, 0x03db,
- 0x03dd, 0x03dd,
- 0x03df, 0x03df,
- 0x03e1, 0x03e1,
- 0x03e3, 0x03e3,
- 0x03e5, 0x03e5,
- 0x03e7, 0x03e7,
- 0x03e9, 0x03e9,
- 0x03eb, 0x03eb,
- 0x03ed, 0x03ed,
- 0x03ef, 0x03f3,
- 0x03f5, 0x03f5,
- 0x03f8, 0x03f8,
- 0x03fb, 0x03fb,
- 0x0430, 0x045f,
- 0x0461, 0x0461,
- 0x0463, 0x0463,
- 0x0465, 0x0465,
- 0x0467, 0x0467,
- 0x0469, 0x0469,
- 0x046b, 0x046b,
- 0x046d, 0x046d,
- 0x046f, 0x046f,
- 0x0471, 0x0471,
- 0x0473, 0x0473,
- 0x0475, 0x0475,
- 0x0477, 0x0477,
- 0x0479, 0x0479,
- 0x047b, 0x047b,
- 0x047d, 0x047d,
- 0x047f, 0x047f,
- 0x0481, 0x0481,
- 0x048b, 0x048b,
- 0x048d, 0x048d,
- 0x048f, 0x048f,
- 0x0491, 0x0491,
- 0x0493, 0x0493,
- 0x0495, 0x0495,
- 0x0497, 0x0497,
- 0x0499, 0x0499,
- 0x049b, 0x049b,
- 0x049d, 0x049d,
- 0x049f, 0x049f,
- 0x04a1, 0x04a1,
- 0x04a3, 0x04a3,
- 0x04a5, 0x04a5,
- 0x04a7, 0x04a7,
- 0x04a9, 0x04a9,
- 0x04ab, 0x04ab,
- 0x04ad, 0x04ad,
- 0x04af, 0x04af,
- 0x04b1, 0x04b1,
- 0x04b3, 0x04b3,
- 0x04b5, 0x04b5,
- 0x04b7, 0x04b7,
- 0x04b9, 0x04b9,
- 0x04bb, 0x04bb,
- 0x04bd, 0x04bd,
- 0x04bf, 0x04bf,
- 0x04c2, 0x04c2,
- 0x04c4, 0x04c4,
- 0x04c6, 0x04c6,
- 0x04c8, 0x04c8,
- 0x04ca, 0x04ca,
- 0x04cc, 0x04cc,
- 0x04ce, 0x04ce,
- 0x04d1, 0x04d1,
- 0x04d3, 0x04d3,
- 0x04d5, 0x04d5,
- 0x04d7, 0x04d7,
- 0x04d9, 0x04d9,
- 0x04db, 0x04db,
- 0x04dd, 0x04dd,
- 0x04df, 0x04df,
- 0x04e1, 0x04e1,
- 0x04e3, 0x04e3,
- 0x04e5, 0x04e5,
- 0x04e7, 0x04e7,
- 0x04e9, 0x04e9,
- 0x04eb, 0x04eb,
- 0x04ed, 0x04ed,
- 0x04ef, 0x04ef,
- 0x04f1, 0x04f1,
- 0x04f3, 0x04f3,
- 0x04f5, 0x04f5,
- 0x04f9, 0x04f9,
- 0x0501, 0x0501,
- 0x0503, 0x0503,
- 0x0505, 0x0505,
- 0x0507, 0x0507,
- 0x0509, 0x0509,
- 0x050b, 0x050b,
- 0x050d, 0x050d,
- 0x050f, 0x050f,
- 0x0561, 0x0587,
- 0x1d00, 0x1d2b,
- 0x1d62, 0x1d6b,
- 0x1e01, 0x1e01,
- 0x1e03, 0x1e03,
- 0x1e05, 0x1e05,
- 0x1e07, 0x1e07,
- 0x1e09, 0x1e09,
- 0x1e0b, 0x1e0b,
- 0x1e0d, 0x1e0d,
- 0x1e0f, 0x1e0f,
- 0x1e11, 0x1e11,
- 0x1e13, 0x1e13,
- 0x1e15, 0x1e15,
- 0x1e17, 0x1e17,
- 0x1e19, 0x1e19,
- 0x1e1b, 0x1e1b,
- 0x1e1d, 0x1e1d,
- 0x1e1f, 0x1e1f,
- 0x1e21, 0x1e21,
- 0x1e23, 0x1e23,
- 0x1e25, 0x1e25,
- 0x1e27, 0x1e27,
- 0x1e29, 0x1e29,
- 0x1e2b, 0x1e2b,
- 0x1e2d, 0x1e2d,
- 0x1e2f, 0x1e2f,
- 0x1e31, 0x1e31,
- 0x1e33, 0x1e33,
- 0x1e35, 0x1e35,
- 0x1e37, 0x1e37,
- 0x1e39, 0x1e39,
- 0x1e3b, 0x1e3b,
- 0x1e3d, 0x1e3d,
- 0x1e3f, 0x1e3f,
- 0x1e41, 0x1e41,
- 0x1e43, 0x1e43,
- 0x1e45, 0x1e45,
- 0x1e47, 0x1e47,
- 0x1e49, 0x1e49,
- 0x1e4b, 0x1e4b,
- 0x1e4d, 0x1e4d,
- 0x1e4f, 0x1e4f,
- 0x1e51, 0x1e51,
- 0x1e53, 0x1e53,
- 0x1e55, 0x1e55,
- 0x1e57, 0x1e57,
- 0x1e59, 0x1e59,
- 0x1e5b, 0x1e5b,
- 0x1e5d, 0x1e5d,
- 0x1e5f, 0x1e5f,
- 0x1e61, 0x1e61,
- 0x1e63, 0x1e63,
- 0x1e65, 0x1e65,
- 0x1e67, 0x1e67,
- 0x1e69, 0x1e69,
- 0x1e6b, 0x1e6b,
- 0x1e6d, 0x1e6d,
- 0x1e6f, 0x1e6f,
- 0x1e71, 0x1e71,
- 0x1e73, 0x1e73,
- 0x1e75, 0x1e75,
- 0x1e77, 0x1e77,
- 0x1e79, 0x1e79,
- 0x1e7b, 0x1e7b,
- 0x1e7d, 0x1e7d,
- 0x1e7f, 0x1e7f,
- 0x1e81, 0x1e81,
- 0x1e83, 0x1e83,
- 0x1e85, 0x1e85,
- 0x1e87, 0x1e87,
- 0x1e89, 0x1e89,
- 0x1e8b, 0x1e8b,
- 0x1e8d, 0x1e8d,
- 0x1e8f, 0x1e8f,
- 0x1e91, 0x1e91,
- 0x1e93, 0x1e93,
- 0x1e95, 0x1e9b,
- 0x1ea1, 0x1ea1,
- 0x1ea3, 0x1ea3,
- 0x1ea5, 0x1ea5,
- 0x1ea7, 0x1ea7,
- 0x1ea9, 0x1ea9,
- 0x1eab, 0x1eab,
- 0x1ead, 0x1ead,
- 0x1eaf, 0x1eaf,
- 0x1eb1, 0x1eb1,
- 0x1eb3, 0x1eb3,
- 0x1eb5, 0x1eb5,
- 0x1eb7, 0x1eb7,
- 0x1eb9, 0x1eb9,
- 0x1ebb, 0x1ebb,
- 0x1ebd, 0x1ebd,
- 0x1ebf, 0x1ebf,
- 0x1ec1, 0x1ec1,
- 0x1ec3, 0x1ec3,
- 0x1ec5, 0x1ec5,
- 0x1ec7, 0x1ec7,
- 0x1ec9, 0x1ec9,
- 0x1ecb, 0x1ecb,
- 0x1ecd, 0x1ecd,
- 0x1ecf, 0x1ecf,
- 0x1ed1, 0x1ed1,
- 0x1ed3, 0x1ed3,
- 0x1ed5, 0x1ed5,
- 0x1ed7, 0x1ed7,
- 0x1ed9, 0x1ed9,
- 0x1edb, 0x1edb,
- 0x1edd, 0x1edd,
- 0x1edf, 0x1edf,
- 0x1ee1, 0x1ee1,
- 0x1ee3, 0x1ee3,
- 0x1ee5, 0x1ee5,
- 0x1ee7, 0x1ee7,
- 0x1ee9, 0x1ee9,
- 0x1eeb, 0x1eeb,
- 0x1eed, 0x1eed,
- 0x1eef, 0x1eef,
- 0x1ef1, 0x1ef1,
- 0x1ef3, 0x1ef3,
- 0x1ef5, 0x1ef5,
- 0x1ef7, 0x1ef7,
- 0x1ef9, 0x1ef9,
- 0x1f00, 0x1f07,
- 0x1f10, 0x1f15,
- 0x1f20, 0x1f27,
- 0x1f30, 0x1f37,
- 0x1f40, 0x1f45,
- 0x1f50, 0x1f57,
- 0x1f60, 0x1f67,
- 0x1f70, 0x1f7d,
- 0x1f80, 0x1f87,
- 0x1f90, 0x1f97,
- 0x1fa0, 0x1fa7,
- 0x1fb0, 0x1fb4,
- 0x1fb6, 0x1fb7,
- 0x1fbe, 0x1fbe,
- 0x1fc2, 0x1fc4,
- 0x1fc6, 0x1fc7,
- 0x1fd0, 0x1fd3,
- 0x1fd6, 0x1fd7,
- 0x1fe0, 0x1fe7,
- 0x1ff2, 0x1ff4,
- 0x1ff6, 0x1ff7,
- 0x2071, 0x2071,
- 0x207f, 0x207f,
- 0x210a, 0x210a,
- 0x210e, 0x210f,
- 0x2113, 0x2113,
- 0x212f, 0x212f,
- 0x2134, 0x2134,
- 0x2139, 0x2139,
- 0x213d, 0x213d,
- 0x2146, 0x2149,
- 0xfb00, 0xfb06,
- 0xfb13, 0xfb17,
- 0xff41, 0xff5a,
- 0x10428, 0x1044f,
- 0x1d41a, 0x1d433,
- 0x1d44e, 0x1d454,
- 0x1d456, 0x1d467,
- 0x1d482, 0x1d49b,
- 0x1d4b6, 0x1d4b9,
- 0x1d4bb, 0x1d4bb,
- 0x1d4bd, 0x1d4c3,
- 0x1d4c5, 0x1d4cf,
- 0x1d4ea, 0x1d503,
- 0x1d51e, 0x1d537,
- 0x1d552, 0x1d56b,
- 0x1d586, 0x1d59f,
- 0x1d5ba, 0x1d5d3,
- 0x1d5ee, 0x1d607,
- 0x1d622, 0x1d63b,
- 0x1d656, 0x1d66f,
- 0x1d68a, 0x1d6a3,
- 0x1d6c2, 0x1d6da,
- 0x1d6dc, 0x1d6e1,
- 0x1d6fc, 0x1d714,
- 0x1d716, 0x1d71b,
- 0x1d736, 0x1d74e,
- 0x1d750, 0x1d755,
- 0x1d770, 0x1d788,
- 0x1d78a, 0x1d78f,
- 0x1d7aa, 0x1d7c2,
- 0x1d7c4, 0x1d7c9
-#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
-}; /* end of MBLower */
-
-static const OnigCodePoint SBPrint[] = {
- 2,
- 0x0009, 0x000d,
- 0x0020, 0x007e
-};
-
-static const OnigCodePoint MBPrint[] = {
-#ifdef USE_UNICODE_FULL_RANGE_CTYPE
- 403,
-#else
- 2,
-#endif
- 0x0085, 0x0085,
- 0x00a0, 0x0236
-#ifdef USE_UNICODE_FULL_RANGE_CTYPE
- ,
- 0x0250, 0x0357,
- 0x035d, 0x036f,
- 0x0374, 0x0375,
- 0x037a, 0x037a,
- 0x037e, 0x037e,
- 0x0384, 0x038a,
- 0x038c, 0x038c,
- 0x038e, 0x03a1,
- 0x03a3, 0x03ce,
- 0x03d0, 0x03fb,
- 0x0400, 0x0486,
- 0x0488, 0x04ce,
- 0x04d0, 0x04f5,
- 0x04f8, 0x04f9,
- 0x0500, 0x050f,
- 0x0531, 0x0556,
- 0x0559, 0x055f,
- 0x0561, 0x0587,
- 0x0589, 0x058a,
- 0x0591, 0x05a1,
- 0x05a3, 0x05b9,
- 0x05bb, 0x05c4,
- 0x05d0, 0x05ea,
- 0x05f0, 0x05f4,
- 0x0600, 0x0603,
- 0x060c, 0x0615,
- 0x061b, 0x061b,
- 0x061f, 0x061f,
- 0x0621, 0x063a,
- 0x0640, 0x0658,
- 0x0660, 0x070d,
- 0x070f, 0x074a,
- 0x074d, 0x074f,
- 0x0780, 0x07b1,
- 0x0901, 0x0939,
- 0x093c, 0x094d,
- 0x0950, 0x0954,
- 0x0958, 0x0970,
- 0x0981, 0x0983,
- 0x0985, 0x098c,
- 0x098f, 0x0990,
- 0x0993, 0x09a8,
- 0x09aa, 0x09b0,
- 0x09b2, 0x09b2,
- 0x09b6, 0x09b9,
- 0x09bc, 0x09c4,
- 0x09c7, 0x09c8,
- 0x09cb, 0x09cd,
- 0x09d7, 0x09d7,
- 0x09dc, 0x09dd,
- 0x09df, 0x09e3,
- 0x09e6, 0x09fa,
- 0x0a01, 0x0a03,
- 0x0a05, 0x0a0a,
- 0x0a0f, 0x0a10,
- 0x0a13, 0x0a28,
- 0x0a2a, 0x0a30,
- 0x0a32, 0x0a33,
- 0x0a35, 0x0a36,
- 0x0a38, 0x0a39,
- 0x0a3c, 0x0a3c,
- 0x0a3e, 0x0a42,
- 0x0a47, 0x0a48,
- 0x0a4b, 0x0a4d,
- 0x0a59, 0x0a5c,
- 0x0a5e, 0x0a5e,
- 0x0a66, 0x0a74,
- 0x0a81, 0x0a83,
- 0x0a85, 0x0a8d,
- 0x0a8f, 0x0a91,
- 0x0a93, 0x0aa8,
- 0x0aaa, 0x0ab0,
- 0x0ab2, 0x0ab3,
- 0x0ab5, 0x0ab9,
- 0x0abc, 0x0ac5,
- 0x0ac7, 0x0ac9,
- 0x0acb, 0x0acd,
- 0x0ad0, 0x0ad0,
- 0x0ae0, 0x0ae3,
- 0x0ae6, 0x0aef,
- 0x0af1, 0x0af1,
- 0x0b01, 0x0b03,
- 0x0b05, 0x0b0c,
- 0x0b0f, 0x0b10,
- 0x0b13, 0x0b28,
- 0x0b2a, 0x0b30,
- 0x0b32, 0x0b33,
- 0x0b35, 0x0b39,
- 0x0b3c, 0x0b43,
- 0x0b47, 0x0b48,
- 0x0b4b, 0x0b4d,
- 0x0b56, 0x0b57,
- 0x0b5c, 0x0b5d,
- 0x0b5f, 0x0b61,
- 0x0b66, 0x0b71,
- 0x0b82, 0x0b83,
- 0x0b85, 0x0b8a,
- 0x0b8e, 0x0b90,
- 0x0b92, 0x0b95,
- 0x0b99, 0x0b9a,
- 0x0b9c, 0x0b9c,
- 0x0b9e, 0x0b9f,
- 0x0ba3, 0x0ba4,
- 0x0ba8, 0x0baa,
- 0x0bae, 0x0bb5,
- 0x0bb7, 0x0bb9,
- 0x0bbe, 0x0bc2,
- 0x0bc6, 0x0bc8,
- 0x0bca, 0x0bcd,
- 0x0bd7, 0x0bd7,
- 0x0be7, 0x0bfa,
- 0x0c01, 0x0c03,
- 0x0c05, 0x0c0c,
- 0x0c0e, 0x0c10,
- 0x0c12, 0x0c28,
- 0x0c2a, 0x0c33,
- 0x0c35, 0x0c39,
- 0x0c3e, 0x0c44,
- 0x0c46, 0x0c48,
- 0x0c4a, 0x0c4d,
- 0x0c55, 0x0c56,
- 0x0c60, 0x0c61,
- 0x0c66, 0x0c6f,
- 0x0c82, 0x0c83,
- 0x0c85, 0x0c8c,
- 0x0c8e, 0x0c90,
- 0x0c92, 0x0ca8,
- 0x0caa, 0x0cb3,
- 0x0cb5, 0x0cb9,
- 0x0cbc, 0x0cc4,
- 0x0cc6, 0x0cc8,
- 0x0cca, 0x0ccd,
- 0x0cd5, 0x0cd6,
- 0x0cde, 0x0cde,
- 0x0ce0, 0x0ce1,
- 0x0ce6, 0x0cef,
- 0x0d02, 0x0d03,
- 0x0d05, 0x0d0c,
- 0x0d0e, 0x0d10,
- 0x0d12, 0x0d28,
- 0x0d2a, 0x0d39,
- 0x0d3e, 0x0d43,
- 0x0d46, 0x0d48,
- 0x0d4a, 0x0d4d,
- 0x0d57, 0x0d57,
- 0x0d60, 0x0d61,
- 0x0d66, 0x0d6f,
- 0x0d82, 0x0d83,
- 0x0d85, 0x0d96,
- 0x0d9a, 0x0db1,
- 0x0db3, 0x0dbb,
- 0x0dbd, 0x0dbd,
- 0x0dc0, 0x0dc6,
- 0x0dca, 0x0dca,
- 0x0dcf, 0x0dd4,
- 0x0dd6, 0x0dd6,
- 0x0dd8, 0x0ddf,
- 0x0df2, 0x0df4,
- 0x0e01, 0x0e3a,
- 0x0e3f, 0x0e5b,
- 0x0e81, 0x0e82,
- 0x0e84, 0x0e84,
- 0x0e87, 0x0e88,
- 0x0e8a, 0x0e8a,
- 0x0e8d, 0x0e8d,
- 0x0e94, 0x0e97,
- 0x0e99, 0x0e9f,
- 0x0ea1, 0x0ea3,
- 0x0ea5, 0x0ea5,
- 0x0ea7, 0x0ea7,
- 0x0eaa, 0x0eab,
- 0x0ead, 0x0eb9,
- 0x0ebb, 0x0ebd,
- 0x0ec0, 0x0ec4,
- 0x0ec6, 0x0ec6,
- 0x0ec8, 0x0ecd,
- 0x0ed0, 0x0ed9,
- 0x0edc, 0x0edd,
- 0x0f00, 0x0f47,
- 0x0f49, 0x0f6a,
- 0x0f71, 0x0f8b,
- 0x0f90, 0x0f97,
- 0x0f99, 0x0fbc,
- 0x0fbe, 0x0fcc,
- 0x0fcf, 0x0fcf,
- 0x1000, 0x1021,
- 0x1023, 0x1027,
- 0x1029, 0x102a,
- 0x102c, 0x1032,
- 0x1036, 0x1039,
- 0x1040, 0x1059,
- 0x10a0, 0x10c5,
- 0x10d0, 0x10f8,
- 0x10fb, 0x10fb,
- 0x1100, 0x1159,
- 0x115f, 0x11a2,
- 0x11a8, 0x11f9,
- 0x1200, 0x1206,
- 0x1208, 0x1246,
- 0x1248, 0x1248,
- 0x124a, 0x124d,
- 0x1250, 0x1256,
- 0x1258, 0x1258,
- 0x125a, 0x125d,
- 0x1260, 0x1286,
- 0x1288, 0x1288,
- 0x128a, 0x128d,
- 0x1290, 0x12ae,
- 0x12b0, 0x12b0,
- 0x12b2, 0x12b5,
- 0x12b8, 0x12be,
- 0x12c0, 0x12c0,
- 0x12c2, 0x12c5,
- 0x12c8, 0x12ce,
- 0x12d0, 0x12d6,
- 0x12d8, 0x12ee,
- 0x12f0, 0x130e,
- 0x1310, 0x1310,
- 0x1312, 0x1315,
- 0x1318, 0x131e,
- 0x1320, 0x1346,
- 0x1348, 0x135a,
- 0x1361, 0x137c,
- 0x13a0, 0x13f4,
- 0x1401, 0x1676,
- 0x1680, 0x169c,
- 0x16a0, 0x16f0,
- 0x1700, 0x170c,
- 0x170e, 0x1714,
- 0x1720, 0x1736,
- 0x1740, 0x1753,
- 0x1760, 0x176c,
- 0x176e, 0x1770,
- 0x1772, 0x1773,
- 0x1780, 0x17dd,
- 0x17e0, 0x17e9,
- 0x17f0, 0x17f9,
- 0x1800, 0x180e,
- 0x1810, 0x1819,
- 0x1820, 0x1877,
- 0x1880, 0x18a9,
- 0x1900, 0x191c,
- 0x1920, 0x192b,
- 0x1930, 0x193b,
- 0x1940, 0x1940,
- 0x1944, 0x196d,
- 0x1970, 0x1974,
- 0x19e0, 0x19ff,
- 0x1d00, 0x1d6b,
- 0x1e00, 0x1e9b,
- 0x1ea0, 0x1ef9,
- 0x1f00, 0x1f15,
- 0x1f18, 0x1f1d,
- 0x1f20, 0x1f45,
- 0x1f48, 0x1f4d,
- 0x1f50, 0x1f57,
- 0x1f59, 0x1f59,
- 0x1f5b, 0x1f5b,
- 0x1f5d, 0x1f5d,
- 0x1f5f, 0x1f7d,
- 0x1f80, 0x1fb4,
- 0x1fb6, 0x1fc4,
- 0x1fc6, 0x1fd3,
- 0x1fd6, 0x1fdb,
- 0x1fdd, 0x1fef,
- 0x1ff2, 0x1ff4,
- 0x1ff6, 0x1ffe,
- 0x2000, 0x2054,
- 0x2057, 0x2057,
- 0x205f, 0x2063,
- 0x206a, 0x2071,
- 0x2074, 0x208e,
- 0x20a0, 0x20b1,
- 0x20d0, 0x20ea,
- 0x2100, 0x213b,
- 0x213d, 0x214b,
- 0x2153, 0x2183,
- 0x2190, 0x23d0,
- 0x2400, 0x2426,
- 0x2440, 0x244a,
- 0x2460, 0x2617,
- 0x2619, 0x267d,
- 0x2680, 0x2691,
- 0x26a0, 0x26a1,
- 0x2701, 0x2704,
- 0x2706, 0x2709,
- 0x270c, 0x2727,
- 0x2729, 0x274b,
- 0x274d, 0x274d,
- 0x274f, 0x2752,
- 0x2756, 0x2756,
- 0x2758, 0x275e,
- 0x2761, 0x2794,
- 0x2798, 0x27af,
- 0x27b1, 0x27be,
- 0x27d0, 0x27eb,
- 0x27f0, 0x2b0d,
- 0x2e80, 0x2e99,
- 0x2e9b, 0x2ef3,
- 0x2f00, 0x2fd5,
- 0x2ff0, 0x2ffb,
- 0x3000, 0x303f,
- 0x3041, 0x3096,
- 0x3099, 0x30ff,
- 0x3105, 0x312c,
- 0x3131, 0x318e,
- 0x3190, 0x31b7,
- 0x31f0, 0x321e,
- 0x3220, 0x3243,
- 0x3250, 0x327d,
- 0x327f, 0x32fe,
- 0x3300, 0x4db5,
- 0x4dc0, 0x9fa5,
- 0xa000, 0xa48c,
- 0xa490, 0xa4c6,
- 0xac00, 0xd7a3,
- 0xe000, 0xfa2d,
- 0xfa30, 0xfa6a,
- 0xfb00, 0xfb06,
- 0xfb13, 0xfb17,
- 0xfb1d, 0xfb36,
- 0xfb38, 0xfb3c,
- 0xfb3e, 0xfb3e,
- 0xfb40, 0xfb41,
- 0xfb43, 0xfb44,
- 0xfb46, 0xfbb1,
- 0xfbd3, 0xfd3f,
- 0xfd50, 0xfd8f,
- 0xfd92, 0xfdc7,
- 0xfdf0, 0xfdfd,
- 0xfe00, 0xfe0f,
- 0xfe20, 0xfe23,
- 0xfe30, 0xfe52,
- 0xfe54, 0xfe66,
- 0xfe68, 0xfe6b,
- 0xfe70, 0xfe74,
- 0xfe76, 0xfefc,
- 0xfeff, 0xfeff,
- 0xff01, 0xffbe,
- 0xffc2, 0xffc7,
- 0xffca, 0xffcf,
- 0xffd2, 0xffd7,
- 0xffda, 0xffdc,
- 0xffe0, 0xffe6,
- 0xffe8, 0xffee,
- 0xfff9, 0xfffd,
- 0x10000, 0x1000b,
- 0x1000d, 0x10026,
- 0x10028, 0x1003a,
- 0x1003c, 0x1003d,
- 0x1003f, 0x1004d,
- 0x10050, 0x1005d,
- 0x10080, 0x100fa,
- 0x10100, 0x10102,
- 0x10107, 0x10133,
- 0x10137, 0x1013f,
- 0x10300, 0x1031e,
- 0x10320, 0x10323,
- 0x10330, 0x1034a,
- 0x10380, 0x1039d,
- 0x1039f, 0x1039f,
- 0x10400, 0x1049d,
- 0x104a0, 0x104a9,
- 0x10800, 0x10805,
- 0x10808, 0x10808,
- 0x1080a, 0x10835,
- 0x10837, 0x10838,
- 0x1083c, 0x1083c,
- 0x1083f, 0x1083f,
- 0x1d000, 0x1d0f5,
- 0x1d100, 0x1d126,
- 0x1d12a, 0x1d1dd,
- 0x1d300, 0x1d356,
- 0x1d400, 0x1d454,
- 0x1d456, 0x1d49c,
- 0x1d49e, 0x1d49f,
- 0x1d4a2, 0x1d4a2,
- 0x1d4a5, 0x1d4a6,
- 0x1d4a9, 0x1d4ac,
- 0x1d4ae, 0x1d4b9,
- 0x1d4bb, 0x1d4bb,
- 0x1d4bd, 0x1d4c3,
- 0x1d4c5, 0x1d505,
- 0x1d507, 0x1d50a,
- 0x1d50d, 0x1d514,
- 0x1d516, 0x1d51c,
- 0x1d51e, 0x1d539,
- 0x1d53b, 0x1d53e,
- 0x1d540, 0x1d544,
- 0x1d546, 0x1d546,
- 0x1d54a, 0x1d550,
- 0x1d552, 0x1d6a3,
- 0x1d6a8, 0x1d7c9,
- 0x1d7ce, 0x1d7ff,
- 0x20000, 0x2a6d6,
- 0x2f800, 0x2fa1d,
- 0xe0001, 0xe0001,
- 0xe0020, 0xe007f,
- 0xe0100, 0xe01ef,
- 0xf0000, 0xffffd,
- 0x100000, 0x10fffd
-#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
-}; /* end of MBPrint */
-
-static const OnigCodePoint SBPunct[] = {
- 9,
- 0x0021, 0x0023,
- 0x0025, 0x002a,
- 0x002c, 0x002f,
- 0x003a, 0x003b,
- 0x003f, 0x0040,
- 0x005b, 0x005d,
- 0x005f, 0x005f,
- 0x007b, 0x007b,
- 0x007d, 0x007d
-}; /* end of SBPunct */
-
-static const OnigCodePoint MBPunct[] = {
-#ifdef USE_UNICODE_FULL_RANGE_CTYPE
- 77,
-#else
- 5,
-#endif
- 0x00a1, 0x00a1,
- 0x00ab, 0x00ab,
- 0x00b7, 0x00b7,
- 0x00bb, 0x00bb,
- 0x00bf, 0x00bf
-#ifdef USE_UNICODE_FULL_RANGE_CTYPE
- ,
- 0x037e, 0x037e,
- 0x0387, 0x0387,
- 0x055a, 0x055f,
- 0x0589, 0x058a,
- 0x05be, 0x05be,
- 0x05c0, 0x05c0,
- 0x05c3, 0x05c3,
- 0x05f3, 0x05f4,
- 0x060c, 0x060d,
- 0x061b, 0x061b,
- 0x061f, 0x061f,
- 0x066a, 0x066d,
- 0x06d4, 0x06d4,
- 0x0700, 0x070d,
- 0x0964, 0x0965,
- 0x0970, 0x0970,
- 0x0df4, 0x0df4,
- 0x0e4f, 0x0e4f,
- 0x0e5a, 0x0e5b,
- 0x0f04, 0x0f12,
- 0x0f3a, 0x0f3d,
- 0x0f85, 0x0f85,
- 0x104a, 0x104f,
- 0x10fb, 0x10fb,
- 0x1361, 0x1368,
- 0x166d, 0x166e,
- 0x169b, 0x169c,
- 0x16eb, 0x16ed,
- 0x1735, 0x1736,
- 0x17d4, 0x17d6,
- 0x17d8, 0x17da,
- 0x1800, 0x180a,
- 0x1944, 0x1945,
- 0x2010, 0x2027,
- 0x2030, 0x2043,
- 0x2045, 0x2051,
- 0x2053, 0x2054,
- 0x2057, 0x2057,
- 0x207d, 0x207e,
- 0x208d, 0x208e,
- 0x2329, 0x232a,
- 0x23b4, 0x23b6,
- 0x2768, 0x2775,
- 0x27e6, 0x27eb,
- 0x2983, 0x2998,
- 0x29d8, 0x29db,
- 0x29fc, 0x29fd,
- 0x3001, 0x3003,
- 0x3008, 0x3011,
- 0x3014, 0x301f,
- 0x3030, 0x3030,
- 0x303d, 0x303d,
- 0x30a0, 0x30a0,
- 0x30fb, 0x30fb,
- 0xfd3e, 0xfd3f,
- 0xfe30, 0xfe52,
- 0xfe54, 0xfe61,
- 0xfe63, 0xfe63,
- 0xfe68, 0xfe68,
- 0xfe6a, 0xfe6b,
- 0xff01, 0xff03,
- 0xff05, 0xff0a,
- 0xff0c, 0xff0f,
- 0xff1a, 0xff1b,
- 0xff1f, 0xff20,
- 0xff3b, 0xff3d,
- 0xff3f, 0xff3f,
- 0xff5b, 0xff5b,
- 0xff5d, 0xff5d,
- 0xff5f, 0xff65,
- 0x10100, 0x10101,
- 0x1039f, 0x1039f
-#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
-}; /* end of MBPunct */
-
-static const OnigCodePoint SBSpace[] = {
- 2,
- 0x0009, 0x000d,
- 0x0020, 0x0020
-};
-
-static const OnigCodePoint MBSpace[] = {
-#ifdef USE_UNICODE_FULL_RANGE_CTYPE
- 9,
-#else
- 2,
-#endif
- 0x0085, 0x0085,
- 0x00a0, 0x00a0
-#ifdef USE_UNICODE_FULL_RANGE_CTYPE
- ,
- 0x1680, 0x1680,
- 0x180e, 0x180e,
- 0x2000, 0x200a,
- 0x2028, 0x2029,
- 0x202f, 0x202f,
- 0x205f, 0x205f,
- 0x3000, 0x3000
-#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
-}; /* end of MBSpace */
-
-static const OnigCodePoint SBUpper[] = {
- 1,
- 0x0041, 0x005a
-};
-
-static const OnigCodePoint MBUpper[] = {
-#ifdef USE_UNICODE_FULL_RANGE_CTYPE
- 420,
-#else
- 2,
-#endif
- 0x00c0, 0x00d6,
- 0x00d8, 0x00de
-#ifdef USE_UNICODE_FULL_RANGE_CTYPE
- ,
- 0x0100, 0x0100,
- 0x0102, 0x0102,
- 0x0104, 0x0104,
- 0x0106, 0x0106,
- 0x0108, 0x0108,
- 0x010a, 0x010a,
- 0x010c, 0x010c,
- 0x010e, 0x010e,
- 0x0110, 0x0110,
- 0x0112, 0x0112,
- 0x0114, 0x0114,
- 0x0116, 0x0116,
- 0x0118, 0x0118,
- 0x011a, 0x011a,
- 0x011c, 0x011c,
- 0x011e, 0x011e,
- 0x0120, 0x0120,
- 0x0122, 0x0122,
- 0x0124, 0x0124,
- 0x0126, 0x0126,
- 0x0128, 0x0128,
- 0x012a, 0x012a,
- 0x012c, 0x012c,
- 0x012e, 0x012e,
- 0x0130, 0x0130,
- 0x0132, 0x0132,
- 0x0134, 0x0134,
- 0x0136, 0x0136,
- 0x0139, 0x0139,
- 0x013b, 0x013b,
- 0x013d, 0x013d,
- 0x013f, 0x013f,
- 0x0141, 0x0141,
- 0x0143, 0x0143,
- 0x0145, 0x0145,
- 0x0147, 0x0147,
- 0x014a, 0x014a,
- 0x014c, 0x014c,
- 0x014e, 0x014e,
- 0x0150, 0x0150,
- 0x0152, 0x0152,
- 0x0154, 0x0154,
- 0x0156, 0x0156,
- 0x0158, 0x0158,
- 0x015a, 0x015a,
- 0x015c, 0x015c,
- 0x015e, 0x015e,
- 0x0160, 0x0160,
- 0x0162, 0x0162,
- 0x0164, 0x0164,
- 0x0166, 0x0166,
- 0x0168, 0x0168,
- 0x016a, 0x016a,
- 0x016c, 0x016c,
- 0x016e, 0x016e,
- 0x0170, 0x0170,
- 0x0172, 0x0172,
- 0x0174, 0x0174,
- 0x0176, 0x0176,
- 0x0178, 0x0179,
- 0x017b, 0x017b,
- 0x017d, 0x017d,
- 0x0181, 0x0182,
- 0x0184, 0x0184,
- 0x0186, 0x0187,
- 0x0189, 0x018b,
- 0x018e, 0x0191,
- 0x0193, 0x0194,
- 0x0196, 0x0198,
- 0x019c, 0x019d,
- 0x019f, 0x01a0,
- 0x01a2, 0x01a2,
- 0x01a4, 0x01a4,
- 0x01a6, 0x01a7,
- 0x01a9, 0x01a9,
- 0x01ac, 0x01ac,
- 0x01ae, 0x01af,
- 0x01b1, 0x01b3,
- 0x01b5, 0x01b5,
- 0x01b7, 0x01b8,
- 0x01bc, 0x01bc,
- 0x01c4, 0x01c4,
- 0x01c7, 0x01c7,
- 0x01ca, 0x01ca,
- 0x01cd, 0x01cd,
- 0x01cf, 0x01cf,
- 0x01d1, 0x01d1,
- 0x01d3, 0x01d3,
- 0x01d5, 0x01d5,
- 0x01d7, 0x01d7,
- 0x01d9, 0x01d9,
- 0x01db, 0x01db,
- 0x01de, 0x01de,
- 0x01e0, 0x01e0,
- 0x01e2, 0x01e2,
- 0x01e4, 0x01e4,
- 0x01e6, 0x01e6,
- 0x01e8, 0x01e8,
- 0x01ea, 0x01ea,
- 0x01ec, 0x01ec,
- 0x01ee, 0x01ee,
- 0x01f1, 0x01f1,
- 0x01f4, 0x01f4,
- 0x01f6, 0x01f8,
- 0x01fa, 0x01fa,
- 0x01fc, 0x01fc,
- 0x01fe, 0x01fe,
- 0x0200, 0x0200,
- 0x0202, 0x0202,
- 0x0204, 0x0204,
- 0x0206, 0x0206,
- 0x0208, 0x0208,
- 0x020a, 0x020a,
- 0x020c, 0x020c,
- 0x020e, 0x020e,
- 0x0210, 0x0210,
- 0x0212, 0x0212,
- 0x0214, 0x0214,
- 0x0216, 0x0216,
- 0x0218, 0x0218,
- 0x021a, 0x021a,
- 0x021c, 0x021c,
- 0x021e, 0x021e,
- 0x0220, 0x0220,
- 0x0222, 0x0222,
- 0x0224, 0x0224,
- 0x0226, 0x0226,
- 0x0228, 0x0228,
- 0x022a, 0x022a,
- 0x022c, 0x022c,
- 0x022e, 0x022e,
- 0x0230, 0x0230,
- 0x0232, 0x0232,
- 0x0386, 0x0386,
- 0x0388, 0x038a,
- 0x038c, 0x038c,
- 0x038e, 0x038f,
- 0x0391, 0x03a1,
- 0x03a3, 0x03ab,
- 0x03d2, 0x03d4,
- 0x03d8, 0x03d8,
- 0x03da, 0x03da,
- 0x03dc, 0x03dc,
- 0x03de, 0x03de,
- 0x03e0, 0x03e0,
- 0x03e2, 0x03e2,
- 0x03e4, 0x03e4,
- 0x03e6, 0x03e6,
- 0x03e8, 0x03e8,
- 0x03ea, 0x03ea,
- 0x03ec, 0x03ec,
- 0x03ee, 0x03ee,
- 0x03f4, 0x03f4,
- 0x03f7, 0x03f7,
- 0x03f9, 0x03fa,
- 0x0400, 0x042f,
- 0x0460, 0x0460,
- 0x0462, 0x0462,
- 0x0464, 0x0464,
- 0x0466, 0x0466,
- 0x0468, 0x0468,
- 0x046a, 0x046a,
- 0x046c, 0x046c,
- 0x046e, 0x046e,
- 0x0470, 0x0470,
- 0x0472, 0x0472,
- 0x0474, 0x0474,
- 0x0476, 0x0476,
- 0x0478, 0x0478,
- 0x047a, 0x047a,
- 0x047c, 0x047c,
- 0x047e, 0x047e,
- 0x0480, 0x0480,
- 0x048a, 0x048a,
- 0x048c, 0x048c,
- 0x048e, 0x048e,
- 0x0490, 0x0490,
- 0x0492, 0x0492,
- 0x0494, 0x0494,
- 0x0496, 0x0496,
- 0x0498, 0x0498,
- 0x049a, 0x049a,
- 0x049c, 0x049c,
- 0x049e, 0x049e,
- 0x04a0, 0x04a0,
- 0x04a2, 0x04a2,
- 0x04a4, 0x04a4,
- 0x04a6, 0x04a6,
- 0x04a8, 0x04a8,
- 0x04aa, 0x04aa,
- 0x04ac, 0x04ac,
- 0x04ae, 0x04ae,
- 0x04b0, 0x04b0,
- 0x04b2, 0x04b2,
- 0x04b4, 0x04b4,
- 0x04b6, 0x04b6,
- 0x04b8, 0x04b8,
- 0x04ba, 0x04ba,
- 0x04bc, 0x04bc,
- 0x04be, 0x04be,
- 0x04c0, 0x04c1,
- 0x04c3, 0x04c3,
- 0x04c5, 0x04c5,
- 0x04c7, 0x04c7,
- 0x04c9, 0x04c9,
- 0x04cb, 0x04cb,
- 0x04cd, 0x04cd,
- 0x04d0, 0x04d0,
- 0x04d2, 0x04d2,
- 0x04d4, 0x04d4,
- 0x04d6, 0x04d6,
- 0x04d8, 0x04d8,
- 0x04da, 0x04da,
- 0x04dc, 0x04dc,
- 0x04de, 0x04de,
- 0x04e0, 0x04e0,
- 0x04e2, 0x04e2,
- 0x04e4, 0x04e4,
- 0x04e6, 0x04e6,
- 0x04e8, 0x04e8,
- 0x04ea, 0x04ea,
- 0x04ec, 0x04ec,
- 0x04ee, 0x04ee,
- 0x04f0, 0x04f0,
- 0x04f2, 0x04f2,
- 0x04f4, 0x04f4,
- 0x04f8, 0x04f8,
- 0x0500, 0x0500,
- 0x0502, 0x0502,
- 0x0504, 0x0504,
- 0x0506, 0x0506,
- 0x0508, 0x0508,
- 0x050a, 0x050a,
- 0x050c, 0x050c,
- 0x050e, 0x050e,
- 0x0531, 0x0556,
- 0x10a0, 0x10c5,
- 0x1e00, 0x1e00,
- 0x1e02, 0x1e02,
- 0x1e04, 0x1e04,
- 0x1e06, 0x1e06,
- 0x1e08, 0x1e08,
- 0x1e0a, 0x1e0a,
- 0x1e0c, 0x1e0c,
- 0x1e0e, 0x1e0e,
- 0x1e10, 0x1e10,
- 0x1e12, 0x1e12,
- 0x1e14, 0x1e14,
- 0x1e16, 0x1e16,
- 0x1e18, 0x1e18,
- 0x1e1a, 0x1e1a,
- 0x1e1c, 0x1e1c,
- 0x1e1e, 0x1e1e,
- 0x1e20, 0x1e20,
- 0x1e22, 0x1e22,
- 0x1e24, 0x1e24,
- 0x1e26, 0x1e26,
- 0x1e28, 0x1e28,
- 0x1e2a, 0x1e2a,
- 0x1e2c, 0x1e2c,
- 0x1e2e, 0x1e2e,
- 0x1e30, 0x1e30,
- 0x1e32, 0x1e32,
- 0x1e34, 0x1e34,
- 0x1e36, 0x1e36,
- 0x1e38, 0x1e38,
- 0x1e3a, 0x1e3a,
- 0x1e3c, 0x1e3c,
- 0x1e3e, 0x1e3e,
- 0x1e40, 0x1e40,
- 0x1e42, 0x1e42,
- 0x1e44, 0x1e44,
- 0x1e46, 0x1e46,
- 0x1e48, 0x1e48,
- 0x1e4a, 0x1e4a,
- 0x1e4c, 0x1e4c,
- 0x1e4e, 0x1e4e,
- 0x1e50, 0x1e50,
- 0x1e52, 0x1e52,
- 0x1e54, 0x1e54,
- 0x1e56, 0x1e56,
- 0x1e58, 0x1e58,
- 0x1e5a, 0x1e5a,
- 0x1e5c, 0x1e5c,
- 0x1e5e, 0x1e5e,
- 0x1e60, 0x1e60,
- 0x1e62, 0x1e62,
- 0x1e64, 0x1e64,
- 0x1e66, 0x1e66,
- 0x1e68, 0x1e68,
- 0x1e6a, 0x1e6a,
- 0x1e6c, 0x1e6c,
- 0x1e6e, 0x1e6e,
- 0x1e70, 0x1e70,
- 0x1e72, 0x1e72,
- 0x1e74, 0x1e74,
- 0x1e76, 0x1e76,
- 0x1e78, 0x1e78,
- 0x1e7a, 0x1e7a,
- 0x1e7c, 0x1e7c,
- 0x1e7e, 0x1e7e,
- 0x1e80, 0x1e80,
- 0x1e82, 0x1e82,
- 0x1e84, 0x1e84,
- 0x1e86, 0x1e86,
- 0x1e88, 0x1e88,
- 0x1e8a, 0x1e8a,
- 0x1e8c, 0x1e8c,
- 0x1e8e, 0x1e8e,
- 0x1e90, 0x1e90,
- 0x1e92, 0x1e92,
- 0x1e94, 0x1e94,
- 0x1ea0, 0x1ea0,
- 0x1ea2, 0x1ea2,
- 0x1ea4, 0x1ea4,
- 0x1ea6, 0x1ea6,
- 0x1ea8, 0x1ea8,
- 0x1eaa, 0x1eaa,
- 0x1eac, 0x1eac,
- 0x1eae, 0x1eae,
- 0x1eb0, 0x1eb0,
- 0x1eb2, 0x1eb2,
- 0x1eb4, 0x1eb4,
- 0x1eb6, 0x1eb6,
- 0x1eb8, 0x1eb8,
- 0x1eba, 0x1eba,
- 0x1ebc, 0x1ebc,
- 0x1ebe, 0x1ebe,
- 0x1ec0, 0x1ec0,
- 0x1ec2, 0x1ec2,
- 0x1ec4, 0x1ec4,
- 0x1ec6, 0x1ec6,
- 0x1ec8, 0x1ec8,
- 0x1eca, 0x1eca,
- 0x1ecc, 0x1ecc,
- 0x1ece, 0x1ece,
- 0x1ed0, 0x1ed0,
- 0x1ed2, 0x1ed2,
- 0x1ed4, 0x1ed4,
- 0x1ed6, 0x1ed6,
- 0x1ed8, 0x1ed8,
- 0x1eda, 0x1eda,
- 0x1edc, 0x1edc,
- 0x1ede, 0x1ede,
- 0x1ee0, 0x1ee0,
- 0x1ee2, 0x1ee2,
- 0x1ee4, 0x1ee4,
- 0x1ee6, 0x1ee6,
- 0x1ee8, 0x1ee8,
- 0x1eea, 0x1eea,
- 0x1eec, 0x1eec,
- 0x1eee, 0x1eee,
- 0x1ef0, 0x1ef0,
- 0x1ef2, 0x1ef2,
- 0x1ef4, 0x1ef4,
- 0x1ef6, 0x1ef6,
- 0x1ef8, 0x1ef8,
- 0x1f08, 0x1f0f,
- 0x1f18, 0x1f1d,
- 0x1f28, 0x1f2f,
- 0x1f38, 0x1f3f,
- 0x1f48, 0x1f4d,
- 0x1f59, 0x1f59,
- 0x1f5b, 0x1f5b,
- 0x1f5d, 0x1f5d,
- 0x1f5f, 0x1f5f,
- 0x1f68, 0x1f6f,
- 0x1fb8, 0x1fbb,
- 0x1fc8, 0x1fcb,
- 0x1fd8, 0x1fdb,
- 0x1fe8, 0x1fec,
- 0x1ff8, 0x1ffb,
- 0x2102, 0x2102,
- 0x2107, 0x2107,
- 0x210b, 0x210d,
- 0x2110, 0x2112,
- 0x2115, 0x2115,
- 0x2119, 0x211d,
- 0x2124, 0x2124,
- 0x2126, 0x2126,
- 0x2128, 0x2128,
- 0x212a, 0x212d,
- 0x2130, 0x2131,
- 0x2133, 0x2133,
- 0x213e, 0x213f,
- 0x2145, 0x2145,
- 0xff21, 0xff3a,
- 0x10400, 0x10427,
- 0x1d400, 0x1d419,
- 0x1d434, 0x1d44d,
- 0x1d468, 0x1d481,
- 0x1d49c, 0x1d49c,
- 0x1d49e, 0x1d49f,
- 0x1d4a2, 0x1d4a2,
- 0x1d4a5, 0x1d4a6,
- 0x1d4a9, 0x1d4ac,
- 0x1d4ae, 0x1d4b5,
- 0x1d4d0, 0x1d4e9,
- 0x1d504, 0x1d505,
- 0x1d507, 0x1d50a,
- 0x1d50d, 0x1d514,
- 0x1d516, 0x1d51c,
- 0x1d538, 0x1d539,
- 0x1d53b, 0x1d53e,
- 0x1d540, 0x1d544,
- 0x1d546, 0x1d546,
- 0x1d54a, 0x1d550,
- 0x1d56c, 0x1d585,
- 0x1d5a0, 0x1d5b9,
- 0x1d5d4, 0x1d5ed,
- 0x1d608, 0x1d621,
- 0x1d63c, 0x1d655,
- 0x1d670, 0x1d689,
- 0x1d6a8, 0x1d6c0,
- 0x1d6e2, 0x1d6fa,
- 0x1d71c, 0x1d734,
- 0x1d756, 0x1d76e,
- 0x1d790, 0x1d7a8
-#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
-}; /* end of MBUpper */
-
-static const OnigCodePoint SBXDigit[] = {
- 3,
- 0x0030, 0x0039,
- 0x0041, 0x0046,
- 0x0061, 0x0066
-};
-
-static const OnigCodePoint SBASCII[] = {
- 1,
- 0x0000, 0x007f
-};
-
-static const OnigCodePoint SBWord[] = {
- 4,
- 0x0030, 0x0039,
- 0x0041, 0x005a,
- 0x005f, 0x005f,
- 0x0061, 0x007a
-};
-
-static const OnigCodePoint MBWord[] = {
-#ifdef USE_UNICODE_FULL_RANGE_CTYPE
- 432,
-#else
- 8,
-#endif
- 0x00aa, 0x00aa,
- 0x00b2, 0x00b3,
- 0x00b5, 0x00b5,
- 0x00b9, 0x00ba,
- 0x00bc, 0x00be,
- 0x00c0, 0x00d6,
- 0x00d8, 0x00f6,
-#ifndef USE_UNICODE_FULL_RANGE_CTYPE
- 0x00f8, 0x7fffffff
-#else /* not USE_UNICODE_FULL_RANGE_CTYPE */
- 0x00f8, 0x0236,
- 0x0250, 0x02c1,
- 0x02c6, 0x02d1,
- 0x02e0, 0x02e4,
- 0x02ee, 0x02ee,
- 0x0300, 0x0357,
- 0x035d, 0x036f,
- 0x037a, 0x037a,
- 0x0386, 0x0386,
- 0x0388, 0x038a,
- 0x038c, 0x038c,
- 0x038e, 0x03a1,
- 0x03a3, 0x03ce,
- 0x03d0, 0x03f5,
- 0x03f7, 0x03fb,
- 0x0400, 0x0481,
- 0x0483, 0x0486,
- 0x0488, 0x04ce,
- 0x04d0, 0x04f5,
- 0x04f8, 0x04f9,
- 0x0500, 0x050f,
- 0x0531, 0x0556,
- 0x0559, 0x0559,
- 0x0561, 0x0587,
- 0x0591, 0x05a1,
- 0x05a3, 0x05b9,
- 0x05bb, 0x05bd,
- 0x05bf, 0x05bf,
- 0x05c1, 0x05c2,
- 0x05c4, 0x05c4,
- 0x05d0, 0x05ea,
- 0x05f0, 0x05f2,
- 0x0610, 0x0615,
- 0x0621, 0x063a,
- 0x0640, 0x0658,
- 0x0660, 0x0669,
- 0x066e, 0x06d3,
- 0x06d5, 0x06dc,
- 0x06de, 0x06e8,
- 0x06ea, 0x06fc,
- 0x06ff, 0x06ff,
- 0x0710, 0x074a,
- 0x074d, 0x074f,
- 0x0780, 0x07b1,
- 0x0901, 0x0939,
- 0x093c, 0x094d,
- 0x0950, 0x0954,
- 0x0958, 0x0963,
- 0x0966, 0x096f,
- 0x0981, 0x0983,
- 0x0985, 0x098c,
- 0x098f, 0x0990,
- 0x0993, 0x09a8,
- 0x09aa, 0x09b0,
- 0x09b2, 0x09b2,
- 0x09b6, 0x09b9,
- 0x09bc, 0x09c4,
- 0x09c7, 0x09c8,
- 0x09cb, 0x09cd,
- 0x09d7, 0x09d7,
- 0x09dc, 0x09dd,
- 0x09df, 0x09e3,
- 0x09e6, 0x09f1,
- 0x09f4, 0x09f9,
- 0x0a01, 0x0a03,
- 0x0a05, 0x0a0a,
- 0x0a0f, 0x0a10,
- 0x0a13, 0x0a28,
- 0x0a2a, 0x0a30,
- 0x0a32, 0x0a33,
- 0x0a35, 0x0a36,
- 0x0a38, 0x0a39,
- 0x0a3c, 0x0a3c,
- 0x0a3e, 0x0a42,
- 0x0a47, 0x0a48,
- 0x0a4b, 0x0a4d,
- 0x0a59, 0x0a5c,
- 0x0a5e, 0x0a5e,
- 0x0a66, 0x0a74,
- 0x0a81, 0x0a83,
- 0x0a85, 0x0a8d,
- 0x0a8f, 0x0a91,
- 0x0a93, 0x0aa8,
- 0x0aaa, 0x0ab0,
- 0x0ab2, 0x0ab3,
- 0x0ab5, 0x0ab9,
- 0x0abc, 0x0ac5,
- 0x0ac7, 0x0ac9,
- 0x0acb, 0x0acd,
- 0x0ad0, 0x0ad0,
- 0x0ae0, 0x0ae3,
- 0x0ae6, 0x0aef,
- 0x0b01, 0x0b03,
- 0x0b05, 0x0b0c,
- 0x0b0f, 0x0b10,
- 0x0b13, 0x0b28,
- 0x0b2a, 0x0b30,
- 0x0b32, 0x0b33,
- 0x0b35, 0x0b39,
- 0x0b3c, 0x0b43,
- 0x0b47, 0x0b48,
- 0x0b4b, 0x0b4d,
- 0x0b56, 0x0b57,
- 0x0b5c, 0x0b5d,
- 0x0b5f, 0x0b61,
- 0x0b66, 0x0b6f,
- 0x0b71, 0x0b71,
- 0x0b82, 0x0b83,
- 0x0b85, 0x0b8a,
- 0x0b8e, 0x0b90,
- 0x0b92, 0x0b95,
- 0x0b99, 0x0b9a,
- 0x0b9c, 0x0b9c,
- 0x0b9e, 0x0b9f,
- 0x0ba3, 0x0ba4,
- 0x0ba8, 0x0baa,
- 0x0bae, 0x0bb5,
- 0x0bb7, 0x0bb9,
- 0x0bbe, 0x0bc2,
- 0x0bc6, 0x0bc8,
- 0x0bca, 0x0bcd,
- 0x0bd7, 0x0bd7,
- 0x0be7, 0x0bf2,
- 0x0c01, 0x0c03,
- 0x0c05, 0x0c0c,
- 0x0c0e, 0x0c10,
- 0x0c12, 0x0c28,
- 0x0c2a, 0x0c33,
- 0x0c35, 0x0c39,
- 0x0c3e, 0x0c44,
- 0x0c46, 0x0c48,
- 0x0c4a, 0x0c4d,
- 0x0c55, 0x0c56,
- 0x0c60, 0x0c61,
- 0x0c66, 0x0c6f,
- 0x0c82, 0x0c83,
- 0x0c85, 0x0c8c,
- 0x0c8e, 0x0c90,
- 0x0c92, 0x0ca8,
- 0x0caa, 0x0cb3,
- 0x0cb5, 0x0cb9,
- 0x0cbc, 0x0cc4,
- 0x0cc6, 0x0cc8,
- 0x0cca, 0x0ccd,
- 0x0cd5, 0x0cd6,
- 0x0cde, 0x0cde,
- 0x0ce0, 0x0ce1,
- 0x0ce6, 0x0cef,
- 0x0d02, 0x0d03,
- 0x0d05, 0x0d0c,
- 0x0d0e, 0x0d10,
- 0x0d12, 0x0d28,
- 0x0d2a, 0x0d39,
- 0x0d3e, 0x0d43,
- 0x0d46, 0x0d48,
- 0x0d4a, 0x0d4d,
- 0x0d57, 0x0d57,
- 0x0d60, 0x0d61,
- 0x0d66, 0x0d6f,
- 0x0d82, 0x0d83,
- 0x0d85, 0x0d96,
- 0x0d9a, 0x0db1,
- 0x0db3, 0x0dbb,
- 0x0dbd, 0x0dbd,
- 0x0dc0, 0x0dc6,
- 0x0dca, 0x0dca,
- 0x0dcf, 0x0dd4,
- 0x0dd6, 0x0dd6,
- 0x0dd8, 0x0ddf,
- 0x0df2, 0x0df3,
- 0x0e01, 0x0e3a,
- 0x0e40, 0x0e4e,
- 0x0e50, 0x0e59,
- 0x0e81, 0x0e82,
- 0x0e84, 0x0e84,
- 0x0e87, 0x0e88,
- 0x0e8a, 0x0e8a,
- 0x0e8d, 0x0e8d,
- 0x0e94, 0x0e97,
- 0x0e99, 0x0e9f,
- 0x0ea1, 0x0ea3,
- 0x0ea5, 0x0ea5,
- 0x0ea7, 0x0ea7,
- 0x0eaa, 0x0eab,
- 0x0ead, 0x0eb9,
- 0x0ebb, 0x0ebd,
- 0x0ec0, 0x0ec4,
- 0x0ec6, 0x0ec6,
- 0x0ec8, 0x0ecd,
- 0x0ed0, 0x0ed9,
- 0x0edc, 0x0edd,
- 0x0f00, 0x0f00,
- 0x0f18, 0x0f19,
- 0x0f20, 0x0f33,
- 0x0f35, 0x0f35,
- 0x0f37, 0x0f37,
- 0x0f39, 0x0f39,
- 0x0f3e, 0x0f47,
- 0x0f49, 0x0f6a,
- 0x0f71, 0x0f84,
- 0x0f86, 0x0f8b,
- 0x0f90, 0x0f97,
- 0x0f99, 0x0fbc,
- 0x0fc6, 0x0fc6,
- 0x1000, 0x1021,
- 0x1023, 0x1027,
- 0x1029, 0x102a,
- 0x102c, 0x1032,
- 0x1036, 0x1039,
- 0x1040, 0x1049,
- 0x1050, 0x1059,
- 0x10a0, 0x10c5,
- 0x10d0, 0x10f8,
- 0x1100, 0x1159,
- 0x115f, 0x11a2,
- 0x11a8, 0x11f9,
- 0x1200, 0x1206,
- 0x1208, 0x1246,
- 0x1248, 0x1248,
- 0x124a, 0x124d,
- 0x1250, 0x1256,
- 0x1258, 0x1258,
- 0x125a, 0x125d,
- 0x1260, 0x1286,
- 0x1288, 0x1288,
- 0x128a, 0x128d,
- 0x1290, 0x12ae,
- 0x12b0, 0x12b0,
- 0x12b2, 0x12b5,
- 0x12b8, 0x12be,
- 0x12c0, 0x12c0,
- 0x12c2, 0x12c5,
- 0x12c8, 0x12ce,
- 0x12d0, 0x12d6,
- 0x12d8, 0x12ee,
- 0x12f0, 0x130e,
- 0x1310, 0x1310,
- 0x1312, 0x1315,
- 0x1318, 0x131e,
- 0x1320, 0x1346,
- 0x1348, 0x135a,
- 0x1369, 0x137c,
- 0x13a0, 0x13f4,
- 0x1401, 0x166c,
- 0x166f, 0x1676,
- 0x1681, 0x169a,
- 0x16a0, 0x16ea,
- 0x16ee, 0x16f0,
- 0x1700, 0x170c,
- 0x170e, 0x1714,
- 0x1720, 0x1734,
- 0x1740, 0x1753,
- 0x1760, 0x176c,
- 0x176e, 0x1770,
- 0x1772, 0x1773,
- 0x1780, 0x17b3,
- 0x17b6, 0x17d3,
- 0x17d7, 0x17d7,
- 0x17dc, 0x17dd,
- 0x17e0, 0x17e9,
- 0x17f0, 0x17f9,
- 0x180b, 0x180d,
- 0x1810, 0x1819,
- 0x1820, 0x1877,
- 0x1880, 0x18a9,
- 0x1900, 0x191c,
- 0x1920, 0x192b,
- 0x1930, 0x193b,
- 0x1946, 0x196d,
- 0x1970, 0x1974,
- 0x1d00, 0x1d6b,
- 0x1e00, 0x1e9b,
- 0x1ea0, 0x1ef9,
- 0x1f00, 0x1f15,
- 0x1f18, 0x1f1d,
- 0x1f20, 0x1f45,
- 0x1f48, 0x1f4d,
- 0x1f50, 0x1f57,
- 0x1f59, 0x1f59,
- 0x1f5b, 0x1f5b,
- 0x1f5d, 0x1f5d,
- 0x1f5f, 0x1f7d,
- 0x1f80, 0x1fb4,
- 0x1fb6, 0x1fbc,
- 0x1fbe, 0x1fbe,
- 0x1fc2, 0x1fc4,
- 0x1fc6, 0x1fcc,
- 0x1fd0, 0x1fd3,
- 0x1fd6, 0x1fdb,
- 0x1fe0, 0x1fec,
- 0x1ff2, 0x1ff4,
- 0x1ff6, 0x1ffc,
- 0x203f, 0x2040,
- 0x2054, 0x2054,
- 0x2070, 0x2071,
- 0x2074, 0x2079,
- 0x207f, 0x2089,
- 0x20d0, 0x20ea,
- 0x2102, 0x2102,
- 0x2107, 0x2107,
- 0x210a, 0x2113,
- 0x2115, 0x2115,
- 0x2119, 0x211d,
- 0x2124, 0x2124,
- 0x2126, 0x2126,
- 0x2128, 0x2128,
- 0x212a, 0x212d,
- 0x212f, 0x2131,
- 0x2133, 0x2139,
- 0x213d, 0x213f,
- 0x2145, 0x2149,
- 0x2153, 0x2183,
- 0x2460, 0x249b,
- 0x24ea, 0x24ff,
- 0x2776, 0x2793,
- 0x3005, 0x3007,
- 0x3021, 0x302f,
- 0x3031, 0x3035,
- 0x3038, 0x303c,
- 0x3041, 0x3096,
- 0x3099, 0x309a,
- 0x309d, 0x309f,
- 0x30a1, 0x30ff,
- 0x3105, 0x312c,
- 0x3131, 0x318e,
- 0x3192, 0x3195,
- 0x31a0, 0x31b7,
- 0x31f0, 0x31ff,
- 0x3220, 0x3229,
- 0x3251, 0x325f,
- 0x3280, 0x3289,
- 0x32b1, 0x32bf,
- 0x3400, 0x4db5,
- 0x4e00, 0x9fa5,
- 0xa000, 0xa48c,
- 0xac00, 0xd7a3,
- 0xf900, 0xfa2d,
- 0xfa30, 0xfa6a,
- 0xfb00, 0xfb06,
- 0xfb13, 0xfb17,
- 0xfb1d, 0xfb28,
- 0xfb2a, 0xfb36,
- 0xfb38, 0xfb3c,
- 0xfb3e, 0xfb3e,
- 0xfb40, 0xfb41,
- 0xfb43, 0xfb44,
- 0xfb46, 0xfbb1,
- 0xfbd3, 0xfd3d,
- 0xfd50, 0xfd8f,
- 0xfd92, 0xfdc7,
- 0xfdf0, 0xfdfb,
- 0xfe00, 0xfe0f,
- 0xfe20, 0xfe23,
- 0xfe33, 0xfe34,
- 0xfe4d, 0xfe4f,
- 0xfe70, 0xfe74,
- 0xfe76, 0xfefc,
- 0xff10, 0xff19,
- 0xff21, 0xff3a,
- 0xff3f, 0xff3f,
- 0xff41, 0xff5a,
- 0xff65, 0xffbe,
- 0xffc2, 0xffc7,
- 0xffca, 0xffcf,
- 0xffd2, 0xffd7,
- 0xffda, 0xffdc,
- 0x10000, 0x1000b,
- 0x1000d, 0x10026,
- 0x10028, 0x1003a,
- 0x1003c, 0x1003d,
- 0x1003f, 0x1004d,
- 0x10050, 0x1005d,
- 0x10080, 0x100fa,
- 0x10107, 0x10133,
- 0x10300, 0x1031e,
- 0x10320, 0x10323,
- 0x10330, 0x1034a,
- 0x10380, 0x1039d,
- 0x10400, 0x1049d,
- 0x104a0, 0x104a9,
- 0x10800, 0x10805,
- 0x10808, 0x10808,
- 0x1080a, 0x10835,
- 0x10837, 0x10838,
- 0x1083c, 0x1083c,
- 0x1083f, 0x1083f,
- 0x1d165, 0x1d169,
- 0x1d16d, 0x1d172,
- 0x1d17b, 0x1d182,
- 0x1d185, 0x1d18b,
- 0x1d1aa, 0x1d1ad,
- 0x1d400, 0x1d454,
- 0x1d456, 0x1d49c,
- 0x1d49e, 0x1d49f,
- 0x1d4a2, 0x1d4a2,
- 0x1d4a5, 0x1d4a6,
- 0x1d4a9, 0x1d4ac,
- 0x1d4ae, 0x1d4b9,
- 0x1d4bb, 0x1d4bb,
- 0x1d4bd, 0x1d4c3,
- 0x1d4c5, 0x1d505,
- 0x1d507, 0x1d50a,
- 0x1d50d, 0x1d514,
- 0x1d516, 0x1d51c,
- 0x1d51e, 0x1d539,
- 0x1d53b, 0x1d53e,
- 0x1d540, 0x1d544,
- 0x1d546, 0x1d546,
- 0x1d54a, 0x1d550,
- 0x1d552, 0x1d6a3,
- 0x1d6a8, 0x1d6c0,
- 0x1d6c2, 0x1d6da,
- 0x1d6dc, 0x1d6fa,
- 0x1d6fc, 0x1d714,
- 0x1d716, 0x1d734,
- 0x1d736, 0x1d74e,
- 0x1d750, 0x1d76e,
- 0x1d770, 0x1d788,
- 0x1d78a, 0x1d7a8,
- 0x1d7aa, 0x1d7c2,
- 0x1d7c4, 0x1d7c9,
- 0x1d7ce, 0x1d7ff,
- 0x20000, 0x2a6d6,
- 0x2f800, 0x2fa1d,
- 0xe0100, 0xe01ef
-#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
-}; /* end of MBWord */
static int
-utf8_get_ctype_code_range(int ctype,
- const OnigCodePoint* sbr[], const OnigCodePoint* mbr[])
+utf8_get_ctype_code_range(int ctype, OnigCodePoint *sb_out,
+ const OnigCodePoint* ranges[])
{
-#define CR_SET(sbl,mbl) do { \
- *sbr = sbl; \
- *mbr = mbl; \
-} while (0)
-
-#define CR_SB_SET(sbl) do { \
- *sbr = sbl; \
- *mbr = EmptyRange; \
-} while (0)
-
- switch (ctype) {
- case ONIGENC_CTYPE_ALPHA:
- CR_SET(SBAlpha, MBAlpha);
- break;
- case ONIGENC_CTYPE_BLANK:
- CR_SET(SBBlank, MBBlank);
- break;
- case ONIGENC_CTYPE_CNTRL:
- CR_SET(SBCntrl, MBCntrl);
- break;
- case ONIGENC_CTYPE_DIGIT:
- CR_SET(SBDigit, MBDigit);
- break;
- case ONIGENC_CTYPE_GRAPH:
- CR_SET(SBGraph, MBGraph);
- break;
- case ONIGENC_CTYPE_LOWER:
- CR_SET(SBLower, MBLower);
- break;
- case ONIGENC_CTYPE_PRINT:
- CR_SET(SBPrint, MBPrint);
- break;
- case ONIGENC_CTYPE_PUNCT:
- CR_SET(SBPunct, MBPunct);
- break;
- case ONIGENC_CTYPE_SPACE:
- CR_SET(SBSpace, MBSpace);
- break;
- case ONIGENC_CTYPE_UPPER:
- CR_SET(SBUpper, MBUpper);
- break;
- case ONIGENC_CTYPE_XDIGIT:
- CR_SB_SET(SBXDigit);
- break;
- case ONIGENC_CTYPE_WORD:
- CR_SET(SBWord, MBWord);
- break;
- case ONIGENC_CTYPE_ASCII:
- CR_SB_SET(SBASCII);
- break;
- case ONIGENC_CTYPE_ALNUM:
- CR_SET(SBAlnum, MBAlnum);
- break;
-
- default:
- return ONIGENCERR_TYPE_BUG;
- break;
- }
-
- return 0;
+ *sb_out = 0x80;
+ return onigenc_unicode_ctype_code_range(ctype, ranges);
}
-static int
-utf8_is_code_ctype(OnigCodePoint code, unsigned int ctype)
-{
-#ifdef USE_UNICODE_FULL_RANGE_CTYPE
- const OnigCodePoint *range;
-#endif
-
- if (code < 256) {
- return ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code, ctype);
- }
-
-#ifdef USE_UNICODE_FULL_RANGE_CTYPE
-
- switch (ctype) {
- case ONIGENC_CTYPE_ALPHA:
- range = MBAlpha;
- break;
- case ONIGENC_CTYPE_BLANK:
- range = MBBlank;
- break;
- case ONIGENC_CTYPE_CNTRL:
- range = MBCntrl;
- break;
- case ONIGENC_CTYPE_DIGIT:
- range = MBDigit;
- break;
- case ONIGENC_CTYPE_GRAPH:
- range = MBGraph;
- break;
- case ONIGENC_CTYPE_LOWER:
- range = MBLower;
- break;
- case ONIGENC_CTYPE_PRINT:
- range = MBPrint;
- break;
- case ONIGENC_CTYPE_PUNCT:
- range = MBPunct;
- break;
- case ONIGENC_CTYPE_SPACE:
- range = MBSpace;
- break;
- case ONIGENC_CTYPE_UPPER:
- range = MBUpper;
- break;
- case ONIGENC_CTYPE_XDIGIT:
- return FALSE;
- break;
- case ONIGENC_CTYPE_WORD:
- range = MBWord;
- break;
- case ONIGENC_CTYPE_ASCII:
- return FALSE;
- break;
- case ONIGENC_CTYPE_ALNUM:
- range = MBAlnum;
- break;
- case ONIGENC_CTYPE_NEWLINE:
- return FALSE;
- break;
-
- default:
- return ONIGENCERR_TYPE_BUG;
- break;
- }
-
- return onig_is_in_code_range((UChar* )range, code);
-
-#else
-
- if ((ctype & ONIGENC_CTYPE_WORD) != 0) {
-#ifdef USE_INVALID_CODE_SCHEME
- if (code <= VALID_CODE_LIMIT)
-#endif
- return TRUE;
- }
-#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
-
- return FALSE;
-}
static UChar*
utf8_left_adjust_char_head(const UChar* start, const UChar* s)
@@ -3733,31 +303,28 @@ utf8_left_adjust_char_head(const UChar* start, const UChar* s)
return (UChar* )p;
}
+static int
+utf8_get_case_fold_codes_by_str(OnigCaseFoldType flag,
+ const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
+{
+ return onigenc_unicode_get_case_fold_codes_by_str(ONIG_ENCODING_UTF8,
+ flag, p, end, items);
+}
+
OnigEncodingType OnigEncodingUTF8 = {
utf8_mbc_enc_len,
"UTF-8", /* name */
6, /* max byte length */
1, /* min byte length */
- (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
- ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
- ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
- {
- (OnigCodePoint )'\\' /* esc */
- , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
- , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
- , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
- , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
- , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
- },
utf8_is_mbc_newline,
utf8_mbc_to_code,
utf8_code_to_mbclen,
utf8_code_to_mbc,
- utf8_mbc_to_normalize,
- utf8_is_mbc_ambiguous,
- onigenc_iso_8859_1_get_all_pair_ambig_codes,
- onigenc_ess_tsett_get_all_comp_ambig_codes,
- utf8_is_code_ctype,
+ utf8_mbc_case_fold,
+ onigenc_unicode_apply_all_case_fold,
+ utf8_get_case_fold_codes_by_str,
+ onigenc_unicode_property_name_to_ctype,
+ onigenc_unicode_is_code_ctype,
utf8_get_ctype_code_range,
utf8_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match