summaryrefslogtreecommitdiff
path: root/regenc.c
diff options
context:
space:
mode:
Diffstat (limited to 'regenc.c')
-rw-r--r--regenc.c355
1 files changed, 242 insertions, 113 deletions
diff --git a/regenc.c b/regenc.c
index e7234921db..c595f44b29 100644
--- a/regenc.c
+++ b/regenc.c
@@ -1,8 +1,9 @@
/**********************************************************************
- regenc.c - Oniguruma (regular expression library)
+ regenc.c - Onigmo (Oniguruma-mod) (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2011-2019 K.Takata <kentkt AT csc DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -50,25 +51,51 @@ onigenc_set_default_encoding(OnigEncoding enc)
return 0;
}
+extern int
+onigenc_mbclen(const OnigUChar* p,const OnigUChar* e, OnigEncoding enc)
+{
+ int ret = ONIGENC_PRECISE_MBC_ENC_LEN(enc, p, e);
+ if (ONIGENC_MBCLEN_CHARFOUND_P(ret)) {
+ ret = ONIGENC_MBCLEN_CHARFOUND_LEN(ret);
+ if (p + ret > e) ret = (int)(e - p); // just for case
+ return ret;
+ }
+ else if (ONIGENC_MBCLEN_NEEDMORE_P(ret)) {
+ return (int)(e - p);
+ }
+ return p < e ? 1 : 0;
+}
+
+extern int
+onigenc_mbclen_approximate(const OnigUChar* p,const OnigUChar* e, OnigEncoding enc)
+{
+ int ret = ONIGENC_PRECISE_MBC_ENC_LEN(enc, p, e);
+ if (ONIGENC_MBCLEN_CHARFOUND_P(ret))
+ return ONIGENC_MBCLEN_CHARFOUND_LEN(ret);
+ else if (ONIGENC_MBCLEN_NEEDMORE_P(ret))
+ return (int )(e - p) + ONIGENC_MBCLEN_NEEDMORE_LEN(ret);
+ return 1;
+}
+
extern UChar*
-onigenc_get_right_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
+onigenc_get_right_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end)
{
- UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
+ UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s, end);
if (p < s) {
- p += enc_len(enc, p, s);
+ p += enclen(enc, p, end);
}
return p;
}
extern UChar*
onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc,
- const UChar* start, const UChar* s, const UChar** prev)
+ const UChar* start, const UChar* s, const UChar* end, const UChar** prev)
{
- UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
+ UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s, end);
if (p < s) {
if (prev) *prev = (const UChar* )p;
- p += enc_len(enc, p, s);
+ p += enclen(enc, p, end);
}
else {
if (prev) *prev = (const UChar* )NULL; /* Sorry */
@@ -77,22 +104,22 @@ onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc,
}
extern UChar*
-onigenc_get_prev_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
+onigenc_get_prev_char_head(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end)
{
if (s <= start)
return (UChar* )NULL;
- return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1);
+ return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1, end);
}
extern UChar*
-onigenc_step_back(OnigEncoding enc, const UChar* start, const UChar* s, int n)
+onigenc_step_back(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end, int n)
{
while (ONIG_IS_NOT_NULL(s) && n-- > 0) {
if (s <= start)
return (UChar* )NULL;
- s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1);
+ s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1, end);
}
return (UChar* )s;
}
@@ -112,7 +139,7 @@ onigenc_strlen(OnigEncoding enc, const UChar* p, const UChar* end)
{
int n = 0;
UChar* q = (UChar* )p;
-
+
while (q < end) {
q += ONIGENC_MBC_ENC_LEN(enc, q, end);
n++;
@@ -125,8 +152,8 @@ onigenc_strlen_null(OnigEncoding enc, const UChar* s)
{
int n = 0;
UChar* p = (UChar* )s;
- UChar* e = p + strlen(s);
-
+ UChar* e;
+
while (1) {
if (*p == '\0') {
UChar* q;
@@ -141,6 +168,7 @@ onigenc_strlen_null(OnigEncoding enc, const UChar* s)
}
if (len == 1) return n;
}
+ e = p + ONIGENC_MBC_MAXLEN(enc);
p += ONIGENC_MBC_ENC_LEN(enc, p, e);
n++;
}
@@ -151,7 +179,7 @@ onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s)
{
UChar* start = (UChar* )s;
UChar* p = (UChar* )s;
- UChar* e = p + strlen(s);
+ UChar* e;
while (1) {
if (*p == '\0') {
@@ -167,6 +195,7 @@ onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s)
}
if (len == 1) return (int )(p - start);
}
+ e = p + ONIGENC_MBC_MAXLEN(enc);
p += ONIGENC_MBC_ENC_LEN(enc, p, e);
}
}
@@ -350,17 +379,19 @@ const UChar OnigEncISO_8859_1_ToUpperCaseTable[256] = {
};
#endif
+#if 0
extern void
-onigenc_set_default_caseconv_table(const UChar* table)
+onigenc_set_default_caseconv_table(const UChar* table ARG_UNUSED)
{
/* nothing */
/* obsoleted. */
}
+#endif
extern UChar*
-onigenc_get_left_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
+onigenc_get_left_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end)
{
- return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
+ return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s, end);
}
const OnigPairCaseFoldCodes OnigAsciiLowerMap[] = {
@@ -393,15 +424,14 @@ const OnigPairCaseFoldCodes OnigAsciiLowerMap[] = {
};
extern int
-onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag,
- OnigApplyAllCaseFoldFunc f, void* arg,
- OnigEncoding enc)
+onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,
+ OnigApplyAllCaseFoldFunc f, void* arg,
+ OnigEncoding enc ARG_UNUSED)
{
OnigCodePoint code;
int i, r;
- for (i = 0; i < sizeof(OnigAsciiLowerMap)/sizeof(OnigPairCaseFoldCodes)
- ; i++) {
+ for (i = 0; i < numberof(OnigAsciiLowerMap); i++) {
code = OnigAsciiLowerMap[i].to;
r = (*f)(OnigAsciiLowerMap[i].from, &code, 1, arg);
if (r != 0) return r;
@@ -415,9 +445,9 @@ onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag,
}
extern int
-onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag,
- const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[],
- OnigEncoding enc)
+onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag ARG_UNUSED,
+ const OnigUChar* p, const OnigUChar* end ARG_UNUSED,
+ OnigCaseFoldCodeItem items[], OnigEncoding enc ARG_UNUSED)
{
if (0x41 <= *p && *p <= 0x5a) {
items[0].byte_len = 1;
@@ -435,11 +465,11 @@ onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag,
return 0;
}
-extern int
-ss_apply_all_case_fold(OnigCaseFoldType flag,
- OnigApplyAllCaseFoldFunc f, void* arg)
+static int
+ss_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,
+ OnigApplyAllCaseFoldFunc f, void* arg)
{
- static OnigCodePoint ss[] = { 0x73, 0x73 };
+ OnigCodePoint ss[] = { 0x73, 0x73 };
return (*f)((OnigCodePoint )0xdf, ss, 2, arg);
}
@@ -475,7 +505,7 @@ onigenc_apply_all_case_fold_with_map(int map_size,
extern int
onigenc_get_case_fold_codes_by_str_with_map(int map_size,
const OnigPairCaseFoldCodes map[],
- int ess_tsett_flag, OnigCaseFoldType flag,
+ int ess_tsett_flag, OnigCaseFoldType flag ARG_UNUSED,
const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
{
if (0x41 <= *p && *p <= 0x5a) {
@@ -483,7 +513,7 @@ onigenc_get_case_fold_codes_by_str_with_map(int map_size,
items[0].code_len = 1;
items[0].code[0] = (OnigCodePoint )(*p + 0x20);
if (*p == 0x53 && ess_tsett_flag != 0 && end > p + 1
- && (*(p+1) == 0x53 || *(p+1) == 0x73)) {
+ && (*(p+1) == 0x53 || *(p+1) == 0x73)) {
/* SS */
items[1].byte_len = 2;
items[1].code_len = 1;
@@ -498,7 +528,7 @@ onigenc_get_case_fold_codes_by_str_with_map(int map_size,
items[0].code_len = 1;
items[0].code[0] = (OnigCodePoint )(*p - 0x20);
if (*p == 0x73 && ess_tsett_flag != 0 && end > p + 1
- && (*(p+1) == 0x73 || *(p+1) == 0x53)) {
+ && (*(p+1) == 0x73 || *(p+1) == 0x53)) {
/* ss */
items[1].byte_len = 2;
items[1].code_len = 1;
@@ -536,16 +566,16 @@ onigenc_get_case_fold_codes_by_str_with_map(int map_size,
for (i = 0; i < map_size; i++) {
if (*p == map[i].from) {
- items[0].byte_len = 1;
- items[0].code_len = 1;
- items[0].code[0] = map[i].to;
- return 1;
+ items[0].byte_len = 1;
+ items[0].code_len = 1;
+ items[0].code[0] = map[i].to;
+ return 1;
}
else if (*p == map[i].to) {
- items[0].byte_len = 1;
- items[0].code_len = 1;
- items[0].code[0] = map[i].from;
- return 1;
+ items[0].byte_len = 1;
+ items[0].code_len = 1;
+ items[0].code[0] = map[i].from;
+ return 1;
}
}
}
@@ -555,15 +585,16 @@ onigenc_get_case_fold_codes_by_str_with_map(int map_size,
extern int
-onigenc_not_support_get_ctype_code_range(int ctype,
- OnigCodePoint* sb_out, const OnigCodePoint* ranges[],
- OnigEncoding enc)
+onigenc_not_support_get_ctype_code_range(OnigCtype ctype ARG_UNUSED,
+ OnigCodePoint* sb_out ARG_UNUSED,
+ const OnigCodePoint* ranges[] ARG_UNUSED,
+ OnigEncoding enc)
{
return ONIG_NO_SUPPORT_CONFIG;
}
extern int
-onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end, OnigEncoding enc)
+onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end, OnigEncoding enc ARG_UNUSED)
{
if (p < end) {
if (*p == 0x0a) return 1;
@@ -573,8 +604,8 @@ onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end, OnigEncoding enc)
/* for single byte encodings */
extern int
-onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag, const UChar** p,
- const UChar*end, UChar* lower, OnigEncoding enc)
+onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, const UChar** p,
+ const UChar* end, UChar* lower, OnigEncoding enc ARG_UNUSED)
{
*lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(**p);
@@ -584,8 +615,8 @@ onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag, const UChar** p,
#if 0
extern int
-onigenc_ascii_is_mbc_ambiguous(OnigCaseFoldType flag,
- const UChar** pp, const UChar* end)
+onigenc_ascii_is_mbc_ambiguous(OnigCaseFoldType flag ARG_UNUSED,
+ const UChar** pp, const UChar* end ARG_UNUSED)
{
const UChar* p = *pp;
@@ -595,58 +626,79 @@ onigenc_ascii_is_mbc_ambiguous(OnigCaseFoldType flag,
#endif
extern int
-onigenc_single_byte_mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc)
+onigenc_single_byte_mbc_enc_len(const UChar* p ARG_UNUSED, const UChar* e ARG_UNUSED,
+ OnigEncoding enc ARG_UNUSED)
{
return 1;
}
extern OnigCodePoint
-onigenc_single_byte_mbc_to_code(const UChar* p, const UChar* end, OnigEncoding enc)
+onigenc_single_byte_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED,
+ OnigEncoding enc ARG_UNUSED)
{
return (OnigCodePoint )(*p);
}
extern int
-onigenc_single_byte_code_to_mbclen(OnigCodePoint code, OnigEncoding enc)
+onigenc_single_byte_code_to_mbclen(OnigCodePoint code, OnigEncoding enc ARG_UNUSED)
{
+ if (code > 0xff)
+ return ONIGERR_INVALID_CODE_POINT_VALUE;
return 1;
}
extern int
-onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc)
+onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc ARG_UNUSED)
{
+ if (code > 0xff) {
+ return ONIGERR_INVALID_CODE_POINT_VALUE;
+ }
*buf = (UChar )(code & 0xff);
return 1;
}
extern UChar*
-onigenc_single_byte_left_adjust_char_head(const UChar* start, const UChar* s,
- OnigEncoding enc)
+onigenc_single_byte_left_adjust_char_head(const UChar* start ARG_UNUSED,
+ const UChar* s,
+ const UChar* end ARG_UNUSED,
+ OnigEncoding enc ARG_UNUSED)
{
return (UChar* )s;
}
extern int
-onigenc_always_true_is_allowed_reverse_match(const UChar* s, const UChar* end,
- OnigEncoding enc)
+onigenc_always_true_is_allowed_reverse_match(const UChar* s ARG_UNUSED,
+ const UChar* end ARG_UNUSED,
+ OnigEncoding enc ARG_UNUSED)
{
return TRUE;
}
extern int
-onigenc_always_false_is_allowed_reverse_match(const UChar* s, const UChar* end,
- OnigEncoding enc)
+onigenc_always_false_is_allowed_reverse_match(const UChar* s ARG_UNUSED,
+ const UChar* end ARG_UNUSED,
+ OnigEncoding enc ARG_UNUSED)
{
return FALSE;
}
+extern int
+onigenc_ascii_is_code_ctype(OnigCodePoint code, unsigned int ctype,
+ OnigEncoding enc ARG_UNUSED)
+{
+ if (code < 128)
+ return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
+ else
+ return FALSE;
+}
+
extern OnigCodePoint
onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar* p, const UChar* end)
{
int c, i, len;
OnigCodePoint n;
- len = enc_len(enc, p, end);
+ len = enclen(enc, p, end);
n = (OnigCodePoint )(*p++);
if (len == 1) return n;
@@ -659,8 +711,9 @@ onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar* p, const UChar* end)
}
extern int
-onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag,
- const UChar** pp, const UChar* end, UChar* lower)
+onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag ARG_UNUSED,
+ const UChar** pp, const UChar* end ARG_UNUSED,
+ UChar* lower)
{
int len;
const UChar *p = *pp;
@@ -673,7 +726,7 @@ onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag,
else {
int i;
- len = enc_len(enc, p, end);
+ len = enclen(enc, p, end);
for (i = 0; i < len; i++) {
*lower++ = *p++;
}
@@ -685,7 +738,7 @@ onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag,
#if 0
extern int
onigenc_mbn_is_mbc_ambiguous(OnigEncoding enc, OnigCaseFoldType flag,
- const UChar** pp, const UChar* end)
+ const UChar** pp, const UChar* end ARG_UNUSED)
{
const UChar* p = *pp;
@@ -694,20 +747,21 @@ onigenc_mbn_is_mbc_ambiguous(OnigEncoding enc, OnigCaseFoldType flag,
return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
}
- (*pp) += enc_len(enc, p);
+ (*pp) += enclen(enc, p);
return FALSE;
}
#endif
extern int
-onigenc_mb2_code_to_mbclen(OnigCodePoint code)
+onigenc_mb2_code_to_mbclen(OnigCodePoint code, OnigEncoding enc ARG_UNUSED)
{
- if ((code & 0xff00) != 0) return 2;
- else return 1;
+ if (code <= 0xff) return 1;
+ if (code <= 0xffff) return 2;
+ return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
}
extern int
-onigenc_mb4_code_to_mbclen(OnigCodePoint code)
+onigenc_mb4_code_to_mbclen(OnigCodePoint code, OnigEncoding enc ARG_UNUSED)
{
if ((code & 0xff000000) != 0) return 4;
else if ((code & 0xff0000) != 0) return 3;
@@ -726,10 +780,10 @@ onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
*p++ = (UChar )(code & 0xff);
#if 1
- if (enc_len(enc, buf, p) != (p - buf))
- return ONIGENC_ERR_INVALID_WIDE_CHAR_VALUE;
+ if (enclen(enc, buf, p) != (p - buf))
+ return ONIGERR_INVALID_CODE_POINT_VALUE;
#endif
- return p - buf;
+ return (int )(p - buf);
}
extern int
@@ -749,40 +803,39 @@ onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
*p++ = (UChar )(code & 0xff);
#if 1
- if (enc_len(enc, buf, p) != (p - buf))
- return ONIGENC_ERR_INVALID_WIDE_CHAR_VALUE;
+ if (enclen(enc, buf, p) != (p - buf))
+ return ONIGERR_INVALID_CODE_POINT_VALUE;
#endif
- return p - buf;
-}
-
-extern int
-onigenc_minimum_property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end)
-{
- static PosixBracketEntryType PBS[] = {
- { (UChar* )"Alnum", ONIGENC_CTYPE_ALNUM, 5 },
- { (UChar* )"Alpha", ONIGENC_CTYPE_ALPHA, 5 },
- { (UChar* )"Blank", ONIGENC_CTYPE_BLANK, 5 },
- { (UChar* )"Cntrl", ONIGENC_CTYPE_CNTRL, 5 },
- { (UChar* )"Digit", ONIGENC_CTYPE_DIGIT, 5 },
- { (UChar* )"Graph", ONIGENC_CTYPE_GRAPH, 5 },
- { (UChar* )"Lower", ONIGENC_CTYPE_LOWER, 5 },
- { (UChar* )"Print", ONIGENC_CTYPE_PRINT, 5 },
- { (UChar* )"Punct", ONIGENC_CTYPE_PUNCT, 5 },
- { (UChar* )"Space", ONIGENC_CTYPE_SPACE, 5 },
- { (UChar* )"Upper", ONIGENC_CTYPE_UPPER, 5 },
- { (UChar* )"XDigit", ONIGENC_CTYPE_XDIGIT, 6 },
- { (UChar* )"ASCII", ONIGENC_CTYPE_ASCII, 5 },
- { (UChar* )"Word", ONIGENC_CTYPE_WORD, 4 },
- { (UChar* )NULL, -1, 0 }
+ return (int )(p - buf);
+}
+
+extern int
+onigenc_minimum_property_name_to_ctype(OnigEncoding enc, const UChar* p, const UChar* end)
+{
+ static const PosixBracketEntryType PBS[] = {
+ POSIX_BRACKET_ENTRY_INIT("Alnum", ONIGENC_CTYPE_ALNUM),
+ POSIX_BRACKET_ENTRY_INIT("Alpha", ONIGENC_CTYPE_ALPHA),
+ POSIX_BRACKET_ENTRY_INIT("Blank", ONIGENC_CTYPE_BLANK),
+ POSIX_BRACKET_ENTRY_INIT("Cntrl", ONIGENC_CTYPE_CNTRL),
+ POSIX_BRACKET_ENTRY_INIT("Digit", ONIGENC_CTYPE_DIGIT),
+ POSIX_BRACKET_ENTRY_INIT("Graph", ONIGENC_CTYPE_GRAPH),
+ POSIX_BRACKET_ENTRY_INIT("Lower", ONIGENC_CTYPE_LOWER),
+ POSIX_BRACKET_ENTRY_INIT("Print", ONIGENC_CTYPE_PRINT),
+ POSIX_BRACKET_ENTRY_INIT("Punct", ONIGENC_CTYPE_PUNCT),
+ POSIX_BRACKET_ENTRY_INIT("Space", ONIGENC_CTYPE_SPACE),
+ POSIX_BRACKET_ENTRY_INIT("Upper", ONIGENC_CTYPE_UPPER),
+ POSIX_BRACKET_ENTRY_INIT("XDigit", ONIGENC_CTYPE_XDIGIT),
+ POSIX_BRACKET_ENTRY_INIT("ASCII", ONIGENC_CTYPE_ASCII),
+ POSIX_BRACKET_ENTRY_INIT("Word", ONIGENC_CTYPE_WORD),
};
- PosixBracketEntryType *pb;
+ const PosixBracketEntryType *pb;
int len;
len = onigenc_strlen(enc, p, end);
- for (pb = PBS; IS_NOT_NULL(pb->name); pb++) {
+ for (pb = PBS; pb < PBS + numberof(PBS); pb++) {
if (len == pb->len &&
- onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0)
+ onigenc_with_ascii_strnicmp(enc, p, end, pb->name, pb->len) == 0)
return pb->ctype;
}
@@ -791,7 +844,7 @@ onigenc_minimum_property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end)
extern int
onigenc_mb2_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
- unsigned int ctype)
+ unsigned int ctype)
{
if (code < 128)
return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
@@ -806,7 +859,7 @@ onigenc_mb2_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
extern int
onigenc_mb4_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
- unsigned int ctype)
+ unsigned int ctype)
{
if (code < 128)
return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
@@ -833,28 +886,52 @@ onigenc_with_ascii_strncmp(OnigEncoding enc, const UChar* p, const UChar* end,
if (x) return x;
sascii++;
- p += enc_len(enc, p, end);
+ p += enclen(enc, p, end);
}
return 0;
}
+extern int
+onigenc_with_ascii_strnicmp(OnigEncoding enc, const UChar* p, const UChar* end,
+ const UChar* sascii /* ascii */, int n)
+{
+ int x, c;
+
+ while (n-- > 0) {
+ if (p >= end) return (int )(*sascii);
+
+ c = (int )ONIGENC_MBC_TO_CODE(enc, p, end);
+ if (ONIGENC_IS_ASCII_CODE(c))
+ c = ONIGENC_ASCII_CODE_TO_LOWER_CASE(c);
+ x = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*sascii) - c;
+ if (x) return x;
+
+ sascii++;
+ p += enclen(enc, p, end);
+ }
+ return 0;
+}
+
+#if 0
/* Property management */
static int
resize_property_list(int new_size, const OnigCodePoint*** plist, int* psize)
{
- int size;
+ size_t size;
const OnigCodePoint **list = *plist;
size = sizeof(OnigCodePoint*) * new_size;
if (IS_NULL(list)) {
list = (const OnigCodePoint** )xmalloc(size);
+ if (IS_NULL(list)) return ONIGERR_MEMORY;
}
else {
- list = (const OnigCodePoint** )xrealloc((void* )list, size);
+ const OnigCodePoint **tmp;
+ tmp = (const OnigCodePoint** )xrealloc((void* )list, size);
+ if (IS_NULL(tmp)) return ONIGERR_MEMORY;
+ list = tmp;
}
- if (IS_NULL(list)) return ONIGERR_MEMORY;
-
*plist = list;
*psize = new_size;
@@ -885,19 +962,71 @@ onigenc_property_list_add_property(UChar* name, const OnigCodePoint* prop,
*pnum = *pnum + 1;
onig_st_insert_strend(*table, name, name + strlen((char* )name),
- (hash_data_type )(*pnum + ONIGENC_MAX_STD_CTYPE));
+ (hash_data_type )(*pnum + ONIGENC_MAX_STD_CTYPE));
return 0;
}
+#endif
+#ifdef USE_CASE_MAP_API
extern int
-onigenc_property_list_init(int (*f)())
+onigenc_ascii_only_case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, const OnigUChar* end,
+ OnigUChar* to, OnigUChar* to_end, const struct OnigEncodingTypeST* enc)
{
- int r;
+ OnigCodePoint code;
+ OnigUChar *to_start = to;
+ OnigCaseFoldType flags = *flagP;
+ int codepoint_length;
+
+ while (*pp < end && to < to_end) {
+ codepoint_length = ONIGENC_PRECISE_MBC_ENC_LEN(enc, *pp, end);
+ if (codepoint_length < 0)
+ return codepoint_length; /* encoding invalid */
+ code = ONIGENC_MBC_TO_CODE(enc, *pp, end);
+ *pp += codepoint_length;
+
+ if (code >= 'a' && code <= 'z' && (flags & ONIGENC_CASE_UPCASE)) {
+ flags |= ONIGENC_CASE_MODIFIED;
+ code -= 'a' - 'A';
+ }
+ else if (code >= 'A' && code <= 'Z' &&
+ (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) {
+ flags |= ONIGENC_CASE_MODIFIED;
+ code += 'a' - 'A';
+ }
+ to += ONIGENC_CODE_TO_MBC(enc, code, to);
+ if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
+ flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE);
+ }
+ *flagP = flags;
+ return (int )(to - to_start);
+}
- THREAD_ATOMIC_START;
+extern int
+onigenc_single_byte_ascii_only_case_map(OnigCaseFoldType* flagP, const OnigUChar** pp,
+ const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
+ const struct OnigEncodingTypeST* enc)
+{
+ OnigCodePoint code;
+ OnigUChar *to_start = to;
+ OnigCaseFoldType flags = *flagP;
- r = f();
+ while (*pp < end && to < to_end) {
+ code = *(*pp)++;
- THREAD_ATOMIC_END;
- return r;
+ if (code >= 'a' && code <= 'z' && (flags & ONIGENC_CASE_UPCASE)) {
+ flags |= ONIGENC_CASE_MODIFIED;
+ code -= 'a' - 'A';
+ }
+ else if (code >= 'A' && code <= 'Z' &&
+ (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) {
+ flags |= ONIGENC_CASE_MODIFIED;
+ code += 'a' - 'A';
+ }
+ *to++ = code;
+ if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
+ flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE);
+ }
+ *flagP = flags;
+ return (int )(to - to_start);
}
+#endif