summaryrefslogtreecommitdiff
path: root/regparse.c
diff options
context:
space:
mode:
Diffstat (limited to 'regparse.c')
-rw-r--r--regparse.c1022
1 files changed, 506 insertions, 516 deletions
diff --git a/regparse.c b/regparse.c
index b9a9452a0f..a255644b34 100644
--- a/regparse.c
+++ b/regparse.c
@@ -2,7 +2,7 @@
regparse.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -41,6 +41,8 @@ OnigSyntaxType OnigSyntaxRuby = {
ONIG_SYN_OP2_OPTION_RUBY |
ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF |
ONIG_SYN_OP2_ESC_G_SUBEXP_CALL |
+ ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |
+ ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |
ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL |
ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB |
@@ -54,26 +56,21 @@ OnigSyntaxType OnigSyntaxRuby = {
ONIG_SYN_WARN_CC_OP_NOT_ESCAPED |
ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT )
, ONIG_OPTION_NONE
+ ,
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ }
};
OnigSyntaxType* OnigDefaultSyntax = ONIG_SYNTAX_RUBY;
extern void onig_null_warn(const char* s) { }
-#ifdef RUBY_PLATFORM
-extern void
-onig_rb_warn(const char* s)
-{
- rb_warn("%s", s);
-}
-
-extern void
-onig_rb_warning(const char* s)
-{
- rb_warning("%s", s);
-}
-#endif
-
#ifdef DEFAULT_WARN_FUNCTION
static OnigWarnFunc onig_warn = (OnigWarnFunc )DEFAULT_WARN_FUNCTION;
#else
@@ -160,9 +157,7 @@ static void
bitset_set_all(BitSetRef bs)
{
int i;
- for (i = 0; i < BITSET_SIZE; i++) {
- bs[i] = ~((Bits )0);
- }
+ for (i = 0; i < BITSET_SIZE; i++) { bs[i] = ~((Bits )0); }
}
#endif
@@ -170,45 +165,35 @@ static void
bitset_invert(BitSetRef bs)
{
int i;
- for (i = 0; i < BITSET_SIZE; i++) {
- bs[i] = ~(bs[i]);
- }
+ for (i = 0; i < BITSET_SIZE; i++) { bs[i] = ~(bs[i]); }
}
static void
bitset_invert_to(BitSetRef from, BitSetRef to)
{
int i;
- for (i = 0; i < BITSET_SIZE; i++) {
- to[i] = ~(from[i]);
- }
+ for (i = 0; i < BITSET_SIZE; i++) { to[i] = ~(from[i]); }
}
static void
bitset_and(BitSetRef dest, BitSetRef bs)
{
int i;
- for (i = 0; i < BITSET_SIZE; i++) {
- dest[i] &= bs[i];
- }
+ for (i = 0; i < BITSET_SIZE; i++) { dest[i] &= bs[i]; }
}
static void
bitset_or(BitSetRef dest, BitSetRef bs)
{
int i;
- for (i = 0; i < BITSET_SIZE; i++) {
- dest[i] |= bs[i];
- }
+ for (i = 0; i < BITSET_SIZE; i++) { dest[i] |= bs[i]; }
}
static void
bitset_copy(BitSetRef dest, BitSetRef bs)
{
int i;
- for (i = 0; i < BITSET_SIZE; i++) {
- dest[i] = bs[i];
- }
+ for (i = 0; i < BITSET_SIZE; i++) { dest[i] = bs[i]; }
}
extern int
@@ -223,8 +208,8 @@ onig_strncmp(const UChar* s1, const UChar* s2, int n)
return 0;
}
-static void
-k_strcpy(UChar* dest, const UChar* src, const UChar* end)
+extern void
+onig_strcpy(UChar* dest, const UChar* src, const UChar* end)
{
int len = end - src;
if (len > 0) {
@@ -273,7 +258,7 @@ strdup_with_null(OnigEncoding enc, UChar* s, UChar* end)
#define PPEEK_IS(c) (PPEEK == (OnigCodePoint )c)
static UChar*
-k_strcat_capa(UChar* dest, UChar* dest_end, const UChar* src, const UChar* src_end,
+strcat_capa(UChar* dest, UChar* dest_end, const UChar* src, const UChar* src_end,
int capa)
{
UChar* r;
@@ -284,7 +269,7 @@ k_strcat_capa(UChar* dest, UChar* dest_end, const UChar* src, const UChar* src_e
r = (UChar* )xmalloc(capa + 1);
CHECK_NULL_RETURN(r);
- k_strcpy(r + (dest_end - dest), src, src_end);
+ onig_strcpy(r + (dest_end - dest), src, src_end);
return r;
}
@@ -297,8 +282,8 @@ strcat_capa_from_static(UChar* dest, UChar* dest_end,
r = (UChar* )xmalloc(capa + 1);
CHECK_NULL_RETURN(r);
- k_strcpy(r, dest, dest_end);
- k_strcpy(r + (dest_end - dest), src, src_end);
+ onig_strcpy(r, dest, dest_end);
+ onig_strcpy(r + (dest_end - dest), src, src_end);
return r;
}
@@ -332,25 +317,28 @@ static struct st_hash_type type_strend_hash = {
strend_hash,
};
-static st_table*
+extern hash_table_type*
onig_st_init_strend_table_with_size(int size)
{
- return onig_st_init_table_with_size(&type_strend_hash, size);
+ return (hash_table_type* )onig_st_init_table_with_size(&type_strend_hash,
+ size);
}
-static int
-onig_st_lookup_strend(st_table *table, const UChar* str_key, const UChar* end_key, st_data_t *value)
+extern int
+onig_st_lookup_strend(hash_table_type* table, const UChar* str_key,
+ const UChar* end_key, hash_data_type *value)
{
- st_strend_key key;
+ st_strend_key key;
- key.s = (unsigned char* )str_key;
- key.end = (unsigned char* )end_key;
+ key.s = (unsigned char* )str_key;
+ key.end = (unsigned char* )end_key;
- return onig_st_lookup(table, (st_data_t )(&key), value);
+ return onig_st_lookup(table, (st_data_t )(&key), value);
}
-static int
-onig_st_insert_strend(st_table *table, const UChar* str_key, const UChar* end_key, st_data_t value)
+extern int
+onig_st_insert_strend(hash_table_type* table, const UChar* str_key,
+ const UChar* end_key, hash_data_type value)
{
st_strend_key* key;
int result;
@@ -504,7 +492,6 @@ static int
i_names(UChar* key, NameEntry* e, INamesArg* arg)
{
int r = (*(arg->func))(e->name,
- /*e->name + onigenc_str_bytelen_null(arg->enc, e->name), */
e->name + e->name_len,
e->back_num,
(e->back_num > 1 ? e->back_refs : &(e->back_ref1)),
@@ -518,8 +505,7 @@ i_names(UChar* key, NameEntry* e, INamesArg* arg)
extern int
onig_foreach_name(regex_t* reg,
- int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*),
- void* arg)
+ int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)
{
INamesArg narg;
NameTable* t = (NameTable* )reg->name_table;
@@ -585,7 +571,6 @@ typedef struct {
int alloc;
} NameTable;
-
#ifdef ONIG_DEBUG
extern int
onig_print_names(FILE* fp, regex_t* reg)
@@ -683,8 +668,7 @@ name_find(regex_t* reg, UChar* name, UChar* name_end)
extern int
onig_foreach_name(regex_t* reg,
- int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*),
- void* arg)
+ int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)
{
int i, r;
NameEntry* e;
@@ -826,9 +810,8 @@ extern int
onig_name_to_group_numbers(regex_t* reg, const UChar* name,
const UChar* name_end, int** nums)
{
- NameEntry* e;
+ NameEntry* e = name_find(reg, name, name_end);
- e = name_find(reg, name, name_end);
if (IS_NULL(e)) return ONIGERR_UNDEFINED_NAME_REFERENCE;
switch (e->back_num) {
@@ -886,8 +869,7 @@ onig_name_to_backref_number(regex_t* reg, const UChar* name,
extern int
onig_foreach_name(regex_t* reg,
- int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*),
- void* arg)
+ int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)
{
return ONIG_NO_SUPPORT_CONFIG;
}
@@ -928,12 +910,12 @@ scan_env_clear(ScanEnv* env)
BIT_STATUS_CLEAR(env->bt_mem_start);
BIT_STATUS_CLEAR(env->bt_mem_end);
BIT_STATUS_CLEAR(env->backrefed_mem);
- env->error = (UChar* )NULL;
- env->error_end = (UChar* )NULL;
- env->num_call = 0;
- env->num_mem = 0;
+ env->error = (UChar* )NULL;
+ env->error_end = (UChar* )NULL;
+ env->num_call = 0;
+ env->num_mem = 0;
#ifdef USE_NAMED_GROUP
- env->num_named = 0;
+ env->num_named = 0;
#endif
env->mem_alloc = 0;
env->mem_nodes_dynamic = (Node** )NULL;
@@ -1009,7 +991,8 @@ onig_node_free(Node* node)
switch (NTYPE(node)) {
case N_STRING:
- if (IS_NOT_NULL(NSTRING(node).s) && NSTRING(node).s != NSTRING(node).buf) {
+ if (NSTRING(node).capa != 0 &&
+ IS_NOT_NULL(NSTRING(node).s) && NSTRING(node).s != NSTRING(node).buf) {
xfree(NSTRING(node).s);
}
break;
@@ -1033,7 +1016,6 @@ onig_node_free(Node* node)
#else
xfree(node);
#endif
-
node = next_node;
goto start;
}
@@ -1043,17 +1025,15 @@ onig_node_free(Node* node)
{
CClassNode* cc = &(NCCLASS(node));
- if (IS_CCLASS_SHARE(cc))
- return ;
-
+ if (IS_CCLASS_SHARE(cc)) return ;
if (cc->mbuf)
bbuf_free(cc->mbuf);
}
break;
- case N_QUALIFIER:
- if (NQUALIFIER(node).target)
- onig_node_free(NQUALIFIER(node).target);
+ case N_QUANTIFIER:
+ if (NQUANTIFIER(node).target)
+ onig_node_free(NQUANTIFIER(node).target);
break;
case N_EFFECT:
@@ -1144,11 +1124,12 @@ node_new_cclass(void)
}
static Node*
-node_new_cclass_by_codepoint_range(int not,
- const OnigCodePoint sbr[], const OnigCodePoint mbr[])
+node_new_cclass_by_codepoint_range(int not, OnigCodePoint sb_out,
+ const OnigCodePoint ranges[])
{
+ int n, i;
CClassNode* cc;
- int n, i, j;
+ OnigCodePoint j;
Node* node = node_new();
CHECK_NULL_RETURN(node);
@@ -1159,31 +1140,34 @@ node_new_cclass_by_codepoint_range(int not,
if (not != 0) CCLASS_SET_NOT(cc);
BITSET_CLEAR(cc->bs);
- if (IS_NOT_NULL(sbr)) {
- n = ONIGENC_CODE_RANGE_NUM(sbr);
+ if (sb_out > 0 && IS_NOT_NULL(ranges)) {
+ n = ONIGENC_CODE_RANGE_NUM(ranges);
for (i = 0; i < n; i++) {
- for (j = ONIGENC_CODE_RANGE_FROM(sbr, i);
- j <= (int )ONIGENC_CODE_RANGE_TO(sbr, i); j++) {
+ for (j = ONIGENC_CODE_RANGE_FROM(ranges, i);
+ j <= (int )ONIGENC_CODE_RANGE_TO(ranges, i); j++) {
+ if (j >= sb_out) goto sb_end;
+
BITSET_SET_BIT(cc->bs, j);
}
}
}
- if (IS_NULL(mbr)) {
+ sb_end:
+ if (IS_NULL(ranges)) {
is_null:
cc->mbuf = NULL;
}
else {
BBuf* bbuf;
- n = ONIGENC_CODE_RANGE_NUM(mbr);
+ n = ONIGENC_CODE_RANGE_NUM(ranges);
if (n == 0) goto is_null;
bbuf = (BBuf* )xmalloc(sizeof(BBuf));
CHECK_NULL_RETURN_VAL(bbuf, NULL);
bbuf->alloc = n + 1;
bbuf->used = n + 1;
- bbuf->p = (UChar* )((void* )mbr);
+ bbuf->p = (UChar* )((void* )ranges);
cc->mbuf = bbuf;
}
@@ -1192,12 +1176,13 @@ node_new_cclass_by_codepoint_range(int not,
}
static Node*
-node_new_ctype(int type)
+node_new_ctype(int type, int not)
{
Node* node = node_new();
CHECK_NULL_RETURN(node);
node->type = N_CTYPE;
- NCTYPE(node).type = type;
+ NCTYPE(node).ctype = type;
+ NCTYPE(node).not = not;
return node;
}
@@ -1227,8 +1212,26 @@ onig_node_new_list(Node* left, Node* right)
return node_new_list(left, right);
}
-static Node*
-node_new_alt(Node* left, Node* right)
+extern Node*
+onig_node_list_add(Node* list, Node* x)
+{
+ Node *n;
+
+ n = onig_node_new_list(x, NULL);
+ if (IS_NULL(n)) return NULL_NODE;
+
+ if (IS_NOT_NULL(list)) {
+ while (IS_NOT_NULL(NCONS(list).right))
+ list = NCONS(list).right;
+
+ NCONS(list).right = n;
+ }
+
+ return n;
+}
+
+extern Node*
+onig_node_new_alt(Node* left, Node* right)
{
Node* node = node_new();
CHECK_NULL_RETURN(node);
@@ -1318,25 +1321,25 @@ node_new_call(UChar* name, UChar* name_end)
#endif
static Node*
-node_new_qualifier(int lower, int upper, int by_number)
+node_new_quantifier(int lower, int upper, int by_number)
{
Node* node = node_new();
CHECK_NULL_RETURN(node);
- node->type = N_QUALIFIER;
- NQUALIFIER(node).state = 0;
- NQUALIFIER(node).target = NULL;
- NQUALIFIER(node).lower = lower;
- NQUALIFIER(node).upper = upper;
- NQUALIFIER(node).greedy = 1;
- NQUALIFIER(node).target_empty_info = NQ_TARGET_ISNOT_EMPTY;
- NQUALIFIER(node).head_exact = NULL_NODE;
- NQUALIFIER(node).next_head_exact = NULL_NODE;
- NQUALIFIER(node).is_refered = 0;
+ node->type = N_QUANTIFIER;
+ NQUANTIFIER(node).state = 0;
+ NQUANTIFIER(node).target = NULL;
+ NQUANTIFIER(node).lower = lower;
+ NQUANTIFIER(node).upper = upper;
+ NQUANTIFIER(node).greedy = 1;
+ NQUANTIFIER(node).target_empty_info = NQ_TARGET_ISNOT_EMPTY;
+ NQUANTIFIER(node).head_exact = NULL_NODE;
+ NQUANTIFIER(node).next_head_exact = NULL_NODE;
+ NQUANTIFIER(node).is_refered = 0;
if (by_number != 0)
- NQUALIFIER(node).state |= NST_BY_NUMBER;
+ NQUANTIFIER(node).state |= NST_BY_NUMBER;
#ifdef USE_COMBINATION_EXPLOSION_CHECK
- NQUALIFIER(node).comb_exp_check_num = 0;
+ NQUANTIFIER(node).comb_exp_check_num = 0;
#endif
return node;
@@ -1400,14 +1403,14 @@ onig_node_str_cat(Node* node, const UChar* s, const UChar* end)
int capa = len + addlen + NODE_STR_MARGIN;
if (capa <= NSTRING(node).capa) {
- k_strcpy(NSTRING(node).s + len, s, end);
+ onig_strcpy(NSTRING(node).s + len, s, end);
}
else {
if (NSTRING(node).s == NSTRING(node).buf)
p = strcat_capa_from_static(NSTRING(node).s, NSTRING(node).end,
s, end, capa);
else
- p = k_strcat_capa(NSTRING(node).s, NSTRING(node).end, s, end, capa);
+ p = strcat_capa(NSTRING(node).s, NSTRING(node).end, s, end, capa);
CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY);
NSTRING(node).s = p;
@@ -1415,7 +1418,7 @@ onig_node_str_cat(Node* node, const UChar* s, const UChar* end)
}
}
else {
- k_strcpy(NSTRING(node).s + len, s, end);
+ onig_strcpy(NSTRING(node).s + len, s, end);
}
NSTRING(node).end = NSTRING(node).s + len + addlen;
}
@@ -1423,6 +1426,13 @@ onig_node_str_cat(Node* node, const UChar* s, const UChar* end)
return 0;
}
+extern int
+onig_node_str_set(Node* node, const UChar* s, const UChar* end)
+{
+ onig_node_str_clear(node);
+ return onig_node_str_cat(node, s, end);
+}
+
static int
node_str_cat_char(Node* node, UChar c)
{
@@ -1531,6 +1541,24 @@ str_node_can_be_split(StrNode* sn, OnigEncoding enc)
return 0;
}
+#ifdef USE_PAD_TO_SHORT_BYTE_CHAR
+static int
+node_str_head_pad(StrNode* sn, int num, UChar val)
+{
+ UChar buf[NODE_STR_BUF_SIZE];
+ int i, len;
+
+ len = sn->end - sn->s;
+ onig_strcpy(buf, sn->s, sn->end);
+ onig_strcpy(&(sn->s[num]), buf, buf + len);
+ sn->end += num;
+
+ for (i = 0; i < num; i++) {
+ sn->s[i] = val;
+ }
+}
+#endif
+
extern int
onig_scan_unsigned_number(UChar** src, const UChar* end, OnigEncoding enc)
{
@@ -2069,13 +2097,13 @@ conv_backslash_value(int c, ScanEnv* env)
{
if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_CONTROL_CHARS)) {
switch (c) {
- case 'n': return '\n';
- case 't': return '\t';
- case 'r': return '\r';
- case 'f': return '\f';
- case 'a': return '\007';
- case 'b': return '\010';
- case 'e': return '\033';
+ case 'n': return '\n';
+ case 't': return '\t';
+ case 'r': return '\r';
+ case 'f': return '\f';
+ case 'a': return '\007';
+ case 'b': return '\010';
+ case 'e': return '\033';
case 'v':
if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_V_VTAB))
return '\v';
@@ -2089,7 +2117,7 @@ conv_backslash_value(int c, ScanEnv* env)
}
static int
-is_invalid_qualifier_target(Node* node)
+is_invalid_quantifier_target(Node* node)
{
switch (NTYPE(node)) {
case N_ANCHOR:
@@ -2098,19 +2126,19 @@ is_invalid_qualifier_target(Node* node)
case N_EFFECT:
if (NEFFECT(node).type == EFFECT_OPTION)
- return is_invalid_qualifier_target(NEFFECT(node).target);
+ return is_invalid_quantifier_target(NEFFECT(node).target);
break;
case N_LIST: /* ex. (?:\G\A)* */
do {
- if (! is_invalid_qualifier_target(NCONS(node).left)) return 0;
+ if (! is_invalid_quantifier_target(NCONS(node).left)) return 0;
} while (IS_NOT_NULL(node = NCONS(node).right));
return 0;
break;
case N_ALT: /* ex. (?:abc|\A)* */
do {
- if (is_invalid_qualifier_target(NCONS(node).left)) return 1;
+ if (is_invalid_quantifier_target(NCONS(node).left)) return 1;
} while (IS_NOT_NULL(node = NCONS(node).right));
break;
@@ -2122,24 +2150,24 @@ is_invalid_qualifier_target(Node* node)
/* ?:0, *:1, +:2, ??:3, *?:4, +?:5 */
static int
-popular_qualifier_num(QualifierNode* qf)
+popular_quantifier_num(QuantifierNode* q)
{
- if (qf->greedy) {
- if (qf->lower == 0) {
- if (qf->upper == 1) return 0;
- else if (IS_REPEAT_INFINITE(qf->upper)) return 1;
+ if (q->greedy) {
+ if (q->lower == 0) {
+ if (q->upper == 1) return 0;
+ else if (IS_REPEAT_INFINITE(q->upper)) return 1;
}
- else if (qf->lower == 1) {
- if (IS_REPEAT_INFINITE(qf->upper)) return 2;
+ else if (q->lower == 1) {
+ if (IS_REPEAT_INFINITE(q->upper)) return 2;
}
}
else {
- if (qf->lower == 0) {
- if (qf->upper == 1) return 3;
- else if (IS_REPEAT_INFINITE(qf->upper)) return 4;
+ if (q->lower == 0) {
+ if (q->upper == 1) return 3;
+ else if (IS_REPEAT_INFINITE(q->upper)) return 4;
}
- else if (qf->lower == 1) {
- if (IS_REPEAT_INFINITE(qf->upper)) return 5;
+ else if (q->lower == 1) {
+ if (IS_REPEAT_INFINITE(q->upper)) return 5;
}
}
return -1;
@@ -2166,15 +2194,15 @@ static enum ReduceType ReduceTypeTable[6][6] = {
};
extern void
-onig_reduce_nested_qualifier(Node* pnode, Node* cnode)
+onig_reduce_nested_quantifier(Node* pnode, Node* cnode)
{
int pnum, cnum;
- QualifierNode *p, *c;
+ QuantifierNode *p, *c;
- p = &(NQUALIFIER(pnode));
- c = &(NQUALIFIER(cnode));
- pnum = popular_qualifier_num(p);
- cnum = popular_qualifier_num(c);
+ p = &(NQUANTIFIER(pnode));
+ c = &(NQUANTIFIER(cnode));
+ pnum = popular_quantifier_num(p);
+ cnum = popular_quantifier_num(c);
switch(ReduceTypeTable[cnum][pnum]) {
case RQ_DEL:
@@ -2275,6 +2303,7 @@ typedef struct {
UChar* name_end;
} call;
struct {
+ int ctype;
int not;
} prop;
} u;
@@ -2282,7 +2311,7 @@ typedef struct {
static int
-fetch_range_qualifier(UChar** src, UChar* end, OnigToken* tok, ScanEnv* env)
+fetch_range_quantifier(UChar** src, UChar* end, OnigToken* tok, ScanEnv* env)
{
int low, up, syn_allow, non_low = 0;
int r = 0;
@@ -2349,7 +2378,7 @@ fetch_range_qualifier(UChar** src, UChar* end, OnigToken* tok, ScanEnv* env)
if (PEND) goto invalid;
PFETCH(c);
if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) {
- if (c != MC_ESC(enc)) goto invalid;
+ if (c != MC_ESC(env->syntax)) goto invalid;
PFETCH(c);
}
if (c != '}') goto invalid;
@@ -2392,7 +2421,7 @@ fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env)
if (c != '-') return ONIGERR_META_CODE_SYNTAX;
if (PEND) return ONIGERR_END_PATTERN_AT_META;
PFETCH(c);
- if (c == MC_ESC(enc)) {
+ if (c == MC_ESC(env->syntax)) {
v = fetch_escaped_value(&p, end, env);
if (v < 0) return v;
c = (OnigCodePoint )v;
@@ -2422,7 +2451,7 @@ fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env)
c = 0177;
}
else {
- if (c == MC_ESC(enc)) {
+ if (c == MC_ESC(env->syntax)) {
v = fetch_escaped_value(&p, end, env);
if (v < 0) return v;
c = (OnigCodePoint )v;
@@ -2447,23 +2476,38 @@ fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env)
static int fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env);
+static OnigCodePoint
+get_name_end_code_point(OnigCodePoint start)
+{
+ switch (start) {
+ case '<': return (OnigCodePoint )'>'; break;
+ case '\'': return (OnigCodePoint )'\''; break;
+ default:
+ break;
+ }
+
+ return (OnigCodePoint )0;
+}
+
#ifdef USE_NAMED_GROUP
#ifdef USE_BACKREF_AT_LEVEL
/*
\k<name+n>, \k<name-n>
*/
static int
-fetch_name_with_level(UChar** src, UChar* end, UChar** rname_end
- , ScanEnv* env, int* level)
+fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end,
+ UChar** rname_end, ScanEnv* env, int* level)
{
int r, exist_level = 0;
+ OnigCodePoint end_code;
OnigCodePoint c = 0;
- OnigCodePoint first_code;
OnigEncoding enc = env->enc;
UChar *name_end;
UChar *p = *src;
PFETCH_READY;
+ end_code = get_name_end_code_point(start_code);
+
name_end = end;
r = 0;
if (PEND) {
@@ -2471,8 +2515,7 @@ fetch_name_with_level(UChar** src, UChar* end, UChar** rname_end
}
else {
PFETCH(c);
- first_code = c;
- if (c == '>')
+ if (c == end_code)
return ONIGERR_EMPTY_GROUP_NAME;
if (!ONIGENC_IS_CODE_WORD(enc, c)) {
@@ -2483,14 +2526,14 @@ fetch_name_with_level(UChar** src, UChar* end, UChar** rname_end
while (!PEND) {
name_end = p;
PFETCH(c);
- if (c == '>' || c == ')' || c == '+' || c == '-') break;
+ if (c == end_code || c == ')' || c == '+' || c == '-') break;
if (!ONIGENC_IS_CODE_WORD(enc, c)) {
r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
}
}
- if (c != '>') {
+ if (c != end_code) {
if (c == '+' || c == '-') {
int num;
int flag = (c == '-' ? -1 : 1);
@@ -2504,21 +2547,16 @@ fetch_name_with_level(UChar** src, UChar* end, UChar** rname_end
exist_level = 1;
PFETCH(c);
- if (c == '>')
- goto first_check;
+ if (c == end_code)
+ goto end;
}
err:
r = ONIGERR_INVALID_GROUP_NAME;
name_end = end;
}
- else {
- first_check:
- if (ONIGENC_IS_CODE_ASCII(first_code) &&
- ONIGENC_IS_CODE_UPPER(enc, first_code))
- r = ONIGERR_INVALID_GROUP_NAME;
- }
+ end:
if (r == 0) {
*rname_end = name_end;
*src = p;
@@ -2536,16 +2574,19 @@ fetch_name_with_level(UChar** src, UChar* end, UChar** rname_end
1 -> reference name (allow number name)
*/
static int
-fetch_name(UChar** src, UChar* end, UChar** rname_end, ScanEnv* env, int ref)
+fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
+ UChar** rname_end, ScanEnv* env, int ref)
{
int r, is_num;
+ OnigCodePoint end_code;
OnigCodePoint c = 0;
- OnigCodePoint first_code;
OnigEncoding enc = env->enc;
UChar *name_end;
UChar *p = *src;
PFETCH_READY;
+ end_code = get_name_end_code_point(start_code);
+
name_end = end;
r = 0;
is_num = 0;
@@ -2554,8 +2595,7 @@ fetch_name(UChar** src, UChar* end, UChar** rname_end, ScanEnv* env, int ref)
}
else {
PFETCH(c);
- first_code = c;
- if (c == '>')
+ if (c == end_code)
return ONIGERR_EMPTY_GROUP_NAME;
if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
@@ -2573,7 +2613,7 @@ fetch_name(UChar** src, UChar* end, UChar** rname_end, ScanEnv* env, int ref)
while (!PEND) {
name_end = p;
PFETCH(c);
- if (c == '>' || c == ')') break;
+ if (c == end_code || c == ')') break;
if (is_num == 1) {
if (! ONIGENC_IS_CODE_DIGIT(enc, c)) {
@@ -2590,15 +2630,10 @@ fetch_name(UChar** src, UChar* end, UChar** rname_end, ScanEnv* env, int ref)
}
}
- if (c != '>') {
+ if (c != end_code) {
r = ONIGERR_INVALID_GROUP_NAME;
name_end = end;
}
- else {
- if (ONIGENC_IS_CODE_ASCII(first_code) &&
- ONIGENC_IS_CODE_UPPER(enc, first_code))
- r = ONIGERR_INVALID_GROUP_NAME;
- }
if (r == 0) {
*rname_end = name_end;
@@ -2612,15 +2647,19 @@ fetch_name(UChar** src, UChar* end, UChar** rname_end, ScanEnv* env, int ref)
}
#else
static int
-fetch_name(UChar** src, UChar* end, UChar** rname_end, ScanEnv* env, int ref)
+fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
+ UChar** rname_end, ScanEnv* env, int ref)
{
int r, len;
+ OnigCodePoint end_code;
OnigCodePoint c = 0;
UChar *name_end;
OnigEncoding enc = env->enc;
UChar *p = *src;
PFETCH_READY;
+ end_code = get_name_end_code_point(start_code);
+
r = 0;
while (!PEND) {
name_end = p;
@@ -2628,11 +2667,11 @@ fetch_name(UChar** src, UChar* end, UChar** rname_end, ScanEnv* env, int ref)
r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
PFETCH(c);
- if (c == '>' || c == ')') break;
+ if (c == end_code || c == ')') break;
if (! ONIGENC_IS_CODE_DIGIT(enc, c))
r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
}
- if (c != '>') {
+ if (c != end_code) {
r = ONIGERR_INVALID_GROUP_NAME;
name_end = end;
}
@@ -2710,7 +2749,7 @@ find_str_position(OnigCodePoint s[], int n, UChar* from, UChar* to,
static int
str_exist_check_with_esc(OnigCodePoint s[], int n, UChar* from, UChar* to,
- OnigCodePoint bad, OnigEncoding enc)
+ OnigCodePoint bad, OnigEncoding enc, OnigSyntaxType* syn)
{
int i, in_esc;
OnigCodePoint x;
@@ -2738,7 +2777,7 @@ str_exist_check_with_esc(OnigCodePoint s[], int n, UChar* from, UChar* to,
else {
x = ONIGENC_MBC_TO_CODE(enc, p, to);
if (x == bad) return 0;
- else if (x == MC_ESC(enc)) in_esc = 1;
+ else if (x == MC_ESC(syn)) in_esc = 1;
p = q;
}
}
@@ -2774,7 +2813,7 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
else if (c == '-') {
tok->type = TK_CC_RANGE;
}
- else if (c == MC_ESC(enc)) {
+ else if (c == MC_ESC(syn)) {
if (! IS_SYNTAX_BV(syn, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC))
goto end;
@@ -2786,37 +2825,45 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
switch (c) {
case 'w':
tok->type = TK_CHAR_TYPE;
- tok->u.subtype = CTYPE_WORD;
+ tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
+ tok->u.prop.not = 0;
break;
case 'W':
tok->type = TK_CHAR_TYPE;
- tok->u.subtype = CTYPE_NOT_WORD;
+ tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
+ tok->u.prop.not = 1;
break;
case 'd':
tok->type = TK_CHAR_TYPE;
- tok->u.subtype = CTYPE_DIGIT;
+ tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
+ tok->u.prop.not = 0;
break;
case 'D':
tok->type = TK_CHAR_TYPE;
- tok->u.subtype = CTYPE_NOT_DIGIT;
+ tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
+ tok->u.prop.not = 1;
break;
case 's':
tok->type = TK_CHAR_TYPE;
- tok->u.subtype = CTYPE_WHITE_SPACE;
+ tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
+ tok->u.prop.not = 0;
break;
case 'S':
tok->type = TK_CHAR_TYPE;
- tok->u.subtype = CTYPE_NOT_WHITE_SPACE;
+ tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
+ tok->u.prop.not = 1;
break;
case 'h':
if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
tok->type = TK_CHAR_TYPE;
- tok->u.subtype = CTYPE_XDIGIT;
+ tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
+ tok->u.prop.not = 0;
break;
case 'H':
if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
tok->type = TK_CHAR_TYPE;
- tok->u.subtype = CTYPE_NOT_XDIGIT;
+ tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
+ tok->u.prop.not = 1;
break;
case 'p':
@@ -2925,7 +2972,7 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
tok->backp = p; /* point at '[' is readed */
PINC;
if (str_exist_check_with_esc(send, 2, p, end,
- (OnigCodePoint )']', enc)) {
+ (OnigCodePoint )']', enc, syn)) {
tok->type = TK_POSIX_BRACKET_OPEN;
}
else {
@@ -2978,7 +3025,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
tok->backp = p;
PFETCH(c);
- if (IS_MC_ESC_CODE(c, enc, syn)) {
+ if (IS_MC_ESC_CODE(c, syn)) {
if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
tok->backp = p;
@@ -3035,7 +3082,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
case '{':
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) break;
- r = fetch_range_qualifier(&p, end, tok, env);
+ r = fetch_range_quantifier(&p, end, tok, env);
if (r < 0) return r; /* error */
if (r == 0) goto greedy_check;
else if (r == 2) { /* {n} */
@@ -3065,13 +3112,15 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
case 'w':
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;
tok->type = TK_CHAR_TYPE;
- tok->u.subtype = CTYPE_WORD;
+ tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
+ tok->u.prop.not = 0;
break;
case 'W':
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;
tok->type = TK_CHAR_TYPE;
- tok->u.subtype = CTYPE_NOT_WORD;
+ tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
+ tok->u.prop.not = 1;
break;
case 'b':
@@ -3103,37 +3152,43 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
case 's':
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break;
tok->type = TK_CHAR_TYPE;
- tok->u.subtype = CTYPE_WHITE_SPACE;
+ tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
+ tok->u.prop.not = 0;
break;
case 'S':
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break;
tok->type = TK_CHAR_TYPE;
- tok->u.subtype = CTYPE_NOT_WHITE_SPACE;
+ tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
+ tok->u.prop.not = 1;
break;
case 'd':
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;
tok->type = TK_CHAR_TYPE;
- tok->u.subtype = CTYPE_DIGIT;
+ tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
+ tok->u.prop.not = 0;
break;
case 'D':
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;
tok->type = TK_CHAR_TYPE;
- tok->u.subtype = CTYPE_NOT_DIGIT;
+ tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
+ tok->u.prop.not = 1;
break;
case 'h':
if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
tok->type = TK_CHAR_TYPE;
- tok->u.subtype = CTYPE_XDIGIT;
+ tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
+ tok->u.prop.not = 0;
break;
case 'H':
if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
tok->type = TK_CHAR_TYPE;
- tok->u.subtype = CTYPE_NOT_XDIGIT;
+ tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
+ tok->u.prop.not = 1;
break;
case 'A':
@@ -3279,7 +3334,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
case 'k':
if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_K_NAMED_BACKREF)) {
PFETCH(c);
- if (c == '<') {
+ if (c == '<' || c == '\'') {
UChar* name_end;
int* backs;
@@ -3287,7 +3342,8 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
#ifdef USE_BACKREF_AT_LEVEL
name_end = NULL_UCHARP; /* no need. escape gcc warning. */
- r = fetch_name_with_level(&p, end, &name_end, env, &tok->u.backref.level);
+ r = fetch_name_with_level((OnigCodePoint )c, &p, end, &name_end,
+ env, &tok->u.backref.level);
if (r == 1) tok->u.backref.exist_level = 1;
else tok->u.backref.exist_level = 0;
#else
@@ -3331,11 +3387,11 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
case 'g':
if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_G_SUBEXP_CALL)) {
PFETCH(c);
- if (c == '<') {
+ if (c == '<' || c == '\'') {
UChar* name_end;
prev = p;
- r = fetch_name(&p, end, &name_end, env, 1);
+ r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, 1);
if (r < 0) return r;
tok->type = TK_CALL;
@@ -3395,15 +3451,15 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
#ifdef USE_VARIABLE_META_CHARS
if ((c != ONIG_INEFFECTIVE_META_CHAR) &&
IS_SYNTAX_OP(syn, ONIG_SYN_OP_VARIABLE_META_CHARACTERS)) {
- if (c == MC_ANYCHAR(enc))
+ if (c == MC_ANYCHAR(syn))
goto any_char;
- else if (c == MC_ANYTIME(enc))
+ else if (c == MC_ANYTIME(syn))
goto anytime;
- else if (c == MC_ZERO_OR_ONE_TIME(enc))
+ else if (c == MC_ZERO_OR_ONE_TIME(syn))
goto zero_or_one_time;
- else if (c == MC_ONE_OR_MORE_TIME(enc))
+ else if (c == MC_ONE_OR_MORE_TIME(syn))
goto one_or_more_time;
- else if (c == MC_ANYCHAR_ANYTIME(enc)) {
+ else if (c == MC_ANYCHAR_ANYTIME(syn)) {
tok->type = TK_ANYCHAR_ANYTIME;
goto out;
}
@@ -3454,7 +3510,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
case '{':
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACE_INTERVAL)) break;
- r = fetch_range_qualifier(&p, end, tok, env);
+ r = fetch_range_quantifier(&p, end, tok, env);
if (r < 0) return r; /* error */
if (r == 0) goto greedy_check;
else if (r == 2) { /* {n} */
@@ -3480,7 +3536,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
while (1) {
if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
PFETCH(c);
- if (c == MC_ESC(enc)) {
+ if (c == MC_ESC(syn)) {
if (!PEND) PFETCH(c);
}
else {
@@ -3557,23 +3613,34 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
static int
add_ctype_to_cc_by_range(CClassNode* cc, int ctype, int not, OnigEncoding enc,
- const OnigCodePoint sbr[], const OnigCodePoint mbr[])
+ OnigCodePoint sb_out, const OnigCodePoint mbr[])
{
int i, r;
OnigCodePoint j;
- int nsb = ONIGENC_CODE_RANGE_NUM(sbr);
- int nmb = ONIGENC_CODE_RANGE_NUM(mbr);
+ int n = ONIGENC_CODE_RANGE_NUM(mbr);
if (not == 0) {
- for (i = 0; i < nsb; i++) {
- for (j = ONIGENC_CODE_RANGE_FROM(sbr, i);
- j <= ONIGENC_CODE_RANGE_TO(sbr, i); j++) {
+ for (i = 0; i < n; i++) {
+ for (j = ONIGENC_CODE_RANGE_FROM(mbr, i);
+ j <= ONIGENC_CODE_RANGE_TO(mbr, i); j++) {
+ if (j >= sb_out) {
+ if (j == ONIGENC_CODE_RANGE_TO(mbr, i)) i++;
+ else if (j > ONIGENC_CODE_RANGE_FROM(mbr, i)) {
+ r = add_code_range_to_buf(&(cc->mbuf), j,
+ ONIGENC_CODE_RANGE_TO(mbr, i));
+ if (r != 0) return r;
+ i++;
+ }
+
+ goto sb_end;
+ }
BITSET_SET_BIT(cc->bs, j);
}
}
- for (i = 0; i < nmb; i++) {
+ sb_end:
+ for ( ; i < n; i++) {
r = add_code_range_to_buf(&(cc->mbuf),
ONIGENC_CODE_RANGE_FROM(mbr, i),
ONIGENC_CODE_RANGE_TO(mbr, i));
@@ -3583,24 +3650,24 @@ add_ctype_to_cc_by_range(CClassNode* cc, int ctype, int not, OnigEncoding enc,
else {
OnigCodePoint prev = 0;
- if (ONIGENC_MBC_MINLEN(enc) == 1) {
- for (i = 0; i < nsb; i++) {
- for (j = prev;
- j < ONIGENC_CODE_RANGE_FROM(sbr, i); j++) {
- BITSET_SET_BIT(cc->bs, j);
- }
- prev = ONIGENC_CODE_RANGE_TO(sbr, i) + 1;
- }
- if (prev < 0x7f) {
- for (j = prev; j < 0x7f; j++) {
- BITSET_SET_BIT(cc->bs, j);
- }
+ for (i = 0; i < n; i++) {
+ for (j = prev;
+ j < ONIGENC_CODE_RANGE_FROM(mbr, i); j++) {
+ if (j >= sb_out) {
+ goto sb_end2;
+ }
+ BITSET_SET_BIT(cc->bs, j);
}
-
- prev = 0x80;
+ prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;
}
+ for (j = prev; j < sb_out; j++) {
+ BITSET_SET_BIT(cc->bs, j);
+ }
+
+ sb_end2:
+ prev = sb_out;
- for (i = 0; i < nmb; i++) {
+ for (i = 0; i < n; i++) {
if (prev < ONIGENC_CODE_RANGE_FROM(mbr, i)) {
r = add_code_range_to_buf(&(cc->mbuf), prev,
ONIGENC_CODE_RANGE_FROM(mbr, i) - 1);
@@ -3621,12 +3688,13 @@ static int
add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env)
{
int c, r;
- const OnigCodePoint *sbr, *mbr;
+ const OnigCodePoint *ranges;
+ OnigCodePoint sb_out;
OnigEncoding enc = env->enc;
- r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sbr, &mbr);
+ r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sb_out, &ranges);
if (r == 0) {
- return add_ctype_to_cc_by_range(cc, ctype, not, env->enc, sbr, mbr);
+ return add_ctype_to_cc_by_range(cc, ctype, not, env->enc, sb_out, ranges);
}
else if (r != ONIG_NO_SUPPORT_CONFIG) {
return r;
@@ -3680,7 +3748,7 @@ add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env)
case ONIGENC_CTYPE_WORD:
if (not == 0) {
for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
- if (ONIGENC_IS_CODE_SB_WORD(enc, c)) BITSET_SET_BIT(cc->bs, c);
+ if (IS_CODE_SB_WORD(enc, c)) BITSET_SET_BIT(cc->bs, c);
}
ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
}
@@ -3702,61 +3770,10 @@ add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env)
}
static int
-parse_ctype_to_enc_ctype(int pctype, int* not)
-{
- int ctype;
-
- switch (pctype) {
- case CTYPE_WORD:
- ctype = ONIGENC_CTYPE_WORD;
- *not = 0;
- break;
- case CTYPE_NOT_WORD:
- ctype = ONIGENC_CTYPE_WORD;
- *not = 1;
- break;
- case CTYPE_WHITE_SPACE:
- ctype = ONIGENC_CTYPE_SPACE;
- *not = 0;
- break;
- case CTYPE_NOT_WHITE_SPACE:
- ctype = ONIGENC_CTYPE_SPACE;
- *not = 1;
- break;
- case CTYPE_DIGIT:
- ctype = ONIGENC_CTYPE_DIGIT;
- *not = 0;
- break;
- case CTYPE_NOT_DIGIT:
- ctype = ONIGENC_CTYPE_DIGIT;
- *not = 1;
- break;
- case CTYPE_XDIGIT:
- ctype = ONIGENC_CTYPE_XDIGIT;
- *not = 0;
- break;
- case CTYPE_NOT_XDIGIT:
- ctype = ONIGENC_CTYPE_XDIGIT;
- *not = 1;
- break;
- default:
- return ONIGERR_PARSER_BUG;
- break;
- }
- return ctype;
-}
-
-typedef struct {
- UChar *name;
- int ctype;
- short int len;
-} PosixBracketEntryType;
-
-static int
parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env)
{
#define POSIX_BRACKET_CHECK_LIMIT_LENGTH 20
-#define POSIX_BRACKET_NAME_MAX_LEN 6
+#define POSIX_BRACKET_NAME_MIN_LEN 4
static PosixBracketEntryType PBS[] = {
{ (UChar* )"alnum", ONIGENC_CTYPE_ALNUM, 5 },
@@ -3772,7 +3789,8 @@ parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env)
{ (UChar* )"upper", ONIGENC_CTYPE_UPPER, 5 },
{ (UChar* )"xdigit", ONIGENC_CTYPE_XDIGIT, 6 },
{ (UChar* )"ascii", ONIGENC_CTYPE_ASCII, 5 },
- { (UChar* )NULL, -1, 0 }
+ { (UChar* )"word", ONIGENC_CTYPE_WORD, 4 },
+ { (UChar* )NULL, -1, 0 }
};
PosixBracketEntryType *pb;
@@ -3789,7 +3807,7 @@ parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env)
else
not = 0;
- if (onigenc_strlen(enc, p, end) < POSIX_BRACKET_NAME_MAX_LEN + 2)
+ if (onigenc_strlen(enc, p, end) < POSIX_BRACKET_NAME_MIN_LEN + 3)
goto not_posix_bracket;
for (pb = PBS; IS_NOT_NULL(pb->name); pb++) {
@@ -3823,86 +3841,39 @@ parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env)
}
}
- return 1; /* 1: is not POSIX bracket, but no error. */
-}
-
-static int
-property_name_to_ctype(UChar* p, UChar* end, OnigEncoding enc)
-{
- static PosixBracketEntryType PBS[] = {
- { (UChar* )"Alnum", ONIGENC_CTYPE_ALNUM, 5 },
- { (UChar* )"Alpha", ONIGENC_CTYPE_ALPHA, 5 },
- { (UChar* )"Blank", ONIGENC_CTYPE_BLANK, 5 },
- { (UChar* )"Cntrl", ONIGENC_CTYPE_CNTRL, 5 },
- { (UChar* )"Digit", ONIGENC_CTYPE_DIGIT, 5 },
- { (UChar* )"Graph", ONIGENC_CTYPE_GRAPH, 5 },
- { (UChar* )"Lower", ONIGENC_CTYPE_LOWER, 5 },
- { (UChar* )"Print", ONIGENC_CTYPE_PRINT, 5 },
- { (UChar* )"Punct", ONIGENC_CTYPE_PUNCT, 5 },
- { (UChar* )"Space", ONIGENC_CTYPE_SPACE, 5 },
- { (UChar* )"Upper", ONIGENC_CTYPE_UPPER, 5 },
- { (UChar* )"XDigit", ONIGENC_CTYPE_XDIGIT, 6 },
- { (UChar* )"ASCII", ONIGENC_CTYPE_ASCII, 5 },
- { (UChar* )NULL, -1, 0 }
- };
-
- PosixBracketEntryType *pb;
- int len;
-
- len = onigenc_strlen(enc, p, end);
- for (pb = PBS; IS_NOT_NULL(pb->name); pb++) {
- if (len == pb->len &&
- onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0)
- return pb->ctype;
- }
-
- return -1;
+ return 1; /* 1: is not POSIX bracket, but no error. */
}
static int
fetch_char_property_to_ctype(UChar** src, UChar* end, ScanEnv* env)
{
- int ctype;
+ int r;
OnigCodePoint c;
OnigEncoding enc = env->enc;
UChar *prev, *start, *p = *src;
PFETCH_READY;
- /* 'IsXXXX' => 'XXXX' */
- if (!PEND &&
- IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS)) {
- c = PPEEK;
- if (c == 'I') {
- PINC;
- if (! PEND) {
- c = PPEEK;
- if (c == 's')
- PINC;
- else
- PUNFETCH;
- }
- }
- }
-
+ r = 0;
start = prev = p;
while (!PEND) {
prev = p;
PFETCH(c);
if (c == '}') {
- ctype = property_name_to_ctype(start, prev, enc);
- if (ctype < 0) break;
+ r = ONIGENC_PROPERTY_NAME_TO_CTYPE(enc, start, prev);
+ if (r < 0) break;
*src = p;
- return ctype;
+ return r;
}
- else if (c == '(' || c == ')' || c == '{' || c == '|')
+ else if (c == '(' || c == ')' || c == '{' || c == '|') {
+ r = ONIGERR_INVALID_CHAR_PROPERTY_NAME;
break;
+ }
}
- onig_scan_env_set_error_string(env, ONIGERR_INVALID_CHAR_PROPERTY_NAME,
- *src, prev);
- return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
+ onig_scan_env_set_error_string(env, r, *src, prev);
+ return r;
}
static int
@@ -4039,10 +4010,11 @@ next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v,
static int
code_exist_check(OnigCodePoint c, UChar* from, UChar* end, int ignore_escaped,
- OnigEncoding enc)
+ ScanEnv* env)
{
int in_esc;
OnigCodePoint code;
+ OnigEncoding enc = env->enc;
UChar* p = from;
PFETCH_READY;
@@ -4054,7 +4026,7 @@ code_exist_check(OnigCodePoint c, UChar* from, UChar* end, int ignore_escaped,
else {
PFETCH(code);
if (code == c) return 1;
- if (code == MC_ESC(enc)) in_esc = 1;
+ if (code == MC_ESC(env->syntax)) in_esc = 1;
}
}
return 0;
@@ -4089,7 +4061,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
if (r < 0) return r;
if (r == TK_CC_CLOSE) {
if (! code_exist_check((OnigCodePoint )']',
- *src, env->pattern_end, 1, env->enc))
+ *src, env->pattern_end, 1, env))
return ONIGERR_EMPTY_CHAR_CLASS;
CC_ESC_WARN(env, (UChar* )"]");
@@ -4205,12 +4177,8 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
break;
case TK_CHAR_TYPE:
- {
- int ctype, not;
- ctype = parse_ctype_to_enc_ctype(tok->u.subtype, &not);
- r = add_ctype_to_cc(cc, ctype, not, env);
- if (r != 0) return r;
- }
+ r = add_ctype_to_cc(cc, tok->u.prop.ctype, tok->u.prop.not, env);
+ if (r != 0) return r;
next_class:
r = next_state_class(cc, &vs, &val_type, &state, env);
@@ -4434,6 +4402,14 @@ parse_effect(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
*np = node_new_effect(EFFECT_STOP_BACKTRACK);
break;
+ case '\'':
+ if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
+ goto named_group1;
+ }
+ else
+ return ONIGERR_UNDEFINED_GROUP_OPTION;
+ break;
+
case '<': /* look behind (?<=...), (?<!...) */
PFETCH(c);
if (c == '=')
@@ -4441,35 +4417,45 @@ parse_effect(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
else if (c == '!')
*np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND_NOT);
#ifdef USE_NAMED_GROUP
- else if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
- UChar *name;
- UChar *name_end;
+ else {
+ if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
+ UChar *name;
+ UChar *name_end;
- PUNFETCH;
- list_capture = 0;
+ PUNFETCH;
+ c = '<';
- named_group:
- name = p;
- r = fetch_name(&p, end, &name_end, env, 0);
- if (r < 0) return r;
+ named_group1:
+ list_capture = 0;
- num = scan_env_add_mem_entry(env);
- if (num < 0) return num;
- if (list_capture != 0 && num >= BIT_STATUS_BITS_NUM)
- return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;
+ named_group2:
+ name = p;
+ r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, 0);
+ if (r < 0) return r;
- r = name_add(env->reg, name, name_end, num, env);
- if (r != 0) return r;
- *np = node_new_effect_memory(env->option, 1);
- CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
- NEFFECT(*np).regnum = num;
- if (list_capture != 0)
- BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num);
- env->num_named++;
+ num = scan_env_add_mem_entry(env);
+ if (num < 0) return num;
+ if (list_capture != 0 && num >= BIT_STATUS_BITS_NUM)
+ return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;
+
+ r = name_add(env->reg, name, name_end, num, env);
+ if (r != 0) return r;
+ *np = node_new_effect_memory(env->option, 1);
+ CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
+ NEFFECT(*np).regnum = num;
+ if (list_capture != 0)
+ BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num);
+ env->num_named++;
+ }
+ else {
+ return ONIGERR_UNDEFINED_GROUP_OPTION;
+ }
}
-#endif
- else
+#else
+ else {
return ONIGERR_UNDEFINED_GROUP_OPTION;
+ }
+#endif
break;
case '@':
@@ -4477,9 +4463,9 @@ parse_effect(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
#ifdef USE_NAMED_GROUP
if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
PFETCH(c);
- if (c == '<') {
+ if (c == '<' || c == '\'') {
list_capture = 1;
- goto named_group; /* (?@<name>...) */
+ goto named_group2; /* (?@<name>...) */
}
PUNFETCH;
}
@@ -4619,11 +4605,11 @@ static const char* ReduceQStr[] = {
};
static int
-set_qualifier(Node* qnode, Node* target, int group, ScanEnv* env)
+set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env)
{
- QualifierNode* qn;
+ QuantifierNode* qn;
- qn = &(NQUALIFIER(qnode));
+ qn = &(NQUANTIFIER(qnode));
if (qn->lower == 1 && qn->upper == 1) {
return 1;
}
@@ -4642,15 +4628,15 @@ set_qualifier(Node* qnode, Node* target, int group, ScanEnv* env)
}
break;
- case N_QUALIFIER:
+ case N_QUANTIFIER:
{ /* check redundant double repeat. */
/* verbose warn (?:.?)? etc... but not warn (.?)? etc... */
- QualifierNode* qnt = &(NQUALIFIER(target));
- int nestq_num = popular_qualifier_num(qn);
- int targetq_num = popular_qualifier_num(qnt);
+ QuantifierNode* qnt = &(NQUANTIFIER(target));
+ int nestq_num = popular_quantifier_num(qn);
+ int targetq_num = popular_quantifier_num(qnt);
#ifdef USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
- if (!IS_QUALIFIER_BY_NUMBER(qn) && !IS_QUALIFIER_BY_NUMBER(qnt) &&
+ if (!IS_QUANTIFIER_BY_NUMBER(qn) && !IS_QUANTIFIER_BY_NUMBER(qnt) &&
IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT)) {
UChar buf[WARN_BUFSIZE];
@@ -4686,7 +4672,7 @@ set_qualifier(Node* qnode, Node* target, int group, ScanEnv* env)
#endif
if (targetq_num >= 0) {
if (nestq_num >= 0) {
- onig_reduce_nested_qualifier(qnode, target);
+ onig_reduce_nested_quantifier(qnode, target);
goto q_exit;
}
else if (targetq_num == 1 || targetq_num == 2) { /* * or + */
@@ -4708,60 +4694,6 @@ set_qualifier(Node* qnode, Node* target, int group, ScanEnv* env)
return 0;
}
-static int
-make_compound_alt_node_from_cc(OnigAmbigType ambig_flag, OnigEncoding enc,
- CClassNode* cc, Node** root)
-{
- int r, i, j, k, clen, len, ncode, n;
- UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
- Node **ptail, *snode = NULL_NODE;
- const OnigCompAmbigCodes* ccs;
- const OnigCompAmbigCodeItem* ci;
- OnigAmbigType amb;
-
- n = 0;
- *root = NULL_NODE;
- ptail = root;
-
-
- for (amb = 0x01; amb <= ONIGENC_AMBIGUOUS_MATCH_LIMIT; amb <<= 1) {
- if ((amb & ambig_flag) == 0) continue;
-
- ncode = ONIGENC_GET_ALL_COMP_AMBIG_CODES(enc, amb, &ccs);
- for (i = 0; i < ncode; i++) {
- if (onig_is_code_in_cc(enc, ccs[i].code, cc)) {
- for (j = 0; j < ccs[i].n; j++) {
- ci = &(ccs[i].items[j]);
- if (ci->len > 1) { /* compound only */
- if (IS_CCLASS_NOT(cc)) clear_not_flag_cclass(cc, enc);
-
- clen = ci->len;
- for (k = 0; k < clen; k++) {
- len = ONIGENC_CODE_TO_MBC(enc, ci->code[k], buf);
-
- if (k == 0) {
- snode = node_new_str_raw(buf, buf + len);
- CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY);
- }
- else {
- r = onig_node_str_cat(snode, buf, buf + len);
- if (r < 0) return r;
- }
- }
-
- *ptail = node_new_alt(snode, NULL_NODE);
- CHECK_NULL_RETURN_VAL(*ptail, ONIGERR_MEMORY);
- ptail = &(NCONS(*ptail).right);
- n++;
- }
- }
- }
- }
- }
-
- return n;
-}
-
#ifdef USE_SHARED_CCLASS_TABLE
@@ -4840,6 +4772,78 @@ onig_free_shared_cclass_table(void)
#endif /* USE_SHARED_CCLASS_TABLE */
+typedef struct {
+ ScanEnv* env;
+ CClassNode* cc;
+ Node* alt_root;
+ Node** ptail;
+} ICaseFoldArgType;
+
+static int
+i_case_fold(OnigCodePoint from, OnigCodePoint to[], int to_len, void* arg)
+{
+ ICaseFoldArgType* iarg;
+ ScanEnv* env;
+ CClassNode* cc;
+ BitSetRef bs;
+
+ iarg = (ICaseFoldArgType* )arg;
+ env = iarg->env;
+ cc = iarg->cc;
+ bs = cc->bs;
+
+ if (to_len == 1) {
+ int in_cc;
+ in_cc = onig_is_code_in_cc(env->enc, from, cc);
+ if ((in_cc != 0 && !IS_CCLASS_NOT(cc)) ||
+ (in_cc == 0 && IS_CCLASS_NOT(cc))) {
+ if (ONIGENC_MBC_MINLEN(env->enc) > 1 || from >= SINGLE_BYTE_SIZE) {
+ add_code_range(&(cc->mbuf), env, *to, *to);
+ }
+ else {
+ if (BITSET_AT(bs, from)) {
+ /* /(?i:[^A-C])/.match("a") ==> fail. */
+ BITSET_SET_BIT(bs, *to);
+ }
+ }
+ }
+ }
+ else {
+ int r, i, len;
+ UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
+ Node *snode = NULL_NODE;
+
+ if (onig_is_code_in_cc(env->enc, from, cc)) {
+ if (IS_CCLASS_NOT(cc)) clear_not_flag_cclass(cc, env->enc);
+
+ for (i = 0; i < to_len; i++) {
+ len = ONIGENC_CODE_TO_MBC(env->enc, to[i], buf);
+ if (i == 0) {
+ snode = onig_node_new_str(buf, buf + len);
+ CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY);
+
+ /* char-class expanded multi-char only
+ compare with string folded at match time. */
+ NSTRING_SET_AMBIG(snode);
+ }
+ else {
+ r = onig_node_str_cat(snode, buf, buf + len);
+ if (r < 0) {
+ onig_node_free(snode);
+ return r;
+ }
+ }
+ }
+
+ *(iarg->ptail) = onig_node_new_alt(snode, NULL_NODE);
+ CHECK_NULL_RETURN_VAL(*(iarg->ptail), ONIGERR_MEMORY);
+ iarg->ptail = &(NCONS((*(iarg->ptail))).right);
+ }
+ }
+
+ return 0;
+}
+
static int
parse_exp(Node** np, OnigToken* tok, int term,
UChar** src, UChar* end, ScanEnv* env)
@@ -4915,19 +4919,35 @@ parse_exp(Node** np, OnigToken* tok, int term,
CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
len = 1;
while (1) {
+ if (len >= ONIGENC_MBC_MINLEN(env->enc)) {
+ if (len == enc_len(env->enc, NSTRING(*np).s)) {
+ r = fetch_token(tok, src, end, env);
+ NSTRING_CLEAR_RAW(*np);
+ goto string_end;
+ }
+ }
+
r = fetch_token(tok, src, end, env);
if (r < 0) return r;
if (r != TK_RAW_BYTE) {
-#ifndef NUMBERED_CHAR_IS_NOT_CASE_AMBIG
- if (len >= enc_len(env->enc, NSTRING(*np).s)) {
- NSTRING_CLEAR_RAW(*np);
+ /* Don't use this, it is wrong for little endian encodings. */
+#ifdef USE_PAD_TO_SHORT_BYTE_CHAR
+ int rem;
+ if (len < ONIGENC_MBC_MINLEN(env->enc)) {
+ rem = ONIGENC_MBC_MINLEN(env->enc) - len;
+ (void )node_str_head_pad(&NSTRING(*np), rem, (UChar )0);
+ if (len + rem == enc_len(env->enc, NSTRING(*np).s)) {
+ NSTRING_CLEAR_RAW(*np);
+ goto string_end;
+ }
}
#endif
- goto string_end;
+ return ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
}
r = node_str_cat_char(*np, (UChar )tok->u.c);
if (r < 0) return r;
+
len++;
}
}
@@ -4952,7 +4972,7 @@ parse_exp(Node** np, OnigToken* tok, int term,
OnigCodePoint end_op[2];
UChar *qstart, *qend, *nextp;
- end_op[0] = (OnigCodePoint )MC_ESC(env->enc);
+ end_op[0] = (OnigCodePoint )MC_ESC(env->syntax);
end_op[1] = (OnigCodePoint )'E';
qstart = *src;
qend = find_str_position(end_op, 2, qstart, end, &nextp, env->enc);
@@ -4967,28 +4987,24 @@ parse_exp(Node** np, OnigToken* tok, int term,
case TK_CHAR_TYPE:
{
- switch (tok->u.subtype) {
- case CTYPE_WORD:
- case CTYPE_NOT_WORD:
- *np = node_new_ctype(tok->u.subtype);
+ switch (tok->u.prop.ctype) {
+ case ONIGENC_CTYPE_WORD:
+ *np = node_new_ctype(tok->u.prop.ctype, tok->u.prop.not);
CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
break;
- case CTYPE_WHITE_SPACE:
- case CTYPE_NOT_WHITE_SPACE:
- case CTYPE_DIGIT:
- case CTYPE_NOT_DIGIT:
- case CTYPE_XDIGIT:
- case CTYPE_NOT_XDIGIT:
+ case ONIGENC_CTYPE_SPACE:
+ case ONIGENC_CTYPE_DIGIT:
+ case ONIGENC_CTYPE_XDIGIT:
{
CClassNode* cc;
- int ctype, not;
#ifdef USE_SHARED_CCLASS_TABLE
- const OnigCodePoint *sbr, *mbr;
+ const OnigCodePoint *mbr;
+ OnigCodePoint sb_out;
- ctype = parse_ctype_to_enc_ctype(tok->u.subtype, &not);
- r = ONIGENC_GET_CTYPE_CODE_RANGE(env->enc, ctype, &sbr, &mbr);
+ r = ONIGENC_GET_CTYPE_CODE_RANGE(env->enc, tok->u.prop.ctype,
+ &sb_out, &mbr);
if (r == 0 &&
ONIGENC_CODE_RANGE_NUM(mbr)
>= THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS) {
@@ -4996,8 +5012,8 @@ parse_exp(Node** np, OnigToken* tok, int term,
type_cclass_key* new_key;
key.enc = env->enc;
- key.not = not;
- key.type = ctype;
+ key.not = tok->u.prop.not;
+ key.type = tok->u.prop.ctype;
THREAD_ATOMIC_START;
@@ -5017,13 +5033,15 @@ parse_exp(Node** np, OnigToken* tok, int term,
}
}
- *np = node_new_cclass_by_codepoint_range(not, sbr, mbr);
+ *np = node_new_cclass_by_codepoint_range(tok->u.prop.not,
+ sb_out, mbr);
if (IS_NULL(*np)) {
THREAD_ATOMIC_END;
return ONIGERR_MEMORY;
}
- CCLASS_SET_SHARE(&(NCCLASS(*np)));
+ cc = &(NCCLASS(*np));
+ CCLASS_SET_SHARE(cc);
new_key = (type_cclass_key* )xmalloc(sizeof(type_cclass_key));
onig_st_add_direct(OnigTypeCClassTable, (st_data_t )new_key,
(st_data_t )*np);
@@ -5032,12 +5050,11 @@ parse_exp(Node** np, OnigToken* tok, int term,
}
else {
#endif
- ctype = parse_ctype_to_enc_ctype(tok->u.subtype, &not);
*np = node_new_cclass();
CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
cc = &(NCCLASS(*np));
- add_ctype_to_cc(cc, ctype, 0, env);
- if (not != 0) CCLASS_SET_NOT(cc);
+ add_ctype_to_cc(cc, tok->u.prop.ctype, 0, env);
+ if (tok->u.prop.not != 0) CCLASS_SET_NOT(cc);
#ifdef USE_SHARED_CCLASS_TABLE
}
#endif
@@ -5066,55 +5083,28 @@ parse_exp(Node** np, OnigToken* tok, int term,
cc = &(NCCLASS(*np));
if (IS_IGNORECASE(env->option)) {
- int i, n, in_cc;
- const OnigPairAmbigCodes* ccs;
- BitSetRef bs = cc->bs;
- OnigAmbigType amb;
-
- for (amb = 0x01; amb <= ONIGENC_AMBIGUOUS_MATCH_LIMIT; amb <<= 1) {
- if ((amb & env->ambig_flag) == 0) continue;
-
- n = ONIGENC_GET_ALL_PAIR_AMBIG_CODES(env->enc, amb, &ccs);
- for (i = 0; i < n; i++) {
- in_cc = onig_is_code_in_cc(env->enc, ccs[i].from, cc);
-
- if ((in_cc != 0 && !IS_CCLASS_NOT(cc)) ||
- (in_cc == 0 && IS_CCLASS_NOT(cc))) {
- if (ONIGENC_MBC_MINLEN(env->enc) > 1 ||
- ccs[i].from >= SINGLE_BYTE_SIZE) {
- /* if (cc->not) clear_not_flag_cclass(cc, env->enc); */
- add_code_range(&(cc->mbuf), env, ccs[i].to, ccs[i].to);
- }
- else {
- if (BITSET_AT(bs, ccs[i].from)) {
- /* /(?i:[^A-C])/.match("a") ==> fail. */
- BITSET_SET_BIT(bs, ccs[i].to);
- }
- if (BITSET_AT(bs, ccs[i].to)) {
- BITSET_SET_BIT(bs, ccs[i].from);
- }
- }
- }
- }
- }
- }
+ ICaseFoldArgType iarg;
- if (IS_IGNORECASE(env->option) &&
- (env->ambig_flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
- int res;
- Node *alt_root, *work;
+ iarg.env = env;
+ iarg.cc = cc;
+ iarg.alt_root = NULL_NODE;
+ iarg.ptail = &(iarg.alt_root);
- res = make_compound_alt_node_from_cc(env->ambig_flag, env->enc,
- cc, &alt_root);
- if (res < 0) return res;
- if (res > 0) {
- work = node_new_alt(*np, alt_root);
+ r = ONIGENC_APPLY_ALL_CASE_FOLD(env->enc, env->case_fold_flag,
+ i_case_fold, &iarg);
+ if (r != 0) {
+ if (IS_NOT_NULL(iarg.alt_root))
+ onig_node_free(iarg.alt_root);
+ return r;
+ }
+ if (IS_NOT_NULL(iarg.alt_root)) {
+ Node* work = onig_node_new_alt(*np, iarg.alt_root);
if (IS_NULL(work)) {
- onig_node_free(alt_root);
+ onig_node_free(iarg.alt_root);
return ONIGERR_MEMORY;
}
*np = work;
- }
+ }
}
}
break;
@@ -5127,9 +5117,9 @@ parse_exp(Node** np, OnigToken* tok, int term,
case TK_ANYCHAR_ANYTIME:
*np = node_new_anychar();
CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
- qn = node_new_qualifier(0, REPEAT_INFINITE, 0);
+ qn = node_new_quantifier(0, REPEAT_INFINITE, 0);
CHECK_NULL_RETURN_VAL(qn, ONIGERR_MEMORY);
- NQUALIFIER(qn).target = *np;
+ NQUANTIFIER(qn).target = *np;
*np = qn;
break;
@@ -5185,14 +5175,14 @@ parse_exp(Node** np, OnigToken* tok, int term,
repeat:
if (r == TK_OP_REPEAT || r == TK_INTERVAL) {
- if (is_invalid_qualifier_target(*targetp))
+ if (is_invalid_quantifier_target(*targetp))
return ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID;
- qn = node_new_qualifier(tok->u.repeat.lower, tok->u.repeat.upper,
+ qn = node_new_quantifier(tok->u.repeat.lower, tok->u.repeat.upper,
(r == TK_INTERVAL ? 1 : 0));
CHECK_NULL_RETURN_VAL(qn, ONIGERR_MEMORY);
- NQUALIFIER(qn).greedy = tok->u.repeat.greedy;
- r = set_qualifier(qn, *targetp, group, env);
+ NQUANTIFIER(qn).greedy = tok->u.repeat.greedy;
+ r = set_quantifier(qn, *targetp, group, env);
if (r < 0) return r;
if (tok->u.repeat.possessive != 0) {
@@ -5277,7 +5267,7 @@ parse_subexp(Node** top, OnigToken* tok, int term,
*top = node;
}
else if (r == TK_ALT) {
- *top = node_new_alt(node, NULL);
+ *top = onig_node_new_alt(node, NULL);
headp = &(NCONS(*top).right);
while (r == TK_ALT) {
r = fetch_token(tok, src, end, env);
@@ -5285,7 +5275,7 @@ parse_subexp(Node** top, OnigToken* tok, int term,
r = parse_branch(&node, tok, term, src, end, env);
if (r < 0) return r;
- *headp = node_new_alt(node, NULL);
+ *headp = onig_node_new_alt(node, NULL);
headp = &(NCONS(*headp).right);
}
@@ -5328,13 +5318,13 @@ onig_parse_make_tree(Node** root, const UChar* pattern, const UChar* end, regex_
#endif
scan_env_clear(env);
- env->option = reg->options;
- env->ambig_flag = reg->ambig_flag;
- env->enc = reg->enc;
- env->syntax = reg->syntax;
- env->pattern = (UChar* )pattern;
- env->pattern_end = (UChar* )end;
- env->reg = reg;
+ env->option = reg->options;
+ env->case_fold_flag = reg->case_fold_flag;
+ env->enc = reg->enc;
+ env->syntax = reg->syntax;
+ env->pattern = (UChar* )pattern;
+ env->pattern_end = (UChar* )end;
+ env->reg = reg;
*root = NULL;
p = (UChar* )pattern;