summary | refs | log | tree | commit | diff
path: root/regcomp.c
diff options
context:
space:
mode:
Diffstat (limited to 'regcomp.c')
-rw-r--r-- regcomp.c 3032
1 files changed, 1539 insertions, 1493 deletions
diff --git a/regcomp.c b/regcomp.c
index b4264a40bd..320cf520e9 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -2,8 +2,8 @@
regcomp.c - Onigmo (Oniguruma-mod) (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
- * Copyright (c) 2011-2013 K.Takata <kentkt AT csc DOT jp>
+ * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2011-2019 K.Takata <kentkt AT csc DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -126,12 +126,6 @@ bitset_is_empty(BitSetRef bs)
#ifdef ONIG_DEBUG
static int
-onig_is_prelude(void)
-{
- return !rb_const_defined(rb_cThread, rb_intern_const("MUTEX_FOR_THREAD_EXCLUSIVE"));
-}
-
-static int
bitset_on_num(BitSetRef bs)
{
int i, n;
@@ -144,6 +138,27 @@ bitset_on_num(BitSetRef bs)
}
#endif
+// Right-size the buffer (reg->p) of every regex in the chain after compilation: free it when reg->used is 0, otherwise shrink it to reg->used bytes.
+static void
+onig_reg_resize(regex_t *reg)
+{
+ do {
+ if (!reg->used) {
+ xfree(reg->p);
+ reg->alloc = 0;
+ reg->p = 0;
+ }
+ else if (reg->alloc > reg->used) {
+ unsigned char *new_ptr = xrealloc(reg->p, reg->used);
+ // Shrinking is only an optimization: if xrealloc returns NULL, keep the existing (larger) buffer and its recorded alloc size.
+ if (new_ptr) {
+ reg->alloc = reg->used;
+ reg->p = new_ptr;
+ }
+ }
+ } while ((reg = reg->chain) != 0); // advance along reg->chain until it is null
+}
+
extern int
onig_bbuf_init(BBuf* buf, OnigDistance size)
{
@@ -180,8 +195,7 @@ unset_addr_list_init(UnsetAddrList* uslist, int size)
static void
unset_addr_list_end(UnsetAddrList* uslist)
{
- if (IS_NOT_NULL(uslist->us))
- xfree(uslist->us);
+ xfree(uslist->us); /* NULL guard dropped; presumably xfree(NULL) is a no-op like free(NULL) -- confirm against the xfree definition */
}
static int
@@ -260,6 +274,7 @@ add_mem_num(regex_t* reg, int num)
return 0;
}
+#if 0
static int
add_pointer(regex_t* reg, void* addr)
{
@@ -268,6 +283,7 @@ add_pointer(regex_t* reg, void* addr)
BBUF_ADD(reg, &ptr, SIZE_POINTER);
return 0;
}
+#endif
static int
add_option(regex_t* reg, OnigOptionType option)
@@ -321,9 +337,10 @@ static int compile_tree(Node* node, regex_t* reg);
(op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC)
static int
-select_str_opcode(int mb_len, OnigDistance str_len, int ignore_case)
+select_str_opcode(int mb_len, OnigDistance byte_len, int ignore_case)
{
int op;
+ OnigDistance str_len = roomof(byte_len, mb_len);
if (ignore_case) {
switch (str_len) {
@@ -425,11 +442,11 @@ compile_tree_n_times(Node* node, int n, regex_t* reg)
}
static int
-add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, OnigDistance str_len,
+add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, OnigDistance byte_len,
regex_t* reg ARG_UNUSED, int ignore_case)
{
int len;
- int op = select_str_opcode(mb_len, str_len, ignore_case);
+ int op = select_str_opcode(mb_len, byte_len, ignore_case);
len = SIZE_OPCODE;
@@ -437,15 +454,15 @@ add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, OnigDistance str_len,
if (IS_NEED_STR_LEN_OP_EXACT(op))
len += SIZE_LENGTH;
- len += mb_len * (int )str_len;
+ len += (int )byte_len;
return len;
}
static int
-add_compile_string(UChar* s, int mb_len, OnigDistance str_len,
+add_compile_string(UChar* s, int mb_len, OnigDistance byte_len,
regex_t* reg, int ignore_case)
{
- int op = select_str_opcode(mb_len, str_len, ignore_case);
+ int op = select_str_opcode(mb_len, byte_len, ignore_case);
add_opcode(reg, op);
if (op == OP_EXACTMBN)
@@ -453,12 +470,12 @@ add_compile_string(UChar* s, int mb_len, OnigDistance str_len,
if (IS_NEED_STR_LEN_OP_EXACT(op)) {
if (op == OP_EXACTN_IC)
- add_length(reg, mb_len * str_len);
+ add_length(reg, byte_len);
else
- add_length(reg, str_len);
+ add_length(reg, byte_len / mb_len);
}
- add_bytes(reg, s, mb_len * str_len);
+ add_bytes(reg, s, byte_len);
return 0;
}
@@ -466,7 +483,7 @@ add_compile_string(UChar* s, int mb_len, OnigDistance str_len,
static int
compile_length_string_node(Node* node, regex_t* reg)
{
- int rlen, r, len, prev_len, slen, ambig;
+ int rlen, r, len, prev_len, blen, ambig;
OnigEncoding enc = reg->enc;
UChar *p, *prev;
StrNode* sn;
@@ -480,24 +497,24 @@ compile_length_string_node(Node* node, regex_t* reg)
p = prev = sn->s;
prev_len = enclen(enc, p, sn->end);
p += prev_len;
- slen = 1;
+ blen = prev_len;
rlen = 0;
for (; p < sn->end; ) {
len = enclen(enc, p, sn->end);
- if (len == prev_len) {
- slen++;
+ if (len == prev_len || ambig) {
+ blen += len;
}
else {
- r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
+ r = add_compile_string_length(prev, prev_len, blen, reg, ambig);
rlen += r;
prev = p;
- slen = 1;
+ blen = len;
prev_len = len;
}
p += len;
}
- r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
+ r = add_compile_string_length(prev, prev_len, blen, reg, ambig);
rlen += r;
return rlen;
}
@@ -514,7 +531,7 @@ compile_length_string_raw_node(StrNode* sn, regex_t* reg)
static int
compile_string_node(Node* node, regex_t* reg)
{
- int r, len, prev_len, slen, ambig;
+ int r, len, prev_len, blen, ambig;
OnigEncoding enc = reg->enc;
UChar *p, *prev, *end;
StrNode* sn;
@@ -529,25 +546,25 @@ compile_string_node(Node* node, regex_t* reg)
p = prev = sn->s;
prev_len = enclen(enc, p, end);
p += prev_len;
- slen = 1;
+ blen = prev_len;
for (; p < end; ) {
len = enclen(enc, p, end);
- if (len == prev_len) {
- slen++;
+ if (len == prev_len || ambig) {
+ blen += len;
}
else {
- r = add_compile_string(prev, prev_len, slen, reg, ambig);
+ r = add_compile_string(prev, prev_len, blen, reg, ambig);
if (r) return r;
prev = p;
- slen = 1;
+ blen = len;
prev_len = len;
}
p += len;
}
- return add_compile_string(prev, prev_len, slen, reg, ambig);
+ return add_compile_string(prev, prev_len, blen, reg, ambig);
}
static int
@@ -587,11 +604,6 @@ compile_length_cclass_node(CClassNode* cc, regex_t* reg)
{
int len;
- if (IS_NCCLASS_SHARE(cc)) {
- len = SIZE_OPCODE + SIZE_POINTER;
- return len;
- }
-
if (IS_NULL(cc->mbuf)) {
len = SIZE_OPCODE + SIZE_BITSET;
}
@@ -617,12 +629,6 @@ compile_cclass_node(CClassNode* cc, regex_t* reg)
{
int r;
- if (IS_NCCLASS_SHARE(cc)) {
- add_opcode(reg, OP_CCLASS_NODE);
- r = add_pointer(reg, cc);
- return r;
- }
-
if (IS_NULL(cc->mbuf)) {
if (IS_NCCLASS_NOT(cc))
add_opcode(reg, OP_CCLASS_NOT);
@@ -770,23 +776,23 @@ compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
if (infinite && qn->lower <= 1) {
if (qn->greedy) {
if (qn->lower == 1)
- len = SIZE_OP_JUMP;
+ len = SIZE_OP_JUMP;
else
- len = 0;
+ len = 0;
len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP;
}
else {
if (qn->lower == 0)
- len = SIZE_OP_JUMP;
+ len = SIZE_OP_JUMP;
else
- len = 0;
+ len = 0;
len += mod_tlen + SIZE_OP_PUSH + cklen;
}
}
else if (qn->upper == 0) {
- if (qn->is_refered != 0) /* /(?<n>..){0}/ */
+ if (qn->is_referred != 0) /* /(?<n>..){0}/ */
len = SIZE_OP_JUMP + tlen;
else
len = 0;
@@ -794,10 +800,10 @@ compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
else if (qn->upper == 1 && qn->greedy) {
if (qn->lower == 0) {
if (CKN_ON) {
- len = SIZE_OP_STATE_CHECK_PUSH + tlen;
+ len = SIZE_OP_STATE_CHECK_PUSH + tlen;
}
else {
- len = SIZE_OP_PUSH + tlen;
+ len = SIZE_OP_PUSH + tlen;
}
}
else {
@@ -835,31 +841,31 @@ compile_quantifier_node(QtfrNode* qn, regex_t* reg)
if (r) return r;
if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) {
if (IS_MULTILINE(reg->options))
- r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
+ r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
else
- r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
+ r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
if (r) return r;
if (CKN_ON) {
- r = add_state_check_num(reg, ckn);
- if (r) return r;
+ r = add_state_check_num(reg, ckn);
+ if (r) return r;
}
return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
}
else {
if (IS_MULTILINE(reg->options)) {
- r = add_opcode(reg, (CKN_ON ?
- OP_STATE_CHECK_ANYCHAR_ML_STAR
- : OP_ANYCHAR_ML_STAR));
+ r = add_opcode(reg, (CKN_ON ?
+ OP_STATE_CHECK_ANYCHAR_ML_STAR
+ : OP_ANYCHAR_ML_STAR));
}
else {
- r = add_opcode(reg, (CKN_ON ?
- OP_STATE_CHECK_ANYCHAR_STAR
- : OP_ANYCHAR_STAR));
+ r = add_opcode(reg, (CKN_ON ?
+ OP_STATE_CHECK_ANYCHAR_STAR
+ : OP_ANYCHAR_STAR));
}
if (r) return r;
if (CKN_ON)
- r = add_state_check_num(reg, ckn);
+ r = add_state_check_num(reg, ckn);
return r;
}
@@ -873,49 +879,49 @@ compile_quantifier_node(QtfrNode* qn, regex_t* reg)
if (infinite && qn->lower <= 1) {
if (qn->greedy) {
if (qn->lower == 1) {
- r = add_opcode_rel_addr(reg, OP_JUMP,
- (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH));
- if (r) return r;
+ r = add_opcode_rel_addr(reg, OP_JUMP,
+ (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH));
+ if (r) return r;
}
if (CKN_ON) {
- r = add_opcode(reg, OP_STATE_CHECK_PUSH);
- if (r) return r;
- r = add_state_check_num(reg, ckn);
- if (r) return r;
- r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP);
+ r = add_opcode(reg, OP_STATE_CHECK_PUSH);
+ if (r) return r;
+ r = add_state_check_num(reg, ckn);
+ if (r) return r;
+ r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP);
}
else {
- r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
+ r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
}
if (r) return r;
r = compile_tree_empty_check(qn->target, reg, empty_info);
if (r) return r;
r = add_opcode_rel_addr(reg, OP_JUMP,
- -(mod_tlen + (int )SIZE_OP_JUMP
- + (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)));
+ -(mod_tlen + (int )SIZE_OP_JUMP
+ + (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)));
}
else {
if (qn->lower == 0) {
- r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
- if (r) return r;
+ r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
+ if (r) return r;
}
r = compile_tree_empty_check(qn->target, reg, empty_info);
if (r) return r;
if (CKN_ON) {
- r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP);
- if (r) return r;
- r = add_state_check_num(reg, ckn);
- if (r) return r;
- r = add_rel_addr(reg,
- -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP));
+ r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP);
+ if (r) return r;
+ r = add_state_check_num(reg, ckn);
+ if (r) return r;
+ r = add_rel_addr(reg,
+ -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP));
}
else
- r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
+ r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
}
}
else if (qn->upper == 0) {
- if (qn->is_refered != 0) { /* /(?<n>..){0}/ */
+ if (qn->is_referred != 0) { /* /(?<n>..){0}/ */
r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
if (r) return r;
r = compile_tree(qn->target, reg);
@@ -926,14 +932,14 @@ compile_quantifier_node(QtfrNode* qn, regex_t* reg)
else if (qn->upper == 1 && qn->greedy) {
if (qn->lower == 0) {
if (CKN_ON) {
- r = add_opcode(reg, OP_STATE_CHECK_PUSH);
- if (r) return r;
- r = add_state_check_num(reg, ckn);
- if (r) return r;
- r = add_rel_addr(reg, tlen);
+ r = add_opcode(reg, OP_STATE_CHECK_PUSH);
+ if (r) return r;
+ r = add_state_check_num(reg, ckn);
+ if (r) return r;
+ r = add_rel_addr(reg, tlen);
}
else {
- r = add_opcode_rel_addr(reg, OP_PUSH, tlen);
+ r = add_opcode_rel_addr(reg, OP_PUSH, tlen);
}
if (r) return r;
}
@@ -1006,17 +1012,20 @@ compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
}
if (qn->greedy) {
+#ifdef USE_OP_PUSH_OR_JUMP_EXACT
if (IS_NOT_NULL(qn->head_exact))
- len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP;
- else if (IS_NOT_NULL(qn->next_head_exact))
- len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP;
+ len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP;
+ else
+#endif
+ if (IS_NOT_NULL(qn->next_head_exact))
+ len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP;
else
- len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;
+ len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;
}
else
len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH;
}
- else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
+ else if (qn->upper == 0 && qn->is_referred != 0) { /* /(?<n>..){0}/ */
len = SIZE_OP_JUMP + tlen;
}
else if (!infinite && qn->greedy &&
@@ -1051,17 +1060,17 @@ compile_quantifier_node(QtfrNode* qn, regex_t* reg)
if (r) return r;
if (IS_NOT_NULL(qn->next_head_exact)) {
if (IS_MULTILINE(reg->options))
- r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
+ r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
else
- r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
+ r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
if (r) return r;
return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
}
else {
if (IS_MULTILINE(reg->options))
- return add_opcode(reg, OP_ANYCHAR_ML_STAR);
+ return add_opcode(reg, OP_ANYCHAR_ML_STAR);
else
- return add_opcode(reg, OP_ANYCHAR_STAR);
+ return add_opcode(reg, OP_ANYCHAR_STAR);
}
}
@@ -1074,15 +1083,18 @@ compile_quantifier_node(QtfrNode* qn, regex_t* reg)
(qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
if (qn->greedy) {
- if (IS_NOT_NULL(qn->head_exact))
- r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1);
- else if (IS_NOT_NULL(qn->next_head_exact))
- r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT);
- else
- r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH);
+#ifdef USE_OP_PUSH_OR_JUMP_EXACT
+ if (IS_NOT_NULL(qn->head_exact))
+ r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1);
+ else
+#endif
+ if (IS_NOT_NULL(qn->next_head_exact))
+ r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT);
+ else
+ r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH);
}
else {
- r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP);
+ r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP);
}
if (r) return r;
}
@@ -1092,33 +1104,36 @@ compile_quantifier_node(QtfrNode* qn, regex_t* reg)
}
if (qn->greedy) {
+#ifdef USE_OP_PUSH_OR_JUMP_EXACT
if (IS_NOT_NULL(qn->head_exact)) {
- r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1,
- mod_tlen + SIZE_OP_JUMP);
- if (r) return r;
- add_bytes(reg, NSTR(qn->head_exact)->s, 1);
- r = compile_tree_empty_check(qn->target, reg, empty_info);
- if (r) return r;
- r = add_opcode_rel_addr(reg, OP_JUMP,
- -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1));
+ r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1,
+ mod_tlen + SIZE_OP_JUMP);
+ if (r) return r;
+ add_bytes(reg, NSTR(qn->head_exact)->s, 1);
+ r = compile_tree_empty_check(qn->target, reg, empty_info);
+ if (r) return r;
+ r = add_opcode_rel_addr(reg, OP_JUMP,
+ -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1));
}
- else if (IS_NOT_NULL(qn->next_head_exact)) {
- r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT,
- mod_tlen + SIZE_OP_JUMP);
- if (r) return r;
- add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
- r = compile_tree_empty_check(qn->target, reg, empty_info);
- if (r) return r;
- r = add_opcode_rel_addr(reg, OP_JUMP,
+ else
+#endif
+ if (IS_NOT_NULL(qn->next_head_exact)) {
+ r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT,
+ mod_tlen + SIZE_OP_JUMP);
+ if (r) return r;
+ add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
+ r = compile_tree_empty_check(qn->target, reg, empty_info);
+ if (r) return r;
+ r = add_opcode_rel_addr(reg, OP_JUMP,
-(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT));
}
else {
- r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
- if (r) return r;
- r = compile_tree_empty_check(qn->target, reg, empty_info);
- if (r) return r;
- r = add_opcode_rel_addr(reg, OP_JUMP,
- -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH));
+ r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
+ if (r) return r;
+ r = compile_tree_empty_check(qn->target, reg, empty_info);
+ if (r) return r;
+ r = add_opcode_rel_addr(reg, OP_JUMP,
+ -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH));
}
}
else {
@@ -1129,7 +1144,7 @@ compile_quantifier_node(QtfrNode* qn, regex_t* reg)
r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
}
}
- else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
+ else if (qn->upper == 0 && qn->is_referred != 0) { /* /(?<n>..){0}/ */
r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
if (r) return r;
r = compile_tree(qn->target, reg);
@@ -1144,7 +1159,7 @@ compile_quantifier_node(QtfrNode* qn, regex_t* reg)
for (i = 0; i < n; i++) {
r = add_opcode_rel_addr(reg, OP_PUSH,
- (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH);
+ (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH);
if (r) return r;
r = compile_tree(qn->target, reg);
if (r) return r;
@@ -1231,39 +1246,51 @@ compile_length_enclose_node(EncloseNode* node, regex_t* reg)
#ifdef USE_SUBEXP_CALL
if (IS_ENCLOSE_CALLED(node)) {
len = SIZE_OP_MEMORY_START_PUSH + tlen
- + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;
+ + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;
if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
- len += (IS_ENCLOSE_RECURSION(node)
- ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
+ len += (IS_ENCLOSE_RECURSION(node)
+ ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
else
- len += (IS_ENCLOSE_RECURSION(node)
- ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
+ len += (IS_ENCLOSE_RECURSION(node)
+ ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
+ }
+ else if (IS_ENCLOSE_RECURSION(node)) {
+ len = SIZE_OP_MEMORY_START_PUSH;
+ len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
+ ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_REC);
}
else
#endif
{
if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
- len = SIZE_OP_MEMORY_START_PUSH;
+ len = SIZE_OP_MEMORY_START_PUSH;
else
- len = SIZE_OP_MEMORY_START;
+ len = SIZE_OP_MEMORY_START;
len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
- ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END);
+ ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END);
}
break;
case ENCLOSE_STOP_BACKTRACK:
+ /* Disable POP_STOP_BT optimization for simple repeat under the match cache */
+ /* optimization because the match cache optimization pushes an extra item to */
+ /* the stack and it breaks the assumption for this optimization. */
+#ifndef USE_MATCH_CACHE
if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
QtfrNode* qn = NQTFR(node->target);
tlen = compile_length_tree(qn->target, reg);
if (tlen < 0) return tlen;
len = tlen * qn->lower
- + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP;
+ + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP;
}
else {
+#endif
len = SIZE_OP_PUSH_STOP_BT + tlen + SIZE_OP_POP_STOP_BT;
+#ifndef USE_MATCH_CACHE
}
+#endif
break;
case ENCLOSE_CONDITION:
@@ -1286,6 +1313,10 @@ compile_length_enclose_node(EncloseNode* node, regex_t* reg)
}
break;
+ case ENCLOSE_ABSENT:
+ len = SIZE_OP_PUSH_ABSENT_POS + SIZE_OP_ABSENT + tlen + SIZE_OP_ABSENT_END;
+ break;
+
default:
return ONIGERR_TYPE_BUG;
break;
@@ -1317,11 +1348,11 @@ compile_enclose_node(EncloseNode* node, regex_t* reg)
len = compile_length_tree(node->target, reg);
len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN);
if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
- len += (IS_ENCLOSE_RECURSION(node)
- ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
+ len += (IS_ENCLOSE_RECURSION(node)
+ ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
else
- len += (IS_ENCLOSE_RECURSION(node)
- ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
+ len += (IS_ENCLOSE_RECURSION(node)
+ ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
r = add_opcode_rel_addr(reg, OP_JUMP, len);
if (r) return r;
@@ -1339,30 +1370,42 @@ compile_enclose_node(EncloseNode* node, regex_t* reg)
#ifdef USE_SUBEXP_CALL
if (IS_ENCLOSE_CALLED(node)) {
if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
- r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
- ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));
+ r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
+ ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));
else
- r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
- ? OP_MEMORY_END_REC : OP_MEMORY_END));
+ r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
+ ? OP_MEMORY_END_REC : OP_MEMORY_END));
if (r) return r;
r = add_mem_num(reg, node->regnum);
if (r) return r;
r = add_opcode(reg, OP_RETURN);
}
+ else if (IS_ENCLOSE_RECURSION(node)) {
+ if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
+ r = add_opcode(reg, OP_MEMORY_END_PUSH_REC);
+ else
+ r = add_opcode(reg, OP_MEMORY_END_REC);
+ if (r) return r;
+ r = add_mem_num(reg, node->regnum);
+ }
else
#endif
{
if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
- r = add_opcode(reg, OP_MEMORY_END_PUSH);
+ r = add_opcode(reg, OP_MEMORY_END_PUSH);
else
- r = add_opcode(reg, OP_MEMORY_END);
+ r = add_opcode(reg, OP_MEMORY_END);
if (r) return r;
r = add_mem_num(reg, node->regnum);
}
break;
case ENCLOSE_STOP_BACKTRACK:
+ /* Disable POP_STOP_BT optimization for simple repeat under the match cache */
+ /* optimization because the match cache optimization pushes an extra item to */
+ /* the stack and it breaks the assumption for this optimization. */
+#ifndef USE_MATCH_CACHE
if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
QtfrNode* qn = NQTFR(node->target);
r = compile_tree_n_times(qn->target, qn->lower, reg);
@@ -1378,15 +1421,18 @@ compile_enclose_node(EncloseNode* node, regex_t* reg)
r = add_opcode(reg, OP_POP);
if (r) return r;
r = add_opcode_rel_addr(reg, OP_JUMP,
- -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP));
+ -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP));
}
else {
+#endif
r = add_opcode(reg, OP_PUSH_STOP_BT);
if (r) return r;
r = compile_tree(node->target, reg);
if (r) return r;
r = add_opcode(reg, OP_POP_STOP_BT);
+#ifndef USE_MATCH_CACHE
}
+#endif
break;
case ENCLOSE_CONDITION:
@@ -1422,6 +1468,19 @@ compile_enclose_node(EncloseNode* node, regex_t* reg)
}
break;
+ case ENCLOSE_ABSENT:
+ len = compile_length_tree(node->target, reg);
+ if (len < 0) return len;
+
+ r = add_opcode(reg, OP_PUSH_ABSENT_POS);
+ if (r) return r;
+ r = add_opcode_rel_addr(reg, OP_ABSENT, len + SIZE_OP_ABSENT_END);
+ if (r) return r;
+ r = compile_tree(node->target, reg);
+ if (r) return r;
+ r = add_opcode(reg, OP_ABSENT_END);
+ break;
+
default:
return ONIGERR_TYPE_BUG;
break;
@@ -1476,9 +1535,6 @@ compile_anchor_node(AnchorNode* node, regex_t* reg)
case ANCHOR_SEMI_END_BUF: r = add_opcode(reg, OP_SEMI_END_BUF); break;
case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break;
- /* used for implicit anchor optimization: /.*a/ ==> /(?:^|\G).*a/ */
- case ANCHOR_ANYCHAR_STAR: r = add_opcode(reg, OP_BEGIN_POS_OR_LINE); break;
-
case ANCHOR_WORD_BOUND:
if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_BOUND);
else r = add_opcode(reg, OP_WORD_BOUND);
@@ -1523,11 +1579,11 @@ compile_anchor_node(AnchorNode* node, regex_t* reg)
r = add_opcode(reg, OP_LOOK_BEHIND);
if (r) return r;
if (node->char_len < 0) {
- r = get_char_length_tree(node->target, reg, &n);
- if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
+ r = get_char_length_tree(node->target, reg, &n);
+ if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
}
else
- n = node->char_len;
+ n = node->char_len;
r = add_length(reg, n);
if (r) return r;
r = compile_tree(node->target, reg);
@@ -1539,14 +1595,14 @@ compile_anchor_node(AnchorNode* node, regex_t* reg)
int n;
len = compile_length_tree(node->target, reg);
r = add_opcode_rel_addr(reg, OP_PUSH_LOOK_BEHIND_NOT,
- len + SIZE_OP_FAIL_LOOK_BEHIND_NOT);
+ len + SIZE_OP_FAIL_LOOK_BEHIND_NOT);
if (r) return r;
if (node->char_len < 0) {
- r = get_char_length_tree(node->target, reg, &n);
- if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
+ r = get_char_length_tree(node->target, reg, &n);
+ if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
}
else
- n = node->char_len;
+ n = node->char_len;
r = add_length(reg, n);
if (r) return r;
r = compile_tree(node->target, reg);
@@ -1582,13 +1638,15 @@ compile_length_tree(Node* node, regex_t* reg)
case NT_ALT:
{
- int n;
-
- n = r = 0;
+ int n = 0;
+ len = 0;
do {
- r += compile_length_tree(NCAR(node), reg);
- n++;
+ r = compile_length_tree(NCAR(node), reg);
+ if (r < 0) return r;
+ len += r;
+ n++;
} while (IS_NOT_NULL(node = NCDR(node)));
+ r = len;
r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);
}
break;
@@ -1621,11 +1679,11 @@ compile_length_tree(Node* node, regex_t* reg)
else
#endif
if (br->back_num == 1) {
- r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)
- ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM));
+ r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)
+ ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM));
}
else {
- r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
+ r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
}
}
break;
@@ -1674,26 +1732,26 @@ compile_tree(Node* node, regex_t* reg)
Node* x = node;
len = 0;
do {
- len += compile_length_tree(NCAR(x), reg);
- if (NCDR(x) != NULL) {
- len += SIZE_OP_PUSH + SIZE_OP_JUMP;
- }
+ len += compile_length_tree(NCAR(x), reg);
+ if (NCDR(x) != NULL) {
+ len += SIZE_OP_PUSH + SIZE_OP_JUMP;
+ }
} while (IS_NOT_NULL(x = NCDR(x)));
pos = reg->used + len; /* goal position */
do {
- len = compile_length_tree(NCAR(node), reg);
- if (IS_NOT_NULL(NCDR(node))) {
- r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP);
- if (r) break;
- }
- r = compile_tree(NCAR(node), reg);
- if (r) break;
- if (IS_NOT_NULL(NCDR(node))) {
- len = pos - (reg->used + SIZE_OP_JUMP);
- r = add_opcode_rel_addr(reg, OP_JUMP, len);
- if (r) break;
- }
+ len = compile_length_tree(NCAR(node), reg);
+ if (IS_NOT_NULL(NCDR(node))) {
+ r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP);
+ if (r) break;
+ }
+ r = compile_tree(NCAR(node), reg);
+ if (r) break;
+ if (IS_NOT_NULL(NCDR(node))) {
+ len = pos - (reg->used + SIZE_OP_JUMP);
+ r = add_opcode_rel_addr(reg, OP_JUMP, len);
+ if (r) break;
+ }
} while (IS_NOT_NULL(node = NCDR(node)));
}
break;
@@ -1715,18 +1773,18 @@ compile_tree(Node* node, regex_t* reg)
switch (NCTYPE(node)->ctype) {
case ONIGENC_CTYPE_WORD:
- if (NCTYPE(node)->ascii_range != 0) {
- if (NCTYPE(node)->not != 0) op = OP_NOT_ASCII_WORD;
- else op = OP_ASCII_WORD;
- }
- else {
- if (NCTYPE(node)->not != 0) op = OP_NOT_WORD;
- else op = OP_WORD;
- }
- break;
+ if (NCTYPE(node)->ascii_range != 0) {
+ if (NCTYPE(node)->not != 0) op = OP_NOT_ASCII_WORD;
+ else op = OP_ASCII_WORD;
+ }
+ else {
+ if (NCTYPE(node)->not != 0) op = OP_NOT_WORD;
+ else op = OP_WORD;
+ }
+ break;
default:
- return ONIGERR_TYPE_BUG;
- break;
+ return ONIGERR_TYPE_BUG;
+ break;
}
r = add_opcode(reg, op);
}
@@ -1745,39 +1803,39 @@ compile_tree(Node* node, regex_t* reg)
#ifdef USE_BACKREF_WITH_LEVEL
if (IS_BACKREF_NEST_LEVEL(br)) {
- r = add_opcode(reg, OP_BACKREF_WITH_LEVEL);
- if (r) return r;
- r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));
- if (r) return r;
- r = add_length(reg, br->nest_level);
- if (r) return r;
-
- goto add_bacref_mems;
+ r = add_opcode(reg, OP_BACKREF_WITH_LEVEL);
+ if (r) return r;
+ r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));
+ if (r) return r;
+ r = add_length(reg, br->nest_level);
+ if (r) return r;
+
+ goto add_bacref_mems;
}
else
#endif
if (br->back_num == 1) {
- n = br->back_static[0];
- if (IS_IGNORECASE(reg->options)) {
- r = add_opcode(reg, OP_BACKREFN_IC);
- if (r) return r;
- r = add_mem_num(reg, n);
- }
- else {
- switch (n) {
- case 1: r = add_opcode(reg, OP_BACKREF1); break;
- case 2: r = add_opcode(reg, OP_BACKREF2); break;
- default:
- r = add_opcode(reg, OP_BACKREFN);
- if (r) return r;
- r = add_mem_num(reg, n);
- break;
- }
- }
+ n = br->back_static[0];
+ if (IS_IGNORECASE(reg->options)) {
+ r = add_opcode(reg, OP_BACKREFN_IC);
+ if (r) return r;
+ r = add_mem_num(reg, n);
+ }
+ else {
+ switch (n) {
+ case 1: r = add_opcode(reg, OP_BACKREF1); break;
+ case 2: r = add_opcode(reg, OP_BACKREF2); break;
+ default:
+ r = add_opcode(reg, OP_BACKREFN);
+ if (r) return r;
+ r = add_mem_num(reg, n);
+ break;
+ }
+ }
}
else {
- int i;
- int* p;
+ int i;
+ int* p;
if (IS_IGNORECASE(reg->options)) {
r = add_opcode(reg, OP_BACKREF_MULTI_IC);
@@ -1785,18 +1843,18 @@ compile_tree(Node* node, regex_t* reg)
else {
r = add_opcode(reg, OP_BACKREF_MULTI);
}
- if (r) return r;
+ if (r) return r;
#ifdef USE_BACKREF_WITH_LEVEL
add_bacref_mems:
#endif
- r = add_length(reg, br->back_num);
- if (r) return r;
- p = BACKREFS_P(br);
- for (i = br->back_num - 1; i >= 0; i--) {
- r = add_mem_num(reg, p[i]);
- if (r) return r;
- }
+ r = add_length(reg, br->back_num);
+ if (r) return r;
+ p = BACKREFS_P(br);
+ for (i = br->back_num - 1; i >= 0; i--) {
+ r = add_mem_num(reg, p[i]);
+ if (r) return r;
+ }
}
}
break;
@@ -1851,7 +1909,7 @@ noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
Node* old = *ptarget;
r = noname_disable_map(ptarget, map, counter);
if (*ptarget != old && NTYPE(*ptarget) == NT_QTFR) {
- onig_reduce_nested_quantifier(node, *ptarget);
+ onig_reduce_nested_quantifier(node, *ptarget);
}
}
break;
@@ -1860,36 +1918,26 @@ noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
{
EncloseNode* en = NENCLOSE(node);
if (en->type == ENCLOSE_MEMORY) {
- if (IS_ENCLOSE_NAMED_GROUP(en)) {
- (*counter)++;
- map[en->regnum].new_val = *counter;
- en->regnum = *counter;
- r = noname_disable_map(&(en->target), map, counter);
- }
- else {
- *plink = en->target;
- en->target = NULL_NODE;
- onig_node_free(node);
- r = noname_disable_map(plink, map, counter);
- }
+ if (IS_ENCLOSE_NAMED_GROUP(en)) {
+ (*counter)++;
+ map[en->regnum].new_val = *counter;
+ en->regnum = *counter;
+ }
+ else if (en->regnum != 0) {
+ *plink = en->target;
+ en->target = NULL_NODE;
+ onig_node_free(node);
+ r = noname_disable_map(plink, map, counter);
+ break;
+ }
}
- else
- r = noname_disable_map(&(en->target), map, counter);
+ r = noname_disable_map(&(en->target), map, counter);
}
break;
case NT_ANCHOR:
- {
- AnchorNode* an = NANCHOR(node);
- switch (an->type) {
- case ANCHOR_PREC_READ:
- case ANCHOR_PREC_READ_NOT:
- case ANCHOR_LOOK_BEHIND:
- case ANCHOR_LOOK_BEHIND_NOT:
- r = noname_disable_map(&(an->target), map, counter);
- break;
- }
- }
+ if (NANCHOR(node)->target)
+ r = noname_disable_map(&(NANCHOR(node)->target), map, counter);
break;
default:
@@ -1900,7 +1948,7 @@ noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
}
static int
-renumber_node_backref(Node* node, GroupNumRemap* map)
+renumber_node_backref(Node* node, GroupNumRemap* map, const int num_mem)
{
int i, pos, n, old_num;
int *backs;
@@ -1916,6 +1964,7 @@ renumber_node_backref(Node* node, GroupNumRemap* map)
backs = bn->back_dynamic;
for (i = 0, pos = 0; i < old_num; i++) {
+ if (backs[i] > num_mem) return ONIGERR_INVALID_BACKREF;
n = map[backs[i]].new_val;
if (n > 0) {
backs[pos] = n;
@@ -1928,7 +1977,7 @@ renumber_node_backref(Node* node, GroupNumRemap* map)
}
static int
-renumber_by_map(Node* node, GroupNumRemap* map)
+renumber_by_map(Node* node, GroupNumRemap* map, const int num_mem)
{
int r = 0;
@@ -1936,37 +1985,30 @@ renumber_by_map(Node* node, GroupNumRemap* map)
case NT_LIST:
case NT_ALT:
do {
- r = renumber_by_map(NCAR(node), map);
+ r = renumber_by_map(NCAR(node), map, num_mem);
} while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
break;
case NT_QTFR:
- r = renumber_by_map(NQTFR(node)->target, map);
+ r = renumber_by_map(NQTFR(node)->target, map, num_mem);
break;
case NT_ENCLOSE:
{
EncloseNode* en = NENCLOSE(node);
- if (en->type == ENCLOSE_CONDITION)
+ if (en->type == ENCLOSE_CONDITION) {
+ if (en->regnum > num_mem) return ONIGERR_INVALID_BACKREF;
en->regnum = map[en->regnum].new_val;
- r = renumber_by_map(en->target, map);
+ }
+ r = renumber_by_map(en->target, map, num_mem);
}
break;
case NT_BREF:
- r = renumber_node_backref(node, map);
+ r = renumber_node_backref(node, map, num_mem);
break;
case NT_ANCHOR:
- {
- AnchorNode* an = NANCHOR(node);
- switch (an->type) {
- case ANCHOR_PREC_READ:
- case ANCHOR_PREC_READ_NOT:
- case ANCHOR_LOOK_BEHIND:
- case ANCHOR_LOOK_BEHIND_NOT:
- r = renumber_by_map(an->target, map);
- break;
- }
- }
+ if (NANCHOR(node)->target)
+ r = renumber_by_map(NANCHOR(node)->target, map, num_mem);
break;
default:
@@ -2000,6 +2042,11 @@ numbered_ref_check(Node* node)
return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
break;
+ case NT_ANCHOR:
+ if (NANCHOR(node)->target)
+ r = numbered_ref_check(NANCHOR(node)->target);
+ break;
+
default:
break;
}
@@ -2023,7 +2070,7 @@ disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
r = noname_disable_map(root, map, &counter);
if (r != 0) return r;
- r = renumber_by_map(*root, map);
+ r = renumber_by_map(*root, map, env->num_mem);
if (r != 0) return r;
for (i = 1, pos = 1; i <= env->num_mem; i++) {
@@ -2080,13 +2127,13 @@ quantifiers_memory_node_info(Node* node)
{
int v;
do {
- v = quantifiers_memory_node_info(NCAR(node));
- if (v > r) r = v;
+ v = quantifiers_memory_node_info(NCAR(node));
+ if (v > r) r = v;
} while (v >= 0 && IS_NOT_NULL(node = NCDR(node)));
}
break;
-#ifdef USE_SUBEXP_CALL
+# ifdef USE_SUBEXP_CALL
case NT_CALL:
if (IS_CALL_RECURSION(NCALL(node))) {
return NQ_TARGET_IS_EMPTY_REC; /* tiny version */
@@ -2094,13 +2141,13 @@ quantifiers_memory_node_info(Node* node)
else
r = quantifiers_memory_node_info(NCALL(node)->target);
break;
-#endif
+# endif
case NT_QTFR:
{
QtfrNode* qn = NQTFR(node);
if (qn->upper != 0) {
- r = quantifiers_memory_node_info(qn->target);
+ r = quantifiers_memory_node_info(qn->target);
}
}
break;
@@ -2110,16 +2157,17 @@ quantifiers_memory_node_info(Node* node)
EncloseNode* en = NENCLOSE(node);
switch (en->type) {
case ENCLOSE_MEMORY:
- return NQ_TARGET_IS_EMPTY_MEM;
- break;
+ return NQ_TARGET_IS_EMPTY_MEM;
+ break;
case ENCLOSE_OPTION:
case ENCLOSE_STOP_BACKTRACK:
case ENCLOSE_CONDITION:
- r = quantifiers_memory_node_info(en->target);
- break;
+ case ENCLOSE_ABSENT:
+ r = quantifiers_memory_node_info(en->target);
+ break;
default:
- break;
+ break;
}
}
break;
@@ -2159,10 +2207,10 @@ get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env)
r = get_min_match_length(nodes[backs[0]], min, env);
if (r != 0) break;
for (i = 1; i < br->back_num; i++) {
- if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
- r = get_min_match_length(nodes[backs[i]], &tmin, env);
- if (r != 0) break;
- if (*min > tmin) *min = tmin;
+ if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
+ r = get_min_match_length(nodes[backs[i]], &tmin, env);
+ if (r != 0) break;
+ if (*min > tmin) *min = tmin;
}
}
break;
@@ -2172,7 +2220,7 @@ get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env)
if (IS_CALL_RECURSION(NCALL(node))) {
EncloseNode* en = NENCLOSE(NCALL(node)->target);
if (IS_ENCLOSE_MIN_FIXED(en))
- *min = en->min_len;
+ *min = en->min_len;
}
else
r = get_min_match_length(NCALL(node)->target, min, env);
@@ -2191,11 +2239,11 @@ get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env)
Node *x, *y;
y = node;
do {
- x = NCAR(y);
- r = get_min_match_length(x, &tmin, env);
- if (r != 0) break;
- if (y == node) *min = tmin;
- else if (*min > tmin) *min = tmin;
+ x = NCAR(y);
+ r = get_min_match_length(x, &tmin, env);
+ if (r != 0) break;
+ if (y == node) *min = tmin;
+ else if (*min > tmin) *min = tmin;
} while (r == 0 && IS_NOT_NULL(y = NCDR(y)));
}
break;
@@ -2221,9 +2269,9 @@ get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env)
QtfrNode* qn = NQTFR(node);
if (qn->lower > 0) {
- r = get_min_match_length(qn->target, min, env);
- if (r == 0)
- *min = distance_multiply(*min, qn->lower);
+ r = get_min_match_length(qn->target, min, env);
+ if (r == 0)
+ *min = distance_multiply(*min, qn->lower);
}
}
break;
@@ -2233,23 +2281,31 @@ get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env)
EncloseNode* en = NENCLOSE(node);
switch (en->type) {
case ENCLOSE_MEMORY:
-#ifdef USE_SUBEXP_CALL
- if (IS_ENCLOSE_MIN_FIXED(en))
- *min = en->min_len;
- else {
- r = get_min_match_length(en->target, min, env);
- if (r == 0) {
- en->min_len = *min;
- SET_ENCLOSE_STATUS(node, NST_MIN_FIXED);
- }
- }
- break;
-#endif
+ if (IS_ENCLOSE_MIN_FIXED(en))
+ *min = en->min_len;
+ else {
+ if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
+ *min = 0; /* recursive */
+ else {
+ SET_ENCLOSE_STATUS(node, NST_MARK1);
+ r = get_min_match_length(en->target, min, env);
+ CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
+ if (r == 0) {
+ en->min_len = *min;
+ SET_ENCLOSE_STATUS(node, NST_MIN_FIXED);
+ }
+ }
+ }
+ break;
+
case ENCLOSE_OPTION:
case ENCLOSE_STOP_BACKTRACK:
case ENCLOSE_CONDITION:
- r = get_min_match_length(en->target, min, env);
- break;
+ r = get_min_match_length(en->target, min, env);
+ break;
+
+ case ENCLOSE_ABSENT:
+ break;
}
}
break;
@@ -2274,7 +2330,7 @@ get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env)
do {
r = get_max_match_length(NCAR(node), &tmax, env);
if (r == 0)
- *max = distance_add(*max, tmax);
+ *max = distance_add(*max, tmax);
} while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
break;
@@ -2308,15 +2364,15 @@ get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env)
Node** nodes = SCANENV_MEM_NODES(env);
BRefNode* br = NBREF(node);
if (br->state & NST_RECURSION) {
- *max = ONIG_INFINITE_DISTANCE;
- break;
+ *max = ONIG_INFINITE_DISTANCE;
+ break;
}
backs = BACKREFS_P(br);
for (i = 0; i < br->back_num; i++) {
- if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
- r = get_max_match_length(nodes[backs[i]], &tmax, env);
- if (r != 0) break;
- if (*max < tmax) *max = tmax;
+ if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
+ r = get_max_match_length(nodes[backs[i]], &tmax, env);
+ if (r != 0) break;
+ if (*max < tmax) *max = tmax;
}
}
break;
@@ -2335,13 +2391,13 @@ get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env)
QtfrNode* qn = NQTFR(node);
if (qn->upper != 0) {
- r = get_max_match_length(qn->target, max, env);
- if (r == 0 && *max != 0) {
- if (! IS_REPEAT_INFINITE(qn->upper))
- *max = distance_multiply(*max, qn->upper);
- else
- *max = ONIG_INFINITE_DISTANCE;
- }
+ r = get_max_match_length(qn->target, max, env);
+ if (r == 0 && *max != 0) {
+ if (! IS_REPEAT_INFINITE(qn->upper))
+ *max = distance_multiply(*max, qn->upper);
+ else
+ *max = ONIG_INFINITE_DISTANCE;
+ }
}
}
break;
@@ -2351,23 +2407,31 @@ get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env)
EncloseNode* en = NENCLOSE(node);
switch (en->type) {
case ENCLOSE_MEMORY:
-#ifdef USE_SUBEXP_CALL
- if (IS_ENCLOSE_MAX_FIXED(en))
- *max = en->max_len;
- else {
- r = get_max_match_length(en->target, max, env);
- if (r == 0) {
- en->max_len = *max;
- SET_ENCLOSE_STATUS(node, NST_MAX_FIXED);
- }
- }
- break;
-#endif
+ if (IS_ENCLOSE_MAX_FIXED(en))
+ *max = en->max_len;
+ else {
+ if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
+ *max = ONIG_INFINITE_DISTANCE;
+ else {
+ SET_ENCLOSE_STATUS(node, NST_MARK1);
+ r = get_max_match_length(en->target, max, env);
+ CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
+ if (r == 0) {
+ en->max_len = *max;
+ SET_ENCLOSE_STATUS(node, NST_MAX_FIXED);
+ }
+ }
+ }
+ break;
+
case ENCLOSE_OPTION:
case ENCLOSE_STOP_BACKTRACK:
case ENCLOSE_CONDITION:
- r = get_max_match_length(en->target, max, env);
- break;
+ r = get_max_match_length(en->target, max, env);
+ break;
+
+ case ENCLOSE_ABSENT:
+ break;
}
}
break;
@@ -2397,7 +2461,7 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
do {
r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
if (r == 0)
- *len = (int )distance_add(*len, tlen);
+ *len = (int )distance_add(*len, tlen);
} while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
break;
@@ -2408,21 +2472,21 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
while (r == 0 && IS_NOT_NULL(node = NCDR(node))) {
- r = get_char_length_tree1(NCAR(node), reg, &tlen2, level);
- if (r == 0) {
- if (tlen != tlen2)
- varlen = 1;
- }
+ r = get_char_length_tree1(NCAR(node), reg, &tlen2, level);
+ if (r == 0) {
+ if (tlen != tlen2)
+ varlen = 1;
+ }
}
if (r == 0) {
- if (varlen != 0) {
- if (level == 1)
- r = GET_CHAR_LEN_TOP_ALT_VARLEN;
- else
- r = GET_CHAR_LEN_VARLEN;
- }
- else
- *len = tlen;
+ if (varlen != 0) {
+ if (level == 1)
+ r = GET_CHAR_LEN_TOP_ALT_VARLEN;
+ else
+ r = GET_CHAR_LEN_VARLEN;
+ }
+ else
+ *len = tlen;
}
}
break;
@@ -2432,8 +2496,8 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
StrNode* sn = NSTR(node);
UChar *s = sn->s;
while (s < sn->end) {
- s += enclen(reg->enc, s, sn->end);
- (*len)++;
+ s += enclen(reg->enc, s, sn->end);
+ (*len)++;
}
}
break;
@@ -2442,12 +2506,12 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
{
QtfrNode* qn = NQTFR(node);
if (qn->lower == qn->upper) {
- r = get_char_length_tree1(qn->target, reg, &tlen, level);
- if (r == 0)
- *len = (int )distance_multiply(tlen, qn->lower);
+ r = get_char_length_tree1(qn->target, reg, &tlen, level);
+ if (r == 0)
+ *len = (int )distance_multiply(tlen, qn->lower);
}
else
- r = GET_CHAR_LEN_VARLEN;
+ r = GET_CHAR_LEN_VARLEN;
}
break;
@@ -2475,24 +2539,25 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
switch (en->type) {
case ENCLOSE_MEMORY:
#ifdef USE_SUBEXP_CALL
- if (IS_ENCLOSE_CLEN_FIXED(en))
- *len = en->char_len;
- else {
- r = get_char_length_tree1(en->target, reg, len, level);
- if (r == 0) {
- en->char_len = *len;
- SET_ENCLOSE_STATUS(node, NST_CLEN_FIXED);
- }
- }
- break;
+ if (IS_ENCLOSE_CLEN_FIXED(en))
+ *len = en->char_len;
+ else {
+ r = get_char_length_tree1(en->target, reg, len, level);
+ if (r == 0) {
+ en->char_len = *len;
+ SET_ENCLOSE_STATUS(node, NST_CLEN_FIXED);
+ }
+ }
+ break;
#endif
case ENCLOSE_OPTION:
case ENCLOSE_STOP_BACKTRACK:
case ENCLOSE_CONDITION:
- r = get_char_length_tree1(en->target, reg, len, level);
- break;
+ r = get_char_length_tree1(en->target, reg, len, level);
+ break;
+ case ENCLOSE_ABSENT:
default:
- break;
+ break;
}
}
break;
@@ -2531,29 +2596,29 @@ is_not_included(Node* x, Node* y, regex_t* reg)
{
switch (ytype) {
case NT_CTYPE:
- if (NCTYPE(y)->ctype == NCTYPE(x)->ctype &&
- NCTYPE(y)->not != NCTYPE(x)->not &&
- NCTYPE(y)->ascii_range == NCTYPE(x)->ascii_range)
- return 1;
- else
- return 0;
- break;
+ if (NCTYPE(y)->ctype == NCTYPE(x)->ctype &&
+ NCTYPE(y)->not != NCTYPE(x)->not &&
+ NCTYPE(y)->ascii_range == NCTYPE(x)->ascii_range)
+ return 1;
+ else
+ return 0;
+ break;
case NT_CCLASS:
swap:
- {
- Node* tmp;
- tmp = x; x = y; y = tmp;
- goto retry;
- }
- break;
+ {
+ Node* tmp;
+ tmp = x; x = y; y = tmp;
+ goto retry;
+ }
+ break;
case NT_STR:
- goto swap;
- break;
+ goto swap;
+ break;
default:
- break;
+ break;
}
}
break;
@@ -2563,79 +2628,80 @@ is_not_included(Node* x, Node* y, regex_t* reg)
CClassNode* xc = NCCLASS(x);
switch (ytype) {
case NT_CTYPE:
- switch (NCTYPE(y)->ctype) {
- case ONIGENC_CTYPE_WORD:
- if (NCTYPE(y)->not == 0) {
- if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) {
- for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
- if (BITSET_AT(xc->bs, i)) {
- if (NCTYPE(y)->ascii_range) {
- if (IS_CODE_SB_WORD(reg->enc, i)) return 0;
- }
- else {
- if (ONIGENC_IS_CODE_WORD(reg->enc, i)) return 0;
- }
- }
- }
- return 1;
- }
- return 0;
- }
- else {
- for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
- int is_word;
- if (NCTYPE(y)->ascii_range)
- is_word = IS_CODE_SB_WORD(reg->enc, i);
- else
- is_word = ONIGENC_IS_CODE_WORD(reg->enc, i);
- if (! is_word) {
- if (!IS_NCCLASS_NOT(xc)) {
- if (BITSET_AT(xc->bs, i))
- return 0;
- }
- else {
- if (! BITSET_AT(xc->bs, i))
- return 0;
- }
- }
- }
- return 1;
- }
- break;
-
- default:
- break;
- }
- break;
+ switch (NCTYPE(y)->ctype) {
+ case ONIGENC_CTYPE_WORD:
+ if (NCTYPE(y)->not == 0) {
+ if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) {
+ for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
+ if (BITSET_AT(xc->bs, i)) {
+ if (NCTYPE(y)->ascii_range) {
+ if (IS_CODE_SB_WORD(reg->enc, i)) return 0;
+ }
+ else {
+ if (ONIGENC_IS_CODE_WORD(reg->enc, i)) return 0;
+ }
+ }
+ }
+ return 1;
+ }
+ return 0;
+ }
+ else {
+ if (IS_NOT_NULL(xc->mbuf)) return 0;
+ for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
+ int is_word;
+ if (NCTYPE(y)->ascii_range)
+ is_word = IS_CODE_SB_WORD(reg->enc, i);
+ else
+ is_word = ONIGENC_IS_CODE_WORD(reg->enc, i);
+ if (! is_word) {
+ if (!IS_NCCLASS_NOT(xc)) {
+ if (BITSET_AT(xc->bs, i))
+ return 0;
+ }
+ else {
+ if (! BITSET_AT(xc->bs, i))
+ return 0;
+ }
+ }
+ }
+ return 1;
+ }
+ break;
+
+ default:
+ break;
+ }
+ break;
case NT_CCLASS:
- {
- int v;
- CClassNode* yc = NCCLASS(y);
+ {
+ int v;
+ CClassNode* yc = NCCLASS(y);
- for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
- v = BITSET_AT(xc->bs, i);
- if ((v != 0 && !IS_NCCLASS_NOT(xc)) ||
+ for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
+ v = BITSET_AT(xc->bs, i);
+ if ((v != 0 && !IS_NCCLASS_NOT(xc)) ||
(v == 0 && IS_NCCLASS_NOT(xc))) {
- v = BITSET_AT(yc->bs, i);
- if ((v != 0 && !IS_NCCLASS_NOT(yc)) ||
+ v = BITSET_AT(yc->bs, i);
+ if ((v != 0 && !IS_NCCLASS_NOT(yc)) ||
(v == 0 && IS_NCCLASS_NOT(yc)))
- return 0;
- }
- }
- if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) ||
- (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc)))
- return 1;
- return 0;
- }
- break;
+ return 0;
+ }
+ }
+ if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) ||
+ (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc)))
+ return 1;
+ return 0;
+ }
+ break;
case NT_STR:
- goto swap;
- break;
+ goto swap;
+ break;
default:
- break;
+ break;
}
}
break;
@@ -2644,60 +2710,60 @@ is_not_included(Node* x, Node* y, regex_t* reg)
{
StrNode* xs = NSTR(x);
if (NSTRING_LEN(x) == 0)
- break;
+ break;
switch (ytype) {
case NT_CTYPE:
- switch (NCTYPE(y)->ctype) {
- case ONIGENC_CTYPE_WORD:
- if (NCTYPE(y)->ascii_range) {
- if (ONIGENC_IS_MBC_ASCII_WORD(reg->enc, xs->s, xs->end))
- return NCTYPE(y)->not;
- else
- return !(NCTYPE(y)->not);
- }
- else {
- if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end))
- return NCTYPE(y)->not;
- else
- return !(NCTYPE(y)->not);
- }
- break;
- default:
- break;
- }
- break;
+ switch (NCTYPE(y)->ctype) {
+ case ONIGENC_CTYPE_WORD:
+ if (NCTYPE(y)->ascii_range) {
+ if (ONIGENC_IS_MBC_ASCII_WORD(reg->enc, xs->s, xs->end))
+ return NCTYPE(y)->not;
+ else
+ return !(NCTYPE(y)->not);
+ }
+ else {
+ if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end))
+ return NCTYPE(y)->not;
+ else
+ return !(NCTYPE(y)->not);
+ }
+ break;
+ default:
+ break;
+ }
+ break;
case NT_CCLASS:
- {
- CClassNode* cc = NCCLASS(y);
+ {
+ CClassNode* cc = NCCLASS(y);
- code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
- xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
- return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);
- }
- break;
+ code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
+ xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
+ return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);
+ }
+ break;
case NT_STR:
- {
- UChar *q;
- StrNode* ys = NSTR(y);
- len = NSTRING_LEN(x);
- if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y);
- if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) {
+ {
+ UChar *q;
+ StrNode* ys = NSTR(y);
+ len = NSTRING_LEN(x);
+ if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y);
+ if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) {
/* tiny version */
return 0;
- }
- else {
- for (i = 0, p = ys->s, q = xs->s; (OnigDistance )i < len; i++, p++, q++) {
- if (*p != *q) return 1;
- }
- }
- }
- break;
+ }
+ else {
+ for (i = 0, p = ys->s, q = xs->s; (OnigDistance )i < len; i++, p++, q++) {
+ if (*p != *q) return 1;
+ }
+ }
+ }
+ break;
default:
- break;
+ break;
}
}
break;
@@ -2737,15 +2803,12 @@ get_head_value_node(Node* node, int exact, regex_t* reg)
case NT_STR:
{
StrNode* sn = NSTR(node);
-
if (sn->end <= sn->s)
- break;
+ break;
- if (exact != 0 &&
- !NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) {
- }
- else {
- n = node;
+ if (exact == 0 ||
+ NSTRING_IS_RAW(node) || !IS_IGNORECASE(reg->options)) {
+ n = node;
}
}
break;
@@ -2754,10 +2817,12 @@ get_head_value_node(Node* node, int exact, regex_t* reg)
{
QtfrNode* qn = NQTFR(node);
if (qn->lower > 0) {
- if (IS_NOT_NULL(qn->head_exact))
- n = qn->head_exact;
- else
- n = get_head_value_node(qn->target, exact, reg);
+#ifdef USE_OP_PUSH_OR_JUMP_EXACT
+ if (IS_NOT_NULL(qn->head_exact))
+ n = qn->head_exact;
+ else
+#endif
+ n = get_head_value_node(qn->target, exact, reg);
}
}
break;
@@ -2767,20 +2832,23 @@ get_head_value_node(Node* node, int exact, regex_t* reg)
EncloseNode* en = NENCLOSE(node);
switch (en->type) {
case ENCLOSE_OPTION:
- {
- OnigOptionType options = reg->options;
+ {
+ OnigOptionType options = reg->options;
- reg->options = NENCLOSE(node)->option;
- n = get_head_value_node(NENCLOSE(node)->target, exact, reg);
- reg->options = options;
- }
- break;
+ reg->options = NENCLOSE(node)->option;
+ n = get_head_value_node(NENCLOSE(node)->target, exact, reg);
+ reg->options = options;
+ }
+ break;
case ENCLOSE_MEMORY:
case ENCLOSE_STOP_BACKTRACK:
case ENCLOSE_CONDITION:
- n = get_head_value_node(en->target, exact, reg);
- break;
+ n = get_head_value_node(en->target, exact, reg);
+ break;
+
+ case ENCLOSE_ABSENT:
+ break;
}
}
break;
@@ -2811,20 +2879,20 @@ check_type_tree(Node* node, int type_mask, int enclose_mask, int anchor_mask)
case NT_ALT:
do {
r = check_type_tree(NCAR(node), type_mask, enclose_mask,
- anchor_mask);
+ anchor_mask);
} while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
break;
case NT_QTFR:
r = check_type_tree(NQTFR(node)->target, type_mask, enclose_mask,
- anchor_mask);
+ anchor_mask);
break;
case NT_ENCLOSE:
{
EncloseNode* en = NENCLOSE(node);
if ((en->type & enclose_mask) == 0)
- return 1;
+ return 1;
r = check_type_tree(en->target, type_mask, enclose_mask, anchor_mask);
}
@@ -2837,7 +2905,7 @@ check_type_tree(Node* node, int type_mask, int enclose_mask, int anchor_mask)
if (NANCHOR(node)->target)
r = check_type_tree(NANCHOR(node)->target,
- type_mask, enclose_mask, anchor_mask);
+ type_mask, enclose_mask, anchor_mask);
break;
default:
@@ -2848,8 +2916,8 @@ check_type_tree(Node* node, int type_mask, int enclose_mask, int anchor_mask)
#ifdef USE_SUBEXP_CALL
-#define RECURSION_EXIST 1
-#define RECURSION_INFINITE 2
+# define RECURSION_EXIST 1
+# define RECURSION_INFINITE 2
static int
subexp_inf_recursive_check(Node* node, ScanEnv* env, int head)
@@ -2867,14 +2935,14 @@ subexp_inf_recursive_check(Node* node, ScanEnv* env, int head)
x = node;
do {
- ret = subexp_inf_recursive_check(NCAR(x), env, head);
- if (ret < 0 || ret == RECURSION_INFINITE) return ret;
- r |= ret;
- if (head) {
- ret = get_min_match_length(NCAR(x), &min, env);
- if (ret != 0) return ret;
- if (min != 0) head = 0;
- }
+ ret = subexp_inf_recursive_check(NCAR(x), env, head);
+ if (ret < 0 || ret == RECURSION_INFINITE) return ret;
+ r |= ret;
+ if (head) {
+ ret = get_min_match_length(NCAR(x), &min, env);
+ if (ret != 0) return ret;
+ if (min != 0) head = 0;
+ }
} while (IS_NOT_NULL(x = NCDR(x)));
}
break;
@@ -2884,9 +2952,9 @@ subexp_inf_recursive_check(Node* node, ScanEnv* env, int head)
int ret;
r = RECURSION_EXIST;
do {
- ret = subexp_inf_recursive_check(NCAR(node), env, head);
- if (ret < 0 || ret == RECURSION_INFINITE) return ret;
- r &= ret;
+ ret = subexp_inf_recursive_check(NCAR(node), env, head);
+ if (ret < 0 || ret == RECURSION_INFINITE) return ret;
+ r &= ret;
} while (IS_NOT_NULL(node = NCDR(node)));
}
break;
@@ -2906,8 +2974,8 @@ subexp_inf_recursive_check(Node* node, ScanEnv* env, int head)
case ANCHOR_PREC_READ_NOT:
case ANCHOR_LOOK_BEHIND:
case ANCHOR_LOOK_BEHIND_NOT:
- r = subexp_inf_recursive_check(an->target, env, head);
- break;
+ r = subexp_inf_recursive_check(an->target, env, head);
+ break;
}
}
break;
@@ -2962,8 +3030,8 @@ subexp_inf_recursive_check_trav(Node* node, ScanEnv* env)
case ANCHOR_PREC_READ_NOT:
case ANCHOR_LOOK_BEHIND:
case ANCHOR_LOOK_BEHIND_NOT:
- r = subexp_inf_recursive_check_trav(an->target, env);
- break;
+ r = subexp_inf_recursive_check_trav(an->target, env);
+ break;
}
}
break;
@@ -2973,10 +3041,10 @@ subexp_inf_recursive_check_trav(Node* node, ScanEnv* env)
EncloseNode* en = NENCLOSE(node);
if (IS_ENCLOSE_RECURSION(en)) {
- SET_ENCLOSE_STATUS(node, NST_MARK1);
- r = subexp_inf_recursive_check(en->target, env, 1);
- if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION;
- CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
+ SET_ENCLOSE_STATUS(node, NST_MARK1);
+ r = subexp_inf_recursive_check(en->target, env, 1);
+ if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION;
+ CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
}
r = subexp_inf_recursive_check_trav(en->target, env);
}
@@ -3015,8 +3083,8 @@ subexp_recursive_check(Node* node)
case ANCHOR_PREC_READ_NOT:
case ANCHOR_LOOK_BEHIND:
case ANCHOR_LOOK_BEHIND_NOT:
- r = subexp_recursive_check(an->target);
- break;
+ r = subexp_recursive_check(an->target);
+ break;
}
}
break;
@@ -3049,7 +3117,7 @@ subexp_recursive_check(Node* node)
static int
subexp_recursive_check_trav(Node* node, ScanEnv* env)
{
-#define FOUND_CALLED_NODE 1
+# define FOUND_CALLED_NODE 1
int type;
int r = 0;
@@ -3061,9 +3129,9 @@ subexp_recursive_check_trav(Node* node, ScanEnv* env)
{
int ret;
do {
- ret = subexp_recursive_check_trav(NCAR(node), env);
- if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE;
- else if (ret < 0) return ret;
+ ret = subexp_recursive_check_trav(NCAR(node), env);
+ if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE;
+ else if (ret < 0) return ret;
} while (IS_NOT_NULL(node = NCDR(node)));
}
break;
@@ -3072,7 +3140,7 @@ subexp_recursive_check_trav(Node* node, ScanEnv* env)
r = subexp_recursive_check_trav(NQTFR(node)->target, env);
if (NQTFR(node)->upper == 0) {
if (r == FOUND_CALLED_NODE)
- NQTFR(node)->is_refered = 1;
+ NQTFR(node)->is_referred = 1;
}
break;
@@ -3084,8 +3152,8 @@ subexp_recursive_check_trav(Node* node, ScanEnv* env)
case ANCHOR_PREC_READ_NOT:
case ANCHOR_LOOK_BEHIND:
case ANCHOR_LOOK_BEHIND_NOT:
- r = subexp_recursive_check_trav(an->target, env);
- break;
+ r = subexp_recursive_check_trav(an->target, env);
+ break;
}
}
break;
@@ -3095,16 +3163,16 @@ subexp_recursive_check_trav(Node* node, ScanEnv* env)
EncloseNode* en = NENCLOSE(node);
if (! IS_ENCLOSE_RECURSION(en)) {
- if (IS_ENCLOSE_CALLED(en)) {
- SET_ENCLOSE_STATUS(node, NST_MARK1);
- r = subexp_recursive_check(en->target);
- if (r != 0) SET_ENCLOSE_STATUS(node, NST_RECURSION);
- CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
- }
+ if (IS_ENCLOSE_CALLED(en)) {
+ SET_ENCLOSE_STATUS(node, NST_MARK1);
+ r = subexp_recursive_check(en->target);
+ if (r != 0) SET_ENCLOSE_STATUS(node, NST_RECURSION);
+ CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
+ }
}
r = subexp_recursive_check_trav(en->target, env);
if (IS_ENCLOSE_CALLED(en))
- r |= FOUND_CALLED_NODE;
+ r |= FOUND_CALLED_NODE;
}
break;
@@ -3148,62 +3216,62 @@ setup_subexp_call(Node* node, ScanEnv* env)
Node** nodes = SCANENV_MEM_NODES(env);
if (cn->group_num != 0) {
- int gnum = cn->group_num;
+ int gnum = cn->group_num;
-#ifdef USE_NAMED_GROUP
- if (env->num_named > 0 &&
- IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
- !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
- return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
- }
-#endif
- if (gnum > env->num_mem) {
- onig_scan_env_set_error_string(env,
- ONIGERR_UNDEFINED_GROUP_REFERENCE, cn->name, cn->name_end);
- return ONIGERR_UNDEFINED_GROUP_REFERENCE;
- }
+# ifdef USE_NAMED_GROUP
+ if (env->num_named > 0 &&
+ IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
+ !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
+ return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
+ }
+# endif
+ if (gnum > env->num_mem) {
+ onig_scan_env_set_error_string(env,
+ ONIGERR_UNDEFINED_GROUP_REFERENCE, cn->name, cn->name_end);
+ return ONIGERR_UNDEFINED_GROUP_REFERENCE;
+ }
-#ifdef USE_NAMED_GROUP
+# ifdef USE_NAMED_GROUP
set_call_attr:
-#endif
- cn->target = nodes[cn->group_num];
- if (IS_NULL(cn->target)) {
- onig_scan_env_set_error_string(env,
- ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
- return ONIGERR_UNDEFINED_NAME_REFERENCE;
- }
- SET_ENCLOSE_STATUS(cn->target, NST_CALLED);
- BIT_STATUS_ON_AT(env->bt_mem_start, cn->group_num);
- cn->unset_addr_list = env->unset_addr_list;
+# endif
+ cn->target = nodes[cn->group_num];
+ if (IS_NULL(cn->target)) {
+ onig_scan_env_set_error_string(env,
+ ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
+ return ONIGERR_UNDEFINED_NAME_REFERENCE;
+ }
+ SET_ENCLOSE_STATUS(cn->target, NST_CALLED);
+ BIT_STATUS_ON_AT(env->bt_mem_start, cn->group_num);
+ cn->unset_addr_list = env->unset_addr_list;
}
-#ifdef USE_NAMED_GROUP
-#ifdef USE_PERL_SUBEXP_CALL
+# ifdef USE_NAMED_GROUP
+# ifdef USE_PERL_SUBEXP_CALL
else if (cn->name == cn->name_end) {
- goto set_call_attr;
+ goto set_call_attr;
}
-#endif
+# endif
else {
- int *refs;
-
- int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end,
- &refs);
- if (n <= 0) {
- onig_scan_env_set_error_string(env,
- ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
- return ONIGERR_UNDEFINED_NAME_REFERENCE;
- }
- else if (n > 1 &&
- ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME_CALL)) {
- onig_scan_env_set_error_string(env,
- ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end);
- return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL;
- }
- else {
- cn->group_num = refs[0];
- goto set_call_attr;
- }
+ int *refs;
+
+ int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end,
+ &refs);
+ if (n <= 0) {
+ onig_scan_env_set_error_string(env,
+ ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
+ return ONIGERR_UNDEFINED_NAME_REFERENCE;
+ }
+ else if (n > 1 &&
+ ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME_CALL)) {
+ onig_scan_env_set_error_string(env,
+ ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end);
+ return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL;
+ }
+ else {
+ cn->group_num = refs[0];
+ goto set_call_attr;
+ }
}
-#endif
+# endif
}
break;
@@ -3216,8 +3284,8 @@ setup_subexp_call(Node* node, ScanEnv* env)
case ANCHOR_PREC_READ_NOT:
case ANCHOR_LOOK_BEHIND:
case ANCHOR_LOOK_BEHIND_NOT:
- r = setup_subexp_call(an->target, env);
- break;
+ r = setup_subexp_call(an->target, env);
+ break;
}
}
break;
@@ -3230,6 +3298,14 @@ setup_subexp_call(Node* node, ScanEnv* env)
}
#endif
+#define IN_ALT (1<<0)
+#define IN_NOT (1<<1)
+#define IN_REPEAT (1<<2)
+#define IN_VAR_REPEAT (1<<3)
+#define IN_CALL (1<<4)
+#define IN_RECCALL (1<<5)
+#define IN_LOOK_BEHIND (1<<6)
+
/* divide different length alternatives in look-behind.
(?<=A|B) ==> (?<=A)|(?<=B)
(?<!A|B) ==> (?<!A)(?<!B)
@@ -3286,7 +3362,7 @@ setup_look_behind(Node* node, regex_t* reg, ScanEnv* env)
}
static int
-next_setup(Node* node, Node* next_node, int in_root, regex_t* reg)
+next_setup(Node* node, Node* next_node, regex_t* reg)
{
int type;
@@ -3299,54 +3375,32 @@ next_setup(Node* node, Node* next_node, int in_root, regex_t* reg)
Node* n = get_head_value_node(next_node, 1, reg);
/* '\0': for UTF-16BE etc... */
if (IS_NOT_NULL(n) && NSTR(n)->s[0] != '\0') {
- qn->next_head_exact = n;
+ qn->next_head_exact = n;
}
#endif
- /* automatic possessivation a*b ==> (?>a*)b */
+ /* automatic possessification a*b ==> (?>a*)b */
if (qn->lower <= 1) {
- int ttype = NTYPE(qn->target);
- if (IS_NODE_TYPE_SIMPLE(ttype)) {
- Node *x, *y;
- x = get_head_value_node(qn->target, 0, reg);
- if (IS_NOT_NULL(x)) {
- y = get_head_value_node(next_node, 0, reg);
- if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) {
- Node* en = onig_node_new_enclose(ENCLOSE_STOP_BACKTRACK);
- CHECK_NULL_RETURN_MEMERR(en);
- SET_ENCLOSE_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT);
- swap_node(node, en);
- NENCLOSE(node)->target = en;
- }
- }
- }
- }
-
-#ifndef ONIG_DONT_OPTIMIZE
- if (NTYPE(node) == NT_QTFR && /* the type may be changed by above block */
- in_root && /* qn->lower == 0 && */
- NTYPE(qn->target) == NT_CANY &&
- ! IS_MULTILINE(reg->options)) {
- /* implicit anchor: /.*a/ ==> /(?:^|\G).*a/ */
- Node *np;
- np = onig_node_new_list(NULL_NODE, NULL_NODE);
- CHECK_NULL_RETURN_MEMERR(np);
- swap_node(node, np);
- NCDR(node) = onig_node_new_list(np, NULL_NODE);
- if (IS_NULL(NCDR(node))) {
- onig_node_free(np);
- return ONIGERR_MEMORY;
- }
- np = onig_node_new_anchor(ANCHOR_ANYCHAR_STAR); /* (?:^|\G) */
- CHECK_NULL_RETURN_MEMERR(np);
- NCAR(node) = np;
+ int ttype = NTYPE(qn->target);
+ if (IS_NODE_TYPE_SIMPLE(ttype)) {
+ Node *x, *y;
+ x = get_head_value_node(qn->target, 0, reg);
+ if (IS_NOT_NULL(x)) {
+ y = get_head_value_node(next_node, 0, reg);
+ if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) {
+ Node* en = onig_node_new_enclose(ENCLOSE_STOP_BACKTRACK);
+ CHECK_NULL_RETURN_MEMERR(en);
+ SET_ENCLOSE_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT);
+ swap_node(node, en);
+ NENCLOSE(node)->target = en;
+ }
+ }
+ }
}
-#endif
}
}
else if (type == NT_ENCLOSE) {
EncloseNode* en = NENCLOSE(node);
- in_root = 0;
- if (en->type == ENCLOSE_MEMORY) {
+ if (en->type == ENCLOSE_MEMORY && !IS_ENCLOSE_CALLED(en)) {
node = en->target;
goto retry;
}
@@ -3376,15 +3430,15 @@ update_string_node_case_fold(regex_t* reg, Node *node)
len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf);
for (i = 0; i < len; i++) {
if (sp >= ebuf) {
- UChar* p = (UChar* )xrealloc(sbuf, sbuf_size * 2);
- if (IS_NULL(p)) {
- xfree(sbuf);
- return ONIGERR_MEMORY;
- }
- sbuf = p;
- sp = sbuf + sbuf_size;
- sbuf_size *= 2;
- ebuf = sbuf + sbuf_size;
+ UChar* p = (UChar* )xrealloc(sbuf, sbuf_size * 2);
+ if (IS_NULL(p)) {
+ xfree(sbuf);
+ return ONIGERR_MEMORY;
+ }
+ sbuf = p;
+ sp = sbuf + sbuf_size;
+ sbuf_size *= 2;
+ ebuf = sbuf + sbuf_size;
}
*sp++ = buf[i];
@@ -3392,18 +3446,14 @@ update_string_node_case_fold(regex_t* reg, Node *node)
}
r = onig_node_str_set(node, sbuf, sp);
- if (r != 0) {
- xfree(sbuf);
- return r;
- }
xfree(sbuf);
- return 0;
+ return r;
}
static int
expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end,
- regex_t* reg)
+ regex_t* reg)
{
int r;
Node *node;
@@ -3424,26 +3474,39 @@ expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end,
}
static int
+is_case_fold_variable_len(int item_num, OnigCaseFoldCodeItem items[],
+ int slen)
+{
+ int i;
+
+ for (i = 0; i < item_num; i++) {
+ if (items[i].byte_len != slen) {
+ return 1;
+ }
+ if (items[i].code_len != 1) {
+ return 1;
+ }
+ }
+ return 0;
+}
+
+static int
expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[],
- UChar *p, int slen, UChar *end,
- regex_t* reg, Node **rnode)
+ UChar *p, int slen, UChar *end,
+ regex_t* reg, Node **rnode)
{
- int r, i, j, len, varlen, varclen;
+ int r, i, j, len, varlen;
Node *anode, *var_anode, *snode, *xnode, *an;
UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
*rnode = var_anode = NULL_NODE;
varlen = 0;
- varclen = 0;
for (i = 0; i < item_num; i++) {
if (items[i].byte_len != slen) {
varlen = 1;
break;
}
- if (items[i].code_len != 1) {
- varclen = 1;
- }
}
if (varlen != 0) {
@@ -3475,8 +3538,8 @@ expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[],
for (j = 0; j < items[i].code_len; j++) {
len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf);
if (len < 0) {
- r = len;
- goto mem_err2;
+ r = len;
+ goto mem_err2;
}
r = onig_node_str_cat(snode, buf, buf + len);
@@ -3493,29 +3556,29 @@ expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[],
UChar *q = p + items[i].byte_len;
if (q < end) {
- r = expand_case_fold_make_rem_string(&rem, q, end, reg);
- if (r != 0) {
- onig_node_free(an);
- goto mem_err2;
- }
+ r = expand_case_fold_make_rem_string(&rem, q, end, reg);
+ if (r != 0) {
+ onig_node_free(an);
+ goto mem_err2;
+ }
- xnode = onig_node_list_add(NULL_NODE, snode);
- if (IS_NULL(xnode)) {
- onig_node_free(an);
- onig_node_free(rem);
- goto mem_err2;
- }
- if (IS_NULL(onig_node_list_add(xnode, rem))) {
- onig_node_free(an);
- onig_node_free(xnode);
- onig_node_free(rem);
- goto mem_err;
- }
+ xnode = onig_node_list_add(NULL_NODE, snode);
+ if (IS_NULL(xnode)) {
+ onig_node_free(an);
+ onig_node_free(rem);
+ goto mem_err2;
+ }
+ if (IS_NULL(onig_node_list_add(xnode, rem))) {
+ onig_node_free(an);
+ onig_node_free(xnode);
+ onig_node_free(rem);
+ goto mem_err;
+ }
- NCAR(an) = xnode;
+ NCAR(an) = xnode;
}
else {
- NCAR(an) = snode;
+ NCAR(an) = snode;
}
NCDR(var_anode) = an;
@@ -3528,8 +3591,6 @@ expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[],
}
}
- if (varclen && !varlen)
- return 2;
return varlen;
mem_err2:
@@ -3541,31 +3602,36 @@ expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[],
return ONIGERR_MEMORY;
}
-static int
-expand_case_fold_string(Node* node, regex_t* reg)
-{
#define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8
+static int
+expand_case_fold_string(Node* node, regex_t* reg, int state)
+{
int r, n, len, alt_num;
int varlen = 0;
+ int is_in_look_behind;
UChar *start, *end, *p;
Node *top_root, *root, *snode, *prev_node;
OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
- StrNode* sn = NSTR(node);
+ StrNode* sn;
if (NSTRING_IS_AMBIG(node)) return 0;
+ sn = NSTR(node);
+
start = sn->s;
end = sn->end;
if (start >= end) return 0;
+ is_in_look_behind = (state & IN_LOOK_BEHIND) != 0;
+
r = 0;
top_root = root = prev_node = snode = NULL_NODE;
alt_num = 1;
p = start;
while (p < end) {
n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag,
- p, end, items);
+ p, end, items);
if (n < 0) {
r = n;
goto err;
@@ -3573,24 +3639,26 @@ expand_case_fold_string(Node* node, regex_t* reg)
len = enclen(reg->enc, p, end);
- if (n == 0) {
+ varlen = is_case_fold_variable_len(n, items, len);
+ if (n == 0 || varlen == 0 || is_in_look_behind) {
if (IS_NULL(snode)) {
- if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
- top_root = root = onig_node_list_add(NULL_NODE, prev_node);
- if (IS_NULL(root)) {
- onig_node_free(prev_node);
- goto mem_err;
- }
- }
+ if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
+ onig_node_free(top_root);
+ top_root = root = onig_node_list_add(NULL_NODE, prev_node);
+ if (IS_NULL(root)) {
+ onig_node_free(prev_node);
+ goto mem_err;
+ }
+ }
- prev_node = snode = onig_node_new_str(NULL, NULL);
- if (IS_NULL(snode)) goto mem_err;
- if (IS_NOT_NULL(root)) {
- if (IS_NULL(onig_node_list_add(root, snode))) {
- onig_node_free(snode);
- goto mem_err;
- }
- }
+ prev_node = snode = onig_node_new_str(NULL, NULL);
+ if (IS_NULL(snode)) goto mem_err;
+ if (IS_NOT_NULL(root)) {
+ if (IS_NULL(onig_node_list_add(root, snode))) {
+ onig_node_free(snode);
+ goto mem_err;
+ }
+ }
}
r = onig_node_str_cat(snode, p, p + len);
@@ -3600,37 +3668,43 @@ expand_case_fold_string(Node* node, regex_t* reg)
alt_num *= (n + 1);
if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break;
+ if (IS_NOT_NULL(snode)) {
+ r = update_string_node_case_fold(reg, snode);
+ if (r == 0) {
+ NSTRING_SET_AMBIG(snode);
+ }
+ }
if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
- top_root = root = onig_node_list_add(NULL_NODE, prev_node);
- if (IS_NULL(root)) {
- onig_node_free(prev_node);
- goto mem_err;
- }
+ onig_node_free(top_root);
+ top_root = root = onig_node_list_add(NULL_NODE, prev_node);
+ if (IS_NULL(root)) {
+ onig_node_free(prev_node);
+ goto mem_err;
+ }
}
r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node);
if (r < 0) goto mem_err;
- if (r > 0) varlen = 1;
if (r == 1) {
- if (IS_NULL(root)) {
- top_root = prev_node;
- }
- else {
- if (IS_NULL(onig_node_list_add(root, prev_node))) {
- onig_node_free(prev_node);
- goto mem_err;
- }
- }
+ if (IS_NULL(root)) {
+ top_root = prev_node;
+ }
+ else {
+ if (IS_NULL(onig_node_list_add(root, prev_node))) {
+ onig_node_free(prev_node);
+ goto mem_err;
+ }
+ }
- root = NCAR(prev_node);
+ root = NCAR(prev_node);
}
- else { /* r == 0 || r == 2 */
- if (IS_NOT_NULL(root)) {
- if (IS_NULL(onig_node_list_add(root, prev_node))) {
- onig_node_free(prev_node);
- goto mem_err;
- }
- }
+ else { /* r == 0 */
+ if (IS_NOT_NULL(root)) {
+ if (IS_NULL(onig_node_list_add(root, prev_node))) {
+ onig_node_free(prev_node);
+ goto mem_err;
+ }
+ }
}
snode = NULL_NODE;
@@ -3638,6 +3712,12 @@ expand_case_fold_string(Node* node, regex_t* reg)
p += len;
}
+ if (IS_NOT_NULL(snode)) {
+ r = update_string_node_case_fold(reg, snode);
+ if (r == 0) {
+ NSTRING_SET_AMBIG(snode);
+ }
+ }
if (p < end) {
Node *srem;
@@ -3646,11 +3726,12 @@ expand_case_fold_string(Node* node, regex_t* reg)
if (r != 0) goto mem_err;
if (IS_NOT_NULL(prev_node) && IS_NULL(root)) {
+ onig_node_free(top_root);
top_root = root = onig_node_list_add(NULL_NODE, prev_node);
if (IS_NULL(root)) {
- onig_node_free(srem);
- onig_node_free(prev_node);
- goto mem_err;
+ onig_node_free(srem);
+ onig_node_free(prev_node);
+ goto mem_err;
}
}
@@ -3659,28 +3740,17 @@ expand_case_fold_string(Node* node, regex_t* reg)
}
else {
if (IS_NULL(onig_node_list_add(root, srem))) {
- onig_node_free(srem);
- goto mem_err;
+ onig_node_free(srem);
+ goto mem_err;
}
}
}
/* ending */
top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node);
- if (!varlen) {
- /* When all expanded strings are same length, case-insensitive
- BM search will be used. */
- r = update_string_node_case_fold(reg, node);
- if (r == 0) {
- NSTRING_SET_AMBIG(node);
- }
- }
- else {
- swap_node(node, top_root);
- r = 0;
- }
+ swap_node(node, top_root);
onig_node_free(top_root);
- return r;
+ return 0;
mem_err:
r = ONIGERR_MEMORY;
@@ -3693,12 +3763,12 @@ expand_case_fold_string(Node* node, regex_t* reg)
#ifdef USE_COMBINATION_EXPLOSION_CHECK
-#define CEC_THRES_NUM_BIG_REPEAT 512
-#define CEC_INFINITE_NUM 0x7fffffff
+# define CEC_THRES_NUM_BIG_REPEAT 512
+# define CEC_INFINITE_NUM 0x7fffffff
-#define CEC_IN_INFINITE_REPEAT (1<<0)
-#define CEC_IN_FINITE_REPEAT (1<<1)
-#define CEC_CONT_BIG_REPEAT (1<<2)
+# define CEC_IN_INFINITE_REPEAT (1<<0)
+# define CEC_IN_FINITE_REPEAT (1<<1)
+# define CEC_CONT_BIG_REPEAT (1<<2)
static int
setup_comb_exp_check(Node* node, int state, ScanEnv* env)
@@ -3710,10 +3780,8 @@ setup_comb_exp_check(Node* node, int state, ScanEnv* env)
switch (type) {
case NT_LIST:
{
- Node* prev = NULL_NODE;
do {
- r = setup_comb_exp_check(NCAR(node), r, env);
- prev = NCAR(node);
+ r = setup_comb_exp_check(NCAR(node), r, env);
} while (r >= 0 && IS_NOT_NULL(node = NCDR(node)));
}
break;
@@ -3722,8 +3790,8 @@ setup_comb_exp_check(Node* node, int state, ScanEnv* env)
{
int ret;
do {
- ret = setup_comb_exp_check(NCAR(node), state, env);
- r |= ret;
+ ret = setup_comb_exp_check(NCAR(node), state, env);
+ r |= ret;
} while (ret >= 0 && IS_NOT_NULL(node = NCDR(node)));
}
break;
@@ -3737,55 +3805,55 @@ setup_comb_exp_check(Node* node, int state, ScanEnv* env)
int var_num;
if (! IS_REPEAT_INFINITE(qn->upper)) {
- if (qn->upper > 1) {
- /* {0,1}, {1,1} are allowed */
- child_state |= CEC_IN_FINITE_REPEAT;
-
- /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */
- if (env->backrefed_mem == 0) {
- if (NTYPE(qn->target) == NT_ENCLOSE) {
- EncloseNode* en = NENCLOSE(qn->target);
- if (en->type == ENCLOSE_MEMORY) {
- if (NTYPE(en->target) == NT_QTFR) {
- QtfrNode* q = NQTFR(en->target);
- if (IS_REPEAT_INFINITE(q->upper)
- && q->greedy == qn->greedy) {
- qn->upper = (qn->lower == 0 ? 1 : qn->lower);
- if (qn->upper == 1)
- child_state = state;
- }
- }
- }
- }
- }
- }
+ if (qn->upper > 1) {
+ /* {0,1}, {1,1} are allowed */
+ child_state |= CEC_IN_FINITE_REPEAT;
+
+ /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */
+ if (env->backrefed_mem == 0) {
+ if (NTYPE(qn->target) == NT_ENCLOSE) {
+ EncloseNode* en = NENCLOSE(qn->target);
+ if (en->type == ENCLOSE_MEMORY) {
+ if (NTYPE(en->target) == NT_QTFR) {
+ QtfrNode* q = NQTFR(en->target);
+ if (IS_REPEAT_INFINITE(q->upper)
+ && q->greedy == qn->greedy) {
+ qn->upper = (qn->lower == 0 ? 1 : qn->lower);
+ if (qn->upper == 1)
+ child_state = state;
+ }
+ }
+ }
+ }
+ }
+ }
}
if (state & CEC_IN_FINITE_REPEAT) {
- qn->comb_exp_check_num = -1;
+ qn->comb_exp_check_num = -1;
}
else {
- if (IS_REPEAT_INFINITE(qn->upper)) {
- var_num = CEC_INFINITE_NUM;
- child_state |= CEC_IN_INFINITE_REPEAT;
- }
- else {
- var_num = qn->upper - qn->lower;
- }
+ if (IS_REPEAT_INFINITE(qn->upper)) {
+ var_num = CEC_INFINITE_NUM;
+ child_state |= CEC_IN_INFINITE_REPEAT;
+ }
+ else {
+ var_num = qn->upper - qn->lower;
+ }
- if (var_num >= CEC_THRES_NUM_BIG_REPEAT)
- add_state |= CEC_CONT_BIG_REPEAT;
-
- if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) ||
- ((state & CEC_CONT_BIG_REPEAT) != 0 &&
- var_num >= CEC_THRES_NUM_BIG_REPEAT)) {
- if (qn->comb_exp_check_num == 0) {
- env->num_comb_exp_check++;
- qn->comb_exp_check_num = env->num_comb_exp_check;
- if (env->curr_max_regnum > env->comb_exp_max_regnum)
- env->comb_exp_max_regnum = env->curr_max_regnum;
- }
- }
+ if (var_num >= CEC_THRES_NUM_BIG_REPEAT)
+ add_state |= CEC_CONT_BIG_REPEAT;
+
+ if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) ||
+ ((state & CEC_CONT_BIG_REPEAT) != 0 &&
+ var_num >= CEC_THRES_NUM_BIG_REPEAT)) {
+ if (qn->comb_exp_check_num == 0) {
+ env->num_comb_exp_check++;
+ qn->comb_exp_check_num = env->num_comb_exp_check;
+ if (env->curr_max_regnum > env->comb_exp_max_regnum)
+ env->comb_exp_max_regnum = env->curr_max_regnum;
+ }
+ }
}
r = setup_comb_exp_check(target, child_state, env);
@@ -3799,29 +3867,29 @@ setup_comb_exp_check(Node* node, int state, ScanEnv* env)
switch (en->type) {
case ENCLOSE_MEMORY:
- {
- if (env->curr_max_regnum < en->regnum)
- env->curr_max_regnum = en->regnum;
+ {
+ if (env->curr_max_regnum < en->regnum)
+ env->curr_max_regnum = en->regnum;
- r = setup_comb_exp_check(en->target, state, env);
- }
- break;
+ r = setup_comb_exp_check(en->target, state, env);
+ }
+ break;
default:
- r = setup_comb_exp_check(en->target, state, env);
- break;
+ r = setup_comb_exp_check(en->target, state, env);
+ break;
}
}
break;
-#ifdef USE_SUBEXP_CALL
+# ifdef USE_SUBEXP_CALL
case NT_CALL:
if (IS_CALL_RECURSION(NCALL(node)))
env->has_recursion = 1;
else
r = setup_comb_exp_check(NCALL(node)->target, state, env);
break;
-#endif
+# endif
default:
break;
@@ -3831,12 +3899,6 @@ setup_comb_exp_check(Node* node, int state, ScanEnv* env)
}
#endif
-#define IN_ALT (1<<0)
-#define IN_NOT (1<<1)
-#define IN_REPEAT (1<<2)
-#define IN_VAR_REPEAT (1<<3)
-#define IN_ROOT (1<<4)
-
/* setup_tree does the following work.
1. check empty loop. (set qn->target_empty_info)
2. expand ignore-case in char class.
@@ -3850,25 +3912,19 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
{
int type;
int r = 0;
- int in_root = state & IN_ROOT;
- state &= ~IN_ROOT;
restart:
type = NTYPE(node);
switch (type) {
case NT_LIST:
{
Node* prev = NULL_NODE;
- int prev_in_root = 0;
- state |= in_root;
do {
- r = setup_tree(NCAR(node), reg, state, env);
- if (IS_NOT_NULL(prev) && r == 0) {
- r = next_setup(prev, NCAR(node), prev_in_root, reg);
- }
- prev = NCAR(node);
- prev_in_root = state & IN_ROOT;
- state &= ~IN_ROOT;
+ r = setup_tree(NCAR(node), reg, state, env);
+ if (IS_NOT_NULL(prev) && r == 0) {
+ r = next_setup(prev, NCAR(node), reg);
+ }
+ prev = NCAR(node);
} while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
}
break;
@@ -3884,7 +3940,7 @@ restart:
case NT_STR:
if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) {
- r = expand_case_fold_string(node, reg);
+ r = expand_case_fold_string(node, reg, state);
}
break;
@@ -3905,15 +3961,15 @@ restart:
BRefNode* br = NBREF(node);
p = BACKREFS_P(br);
for (i = 0; i < br->back_num; i++) {
- if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
- BIT_STATUS_ON_AT(env->backrefed_mem, p[i]);
- BIT_STATUS_ON_AT(env->bt_mem_start, p[i]);
+ if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
+ BIT_STATUS_ON_AT(env->backrefed_mem, p[i]);
+ BIT_STATUS_ON_AT(env->bt_mem_start, p[i]);
#ifdef USE_BACKREF_WITH_LEVEL
- if (IS_BACKREF_NEST_LEVEL(br)) {
- BIT_STATUS_ON_AT(env->bt_mem_end, p[i]);
- }
+ if (IS_BACKREF_NEST_LEVEL(br)) {
+ BIT_STATUS_ON_AT(env->bt_mem_end, p[i]);
+ }
#endif
- SET_ENCLOSE_STATUS(nodes[p[i]], NST_MEM_BACKREFED);
+ SET_ENCLOSE_STATUS(nodes[p[i]], NST_MEM_BACKREFED);
}
}
break;
@@ -3929,96 +3985,96 @@ restart:
}
if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) {
- r = get_min_match_length(target, &d, env);
- if (r) break;
- if (d == 0) {
- qn->target_empty_info = NQ_TARGET_IS_EMPTY;
+ r = get_min_match_length(target, &d, env);
+ if (r) break;
+ if (d == 0) {
+ qn->target_empty_info = NQ_TARGET_IS_EMPTY;
#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
- r = quantifiers_memory_node_info(target);
- if (r < 0) break;
- if (r > 0) {
- qn->target_empty_info = r;
- }
+ r = quantifiers_memory_node_info(target);
+ if (r < 0) break;
+ if (r > 0) {
+ qn->target_empty_info = r;
+ }
#endif
#if 0
- r = get_max_match_length(target, &d, env);
- if (r == 0 && d == 0) {
- /* ()* ==> ()?, ()+ ==> () */
- qn->upper = 1;
- if (qn->lower > 1) qn->lower = 1;
- if (NTYPE(target) == NT_STR) {
- qn->upper = qn->lower = 0; /* /(?:)+/ ==> // */
- }
- }
+ r = get_max_match_length(target, &d, env);
+ if (r == 0 && d == 0) {
+ /* ()* ==> ()?, ()+ ==> () */
+ qn->upper = 1;
+ if (qn->lower > 1) qn->lower = 1;
+ if (NTYPE(target) == NT_STR) {
+ qn->upper = qn->lower = 0; /* /(?:)+/ ==> // */
+ }
+ }
#endif
- }
+ }
}
state |= IN_REPEAT;
if (qn->lower != qn->upper)
- state |= IN_VAR_REPEAT;
+ state |= IN_VAR_REPEAT;
r = setup_tree(target, reg, state, env);
if (r) break;
/* expand string */
#define EXPAND_STRING_MAX_LENGTH 100
if (NTYPE(target) == NT_STR) {
- if (qn->lower > 1) {
- int i, n = qn->lower;
- OnigDistance len = NSTRING_LEN(target);
- StrNode* sn = NSTR(target);
- Node* np;
-
- np = onig_node_new_str(sn->s, sn->end);
- if (IS_NULL(np)) return ONIGERR_MEMORY;
- NSTR(np)->flag = sn->flag;
-
- for (i = 1; i < n && (i+1) * len <= EXPAND_STRING_MAX_LENGTH; i++) {
- r = onig_node_str_cat(np, sn->s, sn->end);
- if (r) {
- onig_node_free(np);
- return r;
- }
- }
- if (i < qn->upper || IS_REPEAT_INFINITE(qn->upper)) {
- Node *np1, *np2;
-
- qn->lower -= i;
- if (! IS_REPEAT_INFINITE(qn->upper))
- qn->upper -= i;
-
- np1 = onig_node_new_list(np, NULL);
- if (IS_NULL(np1)) {
- onig_node_free(np);
- return ONIGERR_MEMORY;
- }
- swap_node(np1, node);
- np2 = onig_node_list_add(node, np1);
- if (IS_NULL(np2)) {
- onig_node_free(np1);
- return ONIGERR_MEMORY;
- }
- }
- else {
- swap_node(np, node);
- onig_node_free(np);
- }
- break; /* break case NT_QTFR: */
- }
+ if (qn->lower > 1) {
+ int i, n = qn->lower;
+ OnigDistance len = NSTRING_LEN(target);
+ StrNode* sn = NSTR(target);
+ Node* np;
+
+ np = onig_node_new_str(sn->s, sn->end);
+ if (IS_NULL(np)) return ONIGERR_MEMORY;
+ NSTR(np)->flag = sn->flag;
+
+ for (i = 1; i < n && (i+1) * len <= EXPAND_STRING_MAX_LENGTH; i++) {
+ r = onig_node_str_cat(np, sn->s, sn->end);
+ if (r) {
+ onig_node_free(np);
+ return r;
+ }
+ }
+ if (i < qn->upper || IS_REPEAT_INFINITE(qn->upper)) {
+ Node *np1, *np2;
+
+ qn->lower -= i;
+ if (! IS_REPEAT_INFINITE(qn->upper))
+ qn->upper -= i;
+
+ np1 = onig_node_new_list(np, NULL);
+ if (IS_NULL(np1)) {
+ onig_node_free(np);
+ return ONIGERR_MEMORY;
+ }
+ swap_node(np1, node);
+ np2 = onig_node_list_add(node, np1);
+ if (IS_NULL(np2)) {
+ onig_node_free(np1);
+ return ONIGERR_MEMORY;
+ }
+ }
+ else {
+ swap_node(np, node);
+ onig_node_free(np);
+ }
+ break; /* break case NT_QTFR: */
+ }
}
#ifdef USE_OP_PUSH_OR_JUMP_EXACT
if (qn->greedy && (qn->target_empty_info != 0)) {
- if (NTYPE(target) == NT_QTFR) {
- QtfrNode* tqn = NQTFR(target);
- if (IS_NOT_NULL(tqn->head_exact)) {
- qn->head_exact = tqn->head_exact;
- tqn->head_exact = NULL;
- }
- }
- else {
- qn->head_exact = get_head_value_node(qn->target, 1, reg);
- }
+ if (NTYPE(target) == NT_QTFR) {
+ QtfrNode* tqn = NQTFR(target);
+ if (IS_NOT_NULL(tqn->head_exact)) {
+ qn->head_exact = tqn->head_exact;
+ tqn->head_exact = NULL;
+ }
+ }
+ else {
+ qn->head_exact = get_head_value_node(qn->target, 1, reg);
+ }
}
#endif
}
@@ -4030,50 +4086,61 @@ restart:
switch (en->type) {
case ENCLOSE_OPTION:
- {
- OnigOptionType options = reg->options;
- state |= in_root;
- reg->options = NENCLOSE(node)->option;
- r = setup_tree(NENCLOSE(node)->target, reg, state, env);
- reg->options = options;
- }
- break;
+ {
+ OnigOptionType options = reg->options;
+ reg->options = NENCLOSE(node)->option;
+ r = setup_tree(NENCLOSE(node)->target, reg, state, env);
+ reg->options = options;
+ }
+ break;
case ENCLOSE_MEMORY:
- if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT)) != 0) {
- BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum);
- /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */
- }
+ if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT | IN_CALL)) != 0) {
+ BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum);
+ /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */
+ }
+ if (IS_ENCLOSE_CALLED(en))
+ state |= IN_CALL;
+ if (IS_ENCLOSE_RECURSION(en))
+ state |= IN_RECCALL;
+ else if ((state & IN_RECCALL) != 0)
+ SET_CALL_RECURSION(node);
r = setup_tree(en->target, reg, state, env);
break;
case ENCLOSE_STOP_BACKTRACK:
- {
- Node* target = en->target;
- r = setup_tree(target, reg, state, env);
- if (NTYPE(target) == NT_QTFR) {
- QtfrNode* tqn = NQTFR(target);
- if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 &&
- tqn->greedy != 0) { /* (?>a*), a*+ etc... */
- int qtype = NTYPE(tqn->target);
- if (IS_NODE_TYPE_SIMPLE(qtype))
- SET_ENCLOSE_STATUS(node, NST_STOP_BT_SIMPLE_REPEAT);
- }
- }
- }
- break;
+ {
+ Node* target = en->target;
+ r = setup_tree(target, reg, state, env);
+ if (NTYPE(target) == NT_QTFR) {
+ QtfrNode* tqn = NQTFR(target);
+ if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 &&
+ tqn->greedy != 0) { /* (?>a*), a*+ etc... */
+ int qtype = NTYPE(tqn->target);
+ if (IS_NODE_TYPE_SIMPLE(qtype))
+ SET_ENCLOSE_STATUS(node, NST_STOP_BT_SIMPLE_REPEAT);
+ }
+ }
+ }
+ break;
case ENCLOSE_CONDITION:
#ifdef USE_NAMED_GROUP
- if (! IS_ENCLOSE_NAME_REF(NENCLOSE(node)) &&
- env->num_named > 0 &&
- IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
- !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
- return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
- }
+ if (! IS_ENCLOSE_NAME_REF(NENCLOSE(node)) &&
+ env->num_named > 0 &&
+ IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
+ !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
+ return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
+ }
#endif
- r = setup_tree(NENCLOSE(node)->target, reg, state, env);
- break;
+ if (NENCLOSE(node)->regnum > env->num_mem)
+ return ONIGERR_INVALID_BACKREF;
+ r = setup_tree(NENCLOSE(node)->target, reg, state, env);
+ break;
+
+ case ENCLOSE_ABSENT:
+ r = setup_tree(NENCLOSE(node)->target, reg, state, env);
+ break;
}
}
break;
@@ -4084,11 +4151,11 @@ restart:
switch (an->type) {
case ANCHOR_PREC_READ:
- r = setup_tree(an->target, reg, state, env);
- break;
+ r = setup_tree(an->target, reg, state, env);
+ break;
case ANCHOR_PREC_READ_NOT:
- r = setup_tree(an->target, reg, (state | IN_NOT), env);
- break;
+ r = setup_tree(an->target, reg, (state | IN_NOT), env);
+ break;
/* allowed node types in look-behind */
#define ALLOWED_TYPE_IN_LB \
@@ -4110,30 +4177,31 @@ restart:
ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
case ANCHOR_LOOK_BEHIND:
- {
- r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
- ALLOWED_ENCLOSE_IN_LB, ALLOWED_ANCHOR_IN_LB);
- if (r < 0) return r;
- if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
- r = setup_look_behind(node, reg, env);
- if (r != 0) return r;
- if (NTYPE(node) != NT_ANCHOR) goto restart;
- r = setup_tree(an->target, reg, state, env);
- }
- break;
+ {
+ r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
+ ALLOWED_ENCLOSE_IN_LB, ALLOWED_ANCHOR_IN_LB);
+ if (r < 0) return r;
+ if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
+ if (NTYPE(node) != NT_ANCHOR) goto restart;
+ r = setup_tree(an->target, reg, (state | IN_LOOK_BEHIND), env);
+ if (r != 0) return r;
+ r = setup_look_behind(node, reg, env);
+ }
+ break;
case ANCHOR_LOOK_BEHIND_NOT:
- {
- r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
- ALLOWED_ENCLOSE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT);
- if (r < 0) return r;
- if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
- r = setup_look_behind(node, reg, env);
- if (r != 0) return r;
- if (NTYPE(node) != NT_ANCHOR) goto restart;
- r = setup_tree(an->target, reg, (state | IN_NOT), env);
- }
- break;
+ {
+ r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
+ ALLOWED_ENCLOSE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT);
+ if (r < 0) return r;
+ if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
+ if (NTYPE(node) != NT_ANCHOR) goto restart;
+ r = setup_tree(an->target, reg, (state | IN_NOT | IN_LOOK_BEHIND),
+ env);
+ if (r != 0) return r;
+ r = setup_look_behind(node, reg, env);
+ }
+ break;
}
}
break;
@@ -4145,153 +4213,73 @@ restart:
return r;
}
-#ifndef USE_SUNDAY_QUICK_SEARCH
-/* set skip map for Boyer-Moore search */
-static int
-set_bm_skip(UChar* s, UChar* end, regex_t* reg,
- UChar skip[], int** int_skip, int ignore_case)
-{
- OnigDistance i, len;
- int clen, flen, n, j, k;
- UChar *p, buf[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM][ONIGENC_MBC_CASE_FOLD_MAXLEN];
- OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
- OnigEncoding enc = reg->enc;
-
- len = end - s;
- if (len < ONIG_CHAR_TABLE_SIZE) {
- for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = (UChar )len;
-
- n = 0;
- for (i = 0; i < len - 1; i += clen) {
- p = s + i;
- if (ignore_case)
- n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
- p, end, items);
- clen = enclen(enc, p, end);
-
- for (j = 0; j < n; j++) {
- if ((items[j].code_len != 1) || (items[j].byte_len != clen))
- return 1; /* different length isn't supported. */
- flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
- if (flen != clen)
- return 1; /* different length isn't supported. */
- }
- for (j = 0; j < clen; j++) {
- skip[s[i + j]] = (UChar )(len - 1 - i - j);
- for (k = 0; k < n; k++) {
- skip[buf[k][j]] = (UChar )(len - 1 - i - j);
- }
- }
- }
- }
- else {
- if (IS_NULL(*int_skip)) {
- *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
- if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;
- }
- for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = (int )len;
-
- n = 0;
- for (i = 0; i < len - 1; i += clen) {
- p = s + i;
- if (ignore_case)
- n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
- p, end, items);
- clen = enclen(enc, p, end);
-
- for (j = 0; j < n; j++) {
- if ((items[j].code_len != 1) || (items[j].byte_len != clen))
- return 1; /* different length isn't supported. */
- flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
- if (flen != clen)
- return 1; /* different length isn't supported. */
- }
- for (j = 0; j < clen; j++) {
- (*int_skip)[s[i + j]] = (int )(len - 1 - i - j);
- for (k = 0; k < n; k++) {
- (*int_skip)[buf[k][j]] = (int )(len - 1 - i - j);
- }
- }
- }
- }
- return 0;
-}
-
-#else /* USE_SUNDAY_QUICK_SEARCH */
-
/* set skip map for Sunday's quick search */
static int
set_bm_skip(UChar* s, UChar* end, regex_t* reg,
- UChar skip[], int** int_skip, int ignore_case)
+ UChar skip[], int ignore_case)
{
OnigDistance i, len;
int clen, flen, n, j, k;
- UChar *p, buf[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM][ONIGENC_MBC_CASE_FOLD_MAXLEN];
+ UChar *p, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
OnigEncoding enc = reg->enc;
len = end - s;
- if (len < ONIG_CHAR_TABLE_SIZE) {
- for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = (UChar )(len + 1);
+ if (len >= ONIG_CHAR_TABLE_SIZE) {
+ /* This should not happen. */
+ return ONIGERR_TYPE_BUG;
+ }
- n = 0;
+ if (ignore_case) {
for (i = 0; i < len; i += clen) {
p = s + i;
- if (ignore_case)
- n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
- p, end, items);
+ n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
+ p, end, items);
clen = enclen(enc, p, end);
+ if (p + clen > end)
+ clen = (int )(end - p);
for (j = 0; j < n; j++) {
- if ((items[j].code_len != 1) || (items[j].byte_len != clen))
- return 1; /* different length isn't supported. */
- flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
- if (flen != clen)
- return 1; /* different length isn't supported. */
- }
- for (j = 0; j < clen; j++) {
- skip[s[i + j]] = (UChar )(len - i - j);
- for (k = 0; k < n; k++) {
- skip[buf[k][j]] = (UChar )(len - i - j);
+ if ((items[j].code_len != 1) || (items[j].byte_len != clen)) {
+ /* Different length isn't supported. Stop optimization at here. */
+ end = p;
+ goto endcheck;
+ }
+ flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf);
+ if (flen != clen) {
+ /* Different length isn't supported. Stop optimization at here. */
+ end = p;
+ goto endcheck;
}
}
}
+endcheck:
+ len = end - s;
}
- else {
- if (IS_NULL(*int_skip)) {
- *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
- if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;
- }
- for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = (int )(len + 1);
-
- n = 0;
- for (i = 0; i < len; i += clen) {
- p = s + i;
- if (ignore_case)
- n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
- p, end, items);
- clen = enclen(enc, p, end);
- for (j = 0; j < n; j++) {
- if ((items[j].code_len != 1) || (items[j].byte_len != clen))
- return 1; /* different length isn't supported. */
- flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
- if (flen != clen)
- return 1; /* different length isn't supported. */
- }
- for (j = 0; j < clen; j++) {
- (*int_skip)[s[i + j]] = (int )(len - i - j);
- for (k = 0; k < n; k++) {
- (*int_skip)[buf[k][j]] = (int )(len - i - j);
- }
+ for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
+ skip[i] = (UChar )(len + 1);
+ n = 0;
+ for (i = 0; i < len; i += clen) {
+ p = s + i;
+ if (ignore_case)
+ n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
+ p, end, items);
+ clen = enclen(enc, p, end);
+ if (p + clen > end)
+ clen = (int )(end - p);
+
+ for (j = 0; j < clen; j++) {
+ skip[s[i + j]] = (UChar )(len - i - j);
+ for (k = 0; k < n; k++) {
+ ONIGENC_CODE_TO_MBC(enc, items[k].code[0], buf);
+ skip[buf[j]] = (UChar )(len - i - j);
}
}
}
- return 0;
-}
-#endif /* USE_SUNDAY_QUICK_SEARCH */
-#define OPT_EXACT_MAXLEN 24
+ return (int )len;
+}
typedef struct {
OnigDistance min; /* min byte length */
@@ -4355,7 +4343,7 @@ map_position_value(OnigEncoding enc, int i)
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1
};
- if (i < (int )(sizeof(ByteValTable)/sizeof(ByteValTable[0]))) {
+ if (i < numberof(ByteValTable)) {
if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)
return 20;
else
@@ -4387,7 +4375,7 @@ distance_value(MinMaxLen* mm)
if (mm->max == ONIG_INFINITE_DISTANCE) return 0;
d = mm->max - mm->min;
- if (d < sizeof(dist_vals)/sizeof(dist_vals[0]))
+ if (d < numberof(dist_vals))
/* return dist_vals[d] * 16 / (mm->min + 12); */
return (int )dist_vals[d];
else
@@ -4482,7 +4470,7 @@ copy_opt_anc_info(OptAncInfo* to, OptAncInfo* from)
static void
concat_opt_anc_info(OptAncInfo* to, OptAncInfo* left, OptAncInfo* right,
- OnigDistance left_len, OnigDistance right_len)
+ OnigDistance left_len, OnigDistance right_len)
{
clear_opt_anc_info(to);
@@ -4598,7 +4586,7 @@ concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc)
static void
concat_opt_exact_info_str(OptExactInfo* to, UChar* s, UChar* end,
- int raw ARG_UNUSED, OnigEncoding enc)
+ int raw ARG_UNUSED, OnigEncoding enc)
{
int i, j, len;
UChar *p;
@@ -4843,7 +4831,7 @@ concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add)
if (add->exb.len > 0 && to->len.max == 0) {
concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc,
- to->len.max, add->len.max);
+ to->len.max, add->len.max);
copy_opt_anc_info(&add->exb.anc, &tanc);
}
@@ -4874,12 +4862,12 @@ concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add)
if (to->expr.len > 0) {
if (add->len.max > 0) {
if (to->expr.len > (int )add->len.max)
- to->expr.len = (int )add->len.max;
+ to->expr.len = (int )add->len.max;
if (to->expr.mmd.max == 0)
- select_opt_exact_info(enc, &to->exb, &to->expr);
+ select_opt_exact_info(enc, &to->exb, &to->expr);
else
- select_opt_exact_info(enc, &to->exm, &to->expr);
+ select_opt_exact_info(enc, &to->exm, &to->expr);
}
}
else if (add->expr.len > 0) {
@@ -4925,11 +4913,11 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
copy_opt_env(&nenv, env);
do {
- r = optimize_node_left(NCAR(nd), &nopt, &nenv);
- if (r == 0) {
- add_mml(&nenv.mmd, &nopt.len);
- concat_left_node_opt_info(env->enc, opt, &nopt);
- }
+ r = optimize_node_left(NCAR(nd), &nopt, &nenv);
+ if (r == 0) {
+ add_mml(&nenv.mmd, &nopt.len);
+ concat_left_node_opt_info(env->enc, opt, &nopt);
+ }
} while (r == 0 && IS_NOT_NULL(nd = NCDR(nd)));
}
break;
@@ -4940,11 +4928,11 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
Node* nd = node;
do {
- r = optimize_node_left(NCAR(nd), &nopt, env);
- if (r == 0) {
- if (nd == node) copy_node_opt_info(opt, &nopt);
- else alt_merge_node_opt_info(opt, &nopt, env);
- }
+ r = optimize_node_left(NCAR(nd), &nopt, env);
+ if (r == 0) {
+ if (nd == node) copy_node_opt_info(opt, &nopt);
+ else alt_merge_node_opt_info(opt, &nopt, env);
+ }
} while ((r == 0) && IS_NOT_NULL(nd = NCDR(nd)));
}
break;
@@ -4956,40 +4944,40 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
int is_raw = NSTRING_IS_RAW(node);
if (! NSTRING_IS_AMBIG(node)) {
- concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
- is_raw, env->enc);
- opt->exb.ignore_case = 0;
- if (slen > 0) {
- add_char_opt_map_info(&opt->map, *(sn->s), env->enc);
- }
+ concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
+ is_raw, env->enc);
+ opt->exb.ignore_case = 0;
+ if (slen > 0) {
+ add_char_opt_map_info(&opt->map, *(sn->s), env->enc);
+ }
set_mml(&opt->len, slen, slen);
}
else {
OnigDistance max;
- if (NSTRING_IS_DONT_GET_OPT_INFO(node)) {
+ if (NSTRING_IS_DONT_GET_OPT_INFO(node)) {
int n = onigenc_strlen(env->enc, sn->s, sn->end);
- max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * n;
- }
- else {
- concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
- is_raw, env->enc);
- opt->exb.ignore_case = 1;
-
- if (slen > 0) {
- r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end,
- env->enc, env->case_fold_flag);
- if (r != 0) break;
- }
-
- max = slen;
- }
+ max = (OnigDistance )ONIGENC_MBC_MAXLEN_DIST(env->enc) * (OnigDistance)n;
+ }
+ else {
+ concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
+ is_raw, env->enc);
+ opt->exb.ignore_case = 1;
+
+ if (slen > 0) {
+ r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end,
+ env->enc, env->case_fold_flag);
+ if (r != 0) break;
+ }
+
+ max = slen;
+ }
set_mml(&opt->len, slen, max);
}
if ((OnigDistance )opt->exb.len == slen)
- opt->exb.reach_end = 1;
+ opt->exb.reach_end = 1;
}
break;
@@ -5002,9 +4990,9 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) {
OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
- OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
+ OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
- set_mml(&opt->len, min, max);
+ set_mml(&opt->len, min, max);
}
else {
for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
@@ -5013,7 +5001,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
}
}
- set_mml(&opt->len, 1, 1);
+ set_mml(&opt->len, 1, 1);
}
}
break;
@@ -5028,25 +5016,25 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
if (max == 1) {
min = 1;
- maxcode = NCTYPE(node)->ascii_range ? 0x80 : SINGLE_BYTE_SIZE;
- switch (NCTYPE(node)->ctype) {
- case ONIGENC_CTYPE_WORD:
- if (NCTYPE(node)->not != 0) {
- for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
- if (! ONIGENC_IS_CODE_WORD(env->enc, i) || i >= maxcode) {
- add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
- }
- }
- }
- else {
- for (i = 0; i < maxcode; i++) {
- if (ONIGENC_IS_CODE_WORD(env->enc, i)) {
- add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
- }
- }
- }
- break;
- }
+ maxcode = NCTYPE(node)->ascii_range ? 0x80 : SINGLE_BYTE_SIZE;
+ switch (NCTYPE(node)->ctype) {
+ case ONIGENC_CTYPE_WORD:
+ if (NCTYPE(node)->not != 0) {
+ for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
+ if (! ONIGENC_IS_CODE_WORD(env->enc, i) || i >= maxcode) {
+ add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
+ }
+ }
+ }
+ else {
+ for (i = 0; i < maxcode; i++) {
+ if (ONIGENC_IS_CODE_WORD(env->enc, i)) {
+ add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
+ }
+ }
+ }
+ break;
+ }
}
else {
min = ONIGENC_MBC_MINLEN(env->enc);
@@ -5071,27 +5059,27 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
case ANCHOR_END_BUF:
case ANCHOR_SEMI_END_BUF:
case ANCHOR_END_LINE:
- case ANCHOR_LOOK_BEHIND: /* just for (?<=x).* */
- case ANCHOR_PREC_READ_NOT: /* just for (?!x).* */
+ case ANCHOR_LOOK_BEHIND: /* just for (?<=x).* */
+ case ANCHOR_PREC_READ_NOT: /* just for (?!x).* */
add_opt_anc_info(&opt->anc, NANCHOR(node)->type);
break;
case ANCHOR_PREC_READ:
{
- NodeOptInfo nopt;
+ NodeOptInfo nopt;
- r = optimize_node_left(NANCHOR(node)->target, &nopt, env);
- if (r == 0) {
- if (nopt.exb.len > 0)
- copy_opt_exact_info(&opt->expr, &nopt.exb);
- else if (nopt.exm.len > 0)
- copy_opt_exact_info(&opt->expr, &nopt.exm);
+ r = optimize_node_left(NANCHOR(node)->target, &nopt, env);
+ if (r == 0) {
+ if (nopt.exb.len > 0)
+ copy_opt_exact_info(&opt->expr, &nopt.exb);
+ else if (nopt.exm.len > 0)
+ copy_opt_exact_info(&opt->expr, &nopt.exm);
- opt->expr.reach_end = 0;
+ opt->expr.reach_end = 0;
- if (nopt.map.value > 0)
- copy_opt_map_info(&opt->map, &nopt.map);
- }
+ if (nopt.map.value > 0)
+ copy_opt_map_info(&opt->map, &nopt.map);
+ }
}
break;
@@ -5109,8 +5097,8 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
BRefNode* br = NBREF(node);
if (br->state & NST_RECURSION) {
- set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
- break;
+ set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
+ break;
}
backs = BACKREFS_P(br);
r = get_min_match_length(nodes[backs[0]], &min, env->scan_env);
@@ -5118,12 +5106,12 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
r = get_max_match_length(nodes[backs[0]], &max, env->scan_env);
if (r != 0) break;
for (i = 1; i < br->back_num; i++) {
- r = get_min_match_length(nodes[backs[i]], &tmin, env->scan_env);
- if (r != 0) break;
- r = get_max_match_length(nodes[backs[i]], &tmax, env->scan_env);
- if (r != 0) break;
- if (min > tmin) min = tmin;
- if (max < tmax) max = tmax;
+ r = get_min_match_length(nodes[backs[i]], &tmin, env->scan_env);
+ if (r != 0) break;
+ r = get_max_match_length(nodes[backs[i]], &tmax, env->scan_env);
+ if (r != 0) break;
+ if (min > tmin) min = tmin;
+ if (max < tmax) max = tmax;
}
if (r == 0) set_mml(&opt->len, min, max);
}
@@ -5152,45 +5140,45 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
r = optimize_node_left(qn->target, &nopt, env);
if (r) break;
- if (/*qn->lower == 0 &&*/ IS_REPEAT_INFINITE(qn->upper)) {
- if (env->mmd.max == 0 &&
- NTYPE(qn->target) == NT_CANY && qn->greedy) {
- if (IS_MULTILINE(env->options))
- /* implicit anchor: /.*a/ ==> /\A.*a/ */
- add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML);
- else
- add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR);
- }
+ if (qn->lower == 0 && IS_REPEAT_INFINITE(qn->upper)) {
+ if (env->mmd.max == 0 &&
+ NTYPE(qn->target) == NT_CANY && qn->greedy) {
+ if (IS_MULTILINE(env->options))
+ /* implicit anchor: /.*a/ ==> /\A.*a/ */
+ add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML);
+ else
+ add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR);
+ }
}
else {
- if (qn->lower > 0) {
- copy_node_opt_info(opt, &nopt);
- if (nopt.exb.len > 0) {
- if (nopt.exb.reach_end) {
- for (i = 2; i <= qn->lower &&
- ! is_full_opt_exact_info(&opt->exb); i++) {
- concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc);
- }
- if (i < qn->lower) {
- opt->exb.reach_end = 0;
- }
- }
- }
-
- if (qn->lower != qn->upper) {
- opt->exb.reach_end = 0;
- opt->exm.reach_end = 0;
- }
- if (qn->lower > 1)
- opt->exm.reach_end = 0;
- }
+ if (qn->lower > 0) {
+ copy_node_opt_info(opt, &nopt);
+ if (nopt.exb.len > 0) {
+ if (nopt.exb.reach_end) {
+ for (i = 2; i <= qn->lower &&
+ ! is_full_opt_exact_info(&opt->exb); i++) {
+ concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc);
+ }
+ if (i < qn->lower) {
+ opt->exb.reach_end = 0;
+ }
+ }
+ }
+
+ if (qn->lower != qn->upper) {
+ opt->exb.reach_end = 0;
+ opt->exm.reach_end = 0;
+ }
+ if (qn->lower > 1)
+ opt->exm.reach_end = 0;
+ }
}
min = distance_multiply(nopt.len.min, qn->lower);
if (IS_REPEAT_INFINITE(qn->upper))
- max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0);
+ max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0);
else
- max = distance_multiply(nopt.len.max, qn->upper);
+ max = distance_multiply(nopt.len.max, qn->upper);
set_mml(&opt->len, min, max);
}
@@ -5202,51 +5190,55 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
switch (en->type) {
case ENCLOSE_OPTION:
- {
- OnigOptionType save = env->options;
+ {
+ OnigOptionType save = env->options;
- env->options = en->option;
- r = optimize_node_left(en->target, opt, env);
- env->options = save;
- }
- break;
+ env->options = en->option;
+ r = optimize_node_left(en->target, opt, env);
+ env->options = save;
+ }
+ break;
case ENCLOSE_MEMORY:
#ifdef USE_SUBEXP_CALL
- en->opt_count++;
- if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) {
- OnigDistance min, max;
-
- min = 0;
- max = ONIG_INFINITE_DISTANCE;
- if (IS_ENCLOSE_MIN_FIXED(en)) min = en->min_len;
- if (IS_ENCLOSE_MAX_FIXED(en)) max = en->max_len;
- set_mml(&opt->len, min, max);
- }
- else
+ en->opt_count++;
+ if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) {
+ OnigDistance min, max;
+
+ min = 0;
+ max = ONIG_INFINITE_DISTANCE;
+ if (IS_ENCLOSE_MIN_FIXED(en)) min = en->min_len;
+ if (IS_ENCLOSE_MAX_FIXED(en)) max = en->max_len;
+ set_mml(&opt->len, min, max);
+ }
+ else
#endif
- {
- r = optimize_node_left(en->target, opt, env);
+ {
+ r = optimize_node_left(en->target, opt, env);
- if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) {
- if (BIT_STATUS_AT(env->scan_env->backrefed_mem, en->regnum))
- remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK);
- }
- }
- break;
+ if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) {
+ if (BIT_STATUS_AT(env->scan_env->backrefed_mem, en->regnum))
+ remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK);
+ }
+ }
+ break;
case ENCLOSE_STOP_BACKTRACK:
case ENCLOSE_CONDITION:
- r = optimize_node_left(en->target, opt, env);
- break;
+ r = optimize_node_left(en->target, opt, env);
+ break;
+
+ case ENCLOSE_ABSENT:
+ set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
+ break;
}
}
break;
default:
#ifdef ONIG_DEBUG
- if (!onig_is_prelude()) fprintf(stderr, "optimize_node_left: undefined node type %d\n",
- NTYPE(node));
+ fprintf(stderr, "optimize_node_left: undefined node type %d\n",
+ NTYPE(node));
#endif
r = ONIGERR_TYPE_BUG;
break;
@@ -5258,7 +5250,6 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
static int
set_optimize_exact_info(regex_t* reg, OptExactInfo* e)
{
- int r;
int allow_reverse;
if (e->len == 0) return 0;
@@ -5269,18 +5260,27 @@ set_optimize_exact_info(regex_t* reg, OptExactInfo* e)
reg->exact_end = reg->exact + e->len;
allow_reverse =
- ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);
+ ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);
if (e->ignore_case > 0) {
if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
- r = set_bm_skip(reg->exact, reg->exact_end, reg,
- reg->map, &(reg->int_map), 1);
- if (r == 0) {
- reg->optimize = (allow_reverse != 0
- ? ONIG_OPTIMIZE_EXACT_BM_IC : ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC);
+ int orig_len = e->len;
+ e->len = set_bm_skip(reg->exact, reg->exact_end, reg,
+ reg->map, 1);
+ if (e->len >= 3) {
+ reg->exact_end = reg->exact + e->len;
+ reg->optimize = (allow_reverse != 0
+ ? ONIG_OPTIMIZE_EXACT_BM_IC : ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC);
}
else {
- reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
+ /* Even if BM skip table can't be built (e.g., pattern starts with
+ 's' or 'k' which have multi-byte case fold variants), we should
+ still use EXACT_IC optimization with the original pattern.
+ Without this fallback, patterns like /slackware/i have no
+ optimization at all, causing severe performance regression
+ especially with non-ASCII strings. See [Bug #21824] */
+ e->len = orig_len; /* Restore original length for EXACT_IC */
+ reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
}
}
else {
@@ -5289,12 +5289,10 @@ set_optimize_exact_info(regex_t* reg, OptExactInfo* e)
}
else {
if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
- r = set_bm_skip(reg->exact, reg->exact_end, reg,
- reg->map, &(reg->int_map), 0);
- if (r) return r;
-
+ set_bm_skip(reg->exact, reg->exact_end, reg,
+ reg->map, 0);
reg->optimize = (allow_reverse != 0
- ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV);
+ ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV);
}
else {
reg->optimize = ONIG_OPTIMIZE_EXACT;
@@ -5335,7 +5333,7 @@ set_sub_anchor(regex_t* reg, OptAncInfo* anc)
reg->sub_anchor |= anc->right_anchor & ANCHOR_END_LINE;
}
-#ifdef ONIG_DEBUG
+#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
static void print_optimize_info(FILE* f, regex_t* reg);
#endif
@@ -5360,8 +5358,11 @@ set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML |
ANCHOR_LOOK_BEHIND);
+ if ((opt.anc.left_anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) != 0)
+ reg->anchor &= ~ANCHOR_ANYCHAR_STAR_ML;
+
reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF |
- ANCHOR_PREC_READ_NOT);
+ ANCHOR_PREC_READ_NOT);
if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) {
reg->anchor_dmin = opt.len.min;
@@ -5371,7 +5372,7 @@ set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
if (opt.exb.len > 0 || opt.exm.len > 0) {
select_opt_exact_info(reg->enc, &opt.exb, &opt.exm);
if (opt.map.value > 0 &&
- comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) {
+ comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) {
goto set_map;
}
else {
@@ -5391,7 +5392,7 @@ set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
}
#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
- if (!onig_is_prelude()) print_optimize_info(stderr, reg);
+ print_optimize_info(stderr, reg);
#endif
return r;
}
@@ -5406,16 +5407,14 @@ clear_optimize_info(regex_t* reg)
reg->sub_anchor = 0;
reg->exact_end = (UChar* )NULL;
reg->threshold_len = 0;
- if (IS_NOT_NULL(reg->exact)) {
- xfree(reg->exact);
- reg->exact = (UChar* )NULL;
- }
+ xfree(reg->exact);
+ reg->exact = (UChar* )NULL;
}
#ifdef ONIG_DEBUG
static void print_enc_string(FILE* fp, OnigEncoding enc,
- const UChar *s, const UChar *end)
+ const UChar *s, const UChar *end)
{
fprintf(fp, "\nPATTERN: /");
@@ -5427,10 +5426,10 @@ static void print_enc_string(FILE* fp, OnigEncoding enc,
while (p < end) {
code = ONIGENC_MBC_TO_CODE(enc, p, end);
if (code >= 0x80) {
- fprintf(fp, " 0x%04x ", (int )code);
+ fprintf(fp, " 0x%04x ", (int )code);
}
else {
- fputc((int )code, fp);
+ fputc((int )code, fp);
}
p += enclen(enc, p, end);
@@ -5445,21 +5444,23 @@ static void print_enc_string(FILE* fp, OnigEncoding enc,
fprintf(fp, "/ (%s)\n", enc->name);
}
+#endif /* ONIG_DEBUG */
+#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
static void
print_distance_range(FILE* f, OnigDistance a, OnigDistance b)
{
if (a == ONIG_INFINITE_DISTANCE)
fputs("inf", f);
else
- fprintf(f, "(%"PRIuSIZE")", a);
+ fprintf(f, "(%"PRIuPTR")", a);
fputs("-", f);
if (b == ONIG_INFINITE_DISTANCE)
fputs("inf", f);
else
- fprintf(f, "(%"PRIuSIZE")", b);
+ fprintf(f, "(%"PRIuPTR")", b);
}
static void
@@ -5536,7 +5537,7 @@ print_optimize_info(FILE* f, regex_t* reg)
for (p = reg->exact; p < reg->exact_end; p++) {
fputc(*p, f);
}
- fprintf(f, "]: length: %ld\n", (reg->exact_end - reg->exact));
+ fprintf(f, "]: length: %"PRIdPTR"\n", (reg->exact_end - reg->exact));
}
else if (reg->optimize & ONIG_OPTIMIZE_MAP) {
int c, i, n = 0;
@@ -5549,7 +5550,7 @@ print_optimize_info(FILE* f, regex_t* reg)
c = 0;
fputc('[', f);
for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
- if (reg->map[i] != 0) {
+ if (reg->map[i] != 0) {
if (c > 0) fputs(", ", f);
c++;
if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 &&
@@ -5563,19 +5564,17 @@ print_optimize_info(FILE* f, regex_t* reg)
}
}
}
-#endif /* ONIG_DEBUG */
+#endif /* ONIG_DEBUG_COMPILE || ONIG_DEBUG_MATCH */
extern void
onig_free_body(regex_t* reg)
{
if (IS_NOT_NULL(reg)) {
- if (IS_NOT_NULL(reg->p)) xfree(reg->p);
- if (IS_NOT_NULL(reg->exact)) xfree(reg->exact);
- if (IS_NOT_NULL(reg->int_map)) xfree(reg->int_map);
- if (IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward);
- if (IS_NOT_NULL(reg->repeat_range)) xfree(reg->repeat_range);
- if (IS_NOT_NULL(reg->chain)) onig_free(reg->chain);
+ xfree(reg->p);
+ xfree(reg->exact);
+ xfree(reg->repeat_range);
+ onig_free(reg->chain);
#ifdef USE_NAMED_GROUP
onig_names_free(reg);
@@ -5592,6 +5591,69 @@ onig_free(regex_t* reg)
}
}
+/*
+ * Allocate `size` bytes with xmalloc() and fill them with a copy of the
+ * `size` bytes found at `ptr`.
+ *
+ * Returns the newly allocated buffer, or NULL when the allocation fails
+ * (nothing is copied in that case).
+ */
+static void*
+dup_copy(const void *ptr, size_t size)
+{
+  void *copy = xmalloc(size);
+
+  if (IS_NULL(copy)) return NULL;
+
+  /* memcpy() returns its destination, i.e. the new buffer. */
+  return memcpy(copy, ptr, size);
+}
+
+/*
+ * Copy the regex `oreg` into a freshly xmalloc'ed regex_t stored in *nreg.
+ *
+ * The struct is first copied by value; then each member that is set among
+ * exact, p, repeat_range, name_table and chain is duplicated in turn, so
+ * the copy gets its own exact/p/repeat_range buffers and its own chain.
+ *
+ * Returns 0 on success.  When `oreg` is NULL nothing is done: *nreg is
+ * left untouched and 0 is returned.  When any step fails, the buffers
+ * duplicated so far and the new struct are released, *nreg is reset to
+ * NULL so the caller is never left holding a freed pointer, and
+ * ONIGERR_MEMORY is returned.
+ */
+extern int
+onig_reg_copy(regex_t** nreg, regex_t* oreg)
+{
+  regex_t *reg;
+
+  if (IS_NULL(oreg)) return 0;
+
+  reg = *nreg = (regex_t* )xmalloc(sizeof(regex_t));
+  if (IS_NULL(reg)) return ONIGERR_MEMORY;
+
+  /* Shallow copy: pointer members still alias oreg's buffers until each
+     one is replaced by a private duplicate below. */
+  *reg = *oreg;
+
+  /* Replace reg->mem by a duplicate of itself; true when that fails. */
+# define COPY_FAILED(mem, size) IS_NULL(reg->mem = dup_copy(reg->mem, size))
+
+  if (IS_NOT_NULL(reg->exact)) {
+    size_t exact_size = reg->exact_end - reg->exact;
+    if (COPY_FAILED(exact, exact_size))
+      goto err;
+    /* exact_end must follow the new buffer, not the original one. */
+    (reg)->exact_end = (reg)->exact + exact_size;
+  }
+
+  if (IS_NOT_NULL(reg->p)) {
+    if (COPY_FAILED(p, reg->alloc))
+      goto err_p;
+  }
+  if (IS_NOT_NULL(reg->repeat_range)) {
+    if (COPY_FAILED(repeat_range, reg->repeat_range_alloc * sizeof(OnigRepeatRange)))
+      goto err_repeat_range;
+  }
+# undef COPY_FAILED
+
+  if (IS_NOT_NULL(reg->name_table)) {
+    /* Any non-zero result is reported to the caller as ONIGERR_MEMORY. */
+    if (onig_names_copy(reg, oreg))
+      goto err_name_table;
+  }
+  if (IS_NOT_NULL(reg->chain)) {
+    /* reg->chain still aliases oreg's chain here; copy it recursively. */
+    if (onig_reg_copy(&reg->chain, reg->chain))
+      goto err_chain;
+  }
+  return 0;
+
+  /* Unwind in reverse order of acquisition.  Every pointer freed below is
+     either NULL or a duplicate made above, never one of oreg's buffers. */
+ err_chain:
+  onig_names_free(reg);
+ err_name_table:
+  xfree(reg->repeat_range);
+ err_repeat_range:
+  xfree(reg->p);
+ err_p:
+  xfree(reg->exact);
+ err:
+  xfree(reg);
+  /* Do not hand a dangling pointer back to the caller. */
+  *nreg = (regex_t* )NULL;
+  return ONIGERR_MEMORY;
+}
+
+#ifdef RUBY
size_t
onig_memsize(const regex_t *reg)
{
@@ -5599,8 +5661,6 @@ onig_memsize(const regex_t *reg)
if (IS_NULL(reg)) return 0;
if (IS_NOT_NULL(reg->p)) size += reg->alloc;
if (IS_NOT_NULL(reg->exact)) size += reg->exact_end - reg->exact;
- if (IS_NOT_NULL(reg->int_map)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE;
- if (IS_NOT_NULL(reg->int_map_backward)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE;
if (IS_NOT_NULL(reg->repeat_range)) size += reg->repeat_range_alloc * sizeof(OnigRepeatRange);
if (IS_NOT_NULL(reg->chain)) size += onig_memsize(reg->chain);
@@ -5615,65 +5675,47 @@ onig_region_memsize(const OnigRegion *regs)
size += regs->allocated * (sizeof(*regs->beg) + sizeof(*regs->end));
return size;
}
+#endif
#define REGEX_TRANSFER(to,from) do {\
- (to)->state = ONIG_STATE_MODIFY;\
onig_free_body(to);\
xmemcpy(to, from, sizeof(regex_t));\
xfree(from);\
} while (0)
+#if 0
extern void
onig_transfer(regex_t* to, regex_t* from)
{
- THREAD_ATOMIC_START;
REGEX_TRANSFER(to, from);
- THREAD_ATOMIC_END;
}
+#endif
-#define REGEX_CHAIN_HEAD(reg) do {\
- while (IS_NOT_NULL((reg)->chain)) {\
- (reg) = (reg)->chain;\
- }\
-} while (0)
-
-extern void
-onig_chain_link_add(regex_t* to, regex_t* add)
-{
- THREAD_ATOMIC_START;
- REGEX_CHAIN_HEAD(to);
- to->chain = add;
- THREAD_ATOMIC_END;
-}
+#ifdef ONIG_DEBUG_COMPILE
+static void print_compiled_byte_code_list(FILE* f, regex_t* reg);
+#endif
+#ifdef ONIG_DEBUG_PARSE_TREE
+static void print_tree(FILE* f, Node* node);
+#endif
-extern void
-onig_chain_reduce(regex_t* reg)
+#ifdef RUBY
+extern int
+onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
+ OnigErrorInfo* einfo)
{
- regex_t *head, *prev;
-
- prev = reg;
- head = prev->chain;
- if (IS_NOT_NULL(head)) {
- reg->state = ONIG_STATE_MODIFY;
- while (IS_NOT_NULL(head->chain)) {
- prev = head;
- head = head->chain;
- }
- prev->chain = (regex_t* )NULL;
- REGEX_TRANSFER(reg, head);
- }
+ return onig_compile_ruby(reg, pattern, pattern_end, einfo, NULL, 0);
}
-
-#ifdef ONIG_DEBUG
-static void print_compiled_byte_code_list P_((FILE* f, regex_t* reg));
-#endif
-#ifdef ONIG_DEBUG_PARSE_TREE
-static void print_tree P_((FILE* f, Node* node));
#endif
+#ifdef RUBY
+extern int
+onig_compile_ruby(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
+ OnigErrorInfo* einfo, const char *sourcefile, int sourceline)
+#else
extern int
onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
- OnigErrorInfo* einfo, const char *sourcefile, int sourceline)
+ OnigErrorInfo* einfo)
+#endif
{
#define COMPILE_INIT_SIZE 20
@@ -5687,12 +5729,13 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;
+#ifdef RUBY
scan_env.sourcefile = sourcefile;
scan_env.sourceline = sourceline;
- reg->state = ONIG_STATE_COMPILING;
+#endif
#ifdef ONIG_DEBUG
- if (!onig_is_prelude()) print_enc_string(stderr, reg->enc, pattern, pattern_end);
+ print_enc_string(stderr, reg->enc, pattern, pattern_end);
#endif
if (reg->alloc == 0) {
@@ -5719,9 +5762,7 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
#ifdef ONIG_DEBUG_PARSE_TREE
# if 0
fprintf(stderr, "ORIGINAL PARSE TREE:\n");
- if (!onig_is_prelude()) {
- print_tree(stderr, root);
- }
+ print_tree(stderr, root);
# endif
#endif
@@ -5757,11 +5798,11 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
reg->num_call = 0;
#endif
- r = setup_tree(root, reg, IN_ROOT, &scan_env);
+ r = setup_tree(root, reg, 0, &scan_env);
if (r != 0) goto err_unset;
#ifdef ONIG_DEBUG_PARSE_TREE
- if (!onig_is_prelude()) print_tree(stderr, root);
+ print_tree(stderr, root);
#endif
reg->capture_history = scan_env.capture_history;
@@ -5776,24 +5817,24 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
#ifdef USE_COMBINATION_EXPLOSION_CHECK
if (scan_env.backrefed_mem == 0
-#ifdef USE_SUBEXP_CALL
+# ifdef USE_SUBEXP_CALL
|| scan_env.num_call == 0
-#endif
+# endif
) {
setup_comb_exp_check(root, 0, &scan_env);
-#ifdef USE_SUBEXP_CALL
+# ifdef USE_SUBEXP_CALL
if (scan_env.has_recursion != 0) {
scan_env.num_comb_exp_check = 0;
}
else
-#endif
+# endif
if (scan_env.comb_exp_max_regnum > 0) {
int i;
for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) {
- if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) {
- scan_env.num_comb_exp_check = 0;
- break;
- }
+ if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) {
+ scan_env.num_comb_exp_check = 0;
+ break;
+ }
}
}
}
@@ -5827,9 +5868,9 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
reg->stack_pop_level = STACK_POP_LEVEL_ALL;
else {
if (reg->bt_mem_start != 0)
- reg->stack_pop_level = STACK_POP_LEVEL_MEM_START;
+ reg->stack_pop_level = STACK_POP_LEVEL_MEM_START;
else
- reg->stack_pop_level = STACK_POP_LEVEL_FREE;
+ reg->stack_pop_level = STACK_POP_LEVEL_FREE;
}
}
#ifdef USE_SUBEXP_CALL
@@ -5840,14 +5881,14 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
onig_node_free(root);
#ifdef ONIG_DEBUG_COMPILE
-#ifdef USE_NAMED_GROUP
- if (!onig_is_prelude()) onig_print_names(stderr, reg);
-#endif
- if (!onig_is_prelude()) print_compiled_byte_code_list(stderr, reg);
+# ifdef USE_NAMED_GROUP
+ onig_print_names(stderr, reg);
+# endif
+ print_compiled_byte_code_list(stderr, reg);
#endif
end:
- reg->state = ONIG_STATE_NORMAL;
+ onig_reg_resize(reg);
return r;
err_unset:
@@ -5866,38 +5907,17 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
}
onig_node_free(root);
- if (IS_NOT_NULL(scan_env.mem_nodes_dynamic))
- xfree(scan_env.mem_nodes_dynamic);
- return r;
-}
-
-#ifdef USE_RECOMPILE_API
-extern int
-onig_recompile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
- OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
- OnigErrorInfo* einfo)
-{
- int r;
- regex_t *new_reg;
+ xfree(scan_env.mem_nodes_dynamic);
- r = onig_new(&new_reg, pattern, pattern_end, option, enc, syntax, einfo);
- if (r) return r;
- if (ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
- onig_transfer(reg, new_reg);
- }
- else {
- onig_chain_link_add(reg, new_reg);
- }
- return 0;
+ return r;
}
-#endif
static int onig_inited = 0;
extern int
onig_reg_init(regex_t* reg, OnigOptionType option,
- OnigCaseFoldType case_fold_flag,
- OnigEncoding enc, const OnigSyntaxType* syntax)
+ OnigCaseFoldType case_fold_flag,
+ OnigEncoding enc, const OnigSyntaxType* syntax)
{
if (! onig_inited)
onig_init();
@@ -5905,6 +5925,12 @@ onig_reg_init(regex_t* reg, OnigOptionType option,
if (IS_NULL(reg))
return ONIGERR_INVALID_ARGUMENT;
+ (reg)->exact = (UChar* )NULL;
+ (reg)->chain = (regex_t* )NULL;
+ (reg)->p = (UChar* )NULL;
+ (reg)->name_table = (void* )NULL;
+ (reg)->repeat_range = (OnigRepeatRange* )NULL;
+
if (ONIGENC_IS_UNDEF(enc))
return ONIGERR_DEFAULT_ENCODING_IS_NOT_SET;
@@ -5913,8 +5939,6 @@ onig_reg_init(regex_t* reg, OnigOptionType option,
return ONIGERR_INVALID_COMBINATION_OF_OPTIONS;
}
- (reg)->state = ONIG_STATE_MODIFY;
-
if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {
option |= syntax->options;
option &= ~ONIG_OPTION_SINGLELINE;
@@ -5926,56 +5950,53 @@ onig_reg_init(regex_t* reg, OnigOptionType option,
(reg)->options = option;
(reg)->syntax = syntax;
(reg)->optimize = 0;
- (reg)->exact = (UChar* )NULL;
- (reg)->int_map = (int* )NULL;
- (reg)->int_map_backward = (int* )NULL;
- (reg)->chain = (regex_t* )NULL;
- (reg)->p = (UChar* )NULL;
(reg)->alloc = 0;
(reg)->used = 0;
- (reg)->name_table = (void* )NULL;
(reg)->case_fold_flag = case_fold_flag;
+
+ (reg)->timelimit = 0;
+
return 0;
}
extern int
onig_new_without_alloc(regex_t* reg, const UChar* pattern,
const UChar* pattern_end, OnigOptionType option, OnigEncoding enc,
- OnigSyntaxType* syntax, OnigErrorInfo* einfo)
+ const OnigSyntaxType* syntax, OnigErrorInfo* einfo)
{
int r;
r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
if (r) return r;
- r = onig_compile(reg, pattern, pattern_end, einfo, NULL, 0);
+ r = onig_compile(reg, pattern, pattern_end, einfo);
return r;
}
extern int
onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
- OnigOptionType option, OnigEncoding enc, const OnigSyntaxType* syntax,
- OnigErrorInfo* einfo)
+ OnigOptionType option, OnigEncoding enc, const OnigSyntaxType* syntax,
+ OnigErrorInfo* einfo)
{
- int r;
-
*reg = (regex_t* )xmalloc(sizeof(regex_t));
if (IS_NULL(*reg)) return ONIGERR_MEMORY;
- r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
- if (r) goto err;
-
- r = onig_compile(*reg, pattern, pattern_end, einfo, NULL, 0);
+ int r = onig_new_without_alloc(*reg, pattern, pattern_end, option, enc, syntax, einfo);
if (r) {
- err:
onig_free(*reg);
*reg = NULL;
}
+
return r;
}
+/*
+ * Initialization entry point that takes an encoding list.  Both arguments
+ * are ignored (they are marked ARG_UNUSED); the call simply forwards to
+ * onig_init() and returns its result.
+ */
+extern int
+onig_initialize(OnigEncoding encodings[] ARG_UNUSED, int n ARG_UNUSED)
+{
+  int status = onig_init();
+
+  return status;
+}
extern int
onig_init(void)
@@ -5983,11 +6004,12 @@ onig_init(void)
if (onig_inited != 0)
return 0;
- THREAD_SYSTEM_INIT;
- THREAD_ATOMIC_START;
-
onig_inited = 1;
+#if defined(ONIG_DEBUG_MEMLEAK) && defined(_MSC_VER)
+ _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF);
+#endif
+
onigenc_init();
/* onigenc_set_default_caseconv_table((UChar* )0); */
@@ -5995,32 +6017,56 @@ onig_init(void)
onig_statistics_init();
#endif
- THREAD_ATOMIC_END;
return 0;
}
+static OnigEndCallListItemType* EndCallTop;
+
+/*
+ * Register `func` on the EndCallTop list: a new list item is allocated
+ * and pushed at the head of the list.  If the allocation fails the
+ * function returns silently and nothing is registered.
+ */
+extern void onig_add_end_call(void (*func)(void))
+{
+  OnigEndCallListItemType* entry;
+
+  entry = (OnigEndCallListItemType* )xmalloc(sizeof(*entry));
+  if (entry != 0) {
+    /* Link in front of the current head. */
+    entry->func = func;
+    entry->next = EndCallTop;
+    EndCallTop  = entry;
+  }
+}
+
+/*
+ * Run every callback on the EndCallTop list, starting from the head, and
+ * release each list item as it is consumed.  The list is empty on return.
+ *
+ * Each item is unlinked and freed BEFORE its callback is invoked.  A
+ * callback that itself calls onig_add_end_call() therefore pushes onto a
+ * consistent list and its new entry is run on a later iteration; running
+ * the callback while its item was still the head would free that new
+ * entry unexecuted and run the current callback a second time.
+ */
+static void
+exec_end_call_list(void)
+{
+  OnigEndCallListItemType* item;
+  void (*func)(void);
+
+  while ((item = EndCallTop) != 0) {
+    EndCallTop = item->next;
+    func = item->func;   /* keep the callback; the item is freed next */
+    xfree(item);
+
+    (*func)();
+  }
+}
+
extern int
onig_end(void)
{
- THREAD_ATOMIC_START;
+ exec_end_call_list();
#ifdef ONIG_DEBUG_STATISTICS
- if (!onig_is_prelude()) onig_print_statistics(stderr);
+ onig_print_statistics(stderr);
#endif
-#ifdef USE_SHARED_CCLASS_TABLE
- onig_free_shared_cclass_table();
-#endif
-
-#ifdef USE_PARSE_TREE_NODE_RECYCLE
- onig_free_node_list();
+#if defined(ONIG_DEBUG_MEMLEAK) && defined(_MSC_VER)
+ _CrtDumpMemoryLeaks();
#endif
onig_inited = 0;
- THREAD_ATOMIC_END;
- THREAD_SYSTEM_END;
return 0;
}
@@ -6086,14 +6132,14 @@ onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
#ifdef ONIG_DEBUG
/* arguments type */
-#define ARG_SPECIAL -1
-#define ARG_NON 0
-#define ARG_RELADDR 1
-#define ARG_ABSADDR 2
-#define ARG_LENGTH 3
-#define ARG_MEMNUM 4
-#define ARG_OPTION 5
-#define ARG_STATE_CHECK 6
+# define ARG_SPECIAL -1
+# define ARG_NON 0
+# define ARG_RELADDR 1
+# define ARG_ABSADDR 2
+# define ARG_LENGTH 3
+# define ARG_MEMNUM 4
+# define ARG_OPTION 5
+# define ARG_STATE_CHECK 6
OnigOpInfoType OnigOpInfo[] = {
{ OP_FINISH, "finish", ARG_NON },
@@ -6118,7 +6164,6 @@ OnigOpInfoType OnigOpInfo[] = {
{ OP_CCLASS_NOT, "cclass-not", ARG_SPECIAL },
{ OP_CCLASS_MB_NOT, "cclass-mb-not", ARG_SPECIAL },
{ OP_CCLASS_MIX_NOT, "cclass-mix-not", ARG_SPECIAL },
- { OP_CCLASS_NODE, "cclass-node", ARG_SPECIAL },
{ OP_ANYCHAR, "anychar", ARG_NON },
{ OP_ANYCHAR_ML, "anychar-ml", ARG_NON },
{ OP_ANYCHAR_STAR, "anychar*", ARG_NON },
@@ -6143,7 +6188,6 @@ OnigOpInfoType OnigOpInfo[] = {
{ OP_END_LINE, "end-line", ARG_NON },
{ OP_SEMI_END_BUF, "semi-end-buf", ARG_NON },
{ OP_BEGIN_POSITION, "begin-position", ARG_NON },
- { OP_BEGIN_POS_OR_LINE, "begin-pos-or-line", ARG_NON },
{ OP_BACKREF1, "backref1", ARG_NON },
{ OP_BACKREF2, "backref2", ARG_NON },
{ OP_BACKREFN, "backrefn", ARG_MEMNUM },
@@ -6185,6 +6229,9 @@ OnigOpInfoType OnigOpInfo[] = {
{ OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL },
{ OP_PUSH_LOOK_BEHIND_NOT, "push-look-behind-not", ARG_SPECIAL },
{ OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON },
+ { OP_PUSH_ABSENT_POS, "push-absent-pos", ARG_NON },
+ { OP_ABSENT, "absent", ARG_RELADDR },
+ { OP_ABSENT_END, "absent-end", ARG_NON },
{ OP_CALL, "call", ARG_ABSADDR },
{ OP_RETURN, "return", ARG_NON },
{ OP_CONDITION, "condition", ARG_SPECIAL },
@@ -6221,15 +6268,17 @@ op2arg_type(int opcode)
return ARG_SPECIAL;
}
+# ifdef ONIG_DEBUG_PARSE_TREE
static void
Indent(FILE* f, int indent)
{
int i;
for (i = 0; i < indent; i++) putc(' ', f);
}
+# endif /* ONIG_DEBUG_PARSE_TREE */
static void
-p_string(FILE* f, int len, UChar* s)
+p_string(FILE* f, ptrdiff_t len, UChar* s)
{
fputs(":", f);
while (len-- > 0) { fputc(*s++, f); }
@@ -6265,7 +6314,7 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp,
break;
case ARG_RELADDR:
GET_RELADDR_INC(addr, bp);
- fprintf(f, ":(%d)", addr);
+ fprintf(f, ":(%s%d)", (addr >= 0) ? "+" : "", addr);
break;
case ARG_ABSADDR:
GET_ABSADDR_INC(addr, bp);
@@ -6282,9 +6331,9 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp,
break;
case ARG_OPTION:
{
- OnigOptionType option = *((OnigOptionType* )bp);
- bp += SIZE_OPTION;
- fprintf(f, ":%d", option);
+ OnigOptionType option = *((OnigOptionType* )bp);
+ bp += SIZE_OPTION;
+ fprintf(f, ":%d", option);
}
break;
@@ -6333,13 +6382,13 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp,
break;
case OP_EXACTMBN:
{
- int mb_len;
+ int mb_len;
- GET_LENGTH_INC(mb_len, bp);
- GET_LENGTH_INC(len, bp);
- fprintf(f, ":%d:%d:", mb_len, len);
- n = len * mb_len;
- while (n-- > 0) { fputc(*bp++, f); }
+ GET_LENGTH_INC(mb_len, bp);
+ GET_LENGTH_INC(len, bp);
+ fprintf(f, ":%d:%d:", mb_len, len);
+ n = len * mb_len;
+ while (n-- > 0) { fputc(*bp++, f); }
}
break;
@@ -6370,9 +6419,9 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp,
case OP_CCLASS_MB_NOT:
GET_LENGTH_INC(len, bp);
q = bp;
-#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
+# ifndef PLATFORM_UNALIGNED_WORD_ACCESS
ALIGNMENT_RIGHT(q);
-#endif
+# endif
GET_CODE_POINT(code, q);
bp += len;
fprintf(f, ":%d:%d", (int )code, len);
@@ -6384,24 +6433,14 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp,
bp += SIZE_BITSET;
GET_LENGTH_INC(len, bp);
q = bp;
-#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
+# ifndef PLATFORM_UNALIGNED_WORD_ACCESS
ALIGNMENT_RIGHT(q);
-#endif
+# endif
GET_CODE_POINT(code, q);
bp += len;
fprintf(f, ":%d:%d:%d", n, (int )code, len);
break;
- case OP_CCLASS_NODE:
- {
- CClassNode *cc;
-
- GET_POINTER_INC(cc, bp);
- n = bitset_on_num(cc->bs);
- fprintf(f, ":%"PRIuPTR":%d", (uintptr_t)cc, n);
- }
- break;
-
case OP_BACKREFN_IC:
mem = *((MemNumType* )bp);
bp += SIZE_MEMNUM;
@@ -6413,40 +6452,40 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp,
fputs(" ", f);
GET_LENGTH_INC(len, bp);
for (i = 0; i < len; i++) {
- GET_MEMNUM_INC(mem, bp);
- if (i > 0) fputs(", ", f);
- fprintf(f, "%d", mem);
+ GET_MEMNUM_INC(mem, bp);
+ if (i > 0) fputs(", ", f);
+ fprintf(f, "%d", mem);
}
break;
case OP_BACKREF_WITH_LEVEL:
{
- OnigOptionType option;
- LengthType level;
-
- GET_OPTION_INC(option, bp);
- fprintf(f, ":%d", option);
- GET_LENGTH_INC(level, bp);
- fprintf(f, ":%d", level);
-
- fputs(" ", f);
- GET_LENGTH_INC(len, bp);
- for (i = 0; i < len; i++) {
- GET_MEMNUM_INC(mem, bp);
- if (i > 0) fputs(", ", f);
- fprintf(f, "%d", mem);
- }
+ OnigOptionType option;
+ LengthType level;
+
+ GET_OPTION_INC(option, bp);
+ fprintf(f, ":%d", option);
+ GET_LENGTH_INC(level, bp);
+ fprintf(f, ":%d", level);
+
+ fputs(" ", f);
+ GET_LENGTH_INC(len, bp);
+ for (i = 0; i < len; i++) {
+ GET_MEMNUM_INC(mem, bp);
+ if (i > 0) fputs(", ", f);
+ fprintf(f, "%d", mem);
+ }
}
break;
case OP_REPEAT:
case OP_REPEAT_NG:
{
- mem = *((MemNumType* )bp);
- bp += SIZE_MEMNUM;
- addr = *((RelAddrType* )bp);
- bp += SIZE_RELADDR;
- fprintf(f, ":%d:%d", mem, addr);
+ mem = *((MemNumType* )bp);
+ bp += SIZE_MEMNUM;
+ addr = *((RelAddrType* )bp);
+ bp += SIZE_RELADDR;
+ fprintf(f, ":%d:%d", mem, addr);
}
break;
@@ -6454,7 +6493,7 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp,
case OP_PUSH_IF_PEEK_NEXT:
addr = *((RelAddrType* )bp);
bp += SIZE_RELADDR;
- fprintf(f, ":(%d)", addr);
+ fprintf(f, ":(%s%d)", (addr >= 0) ? "+" : "", addr);
p_string(f, 1, bp);
bp += 1;
break;
@@ -6467,7 +6506,7 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp,
case OP_PUSH_LOOK_BEHIND_NOT:
GET_RELADDR_INC(addr, bp);
GET_LENGTH_INC(len, bp);
- fprintf(f, ":%d:(%d)", len, addr);
+ fprintf(f, ":%d:(%s%d)", len, (addr >= 0) ? "+" : "", addr);
break;
case OP_STATE_CHECK_PUSH:
@@ -6476,24 +6515,25 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp,
bp += SIZE_STATE_CHECK_NUM;
addr = *((RelAddrType* )bp);
bp += SIZE_RELADDR;
- fprintf(f, ":%d:(%d)", scn, addr);
+ fprintf(f, ":%d:(%s%d)", scn, (addr >= 0) ? "+" : "", addr);
break;
case OP_CONDITION:
GET_MEMNUM_INC(mem, bp);
GET_RELADDR_INC(addr, bp);
- fprintf(f, ":%d:(%d)", mem, addr);
+ fprintf(f, ":%d:(%s%d)", mem, (addr >= 0) ? "+" : "", addr);
break;
default:
fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n",
- *--bp);
+ bp[-1]);
}
}
fputs("]", f);
if (nextp) *nextp = bp;
}
+# ifdef ONIG_DEBUG_COMPILE
static void
print_compiled_byte_code_list(FILE* f, regex_t* reg)
{
@@ -6515,7 +6555,9 @@ print_compiled_byte_code_list(FILE* f, regex_t* reg)
fprintf(f, "\n");
}
+# endif /* ONIG_DEBUG_COMPILE */
+# ifdef ONIG_DEBUG_PARSE_TREE
static void
print_indent_tree(FILE* f, Node* node, int indent)
{
@@ -6534,15 +6576,15 @@ print_indent_tree(FILE* f, Node* node, int indent)
case NT_LIST:
case NT_ALT:
if (NTYPE(node) == NT_LIST)
- fprintf(f, "<list:%"PRIxPTR">\n", (intptr_t)node);
+ fprintf(f, "<list:%"PRIxPTR">\n", (intptr_t )node);
else
- fprintf(f, "<alt:%"PRIxPTR">\n", (intptr_t)node);
+ fprintf(f, "<alt:%"PRIxPTR">\n", (intptr_t )node);
print_indent_tree(f, NCAR(node), indent + add);
while (IS_NOT_NULL(node = NCDR(node))) {
if (NTYPE(node) != type) {
- fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node));
- exit(0);
+ fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node));
+ exit(0);
}
print_indent_tree(f, NCAR(node), indent + add);
}
@@ -6550,36 +6592,39 @@ print_indent_tree(FILE* f, Node* node, int indent)
case NT_STR:
fprintf(f, "<string%s:%"PRIxPTR">",
- (NSTRING_IS_RAW(node) ? "-raw" : ""), (intptr_t)node);
+ (NSTRING_IS_RAW(node) ? "-raw" : ""), (intptr_t )node);
for (p = NSTR(node)->s; p < NSTR(node)->end; p++) {
if (*p >= 0x20 && *p < 0x7f)
- fputc(*p, f);
+ fputc(*p, f);
else {
- fprintf(f, " 0x%02x", *p);
+ fprintf(f, " 0x%02x", *p);
}
}
break;
case NT_CCLASS:
- fprintf(f, "<cclass:%"PRIxPTR">", (intptr_t)node);
- if (IS_NCCLASS_NOT(NCCLASS(node))) fputs(" not", f);
+ fprintf(f, "<cclass:%"PRIxPTR">", (intptr_t )node);
+ if (IS_NCCLASS_NOT(NCCLASS(node))) fputs("not ", f);
if (NCCLASS(node)->mbuf) {
BBuf* bbuf = NCCLASS(node)->mbuf;
- for (i = 0; i < (int )bbuf->used; i++) {
- if (i > 0) fprintf(f, ",");
- fprintf(f, "%0x", bbuf->p[i]);
+ OnigCodePoint* data = (OnigCodePoint* )bbuf->p;
+ OnigCodePoint* end = (OnigCodePoint* )(bbuf->p + bbuf->used);
+ fprintf(f, "%d", *data++);
+ for (; data < end; data+=2) {
+ fprintf(f, ",");
+ fprintf(f, "%04x-%04x", data[0], data[1]);
}
}
break;
case NT_CTYPE:
- fprintf(f, "<ctype:%"PRIxPTR"> ", (intptr_t)node);
+ fprintf(f, "<ctype:%"PRIxPTR"> ", (intptr_t )node);
switch (NCTYPE(node)->ctype) {
case ONIGENC_CTYPE_WORD:
if (NCTYPE(node)->not != 0)
- fputs("not word", f);
+ fputs("not word", f);
else
- fputs("word", f);
+ fputs("word", f);
break;
default:
@@ -6589,11 +6634,11 @@ print_indent_tree(FILE* f, Node* node, int indent)
break;
case NT_CANY:
- fprintf(f, "<anychar:%"PRIxPTR">", (intptr_t)node);
+ fprintf(f, "<anychar:%"PRIxPTR">", (intptr_t )node);
break;
case NT_ANCHOR:
- fprintf(f, "<anchor:%"PRIxPTR"> ", (intptr_t)node);
+ fprintf(f, "<anchor:%"PRIxPTR"> ", (intptr_t )node);
switch (NANCHOR(node)->type) {
case ANCHOR_BEGIN_BUF: fputs("begin buf", f); break;
case ANCHOR_END_BUF: fputs("end buf", f); break;
@@ -6601,14 +6646,13 @@ print_indent_tree(FILE* f, Node* node, int indent)
case ANCHOR_END_LINE: fputs("end line", f); break;
case ANCHOR_SEMI_END_BUF: fputs("semi end buf", f); break;
case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break;
- case ANCHOR_ANYCHAR_STAR: fputs("begin position/line", f); break;
case ANCHOR_WORD_BOUND: fputs("word bound", f); break;
case ANCHOR_NOT_WORD_BOUND: fputs("not word bound", f); break;
-#ifdef USE_WORD_BEGIN_END
+# ifdef USE_WORD_BEGIN_END
case ANCHOR_WORD_BEGIN: fputs("word begin", f); break;
case ANCHOR_WORD_END: fputs("word end", f); break;
-#endif
+# endif
case ANCHOR_PREC_READ: fputs("prec read", f); container_p = TRUE; break;
case ANCHOR_PREC_READ_NOT: fputs("prec read not", f); container_p = TRUE; break;
case ANCHOR_LOOK_BEHIND: fputs("look_behind", f); container_p = TRUE; break;
@@ -6626,33 +6670,33 @@ print_indent_tree(FILE* f, Node* node, int indent)
int* p;
BRefNode* br = NBREF(node);
p = BACKREFS_P(br);
- fprintf(f, "<backref:%"PRIxPTR">", (intptr_t)node);
+ fprintf(f, "<backref:%"PRIxPTR">", (intptr_t )node);
for (i = 0; i < br->back_num; i++) {
- if (i > 0) fputs(", ", f);
- fprintf(f, "%d", p[i]);
+ if (i > 0) fputs(", ", f);
+ fprintf(f, "%d", p[i]);
}
}
break;
-#ifdef USE_SUBEXP_CALL
+# ifdef USE_SUBEXP_CALL
case NT_CALL:
{
CallNode* cn = NCALL(node);
- fprintf(f, "<call:%"PRIxPTR">", (intptr_t)node);
+ fprintf(f, "<call:%"PRIxPTR">", (intptr_t )node);
p_string(f, cn->name_end - cn->name, cn->name);
}
break;
-#endif
+# endif
case NT_QTFR:
- fprintf(f, "<quantifier:%"PRIxPTR">{%d,%d}%s\n", (intptr_t)node,
- NQTFR(node)->lower, NQTFR(node)->upper,
- (NQTFR(node)->greedy ? "" : "?"));
+ fprintf(f, "<quantifier:%"PRIxPTR">{%d,%d}%s\n", (intptr_t )node,
+ NQTFR(node)->lower, NQTFR(node)->upper,
+ (NQTFR(node)->greedy ? "" : "?"));
print_indent_tree(f, NQTFR(node)->target, indent + add);
break;
case NT_ENCLOSE:
- fprintf(f, "<enclose:%"PRIxPTR"> ", (intptr_t)node);
+ fprintf(f, "<enclose:%"PRIxPTR"> ", (intptr_t )node);
switch (NENCLOSE(node)->type) {
case ENCLOSE_OPTION:
fprintf(f, "option:%d", NENCLOSE(node)->option);
@@ -6666,6 +6710,9 @@ print_indent_tree(FILE* f, Node* node, int indent)
case ENCLOSE_CONDITION:
fprintf(f, "condition:%d", NENCLOSE(node)->regnum);
break;
+ case ENCLOSE_ABSENT:
+ fprintf(f, "absent");
+ break;
default:
break;
@@ -6687,12 +6734,11 @@ print_indent_tree(FILE* f, Node* node, int indent)
fflush(f);
}
-#endif /* ONIG_DEBUG */
-#ifdef ONIG_DEBUG_PARSE_TREE
static void
print_tree(FILE* f, Node* node)
{
print_indent_tree(f, node, 0);
}
-#endif
+# endif /* ONIG_DEBUG_PARSE_TREE */
+#endif /* ONIG_DEBUG */