summaryrefslogtreecommitdiff
path: root/regcomp.c
diff options
context:
space:
mode:
Diffstat (limited to 'regcomp.c')
-rw-r--r--regcomp.c5413
1 files changed, 3362 insertions, 2051 deletions
diff --git a/regcomp.c b/regcomp.c
index de44cfe037..1895ac24ed 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -1,21 +1,95 @@
/**********************************************************************
+ regcomp.c - Onigmo (Oniguruma-mod) (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2011-2019 K.Takata <kentkt AT csc DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
- regcomp.c - Oniguruma (regular expression library)
+#include "regparse.h"
- Copyright (C) 2002-2004 K.Kosako (kosako@sofnec.co.jp)
+OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN;
+
+extern OnigCaseFoldType
+onig_get_default_case_fold_flag(void)
+{
+ return OnigDefaultCaseFoldFlag;
+}
+
+extern int
+onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)
+{
+ OnigDefaultCaseFoldFlag = case_fold_flag;
+ return 0;
+}
-**********************************************************************/
-#include "regparse.h"
#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
#endif
+#if 0
+static UChar*
+str_dup(UChar* s, UChar* end)
+{
+ ptrdiff_t len = end - s;
+
+ if (len > 0) {
+ UChar* r = (UChar* )xmalloc(len + 1);
+ CHECK_NULL_RETURN(r);
+ xmemcpy(r, s, len);
+ r[len] = (UChar )0;
+ return r;
+ }
+ else return NULL;
+}
+#endif
+
static void
swap_node(Node* a, Node* b)
{
Node c;
c = *a; *a = *b; *b = c;
+
+ if (NTYPE(a) == NT_STR) {
+ StrNode* sn = NSTR(a);
+ if (sn->capa == 0) {
+ size_t len = sn->end - sn->s;
+ sn->s = sn->buf;
+ sn->end = sn->s + len;
+ }
+ }
+
+ if (NTYPE(b) == NT_STR) {
+ StrNode* sn = NSTR(b);
+ if (sn->capa == 0) {
+ size_t len = sn->end - sn->s;
+ sn->s = sn->buf;
+ sn->end = sn->s + len;
+ }
+ }
}
static OnigDistance
@@ -64,13 +138,40 @@ bitset_on_num(BitSetRef bs)
}
#endif
+// Attempt to right size allocated buffers for a regex post compile
+static void
+onig_reg_resize(regex_t *reg)
+{
+ do {
+ if (!reg->used) {
+ xfree(reg->p);
+ reg->alloc = 0;
+ reg->p = 0;
+ }
+ else if (reg->alloc > reg->used) {
+ unsigned char *new_ptr = xrealloc(reg->p, reg->used);
+ // Skip the right size optimization if memory allocation fails
+ if (new_ptr) {
+ reg->alloc = reg->used;
+ reg->p = new_ptr;
+ }
+ }
+ } while ((reg = reg->chain) != 0);
+}
+
extern int
-onig_bbuf_init(BBuf* buf, int size)
+onig_bbuf_init(BBuf* buf, OnigDistance size)
{
- buf->p = (UChar* )xmalloc(size);
- if (IS_NULL(buf->p)) return(ONIGERR_MEMORY);
+ if (size <= 0) {
+ size = 0;
+ buf->p = NULL;
+ }
+ else {
+ buf->p = (UChar* )xmalloc(size);
+ if (IS_NULL(buf->p)) return(ONIGERR_MEMORY);
+ }
- buf->alloc = size;
+ buf->alloc = (unsigned int )size;
buf->used = 0;
return 0;
}
@@ -84,7 +185,7 @@ unset_addr_list_init(UnsetAddrList* uslist, int size)
UnsetAddr* p;
p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size);
- CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY);
+ CHECK_NULL_RETURN_MEMERR(p);
uslist->num = 0;
uslist->alloc = size;
uslist->us = p;
@@ -94,8 +195,7 @@ unset_addr_list_init(UnsetAddrList* uslist, int size)
static void
unset_addr_list_end(UnsetAddrList* uslist)
{
- if (IS_NOT_NULL(uslist->us))
- xfree(uslist->us);
+ xfree(uslist->us);
}
static int
@@ -107,7 +207,7 @@ unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node)
if (uslist->num >= uslist->alloc) {
size = uslist->alloc * 2;
p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size);
- CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY);
+ CHECK_NULL_RETURN_MEMERR(p);
uslist->alloc = size;
uslist->us = p;
}
@@ -120,52 +220,30 @@ unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node)
#endif /* USE_SUBEXP_CALL */
-#if 0
static int
-bitset_mbmaxlen(BitSetRef bs, int negative, OnigEncoding enc)
+add_opcode(regex_t* reg, int opcode)
{
- int i;
- int len, maxlen = 0;
-
- if (negative) {
- for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
- if (! BITSET_AT(bs, i)) {
- len = enc_len(enc, i);
- if (len > maxlen) maxlen = len;
- }
- }
- }
- else {
- for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
- if (BITSET_AT(bs, i)) {
- len = enc_len(enc, i);
- if (len > maxlen) maxlen = len;
- }
- }
- }
- return maxlen;
+ BBUF_ADD1(reg, opcode);
+ return 0;
}
-#endif
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
static int
-add_opcode(regex_t* reg, int opcode)
+add_state_check_num(regex_t* reg, int num)
{
- BBUF_ADD1(reg, opcode);
+ StateCheckNumType n = (StateCheckNumType )num;
+
+ BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM);
return 0;
}
+#endif
static int
add_rel_addr(regex_t* reg, int addr)
{
RelAddrType ra = (RelAddrType )addr;
-#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
BBUF_ADD(reg, &ra, SIZE_RELADDR);
-#else
- UChar buf[SERIALIZE_BUFSIZE];
- SERIALIZE_RELADDR(ra, buf);
- BBUF_ADD(reg, buf, SIZE_RELADDR);
-#endif
return 0;
}
@@ -174,28 +252,16 @@ add_abs_addr(regex_t* reg, int addr)
{
AbsAddrType ra = (AbsAddrType )addr;
-#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
BBUF_ADD(reg, &ra, SIZE_ABSADDR);
-#else
- UChar buf[SERIALIZE_BUFSIZE];
- SERIALIZE_ABSADDR(ra, buf);
- BBUF_ADD(reg, buf, SIZE_ABSADDR);
-#endif
return 0;
}
static int
-add_length(regex_t* reg, int len)
+add_length(regex_t* reg, OnigDistance len)
{
LengthType l = (LengthType )len;
-#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
BBUF_ADD(reg, &l, SIZE_LENGTH);
-#else
- UChar buf[SERIALIZE_BUFSIZE];
- SERIALIZE_LENGTH(l, buf);
- BBUF_ADD(reg, buf, SIZE_LENGTH);
-#endif
return 0;
}
@@ -204,29 +270,17 @@ add_mem_num(regex_t* reg, int num)
{
MemNumType n = (MemNumType )num;
-#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
BBUF_ADD(reg, &n, SIZE_MEMNUM);
-#else
- UChar buf[SERIALIZE_BUFSIZE];
- SERIALIZE_MEMNUM(n, buf);
- BBUF_ADD(reg, buf, SIZE_MEMNUM);
-#endif
return 0;
}
#if 0
static int
-add_repeat_num(regex_t* reg, int num)
+add_pointer(regex_t* reg, void* addr)
{
- RepeatNumType n = (RepeatNumType )num;
+ PointerType ptr = (PointerType )addr;
-#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
- BBUF_ADD(reg, &n, SIZE_REPEATNUM);
-#else
- UChar buf[SERIALIZE_BUFSIZE];
- SERIALIZE_REPEATNUM(n, buf);
- BBUF_ADD(reg, buf, SIZE_REPEATNUM);
-#endif
+ BBUF_ADD(reg, &ptr, SIZE_POINTER);
return 0;
}
#endif
@@ -234,13 +288,7 @@ add_repeat_num(regex_t* reg, int num)
static int
add_option(regex_t* reg, OnigOptionType option)
{
-#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
BBUF_ADD(reg, &option, SIZE_OPTION);
-#else
- UChar buf[SERIALIZE_BUFSIZE];
- SERIALIZE_OPTION(option, buf);
- BBUF_ADD(reg, buf, SIZE_OPTION);
-#endif
return 0;
}
@@ -256,7 +304,7 @@ add_opcode_rel_addr(regex_t* reg, int opcode, int addr)
}
static int
-add_bytes(regex_t* reg, UChar* bytes, int len)
+add_bytes(regex_t* reg, UChar* bytes, OnigDistance len)
{
BBUF_ADD(reg, bytes, len);
return 0;
@@ -289,19 +337,20 @@ static int compile_tree(Node* node, regex_t* reg);
(op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC)
static int
-select_str_opcode(int mb_len, int str_len, int ignore_case)
+select_str_opcode(int mb_len, OnigDistance byte_len, int ignore_case)
{
int op;
+ OnigDistance str_len = roomof(byte_len, mb_len);
- switch (mb_len) {
- case 1:
- if (ignore_case) {
- switch (str_len) {
- case 1: op = OP_EXACT1_IC; break;
- default: op = OP_EXACTN_IC; break;
- }
+ if (ignore_case) {
+ switch (str_len) {
+ case 1: op = OP_EXACT1_IC; break;
+ default: op = OP_EXACTN_IC; break;
}
- else {
+ }
+ else {
+ switch (mb_len) {
+ case 1:
switch (str_len) {
case 1: op = OP_EXACT1; break;
case 2: op = OP_EXACT2; break;
@@ -310,25 +359,25 @@ select_str_opcode(int mb_len, int str_len, int ignore_case)
case 5: op = OP_EXACT5; break;
default: op = OP_EXACTN; break;
}
- }
- break;
+ break;
- case 2:
- switch (str_len) {
- case 1: op = OP_EXACTMB2N1; break;
- case 2: op = OP_EXACTMB2N2; break;
- case 3: op = OP_EXACTMB2N3; break;
- default: op = OP_EXACTMB2N; break;
- }
- break;
+ case 2:
+ switch (str_len) {
+ case 1: op = OP_EXACTMB2N1; break;
+ case 2: op = OP_EXACTMB2N2; break;
+ case 3: op = OP_EXACTMB2N3; break;
+ default: op = OP_EXACTMB2N; break;
+ }
+ break;
- case 3:
- op = OP_EXACTMB3N;
- break;
+ case 3:
+ op = OP_EXACTMB3N;
+ break;
- default:
- op = OP_EXACTMBN;
- break;
+ default:
+ op = OP_EXACTMBN;
+ break;
+ }
}
return op;
}
@@ -345,6 +394,7 @@ compile_tree_empty_check(Node* node, regex_t* reg, int empty_info)
r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */
if (r) return r;
reg->num_null_check++;
+ if ((MemNumType)reg->num_null_check <= 0) return ONIGERR_TOO_MANY_NULL_CHECK;
}
r = compile_tree(node, reg);
@@ -373,7 +423,7 @@ compile_call(CallNode* node, regex_t* reg)
r = add_opcode(reg, OP_CALL);
if (r) return r;
r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg),
- node->target);
+ node->target);
if (r) return r;
r = add_abs_addr(reg, 0 /*dummy addr.*/);
return r;
@@ -393,88 +443,79 @@ compile_tree_n_times(Node* node, int n, regex_t* reg)
}
static int
-add_compile_string_length(UChar* s, int mb_len, int str_len,
- regex_t* reg, int ignore_case)
+add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, OnigDistance byte_len,
+ regex_t* reg ARG_UNUSED, int ignore_case)
{
int len;
- int op = select_str_opcode(mb_len, str_len, ignore_case);
+ int op = select_str_opcode(mb_len, byte_len, ignore_case);
len = SIZE_OPCODE;
- if (op == OP_EXACTMBN)
- len += SIZE_LENGTH;
+ if (op == OP_EXACTMBN) len += SIZE_LENGTH;
if (IS_NEED_STR_LEN_OP_EXACT(op))
len += SIZE_LENGTH;
- len += mb_len * str_len;
+ len += (int )byte_len;
return len;
}
static int
-add_compile_string(UChar* s, int mb_len, int str_len,
- regex_t* reg, int ignore_case)
+add_compile_string(UChar* s, int mb_len, OnigDistance byte_len,
+ regex_t* reg, int ignore_case)
{
- int op = select_str_opcode(mb_len, str_len, ignore_case);
+ int op = select_str_opcode(mb_len, byte_len, ignore_case);
add_opcode(reg, op);
if (op == OP_EXACTMBN)
add_length(reg, mb_len);
- if (IS_NEED_STR_LEN_OP_EXACT(op))
- add_length(reg, str_len);
+ if (IS_NEED_STR_LEN_OP_EXACT(op)) {
+ if (op == OP_EXACTN_IC)
+ add_length(reg, byte_len);
+ else
+ add_length(reg, byte_len / mb_len);
+ }
- add_bytes(reg, s, mb_len * str_len);
+ add_bytes(reg, s, byte_len);
return 0;
}
static int
-compile_length_string_node(StrNode* sn, regex_t* reg)
+compile_length_string_node(Node* node, regex_t* reg)
{
- int rlen, r, len, prev_len, slen, ambig, ic;
+ int rlen, r, len, prev_len, blen, ambig;
OnigEncoding enc = reg->enc;
UChar *p, *prev;
+ StrNode* sn;
+ sn = NSTR(node);
if (sn->end <= sn->s)
return 0;
- ic = IS_IGNORECASE(reg->options);
+ ambig = NSTRING_IS_AMBIG(node);
p = prev = sn->s;
- prev_len = enc_len(enc, *p);
- if (ic != 0 && prev_len == 1)
- ambig = ONIGENC_IS_MBC_CASE_AMBIG(reg->enc, p);
- else
- ambig = 0;
-
+ prev_len = enclen(enc, p, sn->end);
p += prev_len;
- slen = 1;
+ blen = prev_len;
rlen = 0;
for (; p < sn->end; ) {
- len = enc_len(enc, *p);
- if (len == prev_len) {
- slen++;
- if (ic != 0 && ambig == 0 && len == 1)
- ambig = ONIGENC_IS_MBC_CASE_AMBIG(reg->enc, p);
+ len = enclen(enc, p, sn->end);
+ if (len == prev_len || ambig) {
+ blen += len;
}
else {
- r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
+ r = add_compile_string_length(prev, prev_len, blen, reg, ambig);
rlen += r;
-
- if (ic != 0 && len == 1)
- ambig = ONIGENC_IS_MBC_CASE_AMBIG(reg->enc, p);
- else
- ambig = 0;
-
prev = p;
- slen = 1;
+ blen = len;
prev_len = len;
}
-
p += len;
}
- r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
+ r = add_compile_string_length(prev, prev_len, blen, reg, ambig);
rlen += r;
return rlen;
}
@@ -489,58 +530,42 @@ compile_length_string_raw_node(StrNode* sn, regex_t* reg)
}
static int
-compile_string_node(StrNode* sn, regex_t* reg)
+compile_string_node(Node* node, regex_t* reg)
{
- int r, len, prev_len, slen, ambig, ic;
+ int r, len, prev_len, blen, ambig;
OnigEncoding enc = reg->enc;
- UChar *p, *prev;
+ UChar *p, *prev, *end;
+ StrNode* sn;
+ sn = NSTR(node);
if (sn->end <= sn->s)
return 0;
- ic = IS_IGNORECASE(reg->options);
+ end = sn->end;
+ ambig = NSTRING_IS_AMBIG(node);
p = prev = sn->s;
- prev_len = enc_len(enc, *p);
- if (ic != 0 && prev_len == 1) {
- ambig = ONIGENC_IS_MBC_CASE_AMBIG(reg->enc, p);
- if (ambig != 0)
- ONIGENC_MBC_TO_LOWER(reg->enc, p, p);
- }
- else
- ambig = 0;
-
+ prev_len = enclen(enc, p, end);
p += prev_len;
- slen = 1;
+ blen = prev_len;
- for (; p < sn->end; ) {
- len = enc_len(enc, *p);
- if (len == prev_len) {
- slen++;
- if (ic != 0 && len == 1) {
- if (ambig == 0)
- ambig = ONIGENC_IS_MBC_CASE_AMBIG(reg->enc, p);
- if (ambig != 0) ONIGENC_MBC_TO_LOWER(reg->enc, p, p);
- }
+ for (; p < end; ) {
+ len = enclen(enc, p, end);
+ if (len == prev_len || ambig) {
+ blen += len;
}
else {
- r = add_compile_string(prev, prev_len, slen, reg, ambig);
+ r = add_compile_string(prev, prev_len, blen, reg, ambig);
if (r) return r;
- if (ic != 0 && len == 1) {
- ambig = ONIGENC_IS_MBC_CASE_AMBIG(reg->enc, p);
- if (ambig != 0) ONIGENC_MBC_TO_LOWER(reg->enc, p, p);
- }
- else
- ambig = 0;
prev = p;
- slen = 1;
+ blen = len;
prev_len = len;
}
p += len;
}
- return add_compile_string(prev, prev_len, slen, reg, ambig);
+ return add_compile_string(prev, prev_len, blen, reg, ambig);
}
static int
@@ -584,8 +609,7 @@ compile_length_cclass_node(CClassNode* cc, regex_t* reg)
len = SIZE_OPCODE + SIZE_BITSET;
}
else {
- if (bitset_is_empty(cc->bs)) {
- /* SIZE_BITSET is included in mbuf->used. */
+ if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
len = SIZE_OPCODE;
}
else {
@@ -607,21 +631,27 @@ compile_cclass_node(CClassNode* cc, regex_t* reg)
int r;
if (IS_NULL(cc->mbuf)) {
- if (cc->not) add_opcode(reg, OP_CCLASS_NOT);
- else add_opcode(reg, OP_CCLASS);
+ if (IS_NCCLASS_NOT(cc))
+ add_opcode(reg, OP_CCLASS_NOT);
+ else
+ add_opcode(reg, OP_CCLASS);
r = add_bitset(reg, cc->bs);
}
else {
- if (bitset_is_empty(cc->bs)) {
- if (cc->not) add_opcode(reg, OP_CCLASS_MB_NOT);
- else add_opcode(reg, OP_CCLASS_MB);
+ if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
+ if (IS_NCCLASS_NOT(cc))
+ add_opcode(reg, OP_CCLASS_MB_NOT);
+ else
+ add_opcode(reg, OP_CCLASS_MB);
r = add_multi_byte_cclass(cc->mbuf, reg);
}
else {
- if (cc->not) add_opcode(reg, OP_CCLASS_MIX_NOT);
- else add_opcode(reg, OP_CCLASS_MIX);
+ if (IS_NCCLASS_NOT(cc))
+ add_opcode(reg, OP_CCLASS_MIX_NOT);
+ else
+ add_opcode(reg, OP_CCLASS_MIX);
r = add_bitset(reg, cc->bs);
if (r) return r;
@@ -641,7 +671,7 @@ entry_repeat_range(regex_t* reg, int id, int lower, int upper)
if (reg->repeat_range_alloc == 0) {
p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC);
- CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY);
+ CHECK_NULL_RETURN_MEMERR(p);
reg->repeat_range = p;
reg->repeat_range_alloc = REPEAT_RANGE_ALLOC;
}
@@ -649,8 +679,8 @@ entry_repeat_range(regex_t* reg, int id, int lower, int upper)
int n;
n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC;
p = (OnigRepeatRange* )xrealloc(reg->repeat_range,
- sizeof(OnigRepeatRange) * n);
- CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY);
+ sizeof(OnigRepeatRange) * n);
+ CHECK_NULL_RETURN_MEMERR(p);
reg->repeat_range = p;
reg->repeat_range_alloc = n;
}
@@ -659,13 +689,13 @@ entry_repeat_range(regex_t* reg, int id, int lower, int upper)
}
p[id].lower = lower;
- p[id].upper = upper;
+ p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper);
return 0;
}
static int
-compile_range_repeat_node(QualifierNode* qn, int target_len, int empty_info,
- regex_t* reg)
+compile_range_repeat_node(QtfrNode* qn, int target_len, int empty_info,
+ regex_t* reg)
{
int r;
int num_repeat = reg->num_repeat;
@@ -674,6 +704,7 @@ compile_range_repeat_node(QualifierNode* qn, int target_len, int empty_info,
if (r) return r;
r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
reg->num_repeat++;
+ if ((MemNumType)reg->num_repeat <= 0) return ONIGERR_TOO_MANY_RANGE_REPEAT;
if (r) return r;
r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC);
if (r) return r;
@@ -684,16 +715,272 @@ compile_range_repeat_node(QualifierNode* qn, int target_len, int empty_info,
r = compile_tree_empty_check(qn->target, reg, empty_info);
if (r) return r;
- r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
+ if (
+#ifdef USE_SUBEXP_CALL
+ reg->num_call > 0 ||
+#endif
+ IS_QUANTIFIER_IN_REPEAT(qn)) {
+ r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG);
+ }
+ else {
+ r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
+ }
if (r) return r;
r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
return r;
}
-#define QUALIFIER_EXPAND_LIMIT_SIZE 50
+static int
+is_anychar_star_quantifier(QtfrNode* qn)
+{
+ if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
+ NTYPE(qn->target) == NT_CANY)
+ return 1;
+ else
+ return 0;
+}
+
+#define QUANTIFIER_EXPAND_LIMIT_SIZE 50
+#define CKN_ON (ckn > 0)
+
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+
+static int
+compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
+{
+ int len, mod_tlen, cklen;
+ int ckn;
+ int infinite = IS_REPEAT_INFINITE(qn->upper);
+ int empty_info = qn->target_empty_info;
+ int tlen = compile_length_tree(qn->target, reg);
+
+ if (tlen < 0) return tlen;
+
+ ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
+
+ cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0);
+
+ /* anychar repeat */
+ if (NTYPE(qn->target) == NT_CANY) {
+ if (qn->greedy && infinite) {
+ if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON)
+ return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen;
+ else
+ return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen;
+ }
+ }
+
+ if (empty_info != 0)
+ mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
+ else
+ mod_tlen = tlen;
+
+ if (infinite && qn->lower <= 1) {
+ if (qn->greedy) {
+ if (qn->lower == 1)
+ len = SIZE_OP_JUMP;
+ else
+ len = 0;
+
+ len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP;
+ }
+ else {
+ if (qn->lower == 0)
+ len = SIZE_OP_JUMP;
+ else
+ len = 0;
+
+ len += mod_tlen + SIZE_OP_PUSH + cklen;
+ }
+ }
+ else if (qn->upper == 0) {
+ if (qn->is_referred != 0) /* /(?<n>..){0}/ */
+ len = SIZE_OP_JUMP + tlen;
+ else
+ len = 0;
+ }
+ else if (qn->upper == 1 && qn->greedy) {
+ if (qn->lower == 0) {
+ if (CKN_ON) {
+ len = SIZE_OP_STATE_CHECK_PUSH + tlen;
+ }
+ else {
+ len = SIZE_OP_PUSH + tlen;
+ }
+ }
+ else {
+ len = tlen;
+ }
+ }
+ else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
+ len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen;
+ }
+ else {
+ len = SIZE_OP_REPEAT_INC
+ + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
+ if (CKN_ON)
+ len += SIZE_OP_STATE_CHECK;
+ }
+
+ return len;
+}
+
+static int
+compile_quantifier_node(QtfrNode* qn, regex_t* reg)
+{
+ int r, mod_tlen;
+ int ckn;
+ int infinite = IS_REPEAT_INFINITE(qn->upper);
+ int empty_info = qn->target_empty_info;
+ int tlen = compile_length_tree(qn->target, reg);
+
+ if (tlen < 0) return tlen;
+
+ ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
+
+ if (is_anychar_star_quantifier(qn)) {
+ r = compile_tree_n_times(qn->target, qn->lower, reg);
+ if (r) return r;
+ if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) {
+ if (IS_MULTILINE(reg->options))
+ r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
+ else
+ r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
+ if (r) return r;
+ if (CKN_ON) {
+ r = add_state_check_num(reg, ckn);
+ if (r) return r;
+ }
+
+ return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
+ }
+ else {
+ if (IS_MULTILINE(reg->options)) {
+ r = add_opcode(reg, (CKN_ON ?
+ OP_STATE_CHECK_ANYCHAR_ML_STAR
+ : OP_ANYCHAR_ML_STAR));
+ }
+ else {
+ r = add_opcode(reg, (CKN_ON ?
+ OP_STATE_CHECK_ANYCHAR_STAR
+ : OP_ANYCHAR_STAR));
+ }
+ if (r) return r;
+ if (CKN_ON)
+ r = add_state_check_num(reg, ckn);
+
+ return r;
+ }
+ }
+
+ if (empty_info != 0)
+ mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
+ else
+ mod_tlen = tlen;
+
+ if (infinite && qn->lower <= 1) {
+ if (qn->greedy) {
+ if (qn->lower == 1) {
+ r = add_opcode_rel_addr(reg, OP_JUMP,
+ (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH));
+ if (r) return r;
+ }
+
+ if (CKN_ON) {
+ r = add_opcode(reg, OP_STATE_CHECK_PUSH);
+ if (r) return r;
+ r = add_state_check_num(reg, ckn);
+ if (r) return r;
+ r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP);
+ }
+ else {
+ r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
+ }
+ if (r) return r;
+ r = compile_tree_empty_check(qn->target, reg, empty_info);
+ if (r) return r;
+ r = add_opcode_rel_addr(reg, OP_JUMP,
+ -(mod_tlen + (int )SIZE_OP_JUMP
+ + (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)));
+ }
+ else {
+ if (qn->lower == 0) {
+ r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
+ if (r) return r;
+ }
+ r = compile_tree_empty_check(qn->target, reg, empty_info);
+ if (r) return r;
+ if (CKN_ON) {
+ r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP);
+ if (r) return r;
+ r = add_state_check_num(reg, ckn);
+ if (r) return r;
+ r = add_rel_addr(reg,
+ -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP));
+ }
+ else
+ r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
+ }
+ }
+ else if (qn->upper == 0) {
+ if (qn->is_referred != 0) { /* /(?<n>..){0}/ */
+ r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
+ if (r) return r;
+ r = compile_tree(qn->target, reg);
+ }
+ else
+ r = 0;
+ }
+ else if (qn->upper == 1 && qn->greedy) {
+ if (qn->lower == 0) {
+ if (CKN_ON) {
+ r = add_opcode(reg, OP_STATE_CHECK_PUSH);
+ if (r) return r;
+ r = add_state_check_num(reg, ckn);
+ if (r) return r;
+ r = add_rel_addr(reg, tlen);
+ }
+ else {
+ r = add_opcode_rel_addr(reg, OP_PUSH, tlen);
+ }
+ if (r) return r;
+ }
+
+ r = compile_tree(qn->target, reg);
+ }
+ else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
+ if (CKN_ON) {
+ r = add_opcode(reg, OP_STATE_CHECK_PUSH);
+ if (r) return r;
+ r = add_state_check_num(reg, ckn);
+ if (r) return r;
+ r = add_rel_addr(reg, SIZE_OP_JUMP);
+ }
+ else {
+ r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
+ }
+
+ if (r) return r;
+ r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
+ if (r) return r;
+ r = compile_tree(qn->target, reg);
+ }
+ else {
+ r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
+ if (CKN_ON) {
+ if (r) return r;
+ r = add_opcode(reg, OP_STATE_CHECK);
+ if (r) return r;
+ r = add_state_check_num(reg, ckn);
+ }
+ }
+ return r;
+}
+
+#else /* USE_COMBINATION_EXPLOSION_CHECK */
static int
-compile_length_qualifier_node(QualifierNode* qn, regex_t* reg)
+compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
{
int len, mod_tlen;
int infinite = IS_REPEAT_INFINITE(qn->upper);
@@ -703,12 +990,12 @@ compile_length_qualifier_node(QualifierNode* qn, regex_t* reg)
if (tlen < 0) return tlen;
/* anychar repeat */
- if (NTYPE(qn->target) == N_ANYCHAR) {
+ if (NTYPE(qn->target) == NT_CANY) {
if (qn->greedy && infinite) {
if (IS_NOT_NULL(qn->next_head_exact))
- return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
+ return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
else
- return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;
+ return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;
}
}
@@ -718,8 +1005,8 @@ compile_length_qualifier_node(QualifierNode* qn, regex_t* reg)
mod_tlen = tlen;
if (infinite &&
- (qn->lower <= 1 || tlen * qn->lower <= QUALIFIER_EXPAND_LIMIT_SIZE)) {
- if (qn->lower == 1 && tlen > QUALIFIER_EXPAND_LIMIT_SIZE) {
+ (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
+ if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
len = SIZE_OP_JUMP;
}
else {
@@ -727,21 +1014,25 @@ compile_length_qualifier_node(QualifierNode* qn, regex_t* reg)
}
if (qn->greedy) {
+#ifdef USE_OP_PUSH_OR_JUMP_EXACT
if (IS_NOT_NULL(qn->head_exact))
- len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP;
- else if (IS_NOT_NULL(qn->next_head_exact))
- len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP;
+ len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP;
+ else
+#endif
+ if (IS_NOT_NULL(qn->next_head_exact))
+ len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP;
else
- len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;
+ len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;
}
else
len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH;
}
- else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
+ else if (qn->upper == 0 && qn->is_referred != 0) { /* /(?<n>..){0}/ */
len = SIZE_OP_JUMP + tlen;
}
else if (!infinite && qn->greedy &&
- (tlen + SIZE_OP_PUSH) * qn->upper <= QUALIFIER_EXPAND_LIMIT_SIZE) {
+ (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
+ <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
len = tlen * qn->lower;
len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);
}
@@ -757,17 +1048,7 @@ compile_length_qualifier_node(QualifierNode* qn, regex_t* reg)
}
static int
-is_anychar_star_qualifier(QualifierNode* qn)
-{
- if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
- NTYPE(qn->target) == N_ANYCHAR)
- return 1;
- else
- return 0;
-}
-
-static int
-compile_qualifier_node(QualifierNode* qn, regex_t* reg)
+compile_quantifier_node(QtfrNode* qn, regex_t* reg)
{
int i, r, mod_tlen;
int infinite = IS_REPEAT_INFINITE(qn->upper);
@@ -776,22 +1057,22 @@ compile_qualifier_node(QualifierNode* qn, regex_t* reg)
if (tlen < 0) return tlen;
- if (is_anychar_star_qualifier(qn)) {
+ if (is_anychar_star_quantifier(qn)) {
r = compile_tree_n_times(qn->target, qn->lower, reg);
if (r) return r;
if (IS_NOT_NULL(qn->next_head_exact)) {
if (IS_MULTILINE(reg->options))
- r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
+ r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
else
- r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
+ r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
if (r) return r;
- return add_bytes(reg, NSTRING(qn->next_head_exact).s, 1);
+ return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
}
else {
if (IS_MULTILINE(reg->options))
- return add_opcode(reg, OP_ANYCHAR_ML_STAR);
+ return add_opcode(reg, OP_ANYCHAR_ML_STAR);
else
- return add_opcode(reg, OP_ANYCHAR_STAR);
+ return add_opcode(reg, OP_ANYCHAR_STAR);
}
}
@@ -801,18 +1082,21 @@ compile_qualifier_node(QualifierNode* qn, regex_t* reg)
mod_tlen = tlen;
if (infinite &&
- (qn->lower <= 1 || tlen * qn->lower <= QUALIFIER_EXPAND_LIMIT_SIZE)) {
- if (qn->lower == 1 && tlen > QUALIFIER_EXPAND_LIMIT_SIZE) {
+ (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
+ if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
if (qn->greedy) {
- if (IS_NOT_NULL(qn->head_exact))
- r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1);
- else if (IS_NOT_NULL(qn->next_head_exact))
- r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT);
- else
- r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH);
+#ifdef USE_OP_PUSH_OR_JUMP_EXACT
+ if (IS_NOT_NULL(qn->head_exact))
+ r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1);
+ else
+#endif
+ if (IS_NOT_NULL(qn->next_head_exact))
+ r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT);
+ else
+ r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH);
}
else {
- r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP);
+ r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP);
}
if (r) return r;
}
@@ -822,33 +1106,36 @@ compile_qualifier_node(QualifierNode* qn, regex_t* reg)
}
if (qn->greedy) {
+#ifdef USE_OP_PUSH_OR_JUMP_EXACT
if (IS_NOT_NULL(qn->head_exact)) {
- r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1,
- mod_tlen + SIZE_OP_JUMP);
- if (r) return r;
- add_bytes(reg, NSTRING(qn->head_exact).s, 1);
- r = compile_tree_empty_check(qn->target, reg, empty_info);
- if (r) return r;
- r = add_opcode_rel_addr(reg, OP_JUMP,
- -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1));
+ r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1,
+ mod_tlen + SIZE_OP_JUMP);
+ if (r) return r;
+ add_bytes(reg, NSTR(qn->head_exact)->s, 1);
+ r = compile_tree_empty_check(qn->target, reg, empty_info);
+ if (r) return r;
+ r = add_opcode_rel_addr(reg, OP_JUMP,
+ -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1));
}
- else if (IS_NOT_NULL(qn->next_head_exact)) {
- r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT,
- mod_tlen + SIZE_OP_JUMP);
- if (r) return r;
- add_bytes(reg, NSTRING(qn->next_head_exact).s, 1);
- r = compile_tree_empty_check(qn->target, reg, empty_info);
- if (r) return r;
- r = add_opcode_rel_addr(reg, OP_JUMP,
+ else
+#endif
+ if (IS_NOT_NULL(qn->next_head_exact)) {
+ r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT,
+ mod_tlen + SIZE_OP_JUMP);
+ if (r) return r;
+ add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
+ r = compile_tree_empty_check(qn->target, reg, empty_info);
+ if (r) return r;
+ r = add_opcode_rel_addr(reg, OP_JUMP,
-(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT));
}
else {
- r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
- if (r) return r;
- r = compile_tree_empty_check(qn->target, reg, empty_info);
- if (r) return r;
- r = add_opcode_rel_addr(reg, OP_JUMP,
- -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH));
+ r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
+ if (r) return r;
+ r = compile_tree_empty_check(qn->target, reg, empty_info);
+ if (r) return r;
+ r = add_opcode_rel_addr(reg, OP_JUMP,
+ -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH));
}
}
else {
@@ -859,13 +1146,14 @@ compile_qualifier_node(QualifierNode* qn, regex_t* reg)
r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
}
}
- else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
+ else if (qn->upper == 0 && qn->is_referred != 0) { /* /(?<n>..){0}/ */
r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
if (r) return r;
r = compile_tree(qn->target, reg);
}
else if (!infinite && qn->greedy &&
- (tlen + SIZE_OP_PUSH) * qn->upper <= QUALIFIER_EXPAND_LIMIT_SIZE) {
+ (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
+ <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
int n = qn->upper - qn->lower;
r = compile_tree_n_times(qn->target, qn->lower, reg);
@@ -873,7 +1161,7 @@ compile_qualifier_node(QualifierNode* qn, regex_t* reg)
for (i = 0; i < n; i++) {
r = add_opcode_rel_addr(reg, OP_PUSH,
- (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH);
+ (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH);
if (r) return r;
r = compile_tree(qn->target, reg);
if (r) return r;
@@ -891,9 +1179,10 @@ compile_qualifier_node(QualifierNode* qn, regex_t* reg)
}
return r;
}
+#endif /* USE_COMBINATION_EXPLOSION_CHECK */
static int
-compile_length_option_node(EffectNode* node, regex_t* reg)
+compile_length_option_node(EncloseNode* node, regex_t* reg)
{
int tlen;
OnigOptionType prev = reg->options;
@@ -913,7 +1202,7 @@ compile_length_option_node(EffectNode* node, regex_t* reg)
}
static int
-compile_option_node(EffectNode* node, regex_t* reg)
+compile_option_node(EncloseNode* node, regex_t* reg)
{
int r;
OnigOptionType prev = reg->options;
@@ -925,28 +1214,26 @@ compile_option_node(EffectNode* node, regex_t* reg)
if (r) return r;
r = add_opcode(reg, OP_FAIL);
if (r) return r;
+ }
- reg->options = node->option;
- r = compile_tree(node->target, reg);
- reg->options = prev;
+ reg->options = node->option;
+ r = compile_tree(node->target, reg);
+ reg->options = prev;
+
+ if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
if (r) return r;
r = add_opcode_option(reg, OP_SET_OPTION, prev);
}
- else {
- reg->options = node->option;
- r = compile_tree(node->target, reg);
- reg->options = prev;
- }
return r;
}
static int
-compile_length_effect_node(EffectNode* node, regex_t* reg)
+compile_length_enclose_node(EncloseNode* node, regex_t* reg)
{
int len;
int tlen;
- if (node->type == EFFECT_OPTION)
+ if (node->type == ENCLOSE_OPTION)
return compile_length_option_node(node, reg);
if (node->target) {
@@ -957,43 +1244,79 @@ compile_length_effect_node(EffectNode* node, regex_t* reg)
tlen = 0;
switch (node->type) {
- case EFFECT_MEMORY:
+ case ENCLOSE_MEMORY:
#ifdef USE_SUBEXP_CALL
- if (IS_EFFECT_CALLED(node)) {
+ if (IS_ENCLOSE_CALLED(node)) {
len = SIZE_OP_MEMORY_START_PUSH + tlen
- + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;
+ + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;
if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
- len += (IS_EFFECT_RECURSION(node)
- ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
+ len += (IS_ENCLOSE_RECURSION(node)
+ ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
else
- len += (IS_EFFECT_RECURSION(node)
- ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
+ len += (IS_ENCLOSE_RECURSION(node)
+ ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
+ }
+ else if (IS_ENCLOSE_RECURSION(node)) {
+ len = SIZE_OP_MEMORY_START_PUSH;
+ len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
+ ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_REC);
}
else
#endif
{
if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
- len = SIZE_OP_MEMORY_START_PUSH;
+ len = SIZE_OP_MEMORY_START_PUSH;
else
- len = SIZE_OP_MEMORY_START;
+ len = SIZE_OP_MEMORY_START;
len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
- ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END);
+ ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END);
}
break;
- case EFFECT_STOP_BACKTRACK:
- if (IS_EFFECT_SIMPLE_REPEAT(node)) {
- QualifierNode* qn = &NQUALIFIER(node->target);
+ case ENCLOSE_STOP_BACKTRACK:
+ /* Disable POP_STOP_BT optimization for simple repeat under the match cache */
+ /* optimization because the match cache optimization pushes an extra item to */
+ /* the stack and it breaks the assumption for this optimization. */
+#ifndef USE_MATCH_CACHE
+ if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
+ QtfrNode* qn = NQTFR(node->target);
tlen = compile_length_tree(qn->target, reg);
if (tlen < 0) return tlen;
len = tlen * qn->lower
- + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP;
+ + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP;
}
else {
+#endif
len = SIZE_OP_PUSH_STOP_BT + tlen + SIZE_OP_POP_STOP_BT;
+#ifndef USE_MATCH_CACHE
}
+#endif
+ break;
+
+ case ENCLOSE_CONDITION:
+ len = SIZE_OP_CONDITION;
+ if (NTYPE(node->target) == NT_ALT) {
+ Node* x = node->target;
+
+ tlen = compile_length_tree(NCAR(x), reg); /* yes-node */
+ if (tlen < 0) return tlen;
+ len += tlen + SIZE_OP_JUMP;
+ if (NCDR(x) == NULL) return ONIGERR_PARSER_BUG;
+ x = NCDR(x);
+ tlen = compile_length_tree(NCAR(x), reg); /* no-node */
+ if (tlen < 0) return tlen;
+ len += tlen;
+ if (NCDR(x) != NULL) return ONIGERR_INVALID_CONDITION_PATTERN;
+ }
+ else {
+ return ONIGERR_PARSER_BUG;
+ }
+ break;
+
+ case ENCLOSE_ABSENT:
+ len = SIZE_OP_PUSH_ABSENT_POS + SIZE_OP_ABSENT + tlen + SIZE_OP_ABSENT_END;
break;
default:
@@ -1007,17 +1330,17 @@ compile_length_effect_node(EffectNode* node, regex_t* reg)
static int get_char_length_tree(Node* node, regex_t* reg, int* len);
static int
-compile_effect_node(EffectNode* node, regex_t* reg)
+compile_enclose_node(EncloseNode* node, regex_t* reg)
{
int r, len;
- if (node->type == EFFECT_OPTION)
+ if (node->type == ENCLOSE_OPTION)
return compile_option_node(node, reg);
switch (node->type) {
- case EFFECT_MEMORY:
+ case ENCLOSE_MEMORY:
#ifdef USE_SUBEXP_CALL
- if (IS_EFFECT_CALLED(node)) {
+ if (IS_ENCLOSE_CALLED(node)) {
r = add_opcode(reg, OP_CALL);
if (r) return r;
node->call_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;
@@ -1027,11 +1350,11 @@ compile_effect_node(EffectNode* node, regex_t* reg)
len = compile_length_tree(node->target, reg);
len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN);
if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
- len += (IS_EFFECT_RECURSION(node)
- ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
+ len += (IS_ENCLOSE_RECURSION(node)
+ ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
else
- len += (IS_EFFECT_RECURSION(node)
- ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
+ len += (IS_ENCLOSE_RECURSION(node)
+ ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
r = add_opcode_rel_addr(reg, OP_JUMP, len);
if (r) return r;
@@ -1047,34 +1370,46 @@ compile_effect_node(EffectNode* node, regex_t* reg)
r = compile_tree(node->target, reg);
if (r) return r;
#ifdef USE_SUBEXP_CALL
- if (IS_EFFECT_CALLED(node)) {
+ if (IS_ENCLOSE_CALLED(node)) {
if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
- r = add_opcode(reg, (IS_EFFECT_RECURSION(node)
- ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));
+ r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
+ ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));
else
- r = add_opcode(reg, (IS_EFFECT_RECURSION(node)
- ? OP_MEMORY_END_REC : OP_MEMORY_END));
+ r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
+ ? OP_MEMORY_END_REC : OP_MEMORY_END));
if (r) return r;
r = add_mem_num(reg, node->regnum);
if (r) return r;
r = add_opcode(reg, OP_RETURN);
}
+ else if (IS_ENCLOSE_RECURSION(node)) {
+ if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
+ r = add_opcode(reg, OP_MEMORY_END_PUSH_REC);
+ else
+ r = add_opcode(reg, OP_MEMORY_END_REC);
+ if (r) return r;
+ r = add_mem_num(reg, node->regnum);
+ }
else
#endif
{
if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
- r = add_opcode(reg, OP_MEMORY_END_PUSH);
+ r = add_opcode(reg, OP_MEMORY_END_PUSH);
else
- r = add_opcode(reg, OP_MEMORY_END);
+ r = add_opcode(reg, OP_MEMORY_END);
if (r) return r;
r = add_mem_num(reg, node->regnum);
}
break;
- case EFFECT_STOP_BACKTRACK:
- if (IS_EFFECT_SIMPLE_REPEAT(node)) {
- QualifierNode* qn = &NQUALIFIER(node->target);
+ case ENCLOSE_STOP_BACKTRACK:
+ /* Disable POP_STOP_BT optimization for simple repeat under the match cache */
+ /* optimization because the match cache optimization pushes an extra item to */
+ /* the stack and it breaks the assumption for this optimization. */
+#ifndef USE_MATCH_CACHE
+ if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
+ QtfrNode* qn = NQTFR(node->target);
r = compile_tree_n_times(qn->target, qn->lower, reg);
if (r) return r;
@@ -1088,15 +1423,64 @@ compile_effect_node(EffectNode* node, regex_t* reg)
r = add_opcode(reg, OP_POP);
if (r) return r;
r = add_opcode_rel_addr(reg, OP_JUMP,
- -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP));
+ -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP));
}
else {
+#endif
r = add_opcode(reg, OP_PUSH_STOP_BT);
if (r) return r;
r = compile_tree(node->target, reg);
if (r) return r;
r = add_opcode(reg, OP_POP_STOP_BT);
+#ifndef USE_MATCH_CACHE
}
+#endif
+ break;
+
+ case ENCLOSE_CONDITION:
+ r = add_opcode(reg, OP_CONDITION);
+ if (r) return r;
+ r = add_mem_num(reg, node->regnum);
+ if (r) return r;
+
+ if (NTYPE(node->target) == NT_ALT) {
+ Node* x = node->target;
+ int len2;
+
+ len = compile_length_tree(NCAR(x), reg); /* yes-node */
+ if (len < 0) return len;
+ if (NCDR(x) == NULL) return ONIGERR_PARSER_BUG;
+ x = NCDR(x);
+ len2 = compile_length_tree(NCAR(x), reg); /* no-node */
+ if (len2 < 0) return len2;
+ if (NCDR(x) != NULL) return ONIGERR_INVALID_CONDITION_PATTERN;
+
+ x = node->target;
+ r = add_rel_addr(reg, len + SIZE_OP_JUMP);
+ if (r) return r;
+ r = compile_tree(NCAR(x), reg); /* yes-node */
+ if (r) return r;
+ r = add_opcode_rel_addr(reg, OP_JUMP, len2);
+ if (r) return r;
+ x = NCDR(x);
+ r = compile_tree(NCAR(x), reg); /* no-node */
+ }
+ else {
+ return ONIGERR_PARSER_BUG;
+ }
+ break;
+
+ case ENCLOSE_ABSENT:
+ len = compile_length_tree(node->target, reg);
+ if (len < 0) return len;
+
+ r = add_opcode(reg, OP_PUSH_ABSENT_POS);
+ if (r) return r;
+ r = add_opcode_rel_addr(reg, OP_ABSENT, len + SIZE_OP_ABSENT_END);
+ if (r) return r;
+ r = compile_tree(node->target, reg);
+ if (r) return r;
+ r = add_opcode(reg, OP_ABSENT_END);
break;
default:
@@ -1153,12 +1537,25 @@ compile_anchor_node(AnchorNode* node, regex_t* reg)
case ANCHOR_SEMI_END_BUF: r = add_opcode(reg, OP_SEMI_END_BUF); break;
case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break;
- case ANCHOR_WORD_BOUND: r = add_opcode(reg, OP_WORD_BOUND); break;
- case ANCHOR_NOT_WORD_BOUND: r = add_opcode(reg, OP_NOT_WORD_BOUND); break;
+ case ANCHOR_WORD_BOUND:
+ if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_BOUND);
+ else r = add_opcode(reg, OP_WORD_BOUND);
+ break;
+ case ANCHOR_NOT_WORD_BOUND:
+ if (node->ascii_range) r = add_opcode(reg, OP_NOT_ASCII_WORD_BOUND);
+ else r = add_opcode(reg, OP_NOT_WORD_BOUND);
+ break;
#ifdef USE_WORD_BEGIN_END
- case ANCHOR_WORD_BEGIN: r = add_opcode(reg, OP_WORD_BEGIN); break;
- case ANCHOR_WORD_END: r = add_opcode(reg, OP_WORD_END); break;
+ case ANCHOR_WORD_BEGIN:
+ if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_BEGIN);
+ else r = add_opcode(reg, OP_WORD_BEGIN);
+ break;
+ case ANCHOR_WORD_END:
+ if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_END);
+ else r = add_opcode(reg, OP_WORD_END);
+ break;
#endif
+ case ANCHOR_KEEP: r = add_opcode(reg, OP_KEEP); break;
case ANCHOR_PREC_READ:
r = add_opcode(reg, OP_PUSH_POS);
@@ -1184,11 +1581,11 @@ compile_anchor_node(AnchorNode* node, regex_t* reg)
r = add_opcode(reg, OP_LOOK_BEHIND);
if (r) return r;
if (node->char_len < 0) {
- r = get_char_length_tree(node->target, reg, &n);
- if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
+ r = get_char_length_tree(node->target, reg, &n);
+ if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
}
else
- n = node->char_len;
+ n = node->char_len;
r = add_length(reg, n);
if (r) return r;
r = compile_tree(node->target, reg);
@@ -1200,14 +1597,14 @@ compile_anchor_node(AnchorNode* node, regex_t* reg)
int n;
len = compile_length_tree(node->target, reg);
r = add_opcode_rel_addr(reg, OP_PUSH_LOOK_BEHIND_NOT,
- len + SIZE_OP_FAIL_LOOK_BEHIND_NOT);
+ len + SIZE_OP_FAIL_LOOK_BEHIND_NOT);
if (r) return r;
if (node->char_len < 0) {
- r = get_char_length_tree(node->target, reg, &n);
- if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
+ r = get_char_length_tree(node->target, reg, &n);
+ if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
}
else
- n = node->char_len;
+ n = node->char_len;
r = add_length(reg, n);
if (r) return r;
r = compile_tree(node->target, reg);
@@ -1231,75 +1628,84 @@ compile_length_tree(Node* node, regex_t* reg)
type = NTYPE(node);
switch (type) {
- case N_LIST:
+ case NT_LIST:
len = 0;
do {
- r = compile_length_tree(NCONS(node).left, reg);
+ r = compile_length_tree(NCAR(node), reg);
if (r < 0) return r;
len += r;
- } while (IS_NOT_NULL(node = NCONS(node).right));
+ } while (IS_NOT_NULL(node = NCDR(node)));
r = len;
break;
- case N_ALT:
+ case NT_ALT:
{
- int n;
-
- n = r = 0;
+ int n = 0;
+ len = 0;
do {
- r += compile_length_tree(NCONS(node).left, reg);
- n++;
- } while (IS_NOT_NULL(node = NCONS(node).right));
+ r = compile_length_tree(NCAR(node), reg);
+ if (r < 0) return r;
+ len += r;
+ n++;
+ } while (IS_NOT_NULL(node = NCDR(node)));
+ r = len;
r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);
}
break;
- case N_STRING:
+ case NT_STR:
if (NSTRING_IS_RAW(node))
- r = compile_length_string_raw_node(&(NSTRING(node)), reg);
+ r = compile_length_string_raw_node(NSTR(node), reg);
else
- r = compile_length_string_node(&(NSTRING(node)), reg);
+ r = compile_length_string_node(node, reg);
break;
- case N_CCLASS:
- r = compile_length_cclass_node(&(NCCLASS(node)), reg);
+ case NT_CCLASS:
+ r = compile_length_cclass_node(NCCLASS(node), reg);
break;
- case N_CTYPE:
- case N_ANYCHAR:
+ case NT_CTYPE:
+ case NT_CANY:
r = SIZE_OPCODE;
break;
- case N_BACKREF:
+ case NT_BREF:
{
- BackrefNode* br = &(NBACKREF(node));
+ BRefNode* br = NBREF(node);
+#ifdef USE_BACKREF_WITH_LEVEL
+ if (IS_BACKREF_NEST_LEVEL(br)) {
+ r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH +
+ SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
+ }
+ else
+#endif
if (br->back_num == 1) {
- r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 3)
- ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM));
+ r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)
+ ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM));
}
else {
- r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
+ r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
}
}
break;
#ifdef USE_SUBEXP_CALL
- case N_CALL:
+ case NT_CALL:
r = SIZE_OP_CALL;
break;
#endif
- case N_QUALIFIER:
- r = compile_length_qualifier_node(&(NQUALIFIER(node)), reg);
+ case NT_QTFR:
+ r = compile_length_quantifier_node(NQTFR(node), reg);
break;
- case N_EFFECT:
- r = compile_length_effect_node(&NEFFECT(node), reg);
+ case NT_ENCLOSE:
+ r = compile_length_enclose_node(NENCLOSE(node), reg);
break;
- case N_ANCHOR:
- r = compile_length_anchor_node(&(NANCHOR(node)), reg);
+ case NT_ANCHOR:
+ r = compile_length_anchor_node(NANCHOR(node), reg);
break;
default:
@@ -1317,131 +1723,160 @@ compile_tree(Node* node, regex_t* reg)
type = NTYPE(node);
switch (type) {
- case N_LIST:
+ case NT_LIST:
do {
- r = compile_tree(NCONS(node).left, reg);
- } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
+ r = compile_tree(NCAR(node), reg);
+ } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
break;
- case N_ALT:
+ case NT_ALT:
{
Node* x = node;
len = 0;
do {
- len += compile_length_tree(NCONS(x).left, reg);
- if (NCONS(x).right != NULL) {
- len += SIZE_OP_PUSH + SIZE_OP_JUMP;
- }
- } while (IS_NOT_NULL(x = NCONS(x).right));
+ len += compile_length_tree(NCAR(x), reg);
+ if (NCDR(x) != NULL) {
+ len += SIZE_OP_PUSH + SIZE_OP_JUMP;
+ }
+ } while (IS_NOT_NULL(x = NCDR(x)));
pos = reg->used + len; /* goal position */
do {
- len = compile_length_tree(NCONS(node).left, reg);
- if (IS_NOT_NULL(NCONS(node).right)) {
- r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP);
- if (r) break;
- }
- r = compile_tree(NCONS(node).left, reg);
- if (r) break;
- if (IS_NOT_NULL(NCONS(node).right)) {
- len = pos - (reg->used + SIZE_OP_JUMP);
- r = add_opcode_rel_addr(reg, OP_JUMP, len);
- if (r) break;
- }
- } while (IS_NOT_NULL(node = NCONS(node).right));
+ len = compile_length_tree(NCAR(node), reg);
+ if (IS_NOT_NULL(NCDR(node))) {
+ r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP);
+ if (r) break;
+ }
+ r = compile_tree(NCAR(node), reg);
+ if (r) break;
+ if (IS_NOT_NULL(NCDR(node))) {
+ len = pos - (reg->used + SIZE_OP_JUMP);
+ r = add_opcode_rel_addr(reg, OP_JUMP, len);
+ if (r) break;
+ }
+ } while (IS_NOT_NULL(node = NCDR(node)));
}
break;
- case N_STRING:
+ case NT_STR:
if (NSTRING_IS_RAW(node))
- r = compile_string_raw_node(&(NSTRING(node)), reg);
+ r = compile_string_raw_node(NSTR(node), reg);
else
- r = compile_string_node(&(NSTRING(node)), reg);
+ r = compile_string_node(node, reg);
break;
- case N_CCLASS:
- r = compile_cclass_node(&(NCCLASS(node)), reg);
+ case NT_CCLASS:
+ r = compile_cclass_node(NCCLASS(node), reg);
break;
- case N_CTYPE:
+ case NT_CTYPE:
{
int op;
- switch (NCTYPE(node).type) {
- case CTYPE_WORD: op = OP_WORD; break;
- case CTYPE_NOT_WORD: op = OP_NOT_WORD; break;
+ switch (NCTYPE(node)->ctype) {
+ case ONIGENC_CTYPE_WORD:
+ if (NCTYPE(node)->ascii_range != 0) {
+ if (NCTYPE(node)->not != 0) op = OP_NOT_ASCII_WORD;
+ else op = OP_ASCII_WORD;
+ }
+ else {
+ if (NCTYPE(node)->not != 0) op = OP_NOT_WORD;
+ else op = OP_WORD;
+ }
+ break;
default:
- return ONIGERR_TYPE_BUG;
- break;
+ return ONIGERR_TYPE_BUG;
+ break;
}
r = add_opcode(reg, op);
}
break;
- case N_ANYCHAR:
+ case NT_CANY:
if (IS_MULTILINE(reg->options))
r = add_opcode(reg, OP_ANYCHAR_ML);
else
r = add_opcode(reg, OP_ANYCHAR);
break;
- case N_BACKREF:
+ case NT_BREF:
{
- int i;
- BackrefNode* br = &(NBACKREF(node));
-
+ BRefNode* br = NBREF(node);
+
+#ifdef USE_BACKREF_WITH_LEVEL
+ if (IS_BACKREF_NEST_LEVEL(br)) {
+ r = add_opcode(reg, OP_BACKREF_WITH_LEVEL);
+ if (r) return r;
+ r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));
+ if (r) return r;
+ r = add_length(reg, br->nest_level);
+ if (r) return r;
+
+ goto add_bacref_mems;
+ }
+ else
+#endif
if (br->back_num == 1) {
- n = br->back_static[0];
- if (IS_IGNORECASE(reg->options)) {
- r = add_opcode(reg, OP_BACKREFN_IC);
- if (r) return r;
- r = add_mem_num(reg, n);
- }
- else {
- switch (n) {
- case 1: r = add_opcode(reg, OP_BACKREF1); break;
- case 2: r = add_opcode(reg, OP_BACKREF2); break;
- case 3: r = add_opcode(reg, OP_BACKREF3); break;
- default:
- r = add_opcode(reg, OP_BACKREFN);
- if (r) return r;
- r = add_mem_num(reg, n);
- break;
- }
- }
+ n = br->back_static[0];
+ if (IS_IGNORECASE(reg->options)) {
+ r = add_opcode(reg, OP_BACKREFN_IC);
+ if (r) return r;
+ r = add_mem_num(reg, n);
+ }
+ else {
+ switch (n) {
+ case 1: r = add_opcode(reg, OP_BACKREF1); break;
+ case 2: r = add_opcode(reg, OP_BACKREF2); break;
+ default:
+ r = add_opcode(reg, OP_BACKREFN);
+ if (r) return r;
+ r = add_mem_num(reg, n);
+ break;
+ }
+ }
}
else {
- int* p;
- add_opcode(reg, (IS_IGNORECASE(reg->options) ?
- OP_BACKREF_MULTI_IC : OP_BACKREF_MULTI));
- if (r) return r;
- add_length(reg, br->back_num);
- if (r) return r;
- p = BACKREFS_P(br);
- for (i = br->back_num - 1; i >= 0; i--) {
- r = add_mem_num(reg, p[i]);
- if (r) return r;
- }
+ int i;
+ int* p;
+
+ if (IS_IGNORECASE(reg->options)) {
+ r = add_opcode(reg, OP_BACKREF_MULTI_IC);
+ }
+ else {
+ r = add_opcode(reg, OP_BACKREF_MULTI);
+ }
+ if (r) return r;
+
+#ifdef USE_BACKREF_WITH_LEVEL
+ add_bacref_mems:
+#endif
+ r = add_length(reg, br->back_num);
+ if (r) return r;
+ p = BACKREFS_P(br);
+ for (i = br->back_num - 1; i >= 0; i--) {
+ r = add_mem_num(reg, p[i]);
+ if (r) return r;
+ }
}
}
break;
#ifdef USE_SUBEXP_CALL
- case N_CALL:
- r = compile_call(&(NCALL(node)), reg);
+ case NT_CALL:
+ r = compile_call(NCALL(node), reg);
break;
#endif
- case N_QUALIFIER:
- r = compile_qualifier_node(&(NQUALIFIER(node)), reg);
+ case NT_QTFR:
+ r = compile_quantifier_node(NQTFR(node), reg);
break;
- case N_EFFECT:
- r = compile_effect_node(&NEFFECT(node), reg);
+ case NT_ENCLOSE:
+ r = compile_enclose_node(NENCLOSE(node), reg);
break;
- case N_ANCHOR:
- r = compile_anchor_node(&(NANCHOR(node)), reg);
+ case NT_ANCHOR:
+ r = compile_anchor_node(NANCHOR(node), reg);
break;
default:
@@ -1455,57 +1890,58 @@ compile_tree(Node* node, regex_t* reg)
}
#ifdef USE_NAMED_GROUP
-typedef struct {
- int new_val;
-} NumMap;
static int
-noname_disable_map(Node** plink, NumMap* map, int* counter)
+noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
{
int r = 0;
Node* node = *plink;
switch (NTYPE(node)) {
- case N_LIST:
- case N_ALT:
+ case NT_LIST:
+ case NT_ALT:
do {
- r = noname_disable_map(&(NCONS(node).left), map, counter);
- } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
+ r = noname_disable_map(&(NCAR(node)), map, counter);
+ } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
break;
- case N_QUALIFIER:
+ case NT_QTFR:
{
- Node** ptarget = &(NQUALIFIER(node).target);
+ Node** ptarget = &(NQTFR(node)->target);
Node* old = *ptarget;
r = noname_disable_map(ptarget, map, counter);
- if (*ptarget != old && NTYPE(*ptarget) == N_QUALIFIER) {
- onig_reduce_nested_qualifier(node, *ptarget);
+ if (*ptarget != old && NTYPE(*ptarget) == NT_QTFR) {
+ onig_reduce_nested_quantifier(node, *ptarget);
}
}
break;
- case N_EFFECT:
+ case NT_ENCLOSE:
{
- EffectNode* en = &(NEFFECT(node));
- if (en->type == EFFECT_MEMORY) {
- if (IS_EFFECT_NAMED_GROUP(en)) {
- (*counter)++;
- map[en->regnum].new_val = *counter;
- en->regnum = *counter;
- r = noname_disable_map(&(en->target), map, counter);
- }
- else {
- *plink = en->target;
- en->target = NULL_NODE;
- onig_node_free(node);
- r = noname_disable_map(plink, map, counter);
- }
+ EncloseNode* en = NENCLOSE(node);
+ if (en->type == ENCLOSE_MEMORY) {
+ if (IS_ENCLOSE_NAMED_GROUP(en)) {
+ (*counter)++;
+ map[en->regnum].new_val = *counter;
+ en->regnum = *counter;
+ }
+ else if (en->regnum != 0) {
+ *plink = en->target;
+ en->target = NULL_NODE;
+ onig_node_free(node);
+ r = noname_disable_map(plink, map, counter);
+ break;
+ }
}
- else
- r = noname_disable_map(&(en->target), map, counter);
+ r = noname_disable_map(&(en->target), map, counter);
}
break;
+ case NT_ANCHOR:
+ if (NANCHOR(node)->target)
+ r = noname_disable_map(&(NANCHOR(node)->target), map, counter);
+ break;
+
default:
break;
}
@@ -1514,11 +1950,11 @@ noname_disable_map(Node** plink, NumMap* map, int* counter)
}
static int
-renumber_node_backref(Node* node, NumMap* map)
+renumber_node_backref(Node* node, GroupNumRemap* map, const int num_mem)
{
int i, pos, n, old_num;
int *backs;
- BackrefNode* bn = &(NBACKREF(node));
+ BRefNode* bn = NBREF(node);
if (! IS_BACKREF_NAME_REF(bn))
return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
@@ -1530,6 +1966,7 @@ renumber_node_backref(Node* node, NumMap* map)
backs = bn->back_dynamic;
for (i = 0, pos = 0; i < old_num; i++) {
+ if (backs[i] > num_mem) return ONIGERR_INVALID_BACKREF;
n = map[backs[i]].new_val;
if (n > 0) {
backs[pos] = n;
@@ -1542,26 +1979,38 @@ renumber_node_backref(Node* node, NumMap* map)
}
static int
-renumber_by_map(Node* node, NumMap* map)
+renumber_by_map(Node* node, GroupNumRemap* map, const int num_mem)
{
int r = 0;
switch (NTYPE(node)) {
- case N_LIST:
- case N_ALT:
+ case NT_LIST:
+ case NT_ALT:
do {
- r = renumber_by_map(NCONS(node).left, map);
- } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
+ r = renumber_by_map(NCAR(node), map, num_mem);
+ } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
break;
- case N_QUALIFIER:
- r = renumber_by_map(NQUALIFIER(node).target, map);
+ case NT_QTFR:
+ r = renumber_by_map(NQTFR(node)->target, map, num_mem);
break;
- case N_EFFECT:
- r = renumber_by_map(NEFFECT(node).target, map);
+ case NT_ENCLOSE:
+ {
+ EncloseNode* en = NENCLOSE(node);
+ if (en->type == ENCLOSE_CONDITION) {
+ if (en->regnum > num_mem) return ONIGERR_INVALID_BACKREF;
+ en->regnum = map[en->regnum].new_val;
+ }
+ r = renumber_by_map(en->target, map, num_mem);
+ }
+ break;
+
+ case NT_BREF:
+ r = renumber_node_backref(node, map, num_mem);
break;
- case N_BACKREF:
- r = renumber_node_backref(node, map);
+ case NT_ANCHOR:
+ if (NANCHOR(node)->target)
+ r = renumber_by_map(NANCHOR(node)->target, map, num_mem);
break;
default:
@@ -1577,24 +2026,29 @@ numbered_ref_check(Node* node)
int r = 0;
switch (NTYPE(node)) {
- case N_LIST:
- case N_ALT:
+ case NT_LIST:
+ case NT_ALT:
do {
- r = numbered_ref_check(NCONS(node).left);
- } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
+ r = numbered_ref_check(NCAR(node));
+ } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
break;
- case N_QUALIFIER:
- r = numbered_ref_check(NQUALIFIER(node).target);
+ case NT_QTFR:
+ r = numbered_ref_check(NQTFR(node)->target);
break;
- case N_EFFECT:
- r = numbered_ref_check(NEFFECT(node).target);
+ case NT_ENCLOSE:
+ r = numbered_ref_check(NENCLOSE(node)->target);
break;
- case N_BACKREF:
- if (! IS_BACKREF_NAME_REF(&(NBACKREF(node))))
+ case NT_BREF:
+ if (! IS_BACKREF_NAME_REF(NBREF(node)))
return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
break;
+ case NT_ANCHOR:
+ if (NANCHOR(node)->target)
+ r = numbered_ref_check(NANCHOR(node)->target);
+ break;
+
default:
break;
}
@@ -1607,10 +2061,10 @@ disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
{
int r, i, pos, counter;
BitStatusType loc;
- NumMap* map;
+ GroupNumRemap* map;
- map = (NumMap* )xalloca(sizeof(NumMap) * (env->num_mem + 1));
- CHECK_NULL_RETURN_VAL(map, ONIGERR_MEMORY);
+ map = (GroupNumRemap* )xalloca(sizeof(GroupNumRemap) * (env->num_mem + 1));
+ CHECK_NULL_RETURN_MEMERR(map);
for (i = 1; i <= env->num_mem; i++) {
map[i].new_val = 0;
}
@@ -1618,7 +2072,7 @@ disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
r = noname_disable_map(root, map, &counter);
if (r != 0) return r;
- r = renumber_by_map(*root, map);
+ r = renumber_by_map(*root, map, env->num_mem);
if (r != 0) return r;
for (i = 1, pos = 1; i <= env->num_mem; i++) {
@@ -1638,7 +2092,8 @@ disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
env->num_mem = env->num_named;
reg->num_mem = env->num_named;
- return 0;
+
+ return onig_renumber_name_table(reg, map);
}
#endif /* USE_NAMED_GROUP */
@@ -1647,97 +2102,91 @@ static int
unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg)
{
int i, offset;
- EffectNode* en;
+ EncloseNode* en;
AbsAddrType addr;
-#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
- UChar buf[SERIALIZE_BUFSIZE];
-#endif
for (i = 0; i < uslist->num; i++) {
- en = &(NEFFECT(uslist->us[i].target));
- if (! IS_EFFECT_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG;
+ en = NENCLOSE(uslist->us[i].target);
+ if (! IS_ENCLOSE_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG;
addr = en->call_addr;
offset = uslist->us[i].offset;
-#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR);
-#else
- SERIALIZE_ABSADDR(addr, buf);
- BBUF_WRITE(reg, offset, buf, SIZE_ABSADDR);
-#endif
}
return 0;
}
#endif
-#ifdef USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK
+#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
static int
-qualifiers_memory_node_info(Node* node)
+quantifiers_memory_node_info(Node* node)
{
int r = 0;
switch (NTYPE(node)) {
- case N_LIST:
- case N_ALT:
+ case NT_LIST:
+ case NT_ALT:
{
int v;
do {
- v = qualifiers_memory_node_info(NCONS(node).left);
- if (v > r) r = v;
- } while (v >= 0 && IS_NOT_NULL(node = NCONS(node).right));
+ v = quantifiers_memory_node_info(NCAR(node));
+ if (v > r) r = v;
+ } while (v >= 0 && IS_NOT_NULL(node = NCDR(node)));
}
break;
-#ifdef USE_SUBEXP_CALL
- case N_CALL:
- if (IS_CALL_RECURSION(&NCALL(node))) {
+# ifdef USE_SUBEXP_CALL
+ case NT_CALL:
+ if (IS_CALL_RECURSION(NCALL(node))) {
return NQ_TARGET_IS_EMPTY_REC; /* tiny version */
}
else
- r = qualifiers_memory_node_info(NCALL(node).target);
+ r = quantifiers_memory_node_info(NCALL(node)->target);
break;
-#endif
+# endif
- case N_QUALIFIER:
+ case NT_QTFR:
{
- QualifierNode* qn = &(NQUALIFIER(node));
+ QtfrNode* qn = NQTFR(node);
if (qn->upper != 0) {
- r = qualifiers_memory_node_info(qn->target);
+ r = quantifiers_memory_node_info(qn->target);
}
}
break;
- case N_EFFECT:
+ case NT_ENCLOSE:
{
- EffectNode* en = &(NEFFECT(node));
+ EncloseNode* en = NENCLOSE(node);
switch (en->type) {
- case EFFECT_MEMORY:
- return NQ_TARGET_IS_EMPTY_MEM;
- break;
-
- case EFFECT_OPTION:
- case EFFECT_STOP_BACKTRACK:
- r = qualifiers_memory_node_info(en->target);
- break;
+ case ENCLOSE_MEMORY:
+ return NQ_TARGET_IS_EMPTY_MEM;
+ break;
+
+ case ENCLOSE_OPTION:
+ case ENCLOSE_STOP_BACKTRACK:
+ case ENCLOSE_CONDITION:
+ case ENCLOSE_ABSENT:
+ r = quantifiers_memory_node_info(en->target);
+ break;
default:
- break;
+ break;
}
}
break;
- case N_BACKREF:
- case N_STRING:
- case N_CTYPE:
- case N_CCLASS:
- case N_ANYCHAR:
- case N_ANCHOR:
+ case NT_BREF:
+ case NT_STR:
+ case NT_CTYPE:
+ case NT_CCLASS:
+ case NT_CANY:
+ case NT_ANCHOR:
default:
break;
}
return r;
}
-#endif /* USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK */
+#endif /* USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT */
static int
get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env)
@@ -1747,12 +2196,12 @@ get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env)
*min = 0;
switch (NTYPE(node)) {
- case N_BACKREF:
+ case NT_BREF:
{
int i;
int* backs;
Node** nodes = SCANENV_MEM_NODES(env);
- BackrefNode* br = &(NBACKREF(node));
+ BRefNode* br = NBREF(node);
if (br->state & NST_RECURSION) break;
backs = BACKREFS_P(br);
@@ -1760,106 +2209,110 @@ get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env)
r = get_min_match_length(nodes[backs[0]], min, env);
if (r != 0) break;
for (i = 1; i < br->back_num; i++) {
- if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
- r = get_min_match_length(nodes[backs[i]], &tmin, env);
- if (r != 0) break;
- if (*min > tmin) *min = tmin;
+ if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
+ r = get_min_match_length(nodes[backs[i]], &tmin, env);
+ if (r != 0) break;
+ if (*min > tmin) *min = tmin;
}
}
break;
#ifdef USE_SUBEXP_CALL
- case N_CALL:
- if (IS_CALL_RECURSION(&NCALL(node))) {
- EffectNode* en = &(NEFFECT(NCALL(node).target));
- if (IS_EFFECT_MIN_FIXED(en))
- *min = en->min_len;
+ case NT_CALL:
+ if (IS_CALL_RECURSION(NCALL(node))) {
+ EncloseNode* en = NENCLOSE(NCALL(node)->target);
+ if (IS_ENCLOSE_MIN_FIXED(en))
+ *min = en->min_len;
}
else
- r = get_min_match_length(NCALL(node).target, min, env);
+ r = get_min_match_length(NCALL(node)->target, min, env);
break;
#endif
- case N_LIST:
+ case NT_LIST:
do {
- r = get_min_match_length(NCONS(node).left, &tmin, env);
+ r = get_min_match_length(NCAR(node), &tmin, env);
if (r == 0) *min += tmin;
- } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
+ } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
break;
- case N_ALT:
+ case NT_ALT:
{
Node *x, *y;
y = node;
do {
- x = NCONS(y).left;
- r = get_min_match_length(x, &tmin, env);
- if (r != 0) break;
- if (y == node) *min = tmin;
- else if (*min > tmin) *min = tmin;
- } while (r == 0 && IS_NOT_NULL(y = NCONS(y).right));
+ x = NCAR(y);
+ r = get_min_match_length(x, &tmin, env);
+ if (r != 0) break;
+ if (y == node) *min = tmin;
+ else if (*min > tmin) *min = tmin;
+ } while (r == 0 && IS_NOT_NULL(y = NCDR(y)));
}
break;
- case N_STRING:
+ case NT_STR:
{
- StrNode* sn = &(NSTRING(node));
+ StrNode* sn = NSTR(node);
*min = sn->end - sn->s;
}
break;
- case N_CTYPE:
- switch (NCTYPE(node).type) {
- case CTYPE_WORD: *min = 1; break;
- case CTYPE_NOT_WORD: *min = 1; break;
- default:
- break;
- }
+ case NT_CTYPE:
+ *min = 1;
break;
- case N_CCLASS:
- case N_ANYCHAR:
+ case NT_CCLASS:
+ case NT_CANY:
*min = 1;
break;
- case N_QUALIFIER:
+ case NT_QTFR:
{
- QualifierNode* qn = &(NQUALIFIER(node));
+ QtfrNode* qn = NQTFR(node);
if (qn->lower > 0) {
- r = get_min_match_length(qn->target, min, env);
- if (r == 0)
- *min = distance_multiply(*min, qn->lower);
+ r = get_min_match_length(qn->target, min, env);
+ if (r == 0)
+ *min = distance_multiply(*min, qn->lower);
}
}
break;
- case N_EFFECT:
+ case NT_ENCLOSE:
{
- EffectNode* en = &(NEFFECT(node));
+ EncloseNode* en = NENCLOSE(node);
switch (en->type) {
- case EFFECT_MEMORY:
-#ifdef USE_SUBEXP_CALL
- if (IS_EFFECT_MIN_FIXED(en))
- *min = en->min_len;
- else {
- r = get_min_match_length(en->target, min, env);
- if (r == 0) {
- en->min_len = *min;
- SET_EFFECT_STATUS(node, NST_MIN_FIXED);
- }
- }
- break;
-#endif
- case EFFECT_OPTION:
- case EFFECT_STOP_BACKTRACK:
- r = get_min_match_length(en->target, min, env);
- break;
+ case ENCLOSE_MEMORY:
+ if (IS_ENCLOSE_MIN_FIXED(en))
+ *min = en->min_len;
+ else {
+ if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
+ *min = 0; /* recursive */
+ else {
+ SET_ENCLOSE_STATUS(node, NST_MARK1);
+ r = get_min_match_length(en->target, min, env);
+ CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
+ if (r == 0) {
+ en->min_len = *min;
+ SET_ENCLOSE_STATUS(node, NST_MIN_FIXED);
+ }
+ }
+ }
+ break;
+
+ case ENCLOSE_OPTION:
+ case ENCLOSE_STOP_BACKTRACK:
+ case ENCLOSE_CONDITION:
+ r = get_min_match_length(en->target, min, env);
+ break;
+
+ case ENCLOSE_ABSENT:
+ break;
}
}
break;
- case N_ANCHOR:
+ case NT_ANCHOR:
default:
break;
}
@@ -1875,116 +2328,117 @@ get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env)
*max = 0;
switch (NTYPE(node)) {
- case N_LIST:
+ case NT_LIST:
do {
- r = get_max_match_length(NCONS(node).left, &tmax, env);
+ r = get_max_match_length(NCAR(node), &tmax, env);
if (r == 0)
- *max = distance_add(*max, tmax);
- } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
+ *max = distance_add(*max, tmax);
+ } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
break;
- case N_ALT:
+ case NT_ALT:
do {
- r = get_max_match_length(NCONS(node).left, &tmax, env);
+ r = get_max_match_length(NCAR(node), &tmax, env);
if (r == 0 && *max < tmax) *max = tmax;
- } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
+ } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
break;
- case N_STRING:
+ case NT_STR:
{
- StrNode* sn = &(NSTRING(node));
+ StrNode* sn = NSTR(node);
*max = sn->end - sn->s;
}
break;
- case N_CTYPE:
- switch (NCTYPE(node).type) {
- case CTYPE_WORD:
- case CTYPE_NOT_WORD:
- *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
- break;
-
- default:
- break;
- }
+ case NT_CTYPE:
+ *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
break;
- case N_CCLASS:
- case N_ANYCHAR:
+ case NT_CCLASS:
+ case NT_CANY:
*max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
break;
- case N_BACKREF:
+ case NT_BREF:
{
int i;
int* backs;
Node** nodes = SCANENV_MEM_NODES(env);
- BackrefNode* br = &(NBACKREF(node));
+ BRefNode* br = NBREF(node);
if (br->state & NST_RECURSION) {
- *max = ONIG_INFINITE_DISTANCE;
- break;
+ *max = ONIG_INFINITE_DISTANCE;
+ break;
}
backs = BACKREFS_P(br);
for (i = 0; i < br->back_num; i++) {
- if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
- r = get_max_match_length(nodes[backs[i]], &tmax, env);
- if (r != 0) break;
- if (*max < tmax) *max = tmax;
+ if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
+ r = get_max_match_length(nodes[backs[i]], &tmax, env);
+ if (r != 0) break;
+ if (*max < tmax) *max = tmax;
}
}
break;
#ifdef USE_SUBEXP_CALL
- case N_CALL:
- if (! IS_CALL_RECURSION(&(NCALL(node))))
- r = get_max_match_length(NCALL(node).target, max, env);
+ case NT_CALL:
+ if (! IS_CALL_RECURSION(NCALL(node)))
+ r = get_max_match_length(NCALL(node)->target, max, env);
else
*max = ONIG_INFINITE_DISTANCE;
break;
#endif
- case N_QUALIFIER:
+ case NT_QTFR:
{
- QualifierNode* qn = &(NQUALIFIER(node));
+ QtfrNode* qn = NQTFR(node);
if (qn->upper != 0) {
- r = get_max_match_length(qn->target, max, env);
- if (r == 0 && *max != 0) {
- if (! IS_REPEAT_INFINITE(qn->upper))
- *max = distance_multiply(*max, qn->upper);
- else
- *max = ONIG_INFINITE_DISTANCE;
- }
+ r = get_max_match_length(qn->target, max, env);
+ if (r == 0 && *max != 0) {
+ if (! IS_REPEAT_INFINITE(qn->upper))
+ *max = distance_multiply(*max, qn->upper);
+ else
+ *max = ONIG_INFINITE_DISTANCE;
+ }
}
}
break;
- case N_EFFECT:
+ case NT_ENCLOSE:
{
- EffectNode* en = &(NEFFECT(node));
+ EncloseNode* en = NENCLOSE(node);
switch (en->type) {
- case EFFECT_MEMORY:
-#ifdef USE_SUBEXP_CALL
- if (IS_EFFECT_MAX_FIXED(en))
- *max = en->max_len;
- else {
- r = get_max_match_length(en->target, max, env);
- if (r == 0) {
- en->max_len = *max;
- SET_EFFECT_STATUS(node, NST_MAX_FIXED);
- }
- }
- break;
-#endif
- case EFFECT_OPTION:
- case EFFECT_STOP_BACKTRACK:
- r = get_max_match_length(en->target, max, env);
- break;
+ case ENCLOSE_MEMORY:
+ if (IS_ENCLOSE_MAX_FIXED(en))
+ *max = en->max_len;
+ else {
+ if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
+ *max = ONIG_INFINITE_DISTANCE;
+ else {
+ SET_ENCLOSE_STATUS(node, NST_MARK1);
+ r = get_max_match_length(en->target, max, env);
+ CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
+ if (r == 0) {
+ en->max_len = *max;
+ SET_ENCLOSE_STATUS(node, NST_MAX_FIXED);
+ }
+ }
+ }
+ break;
+
+ case ENCLOSE_OPTION:
+ case ENCLOSE_STOP_BACKTRACK:
+ case ENCLOSE_CONDITION:
+ r = get_max_match_length(en->target, max, env);
+ break;
+
+ case ENCLOSE_ABSENT:
+ break;
}
}
break;
- case N_ANCHOR:
+ case NT_ANCHOR:
default:
break;
}
@@ -2005,115 +2459,112 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
level++;
*len = 0;
switch (NTYPE(node)) {
- case N_LIST:
+ case NT_LIST:
do {
- r = get_char_length_tree1(NCONS(node).left, reg, &tlen, level);
+ r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
if (r == 0)
- *len = distance_add(*len, tlen);
- } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
+ *len = (int )distance_add(*len, tlen);
+ } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
break;
- case N_ALT:
+ case NT_ALT:
{
int tlen2;
int varlen = 0;
- r = get_char_length_tree1(NCONS(node).left, reg, &tlen, level);
- while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)) {
- r = get_char_length_tree1(NCONS(node).left, reg, &tlen2, level);
- if (r == 0) {
- if (tlen != tlen2)
- varlen = 1;
- }
+ r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
+ while (r == 0 && IS_NOT_NULL(node = NCDR(node))) {
+ r = get_char_length_tree1(NCAR(node), reg, &tlen2, level);
+ if (r == 0) {
+ if (tlen != tlen2)
+ varlen = 1;
+ }
}
if (r == 0) {
- if (varlen != 0) {
- if (level == 1)
- r = GET_CHAR_LEN_TOP_ALT_VARLEN;
- else
- r = GET_CHAR_LEN_VARLEN;
- }
- else
- *len = tlen;
+ if (varlen != 0) {
+ if (level == 1)
+ r = GET_CHAR_LEN_TOP_ALT_VARLEN;
+ else
+ r = GET_CHAR_LEN_VARLEN;
+ }
+ else
+ *len = tlen;
}
}
break;
- case N_STRING:
+ case NT_STR:
{
- StrNode* sn = &(NSTRING(node));
+ StrNode* sn = NSTR(node);
UChar *s = sn->s;
while (s < sn->end) {
- s += enc_len(reg->enc, *s);
- (*len)++;
+ s += enclen(reg->enc, s, sn->end);
+ (*len)++;
}
}
break;
- case N_QUALIFIER:
+ case NT_QTFR:
{
- QualifierNode* qn = &(NQUALIFIER(node));
+ QtfrNode* qn = NQTFR(node);
if (qn->lower == qn->upper) {
- r = get_char_length_tree1(qn->target, reg, &tlen, level);
- if (r == 0)
- *len = distance_multiply(tlen, qn->lower);
+ r = get_char_length_tree1(qn->target, reg, &tlen, level);
+ if (r == 0)
+ *len = (int )distance_multiply(tlen, qn->lower);
}
else
- r = GET_CHAR_LEN_VARLEN;
+ r = GET_CHAR_LEN_VARLEN;
}
break;
#ifdef USE_SUBEXP_CALL
- case N_CALL:
- if (! IS_CALL_RECURSION(&(NCALL(node))))
- r = get_char_length_tree1(NCALL(node).target, reg, len, level);
+ case NT_CALL:
+ if (! IS_CALL_RECURSION(NCALL(node)))
+ r = get_char_length_tree1(NCALL(node)->target, reg, len, level);
else
r = GET_CHAR_LEN_VARLEN;
break;
#endif
- case N_CTYPE:
- switch (NCTYPE(node).type) {
- case CTYPE_WORD:
- case CTYPE_NOT_WORD:
- *len = 1;
- break;
- }
+ case NT_CTYPE:
+ *len = 1;
break;
- case N_CCLASS:
- case N_ANYCHAR:
+ case NT_CCLASS:
+ case NT_CANY:
*len = 1;
break;
- case N_EFFECT:
+ case NT_ENCLOSE:
{
- EffectNode* en = &(NEFFECT(node));
+ EncloseNode* en = NENCLOSE(node);
switch (en->type) {
- case EFFECT_MEMORY:
+ case ENCLOSE_MEMORY:
#ifdef USE_SUBEXP_CALL
- if (IS_EFFECT_CLEN_FIXED(en))
- *len = en->char_len;
- else {
- r = get_char_length_tree1(en->target, reg, len, level);
- if (r == 0) {
- en->char_len = *len;
- SET_EFFECT_STATUS(node, NST_CLEN_FIXED);
- }
- }
- break;
+ if (IS_ENCLOSE_CLEN_FIXED(en))
+ *len = en->char_len;
+ else {
+ r = get_char_length_tree1(en->target, reg, len, level);
+ if (r == 0) {
+ en->char_len = *len;
+ SET_ENCLOSE_STATUS(node, NST_CLEN_FIXED);
+ }
+ }
+ break;
#endif
- case EFFECT_OPTION:
- case EFFECT_STOP_BACKTRACK:
- r = get_char_length_tree1(en->target, reg, len, level);
- break;
+ case ENCLOSE_OPTION:
+ case ENCLOSE_STOP_BACKTRACK:
+ case ENCLOSE_CONDITION:
+ r = get_char_length_tree1(en->target, reg, len, level);
+ break;
+ case ENCLOSE_ABSENT:
default:
- break;
+ break;
}
}
break;
- case N_ANCHOR:
+ case NT_ANCHOR:
break;
default:
@@ -2130,210 +2581,191 @@ get_char_length_tree(Node* node, regex_t* reg, int* len)
return get_char_length_tree1(node, reg, len, 0);
}
-extern int
-onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
-{
- int found;
-
- if (code >= SINGLE_BYTE_SIZE) {
- if (IS_NULL(cc->mbuf)) {
- found = 0;
- }
- else {
- found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0);
- }
- }
- else {
- found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);
- }
-
- if (cc->not == 0)
- return found;
- else
- return !found;
-}
-
/* x is not included y ==> 1 : 0 */
static int
is_not_included(Node* x, Node* y, regex_t* reg)
{
- int i, len;
+ int i;
+ OnigDistance len;
OnigCodePoint code;
- UChar *p, c;
+ UChar *p;
int ytype;
retry:
ytype = NTYPE(y);
switch (NTYPE(x)) {
- case N_CTYPE:
+ case NT_CTYPE:
{
switch (ytype) {
- case N_CTYPE:
- switch (NCTYPE(x).type) {
- case CTYPE_WORD:
- if (NCTYPE(y).type == CTYPE_NOT_WORD)
- return 1;
- else
- return 0;
- break;
- case CTYPE_NOT_WORD:
- if (NCTYPE(y).type == CTYPE_WORD)
- return 1;
- else
- return 0;
- break;
- default:
- break;
- }
- break;
-
- case N_CCLASS:
+ case NT_CTYPE:
+ if (NCTYPE(y)->ctype == NCTYPE(x)->ctype &&
+ NCTYPE(y)->not != NCTYPE(x)->not &&
+ NCTYPE(y)->ascii_range == NCTYPE(x)->ascii_range)
+ return 1;
+ else
+ return 0;
+ break;
+
+ case NT_CCLASS:
swap:
- {
- Node* tmp;
- tmp = x; x = y; y = tmp;
- goto retry;
- }
- break;
+ {
+ Node* tmp;
+ tmp = x; x = y; y = tmp;
+ goto retry;
+ }
+ break;
- case N_STRING:
- goto swap;
- break;
+ case NT_STR:
+ goto swap;
+ break;
default:
- break;
+ break;
}
}
break;
- case N_CCLASS:
+ case NT_CCLASS:
{
- CClassNode* xc = &(NCCLASS(x));
+ CClassNode* xc = NCCLASS(x);
switch (ytype) {
- case N_CTYPE:
- switch (NCTYPE(y).type) {
- case CTYPE_WORD:
- if (IS_NULL(xc->mbuf) && xc->not == 0) {
- for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
- if (BITSET_AT(xc->bs, i)) {
- if (ONIGENC_IS_CODE_SB_WORD(reg->enc, i)) return 0;
- }
- }
- return 1;
- }
- return 0;
- break;
- case CTYPE_NOT_WORD:
- for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
- if (! ONIGENC_IS_CODE_SB_WORD(reg->enc, i)) {
- if (xc->not == 0) {
- if (BITSET_AT(xc->bs, i))
- return 0;
- }
- else {
- if (! BITSET_AT(xc->bs, i))
- return 0;
- }
- }
- }
- return 1;
- break;
-
- default:
- break;
- }
- break;
-
- case N_CCLASS:
- {
- int v;
- CClassNode* yc = &(NCCLASS(y));
-
- for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
- v = BITSET_AT(xc->bs, i);
- if ((v != 0 && xc->not == 0) || (v == 0 && xc->not)) {
- v = BITSET_AT(yc->bs, i);
- if ((v != 0 && yc->not == 0) || (v == 0 && yc->not))
- return 0;
- }
- }
- if ((IS_NULL(xc->mbuf) && xc->not == 0) ||
- (IS_NULL(yc->mbuf) && yc->not == 0))
- return 1;
- return 0;
- }
- break;
+ case NT_CTYPE:
+ switch (NCTYPE(y)->ctype) {
+ case ONIGENC_CTYPE_WORD:
+ if (NCTYPE(y)->not == 0) {
+ if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) {
+ for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
+ if (BITSET_AT(xc->bs, i)) {
+ if (NCTYPE(y)->ascii_range) {
+ if (IS_CODE_SB_WORD(reg->enc, i)) return 0;
+ }
+ else {
+ if (ONIGENC_IS_CODE_WORD(reg->enc, i)) return 0;
+ }
+ }
+ }
+ return 1;
+ }
+ return 0;
+ }
+ else {
+ if (IS_NOT_NULL(xc->mbuf)) return 0;
+ for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
+ int is_word;
+ if (NCTYPE(y)->ascii_range)
+ is_word = IS_CODE_SB_WORD(reg->enc, i);
+ else
+ is_word = ONIGENC_IS_CODE_WORD(reg->enc, i);
+ if (! is_word) {
+ if (!IS_NCCLASS_NOT(xc)) {
+ if (BITSET_AT(xc->bs, i))
+ return 0;
+ }
+ else {
+ if (! BITSET_AT(xc->bs, i))
+ return 0;
+ }
+ }
+ }
+ return 1;
+ }
+ break;
+
+ default:
+ break;
+ }
+ break;
+
+ case NT_CCLASS:
+ {
+ int v;
+ CClassNode* yc = NCCLASS(y);
+
+ for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
+ v = BITSET_AT(xc->bs, i);
+ if ((v != 0 && !IS_NCCLASS_NOT(xc)) ||
+ (v == 0 && IS_NCCLASS_NOT(xc))) {
+ v = BITSET_AT(yc->bs, i);
+ if ((v != 0 && !IS_NCCLASS_NOT(yc)) ||
+ (v == 0 && IS_NCCLASS_NOT(yc)))
+ return 0;
+ }
+ }
+ if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) ||
+ (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc)))
+ return 1;
+ return 0;
+ }
+ break;
- case N_STRING:
- goto swap;
- break;
+ case NT_STR:
+ goto swap;
+ break;
default:
- break;
+ break;
}
}
break;
- case N_STRING:
+ case NT_STR:
{
- StrNode* xs = &(NSTRING(x));
+ StrNode* xs = NSTR(x);
if (NSTRING_LEN(x) == 0)
- break;
+ break;
- c = *(xs->s);
switch (ytype) {
- case N_CTYPE:
- switch (NCTYPE(y).type) {
- case CTYPE_WORD:
- return (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end) ? 0 : 1);
- break;
- case CTYPE_NOT_WORD:
- return (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end) ? 1 : 0);
- break;
- default:
- break;
- }
- break;
+ case NT_CTYPE:
+ switch (NCTYPE(y)->ctype) {
+ case ONIGENC_CTYPE_WORD:
+ if (NCTYPE(y)->ascii_range) {
+ if (ONIGENC_IS_MBC_ASCII_WORD(reg->enc, xs->s, xs->end))
+ return NCTYPE(y)->not;
+ else
+ return !(NCTYPE(y)->not);
+ }
+ else {
+ if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end))
+ return NCTYPE(y)->not;
+ else
+ return !(NCTYPE(y)->not);
+ }
+ break;
+ default:
+ break;
+ }
+ break;
- case N_CCLASS:
- {
- CClassNode* cc = &(NCCLASS(y));
+ case NT_CCLASS:
+ {
+ CClassNode* cc = NCCLASS(y);
+
+ code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
+ xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
+ return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);
+ }
+ break;
+
+ case NT_STR:
+ {
+ UChar *q;
+ StrNode* ys = NSTR(y);
+ len = NSTRING_LEN(x);
+ if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y);
+ if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) {
+ /* tiny version */
+ return 0;
+ }
+ else {
+ for (i = 0, p = ys->s, q = xs->s; (OnigDistance )i < len; i++, p++, q++) {
+ if (*p != *q) return 1;
+ }
+ }
+ }
+ break;
- code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
- xs->s + enc_len(reg->enc, c));
- return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);
- }
- break;
-
- case N_STRING:
- {
- UChar *q;
- StrNode* ys = &(NSTRING(y));
- len = NSTRING_LEN(x);
- if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y);
- if (NSTRING_IS_CASE_AMBIG(x) || NSTRING_IS_CASE_AMBIG(y)) {
- UChar plow[ONIGENC_MBC_TO_LOWER_MAXLEN];
- UChar qlow[ONIGENC_MBC_TO_LOWER_MAXLEN];
- int plen, qlen;
- for (p = ys->s, q = xs->s; q < xs->end; ) {
- plen = ONIGENC_MBC_TO_LOWER(reg->enc, p, plow);
- qlen = ONIGENC_MBC_TO_LOWER(reg->enc, q, qlow);
- if (plen != qlen || onig_strncmp(plow, qlow, plen) != 0)
- return 1;
- p += enc_len(reg->enc, *p);
- q += enc_len(reg->enc, *q);
- }
- }
- else {
- for (i = 0, p = ys->s, q = xs->s; i < len; i++, p++, q++) {
- if (*p != *q) return 1;
- }
- }
- }
- break;
-
default:
- break;
+ break;
}
}
break;
@@ -2351,80 +2783,81 @@ get_head_value_node(Node* node, int exact, regex_t* reg)
Node* n = NULL_NODE;
switch (NTYPE(node)) {
- case N_BACKREF:
- case N_ALT:
- case N_ANYCHAR:
+ case NT_BREF:
+ case NT_ALT:
+ case NT_CANY:
#ifdef USE_SUBEXP_CALL
- case N_CALL:
+ case NT_CALL:
#endif
break;
- case N_CTYPE:
- case N_CCLASS:
+ case NT_CTYPE:
+ case NT_CCLASS:
if (exact == 0) {
n = node;
}
break;
- case N_LIST:
- n = get_head_value_node(NCONS(node).left, exact, reg);
+ case NT_LIST:
+ n = get_head_value_node(NCAR(node), exact, reg);
break;
- case N_STRING:
+ case NT_STR:
{
- StrNode* sn = &(NSTRING(node));
-
+ StrNode* sn = NSTR(node);
if (sn->end <= sn->s)
- break;
+ break;
- if (exact != 0 &&
- !NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) {
- if (! ONIGENC_IS_MBC_CASE_AMBIG(reg->enc, sn->s))
- n = node;
- }
- else {
- n = node;
+ if (exact == 0 ||
+ NSTRING_IS_RAW(node) || !IS_IGNORECASE(reg->options)) {
+ n = node;
}
}
break;
- case N_QUALIFIER:
+ case NT_QTFR:
{
- QualifierNode* qn = &(NQUALIFIER(node));
+ QtfrNode* qn = NQTFR(node);
if (qn->lower > 0) {
- if (IS_NOT_NULL(qn->head_exact))
- n = qn->head_exact;
- else
- n = get_head_value_node(qn->target, exact, reg);
+#ifdef USE_OP_PUSH_OR_JUMP_EXACT
+ if (IS_NOT_NULL(qn->head_exact))
+ n = qn->head_exact;
+ else
+#endif
+ n = get_head_value_node(qn->target, exact, reg);
}
}
break;
- case N_EFFECT:
+ case NT_ENCLOSE:
{
- EffectNode* en = &(NEFFECT(node));
+ EncloseNode* en = NENCLOSE(node);
switch (en->type) {
- case EFFECT_OPTION:
- {
- OnigOptionType options = reg->options;
+ case ENCLOSE_OPTION:
+ {
+ OnigOptionType options = reg->options;
- reg->options = NEFFECT(node).option;
- n = get_head_value_node(NEFFECT(node).target, exact, reg);
- reg->options = options;
- }
- break;
+ reg->options = NENCLOSE(node)->option;
+ n = get_head_value_node(NENCLOSE(node)->target, exact, reg);
+ reg->options = options;
+ }
+ break;
- case EFFECT_MEMORY:
- case EFFECT_STOP_BACKTRACK:
- n = get_head_value_node(en->target, exact, reg);
- break;
+ case ENCLOSE_MEMORY:
+ case ENCLOSE_STOP_BACKTRACK:
+ case ENCLOSE_CONDITION:
+ n = get_head_value_node(en->target, exact, reg);
+ break;
+
+ case ENCLOSE_ABSENT:
+ break;
}
}
break;
- case N_ANCHOR:
- if (NANCHOR(node).type == ANCHOR_PREC_READ)
- n = get_head_value_node(NANCHOR(node).target, exact, reg);
+ case NT_ANCHOR:
+ if (NANCHOR(node)->type == ANCHOR_PREC_READ)
+ n = get_head_value_node(NANCHOR(node)->target, exact, reg);
break;
default:
@@ -2435,45 +2868,46 @@ get_head_value_node(Node* node, int exact, regex_t* reg)
}
static int
-check_type_tree(Node* node, int type_mask, int effect_mask, int anchor_mask)
+check_type_tree(Node* node, int type_mask, int enclose_mask, int anchor_mask)
{
int type, r = 0;
type = NTYPE(node);
- if ((type & type_mask) == 0)
+ if ((NTYPE2BIT(type) & type_mask) == 0)
return 1;
switch (type) {
- case N_LIST:
- case N_ALT:
+ case NT_LIST:
+ case NT_ALT:
do {
- r = check_type_tree(NCONS(node).left, type_mask, effect_mask, anchor_mask);
- } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
+ r = check_type_tree(NCAR(node), type_mask, enclose_mask,
+ anchor_mask);
+ } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
break;
- case N_QUALIFIER:
- r = check_type_tree(NQUALIFIER(node).target, type_mask, effect_mask,
- anchor_mask);
+ case NT_QTFR:
+ r = check_type_tree(NQTFR(node)->target, type_mask, enclose_mask,
+ anchor_mask);
break;
- case N_EFFECT:
+ case NT_ENCLOSE:
{
- EffectNode* en = &(NEFFECT(node));
- if ((en->type & effect_mask) == 0)
- return 1;
+ EncloseNode* en = NENCLOSE(node);
+ if ((en->type & enclose_mask) == 0)
+ return 1;
- r = check_type_tree(en->target, type_mask, effect_mask, anchor_mask);
+ r = check_type_tree(en->target, type_mask, enclose_mask, anchor_mask);
}
break;
- case N_ANCHOR:
- type = NANCHOR(node).type;
+ case NT_ANCHOR:
+ type = NANCHOR(node)->type;
if ((type & anchor_mask) == 0)
return 1;
- if (NANCHOR(node).target)
- r = check_type_tree(NANCHOR(node).target,
- type_mask, effect_mask, anchor_mask);
+ if (NANCHOR(node)->target)
+ r = check_type_tree(NANCHOR(node)->target,
+ type_mask, enclose_mask, anchor_mask);
break;
default:
@@ -2484,8 +2918,8 @@ check_type_tree(Node* node, int type_mask, int effect_mask, int anchor_mask)
#ifdef USE_SUBEXP_CALL
-#define RECURSION_EXIST 1
-#define RECURSION_INFINITE 2
+# define RECURSION_EXIST 1
+# define RECURSION_INFINITE 2
static int
subexp_inf_recursive_check(Node* node, ScanEnv* env, int head)
@@ -2495,7 +2929,7 @@ subexp_inf_recursive_check(Node* node, ScanEnv* env, int head)
type = NTYPE(node);
switch (type) {
- case N_LIST:
+ case NT_LIST:
{
Node *x;
OnigDistance min;
@@ -2503,61 +2937,64 @@ subexp_inf_recursive_check(Node* node, ScanEnv* env, int head)
x = node;
do {
- ret = subexp_inf_recursive_check(NCONS(x).left, env, head);
- if (ret < 0 || ret == RECURSION_INFINITE) return ret;
- r |= ret;
- if (head) {
- ret = get_min_match_length(NCONS(x).left, &min, env);
- if (ret != 0) return ret;
- if (min != 0) head = 0;
- }
- } while (IS_NOT_NULL(x = NCONS(x).right));
+ ret = subexp_inf_recursive_check(NCAR(x), env, head);
+ if (ret < 0 || ret == RECURSION_INFINITE) return ret;
+ r |= ret;
+ if (head) {
+ ret = get_min_match_length(NCAR(x), &min, env);
+ if (ret != 0) return ret;
+ if (min != 0) head = 0;
+ }
+ } while (IS_NOT_NULL(x = NCDR(x)));
}
break;
- case N_ALT:
+ case NT_ALT:
{
int ret;
r = RECURSION_EXIST;
do {
- ret = subexp_inf_recursive_check(NCONS(node).left, env, head);
- if (ret < 0 || ret == RECURSION_INFINITE) return ret;
- r &= ret;
- } while (IS_NOT_NULL(node = NCONS(node).right));
+ ret = subexp_inf_recursive_check(NCAR(node), env, head);
+ if (ret < 0 || ret == RECURSION_INFINITE) return ret;
+ r &= ret;
+ } while (IS_NOT_NULL(node = NCDR(node)));
}
break;
- case N_QUALIFIER:
- r = subexp_inf_recursive_check(NQUALIFIER(node).target, env, head);
+ case NT_QTFR:
+ r = subexp_inf_recursive_check(NQTFR(node)->target, env, head);
+ if (r == RECURSION_EXIST) {
+ if (NQTFR(node)->lower == 0) r = 0;
+ }
break;
- case N_ANCHOR:
+ case NT_ANCHOR:
{
- AnchorNode* an = &(NANCHOR(node));
+ AnchorNode* an = NANCHOR(node);
switch (an->type) {
case ANCHOR_PREC_READ:
case ANCHOR_PREC_READ_NOT:
case ANCHOR_LOOK_BEHIND:
case ANCHOR_LOOK_BEHIND_NOT:
- r = subexp_inf_recursive_check(an->target, env, head);
- break;
+ r = subexp_inf_recursive_check(an->target, env, head);
+ break;
}
}
break;
- case N_CALL:
- r = subexp_inf_recursive_check(NCALL(node).target, env, head);
+ case NT_CALL:
+ r = subexp_inf_recursive_check(NCALL(node)->target, env, head);
break;
- case N_EFFECT:
- if (IS_EFFECT_MARK2(&(NEFFECT(node))))
+ case NT_ENCLOSE:
+ if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
return 0;
- else if (IS_EFFECT_MARK1(&(NEFFECT(node))))
+ else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE);
else {
- SET_EFFECT_STATUS(node, NST_MARK2);
- r = subexp_inf_recursive_check(NEFFECT(node).target, env, head);
- CLEAR_EFFECT_STATUS(node, NST_MARK2);
+ SET_ENCLOSE_STATUS(node, NST_MARK2);
+ r = subexp_inf_recursive_check(NENCLOSE(node)->target, env, head);
+ CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
}
break;
@@ -2576,40 +3013,40 @@ subexp_inf_recursive_check_trav(Node* node, ScanEnv* env)
type = NTYPE(node);
switch (type) {
- case N_LIST:
- case N_ALT:
+ case NT_LIST:
+ case NT_ALT:
do {
- r = subexp_inf_recursive_check_trav(NCONS(node).left, env);
- } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
+ r = subexp_inf_recursive_check_trav(NCAR(node), env);
+ } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
break;
- case N_QUALIFIER:
- r = subexp_inf_recursive_check_trav(NQUALIFIER(node).target, env);
+ case NT_QTFR:
+ r = subexp_inf_recursive_check_trav(NQTFR(node)->target, env);
break;
- case N_ANCHOR:
+ case NT_ANCHOR:
{
- AnchorNode* an = &(NANCHOR(node));
+ AnchorNode* an = NANCHOR(node);
switch (an->type) {
case ANCHOR_PREC_READ:
case ANCHOR_PREC_READ_NOT:
case ANCHOR_LOOK_BEHIND:
case ANCHOR_LOOK_BEHIND_NOT:
- r = subexp_inf_recursive_check_trav(an->target, env);
- break;
+ r = subexp_inf_recursive_check_trav(an->target, env);
+ break;
}
}
break;
- case N_EFFECT:
+ case NT_ENCLOSE:
{
- EffectNode* en = &(NEFFECT(node));
+ EncloseNode* en = NENCLOSE(node);
- if (IS_EFFECT_RECURSION(en)) {
- SET_EFFECT_STATUS(node, NST_MARK1);
- r = subexp_inf_recursive_check(en->target, env, 1);
- if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION;
- CLEAR_EFFECT_STATUS(node, NST_MARK1);
+ if (IS_ENCLOSE_RECURSION(en)) {
+ SET_ENCLOSE_STATUS(node, NST_MARK1);
+ r = subexp_inf_recursive_check(en->target, env, 1);
+ if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION;
+ CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
}
r = subexp_inf_recursive_check_trav(en->target, env);
}
@@ -2626,50 +3063,48 @@ subexp_inf_recursive_check_trav(Node* node, ScanEnv* env)
static int
subexp_recursive_check(Node* node)
{
- int type;
int r = 0;
- type = NTYPE(node);
- switch (type) {
- case N_LIST:
- case N_ALT:
+ switch (NTYPE(node)) {
+ case NT_LIST:
+ case NT_ALT:
do {
- r |= subexp_recursive_check(NCONS(node).left);
- } while (IS_NOT_NULL(node = NCONS(node).right));
+ r |= subexp_recursive_check(NCAR(node));
+ } while (IS_NOT_NULL(node = NCDR(node)));
break;
- case N_QUALIFIER:
- r = subexp_recursive_check(NQUALIFIER(node).target);
+ case NT_QTFR:
+ r = subexp_recursive_check(NQTFR(node)->target);
break;
- case N_ANCHOR:
+ case NT_ANCHOR:
{
- AnchorNode* an = &(NANCHOR(node));
+ AnchorNode* an = NANCHOR(node);
switch (an->type) {
case ANCHOR_PREC_READ:
case ANCHOR_PREC_READ_NOT:
case ANCHOR_LOOK_BEHIND:
case ANCHOR_LOOK_BEHIND_NOT:
- r = subexp_recursive_check(an->target);
- break;
+ r = subexp_recursive_check(an->target);
+ break;
}
}
break;
- case N_CALL:
- r = subexp_recursive_check(NCALL(node).target);
+ case NT_CALL:
+ r = subexp_recursive_check(NCALL(node)->target);
if (r != 0) SET_CALL_RECURSION(node);
break;
- case N_EFFECT:
- if (IS_EFFECT_MARK2(&(NEFFECT(node))))
+ case NT_ENCLOSE:
+ if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
return 0;
- else if (IS_EFFECT_MARK1(&(NEFFECT(node))))
+ else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
return 1; /* recursion */
else {
- SET_EFFECT_STATUS(node, NST_MARK2);
- r = subexp_recursive_check(NEFFECT(node).target);
- CLEAR_EFFECT_STATUS(node, NST_MARK2);
+ SET_ENCLOSE_STATUS(node, NST_MARK2);
+ r = subexp_recursive_check(NENCLOSE(node)->target);
+ CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
}
break;
@@ -2684,62 +3119,62 @@ subexp_recursive_check(Node* node)
static int
subexp_recursive_check_trav(Node* node, ScanEnv* env)
{
-#define FOUND_CALLED_NODE 1
+# define FOUND_CALLED_NODE 1
int type;
int r = 0;
type = NTYPE(node);
switch (type) {
- case N_LIST:
- case N_ALT:
+ case NT_LIST:
+ case NT_ALT:
{
int ret;
do {
- ret = subexp_recursive_check_trav(NCONS(node).left, env);
- if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE;
- else if (ret < 0) return ret;
- } while (IS_NOT_NULL(node = NCONS(node).right));
+ ret = subexp_recursive_check_trav(NCAR(node), env);
+ if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE;
+ else if (ret < 0) return ret;
+ } while (IS_NOT_NULL(node = NCDR(node)));
}
break;
- case N_QUALIFIER:
- r = subexp_recursive_check_trav(NQUALIFIER(node).target, env);
- if (NQUALIFIER(node).upper == 0) {
+ case NT_QTFR:
+ r = subexp_recursive_check_trav(NQTFR(node)->target, env);
+ if (NQTFR(node)->upper == 0) {
if (r == FOUND_CALLED_NODE)
- NQUALIFIER(node).is_refered = 1;
+ NQTFR(node)->is_referred = 1;
}
break;
- case N_ANCHOR:
+ case NT_ANCHOR:
{
- AnchorNode* an = &(NANCHOR(node));
+ AnchorNode* an = NANCHOR(node);
switch (an->type) {
case ANCHOR_PREC_READ:
case ANCHOR_PREC_READ_NOT:
case ANCHOR_LOOK_BEHIND:
case ANCHOR_LOOK_BEHIND_NOT:
- r = subexp_recursive_check_trav(an->target, env);
- break;
+ r = subexp_recursive_check_trav(an->target, env);
+ break;
}
}
break;
- case N_EFFECT:
+ case NT_ENCLOSE:
{
- EffectNode* en = &(NEFFECT(node));
-
- if (! IS_EFFECT_RECURSION(en)) {
- if (IS_EFFECT_CALLED(en)) {
- SET_EFFECT_STATUS(node, NST_MARK1);
- r = subexp_recursive_check(en->target);
- if (r != 0) SET_EFFECT_STATUS(node, NST_RECURSION);
- CLEAR_EFFECT_STATUS(node, NST_MARK1);
- }
+ EncloseNode* en = NENCLOSE(node);
+
+ if (! IS_ENCLOSE_RECURSION(en)) {
+ if (IS_ENCLOSE_CALLED(en)) {
+ SET_ENCLOSE_STATUS(node, NST_MARK1);
+ r = subexp_recursive_check(en->target);
+ if (r != 0) SET_ENCLOSE_STATUS(node, NST_RECURSION);
+ CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
+ }
}
r = subexp_recursive_check_trav(en->target, env);
- if (IS_EFFECT_CALLED(en))
- r |= FOUND_CALLED_NODE;
+ if (IS_ENCLOSE_CALLED(en))
+ r |= FOUND_CALLED_NODE;
}
break;
@@ -2758,93 +3193,101 @@ setup_subexp_call(Node* node, ScanEnv* env)
type = NTYPE(node);
switch (type) {
- case N_LIST:
+ case NT_LIST:
do {
- r = setup_subexp_call(NCONS(node).left, env);
- } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
+ r = setup_subexp_call(NCAR(node), env);
+ } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
break;
- case N_ALT:
+ case NT_ALT:
do {
- r = setup_subexp_call(NCONS(node).left, env);
- } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
+ r = setup_subexp_call(NCAR(node), env);
+ } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
break;
- case N_QUALIFIER:
- r = setup_subexp_call(NQUALIFIER(node).target, env);
+ case NT_QTFR:
+ r = setup_subexp_call(NQTFR(node)->target, env);
break;
- case N_EFFECT:
- r = setup_subexp_call(NEFFECT(node).target, env);
+ case NT_ENCLOSE:
+ r = setup_subexp_call(NENCLOSE(node)->target, env);
break;
- case N_CALL:
+ case NT_CALL:
{
- int n, num, *refs;
- UChar *p;
- CallNode* cn = &(NCALL(node));
+ CallNode* cn = NCALL(node);
Node** nodes = SCANENV_MEM_NODES(env);
-#ifdef USE_NAMED_GROUP
- n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end, &refs);
-#else
- n = -1;
-#endif
- if (n <= 0) {
- /* name not found, check group number. (?*ddd) */
- p = cn->name;
- num = onig_scan_unsigned_number(&p, cn->name_end, env->enc);
- if (num <= 0 || p != cn->name_end) {
- onig_scan_env_set_error_string(env,
- ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
- return ONIGERR_UNDEFINED_NAME_REFERENCE;
- }
-#ifdef USE_NAMED_GROUP
- if (env->num_named > 0 &&
- IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
- !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
- return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
- }
-#endif
- if (num > env->num_mem) {
- onig_scan_env_set_error_string(env,
- ONIGERR_UNDEFINED_GROUP_REFERENCE, cn->name, cn->name_end);
- return ONIGERR_UNDEFINED_GROUP_REFERENCE;
- }
- cn->ref_num = num;
- goto set_call_attr;
+ if (cn->group_num != 0) {
+ int gnum = cn->group_num;
+
+# ifdef USE_NAMED_GROUP
+ if (env->num_named > 0 &&
+ IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
+ !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
+ return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
+ }
+# endif
+ if (gnum > env->num_mem) {
+ onig_scan_env_set_error_string(env,
+ ONIGERR_UNDEFINED_GROUP_REFERENCE, cn->name, cn->name_end);
+ return ONIGERR_UNDEFINED_GROUP_REFERENCE;
+ }
+
+# ifdef USE_NAMED_GROUP
+ set_call_attr:
+# endif
+ cn->target = nodes[cn->group_num];
+ if (IS_NULL(cn->target)) {
+ onig_scan_env_set_error_string(env,
+ ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
+ return ONIGERR_UNDEFINED_NAME_REFERENCE;
+ }
+ SET_ENCLOSE_STATUS(cn->target, NST_CALLED);
+ BIT_STATUS_ON_AT(env->bt_mem_start, cn->group_num);
+ cn->unset_addr_list = env->unset_addr_list;
}
- else if (n > 1) {
- onig_scan_env_set_error_string(env,
- ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end);
- return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL;
+# ifdef USE_NAMED_GROUP
+# ifdef USE_PERL_SUBEXP_CALL
+ else if (cn->name == cn->name_end) {
+ goto set_call_attr;
}
+# endif
else {
- cn->ref_num = refs[0];
- set_call_attr:
- cn->target = nodes[cn->ref_num];
- if (IS_NULL(cn->target)) {
- onig_scan_env_set_error_string(env,
- ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
- return ONIGERR_UNDEFINED_NAME_REFERENCE;
- }
- SET_EFFECT_STATUS(cn->target, NST_CALLED);
- BIT_STATUS_ON_AT(env->bt_mem_start, cn->ref_num);
- cn->unset_addr_list = env->unset_addr_list;
+ int *refs;
+
+ int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end,
+ &refs);
+ if (n <= 0) {
+ onig_scan_env_set_error_string(env,
+ ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
+ return ONIGERR_UNDEFINED_NAME_REFERENCE;
+ }
+ else if (n > 1 &&
+ ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME_CALL)) {
+ onig_scan_env_set_error_string(env,
+ ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end);
+ return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL;
+ }
+ else {
+ cn->group_num = refs[0];
+ goto set_call_attr;
+ }
}
+# endif
}
break;
- case N_ANCHOR:
+ case NT_ANCHOR:
{
- AnchorNode* an = &(NANCHOR(node));
+ AnchorNode* an = NANCHOR(node);
switch (an->type) {
case ANCHOR_PREC_READ:
case ANCHOR_PREC_READ_NOT:
case ANCHOR_LOOK_BEHIND:
case ANCHOR_LOOK_BEHIND_NOT:
- r = setup_subexp_call(an->target, env);
- break;
+ r = setup_subexp_call(an->target, env);
+ break;
}
}
break;
@@ -2857,6 +3300,14 @@ setup_subexp_call(Node* node, ScanEnv* env)
}
#endif
+#define IN_ALT (1<<0)
+#define IN_NOT (1<<1)
+#define IN_REPEAT (1<<2)
+#define IN_VAR_REPEAT (1<<3)
+#define IN_CALL (1<<4)
+#define IN_RECCALL (1<<5)
+#define IN_LOOK_BEHIND (1<<6)
+
/* divide different length alternatives in look-behind.
(?<=A|B) ==> (?<=A)|(?<=B)
(?<!A|B) ==> (?<!A)(?<!B)
@@ -2864,30 +3315,29 @@ setup_subexp_call(Node* node, ScanEnv* env)
static int
divide_look_behind_alternatives(Node* node)
{
- Node tmp_node;
Node *head, *np, *insert_node;
- AnchorNode* an = &(NANCHOR(node));
+ AnchorNode* an = NANCHOR(node);
int anc_type = an->type;
head = an->target;
- np = NCONS(head).left;
- tmp_node = *node; *node = *head; *head = tmp_node;
- NCONS(node).left = head;
- NANCHOR(head).target = np;
+ np = NCAR(head);
+ swap_node(node, head);
+ NCAR(node) = head;
+ NANCHOR(head)->target = np;
np = node;
- while ((np = NCONS(np).right) != NULL_NODE) {
+ while ((np = NCDR(np)) != NULL_NODE) {
insert_node = onig_node_new_anchor(anc_type);
- CHECK_NULL_RETURN_VAL(insert_node, ONIGERR_MEMORY);
- NANCHOR(insert_node).target = NCONS(np).left;
- NCONS(np).left = insert_node;
+ CHECK_NULL_RETURN_MEMERR(insert_node);
+ NANCHOR(insert_node)->target = NCAR(np);
+ NCAR(np) = insert_node;
}
if (anc_type == ANCHOR_LOOK_BEHIND_NOT) {
np = node;
do {
- np->type = N_LIST; /* alt -> list */
- } while ((np = NCONS(np).right) != NULL_NODE);
+ SET_NTYPE(np, NT_LIST); /* alt -> list */
+ } while ((np = NCDR(np)) != NULL_NODE);
}
return 0;
}
@@ -2896,7 +3346,7 @@ static int
setup_look_behind(Node* node, regex_t* reg, ScanEnv* env)
{
int r, len;
- AnchorNode* an = &(NANCHOR(node));
+ AnchorNode* an = NANCHOR(node);
r = get_char_length_tree(an->target, reg, &len);
if (r == 0)
@@ -2920,35 +3370,39 @@ next_setup(Node* node, Node* next_node, regex_t* reg)
retry:
type = NTYPE(node);
- if (type == N_QUALIFIER) {
- QualifierNode* qn = &(NQUALIFIER(node));
+ if (type == NT_QTFR) {
+ QtfrNode* qn = NQTFR(node);
if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) {
-#ifdef USE_QUALIFIER_PEEK_NEXT
- qn->next_head_exact = get_head_value_node(next_node, 1, reg);
+#ifdef USE_QTFR_PEEK_NEXT
+ Node* n = get_head_value_node(next_node, 1, reg);
+ /* '\0': for UTF-16BE etc... */
+ if (IS_NOT_NULL(n) && NSTR(n)->s[0] != '\0') {
+ qn->next_head_exact = n;
+ }
#endif
- /* automatic posseivation a*b ==> (?>a*)b */
+ /* automatic possessification a*b ==> (?>a*)b */
if (qn->lower <= 1) {
- int ttype = NTYPE(qn->target);
- if (IS_NODE_TYPE_SIMPLE(ttype)) {
- Node *x, *y;
- x = get_head_value_node(qn->target, 0, reg);
- if (IS_NOT_NULL(x)) {
- y = get_head_value_node(next_node, 0, reg);
- if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) {
- Node* en = onig_node_new_effect(EFFECT_STOP_BACKTRACK);
- CHECK_NULL_RETURN_VAL(en, ONIGERR_MEMORY);
- SET_EFFECT_STATUS(en, NST_SIMPLE_REPEAT);
- swap_node(node, en);
- NEFFECT(node).target = en;
- }
- }
- }
+ int ttype = NTYPE(qn->target);
+ if (IS_NODE_TYPE_SIMPLE(ttype)) {
+ Node *x, *y;
+ x = get_head_value_node(qn->target, 0, reg);
+ if (IS_NOT_NULL(x)) {
+ y = get_head_value_node(next_node, 0, reg);
+ if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) {
+ Node* en = onig_node_new_enclose(ENCLOSE_STOP_BACKTRACK);
+ CHECK_NULL_RETURN_MEMERR(en);
+ SET_ENCLOSE_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT);
+ swap_node(node, en);
+ NENCLOSE(node)->target = en;
+ }
+ }
+ }
}
}
}
- else if (type == N_EFFECT) {
- EffectNode* en = &(NEFFECT(node));
- if (en->type == EFFECT_MEMORY) {
+ else if (type == NT_ENCLOSE) {
+ EncloseNode* en = NENCLOSE(node);
+ if (en->type == ENCLOSE_MEMORY && !IS_ENCLOSE_CALLED(en)) {
node = en->target;
goto retry;
}
@@ -2956,9 +3410,496 @@ next_setup(Node* node, Node* next_node, regex_t* reg)
return 0;
}
-#define IN_ALT (1<<0)
-#define IN_NOT (1<<1)
-#define IN_REPEAT (1<<2)
+
+static int
+update_string_node_case_fold(regex_t* reg, Node *node)
+{
+ UChar *p, *end, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
+ UChar *sbuf, *ebuf, *sp;
+ int r, i, len;
+ OnigDistance sbuf_size;
+ StrNode* sn = NSTR(node);
+
+ end = sn->end;
+ sbuf_size = (end - sn->s) * 2;
+ sbuf = (UChar* )xmalloc(sbuf_size);
+ CHECK_NULL_RETURN_MEMERR(sbuf);
+ ebuf = sbuf + sbuf_size;
+
+ sp = sbuf;
+ p = sn->s;
+ while (p < end) {
+ len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf);
+ for (i = 0; i < len; i++) {
+ if (sp >= ebuf) {
+ UChar* p = (UChar* )xrealloc(sbuf, sbuf_size * 2);
+ if (IS_NULL(p)) {
+ xfree(sbuf);
+ return ONIGERR_MEMORY;
+ }
+ sbuf = p;
+ sp = sbuf + sbuf_size;
+ sbuf_size *= 2;
+ ebuf = sbuf + sbuf_size;
+ }
+
+ *sp++ = buf[i];
+ }
+ }
+
+ r = onig_node_str_set(node, sbuf, sp);
+
+ xfree(sbuf);
+ return r;
+}
+
+static int
+expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end,
+ regex_t* reg)
+{
+ int r;
+ Node *node;
+
+ node = onig_node_new_str(s, end);
+ if (IS_NULL(node)) return ONIGERR_MEMORY;
+
+ r = update_string_node_case_fold(reg, node);
+ if (r != 0) {
+ onig_node_free(node);
+ return r;
+ }
+
+ NSTRING_SET_AMBIG(node);
+ NSTRING_SET_DONT_GET_OPT_INFO(node);
+ *rnode = node;
+ return 0;
+}
+
+static int
+is_case_fold_variable_len(int item_num, OnigCaseFoldCodeItem items[],
+ int slen)
+{
+ int i;
+
+ for (i = 0; i < item_num; i++) {
+ if (items[i].byte_len != slen) {
+ return 1;
+ }
+ if (items[i].code_len != 1) {
+ return 1;
+ }
+ }
+ return 0;
+}
+
+static int
+expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[],
+ UChar *p, int slen, UChar *end,
+ regex_t* reg, Node **rnode)
+{
+ int r, i, j, len, varlen;
+ Node *anode, *var_anode, *snode, *xnode, *an;
+ UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
+
+ *rnode = var_anode = NULL_NODE;
+
+ varlen = 0;
+ for (i = 0; i < item_num; i++) {
+ if (items[i].byte_len != slen) {
+ varlen = 1;
+ break;
+ }
+ }
+
+ if (varlen != 0) {
+ *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
+ if (IS_NULL(var_anode)) return ONIGERR_MEMORY;
+
+ xnode = onig_node_new_list(NULL, NULL);
+ if (IS_NULL(xnode)) goto mem_err;
+ NCAR(var_anode) = xnode;
+
+ anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
+ if (IS_NULL(anode)) goto mem_err;
+ NCAR(xnode) = anode;
+ }
+ else {
+ *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
+ if (IS_NULL(anode)) return ONIGERR_MEMORY;
+ }
+
+ snode = onig_node_new_str(p, p + slen);
+ if (IS_NULL(snode)) goto mem_err;
+
+ NCAR(anode) = snode;
+
+ for (i = 0; i < item_num; i++) {
+ snode = onig_node_new_str(NULL, NULL);
+ if (IS_NULL(snode)) goto mem_err;
+
+ for (j = 0; j < items[i].code_len; j++) {
+ len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf);
+ if (len < 0) {
+ r = len;
+ goto mem_err2;
+ }
+
+ r = onig_node_str_cat(snode, buf, buf + len);
+ if (r != 0) goto mem_err2;
+ }
+
+ an = onig_node_new_alt(NULL_NODE, NULL_NODE);
+ if (IS_NULL(an)) {
+ goto mem_err2;
+ }
+
+ if (items[i].byte_len != slen) {
+ Node *rem;
+ UChar *q = p + items[i].byte_len;
+
+ if (q < end) {
+ r = expand_case_fold_make_rem_string(&rem, q, end, reg);
+ if (r != 0) {
+ onig_node_free(an);
+ goto mem_err2;
+ }
+
+ xnode = onig_node_list_add(NULL_NODE, snode);
+ if (IS_NULL(xnode)) {
+ onig_node_free(an);
+ onig_node_free(rem);
+ goto mem_err2;
+ }
+ if (IS_NULL(onig_node_list_add(xnode, rem))) {
+ onig_node_free(an);
+ onig_node_free(xnode);
+ onig_node_free(rem);
+ goto mem_err;
+ }
+
+ NCAR(an) = xnode;
+ }
+ else {
+ NCAR(an) = snode;
+ }
+
+ NCDR(var_anode) = an;
+ var_anode = an;
+ }
+ else {
+ NCAR(an) = snode;
+ NCDR(anode) = an;
+ anode = an;
+ }
+ }
+
+ return varlen;
+
+ mem_err2:
+ onig_node_free(snode);
+
+ mem_err:
+ onig_node_free(*rnode);
+
+ return ONIGERR_MEMORY;
+}
+
+#define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8
+
+static int
+expand_case_fold_string(Node* node, regex_t* reg, int state)
+{
+ int r, n, len, alt_num;
+ int varlen = 0;
+ int is_in_look_behind;
+ UChar *start, *end, *p;
+ Node *top_root, *root, *snode, *prev_node;
+ OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
+ StrNode* sn;
+
+ if (NSTRING_IS_AMBIG(node)) return 0;
+
+ sn = NSTR(node);
+
+ start = sn->s;
+ end = sn->end;
+ if (start >= end) return 0;
+
+ is_in_look_behind = (state & IN_LOOK_BEHIND) != 0;
+
+ r = 0;
+ top_root = root = prev_node = snode = NULL_NODE;
+ alt_num = 1;
+ p = start;
+ while (p < end) {
+ n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag,
+ p, end, items);
+ if (n < 0) {
+ r = n;
+ goto err;
+ }
+
+ len = enclen(reg->enc, p, end);
+
+ varlen = is_case_fold_variable_len(n, items, len);
+ if (n == 0 || varlen == 0 || is_in_look_behind) {
+ if (IS_NULL(snode)) {
+ if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
+ onig_node_free(top_root);
+ top_root = root = onig_node_list_add(NULL_NODE, prev_node);
+ if (IS_NULL(root)) {
+ onig_node_free(prev_node);
+ goto mem_err;
+ }
+ }
+
+ prev_node = snode = onig_node_new_str(NULL, NULL);
+ if (IS_NULL(snode)) goto mem_err;
+ if (IS_NOT_NULL(root)) {
+ if (IS_NULL(onig_node_list_add(root, snode))) {
+ onig_node_free(snode);
+ goto mem_err;
+ }
+ }
+ }
+
+ r = onig_node_str_cat(snode, p, p + len);
+ if (r != 0) goto err;
+ }
+ else {
+ alt_num *= (n + 1);
+ if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break;
+
+ if (IS_NOT_NULL(snode)) {
+ r = update_string_node_case_fold(reg, snode);
+ if (r == 0) {
+ NSTRING_SET_AMBIG(snode);
+ }
+ }
+ if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
+ onig_node_free(top_root);
+ top_root = root = onig_node_list_add(NULL_NODE, prev_node);
+ if (IS_NULL(root)) {
+ onig_node_free(prev_node);
+ goto mem_err;
+ }
+ }
+
+ r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node);
+ if (r < 0) goto mem_err;
+ if (r == 1) {
+ if (IS_NULL(root)) {
+ top_root = prev_node;
+ }
+ else {
+ if (IS_NULL(onig_node_list_add(root, prev_node))) {
+ onig_node_free(prev_node);
+ goto mem_err;
+ }
+ }
+
+ root = NCAR(prev_node);
+ }
+ else { /* r == 0 */
+ if (IS_NOT_NULL(root)) {
+ if (IS_NULL(onig_node_list_add(root, prev_node))) {
+ onig_node_free(prev_node);
+ goto mem_err;
+ }
+ }
+ }
+
+ snode = NULL_NODE;
+ }
+
+ p += len;
+ }
+ if (IS_NOT_NULL(snode)) {
+ r = update_string_node_case_fold(reg, snode);
+ if (r == 0) {
+ NSTRING_SET_AMBIG(snode);
+ }
+ }
+
+ if (p < end) {
+ Node *srem;
+
+ r = expand_case_fold_make_rem_string(&srem, p, end, reg);
+ if (r != 0) goto mem_err;
+
+ if (IS_NOT_NULL(prev_node) && IS_NULL(root)) {
+ onig_node_free(top_root);
+ top_root = root = onig_node_list_add(NULL_NODE, prev_node);
+ if (IS_NULL(root)) {
+ onig_node_free(srem);
+ onig_node_free(prev_node);
+ goto mem_err;
+ }
+ }
+
+ if (IS_NULL(root)) {
+ prev_node = srem;
+ }
+ else {
+ if (IS_NULL(onig_node_list_add(root, srem))) {
+ onig_node_free(srem);
+ goto mem_err;
+ }
+ }
+ }
+
+ /* ending */
+ top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node);
+ swap_node(node, top_root);
+ onig_node_free(top_root);
+ return 0;
+
+ mem_err:
+ r = ONIGERR_MEMORY;
+
+ err:
+ onig_node_free(top_root);
+ return r;
+}
+
+
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+
+# define CEC_THRES_NUM_BIG_REPEAT 512
+# define CEC_INFINITE_NUM 0x7fffffff
+
+# define CEC_IN_INFINITE_REPEAT (1<<0)
+# define CEC_IN_FINITE_REPEAT (1<<1)
+# define CEC_CONT_BIG_REPEAT (1<<2)
+
+static int
+setup_comb_exp_check(Node* node, int state, ScanEnv* env)
+{
+ int type;
+ int r = state;
+
+ type = NTYPE(node);
+ switch (type) {
+ case NT_LIST:
+ {
+ do {
+ r = setup_comb_exp_check(NCAR(node), r, env);
+ } while (r >= 0 && IS_NOT_NULL(node = NCDR(node)));
+ }
+ break;
+
+ case NT_ALT:
+ {
+ int ret;
+ do {
+ ret = setup_comb_exp_check(NCAR(node), state, env);
+ r |= ret;
+ } while (ret >= 0 && IS_NOT_NULL(node = NCDR(node)));
+ }
+ break;
+
+ case NT_QTFR:
+ {
+ int child_state = state;
+ int add_state = 0;
+ QtfrNode* qn = NQTFR(node);
+ Node* target = qn->target;
+ int var_num;
+
+ if (! IS_REPEAT_INFINITE(qn->upper)) {
+ if (qn->upper > 1) {
+ /* {0,1}, {1,1} are allowed */
+ child_state |= CEC_IN_FINITE_REPEAT;
+
+ /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */
+ if (env->backrefed_mem == 0) {
+ if (NTYPE(qn->target) == NT_ENCLOSE) {
+ EncloseNode* en = NENCLOSE(qn->target);
+ if (en->type == ENCLOSE_MEMORY) {
+ if (NTYPE(en->target) == NT_QTFR) {
+ QtfrNode* q = NQTFR(en->target);
+ if (IS_REPEAT_INFINITE(q->upper)
+ && q->greedy == qn->greedy) {
+ qn->upper = (qn->lower == 0 ? 1 : qn->lower);
+ if (qn->upper == 1)
+ child_state = state;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ if (state & CEC_IN_FINITE_REPEAT) {
+ qn->comb_exp_check_num = -1;
+ }
+ else {
+ if (IS_REPEAT_INFINITE(qn->upper)) {
+ var_num = CEC_INFINITE_NUM;
+ child_state |= CEC_IN_INFINITE_REPEAT;
+ }
+ else {
+ var_num = qn->upper - qn->lower;
+ }
+
+ if (var_num >= CEC_THRES_NUM_BIG_REPEAT)
+ add_state |= CEC_CONT_BIG_REPEAT;
+
+ if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) ||
+ ((state & CEC_CONT_BIG_REPEAT) != 0 &&
+ var_num >= CEC_THRES_NUM_BIG_REPEAT)) {
+ if (qn->comb_exp_check_num == 0) {
+ env->num_comb_exp_check++;
+ qn->comb_exp_check_num = env->num_comb_exp_check;
+ if (env->curr_max_regnum > env->comb_exp_max_regnum)
+ env->comb_exp_max_regnum = env->curr_max_regnum;
+ }
+ }
+ }
+
+ r = setup_comb_exp_check(target, child_state, env);
+ r |= add_state;
+ }
+ break;
+
+ case NT_ENCLOSE:
+ {
+ EncloseNode* en = NENCLOSE(node);
+
+ switch (en->type) {
+ case ENCLOSE_MEMORY:
+ {
+ if (env->curr_max_regnum < en->regnum)
+ env->curr_max_regnum = en->regnum;
+
+ r = setup_comb_exp_check(en->target, state, env);
+ }
+ break;
+
+ default:
+ r = setup_comb_exp_check(en->target, state, env);
+ break;
+ }
+ }
+ break;
+
+# ifdef USE_SUBEXP_CALL
+ case NT_CALL:
+ if (IS_CALL_RECURSION(NCALL(node)))
+ env->has_recursion = 1;
+ else
+ r = setup_comb_exp_check(NCALL(node)->target, state, env);
+ break;
+# endif
+
+ default:
+ break;
+ }
+
+ return r;
+}
+#endif
/* setup_tree does the following work.
1. check empty loop. (set qn->target_empty_info)
@@ -2974,247 +3915,295 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
int type;
int r = 0;
+restart:
type = NTYPE(node);
switch (type) {
- case N_LIST:
+ case NT_LIST:
{
Node* prev = NULL_NODE;
do {
- r = setup_tree(NCONS(node).left, reg, state, env);
- if (IS_NOT_NULL(prev) && r == 0) {
- r = next_setup(prev, NCONS(node).left, reg);
- }
- prev = NCONS(node).left;
- } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
+ r = setup_tree(NCAR(node), reg, state, env);
+ if (IS_NOT_NULL(prev) && r == 0) {
+ r = next_setup(prev, NCAR(node), reg);
+ }
+ prev = NCAR(node);
+ } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
}
break;
- case N_ALT:
+ case NT_ALT:
do {
- r = setup_tree(NCONS(node).left, reg, (state | IN_ALT), env);
- } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
+ r = setup_tree(NCAR(node), reg, (state | IN_ALT), env);
+ } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
break;
- case N_CCLASS:
- if (IS_IGNORECASE(reg->options)) {
- int i;
- UChar c, lowbuf[ONIGENC_MBC_TO_LOWER_MAXLEN];
- BitSetRef bs = NCCLASS(node).bs;
- for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
- c = (UChar )i;
- ONIGENC_MBC_TO_LOWER(reg->enc, &c, lowbuf);
- if (*lowbuf != c) {
- if (BITSET_AT(bs, c)) BITSET_SET_BIT(bs, *lowbuf);
- if (BITSET_AT(bs, *lowbuf)) BITSET_SET_BIT(bs, c);
- }
- }
- }
+ case NT_CCLASS:
break;
- case N_STRING:
+ case NT_STR:
if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) {
- StrNode* sn = &NSTRING(node);
- UChar* p = sn->s;
-
- while (p < sn->end) {
- if (ONIGENC_IS_MBC_CASE_AMBIG(reg->enc, p)) {
- NSTRING_SET_CASE_AMBIG(node);
- break;
- }
- p += enc_len(reg->enc, *p);
- }
+ r = expand_case_fold_string(node, reg, state);
}
break;
- case N_CTYPE:
- case N_ANYCHAR:
+ case NT_CTYPE:
+ case NT_CANY:
break;
#ifdef USE_SUBEXP_CALL
- case N_CALL:
+ case NT_CALL:
break;
#endif
- case N_BACKREF:
+ case NT_BREF:
{
int i;
int* p;
Node** nodes = SCANENV_MEM_NODES(env);
- BackrefNode* br = &(NBACKREF(node));
+ BRefNode* br = NBREF(node);
p = BACKREFS_P(br);
for (i = 0; i < br->back_num; i++) {
- if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
- BIT_STATUS_ON_AT(env->backrefed_mem, p[i]);
- BIT_STATUS_ON_AT(env->bt_mem_start, p[i]);
- SET_EFFECT_STATUS(nodes[p[i]], NST_MEM_BACKREFED);
+ if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
+ BIT_STATUS_ON_AT(env->backrefed_mem, p[i]);
+ BIT_STATUS_ON_AT(env->bt_mem_start, p[i]);
+#ifdef USE_BACKREF_WITH_LEVEL
+ if (IS_BACKREF_NEST_LEVEL(br)) {
+ BIT_STATUS_ON_AT(env->bt_mem_end, p[i]);
+ }
+#endif
+ SET_ENCLOSE_STATUS(nodes[p[i]], NST_MEM_BACKREFED);
}
}
break;
- case N_QUALIFIER:
+ case NT_QTFR:
{
OnigDistance d;
- QualifierNode* qn = &(NQUALIFIER(node));
+ QtfrNode* qn = NQTFR(node);
Node* target = qn->target;
+ if ((state & IN_REPEAT) != 0) {
+ qn->state |= NST_IN_REPEAT;
+ }
+
if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) {
- r = get_min_match_length(target, &d, env);
- if (r) break;
- if (d == 0) {
- qn->target_empty_info = NQ_TARGET_IS_EMPTY;
-#ifdef USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK
- r = qualifiers_memory_node_info(target);
- if (r < 0) break;
- if (r > 0) {
- qn->target_empty_info = r;
- }
+ r = get_min_match_length(target, &d, env);
+ if (r) break;
+ if (d == 0) {
+ qn->target_empty_info = NQ_TARGET_IS_EMPTY;
+#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
+ r = quantifiers_memory_node_info(target);
+ if (r < 0) break;
+ if (r > 0) {
+ qn->target_empty_info = r;
+ }
#endif
#if 0
- r = get_max_match_length(target, &d, env);
- if (r == 0 && d == 0) {
- /* ()* ==> ()?, ()+ ==> () */
- qn->upper = 1;
- if (qn->lower > 1) qn->lower = 1;
- if (NTYPE(target) == N_STRING) {
- qn->upper = qn->lower = 0; /* /(?:)+/ ==> // */
- }
- }
+ r = get_max_match_length(target, &d, env);
+ if (r == 0 && d == 0) {
+ /* ()* ==> ()?, ()+ ==> () */
+ qn->upper = 1;
+ if (qn->lower > 1) qn->lower = 1;
+ if (NTYPE(target) == NT_STR) {
+ qn->upper = qn->lower = 0; /* /(?:)+/ ==> // */
+ }
+ }
#endif
- }
+ }
}
+ state |= IN_REPEAT;
if (qn->lower != qn->upper)
- state |= IN_REPEAT;
+ state |= IN_VAR_REPEAT;
r = setup_tree(target, reg, state, env);
if (r) break;
/* expand string */
#define EXPAND_STRING_MAX_LENGTH 100
- if (NTYPE(target) == N_STRING) {
- if (!IS_REPEAT_INFINITE(qn->lower) && qn->lower == qn->upper &&
- qn->lower > 1 && qn->lower <= EXPAND_STRING_MAX_LENGTH) {
- int len = NSTRING_LEN(target);
- StrNode* sn = &(NSTRING(target));
-
- if (len * qn->lower <= EXPAND_STRING_MAX_LENGTH) {
- int i, n = qn->lower;
- onig_node_conv_to_str_node(node, NSTRING(target).flag);
- for (i = 0; i < n; i++) {
- r = onig_node_str_cat(node, sn->s, sn->end);
- if (r) break;
- }
- onig_node_free(target);
- break; /* break case N_QUALIFIER: */
- }
- }
+ if (NTYPE(target) == NT_STR) {
+ if (qn->lower > 1) {
+ int i, n = qn->lower;
+ OnigDistance len = NSTRING_LEN(target);
+ StrNode* sn = NSTR(target);
+ Node* np;
+
+ np = onig_node_new_str(sn->s, sn->end);
+ if (IS_NULL(np)) return ONIGERR_MEMORY;
+ NSTR(np)->flag = sn->flag;
+
+ for (i = 1; i < n && (i+1) * len <= EXPAND_STRING_MAX_LENGTH; i++) {
+ r = onig_node_str_cat(np, sn->s, sn->end);
+ if (r) {
+ onig_node_free(np);
+ return r;
+ }
+ }
+ if (i < qn->upper || IS_REPEAT_INFINITE(qn->upper)) {
+ Node *np1, *np2;
+
+ qn->lower -= i;
+ if (! IS_REPEAT_INFINITE(qn->upper))
+ qn->upper -= i;
+
+ np1 = onig_node_new_list(np, NULL);
+ if (IS_NULL(np1)) {
+ onig_node_free(np);
+ return ONIGERR_MEMORY;
+ }
+ swap_node(np1, node);
+ np2 = onig_node_list_add(node, np1);
+ if (IS_NULL(np2)) {
+ onig_node_free(np1);
+ return ONIGERR_MEMORY;
+ }
+ }
+ else {
+ swap_node(np, node);
+ onig_node_free(np);
+ }
+ break; /* break case NT_QTFR: */
+ }
}
#ifdef USE_OP_PUSH_OR_JUMP_EXACT
if (qn->greedy && (qn->target_empty_info != 0)) {
- if (NTYPE(target) == N_QUALIFIER) {
- QualifierNode* tqn = &(NQUALIFIER(target));
- if (IS_NOT_NULL(tqn->head_exact)) {
- qn->head_exact = tqn->head_exact;
- tqn->head_exact = NULL;
- }
- }
- else {
- qn->head_exact = get_head_value_node(qn->target, 1, reg);
- }
+ if (NTYPE(target) == NT_QTFR) {
+ QtfrNode* tqn = NQTFR(target);
+ if (IS_NOT_NULL(tqn->head_exact)) {
+ qn->head_exact = tqn->head_exact;
+ tqn->head_exact = NULL;
+ }
+ }
+ else {
+ qn->head_exact = get_head_value_node(qn->target, 1, reg);
+ }
}
#endif
}
break;
- case N_EFFECT:
+ case NT_ENCLOSE:
{
- EffectNode* en = &(NEFFECT(node));
+ EncloseNode* en = NENCLOSE(node);
switch (en->type) {
- case EFFECT_OPTION:
- {
- OnigOptionType options = reg->options;
- reg->options = NEFFECT(node).option;
- r = setup_tree(NEFFECT(node).target, reg, state, env);
- reg->options = options;
- }
- break;
+ case ENCLOSE_OPTION:
+ {
+ OnigOptionType options = reg->options;
+ reg->options = NENCLOSE(node)->option;
+ r = setup_tree(NENCLOSE(node)->target, reg, state, env);
+ reg->options = options;
+ }
+ break;
- case EFFECT_MEMORY:
- if ((state & (IN_ALT | IN_NOT | IN_REPEAT)) != 0) {
- BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum);
- /* SET_EFFECT_STATUS(node, NST_MEM_IN_ALT_NOT); */
- }
- /* fall */
- case EFFECT_STOP_BACKTRACK:
- {
- Node* target = en->target;
- r = setup_tree(target, reg, state, env);
- if (NTYPE(target) == N_QUALIFIER) {
- QualifierNode* tqn = &(NQUALIFIER(target));
- if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 &&
- tqn->greedy != 0) { /* (?>a*), a*+ etc... */
- int qtype = NTYPE(tqn->target);
- if (IS_NODE_TYPE_SIMPLE(qtype))
- SET_EFFECT_STATUS(node, NST_SIMPLE_REPEAT);
- }
- }
- }
- break;
+ case ENCLOSE_MEMORY:
+ if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT | IN_CALL)) != 0) {
+ BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum);
+ /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */
+ }
+ if (IS_ENCLOSE_CALLED(en))
+ state |= IN_CALL;
+ if (IS_ENCLOSE_RECURSION(en))
+ state |= IN_RECCALL;
+ else if ((state & IN_RECCALL) != 0)
+ SET_CALL_RECURSION(node);
+ r = setup_tree(en->target, reg, state, env);
+ break;
+
+ case ENCLOSE_STOP_BACKTRACK:
+ {
+ Node* target = en->target;
+ r = setup_tree(target, reg, state, env);
+ if (NTYPE(target) == NT_QTFR) {
+ QtfrNode* tqn = NQTFR(target);
+ if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 &&
+ tqn->greedy != 0) { /* (?>a*), a*+ etc... */
+ int qtype = NTYPE(tqn->target);
+ if (IS_NODE_TYPE_SIMPLE(qtype))
+ SET_ENCLOSE_STATUS(node, NST_STOP_BT_SIMPLE_REPEAT);
+ }
+ }
+ }
+ break;
+
+ case ENCLOSE_CONDITION:
+#ifdef USE_NAMED_GROUP
+ if (! IS_ENCLOSE_NAME_REF(NENCLOSE(node)) &&
+ env->num_named > 0 &&
+ IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
+ !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
+ return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
+ }
+#endif
+ if (NENCLOSE(node)->regnum > env->num_mem)
+ return ONIGERR_INVALID_BACKREF;
+ r = setup_tree(NENCLOSE(node)->target, reg, state, env);
+ break;
+
+ case ENCLOSE_ABSENT:
+ r = setup_tree(NENCLOSE(node)->target, reg, state, env);
+ break;
}
}
break;
- case N_ANCHOR:
+ case NT_ANCHOR:
{
- AnchorNode* an = &(NANCHOR(node));
+ AnchorNode* an = NANCHOR(node);
switch (an->type) {
case ANCHOR_PREC_READ:
- r = setup_tree(an->target, reg, state, env);
- break;
+ r = setup_tree(an->target, reg, state, env);
+ break;
case ANCHOR_PREC_READ_NOT:
- r = setup_tree(an->target, reg, (state | IN_NOT), env);
- break;
+ r = setup_tree(an->target, reg, (state | IN_NOT), env);
+ break;
/* allowed node types in look-behind */
#define ALLOWED_TYPE_IN_LB \
- ( N_LIST | N_ALT | N_STRING | N_CCLASS | N_CTYPE | \
- N_ANYCHAR | N_ANCHOR | N_EFFECT | N_QUALIFIER | N_CALL )
+ ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \
+ BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL )
-#define ALLOWED_EFFECT_IN_LB ( EFFECT_MEMORY )
-#define ALLOWED_EFFECT_IN_LB_NOT 0
+#define ALLOWED_ENCLOSE_IN_LB ( ENCLOSE_MEMORY | ENCLOSE_OPTION )
+#define ALLOWED_ENCLOSE_IN_LB_NOT ENCLOSE_OPTION
#define ALLOWED_ANCHOR_IN_LB \
-( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF )
+( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \
+ ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \
+ ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \
+ ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
#define ALLOWED_ANCHOR_IN_LB_NOT \
-( ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF )
- /* can't allow all anchors, because \G in look-behind through Search().
- ex. /(?<=\G)zz/.match("azz") => success. */
+( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \
+ ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \
+ ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \
+ ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
case ANCHOR_LOOK_BEHIND:
- {
- r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
- ALLOWED_EFFECT_IN_LB, ALLOWED_ANCHOR_IN_LB);
- if (r < 0) return r;
- if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
- r = setup_look_behind(node, reg, env);
- if (r != 0) return r;
- r = setup_tree(an->target, reg, state, env);
- }
- break;
+ {
+ r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
+ ALLOWED_ENCLOSE_IN_LB, ALLOWED_ANCHOR_IN_LB);
+ if (r < 0) return r;
+ if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
+ if (NTYPE(node) != NT_ANCHOR) goto restart;
+ r = setup_tree(an->target, reg, (state | IN_LOOK_BEHIND), env);
+ if (r != 0) return r;
+ r = setup_look_behind(node, reg, env);
+ }
+ break;
case ANCHOR_LOOK_BEHIND_NOT:
- {
- r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
- ALLOWED_EFFECT_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT);
- if (r < 0) return r;
- if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
- r = setup_look_behind(node, reg, env);
- if (r != 0) return r;
- r = setup_tree(an->target, reg, (state | IN_NOT), env);
- }
- break;
+ {
+ r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
+ ALLOWED_ENCLOSE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT);
+ if (r < 0) return r;
+ if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
+ if (NTYPE(node) != NT_ANCHOR) goto restart;
+ r = setup_tree(an->target, reg, (state | IN_NOT | IN_LOOK_BEHIND),
+ env);
+ if (r != 0) return r;
+ r = setup_look_behind(node, reg, env);
+ }
+ break;
}
}
break;
@@ -3226,51 +4215,73 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
return r;
}
-/* set skip map for Boyer-Moor search */
+/* set skip map for Sunday's quick search */
static int
-set_bm_skip(UChar* s, UChar* end, OnigEncoding enc, int ignore_case,
- UChar skip[], int** int_skip)
+set_bm_skip(UChar* s, UChar* end, regex_t* reg,
+ UChar skip[], int ignore_case)
{
- int i, len;
- UChar lowbuf[ONIGENC_MBC_TO_LOWER_MAXLEN];
+ OnigDistance i, len;
+ int clen, flen, n, j, k;
+ UChar *p, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
+ OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
+ OnigEncoding enc = reg->enc;
len = end - s;
- if (len < ONIG_CHAR_TABLE_SIZE) {
- for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = len;
+ if (len >= ONIG_CHAR_TABLE_SIZE) {
+ /* This should not happen. */
+ return ONIGERR_TYPE_BUG;
+ }
- if (ignore_case) {
- for (i = 0; i < len - 1; i++) {
- ONIGENC_MBC_TO_LOWER(enc, &(s[i]), lowbuf);
- skip[*lowbuf] = len - 1 - i;
+ if (ignore_case) {
+ for (i = 0; i < len; i += clen) {
+ p = s + i;
+ n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
+ p, end, items);
+ clen = enclen(enc, p, end);
+ if (p + clen > end)
+ clen = (int )(end - p);
+
+ for (j = 0; j < n; j++) {
+ if ((items[j].code_len != 1) || (items[j].byte_len != clen)) {
+ /* Different length isn't supported. Stop optimization at here. */
+ end = p;
+ goto endcheck;
+ }
+ flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf);
+ if (flen != clen) {
+ /* Different length isn't supported. Stop optimization at here. */
+ end = p;
+ goto endcheck;
+ }
}
}
- else {
- for (i = 0; i < len - 1; i++)
- skip[s[i]] = len - 1 - i;
- }
+endcheck:
+ len = end - s;
}
- else {
- if (IS_NULL(*int_skip)) {
- *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
- if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;
- }
- for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = len;
- if (ignore_case) {
- for (i = 0; i < len - 1; i++) {
- ONIGENC_MBC_TO_LOWER(enc, &(s[i]), lowbuf);
- (*int_skip)[*lowbuf] = len - 1 - i;
+ for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
+ skip[i] = (UChar )(len + 1);
+ n = 0;
+ for (i = 0; i < len; i += clen) {
+ p = s + i;
+ if (ignore_case)
+ n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
+ p, end, items);
+ clen = enclen(enc, p, end);
+ if (p + clen > end)
+ clen = (int )(end - p);
+
+ for (j = 0; j < clen; j++) {
+ skip[s[i + j]] = (UChar )(len - i - j);
+ for (k = 0; k < n; k++) {
+ ONIGENC_CODE_TO_MBC(enc, items[k].code[0], buf);
+ skip[buf[j]] = (UChar )(len - i - j);
}
}
- else {
- for (i = 0; i < len - 1; i++)
- (*int_skip)[s[i]] = len - 1 - i;
- }
}
- return 0;
-}
-#define OPT_EXACT_MAXLEN 24
+ return (int )len;
+}
typedef struct {
OnigDistance min; /* min byte length */
@@ -3278,11 +4289,11 @@ typedef struct {
} MinMaxLen;
typedef struct {
- MinMaxLen mmd;
- BitStatusType backrefed_status;
- OnigEncoding enc;
- OnigOptionType options;
- ScanEnv* scan_env;
+ MinMaxLen mmd;
+ OnigEncoding enc;
+ OnigOptionType options;
+ OnigCaseFoldType case_fold_flag;
+ ScanEnv* scan_env;
} OptEnv;
typedef struct {
@@ -3295,7 +4306,7 @@ typedef struct {
OptAncInfo anc;
int reach_end;
- int ignore_case;
+ int ignore_case; /* -1: unset, 0: case sensitive, 1: ignore case */
int len;
UChar s[OPT_EXACT_MAXLEN];
} OptExactInfo;
@@ -3321,49 +4332,54 @@ typedef struct {
static int
-map_position_value(int i)
-{
- static int vals[] = {
- 10, 10, 10, 10, 10, 10, 10, 10, 10, 1, 1, 10, 10, 1, 10, 10,
- 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
- 1, 6, 3, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 5,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5,
- 5, 4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 4, 5, 5, 5,
- 5, 4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 10,
+map_position_value(OnigEncoding enc, int i)
+{
+ static const short int ByteValTable[] = {
+ 5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5,
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5,
+ 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5,
+ 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1
};
- if (i < sizeof(vals)/sizeof(vals[0])) return vals[i];
-
- return 7; /* Take it easy. */
+ if (i < numberof(ByteValTable)) {
+ if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)
+ return 20;
+ else
+ return (int )ByteValTable[i];
+ }
+ else
+ return 4; /* Take it easy. */
}
static int
distance_value(MinMaxLen* mm)
{
/* 1000 / (min-max-dist + 1) */
- static int dist_vals[] = {
- 1000, 500, 333, 250, 200, 167, 143, 125, 111, 100,
- 91, 83, 77, 71, 67, 63, 59, 56, 53, 50,
- 48, 45, 43, 42, 40, 38, 37, 36, 34, 33,
- 32, 31, 30, 29, 29, 28, 27, 26, 26, 25,
- 24, 24, 23, 23, 22, 22, 21, 21, 20, 20,
- 20, 19, 19, 19, 18, 18, 18, 17, 17, 17,
- 16, 16, 16, 16, 15, 15, 15, 15, 14, 14,
- 14, 14, 14, 14, 13, 13, 13, 13, 13, 13,
- 12, 12, 12, 12, 12, 12, 11, 11, 11, 11,
+ static const short int dist_vals[] = {
+ 1000, 500, 333, 250, 200, 167, 143, 125, 111, 100,
+ 91, 83, 77, 71, 67, 63, 59, 56, 53, 50,
+ 48, 45, 43, 42, 40, 38, 37, 36, 34, 33,
+ 32, 31, 30, 29, 29, 28, 27, 26, 26, 25,
+ 24, 24, 23, 23, 22, 22, 21, 21, 20, 20,
+ 20, 19, 19, 19, 18, 18, 18, 17, 17, 17,
+ 16, 16, 16, 16, 15, 15, 15, 15, 14, 14,
+ 14, 14, 14, 14, 13, 13, 13, 13, 13, 13,
+ 12, 12, 12, 12, 12, 12, 11, 11, 11, 11,
11, 11, 11, 11, 11, 10, 10, 10, 10, 10
};
- int d;
+ OnigDistance d;
if (mm->max == ONIG_INFINITE_DISTANCE) return 0;
d = mm->max - mm->min;
- if (d < sizeof(dist_vals)/sizeof(dist_vals[0]))
+ if (d < numberof(dist_vals))
/* return dist_vals[d] * 16 / (mm->min + 12); */
- return dist_vals[d];
+ return (int )dist_vals[d];
else
return 1;
}
@@ -3419,12 +4435,14 @@ add_mml(MinMaxLen* to, MinMaxLen* from)
to->max = distance_add(to->max, from->max);
}
+#if 0
static void
add_len_mml(MinMaxLen* to, OnigDistance len)
{
to->min = distance_add(to->min, len);
to->max = distance_add(to->max, len);
}
+#endif
static void
alt_merge_mml(MinMaxLen* to, MinMaxLen* from)
@@ -3454,7 +4472,7 @@ copy_opt_anc_info(OptAncInfo* to, OptAncInfo* from)
static void
concat_opt_anc_info(OptAncInfo* to, OptAncInfo* left, OptAncInfo* right,
- OnigDistance left_len, OnigDistance right_len)
+ OnigDistance left_len, OnigDistance right_len)
{
clear_opt_anc_info(to);
@@ -3467,6 +4485,9 @@ concat_opt_anc_info(OptAncInfo* to, OptAncInfo* left, OptAncInfo* right,
if (right_len == 0) {
to->right_anchor |= left->right_anchor;
}
+ else {
+ to->right_anchor |= (left->right_anchor & ANCHOR_PREC_READ_NOT);
+ }
}
static int
@@ -3525,7 +4546,7 @@ clear_opt_exact_info(OptExactInfo* ex)
clear_mml(&ex->mmd);
clear_opt_anc_info(&ex->anc);
ex->reach_end = 0;
- ex->ignore_case = 0;
+ ex->ignore_case = -1; /* unset */
ex->len = 0;
ex->s[0] = '\0';
}
@@ -3537,22 +4558,28 @@ copy_opt_exact_info(OptExactInfo* to, OptExactInfo* from)
}
static void
-concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add)
+concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc)
{
- int i, n;
+ int i, j, len;
+ UChar *p, *end;
OptAncInfo tanc;
- if (! to->ignore_case && add->ignore_case) {
- if (to->len >= add->len) return ; /* avoid */
-
- to->ignore_case = 1;
+ if (to->ignore_case < 0)
+ to->ignore_case = add->ignore_case;
+ else if (to->ignore_case != add->ignore_case)
+ return ; /* avoid */
+
+ p = add->s;
+ end = p + add->len;
+ for (i = to->len; p < end; ) {
+ len = enclen(enc, p, end);
+ if (i + len > OPT_EXACT_MAXLEN) break;
+ for (j = 0; j < len && p < end; j++)
+ to->s[i++] = *p++;
}
- for (i = to->len, n = 0; n < add->len && i < OPT_EXACT_MAXLEN; i++, n++)
- to->s[i] = add->s[n];
-
to->len = i;
- to->reach_end = (n == add->len ? add->reach_end : 0);
+ to->reach_end = (p == end ? add->reach_end : 0);
concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1);
if (! to->reach_end) tanc.right_anchor = 0;
@@ -3560,22 +4587,17 @@ concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add)
}
static void
-concat_opt_exact_info_str(OptExactInfo* to,
- UChar* s, UChar* end, int raw, OnigEncoding enc)
+concat_opt_exact_info_str(OptExactInfo* to, UChar* s, UChar* end,
+ int raw ARG_UNUSED, OnigEncoding enc)
{
int i, j, len;
UChar *p;
for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {
- if (raw) {
+ len = enclen(enc, p, end);
+ if (i + len > OPT_EXACT_MAXLEN) break;
+ for (j = 0; j < len && p < end; j++)
to->s[i++] = *p++;
- }
- else {
- len = enc_len(enc, *p);
- if (i + len > OPT_EXACT_MAXLEN) break;
- for (j = 0; j < len; j++)
- to->s[i++] = *p++;
- }
}
to->len = i;
@@ -3598,7 +4620,7 @@ alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env)
for (i = 0; i < to->len && i < add->len; ) {
if (to->s[i] != add->s[i]) break;
- len = enc_len(env->enc, to->s[i]);
+ len = enclen(env->enc, to->s + i, to->s + to->len);
for (j = 1; j < len; j++) {
if (to->s[i+j] != add->s[i+j]) break;
@@ -3611,34 +4633,72 @@ alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env)
to->reach_end = 0;
}
to->len = i;
- to->ignore_case |= add->ignore_case;
+ if (to->ignore_case < 0)
+ to->ignore_case = add->ignore_case;
+ else if (add->ignore_case >= 0)
+ to->ignore_case |= add->ignore_case;
alt_merge_opt_anc_info(&to->anc, &add->anc);
if (! to->reach_end) to->anc.right_anchor = 0;
}
static void
-select_opt_exact_info(OptExactInfo* now, OptExactInfo* alt)
+select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt)
{
- int vlen1, vlen2;
+ int v1, v2;
- vlen1 = now->len * (now->ignore_case ? 1 : 2);
- vlen2 = alt->len * (alt->ignore_case ? 1 : 2);
+ v1 = now->len;
+ v2 = alt->len;
- if (comp_distance_value(&now->mmd, &alt->mmd, vlen1, vlen2) > 0)
+ if (v2 == 0) {
+ return ;
+ }
+ else if (v1 == 0) {
+ copy_opt_exact_info(now, alt);
+ return ;
+ }
+ else if (v1 <= 2 && v2 <= 2) {
+ /* ByteValTable[x] is big value --> low price */
+ v2 = map_position_value(enc, now->s[0]);
+ v1 = map_position_value(enc, alt->s[0]);
+
+ if (now->len > 1) v1 += 5;
+ if (alt->len > 1) v2 += 5;
+ }
+
+ if (now->ignore_case <= 0) v1 *= 2;
+ if (alt->ignore_case <= 0) v2 *= 2;
+
+ if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
copy_opt_exact_info(now, alt);
}
static void
clear_opt_map_info(OptMapInfo* map)
{
- int i;
+ static const OptMapInfo clean_info = {
+ {0, 0}, {0, 0}, 0,
+ {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ }
+ };
- clear_mml(&map->mmd);
- clear_opt_anc_info(&map->anc);
- map->value = 0;
- for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
- map->map[i] = 0;
+ xmemcpy(map, &clean_info, sizeof(OptMapInfo));
}
static void
@@ -3648,40 +4708,40 @@ copy_opt_map_info(OptMapInfo* to, OptMapInfo* from)
}
static void
-add_char_opt_map_info(OptMapInfo* map, int c)
+add_char_opt_map_info(OptMapInfo* map, UChar c, OnigEncoding enc)
{
if (map->map[c] == 0) {
map->map[c] = 1;
- map->value += map_position_value(c);
+ map->value += map_position_value(enc, c);
}
}
-static void
-add_char_amb_opt_map_info(OptMapInfo* map, int c, OnigEncoding enc)
+static int
+add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end,
+ OnigEncoding enc, OnigCaseFoldType case_fold_flag)
{
- UChar x, low[ONIGENC_MBC_TO_LOWER_MAXLEN];
+ OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
+ UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
+ int i, n;
- add_char_opt_map_info(map, c);
+ add_char_opt_map_info(map, p[0], enc);
- x = (UChar )c;
- ONIGENC_MBC_TO_LOWER(enc, &x, low);
- if (*low != x) {
- add_char_opt_map_info(map, (int )(*low));
- }
- else {
- int i;
- for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
- x = (UChar )i;
- ONIGENC_MBC_TO_LOWER(enc, &x, low);
- if ((int )(*low) == c) add_char_opt_map_info(map, i);
- }
+ case_fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag);
+ n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, items);
+ if (n < 0) return n;
+
+ for (i = 0; i < n; i++) {
+ ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf);
+ add_char_opt_map_info(map, buf[0], enc);
}
+
+ return 0;
}
static void
select_opt_map_info(OptMapInfo* now, OptMapInfo* alt)
{
- static int z = 1<<15; /* 32768: something big value */
+ const int z = 1<<15; /* 32768: something big value */
int v1, v2;
@@ -3705,13 +4765,13 @@ comp_opt_exact_or_map_info(OptExactInfo* e, OptMapInfo* m)
if (m->value <= 0) return -1;
- ve = COMP_EM_BASE * e->len * (e->ignore_case ? 1 : 2);
+ ve = COMP_EM_BASE * e->len * (e->ignore_case > 0 ? 1 : 2);
vm = COMP_EM_BASE * 5 * 2 / m->value;
return comp_distance_value(&e->mmd, &m->mmd, ve, vm);
}
static void
-alt_merge_opt_map_info(OptMapInfo* to, OptMapInfo* add)
+alt_merge_opt_map_info(OnigEncoding enc, OptMapInfo* to, OptMapInfo* add)
{
int i, val;
@@ -3730,7 +4790,7 @@ alt_merge_opt_map_info(OptMapInfo* to, OptMapInfo* add)
to->map[i] = 1;
if (to->map[i])
- val += map_position_value(i);
+ val += map_position_value(enc, i);
}
to->value = val;
@@ -3763,7 +4823,7 @@ copy_node_opt_info(NodeOptInfo* to, NodeOptInfo* from)
}
static void
-concat_left_node_opt_info(NodeOptInfo* to, NodeOptInfo* add)
+concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add)
{
int exb_reach, exm_reach;
OptAncInfo tanc;
@@ -3773,7 +4833,7 @@ concat_left_node_opt_info(NodeOptInfo* to, NodeOptInfo* add)
if (add->exb.len > 0 && to->len.max == 0) {
concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc,
- to->len.max, add->len.max);
+ to->len.max, add->len.max);
copy_opt_anc_info(&add->exb.anc, &tanc);
}
@@ -3790,26 +4850,26 @@ concat_left_node_opt_info(NodeOptInfo* to, NodeOptInfo* add)
if (add->exb.len > 0) {
if (exb_reach) {
- concat_opt_exact_info(&to->exb, &add->exb);
+ concat_opt_exact_info(&to->exb, &add->exb, enc);
clear_opt_exact_info(&add->exb);
}
else if (exm_reach) {
- concat_opt_exact_info(&to->exm, &add->exb);
+ concat_opt_exact_info(&to->exm, &add->exb, enc);
clear_opt_exact_info(&add->exb);
}
}
- select_opt_exact_info(&to->exm, &add->exb);
- select_opt_exact_info(&to->exm, &add->exm);
+ select_opt_exact_info(enc, &to->exm, &add->exb);
+ select_opt_exact_info(enc, &to->exm, &add->exm);
if (to->expr.len > 0) {
if (add->len.max > 0) {
if (to->expr.len > (int )add->len.max)
- to->expr.len = add->len.max;
+ to->expr.len = (int )add->len.max;
if (to->expr.mmd.max == 0)
- select_opt_exact_info(&to->exb, &to->expr);
+ select_opt_exact_info(enc, &to->exb, &to->expr);
else
- select_opt_exact_info(&to->exm, &to->expr);
+ select_opt_exact_info(enc, &to->exm, &to->expr);
}
}
else if (add->expr.len > 0) {
@@ -3828,7 +4888,7 @@ alt_merge_node_opt_info(NodeOptInfo* to, NodeOptInfo* add, OptEnv* env)
alt_merge_opt_exact_info(&to->exb, &add->exb, env);
alt_merge_opt_exact_info(&to->exm, &add->exm, env);
alt_merge_opt_exact_info(&to->expr, &add->expr, env);
- alt_merge_opt_map_info (&to->map, &add->map);
+ alt_merge_opt_map_info(env->enc, &to->map, &add->map);
alt_merge_mml(&to->len, &add->len);
}
@@ -3847,7 +4907,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
type = NTYPE(node);
switch (type) {
- case N_LIST:
+ case NT_LIST:
{
OptEnv nenv;
NodeOptInfo nopt;
@@ -3855,220 +4915,192 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
copy_opt_env(&nenv, env);
do {
- r = optimize_node_left(NCONS(nd).left, &nopt, &nenv);
- if (r == 0) {
- add_mml(&nenv.mmd, &nopt.len);
- concat_left_node_opt_info(opt, &nopt);
- }
- } while (r == 0 && IS_NOT_NULL(nd = NCONS(nd).right));
+ r = optimize_node_left(NCAR(nd), &nopt, &nenv);
+ if (r == 0) {
+ add_mml(&nenv.mmd, &nopt.len);
+ concat_left_node_opt_info(env->enc, opt, &nopt);
+ }
+ } while (r == 0 && IS_NOT_NULL(nd = NCDR(nd)));
}
break;
- case N_ALT:
+ case NT_ALT:
{
NodeOptInfo nopt;
Node* nd = node;
do {
- r = optimize_node_left(NCONS(nd).left, &nopt, env);
- if (r == 0) {
- if (nd == node) copy_node_opt_info(opt, &nopt);
- else alt_merge_node_opt_info(opt, &nopt, env);
- }
- } while ((r == 0) && IS_NOT_NULL(nd = NCONS(nd).right));
+ r = optimize_node_left(NCAR(nd), &nopt, env);
+ if (r == 0) {
+ if (nd == node) copy_node_opt_info(opt, &nopt);
+ else alt_merge_node_opt_info(opt, &nopt, env);
+ }
+ } while ((r == 0) && IS_NOT_NULL(nd = NCDR(nd)));
}
break;
- case N_STRING:
+ case NT_STR:
{
- UChar *p;
- int len, plen;
- StrNode* sn = &(NSTRING(node));
- int slen = sn->end - sn->s;
+ StrNode* sn = NSTR(node);
+ OnigDistance slen = sn->end - sn->s;
int is_raw = NSTRING_IS_RAW(node);
- if ((! IS_IGNORECASE(env->options)) || is_raw) {
- concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
- NSTRING_IS_RAW(node), env->enc);
- if (slen > 0) {
- add_char_opt_map_info(&opt->map, *(sn->s));
- }
+ if (! NSTRING_IS_AMBIG(node)) {
+ concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
+ is_raw, env->enc);
+ opt->exb.ignore_case = 0;
+ if (slen > 0) {
+ add_char_opt_map_info(&opt->map, *(sn->s), env->enc);
+ }
+ set_mml(&opt->len, slen, slen);
}
else {
- for (p = sn->s; p < sn->end; ) {
- len = enc_len(env->enc, *p);
- if (len == 1 && ONIGENC_IS_MBC_CASE_AMBIG(env->enc, p)) {
- break;
- }
- p += len;
- }
+ OnigDistance max;
- plen = p - sn->s;
- if (plen > slen / 5) {
- concat_opt_exact_info_str(&opt->exb, sn->s, p, is_raw, env->enc);
- concat_opt_exact_info_str(&opt->exm, p, sn->end, is_raw, env->enc);
- opt->exm.ignore_case = 1;
- if (opt->exm.len == sn->end - p)
- opt->exm.reach_end = 1;
+ if (NSTRING_IS_DONT_GET_OPT_INFO(node)) {
+ int n = onigenc_strlen(env->enc, sn->s, sn->end);
+ max = (OnigDistance )ONIGENC_MBC_MAXLEN_DIST(env->enc) * (OnigDistance)n;
+ }
+ else {
+ concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
+ is_raw, env->enc);
+ opt->exb.ignore_case = 1;
+
+ if (slen > 0) {
+ r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end,
+ env->enc, env->case_fold_flag);
+ if (r != 0) break;
+ }
- copy_mml(&(opt->exm.mmd), &(opt->exb.mmd));
- add_len_mml(&(opt->exm.mmd), plen);
- }
- else {
- concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
- is_raw, env->enc);
- opt->exb.ignore_case = 1;
- }
+ max = slen;
+ }
- if (slen > 0) {
- if (p == sn->s)
- add_char_amb_opt_map_info(&opt->map, *(sn->s), env->enc);
- else
- add_char_opt_map_info(&opt->map, *(sn->s));
- }
+ set_mml(&opt->len, slen, max);
}
- if (opt->exb.len == slen)
- opt->exb.reach_end = 1;
-
- set_mml(&opt->len, slen, slen);
+ if ((OnigDistance )opt->exb.len == slen)
+ opt->exb.reach_end = 1;
}
break;
- case N_CCLASS:
+ case NT_CCLASS:
{
- int i, z, len, found, mb_found;
- CClassNode* cc = &(NCCLASS(node));
-
- /* no need to check ignore case. (setted in setup_tree()) */
- found = mb_found = 0;
- for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
- z = BITSET_AT(cc->bs, i);
- if ((z && !cc->not) || (!z && cc->not)) {
- found = 1;
- add_char_opt_map_info(&opt->map, i);
- }
- }
+ int i, z;
+ CClassNode* cc = NCCLASS(node);
- if (! ONIGENC_IS_SINGLEBYTE(env->enc)) {
- if (! IS_NULL(cc->mbuf) ||
- (cc->not != 0 && found != 0)) {
- for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
- z = ONIGENC_IS_MBC_HEAD(env->enc, i);
- if (z) {
- mb_found = 1;
- add_char_opt_map_info(&opt->map, i);
- }
- }
- }
- }
+ /* no need to check ignore case. (set in setup_tree()) */
- if (mb_found) {
- len = ONIGENC_MBC_MAXLEN_DIST(env->enc);
- set_mml(&opt->len, 1, len);
+ if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) {
+ OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
+ OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
+
+ set_mml(&opt->len, min, max);
}
- else if (found) {
- len = 1;
- set_mml(&opt->len, 1, len);
+ else {
+ for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
+ z = BITSET_AT(cc->bs, i);
+ if ((z && !IS_NCCLASS_NOT(cc)) || (!z && IS_NCCLASS_NOT(cc))) {
+ add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
+ }
+ }
+ set_mml(&opt->len, 1, 1);
}
}
break;
- case N_CTYPE:
+ case NT_CTYPE:
{
- int c;
- int len, min, max;
-
- min = ONIGENC_MBC_MAXLEN_DIST(env->enc);
- max = 0;
-
-#define IS_WORD_HEAD_BYTE(enc,b) \
- (ONIGENC_IS_MBC_ASCII(&b) ? ONIGENC_IS_CODE_WORD(enc,((OnigCodePoint )b)) \
- : ONIGENC_IS_MBC_HEAD(enc,b))
-
- switch (NCTYPE(node).type) {
- case CTYPE_WORD:
- for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
- if (IS_WORD_HEAD_BYTE(env->enc, c)) {
- add_char_opt_map_info(&opt->map, c);
- len = enc_len(env->enc, c);
- if (len < min) min = len;
- if (len > max) max = len;
- }
- }
- break;
-
- case CTYPE_NOT_WORD:
- for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
- if (! IS_WORD_HEAD_BYTE(env->enc, c)) {
- add_char_opt_map_info(&opt->map, c);
- len = enc_len(env->enc, c);
- if (len < min) min = len;
- if (len > max) max = len;
- }
- }
- break;
+ int i, min, max;
+ int maxcode;
+
+ max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
+
+ if (max == 1) {
+ min = 1;
+
+ maxcode = NCTYPE(node)->ascii_range ? 0x80 : SINGLE_BYTE_SIZE;
+ switch (NCTYPE(node)->ctype) {
+ case ONIGENC_CTYPE_WORD:
+ if (NCTYPE(node)->not != 0) {
+ for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
+ if (! ONIGENC_IS_CODE_WORD(env->enc, i) || i >= maxcode) {
+ add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
+ }
+ }
+ }
+ else {
+ for (i = 0; i < maxcode; i++) {
+ if (ONIGENC_IS_CODE_WORD(env->enc, i)) {
+ add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
+ }
+ }
+ }
+ break;
+ }
+ }
+ else {
+ min = ONIGENC_MBC_MINLEN(env->enc);
}
-
set_mml(&opt->len, min, max);
}
break;
- case N_ANYCHAR:
+ case NT_CANY:
{
- OnigDistance len = ONIGENC_MBC_MAXLEN_DIST(env->enc);
- set_mml(&opt->len, 1, len);
+ OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
+ OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
+ set_mml(&opt->len, min, max);
}
break;
- case N_ANCHOR:
- switch (NANCHOR(node).type) {
+ case NT_ANCHOR:
+ switch (NANCHOR(node)->type) {
case ANCHOR_BEGIN_BUF:
case ANCHOR_BEGIN_POSITION:
case ANCHOR_BEGIN_LINE:
case ANCHOR_END_BUF:
case ANCHOR_SEMI_END_BUF:
case ANCHOR_END_LINE:
- add_opt_anc_info(&opt->anc, NANCHOR(node).type);
+ case ANCHOR_LOOK_BEHIND: /* just for (?<=x).* */
+ case ANCHOR_PREC_READ_NOT: /* just for (?!x).* */
+ add_opt_anc_info(&opt->anc, NANCHOR(node)->type);
break;
case ANCHOR_PREC_READ:
{
- NodeOptInfo nopt;
+ NodeOptInfo nopt;
- r = optimize_node_left(NANCHOR(node).target, &nopt, env);
- if (r == 0) {
- if (nopt.exb.len > 0)
- copy_opt_exact_info(&opt->expr, &nopt.exb);
- else if (nopt.exm.len > 0)
- copy_opt_exact_info(&opt->expr, &nopt.exm);
+ r = optimize_node_left(NANCHOR(node)->target, &nopt, env);
+ if (r == 0) {
+ if (nopt.exb.len > 0)
+ copy_opt_exact_info(&opt->expr, &nopt.exb);
+ else if (nopt.exm.len > 0)
+ copy_opt_exact_info(&opt->expr, &nopt.exm);
- opt->expr.reach_end = 0;
+ opt->expr.reach_end = 0;
- if (nopt.map.value > 0)
- copy_opt_map_info(&opt->map, &nopt.map);
- }
+ if (nopt.map.value > 0)
+ copy_opt_map_info(&opt->map, &nopt.map);
+ }
}
break;
- case ANCHOR_PREC_READ_NOT:
- case ANCHOR_LOOK_BEHIND: /* Sorry, I can't make use of it. */
case ANCHOR_LOOK_BEHIND_NOT:
break;
}
break;
- case N_BACKREF:
+ case NT_BREF:
{
int i;
int* backs;
OnigDistance min, max, tmin, tmax;
Node** nodes = SCANENV_MEM_NODES(env->scan_env);
- BackrefNode* br = &(NBACKREF(node));
+ BRefNode* br = NBREF(node);
if (br->state & NST_RECURSION) {
- set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
- break;
+ set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
+ break;
}
backs = BACKREFS_P(br);
r = get_min_match_length(nodes[backs[0]], &min, env->scan_env);
@@ -4076,125 +5108,131 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
r = get_max_match_length(nodes[backs[0]], &max, env->scan_env);
if (r != 0) break;
for (i = 1; i < br->back_num; i++) {
- r = get_min_match_length(nodes[backs[i]], &tmin, env->scan_env);
- if (r != 0) break;
- r = get_max_match_length(nodes[backs[i]], &tmax, env->scan_env);
- if (r != 0) break;
- if (min > tmin) min = tmin;
- if (max < tmax) max = tmax;
+ r = get_min_match_length(nodes[backs[i]], &tmin, env->scan_env);
+ if (r != 0) break;
+ r = get_max_match_length(nodes[backs[i]], &tmax, env->scan_env);
+ if (r != 0) break;
+ if (min > tmin) min = tmin;
+ if (max < tmax) max = tmax;
}
if (r == 0) set_mml(&opt->len, min, max);
}
break;
#ifdef USE_SUBEXP_CALL
- case N_CALL:
- if (IS_CALL_RECURSION(&(NCALL(node))))
+ case NT_CALL:
+ if (IS_CALL_RECURSION(NCALL(node)))
set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
else {
OnigOptionType save = env->options;
- env->options = NEFFECT(NCALL(node).target).option;
- r = optimize_node_left(NCALL(node).target, opt, env);
+ env->options = NENCLOSE(NCALL(node)->target)->option;
+ r = optimize_node_left(NCALL(node)->target, opt, env);
env->options = save;
}
break;
#endif
- case N_QUALIFIER:
+ case NT_QTFR:
{
int i;
OnigDistance min, max;
NodeOptInfo nopt;
- QualifierNode* qn = &(NQUALIFIER(node));
+ QtfrNode* qn = NQTFR(node);
r = optimize_node_left(qn->target, &nopt, env);
if (r) break;
if (qn->lower == 0 && IS_REPEAT_INFINITE(qn->upper)) {
- if (env->mmd.max == 0 &&
- NTYPE(qn->target) == N_ANYCHAR && qn->greedy) {
- if (IS_POSIXLINE(env->options))
- add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_PL);
- else
- add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR);
- }
+ if (env->mmd.max == 0 &&
+ NTYPE(qn->target) == NT_CANY && qn->greedy) {
+ if (IS_MULTILINE(env->options))
+ /* implicit anchor: /.*a/ ==> /\A.*a/ */
+ add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML);
+ else
+ add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR);
+ }
}
else {
- if (qn->lower > 0) {
- copy_node_opt_info(opt, &nopt);
- if (nopt.exb.len > 0) {
- if (nopt.exb.reach_end) {
- for (i = 2; i < qn->lower &&
- ! is_full_opt_exact_info(&opt->exb); i++) {
- concat_opt_exact_info(&opt->exb, &nopt.exb);
- }
- if (i < qn->lower) {
- opt->exb.reach_end = 0;
- }
- }
- }
-
- if (qn->lower != qn->upper) {
- opt->exb.reach_end = 0;
- opt->exm.reach_end = 0;
- }
- if (qn->lower > 1)
- opt->exm.reach_end = 0;
- }
+ if (qn->lower > 0) {
+ copy_node_opt_info(opt, &nopt);
+ if (nopt.exb.len > 0) {
+ if (nopt.exb.reach_end) {
+ for (i = 2; i <= qn->lower &&
+ ! is_full_opt_exact_info(&opt->exb); i++) {
+ concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc);
+ }
+ if (i < qn->lower) {
+ opt->exb.reach_end = 0;
+ }
+ }
+ }
+
+ if (qn->lower != qn->upper) {
+ opt->exb.reach_end = 0;
+ opt->exm.reach_end = 0;
+ }
+ if (qn->lower > 1)
+ opt->exm.reach_end = 0;
+ }
}
min = distance_multiply(nopt.len.min, qn->lower);
if (IS_REPEAT_INFINITE(qn->upper))
- max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0);
+ max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0);
else
- max = distance_multiply(nopt.len.max, qn->upper);
+ max = distance_multiply(nopt.len.max, qn->upper);
set_mml(&opt->len, min, max);
}
break;
- case N_EFFECT:
+ case NT_ENCLOSE:
{
- EffectNode* en = &(NEFFECT(node));
+ EncloseNode* en = NENCLOSE(node);
switch (en->type) {
- case EFFECT_OPTION:
- {
- OnigOptionType save = env->options;
+ case ENCLOSE_OPTION:
+ {
+ OnigOptionType save = env->options;
- env->options = en->option;
- r = optimize_node_left(en->target, opt, env);
- env->options = save;
- }
- break;
+ env->options = en->option;
+ r = optimize_node_left(en->target, opt, env);
+ env->options = save;
+ }
+ break;
- case EFFECT_MEMORY:
+ case ENCLOSE_MEMORY:
#ifdef USE_SUBEXP_CALL
- en->opt_count++;
- if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) {
- OnigDistance min, max;
-
- min = 0;
- max = ONIG_INFINITE_DISTANCE;
- if (IS_EFFECT_MIN_FIXED(en)) min = en->min_len;
- if (IS_EFFECT_MAX_FIXED(en)) max = en->max_len;
- set_mml(&opt->len, min, max);
- }
- else
+ en->opt_count++;
+ if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) {
+ OnigDistance min, max;
+
+ min = 0;
+ max = ONIG_INFINITE_DISTANCE;
+ if (IS_ENCLOSE_MIN_FIXED(en)) min = en->min_len;
+ if (IS_ENCLOSE_MAX_FIXED(en)) max = en->max_len;
+ set_mml(&opt->len, min, max);
+ }
+ else
#endif
- {
- r = optimize_node_left(en->target, opt, env);
+ {
+ r = optimize_node_left(en->target, opt, env);
- if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) {
- if (BIT_STATUS_AT(env->backrefed_status, en->regnum))
- remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK);
- }
- }
- break;
+ if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) {
+ if (BIT_STATUS_AT(env->scan_env->backrefed_mem, en->regnum))
+ remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK);
+ }
+ }
+ break;
- case EFFECT_STOP_BACKTRACK:
- r = optimize_node_left(en->target, opt, env);
- break;
+ case ENCLOSE_STOP_BACKTRACK:
+ case ENCLOSE_CONDITION:
+ r = optimize_node_left(en->target, opt, env);
+ break;
+
+ case ENCLOSE_ABSENT:
+ set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
+ break;
}
}
break;
@@ -4202,7 +5240,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
default:
#ifdef ONIG_DEBUG
fprintf(stderr, "optimize_node_left: undefined node type %d\n",
- NTYPE(node));
+ NTYPE(node));
#endif
r = ONIGERR_TYPE_BUG;
break;
@@ -4214,53 +5252,49 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
static int
set_optimize_exact_info(regex_t* reg, OptExactInfo* e)
{
- int r;
+ int allow_reverse;
if (e->len == 0) return 0;
- reg->exact = onig_strdup(e->s, e->s + e->len);
- CHECK_NULL_RETURN_VAL(reg->exact, ONIGERR_MEMORY);
-
+ reg->exact = (UChar* )xmalloc(e->len);
+ CHECK_NULL_RETURN_MEMERR(reg->exact);
+ xmemcpy(reg->exact, e->s, e->len);
reg->exact_end = reg->exact + e->len;
-
- if (e->ignore_case) {
- UChar buf[ONIGENC_MBC_TO_LOWER_MAXLEN];
- int len, low_len, i, j, alloc_size;
-
- alloc_size = e->len;
- i = j = 0;
- while (i < e->len) {
- low_len = ONIGENC_MBC_TO_LOWER(reg->enc, &(e->s[i]), buf);
- len = enc_len(reg->enc, e->s[i]);
- if (low_len > alloc_size - i) {
- reg->exact = xrealloc(reg->exact, alloc_size * 2);
- CHECK_NULL_RETURN_VAL(reg->exact, ONIGERR_MEMORY);
- alloc_size *= 2;
- }
- xmemcpy(&(reg->exact[j]), buf, low_len);
- i += len;
- j += low_len;
+ allow_reverse =
+ ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);
+
+ if (e->ignore_case > 0) {
+ if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
+ int orig_len = e->len;
+ e->len = set_bm_skip(reg->exact, reg->exact_end, reg,
+ reg->map, 1);
+ if (e->len >= 3) {
+ reg->exact_end = reg->exact + e->len;
+ reg->optimize = (allow_reverse != 0
+ ? ONIG_OPTIMIZE_EXACT_BM_IC : ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC);
+ }
+ else {
+ /* Even if BM skip table can't be built (e.g., pattern starts with
+ 's' or 'k' which have multi-byte case fold variants), we should
+ still use EXACT_IC optimization with the original pattern.
+ Without this fallback, patterns like /slackware/i have no
+ optimization at all, causing severe performance regression
+ especially with non-ASCII strings. See [Bug #21824] */
+ e->len = orig_len; /* Restore original length for EXACT_IC */
+ reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
+ }
+ }
+ else {
+ reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
}
- reg->exact_end = reg->exact + j;
- reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
}
else {
- int allow_reverse;
-
- if (e->anc.left_anchor & ANCHOR_BEGIN_LINE)
- allow_reverse = 1;
- else
- allow_reverse =
- ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);
-
if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
- r = set_bm_skip(reg->exact, reg->exact_end, reg->enc, 0,
- reg->map, &(reg->int_map));
- if (r) return r;
-
+ set_bm_skip(reg->exact, reg->exact_end, reg,
+ reg->map, 0);
reg->optimize = (allow_reverse != 0
- ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV);
+ ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV);
}
else {
reg->optimize = ONIG_OPTIMIZE_EXACT;
@@ -4271,7 +5305,7 @@ set_optimize_exact_info(regex_t* reg, OptExactInfo* e)
reg->dmax = e->mmd.max;
if (reg->dmin != ONIG_INFINITE_DISTANCE) {
- reg->threshold_len = reg->dmin + (reg->exact_end - reg->exact);
+ reg->threshold_len = (int )(reg->dmin + (reg->exact_end - reg->exact));
}
return 0;
@@ -4290,7 +5324,7 @@ set_optimize_map_info(regex_t* reg, OptMapInfo* m)
reg->dmax = m->mmd.max;
if (reg->dmin != ONIG_INFINITE_DISTANCE) {
- reg->threshold_len = reg->dmin + 1;
+ reg->threshold_len = (int )(reg->dmin + 1);
}
}
@@ -4301,7 +5335,7 @@ set_sub_anchor(regex_t* reg, OptAncInfo* anc)
reg->sub_anchor |= anc->right_anchor & ANCHOR_END_LINE;
}
-#ifdef ONIG_DEBUG
+#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
static void print_optimize_info(FILE* f, regex_t* reg);
#endif
@@ -4313,8 +5347,9 @@ set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
NodeOptInfo opt;
OptEnv env;
- env.enc = reg->enc;
- env.options = reg->options;
+ env.enc = reg->enc;
+ env.options = reg->options;
+ env.case_fold_flag = reg->case_fold_flag;
env.scan_env = scan_env;
clear_mml(&env.mmd);
@@ -4322,9 +5357,14 @@ set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
if (r) return r;
reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF |
- ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_PL);
+ ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML |
+ ANCHOR_LOOK_BEHIND);
+
+ if ((opt.anc.left_anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) != 0)
+ reg->anchor &= ~ANCHOR_ANYCHAR_STAR_ML;
- reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF);
+ reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF |
+ ANCHOR_PREC_READ_NOT);
if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) {
reg->anchor_dmin = opt.len.min;
@@ -4332,9 +5372,9 @@ set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
}
if (opt.exb.len > 0 || opt.exm.len > 0) {
- select_opt_exact_info(&opt.exb, &opt.exm);
+ select_opt_exact_info(reg->enc, &opt.exb, &opt.exm);
if (opt.map.value > 0 &&
- comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) {
+ comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) {
goto set_map;
}
else {
@@ -4369,28 +5409,60 @@ clear_optimize_info(regex_t* reg)
reg->sub_anchor = 0;
reg->exact_end = (UChar* )NULL;
reg->threshold_len = 0;
- if (IS_NOT_NULL(reg->exact)) {
- xfree(reg->exact);
- reg->exact = (UChar* )NULL;
- }
+ xfree(reg->exact);
+ reg->exact = (UChar* )NULL;
}
#ifdef ONIG_DEBUG
+static void print_enc_string(FILE* fp, OnigEncoding enc,
+ const UChar *s, const UChar *end)
+{
+ fprintf(fp, "\nPATTERN: /");
+
+ if (ONIGENC_MBC_MINLEN(enc) > 1) {
+ const UChar *p;
+ OnigCodePoint code;
+
+ p = s;
+ while (p < end) {
+ code = ONIGENC_MBC_TO_CODE(enc, p, end);
+ if (code >= 0x80) {
+ fprintf(fp, " 0x%04x ", (int )code);
+ }
+ else {
+ fputc((int )code, fp);
+ }
+
+ p += enclen(enc, p, end);
+ }
+ }
+ else {
+ while (s < end) {
+ fputc((int )*s, fp);
+ s++;
+ }
+ }
+
+ fprintf(fp, "/ (%s)\n", enc->name);
+}
+#endif /* ONIG_DEBUG */
+
+#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
static void
print_distance_range(FILE* f, OnigDistance a, OnigDistance b)
{
if (a == ONIG_INFINITE_DISTANCE)
fputs("inf", f);
else
- fprintf(f, "(%u)", a);
+ fprintf(f, "(%"PRIuPTR")", a);
fputs("-", f);
if (b == ONIG_INFINITE_DISTANCE)
fputs("inf", f);
else
- fprintf(f, "(%u)", b);
+ fprintf(f, "(%"PRIuPTR")", b);
}
static void
@@ -4434,9 +5506,9 @@ print_anchor(FILE* f, int anchor)
q = 1;
fprintf(f, "anychar-star");
}
- if (anchor & ANCHOR_ANYCHAR_STAR_PL) {
+ if (anchor & ANCHOR_ANYCHAR_STAR_ML) {
if (q) fprintf(f, ", ");
- fprintf(f, "anychar-star-pl");
+ fprintf(f, "anychar-star-ml");
}
fprintf(f, "]");
@@ -4445,8 +5517,9 @@ print_anchor(FILE* f, int anchor)
static void
print_optimize_info(FILE* f, regex_t* reg)
{
- static char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",
- "EXACT_IC", "MAP" };
+ static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",
+ "EXACT_IC", "MAP",
+ "EXACT_BM_IC", "EXACT_BM_NOT_REV_IC" };
fprintf(f, "optimize: %s\n", on[reg->optimize]);
fprintf(f, " anchor: "); print_anchor(f, reg->anchor);
@@ -4466,40 +5539,49 @@ print_optimize_info(FILE* f, regex_t* reg)
for (p = reg->exact; p < reg->exact_end; p++) {
fputc(*p, f);
}
- fprintf(f, "]: length: %d\n", (reg->exact_end - reg->exact));
+ fprintf(f, "]: length: %"PRIdPTR"\n", (reg->exact_end - reg->exact));
}
else if (reg->optimize & ONIG_OPTIMIZE_MAP) {
- int i, n = 0;
+ int c, i, n = 0;
+
for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
if (reg->map[i]) n++;
fprintf(f, "map: n=%d\n", n);
if (n > 0) {
+ c = 0;
fputc('[', f);
- for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
- if (reg->map[i] && enc_len(reg->enc, i) == 1 &&
- ONIGENC_IS_CODE_PRINT(reg->enc, i))
- fputc(i, f);
+ for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
+ if (reg->map[i] != 0) {
+ if (c > 0) fputs(", ", f);
+ c++;
+ if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 &&
+ ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i))
+ fputc(i, f);
+ else
+ fprintf(f, "%d", i);
+ }
+ }
fprintf(f, "]\n");
}
}
}
-#endif /* ONIG_DEBUG */
+#endif /* ONIG_DEBUG_COMPILE || ONIG_DEBUG_MATCH */
-static void
+extern void
onig_free_body(regex_t* reg)
{
- if (IS_NOT_NULL(reg->p)) xfree(reg->p);
- if (IS_NOT_NULL(reg->exact)) xfree(reg->exact);
- if (IS_NOT_NULL(reg->int_map)) xfree(reg->int_map);
- if (IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward);
- if (IS_NOT_NULL(reg->repeat_range)) xfree(reg->repeat_range);
- if (IS_NOT_NULL(reg->chain)) onig_free(reg->chain);
+ if (IS_NOT_NULL(reg)) {
+ xfree(reg->p);
+ xfree(reg->exact);
+ xfree(reg->repeat_range);
+ onig_free(reg->chain);
#ifdef USE_NAMED_GROUP
- onig_names_free(reg);
+ onig_names_free(reg);
#endif
+ }
}
extern void
@@ -4511,152 +5593,152 @@ onig_free(regex_t* reg)
}
}
-#define REGEX_TRANSFER(to,from) do {\
- (to)->state = ONIG_STATE_MODIFY;\
- onig_free_body(to);\
- xmemcpy(to, from, sizeof(regex_t));\
- xfree(from);\
-} while (0)
-
-static void
-onig_transfer(regex_t* to, regex_t* from)
+static void*
+dup_copy(const void *ptr, size_t size)
{
- THREAD_ATOMIC_START;
- REGEX_TRANSFER(to, from);
- THREAD_ATOMIC_END;
-}
-
-#define REGEX_CHAIN_HEAD(reg) do {\
- while (IS_NOT_NULL((reg)->chain)) {\
- (reg) = (reg)->chain;\
- }\
-} while (0)
-
-static void
-onig_chain_link_add(regex_t* to, regex_t* add)
-{
- THREAD_ATOMIC_START;
- REGEX_CHAIN_HEAD(to);
- to->chain = add;
- THREAD_ATOMIC_END;
-}
-
-extern void
-onig_chain_reduce(regex_t* reg)
-{
- regex_t *head, *prev;
-
- THREAD_ATOMIC_START;
- prev = reg;
- head = prev->chain;
- if (IS_NOT_NULL(head)) {
- reg->state = ONIG_STATE_MODIFY;
- while (IS_NOT_NULL(head->chain)) {
- prev = head;
- head = head->chain;
- }
- prev->chain = (regex_t* )NULL;
- REGEX_TRANSFER(reg, head);
+ void *newptr = xmalloc(size);
+ if (IS_NOT_NULL(newptr)) {
+ memcpy(newptr, ptr, size);
}
- THREAD_ATOMIC_END;
+ return newptr;
}
-#if 0
extern int
-onig_clone(regex_t** to, regex_t* from)
+onig_reg_copy(regex_t** nreg, regex_t* oreg)
{
- int r, size;
- regex_t* reg;
-
- if (ONIG_STATE(from) == ONIG_STATE_NORMAL) {
- from->state++; /* increment as search counter */
- if (IS_NOT_NULL(from->chain)) {
- onig_chain_reduce(from);
- from->state++;
- }
- }
- else {
- int n = 0;
- while (ONIG_STATE(from) < ONIG_STATE_NORMAL) {
- if (++n > THREAD_PASS_LIMIT_COUNT)
- return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT;
- THREAD_PASS;
- }
- from->state++; /* increment as search counter */
- }
+ if (IS_NOT_NULL(oreg)) {
+ regex_t *reg = *nreg = (regex_t* )xmalloc(sizeof(regex_t));
+ if (IS_NULL(reg)) return ONIGERR_MEMORY;
- r = onig_alloc_init(&reg, ONIG_OPTION_NONE, from->enc, ONIG_SYNTAX_DEFAULT);
- if (r != 0) {
- from->state--;
- return r;
- }
+ *reg = *oreg;
- xmemcpy(reg, from, sizeof(onig_t));
- reg->state = ONIG_STATE_NORMAL;
- reg->chain = (regex_t* )NULL;
+# define COPY_FAILED(mem, size) IS_NULL(reg->mem = dup_copy(reg->mem, size))
- if (from->p) {
- reg->p = (UChar* )xmalloc(reg->alloc);
- if (IS_NULL(reg->p)) goto mem_error;
- xmemcpy(reg->p, from->p, reg->alloc);
- }
+ if (IS_NOT_NULL(reg->exact)) {
+ size_t exact_size = reg->exact_end - reg->exact;
+ if (COPY_FAILED(exact, exact_size))
+ goto err;
+ (reg)->exact_end = (reg)->exact + exact_size;
+ }
- if (from->exact) {
- reg->exact = (UChar* )xmalloc(from->exact_end - from->exact);
- if (IS_NULL(reg->exact)) goto mem_error;
- reg->exact_end = reg->exact + (from->exact_end - from->exact);
- xmemcpy(reg->exact, from->exact, reg->exact_end - reg->exact);
+ if (IS_NOT_NULL(reg->p)) {
+ if (COPY_FAILED(p, reg->alloc))
+ goto err_p;
+ }
+ if (IS_NOT_NULL(reg->repeat_range)) {
+ if (COPY_FAILED(repeat_range, reg->repeat_range_alloc * sizeof(OnigRepeatRange)))
+ goto err_repeat_range;
+ }
+ if (IS_NOT_NULL(reg->name_table)) {
+ if (onig_names_copy(reg, oreg))
+ goto err_name_table;
+ }
+ if (IS_NOT_NULL(reg->chain)) {
+ if (onig_reg_copy(&reg->chain, reg->chain))
+ goto err_chain;
+ }
+ return 0;
+# undef COPY_FAILED
+
+ err_chain:
+ onig_names_free(reg);
+ err_name_table:
+ xfree(reg->repeat_range);
+ err_repeat_range:
+ xfree(reg->p);
+ err_p:
+ xfree(reg->exact);
+ err:
+ xfree(reg);
+ return ONIGERR_MEMORY;
}
+ return 0;
+}
- if (from->int_map) {
- size = sizeof(int) * ONIG_CHAR_TABLE_SIZE;
- reg->int_map = (int* )xmalloc(size);
- if (IS_NULL(reg->int_map)) goto mem_error;
- xmemcpy(reg->int_map, from->int_map, size);
- }
+#ifdef RUBY
+size_t
+onig_memsize(const regex_t *reg)
+{
+ size_t size = sizeof(regex_t);
+ if (IS_NULL(reg)) return 0;
+ if (IS_NOT_NULL(reg->p)) size += reg->alloc;
+ if (IS_NOT_NULL(reg->exact)) size += reg->exact_end - reg->exact;
+ if (IS_NOT_NULL(reg->repeat_range)) size += reg->repeat_range_alloc * sizeof(OnigRepeatRange);
+ if (IS_NOT_NULL(reg->chain)) size += onig_memsize(reg->chain);
- if (from->int_map_backward) {
- size = sizeof(int) * ONIG_CHAR_TABLE_SIZE;
- reg->int_map_backward = (int* )xmalloc(size);
- if (IS_NULL(reg->int_map_backward)) goto mem_error;
- xmemcpy(reg->int_map_backward, from->int_map_backward, size);
- }
+ return size;
+}
-#ifdef USE_NAMED_GROUP
- reg->name_table = names_clone(from); /* names_clone is not implemented */
+size_t
+onig_region_memsize(const OnigRegion *regs)
+{
+ size_t size = sizeof(*regs);
+ if (IS_NULL(regs)) return 0;
+ size += regs->allocated * (sizeof(*regs->beg) + sizeof(*regs->end));
+ return size;
+}
#endif
- from->state--;
- *to = reg;
- return 0;
+#define REGEX_TRANSFER(to,from) do {\
+ onig_free_body(to);\
+ xmemcpy(to, from, sizeof(regex_t));\
+ xfree(from);\
+} while (0)
- mem_error:
- from->state--;
- return ONIGERR_MEMORY;
+#if 0
+extern void
+onig_transfer(regex_t* to, regex_t* from)
+{
+ REGEX_TRANSFER(to, from);
}
#endif
-#ifdef ONIG_DEBUG
-static void print_compiled_byte_code_list P_((FILE* f, regex_t* reg));
+#ifdef ONIG_DEBUG_COMPILE
+static void print_compiled_byte_code_list(FILE* f, regex_t* reg);
#endif
#ifdef ONIG_DEBUG_PARSE_TREE
-static void print_tree P_((FILE* f, Node* node));
+static void print_tree(FILE* f, Node* node);
#endif
+#ifdef RUBY
extern int
-onig_compile(regex_t* reg, UChar* pattern, UChar* pattern_end,
- OnigErrorInfo* einfo)
+onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
+ OnigErrorInfo* einfo)
+{
+ return onig_compile_ruby(reg, pattern, pattern_end, einfo, NULL, 0);
+}
+#endif
+
+#ifdef RUBY
+extern int
+onig_compile_ruby(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
+ OnigErrorInfo* einfo, const char *sourcefile, int sourceline)
+#else
+extern int
+onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
+ OnigErrorInfo* einfo)
+#endif
{
#define COMPILE_INIT_SIZE 20
- int r, init_size;
+ int r;
+ OnigDistance init_size;
Node* root;
- ScanEnv scan_env;
+ ScanEnv scan_env = {0};
#ifdef USE_SUBEXP_CALL
UnsetAddrList uslist;
#endif
- reg->state = ONIG_STATE_COMPILING;
+ if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;
+
+#ifdef RUBY
+ scan_env.sourcefile = sourcefile;
+ scan_env.sourceline = sourceline;
+#endif
+
+#ifdef ONIG_DEBUG
+ print_enc_string(stderr, reg->enc, pattern, pattern_end);
+#endif
if (reg->alloc == 0) {
init_size = (pattern_end - pattern) * 2;
@@ -4672,10 +5754,20 @@ onig_compile(regex_t* reg, UChar* pattern, UChar* pattern_end,
reg->num_null_check = 0;
reg->repeat_range_alloc = 0;
reg->repeat_range = (OnigRepeatRange* )NULL;
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+ reg->num_comb_exp_check = 0;
+#endif
r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env);
if (r != 0) goto err;
+#ifdef ONIG_DEBUG_PARSE_TREE
+# if 0
+ fprintf(stderr, "ORIGINAL PARSE TREE:\n");
+ print_tree(stderr, root);
+# endif
+#endif
+
#ifdef USE_NAMED_GROUP
/* mixed use named group and no-named group */
if (scan_env.num_named > 0 &&
@@ -4690,10 +5782,6 @@ onig_compile(regex_t* reg, UChar* pattern, UChar* pattern_end,
}
#endif
-#ifdef ONIG_DEBUG_PARSE_TREE
- print_tree(stderr, root);
-#endif
-
#ifdef USE_SUBEXP_CALL
if (scan_env.num_call > 0) {
r = unset_addr_list_init(&uslist, scan_env.num_call);
@@ -4715,6 +5803,10 @@ onig_compile(regex_t* reg, UChar* pattern, UChar* pattern_end,
r = setup_tree(root, reg, 0, &scan_env);
if (r != 0) goto err_unset;
+#ifdef ONIG_DEBUG_PARSE_TREE
+ print_tree(stderr, root);
+#endif
+
reg->capture_history = scan_env.capture_history;
reg->bt_mem_start = scan_env.bt_mem_start;
reg->bt_mem_start |= reg->capture_history;
@@ -4725,6 +5817,33 @@ onig_compile(regex_t* reg, UChar* pattern, UChar* pattern_end,
reg->bt_mem_end |= reg->capture_history;
}
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+ if (scan_env.backrefed_mem == 0
+# ifdef USE_SUBEXP_CALL
+ || scan_env.num_call == 0
+# endif
+ ) {
+ setup_comb_exp_check(root, 0, &scan_env);
+# ifdef USE_SUBEXP_CALL
+ if (scan_env.has_recursion != 0) {
+ scan_env.num_comb_exp_check = 0;
+ }
+ else
+# endif
+ if (scan_env.comb_exp_max_regnum > 0) {
+ int i;
+ for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) {
+ if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) {
+ scan_env.num_comb_exp_check = 0;
+ break;
+ }
+ }
+ }
+ }
+
+ reg->num_comb_exp_check = scan_env.num_comb_exp_check;
+#endif
+
clear_optimize_info(reg);
#ifndef ONIG_DONT_OPTIMIZE
r = set_optimize_info_from_tree(root, reg, &scan_env);
@@ -4751,9 +5870,9 @@ onig_compile(regex_t* reg, UChar* pattern, UChar* pattern_end,
reg->stack_pop_level = STACK_POP_LEVEL_ALL;
else {
if (reg->bt_mem_start != 0)
- reg->stack_pop_level = STACK_POP_LEVEL_MEM_START;
+ reg->stack_pop_level = STACK_POP_LEVEL_MEM_START;
else
- reg->stack_pop_level = STACK_POP_LEVEL_FREE;
+ reg->stack_pop_level = STACK_POP_LEVEL_FREE;
}
}
#ifdef USE_SUBEXP_CALL
@@ -4764,14 +5883,14 @@ onig_compile(regex_t* reg, UChar* pattern, UChar* pattern_end,
onig_node_free(root);
#ifdef ONIG_DEBUG_COMPILE
-#ifdef USE_NAMED_GROUP
+# ifdef USE_NAMED_GROUP
onig_print_names(stderr, reg);
-#endif
+# endif
print_compiled_byte_code_list(stderr, reg);
#endif
end:
- reg->state = ONIG_STATE_NORMAL;
+ onig_reg_resize(reg);
return r;
err_unset:
@@ -4783,50 +5902,44 @@ onig_compile(regex_t* reg, UChar* pattern, UChar* pattern_end,
err:
if (IS_NOT_NULL(scan_env.error)) {
if (IS_NOT_NULL(einfo)) {
+ einfo->enc = scan_env.enc;
einfo->par = scan_env.error;
einfo->par_end = scan_env.error_end;
}
}
- if (IS_NOT_NULL(root)) onig_node_free(root);
- if (IS_NOT_NULL(scan_env.mem_nodes_dynamic))
- xfree(scan_env.mem_nodes_dynamic);
- return r;
-}
-
-extern int
-onig_recompile(regex_t* reg, UChar* pattern, UChar* pattern_end,
- OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
- OnigErrorInfo* einfo)
-{
- int r;
- regex_t *new_reg;
+ onig_node_free(root);
+ xfree(scan_env.mem_nodes_dynamic);
- r = onig_new(&new_reg, pattern, pattern_end, option, enc, syntax, einfo);
- if (r) return r;
- if (ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
- onig_transfer(reg, new_reg);
- }
- else {
- onig_chain_link_add(reg, new_reg);
- }
- return 0;
+ return r;
}
static int onig_inited = 0;
extern int
-onig_alloc_init(regex_t** reg, OnigOptionType option, OnigEncoding enc,
- OnigSyntaxType* syntax)
+onig_reg_init(regex_t* reg, OnigOptionType option,
+ OnigCaseFoldType case_fold_flag,
+ OnigEncoding enc, const OnigSyntaxType* syntax)
{
if (! onig_inited)
onig_init();
+ if (IS_NULL(reg))
+ return ONIGERR_INVALID_ARGUMENT;
+
+ (reg)->exact = (UChar* )NULL;
+ (reg)->chain = (regex_t* )NULL;
+ (reg)->p = (UChar* )NULL;
+ (reg)->name_table = (void* )NULL;
+ (reg)->repeat_range = (OnigRepeatRange* )NULL;
+
if (ONIGENC_IS_UNDEF(enc))
- return ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED;
+ return ONIGERR_DEFAULT_ENCODING_IS_NOT_SET;
- *reg = (regex_t* )xmalloc(sizeof(regex_t));
- if (IS_NULL(*reg)) return ONIGERR_MEMORY;
+ if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP))
+ == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) {
+ return ONIGERR_INVALID_COMBINATION_OF_OPTIONS;
+ }
if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {
option |= syntax->options;
@@ -4835,83 +5948,201 @@ onig_alloc_init(regex_t** reg, OnigOptionType option, OnigEncoding enc,
else
option |= syntax->options;
- (*reg)->state = ONIG_STATE_NORMAL;
- (*reg)->enc = enc;
- (*reg)->options = option;
- (*reg)->syntax = syntax;
- (*reg)->optimize = 0;
- (*reg)->exact = (UChar* )NULL;
- (*reg)->int_map = (int* )NULL;
- (*reg)->int_map_backward = (int* )NULL;
- (*reg)->chain = (regex_t* )NULL;
-
- (*reg)->p = (UChar* )NULL;
- (*reg)->alloc = 0;
- (*reg)->used = 0;
- (*reg)->name_table = (void* )NULL;
+ (reg)->enc = enc;
+ (reg)->options = option;
+ (reg)->syntax = syntax;
+ (reg)->optimize = 0;
+
+ (reg)->alloc = 0;
+ (reg)->used = 0;
+
+ (reg)->case_fold_flag = case_fold_flag;
+
+ (reg)->timelimit = 0;
return 0;
}
extern int
-onig_new(regex_t** reg, UChar* pattern, UChar* pattern_end,
- OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
- OnigErrorInfo* einfo)
+onig_new_without_alloc(regex_t* reg, const UChar* pattern,
+ const UChar* pattern_end, OnigOptionType option, OnigEncoding enc,
+ const OnigSyntaxType* syntax, OnigErrorInfo* einfo)
{
int r;
- if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;
-
- r = onig_alloc_init(reg, option, enc, syntax);
+ r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
if (r) return r;
- r = onig_compile(*reg, pattern, pattern_end, einfo);
+ r = onig_compile(reg, pattern, pattern_end, einfo);
+ return r;
+}
+
+extern int
+onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
+ OnigOptionType option, OnigEncoding enc, const OnigSyntaxType* syntax,
+ OnigErrorInfo* einfo)
+{
+ *reg = (regex_t* )xmalloc(sizeof(regex_t));
+ if (IS_NULL(*reg)) return ONIGERR_MEMORY;
+
+ int r = onig_new_without_alloc(*reg, pattern, pattern_end, option, enc, syntax, einfo);
if (r) {
onig_free(*reg);
*reg = NULL;
}
+
return r;
}
extern int
-onig_init()
+onig_initialize(OnigEncoding encodings[] ARG_UNUSED, int n ARG_UNUSED)
+{
+ return onig_init();
+}
+
+extern int
+onig_init(void)
{
if (onig_inited != 0)
return 0;
onig_inited = 1;
- THREAD_ATOMIC_START;
+#if defined(ONIG_DEBUG_MEMLEAK) && defined(_MSC_VER)
+ _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF);
+#endif
onigenc_init();
- onigenc_set_default_caseconv_table((UChar* )0);
+ /* onigenc_set_default_caseconv_table((UChar* )0); */
#ifdef ONIG_DEBUG_STATISTICS
onig_statistics_init();
#endif
- THREAD_ATOMIC_END;
return 0;
}
+
+static OnigEndCallListItemType* EndCallTop;
+
+extern void onig_add_end_call(void (*func)(void))
+{
+ OnigEndCallListItemType* item;
+
+ item = (OnigEndCallListItemType* )xmalloc(sizeof(*item));
+ if (item == 0) return ;
+
+ item->next = EndCallTop;
+ item->func = func;
+
+ EndCallTop = item;
+}
+
+static void
+exec_end_call_list(void)
+{
+ OnigEndCallListItemType* prev;
+ void (*func)(void);
+
+ while (EndCallTop != 0) {
+ func = EndCallTop->func;
+ (*func)();
+
+ prev = EndCallTop;
+ EndCallTop = EndCallTop->next;
+ xfree(prev);
+ }
+}
+
extern int
-onig_end()
+onig_end(void)
{
+ exec_end_call_list();
+
#ifdef ONIG_DEBUG_STATISTICS
onig_print_statistics(stderr);
#endif
-#ifdef USE_RECYCLE_NODE
- onig_free_node_list();
+#if defined(ONIG_DEBUG_MEMLEAK) && defined(_MSC_VER)
+ _CrtDumpMemoryLeaks();
#endif
onig_inited = 0;
+
return 0;
}
+extern int
+onig_is_in_code_range(const UChar* p, OnigCodePoint code)
+{
+ OnigCodePoint n, *data;
+ OnigCodePoint low, high, x;
+
+ GET_CODE_POINT(n, p);
+ data = (OnigCodePoint* )p;
+ data++;
+
+ for (low = 0, high = n; low < high; ) {
+ x = (low + high) >> 1;
+ if (code > data[x * 2 + 1])
+ low = x + 1;
+ else
+ high = x;
+ }
+
+ return ((low < n && code >= data[low * 2]) ? 1 : 0);
+}
+
+extern int
+onig_is_code_in_cc_len(int elen, OnigCodePoint code, CClassNode* cc)
+{
+ int found;
+
+ if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) {
+ if (IS_NULL(cc->mbuf)) {
+ found = 0;
+ }
+ else {
+ found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0);
+ }
+ }
+ else {
+ found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);
+ }
+
+ if (IS_NCCLASS_NOT(cc))
+ return !found;
+ else
+ return found;
+}
+
+extern int
+onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
+{
+ int len;
+
+ if (ONIGENC_MBC_MINLEN(enc) > 1) {
+ len = 2;
+ }
+ else {
+ len = ONIGENC_CODE_TO_MBCLEN(enc, code);
+ }
+ return onig_is_code_in_cc_len(len, code, cc);
+}
+
#ifdef ONIG_DEBUG
+/* arguments type */
+# define ARG_SPECIAL -1
+# define ARG_NON 0
+# define ARG_RELADDR 1
+# define ARG_ABSADDR 2
+# define ARG_LENGTH 3
+# define ARG_MEMNUM 4
+# define ARG_OPTION 5
+# define ARG_STATE_CHECK 6
+
OnigOpInfoType OnigOpInfo[] = {
{ OP_FINISH, "finish", ARG_NON },
{ OP_END, "end", ARG_NON },
@@ -4941,64 +6172,81 @@ OnigOpInfoType OnigOpInfo[] = {
{ OP_ANYCHAR_ML_STAR, "anychar-ml*", ARG_NON },
{ OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL },
{ OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL },
- { OP_WORD, "word", ARG_NON },
- { OP_NOT_WORD, "not-word", ARG_NON },
- { OP_WORD_SB, "word-sb", ARG_NON },
- { OP_WORD_MB, "word-mb", ARG_NON },
- { OP_WORD_BOUND, "word-bound", ARG_NON },
- { OP_NOT_WORD_BOUND, "not-word-bound", ARG_NON },
- { OP_WORD_BEGIN, "word-begin", ARG_NON },
- { OP_WORD_END, "word-end", ARG_NON },
- { OP_BEGIN_BUF, "begin-buf", ARG_NON },
- { OP_END_BUF, "end-buf", ARG_NON },
- { OP_BEGIN_LINE, "begin-line", ARG_NON },
- { OP_END_LINE, "end-line", ARG_NON },
- { OP_SEMI_END_BUF, "semi-end-buf", ARG_NON },
- { OP_BEGIN_POSITION, "begin-position", ARG_NON },
- { OP_BACKREF1, "backref1", ARG_NON },
- { OP_BACKREF2, "backref2", ARG_NON },
- { OP_BACKREF3, "backref3", ARG_NON },
- { OP_BACKREFN, "backrefn", ARG_MEMNUM },
- { OP_BACKREFN_IC, "backrefn-ic", ARG_MEMNUM },
- { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL },
- { OP_BACKREF_MULTI_IC, "backref_multi-ic",ARG_SPECIAL },
- { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM },
- { OP_MEMORY_START, "mem-start", ARG_MEMNUM },
- { OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM },
- { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec", ARG_MEMNUM },
- { OP_MEMORY_END, "mem-end", ARG_MEMNUM },
- { OP_MEMORY_END_REC, "mem-end-rec", ARG_MEMNUM },
- { OP_SET_OPTION_PUSH, "set-option-push", ARG_OPTION },
- { OP_SET_OPTION, "set-option", ARG_OPTION },
- { OP_FAIL, "fail", ARG_NON },
- { OP_JUMP, "jump", ARG_RELADDR },
- { OP_PUSH, "push", ARG_RELADDR },
- { OP_POP, "pop", ARG_NON },
- { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL },
- { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL },
- { OP_REPEAT, "repeat", ARG_SPECIAL },
- { OP_REPEAT_NG, "repeat-ng", ARG_SPECIAL },
- { OP_REPEAT_INC, "repeat-inc", ARG_MEMNUM },
- { OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM },
- { OP_NULL_CHECK_START, "null-check-start",ARG_MEMNUM },
- { OP_NULL_CHECK_END, "null-check-end", ARG_MEMNUM },
- { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM },
- { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM },
- { OP_PUSH_POS, "push-pos", ARG_NON },
- { OP_POP_POS, "pop-pos", ARG_NON },
- { OP_PUSH_POS_NOT, "push-pos-not", ARG_RELADDR },
- { OP_FAIL_POS, "fail-pos", ARG_NON },
- { OP_PUSH_STOP_BT, "push-stop-bt", ARG_NON },
- { OP_POP_STOP_BT, "pop-stop-bt", ARG_NON },
- { OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL },
+ { OP_WORD, "word", ARG_NON },
+ { OP_NOT_WORD, "not-word", ARG_NON },
+ { OP_WORD_BOUND, "word-bound", ARG_NON },
+ { OP_NOT_WORD_BOUND, "not-word-bound", ARG_NON },
+ { OP_WORD_BEGIN, "word-begin", ARG_NON },
+ { OP_WORD_END, "word-end", ARG_NON },
+ { OP_ASCII_WORD, "ascii-word", ARG_NON },
+ { OP_NOT_ASCII_WORD, "not-ascii-word", ARG_NON },
+ { OP_ASCII_WORD_BOUND, "ascii-word-bound", ARG_NON },
+ { OP_NOT_ASCII_WORD_BOUND,"not-ascii-word-bound", ARG_NON },
+ { OP_ASCII_WORD_BEGIN, "ascii-word-begin", ARG_NON },
+ { OP_ASCII_WORD_END, "ascii-word-end", ARG_NON },
+ { OP_BEGIN_BUF, "begin-buf", ARG_NON },
+ { OP_END_BUF, "end-buf", ARG_NON },
+ { OP_BEGIN_LINE, "begin-line", ARG_NON },
+ { OP_END_LINE, "end-line", ARG_NON },
+ { OP_SEMI_END_BUF, "semi-end-buf", ARG_NON },
+ { OP_BEGIN_POSITION, "begin-position", ARG_NON },
+ { OP_BACKREF1, "backref1", ARG_NON },
+ { OP_BACKREF2, "backref2", ARG_NON },
+ { OP_BACKREFN, "backrefn", ARG_MEMNUM },
+ { OP_BACKREFN_IC, "backrefn-ic", ARG_SPECIAL },
+ { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL },
+ { OP_BACKREF_MULTI_IC, "backref_multi-ic", ARG_SPECIAL },
+ { OP_BACKREF_WITH_LEVEL, "backref_at_level", ARG_SPECIAL },
+ { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM },
+ { OP_MEMORY_START, "mem-start", ARG_MEMNUM },
+ { OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM },
+ { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec", ARG_MEMNUM },
+ { OP_MEMORY_END, "mem-end", ARG_MEMNUM },
+ { OP_MEMORY_END_REC, "mem-end-rec", ARG_MEMNUM },
+ { OP_SET_OPTION_PUSH, "set-option-push", ARG_OPTION },
+ { OP_SET_OPTION, "set-option", ARG_OPTION },
+ { OP_KEEP, "keep", ARG_NON },
+ { OP_FAIL, "fail", ARG_NON },
+ { OP_JUMP, "jump", ARG_RELADDR },
+ { OP_PUSH, "push", ARG_RELADDR },
+ { OP_POP, "pop", ARG_NON },
+ { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL },
+ { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL },
+ { OP_REPEAT, "repeat", ARG_SPECIAL },
+ { OP_REPEAT_NG, "repeat-ng", ARG_SPECIAL },
+ { OP_REPEAT_INC, "repeat-inc", ARG_MEMNUM },
+ { OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM },
+ { OP_REPEAT_INC_SG, "repeat-inc-sg", ARG_MEMNUM },
+ { OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg", ARG_MEMNUM },
+ { OP_NULL_CHECK_START, "null-check-start", ARG_MEMNUM },
+ { OP_NULL_CHECK_END, "null-check-end", ARG_MEMNUM },
+ { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM },
+ { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM },
+ { OP_PUSH_POS, "push-pos", ARG_NON },
+ { OP_POP_POS, "pop-pos", ARG_NON },
+ { OP_PUSH_POS_NOT, "push-pos-not", ARG_RELADDR },
+ { OP_FAIL_POS, "fail-pos", ARG_NON },
+ { OP_PUSH_STOP_BT, "push-stop-bt", ARG_NON },
+ { OP_POP_STOP_BT, "pop-stop-bt", ARG_NON },
+ { OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL },
{ OP_PUSH_LOOK_BEHIND_NOT, "push-look-behind-not", ARG_SPECIAL },
{ OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON },
- { OP_CALL, "call", ARG_ABSADDR },
- { OP_RETURN, "return", ARG_NON },
+ { OP_PUSH_ABSENT_POS, "push-absent-pos", ARG_NON },
+ { OP_ABSENT, "absent", ARG_RELADDR },
+ { OP_ABSENT_END, "absent-end", ARG_NON },
+ { OP_CALL, "call", ARG_ABSADDR },
+ { OP_RETURN, "return", ARG_NON },
+ { OP_CONDITION, "condition", ARG_SPECIAL },
+ { OP_STATE_CHECK_PUSH, "state-check-push", ARG_SPECIAL },
+ { OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL },
+ { OP_STATE_CHECK, "state-check", ARG_STATE_CHECK },
+ { OP_STATE_CHECK_ANYCHAR_STAR, "state-check-anychar*", ARG_STATE_CHECK },
+ { OP_STATE_CHECK_ANYCHAR_ML_STAR,
+ "state-check-anychar-ml*", ARG_STATE_CHECK },
{ -1, "", ARG_NON }
};
-static char*
+static const char*
op2name(int opcode)
{
int i;
@@ -5022,15 +6270,17 @@ op2arg_type(int opcode)
return ARG_SPECIAL;
}
+# ifdef ONIG_DEBUG_PARSE_TREE
static void
Indent(FILE* f, int indent)
{
int i;
for (i = 0; i < indent; i++) putc(' ', f);
}
+# endif /* ONIG_DEBUG_PARSE_TREE */
static void
-p_string(FILE* f, int len, UChar* s)
+p_string(FILE* f, ptrdiff_t len, UChar* s)
{
fputs(":", f);
while (len-- > 0) { fputc(*s++, f); }
@@ -5046,12 +6296,14 @@ p_len_string(FILE* f, LengthType len, int mb_len, UChar* s)
}
extern void
-onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp)
+onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp,
+ OnigEncoding enc)
{
int i, n, arg_type;
RelAddrType addr;
LengthType len;
MemNumType mem;
+ StateCheckNumType scn;
OnigCodePoint code;
UChar *q;
@@ -5063,9 +6315,8 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp)
case ARG_NON:
break;
case ARG_RELADDR:
- addr = *((RelAddrType* )bp);
- bp += SIZE_RELADDR;
- fprintf(f, ":(%d)", addr);
+ GET_RELADDR_INC(addr, bp);
+ fprintf(f, ":(%s%d)", (addr >= 0) ? "+" : "", addr);
break;
case ARG_ABSADDR:
GET_ABSADDR_INC(addr, bp);
@@ -5082,11 +6333,17 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp)
break;
case ARG_OPTION:
{
- OnigOptionType option = *((OnigOptionType* )bp);
- bp += SIZE_OPTION;
- fprintf(f, ":%d", option);
+ OnigOptionType option = *((OnigOptionType* )bp);
+ bp += SIZE_OPTION;
+ fprintf(f, ":%d", option);
}
break;
+
+ case ARG_STATE_CHECK:
+ scn = *((StateCheckNumType* )bp);
+ bp += SIZE_STATE_CHECK_NUM;
+ fprintf(f, ":%d", scn);
+ break;
}
}
else {
@@ -5108,7 +6365,7 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp)
p_len_string(f, len, 1, bp);
bp += len;
break;
-
+
case OP_EXACTMB2N1:
p_string(f, 2, bp); bp += 2; break;
case OP_EXACTMB2N2:
@@ -5127,18 +6384,20 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp)
break;
case OP_EXACTMBN:
{
- int mb_len;
-
- GET_LENGTH_INC(mb_len, bp);
- GET_LENGTH_INC(len, bp);
- fprintf(f, ":%d:%d:", mb_len, len);
- n = len * mb_len;
- while (n-- > 0) { fputc(*bp++, f); }
+ int mb_len;
+
+ GET_LENGTH_INC(mb_len, bp);
+ GET_LENGTH_INC(len, bp);
+ fprintf(f, ":%d:%d:", mb_len, len);
+ n = len * mb_len;
+ while (n-- > 0) { fputc(*bp++, f); }
}
break;
case OP_EXACT1_IC:
- p_string(f, 1, bp++);
+ len = enclen(enc, bp, bpend);
+ p_string(f, len, bp);
+ bp += len;
break;
case OP_EXACTN_IC:
GET_LENGTH_INC(len, bp);
@@ -5162,9 +6421,9 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp)
case OP_CCLASS_MB_NOT:
GET_LENGTH_INC(len, bp);
q = bp;
-#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
+# ifndef PLATFORM_UNALIGNED_WORD_ACCESS
ALIGNMENT_RIGHT(q);
-#endif
+# endif
GET_CODE_POINT(code, q);
bp += len;
fprintf(f, ":%d:%d", (int )code, len);
@@ -5176,33 +6435,59 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp)
bp += SIZE_BITSET;
GET_LENGTH_INC(len, bp);
q = bp;
-#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
+# ifndef PLATFORM_UNALIGNED_WORD_ACCESS
ALIGNMENT_RIGHT(q);
-#endif
+# endif
GET_CODE_POINT(code, q);
bp += len;
fprintf(f, ":%d:%d:%d", n, (int )code, len);
break;
- case OP_BACKREF_MULTI:
+ case OP_BACKREFN_IC:
+ mem = *((MemNumType* )bp);
+ bp += SIZE_MEMNUM;
+ fprintf(f, ":%d", mem);
+ break;
+
case OP_BACKREF_MULTI_IC:
+ case OP_BACKREF_MULTI:
fputs(" ", f);
GET_LENGTH_INC(len, bp);
for (i = 0; i < len; i++) {
- GET_MEMNUM_INC(mem, bp);
- if (i > 0) fputs(", ", f);
- fprintf(f, "%d", mem);
+ GET_MEMNUM_INC(mem, bp);
+ if (i > 0) fputs(", ", f);
+ fprintf(f, "%d", mem);
+ }
+ break;
+
+ case OP_BACKREF_WITH_LEVEL:
+ {
+ OnigOptionType option;
+ LengthType level;
+
+ GET_OPTION_INC(option, bp);
+ fprintf(f, ":%d", option);
+ GET_LENGTH_INC(level, bp);
+ fprintf(f, ":%d", level);
+
+ fputs(" ", f);
+ GET_LENGTH_INC(len, bp);
+ for (i = 0; i < len; i++) {
+ GET_MEMNUM_INC(mem, bp);
+ if (i > 0) fputs(", ", f);
+ fprintf(f, "%d", mem);
+ }
}
break;
case OP_REPEAT:
case OP_REPEAT_NG:
{
- mem = *((MemNumType* )bp);
- bp += SIZE_MEMNUM;
- addr = *((RelAddrType* )bp);
- bp += SIZE_RELADDR;
- fprintf(f, ":%d:%d", mem, addr);
+ mem = *((MemNumType* )bp);
+ bp += SIZE_MEMNUM;
+ addr = *((RelAddrType* )bp);
+ bp += SIZE_RELADDR;
+ fprintf(f, ":%d:%d", mem, addr);
}
break;
@@ -5210,7 +6495,7 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp)
case OP_PUSH_IF_PEEK_NEXT:
addr = *((RelAddrType* )bp);
bp += SIZE_RELADDR;
- fprintf(f, ":(%d)", addr);
+ fprintf(f, ":(%s%d)", (addr >= 0) ? "+" : "", addr);
p_string(f, 1, bp);
bp += 1;
break;
@@ -5223,18 +6508,34 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp)
case OP_PUSH_LOOK_BEHIND_NOT:
GET_RELADDR_INC(addr, bp);
GET_LENGTH_INC(len, bp);
- fprintf(f, ":%d:(%d)", len, addr);
+ fprintf(f, ":%d:(%s%d)", len, (addr >= 0) ? "+" : "", addr);
+ break;
+
+ case OP_STATE_CHECK_PUSH:
+ case OP_STATE_CHECK_PUSH_OR_JUMP:
+ scn = *((StateCheckNumType* )bp);
+ bp += SIZE_STATE_CHECK_NUM;
+ addr = *((RelAddrType* )bp);
+ bp += SIZE_RELADDR;
+ fprintf(f, ":%d:(%s%d)", scn, (addr >= 0) ? "+" : "", addr);
+ break;
+
+ case OP_CONDITION:
+ GET_MEMNUM_INC(mem, bp);
+ GET_RELADDR_INC(addr, bp);
+ fprintf(f, ":%d:(%s%d)", mem, (addr >= 0) ? "+" : "", addr);
break;
default:
fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n",
- *--bp);
+ bp[-1]);
}
}
fputs("]", f);
if (nextp) *nextp = bp;
}
+# ifdef ONIG_DEBUG_COMPILE
static void
print_compiled_byte_code_list(FILE* f, regex_t* reg)
{
@@ -5242,27 +6543,27 @@ print_compiled_byte_code_list(FILE* f, regex_t* reg)
UChar* bp = reg->p;
UChar* end = reg->p + reg->used;
- fprintf(f, "code length: %d\n", reg->used);
+ fprintf(f, "code length: %d", reg->used);
- ncode = 0;
+ ncode = -1;
while (bp < end) {
ncode++;
- if (bp > reg->p) {
- if (ncode % 5 == 0)
- fprintf(f, "\n");
- else
- fputs(" ", f);
- }
- onig_print_compiled_byte_code(f, bp, &bp);
+ if (ncode % 5 == 0)
+ fprintf(f, "\n%ld:", bp - reg->p);
+ else
+ fprintf(f, " %ld:", bp - reg->p);
+ onig_print_compiled_byte_code(f, bp, end, &bp, reg->enc);
}
fprintf(f, "\n");
}
+# endif /* ONIG_DEBUG_COMPILE */
+# ifdef ONIG_DEBUG_PARSE_TREE
static void
print_indent_tree(FILE* f, Node* node, int indent)
{
- int i, type;
+ int i, type, container_p = 0;
int add = 3;
UChar* p;
@@ -5274,71 +6575,73 @@ print_indent_tree(FILE* f, Node* node, int indent)
type = NTYPE(node);
switch (type) {
- case N_LIST:
- case N_ALT:
- if (NTYPE(node) == N_LIST)
- fprintf(f, "<list:%x>\n", (int )node);
+ case NT_LIST:
+ case NT_ALT:
+ if (NTYPE(node) == NT_LIST)
+ fprintf(f, "<list:%"PRIxPTR">\n", (intptr_t )node);
else
- fprintf(f, "<alt:%x>\n", (int )node);
+ fprintf(f, "<alt:%"PRIxPTR">\n", (intptr_t )node);
- print_indent_tree(f, NCONS(node).left, indent + add);
- while (IS_NOT_NULL(node = NCONS(node).right)) {
+ print_indent_tree(f, NCAR(node), indent + add);
+ while (IS_NOT_NULL(node = NCDR(node))) {
if (NTYPE(node) != type) {
- fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node));
- exit(0);
+ fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node));
+ exit(0);
}
- print_indent_tree(f, NCONS(node).left, indent + add);
+ print_indent_tree(f, NCAR(node), indent + add);
}
break;
- case N_STRING:
- fprintf(f, "<string%s:%x>",
- (NSTRING_IS_RAW(node) ? "-raw" : ""), (int )node);
- for (p = NSTRING(node).s; p < NSTRING(node).end; p++) {
+ case NT_STR:
+ fprintf(f, "<string%s:%"PRIxPTR">",
+ (NSTRING_IS_RAW(node) ? "-raw" : ""), (intptr_t )node);
+ for (p = NSTR(node)->s; p < NSTR(node)->end; p++) {
if (*p >= 0x20 && *p < 0x7f)
- fputc(*p, f);
+ fputc(*p, f);
else {
- fprintf(f, " 0x%02x", *p);
+ fprintf(f, " 0x%02x", *p);
}
}
break;
- case N_CCLASS:
- fprintf(f, "<cclass:%x>", (int )node);
- if (NCCLASS(node).not) fputs(" not", f);
- if (NCCLASS(node).mbuf) {
- BBuf* bbuf = NCCLASS(node).mbuf;
- for (i = 0; i < bbuf->used; i++) {
- if (i > 0) fprintf(f, ",");
- fprintf(f, "%0x", bbuf->p[i]);
+ case NT_CCLASS:
+ fprintf(f, "<cclass:%"PRIxPTR">", (intptr_t )node);
+ if (IS_NCCLASS_NOT(NCCLASS(node))) fputs("not ", f);
+ if (NCCLASS(node)->mbuf) {
+ BBuf* bbuf = NCCLASS(node)->mbuf;
+ OnigCodePoint* data = (OnigCodePoint* )bbuf->p;
+ OnigCodePoint* end = (OnigCodePoint* )(bbuf->p + bbuf->used);
+ fprintf(f, "%d", *data++);
+ for (; data < end; data+=2) {
+ fprintf(f, ",");
+ fprintf(f, "%04x-%04x", data[0], data[1]);
}
}
-#if 0
- fprintf(f, "\n");
- Indent(f, indent);
- for (i = 0; i < SINGLE_BYTE_SIZE; i++)
- fputc((BITSET_AT(NCCLASS(node).bs, i) ? '1' : '0'), f);
-#endif
break;
- case N_CTYPE:
- fprintf(f, "<ctype:%x> ", (int )node);
- switch (NCTYPE(node).type) {
- case CTYPE_WORD: fputs("word", f); break;
- case CTYPE_NOT_WORD: fputs("not word", f); break;
+ case NT_CTYPE:
+ fprintf(f, "<ctype:%"PRIxPTR"> ", (intptr_t )node);
+ switch (NCTYPE(node)->ctype) {
+ case ONIGENC_CTYPE_WORD:
+ if (NCTYPE(node)->not != 0)
+ fputs("not word", f);
+ else
+ fputs("word", f);
+ break;
+
default:
fprintf(f, "ERROR: undefined ctype.\n");
exit(0);
}
break;
- case N_ANYCHAR:
- fprintf(f, "<anychar:%x>", (int )node);
+ case NT_CANY:
+ fprintf(f, "<anychar:%"PRIxPTR">", (intptr_t )node);
break;
- case N_ANCHOR:
- fprintf(f, "<anchor:%x> ", (int )node);
- switch (NANCHOR(node).type) {
+ case NT_ANCHOR:
+ fprintf(f, "<anchor:%"PRIxPTR"> ", (intptr_t )node);
+ switch (NANCHOR(node)->type) {
case ANCHOR_BEGIN_BUF: fputs("begin buf", f); break;
case ANCHOR_END_BUF: fputs("end buf", f); break;
case ANCHOR_BEGIN_LINE: fputs("begin line", f); break;
@@ -5348,14 +6651,15 @@ print_indent_tree(FILE* f, Node* node, int indent)
case ANCHOR_WORD_BOUND: fputs("word bound", f); break;
case ANCHOR_NOT_WORD_BOUND: fputs("not word bound", f); break;
-#ifdef USE_WORD_BEGIN_END
+# ifdef USE_WORD_BEGIN_END
case ANCHOR_WORD_BEGIN: fputs("word begin", f); break;
case ANCHOR_WORD_END: fputs("word end", f); break;
-#endif
- case ANCHOR_PREC_READ: fputs("prec read", f); break;
- case ANCHOR_PREC_READ_NOT: fputs("prec read not", f); break;
- case ANCHOR_LOOK_BEHIND: fputs("look_behind", f); break;
- case ANCHOR_LOOK_BEHIND_NOT: fputs("look_behind_not",f); break;
+# endif
+ case ANCHOR_PREC_READ: fputs("prec read", f); container_p = TRUE; break;
+ case ANCHOR_PREC_READ_NOT: fputs("prec read not", f); container_p = TRUE; break;
+ case ANCHOR_LOOK_BEHIND: fputs("look_behind", f); container_p = TRUE; break;
+ case ANCHOR_LOOK_BEHIND_NOT: fputs("look_behind_not",f); container_p = TRUE; break;
+ case ANCHOR_KEEP: fputs("keep",f); break;
default:
fprintf(f, "ERROR: undefined anchor type.\n");
@@ -5363,55 +6667,60 @@ print_indent_tree(FILE* f, Node* node, int indent)
}
break;
- case N_BACKREF:
+ case NT_BREF:
{
int* p;
- BackrefNode* br = &(NBACKREF(node));
+ BRefNode* br = NBREF(node);
p = BACKREFS_P(br);
- fprintf(f, "<backref:%x>", (int )node);
+ fprintf(f, "<backref:%"PRIxPTR">", (intptr_t )node);
for (i = 0; i < br->back_num; i++) {
- if (i > 0) fputs(", ", f);
- fprintf(f, "%d", p[i]);
+ if (i > 0) fputs(", ", f);
+ fprintf(f, "%d", p[i]);
}
}
break;
-#ifdef USE_SUBEXP_CALL
- case N_CALL:
+# ifdef USE_SUBEXP_CALL
+ case NT_CALL:
{
- CallNode* cn = &(NCALL(node));
- fprintf(f, "<call:%x>", (int )node);
+ CallNode* cn = NCALL(node);
+ fprintf(f, "<call:%"PRIxPTR">", (intptr_t )node);
p_string(f, cn->name_end - cn->name, cn->name);
}
break;
-#endif
+# endif
- case N_QUALIFIER:
- fprintf(f, "<qualifier:%x>{%d,%d}%s\n", (int )node,
- NQUALIFIER(node).lower, NQUALIFIER(node).upper,
- (NQUALIFIER(node).greedy ? "" : "?"));
- print_indent_tree(f, NQUALIFIER(node).target, indent + add);
+ case NT_QTFR:
+ fprintf(f, "<quantifier:%"PRIxPTR">{%d,%d}%s\n", (intptr_t )node,
+ NQTFR(node)->lower, NQTFR(node)->upper,
+ (NQTFR(node)->greedy ? "" : "?"));
+ print_indent_tree(f, NQTFR(node)->target, indent + add);
break;
- case N_EFFECT:
- fprintf(f, "<effect:%x> ", (int )node);
- switch (NEFFECT(node).type) {
- case EFFECT_OPTION:
- fprintf(f, "option:%d\n", NEFFECT(node).option);
- print_indent_tree(f, NEFFECT(node).target, indent + add);
+ case NT_ENCLOSE:
+ fprintf(f, "<enclose:%"PRIxPTR"> ", (intptr_t )node);
+ switch (NENCLOSE(node)->type) {
+ case ENCLOSE_OPTION:
+ fprintf(f, "option:%d", NENCLOSE(node)->option);
break;
- case EFFECT_MEMORY:
- fprintf(f, "memory:%d", NEFFECT(node).regnum);
+ case ENCLOSE_MEMORY:
+ fprintf(f, "memory:%d", NENCLOSE(node)->regnum);
break;
- case EFFECT_STOP_BACKTRACK:
+ case ENCLOSE_STOP_BACKTRACK:
fprintf(f, "stop-bt");
break;
+ case ENCLOSE_CONDITION:
+ fprintf(f, "condition:%d", NENCLOSE(node)->regnum);
+ break;
+ case ENCLOSE_ABSENT:
+ fprintf(f, "absent");
+ break;
default:
break;
}
fprintf(f, "\n");
- print_indent_tree(f, NEFFECT(node).target, indent + add);
+ print_indent_tree(f, NENCLOSE(node)->target, indent + add);
break;
default:
@@ -5419,17 +6728,19 @@ print_indent_tree(FILE* f, Node* node, int indent)
break;
}
- if (type != N_LIST && type != N_ALT && type != N_QUALIFIER &&
- type != N_EFFECT)
+ if (type != NT_LIST && type != NT_ALT && type != NT_QTFR &&
+ type != NT_ENCLOSE)
fprintf(f, "\n");
+
+ if (container_p) print_indent_tree(f, NANCHOR(node)->target, indent + add);
+
fflush(f);
}
-#endif /* ONIG_DEBUG */
-#ifdef ONIG_DEBUG_PARSE_TREE
static void
print_tree(FILE* f, Node* node)
{
print_indent_tree(f, node, 0);
}
-#endif
+# endif /* ONIG_DEBUG_PARSE_TREE */
+#endif /* ONIG_DEBUG */