summaryrefslogtreecommitdiff
path: root/regexec.c
diff options
context:
space:
mode:
Diffstat (limited to 'regexec.c')
-rw-r--r--regexec.c3545
1 files changed, 2149 insertions, 1396 deletions
diff --git a/regexec.c b/regexec.c
index a7b948e99c..3210c7cc1b 100644
--- a/regexec.c
+++ b/regexec.c
@@ -2,8 +2,8 @@
regexec.c - Onigmo (Oniguruma-mod) (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
- * Copyright (c) 2011-2016 K.Takata <kentkt AT csc DOT jp>
+ * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2011-2019 K.Takata <kentkt AT csc DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -55,7 +55,7 @@ rb_enc_asciicompat(OnigEncoding enc)
# define ONIGENC_IS_MBC_ASCII_WORD(enc,s,end) \
(rb_enc_asciicompat(enc) ? (ISALNUM(*s) || *s=='_') : \
onigenc_ascii_is_code_ctype( \
- ONIGENC_MBC_TO_CODE(enc,s,end),ONIGENC_CTYPE_WORD,enc))
+ ONIGENC_MBC_TO_CODE(enc,s,end),ONIGENC_CTYPE_WORD,enc))
#endif /* RUBY */
#ifdef USE_CRNL_AS_LINE_TERMINATOR
@@ -66,28 +66,28 @@ rb_enc_asciicompat(OnigEncoding enc)
is_mbc_newline_ex((enc),(p),(start),(end),(option),(check_prev))
static int
is_mbc_newline_ex(OnigEncoding enc, const UChar *p, const UChar *start,
- const UChar *end, OnigOptionType option, int check_prev)
+ const UChar *end, OnigOptionType option, int check_prev)
{
if (IS_NEWLINE_CRLF(option)) {
if (ONIGENC_MBC_TO_CODE(enc, p, end) == 0x0a) {
if (check_prev) {
- const UChar *prev = onigenc_get_prev_char_head(enc, start, p, end);
- if ((prev != NULL) && ONIGENC_MBC_TO_CODE(enc, prev, end) == 0x0d)
- return 0;
- else
- return 1;
+ const UChar *prev = onigenc_get_prev_char_head(enc, start, p, end);
+ if ((prev != NULL) && ONIGENC_MBC_TO_CODE(enc, prev, end) == 0x0d)
+ return 0;
+ else
+ return 1;
}
else
- return 1;
+ return 1;
}
else {
const UChar *pnext = p + enclen(enc, p, end);
if (pnext < end &&
- ONIGENC_MBC_TO_CODE(enc, p, end) == 0x0d &&
- ONIGENC_MBC_TO_CODE(enc, pnext, end) == 0x0a)
- return 1;
+ ONIGENC_MBC_TO_CODE(enc, p, end) == 0x0d &&
+ ONIGENC_MBC_TO_CODE(enc, pnext, end) == 0x0a)
+ return 1;
if (ONIGENC_IS_MBC_NEWLINE(enc, p, end))
- return 1;
+ return 1;
return 0;
}
}
@@ -111,7 +111,7 @@ history_tree_clear(OnigCaptureTreeNode* node)
if (IS_NOT_NULL(node)) {
for (i = 0; i < node->num_childs; i++) {
if (IS_NOT_NULL(node->childs[i])) {
- history_tree_free(node->childs[i]);
+ history_tree_free(node->childs[i]);
}
}
for (i = 0; i < node->allocated; i++) {
@@ -170,18 +170,18 @@ history_tree_add_child(OnigCaptureTreeNode* parent, OnigCaptureTreeNode* child)
if (IS_NULL(parent->childs)) {
n = HISTORY_TREE_INIT_ALLOC_SIZE;
parent->childs =
- (OnigCaptureTreeNode** )xmalloc(sizeof(OnigCaptureTreeNode*) * n);
+ (OnigCaptureTreeNode** )xmalloc(sizeof(OnigCaptureTreeNode*) * n);
CHECK_NULL_RETURN_MEMERR(parent->childs);
}
else {
OnigCaptureTreeNode** tmp;
n = parent->allocated * 2;
tmp =
- (OnigCaptureTreeNode** )xrealloc(parent->childs,
- sizeof(OnigCaptureTreeNode*) * n);
+ (OnigCaptureTreeNode** )xrealloc(parent->childs,
+ sizeof(OnigCaptureTreeNode*) * n);
if (tmp == 0) {
- history_tree_clear(parent);
- return ONIGERR_MEMORY;
+ history_tree_clear(parent);
+ return ONIGERR_MEMORY;
}
parent->childs = tmp;
}
@@ -231,6 +231,659 @@ onig_get_capture_tree(OnigRegion* region)
}
#endif /* USE_CAPTURE_HISTORY */
+#ifdef USE_MATCH_CACHE
+
+/*
+Glossary for "match cache"
+
+"match cache" or "match cache optimization"
+The `Regexp#match` optimization by using a cache.
+
+"cache opcode"
+A cacheable opcode (e.g. `OP_PUSH`, `OP_REPEAT`, etc).
+It is corresponding to some cache points.
+
+"cache point"
+A cacheable point on matching.
+Usually, one-to-one corresponding between a cache opcode and a cache point exists,
+but cache opcodes between `OP_REPEAT` and `OP_REPEAT_INC` have some corresponding
+cache points depending on repetition counts.
+
+"match cache point"
+A pair of a cache point and a position on an input string.
+We encode a match cache point to an integer value by the following equation:
+"match cache point" = "position on input string" * "total number of cache points" + "cache point"
+
+"match cache buffer"
+A bit-array for memoizing (recording) match cache points once backtracked.
+*/
+
+static OnigPosition count_num_cache_opcodes_inner(
+ const regex_t* reg,
+ MemNumType current_repeat_mem, int lookaround_nesting,
+ UChar** pp, long* num_cache_opcodes_ptr
+)
+{
+ UChar* p = *pp;
+ UChar* pend = reg->p + reg->used;
+ LengthType len;
+ MemNumType repeat_mem;
+ OnigEncoding enc = reg->enc;
+ long num_cache_opcodes = *num_cache_opcodes_ptr;
+ OnigPosition result;
+
+ while (p < pend) {
+ switch (*p++) {
+ case OP_FINISH:
+ case OP_END:
+ break;
+
+ case OP_EXACT1: p++; break;
+ case OP_EXACT2: p += 2; break;
+ case OP_EXACT3: p += 3; break;
+ case OP_EXACT4: p += 4; break;
+ case OP_EXACT5: p += 5; break;
+ case OP_EXACTN:
+ GET_LENGTH_INC(len, p); p += len; break;
+ case OP_EXACTMB2N1: p += 2; break;
+ case OP_EXACTMB2N2: p += 4; break;
+ case OP_EXACTMB2N3: p += 6; break;
+ case OP_EXACTMB2N:
+ GET_LENGTH_INC(len, p); p += len * 2; break;
+ case OP_EXACTMB3N:
+ GET_LENGTH_INC(len, p); p += len * 3; break;
+ case OP_EXACTMBN:
+ {
+ int mb_len;
+ GET_LENGTH_INC(mb_len, p);
+ GET_LENGTH_INC(len, p);
+ p += mb_len * len;
+ }
+ break;
+
+ case OP_EXACT1_IC:
+ len = enclen(enc, p, pend); p += len; break;
+ case OP_EXACTN_IC:
+ GET_LENGTH_INC(len, p); p += len; break;
+
+ case OP_CCLASS:
+ case OP_CCLASS_NOT:
+ p += SIZE_BITSET; break;
+ case OP_CCLASS_MB:
+ case OP_CCLASS_MB_NOT:
+ GET_LENGTH_INC(len, p); p += len; break;
+ case OP_CCLASS_MIX:
+ case OP_CCLASS_MIX_NOT:
+ p += SIZE_BITSET;
+ GET_LENGTH_INC(len, p);
+ p += len;
+ break;
+
+ case OP_ANYCHAR:
+ case OP_ANYCHAR_ML:
+ break;
+ case OP_ANYCHAR_STAR:
+ case OP_ANYCHAR_ML_STAR:
+ num_cache_opcodes++; break;
+ case OP_ANYCHAR_STAR_PEEK_NEXT:
+ case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
+ p++; num_cache_opcodes++; break;
+
+ case OP_WORD:
+ case OP_NOT_WORD:
+ case OP_WORD_BOUND:
+ case OP_NOT_WORD_BOUND:
+ case OP_WORD_BEGIN:
+ case OP_WORD_END:
+ break;
+
+ case OP_ASCII_WORD:
+ case OP_NOT_ASCII_WORD:
+ case OP_ASCII_WORD_BOUND:
+ case OP_NOT_ASCII_WORD_BOUND:
+ case OP_ASCII_WORD_BEGIN:
+ case OP_ASCII_WORD_END:
+ break;
+
+ case OP_BEGIN_BUF:
+ case OP_END_BUF:
+ case OP_BEGIN_LINE:
+ case OP_END_LINE:
+ case OP_SEMI_END_BUF:
+ case OP_BEGIN_POSITION:
+ break;
+
+ case OP_BACKREF1:
+ case OP_BACKREF2:
+ case OP_BACKREFN:
+ case OP_BACKREFN_IC:
+ case OP_BACKREF_MULTI:
+ case OP_BACKREF_MULTI_IC:
+ case OP_BACKREF_WITH_LEVEL:
+ goto impossible;
+
+ case OP_MEMORY_START:
+ case OP_MEMORY_START_PUSH:
+ case OP_MEMORY_END_PUSH:
+ case OP_MEMORY_END_PUSH_REC:
+ case OP_MEMORY_END:
+ case OP_MEMORY_END_REC:
+ p += SIZE_MEMNUM;
+ // A memory (capture) in look-around is found.
+ if (lookaround_nesting != 0) {
+ goto impossible;
+ }
+ break;
+
+ case OP_KEEP:
+ break;
+
+ case OP_FAIL:
+ break;
+ case OP_JUMP:
+ p += SIZE_RELADDR;
+ break;
+ case OP_PUSH:
+ p += SIZE_RELADDR;
+ num_cache_opcodes++;
+ break;
+ case OP_POP:
+ break;
+ case OP_PUSH_OR_JUMP_EXACT1:
+ case OP_PUSH_IF_PEEK_NEXT:
+ p += SIZE_RELADDR + 1; num_cache_opcodes++; break;
+ case OP_REPEAT:
+ case OP_REPEAT_NG:
+ if (current_repeat_mem != -1) {
+ // A nested OP_REPEAT is not yet supported.
+ goto impossible;
+ }
+ GET_MEMNUM_INC(repeat_mem, p);
+ p += SIZE_RELADDR;
+ if (reg->repeat_range[repeat_mem].lower == 0 && reg->repeat_range[repeat_mem].upper == 0) {
+ long dummy_num_cache_opcodes = 0;
+ result = count_num_cache_opcodes_inner(reg, repeat_mem, lookaround_nesting, &p, &dummy_num_cache_opcodes);
+ if (result < 0 || dummy_num_cache_opcodes < 0) {
+ goto fail;
+ }
+ } else {
+ if (reg->repeat_range[repeat_mem].lower == 0) {
+ num_cache_opcodes++;
+ }
+ result = count_num_cache_opcodes_inner(reg, repeat_mem, lookaround_nesting, &p, &num_cache_opcodes);
+ if (result < 0 || num_cache_opcodes < 0) {
+ goto fail;
+ }
+ OnigRepeatRange *repeat_range = &reg->repeat_range[repeat_mem];
+ if (repeat_range->lower < repeat_range->upper) {
+ num_cache_opcodes++;
+ }
+ }
+ break;
+ case OP_REPEAT_INC:
+ case OP_REPEAT_INC_NG:
+ GET_MEMNUM_INC(repeat_mem, p);
+ if (repeat_mem != current_repeat_mem) {
+ // A lone or invalid OP_REPEAT_INC is found.
+ goto impossible;
+ }
+ goto exit;
+ case OP_REPEAT_INC_SG:
+ case OP_REPEAT_INC_NG_SG:
+ goto impossible;
+ case OP_NULL_CHECK_START:
+ p += SIZE_MEMNUM;
+ break;
+ case OP_NULL_CHECK_END:
+ case OP_NULL_CHECK_END_MEMST_PUSH:
+ p += SIZE_MEMNUM;
+ break;
+ case OP_NULL_CHECK_END_MEMST:
+ p += SIZE_MEMNUM;
+ break;
+
+ case OP_PUSH_POS:
+ if (lookaround_nesting < 0) {
+ // A look-around nested in a atomic grouping is found.
+ goto impossible;
+ }
+ result = count_num_cache_opcodes_inner(reg, current_repeat_mem, lookaround_nesting + 1, &p, &num_cache_opcodes);
+ if (result < 0 || num_cache_opcodes < 0) {
+ goto fail;
+ }
+ break;
+ case OP_PUSH_POS_NOT:
+ if (lookaround_nesting < 0) {
+ // A look-around nested in a atomic grouping is found.
+ goto impossible;
+ }
+ p += SIZE_RELADDR;
+ result = count_num_cache_opcodes_inner(reg, current_repeat_mem, lookaround_nesting + 1, &p, &num_cache_opcodes);
+ if (result < 0 || num_cache_opcodes < 0) {
+ goto fail;
+ }
+ break;
+ case OP_PUSH_LOOK_BEHIND_NOT:
+ if (lookaround_nesting < 0) {
+ // A look-around nested in a atomic grouping is found.
+ goto impossible;
+ }
+ p += SIZE_RELADDR;
+ p += SIZE_LENGTH;
+ result = count_num_cache_opcodes_inner(reg, current_repeat_mem, lookaround_nesting + 1, &p, &num_cache_opcodes);
+ if (result < 0 || num_cache_opcodes < 0) {
+ goto fail;
+ }
+ break;
+ case OP_PUSH_STOP_BT:
+ if (lookaround_nesting != 0) {
+ // A nested atomic grouping is found.
+ goto impossible;
+ }
+ result = count_num_cache_opcodes_inner(reg, current_repeat_mem, -1, &p, &num_cache_opcodes);
+ if (result < 0 || num_cache_opcodes < 0) {
+ goto fail;
+ }
+ break;
+ case OP_POP_POS:
+ case OP_FAIL_POS:
+ case OP_FAIL_LOOK_BEHIND_NOT:
+ case OP_POP_STOP_BT:
+ goto exit;
+ case OP_LOOK_BEHIND:
+ p += SIZE_LENGTH;
+ break;
+
+ case OP_PUSH_ABSENT_POS:
+ case OP_ABSENT_END:
+ case OP_ABSENT:
+ goto impossible;
+
+ case OP_CALL:
+ case OP_RETURN:
+ goto impossible;
+
+ case OP_CONDITION:
+ goto impossible;
+
+ case OP_STATE_CHECK_PUSH:
+ case OP_STATE_CHECK_PUSH_OR_JUMP:
+ case OP_STATE_CHECK:
+ case OP_STATE_CHECK_ANYCHAR_STAR:
+ case OP_STATE_CHECK_ANYCHAR_ML_STAR:
+ goto impossible;
+
+ case OP_SET_OPTION_PUSH:
+ case OP_SET_OPTION:
+ p += SIZE_OPTION;
+ break;
+
+ default:
+ goto bytecode_error;
+ }
+ }
+
+exit:
+ *pp = p;
+ *num_cache_opcodes_ptr = num_cache_opcodes;
+ return 0;
+
+fail:
+ *num_cache_opcodes_ptr = num_cache_opcodes;
+ return result;
+
+impossible:
+ *num_cache_opcodes_ptr = NUM_CACHE_OPCODES_IMPOSSIBLE;
+ return 0;
+
+bytecode_error:
+ return ONIGERR_UNDEFINED_BYTECODE;
+}
+
+/* count the total number of cache opcodes for allocating a match cache buffer. */
+static OnigPosition
+count_num_cache_opcodes(const regex_t* reg, long* num_cache_opcodes_ptr)
+{
+ UChar* p = reg->p;
+ *num_cache_opcodes_ptr = 0;
+ OnigPosition result = count_num_cache_opcodes_inner(reg, -1, 0, &p, num_cache_opcodes_ptr);
+ if (result == 0 && *num_cache_opcodes_ptr >= 0 && p != reg->p + reg->used) {
+ return ONIGERR_UNDEFINED_BYTECODE;
+ }
+
+ return result;
+}
+
+static OnigPosition
+init_cache_opcodes_inner(
+ const regex_t* reg,
+ MemNumType current_repeat_mem, int lookaround_nesting,
+ OnigCacheOpcode** cache_opcodes_ptr, UChar** pp, long* num_cache_points_ptr
+)
+{
+ UChar* p = *pp;
+ UChar* pend = reg->p + reg->used;
+ UChar* pbegin;
+ LengthType len;
+ MemNumType repeat_mem;
+ OnigEncoding enc = reg->enc;
+ long cache_point = *num_cache_points_ptr;
+ OnigCacheOpcode *cache_opcodes = *cache_opcodes_ptr;
+ OnigPosition result;
+
+# define INC_CACHE_OPCODES if (cache_opcodes != NULL) {\
+ cache_opcodes->addr = pbegin;\
+ cache_opcodes->cache_point = cache_point;\
+ cache_opcodes->outer_repeat_mem = current_repeat_mem;\
+ cache_opcodes->num_cache_points_at_outer_repeat = 0;\
+ cache_opcodes->num_cache_points_in_outer_repeat = 0;\
+ cache_opcodes->lookaround_nesting = lookaround_nesting;\
+ cache_opcodes->match_addr = NULL;\
+ cache_point += lookaround_nesting != 0 ? 2 : 1;\
+ cache_opcodes++;\
+ }
+
+ while (p < pend) {
+ pbegin = p;
+ switch (*p++) {
+ case OP_FINISH:
+ case OP_END:
+ break;
+
+ case OP_EXACT1: p++; break;
+ case OP_EXACT2: p += 2; break;
+ case OP_EXACT3: p += 3; break;
+ case OP_EXACT4: p += 4; break;
+ case OP_EXACT5: p += 5; break;
+ case OP_EXACTN:
+ GET_LENGTH_INC(len, p); p += len; break;
+ case OP_EXACTMB2N1: p += 2; break;
+ case OP_EXACTMB2N2: p += 4; break;
+ case OP_EXACTMB2N3: p += 6; break;
+ case OP_EXACTMB2N:
+ GET_LENGTH_INC(len, p); p += len * 2; break;
+ case OP_EXACTMB3N:
+ GET_LENGTH_INC(len, p); p += len * 3; break;
+ case OP_EXACTMBN:
+ {
+ int mb_len;
+ GET_LENGTH_INC(mb_len, p);
+ GET_LENGTH_INC(len, p);
+ p += mb_len * len;
+ }
+ break;
+
+ case OP_EXACT1_IC:
+ len = enclen(enc, p, pend); p += len; break;
+ case OP_EXACTN_IC:
+ GET_LENGTH_INC(len, p); p += len; break;
+
+ case OP_CCLASS:
+ case OP_CCLASS_NOT:
+ p += SIZE_BITSET; break;
+ case OP_CCLASS_MB:
+ case OP_CCLASS_MB_NOT:
+ GET_LENGTH_INC(len, p); p += len; break;
+ case OP_CCLASS_MIX:
+ case OP_CCLASS_MIX_NOT:
+ p += SIZE_BITSET;
+ GET_LENGTH_INC(len, p);
+ p += len;
+ break;
+
+ case OP_ANYCHAR:
+ case OP_ANYCHAR_ML:
+ break;
+ case OP_ANYCHAR_STAR:
+ case OP_ANYCHAR_ML_STAR:
+ INC_CACHE_OPCODES;
+ break;
+ case OP_ANYCHAR_STAR_PEEK_NEXT:
+ case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
+ p++;
+ INC_CACHE_OPCODES;
+ break;
+
+ case OP_WORD:
+ case OP_NOT_WORD:
+ case OP_WORD_BOUND:
+ case OP_NOT_WORD_BOUND:
+ case OP_WORD_BEGIN:
+ case OP_WORD_END:
+ break;
+
+ case OP_ASCII_WORD:
+ case OP_NOT_ASCII_WORD:
+ case OP_ASCII_WORD_BOUND:
+ case OP_NOT_ASCII_WORD_BOUND:
+ case OP_ASCII_WORD_BEGIN:
+ case OP_ASCII_WORD_END:
+ break;
+
+ case OP_BEGIN_BUF:
+ case OP_END_BUF:
+ case OP_BEGIN_LINE:
+ case OP_END_LINE:
+ case OP_SEMI_END_BUF:
+ case OP_BEGIN_POSITION:
+ break;
+
+ case OP_BACKREF1:
+ case OP_BACKREF2:
+ case OP_BACKREFN:
+ case OP_BACKREFN_IC:
+ case OP_BACKREF_MULTI:
+ case OP_BACKREF_MULTI_IC:
+ case OP_BACKREF_WITH_LEVEL:
+ goto unexpected_bytecode_error;
+
+ case OP_MEMORY_START:
+ case OP_MEMORY_START_PUSH:
+ case OP_MEMORY_END_PUSH:
+ case OP_MEMORY_END_PUSH_REC:
+ case OP_MEMORY_END:
+ case OP_MEMORY_END_REC:
+ p += SIZE_MEMNUM;
+ if (lookaround_nesting != 0) {
+ goto unexpected_bytecode_error;
+ }
+ break;
+
+ case OP_KEEP:
+ break;
+
+ case OP_FAIL:
+ break;
+ case OP_JUMP:
+ p += SIZE_RELADDR;
+ break;
+ case OP_PUSH:
+ p += SIZE_RELADDR;
+ INC_CACHE_OPCODES;
+ break;
+ case OP_POP:
+ break;
+ case OP_PUSH_OR_JUMP_EXACT1:
+ case OP_PUSH_IF_PEEK_NEXT:
+ p += SIZE_RELADDR + 1;
+ INC_CACHE_OPCODES;
+ break;
+ case OP_REPEAT:
+ case OP_REPEAT_NG:
+ GET_MEMNUM_INC(repeat_mem, p);
+ p += SIZE_RELADDR;
+ if (reg->repeat_range[repeat_mem].lower == 0 && reg->repeat_range[repeat_mem].upper == 0) {
+ long dummy_num_cache_points = 0;
+ OnigCacheOpcode* dummy_cache_opcodes = NULL;
+ result = init_cache_opcodes_inner(reg, repeat_mem, lookaround_nesting, &dummy_cache_opcodes, &p, &dummy_num_cache_points);
+ if (result != 0) {
+ goto fail;
+ }
+ } else {
+ if (reg->repeat_range[repeat_mem].lower == 0) {
+ INC_CACHE_OPCODES;
+ }
+ {
+ long num_cache_points_in_repeat = 0;
+ long num_cache_points_at_repeat = cache_point;
+ OnigCacheOpcode* cache_opcodes_in_repeat = cache_opcodes;
+ result = init_cache_opcodes_inner(reg, repeat_mem, lookaround_nesting, &cache_opcodes, &p, &num_cache_points_in_repeat);
+ if (result != 0) {
+ goto fail;
+ }
+ OnigRepeatRange *repeat_range = &reg->repeat_range[repeat_mem];
+ if (repeat_range->lower < repeat_range->upper) {
+ INC_CACHE_OPCODES;
+ cache_point -= lookaround_nesting != 0 ? 2 : 1;
+ }
+ int repeat_bounds = repeat_range->upper == 0x7fffffff ? 1 : repeat_range->upper - repeat_range->lower;
+ cache_point += num_cache_points_in_repeat * repeat_range->lower + (num_cache_points_in_repeat + (lookaround_nesting != 0 ? 2 : 1)) * repeat_bounds;
+ for (; cache_opcodes_in_repeat < cache_opcodes; cache_opcodes_in_repeat++) {
+ cache_opcodes_in_repeat->num_cache_points_at_outer_repeat = num_cache_points_at_repeat;
+ cache_opcodes_in_repeat->num_cache_points_in_outer_repeat = num_cache_points_in_repeat;
+ }
+ }
+ }
+ break;
+ case OP_REPEAT_INC:
+ case OP_REPEAT_INC_NG:
+ p += SIZE_MEMNUM;
+ goto exit;
+ case OP_REPEAT_INC_SG:
+ case OP_REPEAT_INC_NG_SG:
+ goto unexpected_bytecode_error;
+ case OP_NULL_CHECK_START:
+ p += SIZE_MEMNUM;
+ break;
+ case OP_NULL_CHECK_END:
+ case OP_NULL_CHECK_END_MEMST_PUSH:
+ p += SIZE_MEMNUM;
+ break;
+ case OP_NULL_CHECK_END_MEMST:
+ p += SIZE_MEMNUM;
+ break;
+
+ case OP_PUSH_POS:
+ lookaround:
+ {
+ OnigCacheOpcode* cache_opcodes_in_lookaround = cache_opcodes;
+ result = init_cache_opcodes_inner(reg, current_repeat_mem, lookaround_nesting + 1, &cache_opcodes, &p, &cache_point);
+ if (result != 0) {
+ goto fail;
+ }
+ UChar* match_addr = p - 1;
+ for (; cache_opcodes_in_lookaround < cache_opcodes; cache_opcodes_in_lookaround++) {
+ if (cache_opcodes_in_lookaround->match_addr == NULL) {
+ cache_opcodes_in_lookaround->match_addr = match_addr;
+ }
+ }
+ }
+ break;
+ case OP_PUSH_POS_NOT:
+ p += SIZE_RELADDR;
+ goto lookaround;
+ case OP_PUSH_LOOK_BEHIND_NOT:
+ p += SIZE_RELADDR;
+ p += SIZE_LENGTH;
+ goto lookaround;
+ case OP_PUSH_STOP_BT:
+ {
+ OnigCacheOpcode* cache_opcodes_in_atomic = cache_opcodes;
+ result = init_cache_opcodes_inner(reg, current_repeat_mem, -1, &cache_opcodes, &p, &cache_point);
+ if (result != 0) {
+ goto fail;
+ }
+ UChar* match_addr = p - 1;
+ for (; cache_opcodes_in_atomic < cache_opcodes; cache_opcodes_in_atomic++) {
+ if (cache_opcodes_in_atomic->match_addr == NULL) {
+ cache_opcodes_in_atomic->match_addr = match_addr;
+ }
+ }
+ }
+ break;
+ case OP_POP_POS:
+ case OP_FAIL_POS:
+ case OP_FAIL_LOOK_BEHIND_NOT:
+ case OP_POP_STOP_BT:
+ goto exit;
+ case OP_LOOK_BEHIND:
+ p += SIZE_LENGTH;
+ break;
+
+ case OP_ABSENT_END:
+ case OP_ABSENT:
+ goto unexpected_bytecode_error;
+
+ case OP_CALL:
+ case OP_RETURN:
+ goto unexpected_bytecode_error;
+
+ case OP_CONDITION:
+ goto unexpected_bytecode_error;
+
+ case OP_STATE_CHECK_PUSH:
+ case OP_STATE_CHECK_PUSH_OR_JUMP:
+ case OP_STATE_CHECK:
+ case OP_STATE_CHECK_ANYCHAR_STAR:
+ case OP_STATE_CHECK_ANYCHAR_ML_STAR:
+ goto unexpected_bytecode_error;
+
+ case OP_SET_OPTION_PUSH:
+ case OP_SET_OPTION:
+ p += SIZE_OPTION;
+ break;
+
+ default:
+ goto bytecode_error;
+ }
+ }
+
+exit:
+ *cache_opcodes_ptr = cache_opcodes;
+ *pp = p;
+ *num_cache_points_ptr = cache_point;
+ return 0;
+
+fail:
+ return result;
+
+unexpected_bytecode_error:
+ return ONIGERR_UNEXPECTED_BYTECODE;
+
+bytecode_error:
+ return ONIGERR_UNDEFINED_BYTECODE;
+}
+
+/* collect cache opcodes from the given regex program, and compute the total number of cache points. */
+static OnigPosition
+init_cache_opcodes(const regex_t* reg, OnigCacheOpcode* cache_opcodes_ptr, long* num_cache_points_ptr)
+{
+ UChar* p = reg->p;
+ *num_cache_points_ptr = 0;
+ OnigPosition result = init_cache_opcodes_inner(reg, -1, 0, &cache_opcodes_ptr, &p, num_cache_points_ptr);
+ if (result == 0 && p != reg->p + reg->used) {
+ return ONIGERR_UNDEFINED_BYTECODE;
+ }
+
+ return result;
+}
+#else
+static OnigPosition
+count_num_cache_opcodes(regex_t* reg, long* num_cache_opcodes)
+{
+ *num_cache_opcodes = NUM_CACHE_OPCODES_IMPOSSIBLE;
+ return 0;
+}
+#endif /* USE_MATCH_CACHE */
+
+extern int
+onig_check_linear_time(OnigRegexType* reg)
+{
+ long num_cache_opcodes = 0;
+ count_num_cache_opcodes(reg, &num_cache_opcodes);
+ return num_cache_opcodes != NUM_CACHE_OPCODES_IMPOSSIBLE;
+}
+
extern void
onig_region_clear(OnigRegion* region)
{
@@ -344,14 +997,18 @@ onig_region_free(OnigRegion* r, int free_self)
{
if (r) {
if (r->allocated > 0) {
- if (r->beg) xfree(r->beg);
- if (r->end) xfree(r->end);
- r->allocated = 0;
+ xfree(r->beg);
+ xfree(r->end);
}
#ifdef USE_CAPTURE_HISTORY
history_root_free(r);
#endif
- if (free_self) xfree(r);
+ if (free_self) {
+ xfree(r);
+ }
+ else {
+ memset(r, 0, sizeof(OnigRegion));
+ }
}
}
@@ -387,31 +1044,53 @@ onig_region_copy(OnigRegion* to, const OnigRegion* from)
/* stack type */
/* used by normal-POP */
-#define STK_ALT 0x0001
-#define STK_LOOK_BEHIND_NOT 0x0002
-#define STK_POS_NOT 0x0003
+#define STK_ALT 0x0001
+#define STK_LOOK_BEHIND_NOT 0x0002
+#define STK_POS_NOT 0x0003
/* handled by normal-POP */
-#define STK_MEM_START 0x0100
-#define STK_MEM_END 0x8200
-#define STK_REPEAT_INC 0x0300
-#define STK_STATE_CHECK_MARK 0x1000
+#define STK_MEM_START 0x0100
+#define STK_MEM_END 0x8200
+#define STK_REPEAT_INC 0x0300
+#define STK_STATE_CHECK_MARK 0x1000
/* avoided by normal-POP */
-#define STK_NULL_CHECK_START 0x3000
-#define STK_NULL_CHECK_END 0x5000 /* for recursive call */
-#define STK_MEM_END_MARK 0x8400
-#define STK_POS 0x0500 /* used when POP-POS */
-#define STK_STOP_BT 0x0600 /* mark for "(?>...)" */
-#define STK_REPEAT 0x0700
-#define STK_CALL_FRAME 0x0800
-#define STK_RETURN 0x0900
-#define STK_VOID 0x0a00 /* for fill a blank */
-#define STK_ABSENT_POS 0x0b00 /* for absent */
-#define STK_ABSENT 0x0c00 /* absent inner loop marker */
+#define STK_NULL_CHECK_START 0x3000
+#define STK_NULL_CHECK_END 0x5000 /* for recursive call */
+#define STK_MEM_END_MARK 0x8400
+#define STK_POS 0x0500 /* used when POP-POS */
+#define STK_STOP_BT 0x0600 /* mark for "(?>...)" */
+#define STK_REPEAT 0x0700
+#define STK_CALL_FRAME 0x0800
+#define STK_RETURN 0x0900
+#define STK_VOID 0x0a00 /* for fill a blank */
+#define STK_ABSENT_POS 0x0b00 /* for absent */
+#define STK_ABSENT 0x0c00 /* absent inner loop marker */
+#define STK_MATCH_CACHE_POINT 0x0d00 /* for the match cache optimization */
+#define STK_ATOMIC_MATCH_CACHE_POINT 0x0e00
/* stack type check mask */
-#define STK_MASK_POP_USED 0x00ff
-#define STK_MASK_TO_VOID_TARGET 0x10ff
-#define STK_MASK_MEM_END_OR_MARK 0x8000 /* MEM_END or MEM_END_MARK */
+#define STK_MASK_POP_USED 0x00ff
+#define STK_MASK_TO_VOID_TARGET 0x10ff
+#define STK_MASK_MEM_END_OR_MARK 0x8000 /* MEM_END or MEM_END_MARK */
+
+#ifdef USE_MATCH_CACHE
+#define MATCH_ARG_INIT_MATCH_CACHE(msa) do {\
+ (msa).match_cache_status = MATCH_CACHE_STATUS_UNINIT;\
+ (msa).num_fails = 0;\
+ (msa).num_cache_opcodes = NUM_CACHE_OPCODES_UNINIT;\
+ (msa).cache_opcodes = (OnigCacheOpcode*)NULL;\
+ (msa).num_cache_points = 0;\
+ (msa).match_cache_buf = (uint8_t*)NULL;\
+} while(0)
+#define MATCH_ARG_FREE_MATCH_CACHE(msa) do {\
+ xfree((msa).cache_opcodes);\
+ xfree((msa).match_cache_buf);\
+ (msa).cache_opcodes = (OnigCacheOpcode*)NULL;\
+ (msa).match_cache_buf = (uint8_t*)NULL;\
+} while(0)
+#else
+#define MATCH_ARG_INIT_MATCH_CACHE(msa)
+#define MATCH_ARG_FREE_MATCH_CACHE(msa)
+#endif
#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
# define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start, arg_gpos) do {\
@@ -421,6 +1100,9 @@ onig_region_copy(OnigRegion* to, const OnigRegion* from)
(msa).start = (arg_start);\
(msa).gpos = (arg_gpos);\
(msa).best_len = ONIG_MISMATCH;\
+ (msa).counter = 0;\
+ (msa).end_time = 0;\
+ MATCH_ARG_INIT_MATCH_CACHE(msa);\
} while(0)
#else
# define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start, arg_gpos) do {\
@@ -429,6 +1111,9 @@ onig_region_copy(OnigRegion* to, const OnigRegion* from)
(msa).region = (arg_region);\
(msa).start = (arg_start);\
(msa).gpos = (arg_gpos);\
+ (msa).counter = 0;\
+ (msa).end_time = 0;\
+ MATCH_ARG_INIT_MATCH_CACHE(msa);\
} while(0)
#endif
@@ -463,13 +1148,17 @@ onig_region_copy(OnigRegion* to, const OnigRegion* from)
} while(0)
# define MATCH_ARG_FREE(msa) do {\
- if ((msa).stack_p) xfree((msa).stack_p);\
+ xfree((msa).stack_p);\
if ((msa).state_check_buff_size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) { \
- if ((msa).state_check_buff) xfree((msa).state_check_buff);\
+ xfree((msa).state_check_buff);\
}\
+ MATCH_ARG_FREE_MATCH_CACHE(msa);\
} while(0)
#else /* USE_COMBINATION_EXPLOSION_CHECK */
-# define MATCH_ARG_FREE(msa) if ((msa).stack_p) xfree((msa).stack_p)
+# define MATCH_ARG_FREE(msa) do {\
+ xfree((msa).stack_p);\
+ MATCH_ARG_FREE_MATCH_CACHE(msa);\
+} while (0)
#endif /* USE_COMBINATION_EXPLOSION_CHECK */
@@ -485,13 +1174,15 @@ onig_region_copy(OnigRegion* to, const OnigRegion* from)
stk_base = stk_alloc;\
stk = stk_base;\
stk_end = stk_base + msa->stack_n;\
- } else {\
+ }\
+ else {\
stk_alloc = (OnigStackType* )xalloca(sizeof(OnigStackType) * (stack_num));\
stk_base = stk_alloc;\
stk = stk_base;\
stk_end = stk_base + (stack_num);\
}\
- } else if (msa->stack_p) {\
+ }\
+ else if (msa->stack_p) {\
alloc_addr = (char* )xalloca(sizeof(OnigStackIndex) * (ptr_num));\
heap_addr = NULL;\
stk_alloc = (OnigStackType* )(msa->stack_p);\
@@ -501,7 +1192,7 @@ onig_region_copy(OnigRegion* to, const OnigRegion* from)
}\
else {\
alloc_addr = (char* )xalloca(sizeof(OnigStackIndex) * (ptr_num)\
- + sizeof(OnigStackType) * (stack_num));\
+ + sizeof(OnigStackType) * (stack_num));\
heap_addr = NULL;\
stk_alloc = (OnigStackType* )(alloc_addr + sizeof(OnigStackIndex) * (ptr_num));\
stk_base = stk_alloc;\
@@ -534,7 +1225,7 @@ onig_set_match_stack_limit_size(unsigned int size)
static int
stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end,
- OnigStackType** arg_stk, OnigStackType* stk_alloc, OnigMatchArg* msa)
+ OnigStackType** arg_stk, OnigStackType* stk_alloc, OnigMatchArg* msa)
{
size_t n;
OnigStackType *x, *stk_base, *stk_end, *stk;
@@ -558,9 +1249,9 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end,
n *= 2;
if (limit_size != 0 && n > limit_size) {
if ((unsigned int )(stk_end - stk_base) == limit_size)
- return ONIGERR_MATCH_STACK_LIMIT_OVER;
+ return ONIGERR_MATCH_STACK_LIMIT_OVER;
else
- n = limit_size;
+ n = limit_size;
}
x = (OnigStackType* )xrealloc(stk_base, sizeof(OnigStackType) * n);
if (IS_NULL(x)) {
@@ -579,7 +1270,7 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end,
int r = stack_double(&stk_base, &stk_end, &stk, stk_alloc, msa);\
if (r != 0) {\
STACK_SAVE;\
- if (xmalloc_base) xfree(xmalloc_base);\
+ xfree(xmalloc_base);\
return r;\
}\
}\
@@ -591,6 +1282,7 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end,
#define STACK_PUSH_TYPE(stack_type) do {\
STACK_ENSURE(1);\
stk->type = (stack_type);\
+ stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
STACK_INC;\
} while(0)
@@ -660,6 +1352,7 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end,
# define STACK_PUSH(stack_type,pat,s,sprev,keep) do {\
STACK_ENSURE(1);\
stk->type = (stack_type);\
+ stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
stk->u.state.pcode = (pat);\
stk->u.state.pstr = (s);\
stk->u.state.pstr_prev = (sprev);\
@@ -669,6 +1362,7 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end,
# define STACK_PUSH_ENSURED(stack_type,pat) do {\
stk->type = (stack_type);\
+ stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
stk->u.state.pcode = (pat);\
STACK_INC;\
} while(0)
@@ -685,6 +1379,7 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end,
#define STACK_PUSH_REPEAT(id, pat) do {\
STACK_ENSURE(1);\
stk->type = STK_REPEAT;\
+ stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
stk->u.repeat.num = (id);\
stk->u.repeat.pcode = (pat);\
stk->u.repeat.count = 0;\
@@ -694,6 +1389,7 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end,
#define STACK_PUSH_REPEAT_INC(sindex) do {\
STACK_ENSURE(1);\
stk->type = STK_REPEAT_INC;\
+ stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
stk->u.repeat_inc.si = (sindex);\
STACK_INC;\
} while(0)
@@ -701,6 +1397,7 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end,
#define STACK_PUSH_MEM_START(mnum, s) do {\
STACK_ENSURE(1);\
stk->type = STK_MEM_START;\
+ stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
stk->u.mem.num = (mnum);\
stk->u.mem.pstr = (s);\
stk->u.mem.start = mem_start_stk[mnum];\
@@ -713,6 +1410,7 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end,
#define STACK_PUSH_MEM_END(mnum, s) do {\
STACK_ENSURE(1);\
stk->type = STK_MEM_END;\
+ stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
stk->u.mem.num = (mnum);\
stk->u.mem.pstr = (s);\
stk->u.mem.start = mem_start_stk[mnum];\
@@ -724,6 +1422,7 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end,
#define STACK_PUSH_MEM_END_MARK(mnum) do {\
STACK_ENSURE(1);\
stk->type = STK_MEM_END_MARK;\
+ stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
stk->u.mem.num = (mnum);\
STACK_INC;\
} while(0)
@@ -765,6 +1464,7 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end,
#define STACK_PUSH_NULL_CHECK_START(cnum, s) do {\
STACK_ENSURE(1);\
stk->type = STK_NULL_CHECK_START;\
+ stk->null_check = (OnigStackIndex)(stk - stk_base);\
stk->u.null_check.num = (cnum);\
stk->u.null_check.pstr = (s);\
STACK_INC;\
@@ -773,6 +1473,7 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end,
#define STACK_PUSH_NULL_CHECK_END(cnum) do {\
STACK_ENSURE(1);\
stk->type = STK_NULL_CHECK_END;\
+ stk->null_check = (OnigStackIndex)(stk - stk_base);\
stk->u.null_check.num = (cnum);\
STACK_INC;\
} while(0)
@@ -780,6 +1481,7 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end,
#define STACK_PUSH_CALL_FRAME(pat) do {\
STACK_ENSURE(1);\
stk->type = STK_CALL_FRAME;\
+ stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
stk->u.call_frame.ret_addr = (pat);\
STACK_INC;\
} while(0)
@@ -787,17 +1489,28 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end,
#define STACK_PUSH_RETURN do {\
STACK_ENSURE(1);\
stk->type = STK_RETURN;\
+ stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
STACK_INC;\
} while(0)
#define STACK_PUSH_ABSENT_POS(start, end) do {\
STACK_ENSURE(1);\
stk->type = STK_ABSENT_POS;\
+ stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
stk->u.absent_pos.abs_pstr = (start);\
stk->u.absent_pos.end_pstr = (end);\
STACK_INC;\
} while(0)
+#define STACK_PUSH_MATCH_CACHE_POINT(match_cache_point_index, match_cache_point_mask) do {\
+ STACK_ENSURE(1);\
+ stk->type = STK_MATCH_CACHE_POINT;\
+ stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
+ stk->u.match_cache_point.index = (match_cache_point_index);\
+ stk->u.match_cache_point.mask = (match_cache_point_mask);\
+ STACK_INC;\
+} while(0)
+
#ifdef ONIG_DEBUG
# define STACK_BASE_CHECK(p, at) \
@@ -809,6 +1522,43 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end,
# define STACK_BASE_CHECK(p, at)
#endif
+#ifdef ONIG_DEBUG_MATCH_CACHE
+# define MATCH_CACHE_DEBUG_MEMOIZE(stkp) fprintf(stderr, "MATCH CACHE: memoize (index=%ld mask=%d)\n", stkp->u.match_cache_point.index, stkp->u.match_cache_point.mask);
+#else
+# define MATCH_CACHE_DEBUG_MEMOIZE(stkp) ((void) 0)
+#endif
+
+#ifdef USE_MATCH_CACHE
+# define INC_NUM_FAILS msa->num_fails++
+# define MEMOIZE_MATCH_CACHE_POINT do {\
+ if (stk->type == STK_MATCH_CACHE_POINT) {\
+ msa->match_cache_buf[stk->u.match_cache_point.index] |= stk->u.match_cache_point.mask;\
+ MATCH_CACHE_DEBUG_MEMOIZE(stk);\
+ }\
+ else if (stk->type == STK_ATOMIC_MATCH_CACHE_POINT) {\
+ memoize_extended_match_cache_point(msa->match_cache_buf, stk->u.match_cache_point.index, stk->u.match_cache_point.mask);\
+ MATCH_CACHE_DEBUG_MEMOIZE(stkp);\
+ }\
+ } while(0)
+# define MEMOIZE_LOOKAROUND_MATCH_CACHE_POINT(stkp) do {\
+ if (stkp->type == STK_MATCH_CACHE_POINT) {\
+ stkp->type = STK_VOID;\
+ memoize_extended_match_cache_point(msa->match_cache_buf, stkp->u.match_cache_point.index, stkp->u.match_cache_point.mask);\
+ MATCH_CACHE_DEBUG_MEMOIZE(stkp);\
+ }\
+ } while(0)
+# define MEMOIZE_ATOMIC_MATCH_CACHE_POINT do {\
+ if (stk->type == STK_MATCH_CACHE_POINT) {\
+ memoize_extended_match_cache_point(msa->match_cache_buf, stk->u.match_cache_point.index, stk->u.match_cache_point.mask);\
+ MATCH_CACHE_DEBUG_MEMOIZE(stkp);\
+ }\
+ } while(0)
+#else
+# define INC_NUM_FAILS ((void) 0)
+# define MEMOIZE_MATCH_CACHE_POINT ((void) 0)
+# define MEMOIZE_LOOKAROUND_MATCH_CACHE_POINT(stkp) ((void) 0)
+#endif
+
#define STACK_POP_ONE do {\
stk--;\
STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \
@@ -822,6 +1572,7 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end,
STACK_BASE_CHECK(stk, "STACK_POP"); \
if ((stk->type & STK_MASK_POP_USED) != 0) break;\
ELSE_IF_STATE_CHECK_MARK(stk);\
+ MEMOIZE_MATCH_CACHE_POINT;\
}\
break;\
case STACK_POP_LEVEL_MEM_START:\
@@ -834,6 +1585,7 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end,
mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
}\
ELSE_IF_STATE_CHECK_MARK(stk);\
+ MEMOIZE_MATCH_CACHE_POINT;\
}\
break;\
default:\
@@ -853,6 +1605,7 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end,
mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
}\
ELSE_IF_STATE_CHECK_MARK(stk);\
+ MEMOIZE_MATCH_CACHE_POINT;\
}\
break;\
}\
@@ -874,7 +1627,11 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end,
mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
}\
+ else if (IS_TO_VOID_TARGET(stk)) {\
+ INC_NUM_FAILS;\
+ }\
ELSE_IF_STATE_CHECK_MARK(stk);\
+ MEMOIZE_LOOKAROUND_MATCH_CACHE_POINT(stk);\
}\
} while(0)
@@ -931,12 +1688,14 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end,
k--;\
STACK_BASE_CHECK(k, "STACK_POS_END"); \
if (IS_TO_VOID_TARGET(k)) {\
+ INC_NUM_FAILS;\
k->type = STK_VOID;\
}\
else if (k->type == STK_POS) {\
k->type = STK_VOID;\
break;\
}\
+ MEMOIZE_LOOKAROUND_MATCH_CACHE_POINT(k);\
}\
} while(0)
@@ -946,17 +1705,33 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end,
k--;\
STACK_BASE_CHECK(k, "STACK_STOP_BT_END"); \
if (IS_TO_VOID_TARGET(k)) {\
+ INC_NUM_FAILS;\
k->type = STK_VOID;\
}\
else if (k->type == STK_STOP_BT) {\
k->type = STK_VOID;\
break;\
}\
+ else if (k->type == STK_MATCH_CACHE_POINT) {\
+ k->type = STK_ATOMIC_MATCH_CACHE_POINT;\
+ }\
+ }\
+} while(0)
+
+#define STACK_STOP_BT_FAIL do {\
+ while (1) {\
+ stk--;\
+ STACK_BASE_CHECK(stk, "STACK_STOP_BT_END"); \
+ if (stk->type == STK_STOP_BT) {\
+ stk->type = STK_VOID;\
+ break;\
+ }\
+ MEMOIZE_ATOMIC_MATCH_CACHE_POINT;\
}\
} while(0)
#define STACK_NULL_CHECK(isnull,id,s) do {\
- OnigStackType* k = stk;\
+ OnigStackType* k = STACK_AT((stk-1)->null_check)+1;\
while (1) {\
k--;\
STACK_BASE_CHECK(k, "STACK_NULL_CHECK"); \
@@ -971,7 +1746,7 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end,
#define STACK_NULL_CHECK_REC(isnull,id,s) do {\
int level = 0;\
- OnigStackType* k = stk;\
+ OnigStackType* k = STACK_AT((stk-1)->null_check)+1;\
while (1) {\
k--;\
STACK_BASE_CHECK(k, "STACK_NULL_CHECK_REC"); \
@@ -991,7 +1766,7 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end,
} while(0)
#define STACK_NULL_CHECK_MEMST(isnull,id,s,reg) do {\
- OnigStackType* k = stk;\
+ OnigStackType* k = STACK_AT((stk-1)->null_check)+1;\
while (1) {\
k--;\
STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST"); \
@@ -1031,7 +1806,7 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end,
#define STACK_NULL_CHECK_MEMST_REC(isnull,id,s,reg) do {\
int level = 0;\
- OnigStackType* k = stk;\
+ OnigStackType* k = STACK_AT((stk-1)->null_check)+1;\
while (1) {\
k--;\
STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST_REC"); \
@@ -1126,7 +1901,7 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end,
} while(0)
static int string_cmp_ic(OnigEncoding enc, int case_fold_flag,
- UChar* s1, UChar** ps2, OnigDistance mblen, const UChar* text_end)
+ UChar* s1, UChar** ps2, OnigDistance mblen, const UChar* text_end)
{
UChar buf1[ONIGENC_MBC_CASE_FOLD_MAXLEN];
UChar buf2[ONIGENC_MBC_CASE_FOLD_MAXLEN];
@@ -1186,6 +1961,19 @@ static int string_cmp_ic(OnigEncoding enc, int case_fold_flag,
# define ABSENT_END_POS end
#endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
+int onigenc_mbclen_approximate(const OnigUChar* p,const OnigUChar* e, const struct OnigEncodingTypeST* enc);
+
+static inline int
+enclen_approx(OnigEncoding enc, const OnigUChar* p, const OnigUChar* e)
+{
+ if (enc->max_enc_len == enc->min_enc_len) {
+ return (p < e ? enc->min_enc_len : 0);
+ }
+ else {
+ return onigenc_mbclen_approximate(p, e, enc);
+ }
+}
+
#ifdef USE_CAPTURE_HISTORY
static int
@@ -1200,29 +1988,29 @@ make_capture_history_tree(OnigCaptureTreeNode* node, OnigStackType** kp,
if (k->type == STK_MEM_START) {
n = k->u.mem.num;
if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP &&
- BIT_STATUS_AT(reg->capture_history, n) != 0) {
- child = history_node_new();
- CHECK_NULL_RETURN_MEMERR(child);
- child->group = n;
- child->beg = k->u.mem.pstr - str;
- r = history_tree_add_child(node, child);
- if (r != 0) {
- history_tree_free(child);
- return r;
- }
- *kp = (k + 1);
- r = make_capture_history_tree(child, kp, stk_top, str, reg);
- if (r != 0) return r;
-
- k = *kp;
- child->end = k->u.mem.pstr - str;
+ BIT_STATUS_AT(reg->capture_history, n) != 0) {
+ child = history_node_new();
+ CHECK_NULL_RETURN_MEMERR(child);
+ child->group = n;
+ child->beg = k->u.mem.pstr - str;
+ r = history_tree_add_child(node, child);
+ if (r != 0) {
+ history_tree_free(child);
+ return r;
+ }
+ *kp = (k + 1);
+ r = make_capture_history_tree(child, kp, stk_top, str, reg);
+ if (r != 0) return r;
+
+ k = *kp;
+ child->end = k->u.mem.pstr - str;
}
}
else if (k->type == STK_MEM_END) {
if (k->u.mem.num == node->group) {
- node->end = k->u.mem.pstr - str;
- *kp = k;
- return 0;
+ node->end = k->u.mem.pstr - str;
+ *kp = k;
+ return 0;
}
}
k++;
@@ -1233,7 +2021,8 @@ make_capture_history_tree(OnigCaptureTreeNode* node, OnigStackType** kp,
#endif /* USE_CAPTURE_HISTORY */
#ifdef USE_BACKREF_WITH_LEVEL
-static int mem_is_in_memp(int mem, int num, UChar* memp)
+static int
+mem_is_in_memp(int mem, int num, UChar* memp)
{
int i;
MemNumType m;
@@ -1246,9 +2035,9 @@ static int mem_is_in_memp(int mem, int num, UChar* memp)
}
static int backref_match_at_nested_level(regex_t* reg,
- OnigStackType* top, OnigStackType* stk_base,
- int ignore_case, int case_fold_flag,
- int nest, int mem_num, UChar* memp, UChar** s, const UChar* send)
+ OnigStackType* top, OnigStackType* stk_base,
+ int ignore_case, int case_fold_flag,
+ int nest, int mem_num, UChar* memp, UChar** s, const UChar* send)
{
UChar *ss, *p, *pstart, *pend = NULL_UCHARP;
int level;
@@ -1266,33 +2055,33 @@ static int backref_match_at_nested_level(regex_t* reg,
}
else if (level == nest) {
if (k->type == STK_MEM_START) {
- if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
- pstart = k->u.mem.pstr;
- if (pend != NULL_UCHARP) {
- if (pend - pstart > send - *s) return 0; /* or goto next_mem; */
- p = pstart;
- ss = *s;
-
- if (ignore_case != 0) {
- if (string_cmp_ic(reg->enc, case_fold_flag,
- pstart, &ss, pend - pstart, send) == 0)
- return 0; /* or goto next_mem; */
- }
- else {
- while (p < pend) {
- if (*p++ != *ss++) return 0; /* or goto next_mem; */
- }
- }
-
- *s = ss;
- return 1;
- }
- }
+ if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
+ pstart = k->u.mem.pstr;
+ if (pend != NULL_UCHARP) {
+ if (pend - pstart > send - *s) return 0; /* or goto next_mem; */
+ p = pstart;
+ ss = *s;
+
+ if (ignore_case != 0) {
+ if (string_cmp_ic(reg->enc, case_fold_flag,
+ pstart, &ss, pend - pstart, send) == 0)
+ return 0; /* or goto next_mem; */
+ }
+ else {
+ while (p < pend) {
+ if (*p++ != *ss++) return 0; /* or goto next_mem; */
+ }
+ }
+
+ *s = ss;
+ return 1;
+ }
+ }
}
else if (k->type == STK_MEM_END) {
- if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
- pend = k->u.mem.pstr;
- }
+ if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
+ pend = k->u.mem.pstr;
+ }
}
}
k--;
@@ -1310,7 +2099,7 @@ static int backref_match_at_nested_level(regex_t* reg,
static LARGE_INTEGER ts, te, freq;
# define GETTIME(t) QueryPerformanceCounter(&(t))
# define TIMEDIFF(te,ts) (unsigned long )(((te).QuadPart - (ts).QuadPart) \
- * 1000000 / freq.QuadPart)
+ * 1000000 / freq.QuadPart)
# else /* _WIN32 */
# define USE_TIMEOFDAY
@@ -1376,7 +2165,7 @@ onig_print_statistics(FILE* f)
fprintf(f, " count prev time\n");
for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
fprintf(f, "%8d: %8d: %10lu: %s\n",
- OpCounter[i], OpPrevCounter[i], OpTime[i], OnigOpInfo[i].name);
+ OpCounter[i], OpPrevCounter[i], OpTime[i], OnigOpInfo[i].name);
}
fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth);
}
@@ -1396,7 +2185,7 @@ onig_print_statistics(FILE* f)
#ifdef ONIG_DEBUG_MATCH
-static char *
+static const char *
stack_type_str(int stack_type)
{
switch (stack_type) {
@@ -1418,19 +2207,112 @@ stack_type_str(int stack_type)
case STK_VOID: return "Void ";
case STK_ABSENT_POS: return "AbsPos";
case STK_ABSENT: return "Absent";
+ case STK_MATCH_CACHE_POINT: return "MCache";
default: return " ";
}
}
#endif
+#ifdef USE_MATCH_CACHE
+
+static long
+bsearch_cache_opcodes(const OnigCacheOpcode *cache_opcodes, long num_cache_opcodes, const UChar* p)
+{
+ long l = 0, r = num_cache_opcodes - 1, m = 0;
+
+ while (l <= r) {
+ m = (l + r) / 2;
+ if (cache_opcodes[m].addr == p) break;
+ if (cache_opcodes[m].addr < p) l = m + 1;
+ else r = m - 1;
+ }
+ return m;
+}
+
+static long
+find_cache_point(regex_t* reg, const OnigCacheOpcode* cache_opcodes, long num_cache_opcodes, const UChar* p, const OnigStackType *stk, const OnigStackIndex *repeat_stk, const OnigCacheOpcode **cache_opcode_ptr)
+{
+ long m;
+ const OnigCacheOpcode* cache_opcode;
+ const OnigRepeatRange* range;
+ const OnigStackType *stkp;
+ int count = 0;
+ int is_inc = *p == OP_REPEAT_INC || *p == OP_REPEAT_INC_NG;
+ long cache_point;
+ long num_cache_points_at_outer_repeat;
+ long num_cache_points_in_outer_repeat;
+
+ m = bsearch_cache_opcodes(cache_opcodes, num_cache_opcodes, p);
+
+ if (!(0 <= m && m < num_cache_opcodes && cache_opcodes[m].addr == p)) {
+ return -1;
+ }
+
+ cache_opcode = &cache_opcodes[m];
+ *cache_opcode_ptr = &cache_opcodes[m];
+ cache_point = cache_opcode->cache_point;
+ if (cache_opcode->outer_repeat_mem == -1) {
+ return cache_point;
+ }
+
+ num_cache_points_at_outer_repeat = cache_opcode->num_cache_points_at_outer_repeat;
+ num_cache_points_in_outer_repeat = cache_opcode->num_cache_points_in_outer_repeat;
+
+ range = &reg->repeat_range[cache_opcode->outer_repeat_mem];
+
+ stkp = &stk[repeat_stk[cache_opcode->outer_repeat_mem]];
+ count = is_inc ? stkp->u.repeat.count - 1 : stkp->u.repeat.count;
+
+ if (count < range->lower) {
+ return num_cache_points_at_outer_repeat +
+ num_cache_points_in_outer_repeat * count +
+ cache_point;
+ }
+
+ if (range->upper == 0x7fffffff) {
+ return num_cache_points_at_outer_repeat +
+ num_cache_points_in_outer_repeat * (range->lower - (is_inc ? 1 : 0)) + (is_inc ? 0 : 1) +
+ cache_point;
+ }
+
+ return num_cache_points_at_outer_repeat +
+ num_cache_points_in_outer_repeat * (range->lower - 1) +
+ (num_cache_points_in_outer_repeat + 1) * (count - range->lower + 1) +
+ cache_point;
+}
+
+static int
+check_extended_match_cache_point(uint8_t *match_cache_buf, long match_cache_point_index, uint8_t match_cache_point_mask)
+{
+ if (match_cache_point_mask & 0x80) {
+ return (match_cache_buf[match_cache_point_index + 1] & 0x01) > 0;
+ }
+ else {
+ return (match_cache_buf[match_cache_point_index] & (match_cache_point_mask << 1)) > 0;
+ }
+}
+
+static void
+memoize_extended_match_cache_point(uint8_t *match_cache_buf, long match_cache_point_index, uint8_t match_cache_point_mask)
+{
+ match_cache_buf[match_cache_point_index] |= match_cache_point_mask;
+ if (match_cache_point_mask & 0x80) {
+ match_cache_buf[match_cache_point_index + 1] |= 0x01;
+ }
+ else {
+ match_cache_buf[match_cache_point_index] |= match_cache_point_mask << 1;
+ }
+}
+
+#endif /* USE_MATCH_CACHE */
/* match data(str - end) from position (sstart). */
/* if sstart == str then set sprev to NULL. */
static OnigPosition
match_at(regex_t* reg, const UChar* str, const UChar* end,
#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
- const UChar* right_range,
+ const UChar* right_range,
#endif
- const UChar* sstart, UChar* sprev, OnigMatchArg* msa)
+ const UChar* sstart, UChar* sprev, OnigMatchArg* msa)
{
static const UChar FinishCode[] = { OP_FINISH };
@@ -1444,10 +2326,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
OnigCaseFoldType case_fold_flag = reg->case_fold_flag;
UChar *s, *q, *sbegin;
UChar *p = reg->p;
+ UChar *pbegin = p;
UChar *pkeep;
char *alloca_base;
char *xmalloc_base = NULL;
- OnigStackType *stk_alloc, *stk_base, *stk, *stk_end;
+ OnigStackType *stk_alloc, *stk_base = NULL, *stk, *stk_end;
OnigStackType *stkp; /* used as any purpose. */
OnigStackIndex si;
OnigStackIndex *repeat_stk;
@@ -1465,7 +2348,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
# define CASE(x) L_##x: sbegin = s; OPCODE_EXEC_HOOK;
# define DEFAULT L_DEFAULT:
# define NEXT sprev = sbegin; JUMP
-# define JUMP RB_GNUC_EXTENSION_BLOCK(goto *oplabels[*p++])
+# define JUMP pbegin = p; RB_GNUC_EXTENSION_BLOCK(goto *oplabels[*p++])
RB_GNUC_EXTENSION static const void *oplabels[] = {
&&L_OP_FINISH, /* matching process terminator (no more alternative) */
@@ -1641,6 +2524,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
# define VM_LOOP \
while (1) { \
OPCODE_EXEC_HOOK; \
+ pbegin = p; \
sbegin = s; \
switch (*p++) {
# define VM_LOOP_END } sprev = sbegin; }
@@ -1678,16 +2562,16 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
}
#ifndef USE_SUBEXP_CALL
mem_start_stk--; /* for index start from 1,
- mem_start_stk[1]..mem_start_stk[num_mem] */
+ mem_start_stk[1]..mem_start_stk[num_mem] */
mem_end_stk--; /* for index start from 1,
- mem_end_stk[1]..mem_end_stk[num_mem] */
+ mem_end_stk[1]..mem_end_stk[num_mem] */
#endif
#ifdef ONIG_DEBUG_MATCH
fprintf(stderr, "match_at: str: %"PRIuPTR" (%p), end: %"PRIuPTR" (%p), start: %"PRIuPTR" (%p), sprev: %"PRIuPTR" (%p)\n",
- (uintptr_t )str, str, (uintptr_t )end, end, (uintptr_t )sstart, sstart, (uintptr_t )sprev, sprev);
+ (uintptr_t )str, str, (uintptr_t )end, end, (uintptr_t )sstart, sstart, (uintptr_t )sprev, sprev);
fprintf(stderr, "size: %d, start offset: %d\n",
- (int )(end - str), (int )(sstart - str));
+ (int )(end - str), (int )(sstart - str));
fprintf(stderr, "\n ofs> str stk:type addr:opcode\n");
#endif
@@ -1707,10 +2591,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
bp = buf; \
q = s; \
if (*op != OP_FINISH) { /* s may not be a valid pointer if OP_FINISH. */ \
- for (i = 0; i < 7 && q < end; i++) { \
- len = enclen(encode, q, end); \
- while (len-- > 0) *bp++ = *q++; \
- } \
+ for (i = 0; i < 7 && q < end; i++) { \
+ len = enclen(encode, q, end); \
+ while (len-- > 0) *bp++ = *q++; \
+ } \
if (q < end) { xmemcpy(bp, "...", 3); bp += 3; } \
} \
xmemcpy(bp, "\"", 1); bp += 1; \
@@ -1718,9 +2602,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
fputs((char* )buf, stderr); \
for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr); \
fprintf(stderr, "%4"PRIdPTR":%s %4"PRIdPTR":", \
- stk - stk_base - 1, \
- (stk > stk_base) ? stack_type_str(stk[-1].type) : " ", \
- (op == FinishCode) ? (ptrdiff_t )-1 : op - reg->p); \
+ stk - stk_base - 1, \
+ (stk > stk_base) ? stack_type_str(stk[-1].type) : " ", \
+ (op == FinishCode) ? (ptrdiff_t )-1 : op - reg->p); \
onig_print_compiled_byte_code(stderr, op, reg->p+reg->used, NULL, encode); \
fprintf(stderr, "\n"); \
}
@@ -1728,71 +2612,117 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
# define OPCODE_EXEC_HOOK ((void) 0)
#endif
+#ifdef USE_MATCH_CACHE
+#ifdef ONIG_DEBUG_MATCH_CACHE
+#define MATCH_CACHE_DEBUG fprintf(stderr, "MATCH CACHE: cache %ld (p=%p index=%ld mask=%d)\n", match_cache_point, pbegin, match_cache_point_index, match_cache_point_mask)
+#define MATCH_CACHE_DEBUG_HIT fprintf(stderr, "MATCH CACHE: cache hit\n")
+#else
+#define MATCH_CACHE_DEBUG ((void) 0)
+#define MATCH_CACHE_DEBUG_HIT ((void) 0)
+#endif
+
+#define MATCH_CACHE_HIT ((void) 0)
+
+# define CHECK_MATCH_CACHE do {\
+ if (msa->match_cache_status == MATCH_CACHE_STATUS_ENABLED) {\
+ const OnigCacheOpcode *cache_opcode;\
+ long cache_point = find_cache_point(reg, msa->cache_opcodes, msa->num_cache_opcodes, pbegin, stk_base, repeat_stk, &cache_opcode);\
+ if (cache_point >= 0) {\
+ long match_cache_point = msa->num_cache_points * (long)(s - str) + cache_point;\
+ long match_cache_point_index = match_cache_point >> 3;\
+ uint8_t match_cache_point_mask = 1 << (match_cache_point & 7);\
+ MATCH_CACHE_DEBUG;\
+ if (msa->match_cache_buf[match_cache_point_index] & match_cache_point_mask) {\
+ MATCH_CACHE_DEBUG_HIT; MATCH_CACHE_HIT;\
+ if (cache_opcode->lookaround_nesting == 0) goto fail;\
+ else if (cache_opcode->lookaround_nesting < 0) {\
+ if (check_extended_match_cache_point(msa->match_cache_buf, match_cache_point_index, match_cache_point_mask)) {\
+ STACK_STOP_BT_FAIL;\
+ goto fail;\
+ }\
+ else goto fail;\
+ }\
+ else {\
+ if (check_extended_match_cache_point(msa->match_cache_buf, match_cache_point_index, match_cache_point_mask)) {\
+ p = cache_opcode->match_addr;\
+ MOP_OUT;\
+ JUMP;\
+ }\
+ else goto fail;\
+ }\
+ }\
+ STACK_PUSH_MATCH_CACHE_POINT(match_cache_point_index, match_cache_point_mask);\
+ }\
+ }\
+} while (0)
+#else
+# define CHECK_MATCH_CACHE ((void) 0)
+#endif
VM_LOOP {
CASE(OP_END) MOP_IN(OP_END);
n = s - sstart;
if (n > best_len) {
- OnigRegion* region;
+ OnigRegion* region;
#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
- if (IS_FIND_LONGEST(option)) {
- if (n > msa->best_len) {
- msa->best_len = n;
- msa->best_s = (UChar* )sstart;
- }
- else
- goto end_best_len;
- }
+ if (IS_FIND_LONGEST(option)) {
+ if (n > msa->best_len) {
+ msa->best_len = n;
+ msa->best_s = (UChar* )sstart;
+ }
+ else
+ goto end_best_len;
+ }
#endif
- best_len = n;
- region = msa->region;
- if (region) {
- region->beg[0] = ((pkeep > s) ? s : pkeep) - str;
- region->end[0] = s - str;
- for (i = 1; i <= num_mem; i++) {
- if (mem_end_stk[i] != INVALID_STACK_INDEX) {
- if (BIT_STATUS_AT(reg->bt_mem_start, i))
- region->beg[i] = STACK_AT(mem_start_stk[i])->u.mem.pstr - str;
- else
- region->beg[i] = (UChar* )((void* )mem_start_stk[i]) - str;
-
- region->end[i] = (BIT_STATUS_AT(reg->bt_mem_end, i)
- ? STACK_AT(mem_end_stk[i])->u.mem.pstr
- : (UChar* )((void* )mem_end_stk[i])) - str;
- }
- else {
- region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
- }
- }
+ best_len = n;
+ region = msa->region;
+ if (region) {
+ region->beg[0] = ((pkeep > s) ? s : pkeep) - str;
+ region->end[0] = s - str;
+ for (i = 1; i <= num_mem; i++) {
+ if (mem_end_stk[i] != INVALID_STACK_INDEX) {
+ if (BIT_STATUS_AT(reg->bt_mem_start, i))
+ region->beg[i] = STACK_AT(mem_start_stk[i])->u.mem.pstr - str;
+ else
+ region->beg[i] = (UChar* )((void* )mem_start_stk[i]) - str;
+
+ region->end[i] = (BIT_STATUS_AT(reg->bt_mem_end, i)
+ ? STACK_AT(mem_end_stk[i])->u.mem.pstr
+ : (UChar* )((void* )mem_end_stk[i])) - str;
+ }
+ else {
+ region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
+ }
+ }
#ifdef USE_CAPTURE_HISTORY
- if (reg->capture_history != 0) {
- int r;
- OnigCaptureTreeNode* node;
-
- if (IS_NULL(region->history_root)) {
- region->history_root = node = history_node_new();
- CHECK_NULL_RETURN_MEMERR(node);
- }
- else {
- node = region->history_root;
- history_tree_clear(node);
- }
-
- node->group = 0;
- node->beg = ((pkeep > s) ? s : pkeep) - str;
- node->end = s - str;
-
- stkp = stk_base;
- r = make_capture_history_tree(region->history_root, &stkp,
- stk, (UChar* )str, reg);
- if (r < 0) {
- best_len = r; /* error code */
- goto finish;
- }
- }
+ if (reg->capture_history != 0) {
+ int r;
+ OnigCaptureTreeNode* node;
+
+ if (IS_NULL(region->history_root)) {
+ region->history_root = node = history_node_new();
+ CHECK_NULL_RETURN_MEMERR(node);
+ }
+ else {
+ node = region->history_root;
+ history_tree_clear(node);
+ }
+
+ node->group = 0;
+ node->beg = ((pkeep > s) ? s : pkeep) - str;
+ node->end = s - str;
+
+ stkp = stk_base;
+ r = make_capture_history_tree(region->history_root, &stkp,
+ stk, (UChar* )str, reg);
+ if (r < 0) {
+ best_len = r; /* error code */
+ goto finish;
+ }
+ }
#endif /* USE_CAPTURE_HISTORY */
- } /* if (region) */
+ } /* if (region) */
} /* n > best_len */
#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
@@ -1801,18 +2731,17 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
MOP_OUT;
if (IS_FIND_CONDITION(option)) {
- if (IS_FIND_NOT_EMPTY(option) && s == sstart) {
- best_len = ONIG_MISMATCH;
- goto fail; /* for retry */
- }
- if (IS_FIND_LONGEST(option) && DATA_ENSURE_CHECK1) {
- goto fail; /* for retry */
- }
+ if (IS_FIND_NOT_EMPTY(option) && s == sstart) {
+ best_len = ONIG_MISMATCH;
+ goto fail; /* for retry */
+ }
+ if (IS_FIND_LONGEST(option) && DATA_ENSURE_CHECK1) {
+ goto fail; /* for retry */
+ }
}
/* default behavior: return first-matching result. */
goto finish;
- NEXT;
CASE(OP_EXACT1) MOP_IN(OP_EXACT1);
DATA_ENSURE(1);
@@ -1823,22 +2752,22 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE(OP_EXACT1_IC) MOP_IN(OP_EXACT1_IC);
{
- int len;
- UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
-
- DATA_ENSURE(1);
- len = ONIGENC_MBC_CASE_FOLD(encode,
- /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
- case_fold_flag,
- &s, end, lowbuf);
- DATA_ENSURE(0);
- q = lowbuf;
- while (len-- > 0) {
- if (*p != *q) {
- goto fail;
- }
- p++; q++;
- }
+ int len;
+ UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
+
+ DATA_ENSURE(1);
+ len = ONIGENC_MBC_CASE_FOLD(encode,
+ /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
+ case_fold_flag,
+ &s, end, lowbuf);
+ DATA_ENSURE(0);
+ q = lowbuf;
+ while (len-- > 0) {
+ if (*p != *q) {
+ goto fail;
+ }
+ p++; q++;
+ }
}
MOP_OUT;
NEXT;
@@ -1899,7 +2828,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
GET_LENGTH_INC(tlen, p);
DATA_ENSURE(tlen);
while (tlen-- > 0) {
- if (*p++ != *s++) goto fail;
+ if (*p++ != *s++) goto fail;
}
sprev = s - 1;
MOP_OUT;
@@ -1907,26 +2836,26 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE(OP_EXACTN_IC) MOP_IN(OP_EXACTN_IC);
{
- int len;
- UChar *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
-
- GET_LENGTH_INC(tlen, p);
- endp = p + tlen;
-
- while (p < endp) {
- sprev = s;
- DATA_ENSURE(1);
- len = ONIGENC_MBC_CASE_FOLD(encode,
- /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
- case_fold_flag,
- &s, end, lowbuf);
- DATA_ENSURE(0);
- q = lowbuf;
- while (len-- > 0) {
- if (*p != *q) goto fail;
- p++; q++;
- }
- }
+ int len;
+ UChar *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
+
+ GET_LENGTH_INC(tlen, p);
+ endp = p + tlen;
+
+ while (p < endp) {
+ sprev = s;
+ DATA_ENSURE(1);
+ len = ONIGENC_MBC_CASE_FOLD(encode,
+ /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
+ case_fold_flag,
+ &s, end, lowbuf);
+ DATA_ENSURE(0);
+ q = lowbuf;
+ while (len-- > 0) {
+ if (*p != *q) goto fail;
+ p++; q++;
+ }
+ }
}
MOP_OUT;
@@ -1977,10 +2906,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
GET_LENGTH_INC(tlen, p);
DATA_ENSURE(tlen * 2);
while (tlen-- > 0) {
- if (*p != *s) goto fail;
- p++; s++;
- if (*p != *s) goto fail;
- p++; s++;
+ if (*p != *s) goto fail;
+ p++; s++;
+ if (*p != *s) goto fail;
+ p++; s++;
}
sprev = s - 2;
MOP_OUT;
@@ -1990,12 +2919,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
GET_LENGTH_INC(tlen, p);
DATA_ENSURE(tlen * 3);
while (tlen-- > 0) {
- if (*p != *s) goto fail;
- p++; s++;
- if (*p != *s) goto fail;
- p++; s++;
- if (*p != *s) goto fail;
- p++; s++;
+ if (*p != *s) goto fail;
+ p++; s++;
+ if (*p != *s) goto fail;
+ p++; s++;
+ if (*p != *s) goto fail;
+ p++; s++;
}
sprev = s - 3;
MOP_OUT;
@@ -2007,8 +2936,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
tlen2 *= tlen;
DATA_ENSURE(tlen2);
while (tlen2-- > 0) {
- if (*p != *s) goto fail;
- p++; s++;
+ if (*p != *s) goto fail;
+ p++; s++;
}
sprev = s - tlen;
MOP_OUT;
@@ -2028,23 +2957,23 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
cclass_mb:
GET_LENGTH_INC(tlen, p);
{
- OnigCodePoint code;
- UChar *ss;
- int mb_len;
+ OnigCodePoint code;
+ UChar *ss;
+ int mb_len;
- DATA_ENSURE(1);
- mb_len = enclen(encode, s, end);
- DATA_ENSURE(mb_len);
- ss = s;
- s += mb_len;
- code = ONIGENC_MBC_TO_CODE(encode, ss, s);
+ DATA_ENSURE(1);
+ mb_len = enclen_approx(encode, s, end);
+ DATA_ENSURE(mb_len);
+ ss = s;
+ s += mb_len;
+ code = ONIGENC_MBC_TO_CODE(encode, ss, s);
#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
- if (! onig_is_in_code_range(p, code)) goto fail;
+ if (! onig_is_in_code_range(p, code)) goto fail;
#else
- q = p;
- ALIGNMENT_RIGHT(q);
- if (! onig_is_in_code_range(q, code)) goto fail;
+ q = p;
+ ALIGNMENT_RIGHT(q);
+ if (! onig_is_in_code_range(q, code)) goto fail;
#endif
}
p += tlen;
@@ -2054,17 +2983,17 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE(OP_CCLASS_MIX) MOP_IN(OP_CCLASS_MIX);
DATA_ENSURE(1);
if (ONIGENC_IS_MBC_HEAD(encode, s, end)) {
- p += SIZE_BITSET;
- goto cclass_mb;
+ p += SIZE_BITSET;
+ goto cclass_mb;
}
else {
- if (BITSET_AT(((BitSetRef )p), *s) == 0)
- goto fail;
+ if (BITSET_AT(((BitSetRef )p), *s) == 0)
+ goto fail;
- p += SIZE_BITSET;
- GET_LENGTH_INC(tlen, p);
- p += tlen;
- s++;
+ p += SIZE_BITSET;
+ GET_LENGTH_INC(tlen, p);
+ p += tlen;
+ s++;
}
MOP_OUT;
NEXT;
@@ -2080,36 +3009,36 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE(OP_CCLASS_MB_NOT) MOP_IN(OP_CCLASS_MB_NOT);
DATA_ENSURE(1);
if (! ONIGENC_IS_MBC_HEAD(encode, s, end)) {
- s++;
- GET_LENGTH_INC(tlen, p);
- p += tlen;
- goto cc_mb_not_success;
+ s++;
+ GET_LENGTH_INC(tlen, p);
+ p += tlen;
+ goto cc_mb_not_success;
}
cclass_mb_not:
GET_LENGTH_INC(tlen, p);
{
- OnigCodePoint code;
- UChar *ss;
- int mb_len = enclen(encode, s, end);
-
- if (! DATA_ENSURE_CHECK(mb_len)) {
- DATA_ENSURE(1);
- s = (UChar* )end;
- p += tlen;
- goto cc_mb_not_success;
- }
+ OnigCodePoint code;
+ UChar *ss;
+ int mb_len = enclen(encode, s, end);
+
+ if (! DATA_ENSURE_CHECK(mb_len)) {
+ DATA_ENSURE(1);
+ s = (UChar* )end;
+ p += tlen;
+ goto cc_mb_not_success;
+ }
- ss = s;
- s += mb_len;
- code = ONIGENC_MBC_TO_CODE(encode, ss, s);
+ ss = s;
+ s += mb_len;
+ code = ONIGENC_MBC_TO_CODE(encode, ss, s);
#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
- if (onig_is_in_code_range(p, code)) goto fail;
+ if (onig_is_in_code_range(p, code)) goto fail;
#else
- q = p;
- ALIGNMENT_RIGHT(q);
- if (onig_is_in_code_range(q, code)) goto fail;
+ q = p;
+ ALIGNMENT_RIGHT(q);
+ if (onig_is_in_code_range(q, code)) goto fail;
#endif
}
p += tlen;
@@ -2121,24 +3050,24 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE(OP_CCLASS_MIX_NOT) MOP_IN(OP_CCLASS_MIX_NOT);
DATA_ENSURE(1);
if (ONIGENC_IS_MBC_HEAD(encode, s, end)) {
- p += SIZE_BITSET;
- goto cclass_mb_not;
+ p += SIZE_BITSET;
+ goto cclass_mb_not;
}
else {
- if (BITSET_AT(((BitSetRef )p), *s) != 0)
- goto fail;
+ if (BITSET_AT(((BitSetRef )p), *s) != 0)
+ goto fail;
- p += SIZE_BITSET;
- GET_LENGTH_INC(tlen, p);
- p += tlen;
- s++;
+ p += SIZE_BITSET;
+ GET_LENGTH_INC(tlen, p);
+ p += tlen;
+ s++;
}
MOP_OUT;
NEXT;
CASE(OP_ANYCHAR) MOP_IN(OP_ANYCHAR);
DATA_ENSURE(1);
- n = enclen(encode, s, end);
+ n = enclen_approx(encode, s, end);
DATA_ENSURE(n);
if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
s += n;
@@ -2147,7 +3076,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE(OP_ANYCHAR_ML) MOP_IN(OP_ANYCHAR_ML);
DATA_ENSURE(1);
- n = enclen(encode, s, end);
+ n = enclen_approx(encode, s, end);
DATA_ENSURE(n);
s += n;
MOP_OUT;
@@ -2155,43 +3084,52 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE(OP_ANYCHAR_STAR) MOP_IN(OP_ANYCHAR_STAR);
while (DATA_ENSURE_CHECK1) {
- STACK_PUSH_ALT(p, s, sprev, pkeep);
- n = enclen(encode, s, end);
- DATA_ENSURE(n);
- if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
- sprev = s;
- s += n;
+ CHECK_MATCH_CACHE;
+ STACK_PUSH_ALT(p, s, sprev, pkeep);
+ n = enclen_approx(encode, s, end);
+ DATA_ENSURE(n);
+ if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
+ sprev = s;
+ s += n;
}
MOP_OUT;
JUMP;
CASE(OP_ANYCHAR_ML_STAR) MOP_IN(OP_ANYCHAR_ML_STAR);
while (DATA_ENSURE_CHECK1) {
- STACK_PUSH_ALT(p, s, sprev, pkeep);
- n = enclen(encode, s, end);
- if (n > 1) {
- DATA_ENSURE(n);
- sprev = s;
- s += n;
- }
- else {
- sprev = s;
- s++;
- }
+ CHECK_MATCH_CACHE;
+ STACK_PUSH_ALT(p, s, sprev, pkeep);
+ n = enclen_approx(encode, s, end);
+ if (n > 1) {
+ DATA_ENSURE(n);
+ sprev = s;
+ s += n;
+ }
+ else {
+ sprev = s;
+ s++;
+ }
}
MOP_OUT;
JUMP;
CASE(OP_ANYCHAR_STAR_PEEK_NEXT) MOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT);
while (DATA_ENSURE_CHECK1) {
- if (*p == *s) {
- STACK_PUSH_ALT(p + 1, s, sprev, pkeep);
- }
- n = enclen(encode, s, end);
- DATA_ENSURE(n);
- if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
- sprev = s;
- s += n;
+ CHECK_MATCH_CACHE;
+ if (*p == *s) {
+ STACK_PUSH_ALT(p + 1, s, sprev, pkeep);
+ } else {
+#ifdef USE_MATCH_CACHE
+ /* We need to increment num_fails here, for invoking a cache optimization correctly. */
+ /* Actually, the matching will be failed if we use `OP_ANYCHAR_STAR` simply in this case.*/
+ msa->num_fails++;
+#endif
+ }
+ n = enclen_approx(encode, s, end);
+ DATA_ENSURE(n);
+ if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
+ sprev = s;
+ s += n;
}
p++;
MOP_OUT;
@@ -2199,19 +3137,26 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE(OP_ANYCHAR_ML_STAR_PEEK_NEXT)MOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT);
while (DATA_ENSURE_CHECK1) {
- if (*p == *s) {
- STACK_PUSH_ALT(p + 1, s, sprev, pkeep);
- }
- n = enclen(encode, s, end);
- if (n > 1) {
- DATA_ENSURE(n);
- sprev = s;
- s += n;
- }
- else {
- sprev = s;
- s++;
- }
+ CHECK_MATCH_CACHE;
+ if (*p == *s) {
+ STACK_PUSH_ALT(p + 1, s, sprev, pkeep);
+ } else {
+#ifdef USE_MATCH_CACHE
+ /* We need to increment num_fails here, for invoking a cache optimization correctly. */
+ /* Actually, the matching will be failed if we use `OP_ANYCHAR_STAR_ML` simply in this case.*/
+ msa->num_fails++;
+#endif
+ }
+ n = enclen_approx(encode, s, end);
+ if (n > 1) {
+ DATA_ENSURE(n);
+ sprev = s;
+ s += n;
+ }
+ else {
+ sprev = s;
+ s++;
+ }
}
p++;
MOP_OUT;
@@ -2221,15 +3166,15 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE(OP_STATE_CHECK_ANYCHAR_STAR) MOP_IN(OP_STATE_CHECK_ANYCHAR_STAR);
GET_STATE_CHECK_NUM_INC(mem, p);
while (DATA_ENSURE_CHECK1) {
- STATE_CHECK_VAL(scv, mem);
- if (scv) goto fail;
-
- STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem, pkeep);
- n = enclen(encode, s, end);
- DATA_ENSURE(n);
- if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
- sprev = s;
- s += n;
+ STATE_CHECK_VAL(scv, mem);
+ if (scv) goto fail;
+
+ STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem, pkeep);
+ n = enclen_approx(encode, s, end);
+ DATA_ENSURE(n);
+ if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
+ sprev = s;
+ s += n;
}
MOP_OUT;
NEXT;
@@ -2239,20 +3184,20 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
GET_STATE_CHECK_NUM_INC(mem, p);
while (DATA_ENSURE_CHECK1) {
- STATE_CHECK_VAL(scv, mem);
- if (scv) goto fail;
-
- STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem, pkeep);
- n = enclen(encode, s, end);
- if (n > 1) {
- DATA_ENSURE(n);
- sprev = s;
- s += n;
- }
- else {
- sprev = s;
- s++;
- }
+ STATE_CHECK_VAL(scv, mem);
+ if (scv) goto fail;
+
+ STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem, pkeep);
+ n = enclen_approx(encode, s, end);
+ if (n > 1) {
+ DATA_ENSURE(n);
+ sprev = s;
+ s += n;
+ }
+ else {
+ sprev = s;
+ s++;
+ }
}
MOP_OUT;
NEXT;
@@ -2261,7 +3206,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE(OP_WORD) MOP_IN(OP_WORD);
DATA_ENSURE(1);
if (! ONIGENC_IS_MBC_WORD(encode, s, end))
- goto fail;
+ goto fail;
s += enclen(encode, s, end);
MOP_OUT;
@@ -2270,7 +3215,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE(OP_ASCII_WORD) MOP_IN(OP_ASCII_WORD);
DATA_ENSURE(1);
if (! ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
- goto fail;
+ goto fail;
s += enclen(encode, s, end);
MOP_OUT;
@@ -2279,7 +3224,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE(OP_NOT_WORD) MOP_IN(OP_NOT_WORD);
DATA_ENSURE(1);
if (ONIGENC_IS_MBC_WORD(encode, s, end))
- goto fail;
+ goto fail;
s += enclen(encode, s, end);
MOP_OUT;
@@ -2288,7 +3233,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE(OP_NOT_ASCII_WORD) MOP_IN(OP_NOT_ASCII_WORD);
DATA_ENSURE(1);
if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
- goto fail;
+ goto fail;
s += enclen(encode, s, end);
MOP_OUT;
@@ -2296,70 +3241,70 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE(OP_WORD_BOUND) MOP_IN(OP_WORD_BOUND);
if (ON_STR_BEGIN(s)) {
- DATA_ENSURE(1);
- if (! ONIGENC_IS_MBC_WORD(encode, s, end))
- goto fail;
+ DATA_ENSURE(1);
+ if (! ONIGENC_IS_MBC_WORD(encode, s, end))
+ goto fail;
}
else if (ON_STR_END(s)) {
- if (! ONIGENC_IS_MBC_WORD(encode, sprev, end))
- goto fail;
+ if (! ONIGENC_IS_MBC_WORD(encode, sprev, end))
+ goto fail;
}
else {
- if (ONIGENC_IS_MBC_WORD(encode, s, end)
- == ONIGENC_IS_MBC_WORD(encode, sprev, end))
- goto fail;
+ if (ONIGENC_IS_MBC_WORD(encode, s, end)
+ == ONIGENC_IS_MBC_WORD(encode, sprev, end))
+ goto fail;
}
MOP_OUT;
JUMP;
CASE(OP_ASCII_WORD_BOUND) MOP_IN(OP_ASCII_WORD_BOUND);
if (ON_STR_BEGIN(s)) {
- DATA_ENSURE(1);
- if (! ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
- goto fail;
+ DATA_ENSURE(1);
+ if (! ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
+ goto fail;
}
else if (ON_STR_END(s)) {
- if (! ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
- goto fail;
+ if (! ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
+ goto fail;
}
else {
- if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)
- == ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
- goto fail;
+ if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)
+ == ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
+ goto fail;
}
MOP_OUT;
JUMP;
CASE(OP_NOT_WORD_BOUND) MOP_IN(OP_NOT_WORD_BOUND);
if (ON_STR_BEGIN(s)) {
- if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end))
- goto fail;
+ if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end))
+ goto fail;
}
else if (ON_STR_END(s)) {
- if (ONIGENC_IS_MBC_WORD(encode, sprev, end))
- goto fail;
+ if (ONIGENC_IS_MBC_WORD(encode, sprev, end))
+ goto fail;
}
else {
- if (ONIGENC_IS_MBC_WORD(encode, s, end)
- != ONIGENC_IS_MBC_WORD(encode, sprev, end))
- goto fail;
+ if (ONIGENC_IS_MBC_WORD(encode, s, end)
+ != ONIGENC_IS_MBC_WORD(encode, sprev, end))
+ goto fail;
}
MOP_OUT;
JUMP;
CASE(OP_NOT_ASCII_WORD_BOUND) MOP_IN(OP_NOT_ASCII_WORD_BOUND);
if (ON_STR_BEGIN(s)) {
- if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
- goto fail;
+ if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
+ goto fail;
}
else if (ON_STR_END(s)) {
- if (ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
- goto fail;
+ if (ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
+ goto fail;
}
else {
- if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)
- != ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
- goto fail;
+ if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)
+ != ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
+ goto fail;
}
MOP_OUT;
JUMP;
@@ -2367,43 +3312,39 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
#ifdef USE_WORD_BEGIN_END
CASE(OP_WORD_BEGIN) MOP_IN(OP_WORD_BEGIN);
if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) {
- if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
- MOP_OUT;
- JUMP;
+ if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
+ MOP_OUT;
+ JUMP;
}
}
goto fail;
- NEXT;
CASE(OP_ASCII_WORD_BEGIN) MOP_IN(OP_ASCII_WORD_BEGIN);
if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) {
- if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) {
- MOP_OUT;
- JUMP;
+ if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) {
+ MOP_OUT;
+ JUMP;
}
}
goto fail;
- NEXT;
CASE(OP_WORD_END) MOP_IN(OP_WORD_END);
if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
- if (ON_STR_END(s) || !ONIGENC_IS_MBC_WORD(encode, s, end)) {
- MOP_OUT;
- JUMP;
+ if (ON_STR_END(s) || !ONIGENC_IS_MBC_WORD(encode, s, end)) {
+ MOP_OUT;
+ JUMP;
}
}
goto fail;
- NEXT;
CASE(OP_ASCII_WORD_END) MOP_IN(OP_ASCII_WORD_END);
if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) {
- if (ON_STR_END(s) || !ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) {
- MOP_OUT;
- JUMP;
+ if (ON_STR_END(s) || !ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) {
+ MOP_OUT;
+ JUMP;
}
}
goto fail;
- NEXT;
#endif
CASE(OP_BEGIN_BUF) MOP_IN(OP_BEGIN_BUF);
@@ -2422,76 +3363,73 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE(OP_BEGIN_LINE) MOP_IN(OP_BEGIN_LINE);
if (ON_STR_BEGIN(s)) {
- if (IS_NOTBOL(msa->options)) goto fail;
- MOP_OUT;
- JUMP;
+ if (IS_NOTBOL(msa->options)) goto fail;
+ MOP_OUT;
+ JUMP;
}
else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)
#ifdef USE_CRNL_AS_LINE_TERMINATOR
- && !(IS_NEWLINE_CRLF(option)
- && ONIGENC_IS_MBC_CRNL(encode, sprev, end))
+ && !(IS_NEWLINE_CRLF(option)
+ && ONIGENC_IS_MBC_CRNL(encode, sprev, end))
#endif
- && !ON_STR_END(s)) {
- MOP_OUT;
+ && !ON_STR_END(s)) {
+ MOP_OUT;
JUMP;
}
goto fail;
- NEXT;
CASE(OP_END_LINE) MOP_IN(OP_END_LINE);
if (ON_STR_END(s)) {
#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
- if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE_EX(encode, sprev, str, end, option, 1)) {
+ if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE_EX(encode, sprev, str, end, option, 1)) {
#endif
- if (IS_NOTEOL(msa->options)) goto fail;
- MOP_OUT;
- JUMP;
+ if (IS_NOTEOL(msa->options)) goto fail;
+ MOP_OUT;
+ JUMP;
#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
- }
+ }
#endif
}
else if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 1)) {
- MOP_OUT;
+ MOP_OUT;
JUMP;
}
goto fail;
- NEXT;
CASE(OP_SEMI_END_BUF) MOP_IN(OP_SEMI_END_BUF);
if (ON_STR_END(s)) {
#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
- if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE_EX(encode, sprev, str, end, option, 1)) {
+ if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE_EX(encode, sprev, str, end, option, 1)) {
#endif
- if (IS_NOTEOL(msa->options)) goto fail;
- MOP_OUT;
- JUMP;
+ if (IS_NOTEOL(msa->options)) goto fail;
+ MOP_OUT;
+ JUMP;
#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
- }
+ }
#endif
}
else if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 1)) {
- UChar* ss = s + enclen(encode, s, end);
- if (ON_STR_END(ss)) {
- MOP_OUT;
- JUMP;
- }
+ UChar* ss = s + enclen(encode, s, end);
+ if (ON_STR_END(ss)) {
+ MOP_OUT;
+ JUMP;
+ }
#ifdef USE_CRNL_AS_LINE_TERMINATOR
- else if (IS_NEWLINE_CRLF(option)
- && ONIGENC_IS_MBC_CRNL(encode, s, end)) {
- ss += enclen(encode, ss, end);
- if (ON_STR_END(ss)) {
- MOP_OUT;
- JUMP;
- }
- }
+ else if (IS_NEWLINE_CRLF(option)
+ && ONIGENC_IS_MBC_CRNL(encode, s, end)) {
+ ss += enclen(encode, ss, end);
+ if (ON_STR_END(ss)) {
+ MOP_OUT;
+ JUMP;
+ }
+ }
#endif
}
goto fail;
- NEXT;
CASE(OP_BEGIN_POSITION) MOP_IN(OP_BEGIN_POSITION);
if (s != msa->gpos)
- goto fail;
+ goto fail;
MOP_OUT;
JUMP;
@@ -2530,8 +3468,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE(OP_MEMORY_END_PUSH_REC) MOP_IN(OP_MEMORY_END_PUSH_REC);
GET_MEMNUM_INC(mem, p);
STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */
- STACK_PUSH_MEM_END(mem, s);
mem_start_stk[mem] = GET_STACK_INDEX(stkp);
+ STACK_PUSH_MEM_END(mem, s);
MOP_OUT;
JUMP;
@@ -2541,9 +3479,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
STACK_GET_MEM_START(mem, stkp);
if (BIT_STATUS_AT(reg->bt_mem_start, mem))
- mem_start_stk[mem] = GET_STACK_INDEX(stkp);
+ mem_start_stk[mem] = GET_STACK_INDEX(stkp);
else
- mem_start_stk[mem] = (OnigStackIndex )((void* )stkp->u.mem.pstr);
+ mem_start_stk[mem] = (OnigStackIndex )((void* )stkp->u.mem.pstr);
STACK_PUSH_MEM_END_MARK(mem);
MOP_OUT;
@@ -2553,178 +3491,176 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE(OP_BACKREF1) MOP_IN(OP_BACKREF1);
mem = 1;
goto backref;
- NEXT;
CASE(OP_BACKREF2) MOP_IN(OP_BACKREF2);
mem = 2;
goto backref;
- NEXT;
CASE(OP_BACKREFN) MOP_IN(OP_BACKREFN);
GET_MEMNUM_INC(mem, p);
backref:
{
- int len;
- UChar *pstart, *pend;
-
- /* if you want to remove following line,
- you should check in parse and compile time. */
- if (mem > num_mem) goto fail;
- if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
- if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
-
- if (BIT_STATUS_AT(reg->bt_mem_start, mem))
- pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
- else
- pstart = (UChar* )((void* )mem_start_stk[mem]);
-
- pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
- ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
- : (UChar* )((void* )mem_end_stk[mem]));
- n = pend - pstart;
- DATA_ENSURE(n);
- sprev = s;
- STRING_CMP(pstart, s, n);
- while (sprev + (len = enclen(encode, sprev, end)) < s)
- sprev += len;
-
- MOP_OUT;
- JUMP;
+ int len;
+ UChar *pstart, *pend;
+
+ /* if you want to remove following line,
+ you should check in parse and compile time. */
+ if (mem > num_mem) goto fail;
+ if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
+ if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
+
+ if (BIT_STATUS_AT(reg->bt_mem_start, mem))
+ pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
+ else
+ pstart = (UChar* )((void* )mem_start_stk[mem]);
+
+ pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
+ ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
+ : (UChar* )((void* )mem_end_stk[mem]));
+ n = pend - pstart;
+ DATA_ENSURE(n);
+ sprev = s;
+ STRING_CMP(pstart, s, n);
+ while (sprev + (len = enclen_approx(encode, sprev, end)) < s)
+ sprev += len;
+
+ MOP_OUT;
+ JUMP;
}
CASE(OP_BACKREFN_IC) MOP_IN(OP_BACKREFN_IC);
GET_MEMNUM_INC(mem, p);
{
- int len;
- UChar *pstart, *pend;
-
- /* if you want to remove following line,
- you should check in parse and compile time. */
- if (mem > num_mem) goto fail;
- if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
- if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
-
- if (BIT_STATUS_AT(reg->bt_mem_start, mem))
- pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
- else
- pstart = (UChar* )((void* )mem_start_stk[mem]);
-
- pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
- ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
- : (UChar* )((void* )mem_end_stk[mem]));
- n = pend - pstart;
- DATA_ENSURE(n);
- sprev = s;
- STRING_CMP_IC(case_fold_flag, pstart, &s, (int)n, end);
- while (sprev + (len = enclen(encode, sprev, end)) < s)
- sprev += len;
-
- MOP_OUT;
- JUMP;
+ int len;
+ UChar *pstart, *pend;
+
+ /* if you want to remove following line,
+ you should check in parse and compile time. */
+ if (mem > num_mem) goto fail;
+ if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
+ if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
+
+ if (BIT_STATUS_AT(reg->bt_mem_start, mem))
+ pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
+ else
+ pstart = (UChar* )((void* )mem_start_stk[mem]);
+
+ pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
+ ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
+ : (UChar* )((void* )mem_end_stk[mem]));
+ n = pend - pstart;
+ DATA_ENSURE(n);
+ sprev = s;
+ STRING_CMP_IC(case_fold_flag, pstart, &s, n, end);
+ while (sprev + (len = enclen_approx(encode, sprev, end)) < s)
+ sprev += len;
+
+ MOP_OUT;
+ JUMP;
}
NEXT;
CASE(OP_BACKREF_MULTI) MOP_IN(OP_BACKREF_MULTI);
{
- int len, is_fail;
- UChar *pstart, *pend, *swork;
-
- GET_LENGTH_INC(tlen, p);
- for (i = 0; i < tlen; i++) {
- GET_MEMNUM_INC(mem, p);
-
- if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
- if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
-
- if (BIT_STATUS_AT(reg->bt_mem_start, mem))
- pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
- else
- pstart = (UChar* )((void* )mem_start_stk[mem]);
-
- pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
- ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
- : (UChar* )((void* )mem_end_stk[mem]));
- n = pend - pstart;
- DATA_ENSURE_CONTINUE(n);
- sprev = s;
- swork = s;
- STRING_CMP_VALUE(pstart, swork, n, is_fail);
- if (is_fail) continue;
- s = swork;
- while (sprev + (len = enclen(encode, sprev, end)) < s)
- sprev += len;
-
- p += (SIZE_MEMNUM * (tlen - i - 1));
- break; /* success */
- }
- if (i == tlen) goto fail;
- MOP_OUT;
- JUMP;
+ int len, is_fail;
+ UChar *pstart, *pend, *swork;
+
+ GET_LENGTH_INC(tlen, p);
+ for (i = 0; i < tlen; i++) {
+ GET_MEMNUM_INC(mem, p);
+
+ if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
+ if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
+
+ if (BIT_STATUS_AT(reg->bt_mem_start, mem))
+ pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
+ else
+ pstart = (UChar* )((void* )mem_start_stk[mem]);
+
+ pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
+ ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
+ : (UChar* )((void* )mem_end_stk[mem]));
+ n = pend - pstart;
+ DATA_ENSURE_CONTINUE(n);
+ sprev = s;
+ swork = s;
+ STRING_CMP_VALUE(pstart, swork, n, is_fail);
+ if (is_fail) continue;
+ s = swork;
+ while (sprev + (len = enclen_approx(encode, sprev, end)) < s)
+ sprev += len;
+
+ p += (SIZE_MEMNUM * (tlen - i - 1));
+ break; /* success */
+ }
+ if (i == tlen) goto fail;
+ MOP_OUT;
+ JUMP;
}
NEXT;
CASE(OP_BACKREF_MULTI_IC) MOP_IN(OP_BACKREF_MULTI_IC);
{
- int len, is_fail;
- UChar *pstart, *pend, *swork;
-
- GET_LENGTH_INC(tlen, p);
- for (i = 0; i < tlen; i++) {
- GET_MEMNUM_INC(mem, p);
-
- if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
- if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
-
- if (BIT_STATUS_AT(reg->bt_mem_start, mem))
- pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
- else
- pstart = (UChar* )((void* )mem_start_stk[mem]);
-
- pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
- ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
- : (UChar* )((void* )mem_end_stk[mem]));
- n = pend - pstart;
- DATA_ENSURE_CONTINUE(n);
- sprev = s;
- swork = s;
- STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, end, is_fail);
- if (is_fail) continue;
- s = swork;
- while (sprev + (len = enclen(encode, sprev, end)) < s)
- sprev += len;
-
- p += (SIZE_MEMNUM * (tlen - i - 1));
- break; /* success */
- }
- if (i == tlen) goto fail;
- MOP_OUT;
- JUMP;
+ int len, is_fail;
+ UChar *pstart, *pend, *swork;
+
+ GET_LENGTH_INC(tlen, p);
+ for (i = 0; i < tlen; i++) {
+ GET_MEMNUM_INC(mem, p);
+
+ if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
+ if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
+
+ if (BIT_STATUS_AT(reg->bt_mem_start, mem))
+ pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
+ else
+ pstart = (UChar* )((void* )mem_start_stk[mem]);
+
+ pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
+ ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
+ : (UChar* )((void* )mem_end_stk[mem]));
+ n = pend - pstart;
+ DATA_ENSURE_CONTINUE(n);
+ sprev = s;
+ swork = s;
+ STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, end, is_fail);
+ if (is_fail) continue;
+ s = swork;
+ while (sprev + (len = enclen(encode, sprev, end)) < s)
+ sprev += len;
+
+ p += (SIZE_MEMNUM * (tlen - i - 1));
+ break; /* success */
+ }
+ if (i == tlen) goto fail;
+ MOP_OUT;
+ JUMP;
}
#ifdef USE_BACKREF_WITH_LEVEL
CASE(OP_BACKREF_WITH_LEVEL)
{
- int len;
- OnigOptionType ic;
- LengthType level;
-
- GET_OPTION_INC(ic, p);
- GET_LENGTH_INC(level, p);
- GET_LENGTH_INC(tlen, p);
-
- sprev = s;
- if (backref_match_at_nested_level(reg, stk, stk_base, ic,
- case_fold_flag, (int )level, (int )tlen, p, &s, end)) {
- while (sprev + (len = enclen(encode, sprev, end)) < s)
- sprev += len;
-
- p += (SIZE_MEMNUM * tlen);
- }
- else
- goto fail;
-
- MOP_OUT;
- JUMP;
+ int len;
+ OnigOptionType ic;
+ LengthType level;
+
+ GET_OPTION_INC(ic, p);
+ GET_LENGTH_INC(level, p);
+ GET_LENGTH_INC(tlen, p);
+
+ sprev = s;
+ if (backref_match_at_nested_level(reg, stk, stk_base, ic,
+ case_fold_flag, (int )level, (int )tlen, p, &s, end)) {
+ while (sprev + (len = enclen(encode, sprev, end)) < s)
+ sprev += len;
+
+ p += (SIZE_MEMNUM * tlen);
+ }
+ else
+ goto fail;
+
+ MOP_OUT;
+ JUMP;
}
#endif
@@ -2751,33 +3687,33 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE(OP_NULL_CHECK_END) MOP_IN(OP_NULL_CHECK_END);
{
- int isnull;
+ int isnull;
- GET_MEMNUM_INC(mem, p); /* mem: null check id */
- STACK_NULL_CHECK(isnull, mem, s);
- if (isnull) {
+ GET_MEMNUM_INC(mem, p); /* mem: null check id */
+ STACK_NULL_CHECK(isnull, mem, s);
+ if (isnull) {
#ifdef ONIG_DEBUG_MATCH
- fprintf(stderr, "NULL_CHECK_END: skip id:%d, s:%"PRIuPTR" (%p)\n",
- (int )mem, (uintptr_t )s, s);
+ fprintf(stderr, "NULL_CHECK_END: skip id:%d, s:%"PRIuPTR" (%p)\n",
+ (int )mem, (uintptr_t )s, s);
#endif
- null_check_found:
- /* empty loop founded, skip next instruction */
- switch (*p++) {
- case OP_JUMP:
- case OP_PUSH:
- p += SIZE_RELADDR;
- break;
- case OP_REPEAT_INC:
- case OP_REPEAT_INC_NG:
- case OP_REPEAT_INC_SG:
- case OP_REPEAT_INC_NG_SG:
- p += SIZE_MEMNUM;
- break;
- default:
- goto unexpected_bytecode_error;
- break;
- }
- }
+ null_check_found:
+ /* empty loop founded, skip next instruction */
+ switch (*p++) {
+ case OP_JUMP:
+ case OP_PUSH:
+ p += SIZE_RELADDR;
+ break;
+ case OP_REPEAT_INC:
+ case OP_REPEAT_INC_NG:
+ case OP_REPEAT_INC_SG:
+ case OP_REPEAT_INC_NG_SG:
+ p += SIZE_MEMNUM;
+ break;
+ default:
+ goto unexpected_bytecode_error;
+ break;
+ }
+ }
}
MOP_OUT;
JUMP;
@@ -2785,18 +3721,18 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
CASE(OP_NULL_CHECK_END_MEMST) MOP_IN(OP_NULL_CHECK_END_MEMST);
{
- int isnull;
+ int isnull;
- GET_MEMNUM_INC(mem, p); /* mem: null check id */
- STACK_NULL_CHECK_MEMST(isnull, mem, s, reg);
- if (isnull) {
+ GET_MEMNUM_INC(mem, p); /* mem: null check id */
+ STACK_NULL_CHECK_MEMST(isnull, mem, s, reg);
+ if (isnull) {
# ifdef ONIG_DEBUG_MATCH
- fprintf(stderr, "NULL_CHECK_END_MEMST: skip id:%d, s:%"PRIuPTR" (%p)\n",
- (int )mem, (uintptr_t )s, s);
+ fprintf(stderr, "NULL_CHECK_END_MEMST: skip id:%d, s:%"PRIuPTR" (%p)\n",
+ (int )mem, (uintptr_t )s, s);
# endif
- if (isnull == -1) goto fail;
- goto null_check_found;
- }
+ if (isnull == -1) goto fail;
+ goto null_check_found;
+ }
}
MOP_OUT;
JUMP;
@@ -2806,25 +3742,25 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE(OP_NULL_CHECK_END_MEMST_PUSH)
MOP_IN(OP_NULL_CHECK_END_MEMST_PUSH);
{
- int isnull;
+ int isnull;
- GET_MEMNUM_INC(mem, p); /* mem: null check id */
+ GET_MEMNUM_INC(mem, p); /* mem: null check id */
# ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
- STACK_NULL_CHECK_MEMST_REC(isnull, mem, s, reg);
+ STACK_NULL_CHECK_MEMST_REC(isnull, mem, s, reg);
# else
- STACK_NULL_CHECK_REC(isnull, mem, s);
+ STACK_NULL_CHECK_REC(isnull, mem, s);
# endif
- if (isnull) {
+ if (isnull) {
# ifdef ONIG_DEBUG_MATCH
- fprintf(stderr, "NULL_CHECK_END_MEMST_PUSH: skip id:%d, s:%"PRIuPTR" (%p)\n",
- (int )mem, (uintptr_t )s, s);
+ fprintf(stderr, "NULL_CHECK_END_MEMST_PUSH: skip id:%d, s:%"PRIuPTR" (%p)\n",
+ (int )mem, (uintptr_t )s, s);
# endif
- if (isnull == -1) goto fail;
- goto null_check_found;
- }
- else {
- STACK_PUSH_NULL_CHECK_END(mem);
- }
+ if (isnull == -1) goto fail;
+ goto null_check_found;
+ }
+ else {
+ STACK_PUSH_NULL_CHECK_END(mem);
+ }
}
MOP_OUT;
JUMP;
@@ -2839,6 +3775,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE(OP_PUSH) MOP_IN(OP_PUSH);
GET_RELADDR_INC(addr, p);
+ CHECK_MATCH_CACHE;
STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
MOP_OUT;
JUMP;
@@ -2859,10 +3796,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
GET_RELADDR_INC(addr, p);
STATE_CHECK_VAL(scv, mem);
if (scv) {
- p += addr;
+ p += addr;
}
else {
- STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem, pkeep);
+ STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem, pkeep);
}
MOP_OUT;
JUMP;
@@ -2879,6 +3816,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE(OP_POP) MOP_IN(OP_POP);
STACK_POP_ONE;
+#ifdef USE_MATCH_CACHE
+ /* We need to increment num_fails here, for invoking a cache optimization correctly, */
+ /* because Onigmo makes a loop, which is pairwise disjoint to the following set, as atomic. */
+ msa->num_fails++;
+#endif
MOP_OUT;
JUMP;
@@ -2886,10 +3828,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE(OP_PUSH_OR_JUMP_EXACT1) MOP_IN(OP_PUSH_OR_JUMP_EXACT1);
GET_RELADDR_INC(addr, p);
if (*p == *s && DATA_ENSURE_CHECK1) {
- p++;
- STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
- MOP_OUT;
- JUMP;
+ p++;
+ CHECK_MATCH_CACHE;
+ STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
+ MOP_OUT;
+ JUMP;
}
p += (addr + 1);
MOP_OUT;
@@ -2898,45 +3841,49 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE(OP_PUSH_IF_PEEK_NEXT) MOP_IN(OP_PUSH_IF_PEEK_NEXT);
GET_RELADDR_INC(addr, p);
+ CHECK_MATCH_CACHE;
if (*p == *s) {
- p++;
- STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
- MOP_OUT;
- JUMP;
+ p++;
+ STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
+ MOP_OUT;
+ JUMP;
}
p++;
+ INC_NUM_FAILS;
MOP_OUT;
JUMP;
CASE(OP_REPEAT) MOP_IN(OP_REPEAT);
{
- GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
- GET_RELADDR_INC(addr, p);
+ GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
+ GET_RELADDR_INC(addr, p);
- STACK_ENSURE(1);
- repeat_stk[mem] = GET_STACK_INDEX(stk);
- STACK_PUSH_REPEAT(mem, p);
+ STACK_ENSURE(1);
+ repeat_stk[mem] = GET_STACK_INDEX(stk);
+ STACK_PUSH_REPEAT(mem, p);
- if (reg->repeat_range[mem].lower == 0) {
- STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
- }
+ if (reg->repeat_range[mem].lower == 0) {
+ CHECK_MATCH_CACHE;
+ STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
+ }
}
MOP_OUT;
JUMP;
CASE(OP_REPEAT_NG) MOP_IN(OP_REPEAT_NG);
{
- GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
- GET_RELADDR_INC(addr, p);
-
- STACK_ENSURE(1);
- repeat_stk[mem] = GET_STACK_INDEX(stk);
- STACK_PUSH_REPEAT(mem, p);
-
- if (reg->repeat_range[mem].lower == 0) {
- STACK_PUSH_ALT(p, s, sprev, pkeep);
- p += addr;
- }
+ GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
+ GET_RELADDR_INC(addr, p);
+
+ STACK_ENSURE(1);
+ repeat_stk[mem] = GET_STACK_INDEX(stk);
+ STACK_PUSH_REPEAT(mem, p);
+
+ if (reg->repeat_range[mem].lower == 0) {
+ CHECK_MATCH_CACHE;
+ STACK_PUSH_ALT(p, s, sprev, pkeep);
+ p += addr;
+ }
}
MOP_OUT;
JUMP;
@@ -2949,14 +3896,23 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
repeat_inc:
stkp->u.repeat.count++;
if (stkp->u.repeat.count >= reg->repeat_range[mem].upper) {
- /* end of repeat. Nothing to do. */
+ /* end of repeat. Nothing to do. */
}
else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
- STACK_PUSH_ALT(p, s, sprev, pkeep);
- p = STACK_AT(si)->u.repeat.pcode; /* Don't use stkp after PUSH. */
+#ifdef USE_MATCH_CACHE
+ if (*pbegin == OP_REPEAT_INC) {
+#undef MATCH_CACHE_HIT
+#define MATCH_CACHE_HIT stkp->u.repeat.count--;
+ CHECK_MATCH_CACHE;
+#undef MATCH_CACHE_HIT
+#define MATCH_CACHE_HIT ((void) 0)
+ }
+#endif
+ STACK_PUSH_ALT(p, s, sprev, pkeep);
+ p = STACK_AT(si)->u.repeat.pcode; /* Don't use stkp after PUSH. */
}
else {
- p = stkp->u.repeat.pcode;
+ p = stkp->u.repeat.pcode;
}
STACK_PUSH_REPEAT_INC(si);
MOP_OUT;
@@ -2968,7 +3924,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
STACK_GET_REPEAT(mem, stkp);
si = GET_STACK_INDEX(stkp);
goto repeat_inc;
- NEXT;
CASE(OP_REPEAT_INC_NG) MOP_IN(OP_REPEAT_INC_NG);
GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
@@ -2978,19 +3933,22 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
repeat_inc_ng:
stkp->u.repeat.count++;
if (stkp->u.repeat.count < reg->repeat_range[mem].upper) {
- if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
- UChar* pcode = stkp->u.repeat.pcode;
-
- STACK_PUSH_REPEAT_INC(si);
- STACK_PUSH_ALT(pcode, s, sprev, pkeep);
- }
- else {
- p = stkp->u.repeat.pcode;
- STACK_PUSH_REPEAT_INC(si);
- }
+ if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
+ UChar* pcode = stkp->u.repeat.pcode;
+
+ STACK_PUSH_REPEAT_INC(si);
+ if (*pbegin == OP_REPEAT_INC_NG) {
+ CHECK_MATCH_CACHE;
+ }
+ STACK_PUSH_ALT(pcode, s, sprev, pkeep);
+ }
+ else {
+ p = stkp->u.repeat.pcode;
+ STACK_PUSH_REPEAT_INC(si);
+ }
}
else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) {
- STACK_PUSH_REPEAT_INC(si);
+ STACK_PUSH_REPEAT_INC(si);
}
MOP_OUT;
CHECK_INTERRUPT_IN_MATCH_AT;
@@ -3001,7 +3959,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
STACK_GET_REPEAT(mem, stkp);
si = GET_STACK_INDEX(stkp);
goto repeat_inc_ng;
- NEXT;
CASE(OP_PUSH_POS) MOP_IN(OP_PUSH_POS);
STACK_PUSH_POS(s, sprev, pkeep);
@@ -3010,9 +3967,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE(OP_POP_POS) MOP_IN(OP_POP_POS);
{
- STACK_POS_END(stkp);
- s = stkp->u.state.pstr;
- sprev = stkp->u.state.pstr_prev;
+ STACK_POS_END(stkp);
+ s = stkp->u.state.pstr;
+ sprev = stkp->u.state.pstr_prev;
}
MOP_OUT;
JUMP;
@@ -3026,7 +3983,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE(OP_FAIL_POS) MOP_IN(OP_FAIL_POS);
STACK_POP_TIL_POS_NOT;
goto fail;
- NEXT;
CASE(OP_PUSH_STOP_BT) MOP_IN(OP_PUSH_STOP_BT);
STACK_PUSH_STOP_BT;
@@ -3051,15 +4007,15 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
GET_LENGTH_INC(tlen, p);
q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, end, (int )tlen);
if (IS_NULL(q)) {
- /* too short case -> success. ex. /(?<!XXX)a/.match("a")
- If you want to change to fail, replace following line. */
- p += addr;
- /* goto fail; */
+ /* too short case -> success. ex. /(?<!XXX)a/.match("a")
+ If you want to change to fail, replace following line. */
+ p += addr;
+ /* goto fail; */
}
else {
- STACK_PUSH_LOOK_BEHIND_NOT(p + addr, s, sprev, pkeep);
- s = q;
- sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s, end);
+ STACK_PUSH_LOOK_BEHIND_NOT(p + addr, s, sprev, pkeep);
+ s = q;
+ sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s, end);
}
MOP_OUT;
JUMP;
@@ -3067,7 +4023,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE(OP_FAIL_LOOK_BEHIND_NOT) MOP_IN(OP_FAIL_LOOK_BEHIND_NOT);
STACK_POP_TIL_LOOK_BEHIND_NOT;
goto fail;
- NEXT;
CASE(OP_PUSH_ABSENT_POS) MOP_IN(OP_PUSH_ABSENT_POS);
/* Save the absent-start-pos and the original end-pos. */
@@ -3077,39 +4032,44 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE(OP_ABSENT) MOP_IN(OP_ABSENT);
{
- const UChar* aend = ABSENT_END_POS;
- UChar* absent;
- UChar* selfp = p - 1;
+ const UChar* aend = ABSENT_END_POS;
+ UChar* absent;
+ UChar* selfp = p - 1;
- STACK_POP_ABSENT_POS(absent, ABSENT_END_POS); /* Restore end-pos. */
- GET_RELADDR_INC(addr, p);
+ STACK_POP_ABSENT_POS(absent, ABSENT_END_POS); /* Restore end-pos. */
+ GET_RELADDR_INC(addr, p);
#ifdef ONIG_DEBUG_MATCH
- fprintf(stderr, "ABSENT: s:%p, end:%p, absent:%p, aend:%p\n", s, end, absent, aend);
+ fprintf(stderr, "ABSENT: s:%p, end:%p, absent:%p, aend:%p\n", s, end, absent, aend);
#endif
- if ((absent > aend) && (s > absent)) {
- /* An empty match occurred in (?~...) at the start point.
- * Never match. */
- STACK_POP;
- goto fail;
- }
- else if ((s >= aend) && (s > absent)) {
- if (s > aend) {
- /* Only one (or less) character matched in the last iteration.
- * This is not a possible point. */
- goto fail;
- }
- /* All possible points were found. Try matching after (?~...). */
- DATA_ENSURE(0);
- p += addr;
- }
- else {
- STACK_PUSH_ALT(p + addr, s, sprev, pkeep); /* Push possible point. */
- n = enclen(encode, s, end);
- STACK_PUSH_ABSENT_POS(absent, ABSENT_END_POS); /* Save the original pos. */
- STACK_PUSH_ALT(selfp, s + n, s, pkeep); /* Next iteration. */
- STACK_PUSH_ABSENT;
- ABSENT_END_POS = aend;
- }
+ if ((absent > aend) && (s > absent)) {
+ /* An empty match occurred in (?~...) at the start point.
+ * Never match. */
+ STACK_POP;
+ goto fail;
+ }
+ else if ((s >= aend) && (s > absent)) {
+ if (s > aend) {
+ /* Only one (or less) character matched in the last iteration.
+ * This is not a possible point. */
+ goto fail;
+ }
+ /* All possible points were found. Try matching after (?~...). */
+ DATA_ENSURE(0);
+ p += addr;
+ }
+ else if (s == end) {
+ /* At the end of the string, just match with it */
+ DATA_ENSURE(0);
+ p += addr;
+ }
+ else {
+ STACK_PUSH_ALT(p + addr, s, sprev, pkeep); /* Push possible point. */
+ n = enclen(encode, s, end);
+ STACK_PUSH_ABSENT_POS(absent, ABSENT_END_POS); /* Save the original pos. */
+ STACK_PUSH_ALT(selfp, s + n, s, pkeep); /* Next iteration. */
+ STACK_PUSH_ABSENT;
+ ABSENT_END_POS = aend;
+ }
}
MOP_OUT;
JUMP;
@@ -3118,13 +4078,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
/* The pattern inside (?~...) was matched.
* Set the end-pos temporary and go to next iteration. */
if (sprev < ABSENT_END_POS)
- ABSENT_END_POS = sprev;
+ ABSENT_END_POS = sprev;
#ifdef ONIG_DEBUG_MATCH
fprintf(stderr, "ABSENT_END: end:%p\n", ABSENT_END_POS);
#endif
STACK_POP_TIL_ABSENT;
goto fail;
- NEXT;
#ifdef USE_SUBEXP_CALL
CASE(OP_CALL) MOP_IN(OP_CALL);
@@ -3145,22 +4104,21 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
GET_MEMNUM_INC(mem, p);
GET_RELADDR_INC(addr, p);
if ((mem > num_mem) ||
- (mem_end_stk[mem] == INVALID_STACK_INDEX) ||
- (mem_start_stk[mem] == INVALID_STACK_INDEX)) {
- p += addr;
+ (mem_end_stk[mem] == INVALID_STACK_INDEX) ||
+ (mem_start_stk[mem] == INVALID_STACK_INDEX)) {
+ p += addr;
}
MOP_OUT;
JUMP;
CASE(OP_FINISH)
goto finish;
- NEXT;
CASE(OP_FAIL)
if (0) {
- /* fall */
+ /* fall */
fail:
- MOP_OUT;
+ MOP_OUT;
}
MOP_IN(OP_FAIL);
STACK_POP;
@@ -3169,14 +4127,78 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
sprev = stk->u.state.pstr_prev;
pkeep = stk->u.state.pkeep;
+#ifdef USE_MATCH_CACHE
+ if (
+ msa->match_cache_status != MATCH_CACHE_STATUS_DISABLED &&
+ ++msa->num_fails >= (long)(end - str) * msa->num_cache_opcodes
+ ) {
+ if (msa->match_cache_status == MATCH_CACHE_STATUS_UNINIT) {
+ msa->match_cache_status = MATCH_CACHE_STATUS_INIT;
+ OnigPosition r = count_num_cache_opcodes(reg, &msa->num_cache_opcodes);
+ if (r < 0) goto bytecode_error;
+ }
+ if (msa->num_cache_opcodes == NUM_CACHE_OPCODES_IMPOSSIBLE || msa->num_cache_opcodes == 0) {
+ msa->match_cache_status = MATCH_CACHE_STATUS_DISABLED;
+ goto fail_match_cache;
+ }
+ if (msa->num_fails < (long)(end - str) * msa->num_cache_opcodes) {
+ goto fail_match_cache;
+ }
+ if (msa->cache_opcodes == NULL) {
+ msa->match_cache_status = MATCH_CACHE_STATUS_ENABLED;
+ OnigCacheOpcode* cache_opcodes = (OnigCacheOpcode*)xmalloc(msa->num_cache_opcodes * sizeof(OnigCacheOpcode));
+ if (cache_opcodes == NULL) {
+ return ONIGERR_MEMORY;
+ }
+ OnigPosition r = init_cache_opcodes(reg, cache_opcodes, &msa->num_cache_points);
+ if (r < 0) {
+ if (r == ONIGERR_UNEXPECTED_BYTECODE) goto unexpected_bytecode_error;
+ else goto bytecode_error;
+ }
+ msa->cache_opcodes = cache_opcodes;
+#ifdef ONIG_DEBUG_MATCH_CACHE
+ fprintf(stderr, "MATCH CACHE: #cache opcodes = %ld\n", msa->num_cache_opcodes);
+ fprintf(stderr, "MATCH CACHE: #cache points = %ld\n", msa->num_cache_points);
+ fprintf(stderr, "MATCH CACHE: cache opcodes (%p):\n", msa->cache_opcodes);
+ for (int i = 0; i < msa->num_cache_opcodes; i++) {
+ fprintf(stderr, "MATCH CACHE: [%p] cache_point=%ld outer_repeat_mem=%d num_cache_opcodes_at_outer_repeat=%ld num_cache_opcodes_in_outer_repeat=%ld lookaround_nesting=%d match_addr=%p\n", msa->cache_opcodes[i].addr, msa->cache_opcodes[i].cache_point, msa->cache_opcodes[i].outer_repeat_mem, msa->cache_opcodes[i].num_cache_points_at_outer_repeat, msa->cache_opcodes[i].num_cache_points_in_outer_repeat, msa->cache_opcodes[i].lookaround_nesting, msa->cache_opcodes[i].match_addr);
+ }
+#endif
+ }
+ if (msa->match_cache_buf == NULL) {
+ size_t length = (end - str) + 1;
+ size_t num_match_cache_points = (size_t)msa->num_cache_points * length;
+#ifdef ONIG_DEBUG_MATCH_CACHE
+ fprintf(stderr, "MATCH CACHE: #match cache points = %zu (length = %zu)\n", num_match_cache_points, length);
+#endif
+ /* Overflow check */
+ if (num_match_cache_points / length != (size_t)msa->num_cache_points) {
+ return ONIGERR_MEMORY;
+ }
+ if (num_match_cache_points >= LONG_MAX_LIMIT) {
+ return ONIGERR_MEMORY;
+ }
+ size_t match_cache_buf_length = (num_match_cache_points >> 3) + (num_match_cache_points & 7 ? 1 : 0) + 1;
+ uint8_t* match_cache_buf = (uint8_t*)xmalloc(match_cache_buf_length * sizeof(uint8_t));
+ if (match_cache_buf == NULL) {
+ return ONIGERR_MEMORY;
+ }
+ xmemset(match_cache_buf, 0, match_cache_buf_length * sizeof(uint8_t));
+ msa->match_cache_buf = match_cache_buf;
+ }
+ }
+ fail_match_cache:
+#endif
+
#ifdef USE_COMBINATION_EXPLOSION_CHECK
if (stk->u.state.state_check != 0) {
- stk->type = STK_STATE_CHECK_MARK;
- stk++;
+ stk->type = STK_STATE_CHECK_MARK;
+ stk++;
}
#endif
MOP_OUT;
+ CHECK_INTERRUPT_IN_MATCH_AT;
JUMP;
DEFAULT
@@ -3185,31 +4207,36 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
finish:
STACK_SAVE;
- if (xmalloc_base) xfree(xmalloc_base);
+ xfree(xmalloc_base);
return best_len;
#ifdef ONIG_DEBUG
stack_error:
STACK_SAVE;
- if (xmalloc_base) xfree(xmalloc_base);
+ xfree(xmalloc_base);
return ONIGERR_STACK_BUG;
#endif
bytecode_error:
STACK_SAVE;
- if (xmalloc_base) xfree(xmalloc_base);
+ xfree(xmalloc_base);
return ONIGERR_UNDEFINED_BYTECODE;
unexpected_bytecode_error:
STACK_SAVE;
- if (xmalloc_base) xfree(xmalloc_base);
+ xfree(xmalloc_base);
return ONIGERR_UNEXPECTED_BYTECODE;
+
+ timeout:
+ STACK_SAVE;
+ xfree(xmalloc_base);
+ return ONIGERR_TIMEOUT;
}
static UChar*
slow_search(OnigEncoding enc, UChar* target, UChar* target_end,
- const UChar* text, const UChar* text_end, UChar* text_range)
+ const UChar* text, const UChar* text_end, UChar* text_range)
{
UChar *t, *p, *s, *end;
@@ -3225,10 +4252,10 @@ slow_search(OnigEncoding enc, UChar* target, UChar* target_end,
while (s < end) {
if (*s == *target) {
- p = s + 1;
- t = target + 1;
- if (target_end == t || memcmp(t, p, target_end - t) == 0)
- return s;
+ p = s + 1;
+ t = target + 1;
+ if (target_end == t || memcmp(t, p, target_end - t) == 0)
+ return s;
}
s += n;
}
@@ -3239,7 +4266,7 @@ slow_search(OnigEncoding enc, UChar* target, UChar* target_end,
p = s + 1;
t = target + 1;
if (target_end == t || memcmp(t, p, target_end - t) == 0)
- return s;
+ return s;
}
s += enclen(enc, s, text_end);
}
@@ -3249,8 +4276,8 @@ slow_search(OnigEncoding enc, UChar* target, UChar* target_end,
static int
str_lower_case_match(OnigEncoding enc, int case_fold_flag,
- const UChar* t, const UChar* tend,
- const UChar* p, const UChar* end)
+ const UChar* t, const UChar* tend,
+ const UChar* p, const UChar* end)
{
int lowlen;
UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
@@ -3269,8 +4296,8 @@ str_lower_case_match(OnigEncoding enc, int case_fold_flag,
static UChar*
slow_search_ic(OnigEncoding enc, int case_fold_flag,
- UChar* target, UChar* target_end,
- const UChar* text, const UChar* text_end, UChar* text_range)
+ UChar* target, UChar* target_end,
+ const UChar* text, const UChar* text_end, UChar* text_range)
{
UChar *s, *end;
@@ -3283,7 +4310,7 @@ slow_search_ic(OnigEncoding enc, int case_fold_flag,
while (s < end) {
if (str_lower_case_match(enc, case_fold_flag, target, target_end,
- s, text_end))
+ s, text_end))
return s;
s += enclen(enc, s, text_end);
@@ -3294,8 +4321,8 @@ slow_search_ic(OnigEncoding enc, int case_fold_flag,
static UChar*
slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end,
- const UChar* text, const UChar* adjust_text,
- const UChar* text_end, const UChar* text_start)
+ const UChar* text, const UChar* adjust_text,
+ const UChar* text_end, const UChar* text_start)
{
UChar *t, *p, *s;
@@ -3311,12 +4338,12 @@ slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end,
p = s + 1;
t = target + 1;
while (t < target_end) {
- if (*t != *p++)
- break;
- t++;
+ if (*t != *p++)
+ break;
+ t++;
}
if (t == target_end)
- return s;
+ return s;
}
s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s, text_end);
}
@@ -3326,9 +4353,9 @@ slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end,
static UChar*
slow_search_backward_ic(OnigEncoding enc, int case_fold_flag,
- UChar* target, UChar* target_end,
- const UChar* text, const UChar* adjust_text,
- const UChar* text_end, const UChar* text_start)
+ UChar* target, UChar* target_end,
+ const UChar* text, const UChar* adjust_text,
+ const UChar* text_end, const UChar* text_start)
{
UChar *s;
@@ -3341,7 +4368,7 @@ slow_search_backward_ic(OnigEncoding enc, int case_fold_flag,
while (s >= text) {
if (str_lower_case_match(enc, case_fold_flag,
- target, target_end, s, text_end))
+ target, target_end, s, text_end))
return s;
s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s, text_end);
@@ -3350,224 +4377,11 @@ slow_search_backward_ic(OnigEncoding enc, int case_fold_flag,
return (UChar* )NULL;
}
-#ifndef USE_SUNDAY_QUICK_SEARCH
-/* Boyer-Moore-Horspool search applied to a multibyte string */
-static UChar*
-bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
- const UChar* text, const UChar* text_end,
- const UChar* text_range)
-{
- const UChar *s, *se, *t, *p, *end;
- const UChar *tail;
- ptrdiff_t skip, tlen1;
-
-# ifdef ONIG_DEBUG_SEARCH
- fprintf(stderr, "bm_search_notrev: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
- (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
-# endif
-
- tail = target_end - 1;
- tlen1 = tail - target;
- end = text_range;
- if (end + tlen1 > text_end)
- end = text_end - tlen1;
-
- s = text;
-
- if (IS_NULL(reg->int_map)) {
- while (s < end) {
- p = se = s + tlen1;
- t = tail;
- while (*p == *t) {
- if (t == target) return (UChar* )s;
- p--; t--;
- }
- skip = reg->map[*se];
- t = s;
- do {
- s += enclen(reg->enc, s, end);
- } while ((s - t) < skip && s < end);
- }
- }
- else {
-# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
- while (s < end) {
- p = se = s + tlen1;
- t = tail;
- while (*p == *t) {
- if (t == target) return (UChar* )s;
- p--; t--;
- }
- skip = reg->int_map[*se];
- t = s;
- do {
- s += enclen(reg->enc, s, end);
- } while ((s - t) < skip && s < end);
- }
-# endif
- }
-
- return (UChar* )NULL;
-}
-
-/* Boyer-Moore-Horspool search */
-static UChar*
-bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
- const UChar* text, const UChar* text_end, const UChar* text_range)
-{
- const UChar *s, *t, *p, *end;
- const UChar *tail;
-
-# ifdef ONIG_DEBUG_SEARCH
- fprintf(stderr, "bm_search: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
- (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
-# endif
-
- end = text_range + (target_end - target) - 1;
- if (end > text_end)
- end = text_end;
-
- tail = target_end - 1;
- s = text + (target_end - target) - 1;
- if (IS_NULL(reg->int_map)) {
- while (s < end) {
- p = s;
- t = tail;
-# ifdef ONIG_DEBUG_SEARCH
- fprintf(stderr, "bm_search_loop: pos: %"PRIdPTR" %s\n",
- (intptr_t )(s - text), s);
-# endif
- while (*p == *t) {
- if (t == target) return (UChar* )p;
- p--; t--;
- }
- s += reg->map[*s];
- }
- }
- else { /* see int_map[] */
-# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
- while (s < end) {
- p = s;
- t = tail;
- while (*p == *t) {
- if (t == target) return (UChar* )p;
- p--; t--;
- }
- s += reg->int_map[*s];
- }
-# endif
- }
- return (UChar* )NULL;
-}
-
-/* Boyer-Moore-Horspool search applied to a multibyte string (ignore case) */
-static UChar*
-bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end,
- const UChar* text, const UChar* text_end,
- const UChar* text_range)
-{
- const UChar *s, *se, *t, *end;
- const UChar *tail;
- ptrdiff_t skip, tlen1;
- OnigEncoding enc = reg->enc;
- int case_fold_flag = reg->case_fold_flag;
-
-# ifdef ONIG_DEBUG_SEARCH
- fprintf(stderr, "bm_search_notrev_ic: text: %d (%p), text_end: %d (%p), text_range: %d (%p)\n",
- (int )text, text, (int )text_end, text_end, (int )text_range, text_range);
-# endif
-
- tail = target_end - 1;
- tlen1 = tail - target;
- end = text_range;
- if (end + tlen1 > text_end)
- end = text_end - tlen1;
-
- s = text;
-
- if (IS_NULL(reg->int_map)) {
- while (s < end) {
- se = s + tlen1;
- if (str_lower_case_match(enc, case_fold_flag, target, target_end,
- s, se + 1))
- return (UChar* )s;
- skip = reg->map[*se];
- t = s;
- do {
- s += enclen(reg->enc, s, end);
- } while ((s - t) < skip && s < end);
- }
- }
- else {
-# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
- while (s < end) {
- se = s + tlen1;
- if (str_lower_case_match(enc, case_fold_flag, target, target_end,
- s, se + 1))
- return (UChar* )s;
- skip = reg->int_map[*se];
- t = s;
- do {
- s += enclen(reg->enc, s, end);
- } while ((s - t) < skip && s < end);
- }
-# endif
- }
-
- return (UChar* )NULL;
-}
-
-/* Boyer-Moore-Horspool search (ignore case) */
-static UChar*
-bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end,
- const UChar* text, const UChar* text_end, const UChar* text_range)
-{
- const UChar *s, *p, *end;
- const UChar *tail;
- OnigEncoding enc = reg->enc;
- int case_fold_flag = reg->case_fold_flag;
-
-# ifdef ONIG_DEBUG_SEARCH
- fprintf(stderr, "bm_search_ic: text: %d (%p), text_end: %d (%p), text_range: %d (%p)\n",
- (int )text, text, (int )text_end, text_end, (int )text_range, text_range);
-# endif
-
- end = text_range + (target_end - target) - 1;
- if (end > text_end)
- end = text_end;
-
- tail = target_end - 1;
- s = text + (target_end - target) - 1;
- if (IS_NULL(reg->int_map)) {
- while (s < end) {
- p = s - (target_end - target) + 1;
- if (str_lower_case_match(enc, case_fold_flag, target, target_end,
- p, s + 1))
- return (UChar* )p;
- s += reg->map[*s];
- }
- }
- else { /* see int_map[] */
-# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
- while (s < end) {
- p = s - (target_end - target) + 1;
- if (str_lower_case_match(enc, case_fold_flag, target, target_end,
- p, s + 1))
- return (UChar* )p;
- s += reg->int_map[*s];
- }
-# endif
- }
- return (UChar* )NULL;
-}
-
-#else /* USE_SUNDAY_QUICK_SEARCH */
-
/* Sunday's quick search applied to a multibyte string */
static UChar*
bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
- const UChar* text, const UChar* text_end,
- const UChar* text_range)
+ const UChar* text, const UChar* text_end,
+ const UChar* text_range)
{
const UChar *s, *se, *t, *p, *end;
const UChar *tail;
@@ -3576,7 +4390,7 @@ bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
# ifdef ONIG_DEBUG_SEARCH
fprintf(stderr, "bm_search_notrev: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
- (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
+ (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
# endif
tail = target_end - 1;
@@ -3587,39 +4401,19 @@ bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
s = text;
- if (IS_NULL(reg->int_map)) {
- while (s < end) {
- p = se = s + tlen1;
- t = tail;
- while (*p == *t) {
- if (t == target) return (UChar* )s;
- p--; t--;
- }
- if (s + 1 >= end) break;
- skip = reg->map[se[1]];
- t = s;
- do {
- s += enclen(enc, s, end);
- } while ((s - t) < skip && s < end);
- }
- }
- else {
-# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
- while (s < end) {
- p = se = s + tlen1;
- t = tail;
- while (*p == *t) {
- if (t == target) return (UChar* )s;
- p--; t--;
- }
- if (s + 1 >= end) break;
- skip = reg->int_map[se[1]];
- t = s;
- do {
- s += enclen(enc, s, end);
- } while ((s - t) < skip && s < end);
+ while (s < end) {
+ p = se = s + tlen1;
+ t = tail;
+ while (*p == *t) {
+ if (t == target) return (UChar* )s;
+ p--; t--;
}
-# endif
+ if (s + 1 >= end) break;
+ skip = reg->map[se[1]];
+ t = s;
+ do {
+ s += enclen(enc, s, end);
+ } while ((s - t) < skip && s < end);
}
return (UChar* )NULL;
@@ -3628,7 +4422,7 @@ bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
/* Sunday's quick search */
static UChar*
bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
- const UChar* text, const UChar* text_end, const UChar* text_range)
+ const UChar* text, const UChar* text_end, const UChar* text_range)
{
const UChar *s, *t, *p, *end;
const UChar *tail;
@@ -3636,7 +4430,7 @@ bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
# ifdef ONIG_DEBUG_SEARCH
fprintf(stderr, "bm_search: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
- (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
+ (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
# endif
tail = target_end - 1;
@@ -3646,40 +4440,25 @@ bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
end = text_end;
s = text + tlen1;
- if (IS_NULL(reg->int_map)) {
- while (s < end) {
- p = s;
- t = tail;
- while (*p == *t) {
- if (t == target) return (UChar* )p;
- p--; t--;
- }
- if (s + 1 >= end) break;
- s += reg->map[s[1]];
- }
- }
- else { /* see int_map[] */
-# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
- while (s < end) {
- p = s;
- t = tail;
- while (*p == *t) {
- if (t == target) return (UChar* )p;
- p--; t--;
- }
- if (s + 1 >= end) break;
- s += reg->int_map[s[1]];
+ while (s < end) {
+ p = s;
+ t = tail;
+ while (*p == *t) {
+ if (t == target) return (UChar* )p;
+ p--; t--;
}
-# endif
+ if (s + 1 >= end) break;
+ s += reg->map[s[1]];
}
+
return (UChar* )NULL;
}
/* Sunday's quick search applied to a multibyte string (ignore case) */
static UChar*
bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end,
- const UChar* text, const UChar* text_end,
- const UChar* text_range)
+ const UChar* text, const UChar* text_end,
+ const UChar* text_range)
{
const UChar *s, *se, *t, *end;
const UChar *tail;
@@ -3689,7 +4468,7 @@ bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end,
# ifdef ONIG_DEBUG_SEARCH
fprintf(stderr, "bm_search_notrev_ic: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
- (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
+ (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
# endif
tail = target_end - 1;
@@ -3700,35 +4479,17 @@ bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end,
s = text;
- if (IS_NULL(reg->int_map)) {
- while (s < end) {
- se = s + tlen1;
- if (str_lower_case_match(enc, case_fold_flag, target, target_end,
- s, se + 1))
- return (UChar* )s;
- if (s + 1 >= end) break;
- skip = reg->map[se[1]];
- t = s;
- do {
- s += enclen(enc, s, end);
- } while ((s - t) < skip && s < end);
- }
- }
- else {
-# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
- while (s < end) {
- se = s + tlen1;
- if (str_lower_case_match(enc, case_fold_flag, target, target_end,
- s, se + 1))
- return (UChar* )s;
- if (s + 1 >= end) break;
- skip = reg->int_map[se[1]];
- t = s;
- do {
- s += enclen(enc, s, end);
- } while ((s - t) < skip && s < end);
- }
-# endif
+ while (s < end) {
+ se = s + tlen1;
+ if (str_lower_case_match(enc, case_fold_flag, target, target_end,
+ s, se + 1))
+ return (UChar* )s;
+ if (s + 1 >= end) break;
+ skip = reg->map[se[1]];
+ t = s;
+ do {
+ s += enclen(enc, s, end);
+ } while ((s - t) < skip && s < end);
}
return (UChar* )NULL;
@@ -3737,7 +4498,7 @@ bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end,
/* Sunday's quick search (ignore case) */
static UChar*
bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end,
- const UChar* text, const UChar* text_end, const UChar* text_range)
+ const UChar* text, const UChar* text_end, const UChar* text_range)
{
const UChar *s, *p, *end;
const UChar *tail;
@@ -3747,7 +4508,7 @@ bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end,
# ifdef ONIG_DEBUG_SEARCH
fprintf(stderr, "bm_search_ic: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
- (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
+ (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
# endif
tail = target_end - 1;
@@ -3757,87 +4518,21 @@ bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end,
end = text_end;
s = text + tlen1;
- if (IS_NULL(reg->int_map)) {
- while (s < end) {
- p = s - tlen1;
- if (str_lower_case_match(enc, case_fold_flag, target, target_end,
- p, s + 1))
- return (UChar* )p;
- if (s + 1 >= end) break;
- s += reg->map[s[1]];
- }
- }
- else { /* see int_map[] */
-# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
- while (s < end) {
- p = s - tlen1;
- if (str_lower_case_match(enc, case_fold_flag, target, target_end,
- p, s + 1))
- return (UChar* )p;
- if (s + 1 >= end) break;
- s += reg->int_map[s[1]];
- }
-# endif
- }
- return (UChar* )NULL;
-}
-#endif /* USE_SUNDAY_QUICK_SEARCH */
-
-#ifdef USE_INT_MAP_BACKWARD
-static int
-set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED,
- int** skip)
-{
- int i, len;
-
- if (IS_NULL(*skip)) {
- *skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
- if (IS_NULL(*skip)) return ONIGERR_MEMORY;
- }
-
- len = (int )(end - s);
- for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
- (*skip)[i] = len;
-
- for (i = len - 1; i > 0; i--)
- (*skip)[s[i]] = i;
-
- return 0;
-}
-
-static UChar*
-bm_search_backward(regex_t* reg, const UChar* target, const UChar* target_end,
- const UChar* text, const UChar* adjust_text,
- const UChar* text_end, const UChar* text_start)
-{
- const UChar *s, *t, *p;
-
- s = text_end - (target_end - target);
- if (text_start < s)
- s = text_start;
- else
- s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s, text_end);
-
- while (s >= text) {
- p = s;
- t = target;
- while (t < target_end && *p == *t) {
- p++; t++;
- }
- if (t == target_end)
- return (UChar* )s;
-
- s -= reg->int_map_backward[*s];
- s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s, text_end);
+ while (s < end) {
+ p = s - tlen1;
+ if (str_lower_case_match(enc, case_fold_flag, target, target_end,
+ p, s + 1))
+ return (UChar* )p;
+ if (s + 1 >= end) break;
+ s += reg->map[s[1]];
}
return (UChar* )NULL;
}
-#endif
static UChar*
map_search(OnigEncoding enc, UChar map[],
- const UChar* text, const UChar* text_range, const UChar* text_end)
+ const UChar* text, const UChar* text_range, const UChar* text_end)
{
const UChar *s = text;
@@ -3851,8 +4546,8 @@ map_search(OnigEncoding enc, UChar map[],
static UChar*
map_search_backward(OnigEncoding enc, UChar map[],
- const UChar* text, const UChar* adjust_text,
- const UChar* text_start, const UChar* text_end)
+ const UChar* text, const UChar* adjust_text,
+ const UChar* text_start, const UChar* text_end)
{
const UChar *s = text_start;
@@ -3866,7 +4561,7 @@ map_search_backward(OnigEncoding enc, UChar map[],
extern OnigPosition
onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, OnigRegion* region,
- OnigOptionType option)
+ OnigOptionType option)
{
ptrdiff_t r;
UChar *prev;
@@ -3890,9 +4585,9 @@ onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, On
prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at, end);
r = match_at(reg, str, end,
#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
- end,
+ end,
#endif
- at, prev, &msa);
+ at, prev, &msa);
}
MATCH_ARG_FREE(msa);
@@ -3901,24 +4596,29 @@ onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, On
static int
forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
- UChar* range, UChar** low, UChar** high, UChar** low_prev)
+ UChar* range, UChar** low, UChar** high, UChar** low_prev)
{
UChar *p, *pprev = (UChar* )NULL;
+ size_t input_len = end - str;
#ifdef ONIG_DEBUG_SEARCH
fprintf(stderr, "forward_search_range: str: %"PRIuPTR" (%p), end: %"PRIuPTR" (%p), s: %"PRIuPTR" (%p), range: %"PRIuPTR" (%p)\n",
- (uintptr_t )str, str, (uintptr_t )end, end, (uintptr_t )s, s, (uintptr_t )range, range);
+ (uintptr_t )str, str, (uintptr_t )end, end, (uintptr_t )s, s, (uintptr_t )range, range);
#endif
+ if (reg->dmin > input_len) {
+ return 0;
+ }
+
p = s;
- if (reg->dmin > 0) {
+ if (reg->dmin != 0) {
+ if ((OnigDistance)(end - p) <= reg->dmin) return 0; /* fail */
if (ONIGENC_IS_SINGLEBYTE(reg->enc)) {
p += reg->dmin;
}
else {
UChar *q = p + reg->dmin;
- if (q >= end) return 0; /* fail */
while (p < q) p += enclen(reg->enc, p, end);
}
}
@@ -3930,7 +4630,7 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
break;
case ONIG_OPTIMIZE_EXACT_IC:
p = slow_search_ic(reg->enc, reg->case_fold_flag,
- reg->exact, reg->exact_end, p, end, range);
+ reg->exact, reg->exact_end, p, end, range);
break;
case ONIG_OPTIMIZE_EXACT_BM:
@@ -3955,7 +4655,7 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
}
if (p && p < range) {
- if (p - reg->dmin < s) {
+ if ((OnigDistance)(p - s) < reg->dmin) {
retry_gate:
pprev = p;
p += enclen(reg->enc, p, end);
@@ -3967,70 +4667,74 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
switch (reg->sub_anchor) {
case ANCHOR_BEGIN_LINE:
- if (!ON_STR_BEGIN(p)) {
- prev = onigenc_get_prev_char_head(reg->enc,
- (pprev ? pprev : str), p, end);
- if (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0))
- goto retry_gate;
- }
- break;
+ if (!ON_STR_BEGIN(p)) {
+ prev = onigenc_get_prev_char_head(reg->enc,
+ (pprev ? pprev : str), p, end);
+ if (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0))
+ goto retry_gate;
+ }
+ break;
case ANCHOR_END_LINE:
- if (ON_STR_END(p)) {
+ if (ON_STR_END(p)) {
#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
- prev = (UChar* )onigenc_get_prev_char_head(reg->enc,
- (pprev ? pprev : str), p);
- if (prev && ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 1))
- goto retry_gate;
+ prev = (UChar* )onigenc_get_prev_char_head(reg->enc,
+ (pprev ? pprev : str), p);
+ if (prev && ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 1))
+ goto retry_gate;
#endif
- }
- else if (! ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, p, str, end, reg->options, 1))
- goto retry_gate;
- break;
+ }
+ else if (! ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, p, str, end, reg->options, 1))
+ goto retry_gate;
+ break;
}
}
if (reg->dmax == 0) {
*low = p;
if (low_prev) {
- if (*low > s)
- *low_prev = onigenc_get_prev_char_head(reg->enc, s, p, end);
- else
- *low_prev = onigenc_get_prev_char_head(reg->enc,
- (pprev ? pprev : str), p, end);
+ if (*low > s)
+ *low_prev = onigenc_get_prev_char_head(reg->enc, s, p, end);
+ else
+ *low_prev = onigenc_get_prev_char_head(reg->enc,
+ (pprev ? pprev : str), p, end);
}
+ *high = p;
}
else {
if (reg->dmax != ONIG_INFINITE_DISTANCE) {
- if (p < str + reg->dmax) {
- *low = (UChar* )str;
- if (low_prev)
- *low_prev = onigenc_get_prev_char_head(reg->enc, str, *low, end);
- }
- else {
- *low = p - reg->dmax;
- if (*low > s) {
- *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s,
- *low, end, (const UChar** )low_prev);
- if (low_prev && IS_NULL(*low_prev))
- *low_prev = onigenc_get_prev_char_head(reg->enc,
- (pprev ? pprev : s), *low, end);
- }
- else {
- if (low_prev)
- *low_prev = onigenc_get_prev_char_head(reg->enc,
- (pprev ? pprev : str), *low, end);
- }
- }
+ if ((OnigDistance)(p - str) < reg->dmax) {
+ *low = (UChar* )str;
+ if (low_prev)
+ *low_prev = onigenc_get_prev_char_head(reg->enc, str, *low, end);
+ }
+ else {
+ *low = p - reg->dmax;
+ if (*low > s) {
+ *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s,
+ *low, end, (const UChar** )low_prev);
+ if (low_prev && IS_NULL(*low_prev))
+ *low_prev = onigenc_get_prev_char_head(reg->enc,
+ (pprev ? pprev : s), *low, end);
+ }
+ else {
+ if (low_prev)
+ *low_prev = onigenc_get_prev_char_head(reg->enc,
+ (pprev ? pprev : str), *low, end);
+ }
+ }
}
+ /* no needs to adjust *high, *high is used as range check only */
+ if ((OnigDistance)(p - str) < reg->dmin)
+ *high = (UChar* )str;
+ else
+ *high = p - reg->dmin;
}
- /* no needs to adjust *high, *high is used as range check only */
- *high = p - reg->dmin;
#ifdef ONIG_DEBUG_SEARCH
fprintf(stderr,
"forward_search_range success: low: %"PRIdPTR", high: %"PRIdPTR", dmin: %"PRIdPTR", dmax: %"PRIdPTR"\n",
- *low - str, *high - str, reg->dmin, reg->dmax);
+ *low - str, *high - str, reg->dmin, reg->dmax);
#endif
return 1; /* success */
}
@@ -4042,12 +4746,16 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
static int
backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
- UChar* s, const UChar* range, UChar* adjrange,
- UChar** low, UChar** high)
+ UChar* s, const UChar* range, UChar* adjrange,
+ UChar** low, UChar** high)
{
UChar *p;
+ size_t input_len = end - str;
+
+ if (reg->dmin > input_len) {
+ return 0;
+ }
- range += reg->dmin;
p = s;
retry:
@@ -4055,34 +4763,20 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
case ONIG_OPTIMIZE_EXACT:
exact_method:
p = slow_search_backward(reg->enc, reg->exact, reg->exact_end,
- range, adjrange, end, p);
+ range, adjrange, end, p);
break;
case ONIG_OPTIMIZE_EXACT_IC:
case ONIG_OPTIMIZE_EXACT_BM_IC:
case ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC:
p = slow_search_backward_ic(reg->enc, reg->case_fold_flag,
- reg->exact, reg->exact_end,
- range, adjrange, end, p);
+ reg->exact, reg->exact_end,
+ range, adjrange, end, p);
break;
case ONIG_OPTIMIZE_EXACT_BM:
case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
-#ifdef USE_INT_MAP_BACKWARD
- if (IS_NULL(reg->int_map_backward)) {
- int r;
- if (s - range < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD)
- goto exact_method;
-
- r = set_bm_backward_skip(reg->exact, reg->exact_end, reg->enc,
- &(reg->int_map_backward));
- if (r) return r;
- }
- p = bm_search_backward(reg, reg->exact, reg->exact_end, range, adjrange,
- end, p);
-#else
goto exact_method;
-#endif
break;
case ONIG_OPTIMIZE_MAP:
@@ -4096,45 +4790,57 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
switch (reg->sub_anchor) {
case ANCHOR_BEGIN_LINE:
- if (!ON_STR_BEGIN(p)) {
- prev = onigenc_get_prev_char_head(reg->enc, str, p, end);
- if (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0)) {
- p = prev;
- goto retry;
- }
- }
- break;
+ if (!ON_STR_BEGIN(p)) {
+ prev = onigenc_get_prev_char_head(reg->enc, str, p, end);
+ if (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0)) {
+ p = prev;
+ goto retry;
+ }
+ }
+ break;
case ANCHOR_END_LINE:
- if (ON_STR_END(p)) {
+ if (ON_STR_END(p)) {
#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
- prev = onigenc_get_prev_char_head(reg->enc, adjrange, p);
- if (IS_NULL(prev)) goto fail;
- if (ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 1)) {
- p = prev;
- goto retry;
- }
+ prev = onigenc_get_prev_char_head(reg->enc, adjrange, p);
+ if (IS_NULL(prev)) goto fail;
+ if (ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 1)) {
+ p = prev;
+ goto retry;
+ }
#endif
- }
- else if (! ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, p, str, end, reg->options, 1)) {
- p = onigenc_get_prev_char_head(reg->enc, adjrange, p, end);
- if (IS_NULL(p)) goto fail;
- goto retry;
- }
- break;
+ }
+ else if (! ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, p, str, end, reg->options, 1)) {
+ p = onigenc_get_prev_char_head(reg->enc, adjrange, p, end);
+ if (IS_NULL(p)) goto fail;
+ goto retry;
+ }
+ break;
}
}
- /* no needs to adjust *high, *high is used as range check only */
if (reg->dmax != ONIG_INFINITE_DISTANCE) {
- *low = p - reg->dmax;
- *high = p - reg->dmin;
+ if ((OnigDistance)(p - str) < reg->dmax)
+ *low = (UChar* )str;
+ else
+ *low = p - reg->dmax;
+
+ if (reg->dmin != 0) {
+ if ((OnigDistance)(p - str) < reg->dmin)
+ *high = (UChar* )str;
+ else
+ *high = p - reg->dmin;
+ }
+ else {
+ *high = p;
+ }
+
*high = onigenc_get_right_adjust_char_head(reg->enc, adjrange, *high, end);
}
#ifdef ONIG_DEBUG_SEARCH
fprintf(stderr, "backward_search_range: low: %d, high: %d\n",
- (int )(*low - str), (int )(*high - str));
+ (int )(*low - str), (int )(*high - str));
#endif
return 1; /* success */
}
@@ -4149,15 +4855,15 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
extern OnigPosition
onig_search(regex_t* reg, const UChar* str, const UChar* end,
- const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option)
+ const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option)
{
return onig_search_gpos(reg, str, end, start, start, range, region, option);
}
extern OnigPosition
onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end,
- const UChar* global_pos,
- const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option)
+ const UChar* global_pos,
+ const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option)
{
ptrdiff_t r;
UChar *s, *prev;
@@ -4185,44 +4891,64 @@ onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end,
# ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
# define MATCH_AND_RETURN_CHECK(upper_range) \
r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
- if (r != ONIG_MISMATCH) {\
- if (r >= 0) {\
- if (! IS_FIND_LONGEST(reg->options)) {\
- goto match;\
+ switch (r) { \
+ case ONIG_MISMATCH: \
+ break; \
+ case ONIGERR_TIMEOUT: \
+ goto timeout; \
+ default: \
+ if (r >= 0) { \
+ if (! IS_FIND_LONGEST(reg->options)) { \
+ goto match; \
+ }\
}\
- }\
- else goto finish; /* error */ \
+ else goto finish; /* error */ \
}
# else
# define MATCH_AND_RETURN_CHECK(upper_range) \
r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
- if (r != ONIG_MISMATCH) {\
- if (r >= 0) {\
- goto match;\
- }\
- else goto finish; /* error */ \
+ switch (r) { \
+ case ONIG_MISMATCH: \
+ break; \
+ case ONIGERR_TIMEOUT: \
+ goto timeout; \
+ default: \
+ if (r >= 0) { \
+ goto match; \
+ }\
+ else goto finish; /* error */ \
}
# endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
#else
# ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
# define MATCH_AND_RETURN_CHECK(none) \
r = match_at(reg, str, end, s, prev, &msa);\
- if (r != ONIG_MISMATCH) {\
- if (r >= 0) {\
- if (! IS_FIND_LONGEST(reg->options)) {\
- goto match;\
- }\
- }\
- else goto finish; /* error */ \
+ switch (r) { \
+ case ONIG_MISMATCH: \
+ break; \
+ case ONIGERR_TIMEOUT: \
+ goto timeout; \
+ default: \
+ if (r >= 0) { \
+ if (! IS_FIND_LONGEST(reg->options)) { \
+ goto match; \
+ } \
+ } \
+ else goto finish; /* error */ \
}
# else
# define MATCH_AND_RETURN_CHECK(none) \
r = match_at(reg, str, end, s, prev, &msa);\
- if (r != ONIG_MISMATCH) {\
- if (r >= 0) {\
- goto match;\
- }\
- else goto finish; /* error */ \
+ switch (r) { \
+ case ONIG_MISMATCH: \
+ break; \
+ case ONIGERR_TIMEOUT: \
+ goto timeout; \
+ default: \
+ if (r >= 0) { \
+ goto match; \
+ } \
+ else goto finish; /* error */ \
}
# endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
#endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
@@ -4237,62 +4963,69 @@ onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end,
begin_position:
if (range > start)
{
- if (global_pos > start)
- {
- if (global_pos < range)
- range = global_pos + 1;
- }
- else
- range = start + 1;
+ if (global_pos > start)
+ {
+ if (global_pos < range)
+ range = global_pos + 1;
+ }
+ else
+ range = start + 1;
}
else
- range = start;
+ range = start;
}
else if (reg->anchor & ANCHOR_BEGIN_BUF) {
/* search str-position only */
if (range > start) {
- if (start != str) goto mismatch_no_msa;
- range = str + 1;
+ if (start != str) goto mismatch_no_msa;
+ range = str + 1;
}
else {
- if (range <= str) {
- start = str;
- range = str;
- }
- else
- goto mismatch_no_msa;
+ if (range <= str) {
+ start = str;
+ range = str;
+ }
+ else
+ goto mismatch_no_msa;
}
}
else if (reg->anchor & ANCHOR_END_BUF) {
min_semi_end = max_semi_end = (UChar* )end;
end_buf:
- if ((OnigDistance )(max_semi_end - str) < reg->anchor_dmin)
- goto mismatch_no_msa;
+ if ((OnigDistance)(max_semi_end - str) < reg->anchor_dmin)
+ goto mismatch_no_msa;
if (range > start) {
- if ((OnigDistance )(min_semi_end - start) > reg->anchor_dmax) {
- start = min_semi_end - reg->anchor_dmax;
- if (start < end)
- start = onigenc_get_right_adjust_char_head(reg->enc, str, start, end);
- }
- if ((OnigDistance )(max_semi_end - (range - 1)) < reg->anchor_dmin) {
- range = max_semi_end - reg->anchor_dmin + 1;
- }
-
- if (start > range) goto mismatch_no_msa;
- /* If start == range, match with empty at end.
- Backward search is used. */
+ if ((OnigDistance)(min_semi_end - start) > reg->anchor_dmax) {
+ start = min_semi_end - reg->anchor_dmax;
+ if (start < end)
+ start = onigenc_get_right_adjust_char_head(reg->enc, str, start, end);
+ }
+ if ((OnigDistance)(max_semi_end - (range - 1)) < reg->anchor_dmin) {
+ if ((OnigDistance)(max_semi_end - str + 1) < reg->anchor_dmin)
+ goto mismatch_no_msa;
+ else
+ range = max_semi_end - reg->anchor_dmin + 1;
+ }
+
+ if (start > range) goto mismatch_no_msa;
+ /* If start == range, match with empty at end.
+ Backward search is used. */
}
else {
- if ((OnigDistance )(min_semi_end - range) > reg->anchor_dmax) {
- range = min_semi_end - reg->anchor_dmax;
- }
- if ((OnigDistance )(max_semi_end - start) < reg->anchor_dmin) {
- start = max_semi_end - reg->anchor_dmin;
- start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start, end);
- }
- if (range > start) goto mismatch_no_msa;
+ if ((OnigDistance)(min_semi_end - range) > reg->anchor_dmax) {
+ range = min_semi_end - reg->anchor_dmax;
+ }
+ if ((OnigDistance)(max_semi_end - start) < reg->anchor_dmin) {
+ if ((OnigDistance)(max_semi_end - str) < reg->anchor_dmin)
+ goto mismatch_no_msa;
+ else {
+ start = max_semi_end - reg->anchor_dmin;
+ start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start, end);
+ }
+ }
+ if (range > start) goto mismatch_no_msa;
}
}
else if (reg->anchor & ANCHOR_SEMI_END_BUF) {
@@ -4300,23 +5033,23 @@ onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end,
max_semi_end = (UChar* )end;
if (ONIGENC_IS_MBC_NEWLINE(reg->enc, pre_end, end)) {
- min_semi_end = pre_end;
+ min_semi_end = pre_end;
#ifdef USE_CRNL_AS_LINE_TERMINATOR
- pre_end = ONIGENC_STEP_BACK(reg->enc, str, pre_end, end, 1);
- if (IS_NOT_NULL(pre_end) &&
- IS_NEWLINE_CRLF(reg->options) &&
- ONIGENC_IS_MBC_CRNL(reg->enc, pre_end, end)) {
- min_semi_end = pre_end;
- }
+ pre_end = ONIGENC_STEP_BACK(reg->enc, str, pre_end, end, 1);
+ if (IS_NOT_NULL(pre_end) &&
+ IS_NEWLINE_CRLF(reg->options) &&
+ ONIGENC_IS_MBC_CRNL(reg->enc, pre_end, end)) {
+ min_semi_end = pre_end;
+ }
#endif
- if (min_semi_end > str && start <= min_semi_end) {
- goto end_buf;
- }
+ if (min_semi_end > str && start <= min_semi_end) {
+ goto end_buf;
+ }
}
else {
- min_semi_end = (UChar* )end;
- goto end_buf;
+ min_semi_end = (UChar* )end;
+ goto end_buf;
}
}
else if ((reg->anchor & ANCHOR_ANYCHAR_STAR_ML)) {
@@ -4348,7 +5081,7 @@ onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end,
#ifdef ONIG_DEBUG_SEARCH
fprintf(stderr, "onig_search(apply anchor): end: %d, start: %d, range: %d\n",
- (int )(end - str), (int )(start - str), (int )(range - str));
+ (int )(end - str), (int )(start - str), (int )(range - str));
#endif
MATCH_ARG_INIT(msa, option, region, start, global_pos);
@@ -4369,55 +5102,59 @@ onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end,
if (reg->optimize != ONIG_OPTIMIZE_NONE) {
UChar *sch_range, *low, *high, *low_prev;
- sch_range = (UChar* )range;
if (reg->dmax != 0) {
- if (reg->dmax == ONIG_INFINITE_DISTANCE)
- sch_range = (UChar* )end;
- else {
- sch_range += reg->dmax;
- if (sch_range > end) sch_range = (UChar* )end;
- }
+ if (reg->dmax == ONIG_INFINITE_DISTANCE)
+ sch_range = (UChar* )end;
+ else {
+ if ((OnigDistance)(end - range) < reg->dmax)
+ sch_range = (UChar* )end;
+ else {
+ sch_range = (UChar* )range + reg->dmax;
+ }
+ }
}
+ else
+ sch_range = (UChar* )range;
if ((end - start) < reg->threshold_len)
- goto mismatch;
+ goto mismatch;
if (reg->dmax != ONIG_INFINITE_DISTANCE) {
- do {
- if (! forward_search_range(reg, str, end, s, sch_range,
- &low, &high, &low_prev)) goto mismatch;
- if (s < low) {
- s = low;
- prev = low_prev;
- }
- while (s <= high) {
- MATCH_AND_RETURN_CHECK(orig_range);
- prev = s;
- s += enclen(reg->enc, s, end);
- }
- } while (s < range);
- goto mismatch;
+ do {
+ if (! forward_search_range(reg, str, end, s, sch_range,
+ &low, &high, &low_prev)) goto mismatch;
+ if (s < low) {
+ s = low;
+ prev = low_prev;
+ }
+ while (s <= high) {
+ MATCH_AND_RETURN_CHECK(orig_range);
+ prev = s;
+ s += enclen(reg->enc, s, end);
+ }
+ } while (s < range);
+ goto mismatch;
}
else { /* check only. */
- if (! forward_search_range(reg, str, end, s, sch_range,
- &low, &high, (UChar** )NULL)) goto mismatch;
-
- if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) {
- do {
- MATCH_AND_RETURN_CHECK(orig_range);
- prev = s;
- s += enclen(reg->enc, s, end);
-
- if ((reg->anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) == 0) {
- while (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0)
- && s < range) {
- prev = s;
- s += enclen(reg->enc, s, end);
- }
- }
- } while (s < range);
- goto mismatch;
- }
+ if (! forward_search_range(reg, str, end, s, sch_range,
+ &low, &high, (UChar** )NULL)) goto mismatch;
+
+ if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) {
+ do {
+ MATCH_AND_RETURN_CHECK(orig_range);
+ prev = s;
+ s += enclen(reg->enc, s, end);
+
+ if ((reg->anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) == 0) {
+ while (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0)
+ && s < range) {
+ prev = s;
+ s += enclen(reg->enc, s, end);
+ }
+ }
+ } while (s < range);
+ goto mismatch;
+ }
}
}
@@ -4434,49 +5171,61 @@ onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end,
else { /* backward search */
if (reg->optimize != ONIG_OPTIMIZE_NONE) {
UChar *low, *high, *adjrange, *sch_start;
+ const UChar *min_range;
if (range < end)
- adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range, end);
+ adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range, end);
+ else
+ adjrange = (UChar* )end;
+
+ if ((OnigDistance)(end - range) > reg->dmin)
+ min_range = range + reg->dmin;
else
- adjrange = (UChar* )end;
+ min_range = end;
if (reg->dmax != ONIG_INFINITE_DISTANCE &&
- (end - range) >= reg->threshold_len) {
- do {
- sch_start = s + reg->dmax;
- if (sch_start > end) sch_start = (UChar* )end;
- if (backward_search_range(reg, str, end, sch_start, range, adjrange,
- &low, &high) <= 0)
- goto mismatch;
-
- if (s > high)
- s = high;
-
- while (s >= low) {
- prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
- MATCH_AND_RETURN_CHECK(orig_start);
- s = prev;
- }
- } while (s >= range);
- goto mismatch;
+ end - range >= reg->threshold_len) {
+ do {
+ if ((OnigDistance)(end - s) > reg->dmax)
+ sch_start = s + reg->dmax;
+ else
+ sch_start = (UChar* )end;
+
+ if (backward_search_range(reg, str, end, sch_start, min_range, adjrange,
+ &low, &high) <= 0)
+ goto mismatch;
+
+ if (s > high)
+ s = high;
+
+ while (s >= low) {
+ prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
+ MATCH_AND_RETURN_CHECK(orig_start);
+ s = prev;
+ }
+ } while (s >= range);
+ goto mismatch;
}
else { /* check only. */
- if ((end - range) < reg->threshold_len) goto mismatch;
-
- sch_start = s;
- if (reg->dmax != 0) {
- if (reg->dmax == ONIG_INFINITE_DISTANCE)
- sch_start = (UChar* )end;
- else {
- sch_start += reg->dmax;
- if (sch_start > end) sch_start = (UChar* )end;
- else
- sch_start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc,
- start, sch_start, end);
- }
- }
- if (backward_search_range(reg, str, end, sch_start, range, adjrange,
- &low, &high) <= 0) goto mismatch;
+ if (end - range < reg->threshold_len) goto mismatch;
+
+ if (reg->dmax != 0) {
+ if (reg->dmax == ONIG_INFINITE_DISTANCE)
+ sch_start = (UChar* )end;
+ else {
+ if ((OnigDistance)(end - s) > reg->dmax) {
+ sch_start = s + reg->dmax;
+ sch_start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc,
+ start, sch_start, end);
+ } else
+ sch_start = (UChar* )end;
+ }
+ }
+ else
+ sch_start = (UChar* )s;
+
+ if (backward_search_range(reg, str, end, sch_start, min_range, adjrange,
+ &low, &high) <= 0) goto mismatch;
}
}
@@ -4525,13 +5274,17 @@ onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end,
match:
MATCH_ARG_FREE(msa);
return s - str;
+
+timeout:
+ MATCH_ARG_FREE(msa);
+ return ONIGERR_TIMEOUT;
}
extern OnigPosition
onig_scan(regex_t* reg, const UChar* str, const UChar* end,
- OnigRegion* region, OnigOptionType option,
- int (*scan_callback)(OnigPosition, OnigPosition, OnigRegion*, void*),
- void* callback_arg)
+ OnigRegion* region, OnigOptionType option,
+ int (*scan_callback)(OnigPosition, OnigPosition, OnigRegion*, void*),
+ void* callback_arg)
{
OnigPosition r;
OnigPosition n;
@@ -4546,17 +5299,17 @@ onig_scan(regex_t* reg, const UChar* str, const UChar* end,
rs = scan_callback(n, r, region, callback_arg);
n++;
if (rs != 0)
- return rs;
+ return rs;
if (region->end[0] == start - str) {
- if (start >= end) break;
- start += enclen(reg->enc, start, end);
+ if (start >= end) break;
+ start += enclen(reg->enc, start, end);
}
else
- start = str + region->end[0];
+ start = str + region->end[0];
if (start > end)
- break;
+ break;
}
else if (r == ONIG_MISMATCH) {
break;