summaryrefslogtreecommitdiff
path: root/regexec.c
diff options
context:
space:
mode:
Diffstat (limited to 'regexec.c')
-rw-r--r--regexec.c456
1 files changed, 47 insertions, 409 deletions
diff --git a/regexec.c b/regexec.c
index b8d174ec8e..3210c7cc1b 100644
--- a/regexec.c
+++ b/regexec.c
@@ -2,8 +2,8 @@
regexec.c - Onigmo (Oniguruma-mod) (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
- * Copyright (c) 2011-2016 K.Takata <kentkt AT csc DOT jp>
+ * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2011-2019 K.Takata <kentkt AT csc DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -2742,7 +2742,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
/* default behavior: return first-matching result. */
goto finish;
- NEXT;
CASE(OP_EXACT1) MOP_IN(OP_EXACT1);
DATA_ENSURE(1);
@@ -3316,40 +3315,36 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
MOP_OUT;
JUMP;
- }
+ }
}
goto fail;
- NEXT;
CASE(OP_ASCII_WORD_BEGIN) MOP_IN(OP_ASCII_WORD_BEGIN);
if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) {
if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) {
MOP_OUT;
JUMP;
- }
+ }
}
goto fail;
- NEXT;
CASE(OP_WORD_END) MOP_IN(OP_WORD_END);
if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
if (ON_STR_END(s) || !ONIGENC_IS_MBC_WORD(encode, s, end)) {
MOP_OUT;
JUMP;
- }
+ }
}
goto fail;
- NEXT;
CASE(OP_ASCII_WORD_END) MOP_IN(OP_ASCII_WORD_END);
if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) {
if (ON_STR_END(s) || !ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) {
MOP_OUT;
JUMP;
- }
+ }
}
goto fail;
- NEXT;
#endif
CASE(OP_BEGIN_BUF) MOP_IN(OP_BEGIN_BUF);
@@ -3379,10 +3374,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
#endif
&& !ON_STR_END(s)) {
MOP_OUT;
- JUMP;
+ JUMP;
}
goto fail;
- NEXT;
CASE(OP_END_LINE) MOP_IN(OP_END_LINE);
if (ON_STR_END(s)) {
@@ -3398,10 +3392,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
}
else if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 1)) {
MOP_OUT;
- JUMP;
+ JUMP;
}
goto fail;
- NEXT;
CASE(OP_SEMI_END_BUF) MOP_IN(OP_SEMI_END_BUF);
if (ON_STR_END(s)) {
@@ -3433,7 +3426,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
#endif
}
goto fail;
- NEXT;
CASE(OP_BEGIN_POSITION) MOP_IN(OP_BEGIN_POSITION);
if (s != msa->gpos)
@@ -3499,12 +3491,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE(OP_BACKREF1) MOP_IN(OP_BACKREF1);
mem = 1;
goto backref;
- NEXT;
CASE(OP_BACKREF2) MOP_IN(OP_BACKREF2);
mem = 2;
goto backref;
- NEXT;
CASE(OP_BACKREFN) MOP_IN(OP_BACKREFN);
GET_MEMNUM_INC(mem, p);
@@ -3934,7 +3924,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
STACK_GET_REPEAT(mem, stkp);
si = GET_STACK_INDEX(stkp);
goto repeat_inc;
- NEXT;
CASE(OP_REPEAT_INC_NG) MOP_IN(OP_REPEAT_INC_NG);
GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
@@ -3970,7 +3959,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
STACK_GET_REPEAT(mem, stkp);
si = GET_STACK_INDEX(stkp);
goto repeat_inc_ng;
- NEXT;
CASE(OP_PUSH_POS) MOP_IN(OP_PUSH_POS);
STACK_PUSH_POS(s, sprev, pkeep);
@@ -3995,7 +3983,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE(OP_FAIL_POS) MOP_IN(OP_FAIL_POS);
STACK_POP_TIL_POS_NOT;
goto fail;
- NEXT;
CASE(OP_PUSH_STOP_BT) MOP_IN(OP_PUSH_STOP_BT);
STACK_PUSH_STOP_BT;
@@ -4036,7 +4023,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE(OP_FAIL_LOOK_BEHIND_NOT) MOP_IN(OP_FAIL_LOOK_BEHIND_NOT);
STACK_POP_TIL_LOOK_BEHIND_NOT;
goto fail;
- NEXT;
CASE(OP_PUSH_ABSENT_POS) MOP_IN(OP_PUSH_ABSENT_POS);
/* Save the absent-start-pos and the original end-pos. */
@@ -4098,7 +4084,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
#endif
STACK_POP_TIL_ABSENT;
goto fail;
- NEXT;
#ifdef USE_SUBEXP_CALL
CASE(OP_CALL) MOP_IN(OP_CALL);
@@ -4128,7 +4113,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE(OP_FINISH)
goto finish;
- NEXT;
CASE(OP_FAIL)
if (0) {
@@ -4393,219 +4377,6 @@ slow_search_backward_ic(OnigEncoding enc, int case_fold_flag,
return (UChar* )NULL;
}
-#ifndef USE_SUNDAY_QUICK_SEARCH
-/* Boyer-Moore-Horspool search applied to a multibyte string */
-static UChar*
-bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
- const UChar* text, const UChar* text_end,
- const UChar* text_range)
-{
- const UChar *s, *se, *t, *p, *end;
- const UChar *tail;
- ptrdiff_t skip, tlen1;
-
-# ifdef ONIG_DEBUG_SEARCH
- fprintf(stderr, "bm_search_notrev: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
- (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
-# endif
-
- tail = target_end - 1;
- tlen1 = tail - target;
- end = text_range;
- if (end + tlen1 > text_end)
- end = text_end - tlen1;
-
- s = text;
-
- if (IS_NULL(reg->int_map)) {
- while (s < end) {
- p = se = s + tlen1;
- t = tail;
- while (*p == *t) {
- if (t == target) return (UChar* )s;
- p--; t--;
- }
- skip = reg->map[*se];
- t = s;
- do {
- s += enclen(reg->enc, s, end);
- } while ((s - t) < skip && s < end);
- }
- }
- else {
-# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
- while (s < end) {
- p = se = s + tlen1;
- t = tail;
- while (*p == *t) {
- if (t == target) return (UChar* )s;
- p--; t--;
- }
- skip = reg->int_map[*se];
- t = s;
- do {
- s += enclen(reg->enc, s, end);
- } while ((s - t) < skip && s < end);
- }
-# endif
- }
-
- return (UChar* )NULL;
-}
-
-/* Boyer-Moore-Horspool search */
-static UChar*
-bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
- const UChar* text, const UChar* text_end, const UChar* text_range)
-{
- const UChar *s, *t, *p, *end;
- const UChar *tail;
-
-# ifdef ONIG_DEBUG_SEARCH
- fprintf(stderr, "bm_search: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
- (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
-# endif
-
- end = text_range + (target_end - target) - 1;
- if (end > text_end)
- end = text_end;
-
- tail = target_end - 1;
- s = text + (target_end - target) - 1;
- if (IS_NULL(reg->int_map)) {
- while (s < end) {
- p = s;
- t = tail;
-# ifdef ONIG_DEBUG_SEARCH
- fprintf(stderr, "bm_search_loop: pos: %"PRIdPTR" %s\n",
- (intptr_t )(s - text), s);
-# endif
- while (*p == *t) {
- if (t == target) return (UChar* )p;
- p--; t--;
- }
- s += reg->map[*s];
- }
- }
- else { /* see int_map[] */
-# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
- while (s < end) {
- p = s;
- t = tail;
- while (*p == *t) {
- if (t == target) return (UChar* )p;
- p--; t--;
- }
- s += reg->int_map[*s];
- }
-# endif
- }
- return (UChar* )NULL;
-}
-
-/* Boyer-Moore-Horspool search applied to a multibyte string (ignore case) */
-static UChar*
-bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end,
- const UChar* text, const UChar* text_end,
- const UChar* text_range)
-{
- const UChar *s, *se, *t, *end;
- const UChar *tail;
- ptrdiff_t skip, tlen1;
- OnigEncoding enc = reg->enc;
- int case_fold_flag = reg->case_fold_flag;
-
-# ifdef ONIG_DEBUG_SEARCH
- fprintf(stderr, "bm_search_notrev_ic: text: %d (%p), text_end: %d (%p), text_range: %d (%p)\n",
- (int )text, text, (int )text_end, text_end, (int )text_range, text_range);
-# endif
-
- tail = target_end - 1;
- tlen1 = tail - target;
- end = text_range;
- if (end + tlen1 > text_end)
- end = text_end - tlen1;
-
- s = text;
-
- if (IS_NULL(reg->int_map)) {
- while (s < end) {
- se = s + tlen1;
- if (str_lower_case_match(enc, case_fold_flag, target, target_end,
- s, se + 1))
- return (UChar* )s;
- skip = reg->map[*se];
- t = s;
- do {
- s += enclen(reg->enc, s, end);
- } while ((s - t) < skip && s < end);
- }
- }
- else {
-# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
- while (s < end) {
- se = s + tlen1;
- if (str_lower_case_match(enc, case_fold_flag, target, target_end,
- s, se + 1))
- return (UChar* )s;
- skip = reg->int_map[*se];
- t = s;
- do {
- s += enclen(reg->enc, s, end);
- } while ((s - t) < skip && s < end);
- }
-# endif
- }
-
- return (UChar* )NULL;
-}
-
-/* Boyer-Moore-Horspool search (ignore case) */
-static UChar*
-bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end,
- const UChar* text, const UChar* text_end, const UChar* text_range)
-{
- const UChar *s, *p, *end;
- const UChar *tail;
- OnigEncoding enc = reg->enc;
- int case_fold_flag = reg->case_fold_flag;
-
-# ifdef ONIG_DEBUG_SEARCH
- fprintf(stderr, "bm_search_ic: text: %d (%p), text_end: %d (%p), text_range: %d (%p)\n",
- (int )text, text, (int )text_end, text_end, (int )text_range, text_range);
-# endif
-
- end = text_range + (target_end - target) - 1;
- if (end > text_end)
- end = text_end;
-
- tail = target_end - 1;
- s = text + (target_end - target) - 1;
- if (IS_NULL(reg->int_map)) {
- while (s < end) {
- p = s - (target_end - target) + 1;
- if (str_lower_case_match(enc, case_fold_flag, target, target_end,
- p, s + 1))
- return (UChar* )p;
- s += reg->map[*s];
- }
- }
- else { /* see int_map[] */
-# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
- while (s < end) {
- p = s - (target_end - target) + 1;
- if (str_lower_case_match(enc, case_fold_flag, target, target_end,
- p, s + 1))
- return (UChar* )p;
- s += reg->int_map[*s];
- }
-# endif
- }
- return (UChar* )NULL;
-}
-
-#else /* USE_SUNDAY_QUICK_SEARCH */
-
/* Sunday's quick search applied to a multibyte string */
static UChar*
bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
@@ -4630,39 +4401,19 @@ bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
s = text;
- if (IS_NULL(reg->int_map)) {
- while (s < end) {
- p = se = s + tlen1;
- t = tail;
- while (*p == *t) {
- if (t == target) return (UChar* )s;
- p--; t--;
- }
- if (s + 1 >= end) break;
- skip = reg->map[se[1]];
- t = s;
- do {
- s += enclen(enc, s, end);
- } while ((s - t) < skip && s < end);
- }
- }
- else {
-# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
- while (s < end) {
- p = se = s + tlen1;
- t = tail;
- while (*p == *t) {
- if (t == target) return (UChar* )s;
- p--; t--;
- }
- if (s + 1 >= end) break;
- skip = reg->int_map[se[1]];
- t = s;
- do {
- s += enclen(enc, s, end);
- } while ((s - t) < skip && s < end);
+ while (s < end) {
+ p = se = s + tlen1;
+ t = tail;
+ while (*p == *t) {
+ if (t == target) return (UChar* )s;
+ p--; t--;
}
-# endif
+ if (s + 1 >= end) break;
+ skip = reg->map[se[1]];
+ t = s;
+ do {
+ s += enclen(enc, s, end);
+ } while ((s - t) < skip && s < end);
}
return (UChar* )NULL;
@@ -4689,32 +4440,17 @@ bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
end = text_end;
s = text + tlen1;
- if (IS_NULL(reg->int_map)) {
- while (s < end) {
- p = s;
- t = tail;
- while (*p == *t) {
- if (t == target) return (UChar* )p;
- p--; t--;
- }
- if (s + 1 >= end) break;
- s += reg->map[s[1]];
- }
- }
- else { /* see int_map[] */
-# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
- while (s < end) {
- p = s;
- t = tail;
- while (*p == *t) {
- if (t == target) return (UChar* )p;
- p--; t--;
- }
- if (s + 1 >= end) break;
- s += reg->int_map[s[1]];
+ while (s < end) {
+ p = s;
+ t = tail;
+ while (*p == *t) {
+ if (t == target) return (UChar* )p;
+ p--; t--;
}
-# endif
+ if (s + 1 >= end) break;
+ s += reg->map[s[1]];
}
+
return (UChar* )NULL;
}
@@ -4743,35 +4479,17 @@ bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end,
s = text;
- if (IS_NULL(reg->int_map)) {
- while (s < end) {
- se = s + tlen1;
- if (str_lower_case_match(enc, case_fold_flag, target, target_end,
- s, se + 1))
- return (UChar* )s;
- if (s + 1 >= end) break;
- skip = reg->map[se[1]];
- t = s;
- do {
- s += enclen(enc, s, end);
- } while ((s - t) < skip && s < end);
- }
- }
- else {
-# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
- while (s < end) {
- se = s + tlen1;
- if (str_lower_case_match(enc, case_fold_flag, target, target_end,
- s, se + 1))
- return (UChar* )s;
- if (s + 1 >= end) break;
- skip = reg->int_map[se[1]];
- t = s;
- do {
- s += enclen(enc, s, end);
- } while ((s - t) < skip && s < end);
- }
-# endif
+ while (s < end) {
+ se = s + tlen1;
+ if (str_lower_case_match(enc, case_fold_flag, target, target_end,
+ s, se + 1))
+ return (UChar* )s;
+ if (s + 1 >= end) break;
+ skip = reg->map[se[1]];
+ t = s;
+ do {
+ s += enclen(enc, s, end);
+ } while ((s - t) < skip && s < end);
}
return (UChar* )NULL;
@@ -4800,83 +4518,17 @@ bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end,
end = text_end;
s = text + tlen1;
- if (IS_NULL(reg->int_map)) {
- while (s < end) {
- p = s - tlen1;
- if (str_lower_case_match(enc, case_fold_flag, target, target_end,
- p, s + 1))
- return (UChar* )p;
- if (s + 1 >= end) break;
- s += reg->map[s[1]];
- }
- }
- else { /* see int_map[] */
-# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
- while (s < end) {
- p = s - tlen1;
- if (str_lower_case_match(enc, case_fold_flag, target, target_end,
- p, s + 1))
- return (UChar* )p;
- if (s + 1 >= end) break;
- s += reg->int_map[s[1]];
- }
-# endif
- }
- return (UChar* )NULL;
-}
-#endif /* USE_SUNDAY_QUICK_SEARCH */
-
-#ifdef USE_INT_MAP_BACKWARD
-static int
-set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED,
- int** skip)
-{
- int i, len;
-
- if (IS_NULL(*skip)) {
- *skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
- if (IS_NULL(*skip)) return ONIGERR_MEMORY;
- }
-
- len = (int )(end - s);
- for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
- (*skip)[i] = len;
-
- for (i = len - 1; i > 0; i--)
- (*skip)[s[i]] = i;
-
- return 0;
-}
-
-static UChar*
-bm_search_backward(regex_t* reg, const UChar* target, const UChar* target_end,
- const UChar* text, const UChar* adjust_text,
- const UChar* text_end, const UChar* text_start)
-{
- const UChar *s, *t, *p;
-
- s = text_end - (target_end - target);
- if (text_start < s)
- s = text_start;
- else
- s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s, text_end);
-
- while (s >= text) {
- p = s;
- t = target;
- while (t < target_end && *p == *t) {
- p++; t++;
- }
- if (t == target_end)
- return (UChar* )s;
-
- s -= reg->int_map_backward[*s];
- s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s, text_end);
+ while (s < end) {
+ p = s - tlen1;
+ if (str_lower_case_match(enc, case_fold_flag, target, target_end,
+ p, s + 1))
+ return (UChar* )p;
+ if (s + 1 >= end) break;
+ s += reg->map[s[1]];
}
return (UChar* )NULL;
}
-#endif
static UChar*
map_search(OnigEncoding enc, UChar map[],
@@ -5124,21 +4776,7 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
case ONIG_OPTIMIZE_EXACT_BM:
case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
-#ifdef USE_INT_MAP_BACKWARD
- if (IS_NULL(reg->int_map_backward)) {
- int r;
- if (s - range < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD)
- goto exact_method;
-
- r = set_bm_backward_skip(reg->exact, reg->exact_end, reg->enc,
- &(reg->int_map_backward));
- if (r) return r;
- }
- p = bm_search_backward(reg, reg->exact, reg->exact_end, range, adjrange,
- end, p);
-#else
goto exact_method;
-#endif
break;
case ONIG_OPTIMIZE_MAP: