diff options
Diffstat (limited to 'regexec.c')
| -rw-r--r-- | regexec.c | 456 |
1 files changed, 47 insertions, 409 deletions
@@ -2,8 +2,8 @@ regexec.c - Onigmo (Oniguruma-mod) (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> - * Copyright (c) 2011-2016 K.Takata <kentkt AT csc DOT jp> + * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2011-2019 K.Takata <kentkt AT csc DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -2742,7 +2742,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, /* default behavior: return first-matching result. */ goto finish; - NEXT; CASE(OP_EXACT1) MOP_IN(OP_EXACT1); DATA_ENSURE(1); @@ -3316,40 +3315,36 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) { MOP_OUT; JUMP; - } + } } goto fail; - NEXT; CASE(OP_ASCII_WORD_BEGIN) MOP_IN(OP_ASCII_WORD_BEGIN); if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) { if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) { MOP_OUT; JUMP; - } + } } goto fail; - NEXT; CASE(OP_WORD_END) MOP_IN(OP_WORD_END); if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_WORD(encode, sprev, end)) { if (ON_STR_END(s) || !ONIGENC_IS_MBC_WORD(encode, s, end)) { MOP_OUT; JUMP; - } + } } goto fail; - NEXT; CASE(OP_ASCII_WORD_END) MOP_IN(OP_ASCII_WORD_END); if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) { if (ON_STR_END(s) || !ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) { MOP_OUT; JUMP; - } + } } goto fail; - NEXT; #endif CASE(OP_BEGIN_BUF) MOP_IN(OP_BEGIN_BUF); @@ -3379,10 +3374,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, #endif && !ON_STR_END(s)) { MOP_OUT; - JUMP; + JUMP; } goto fail; - NEXT; CASE(OP_END_LINE) MOP_IN(OP_END_LINE); if (ON_STR_END(s)) { @@ -3398,10 +3392,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, } else if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 1)) { MOP_OUT; - JUMP; + JUMP; } goto fail; - NEXT; CASE(OP_SEMI_END_BUF) MOP_IN(OP_SEMI_END_BUF); if (ON_STR_END(s)) { @@ -3433,7 +3426,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, #endif } goto fail; - NEXT; CASE(OP_BEGIN_POSITION) MOP_IN(OP_BEGIN_POSITION); if (s != msa->gpos) @@ -3499,12 +3491,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE(OP_BACKREF1) MOP_IN(OP_BACKREF1); mem = 1; goto backref; - NEXT; CASE(OP_BACKREF2) MOP_IN(OP_BACKREF2); mem = 2; goto backref; - NEXT; CASE(OP_BACKREFN) MOP_IN(OP_BACKREFN); GET_MEMNUM_INC(mem, p); @@ -3934,7 +3924,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, STACK_GET_REPEAT(mem, stkp); si = GET_STACK_INDEX(stkp); goto repeat_inc; - NEXT; CASE(OP_REPEAT_INC_NG) MOP_IN(OP_REPEAT_INC_NG); GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ @@ -3970,7 +3959,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, STACK_GET_REPEAT(mem, stkp); si = GET_STACK_INDEX(stkp); goto repeat_inc_ng; - NEXT; CASE(OP_PUSH_POS) MOP_IN(OP_PUSH_POS); STACK_PUSH_POS(s, sprev, pkeep); @@ -3995,7 +3983,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE(OP_FAIL_POS) MOP_IN(OP_FAIL_POS); STACK_POP_TIL_POS_NOT; goto fail; - NEXT; CASE(OP_PUSH_STOP_BT) MOP_IN(OP_PUSH_STOP_BT); STACK_PUSH_STOP_BT; @@ -4036,7 +4023,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE(OP_FAIL_LOOK_BEHIND_NOT) MOP_IN(OP_FAIL_LOOK_BEHIND_NOT); STACK_POP_TIL_LOOK_BEHIND_NOT; goto fail; - NEXT; CASE(OP_PUSH_ABSENT_POS) MOP_IN(OP_PUSH_ABSENT_POS); /* Save the absent-start-pos and the original end-pos. */ @@ -4098,7 +4084,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, #endif STACK_POP_TIL_ABSENT; goto fail; - NEXT; #ifdef USE_SUBEXP_CALL CASE(OP_CALL) MOP_IN(OP_CALL); @@ -4128,7 +4113,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE(OP_FINISH) goto finish; - NEXT; CASE(OP_FAIL) if (0) { @@ -4393,219 +4377,6 @@ slow_search_backward_ic(OnigEncoding enc, int case_fold_flag, return (UChar* )NULL; } -#ifndef USE_SUNDAY_QUICK_SEARCH -/* Boyer-Moore-Horspool search applied to a multibyte string */ -static UChar* -bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end, - const UChar* text, const UChar* text_end, - const UChar* text_range) -{ - const UChar *s, *se, *t, *p, *end; - const UChar *tail; - ptrdiff_t skip, tlen1; - -# ifdef ONIG_DEBUG_SEARCH - fprintf(stderr, "bm_search_notrev: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n", - (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range); -# endif - - tail = target_end - 1; - tlen1 = tail - target; - end = text_range; - if (end + tlen1 > text_end) - end = text_end - tlen1; - - s = text; - - if (IS_NULL(reg->int_map)) { - while (s < end) { - p = se = s + tlen1; - t = tail; - while (*p == *t) { - if (t == target) return (UChar* )s; - p--; t--; - } - skip = reg->map[*se]; - t = s; - do { - s += enclen(reg->enc, s, end); - } while ((s - t) < skip && s < end); - } - } - else { -# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE - while (s < end) { - p = se = s + tlen1; - t = tail; - while (*p == *t) { - if (t == target) return (UChar* )s; - p--; t--; - } - skip = reg->int_map[*se]; - t = s; - do { - s += enclen(reg->enc, s, end); - } while ((s - t) < skip && s < end); - } -# endif - } - - return (UChar* )NULL; -} - -/* Boyer-Moore-Horspool search */ -static UChar* -bm_search(regex_t* reg, const UChar* target, const UChar* target_end, - const UChar* text, const UChar* text_end, const UChar* text_range) -{ - const UChar *s, *t, *p, *end; - const UChar *tail; - -# ifdef ONIG_DEBUG_SEARCH - fprintf(stderr, "bm_search: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n", - (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range); -# endif - - end = text_range + (target_end - target) - 1; - if (end > text_end) - end = text_end; - - tail = target_end - 1; - s = text + (target_end - target) - 1; - if (IS_NULL(reg->int_map)) { - while (s < end) { - p = s; - t = tail; -# ifdef ONIG_DEBUG_SEARCH - fprintf(stderr, "bm_search_loop: pos: %"PRIdPTR" %s\n", - (intptr_t )(s - text), s); -# endif - while (*p == *t) { - if (t == target) return (UChar* )p; - p--; t--; - } - s += reg->map[*s]; - } - } - else { /* see int_map[] */ -# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE - while (s < end) { - p = s; - t = tail; - while (*p == *t) { - if (t == target) return (UChar* )p; - p--; t--; - } - s += reg->int_map[*s]; - } -# endif - } - return (UChar* )NULL; -} - -/* Boyer-Moore-Horspool search applied to a multibyte string (ignore case) */ -static UChar* -bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end, - const UChar* text, const UChar* text_end, - const UChar* text_range) -{ - const UChar *s, *se, *t, *end; - const UChar *tail; - ptrdiff_t skip, tlen1; - OnigEncoding enc = reg->enc; - int case_fold_flag = reg->case_fold_flag; - -# ifdef ONIG_DEBUG_SEARCH - fprintf(stderr, "bm_search_notrev_ic: text: %d (%p), text_end: %d (%p), text_range: %d (%p)\n", - (int )text, text, (int )text_end, text_end, (int )text_range, text_range); -# endif - - tail = target_end - 1; - tlen1 = tail - target; - end = text_range; - if (end + tlen1 > text_end) - end = text_end - tlen1; - - s = text; - - if (IS_NULL(reg->int_map)) { - while (s < end) { - se = s + tlen1; - if (str_lower_case_match(enc, case_fold_flag, target, target_end, - s, se + 1)) - return (UChar* )s; - skip = reg->map[*se]; - t = s; - do { - s += enclen(reg->enc, s, end); - } while ((s - t) < skip && s < end); - } - } - else { -# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE - while (s < end) { - se = s + tlen1; - if (str_lower_case_match(enc, case_fold_flag, target, target_end, - s, se + 1)) - return (UChar* )s; - skip = reg->int_map[*se]; - t = s; - do { - s += enclen(reg->enc, s, end); - } while ((s - t) < skip && s < end); - } -# endif - } - - return (UChar* )NULL; -} - -/* Boyer-Moore-Horspool search (ignore case) */ -static UChar* -bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end, - const UChar* text, const UChar* text_end, const UChar* text_range) -{ - const UChar *s, *p, *end; - const UChar *tail; - OnigEncoding enc = reg->enc; - int case_fold_flag = reg->case_fold_flag; - -# ifdef ONIG_DEBUG_SEARCH - fprintf(stderr, "bm_search_ic: text: %d (%p), text_end: %d (%p), text_range: %d (%p)\n", - (int )text, text, (int )text_end, text_end, (int )text_range, text_range); -# endif - - end = text_range + (target_end - target) - 1; - if (end > text_end) - end = text_end; - - tail = target_end - 1; - s = text + (target_end - target) - 1; - if (IS_NULL(reg->int_map)) { - while (s < end) { - p = s - (target_end - target) + 1; - if (str_lower_case_match(enc, case_fold_flag, target, target_end, - p, s + 1)) - return (UChar* )p; - s += reg->map[*s]; - } - } - else { /* see int_map[] */ -# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE - while (s < end) { - p = s - (target_end - target) + 1; - if (str_lower_case_match(enc, case_fold_flag, target, target_end, - p, s + 1)) - return (UChar* )p; - s += reg->int_map[*s]; - } -# endif - } - return (UChar* )NULL; -} - -#else /* USE_SUNDAY_QUICK_SEARCH */ - /* Sunday's quick search applied to a multibyte string */ static UChar* bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end, @@ -4630,39 +4401,19 @@ bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end, s = text; - if (IS_NULL(reg->int_map)) { - while (s < end) { - p = se = s + tlen1; - t = tail; - while (*p == *t) { - if (t == target) return (UChar* )s; - p--; t--; - } - if (s + 1 >= end) break; - skip = reg->map[se[1]]; - t = s; - do { - s += enclen(enc, s, end); - } while ((s - t) < skip && s < end); - } - } - else { -# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE - while (s < end) { - p = se = s + tlen1; - t = tail; - while (*p == *t) { - if (t == target) return (UChar* )s; - p--; t--; - } - if (s + 1 >= end) break; - skip = reg->int_map[se[1]]; - t = s; - do { - s += enclen(enc, s, end); - } while ((s - t) < skip && s < end); + while (s < end) { + p = se = s + tlen1; + t = tail; + while (*p == *t) { + if (t == target) return (UChar* )s; + p--; t--; } -# endif + if (s + 1 >= end) break; + skip = reg->map[se[1]]; + t = s; + do { + s += enclen(enc, s, end); + } while ((s - t) < skip && s < end); } return (UChar* )NULL; @@ -4689,32 +4440,17 @@ bm_search(regex_t* reg, const UChar* target, const UChar* target_end, end = text_end; s = text + tlen1; - if (IS_NULL(reg->int_map)) { - while (s < end) { - p = s; - t = tail; - while (*p == *t) { - if (t == target) return (UChar* )p; - p--; t--; - } - if (s + 1 >= end) break; - s += reg->map[s[1]]; - } - } - else { /* see int_map[] */ -# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE - while (s < end) { - p = s; - t = tail; - while (*p == *t) { - if (t == target) return (UChar* )p; - p--; t--; - } - if (s + 1 >= end) break; - s += reg->int_map[s[1]]; + while (s < end) { + p = s; + t = tail; + while (*p == *t) { + if (t == target) return (UChar* )p; + p--; t--; } -# endif + if (s + 1 >= end) break; + s += reg->map[s[1]]; } + return (UChar* )NULL; } @@ -4743,35 +4479,17 @@ bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end, s = text; - if (IS_NULL(reg->int_map)) { - while (s < end) { - se = s + tlen1; - if (str_lower_case_match(enc, case_fold_flag, target, target_end, - s, se + 1)) - return (UChar* )s; - if (s + 1 >= end) break; - skip = reg->map[se[1]]; - t = s; - do { - s += enclen(enc, s, end); - } while ((s - t) < skip && s < end); - } - } - else { -# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE - while (s < end) { - se = s + tlen1; - if (str_lower_case_match(enc, case_fold_flag, target, target_end, - s, se + 1)) - return (UChar* )s; - if (s + 1 >= end) break; - skip = reg->int_map[se[1]]; - t = s; - do { - s += enclen(enc, s, end); - } while ((s - t) < skip && s < end); - } -# endif + while (s < end) { + se = s + tlen1; + if (str_lower_case_match(enc, case_fold_flag, target, target_end, + s, se + 1)) + return (UChar* )s; + if (s + 1 >= end) break; + skip = reg->map[se[1]]; + t = s; + do { + s += enclen(enc, s, end); + } while ((s - t) < skip && s < end); } return (UChar* )NULL; @@ -4800,83 +4518,17 @@ bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end, end = text_end; s = text + tlen1; - if (IS_NULL(reg->int_map)) { - while (s < end) { - p = s - tlen1; - if (str_lower_case_match(enc, case_fold_flag, target, target_end, - p, s + 1)) - return (UChar* )p; - if (s + 1 >= end) break; - s += reg->map[s[1]]; - } - } - else { /* see int_map[] */ -# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE - while (s < end) { - p = s - tlen1; - if (str_lower_case_match(enc, case_fold_flag, target, target_end, - p, s + 1)) - return (UChar* )p; - if (s + 1 >= end) break; - s += reg->int_map[s[1]]; - } -# endif - } - return (UChar* )NULL; -} -#endif /* USE_SUNDAY_QUICK_SEARCH */ - -#ifdef USE_INT_MAP_BACKWARD -static int -set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED, - int** skip) -{ - int i, len; - - if (IS_NULL(*skip)) { - *skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE); - if (IS_NULL(*skip)) return ONIGERR_MEMORY; - } - - len = (int )(end - s); - for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) - (*skip)[i] = len; - - for (i = len - 1; i > 0; i--) - (*skip)[s[i]] = i; - - return 0; -} - -static UChar* -bm_search_backward(regex_t* reg, const UChar* target, const UChar* target_end, - const UChar* text, const UChar* adjust_text, - const UChar* text_end, const UChar* text_start) -{ - const UChar *s, *t, *p; - - s = text_end - (target_end - target); - if (text_start < s) - s = text_start; - else - s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s, text_end); - - while (s >= text) { - p = s; - t = target; - while (t < target_end && *p == *t) { - p++; t++; - } - if (t == target_end) - return (UChar* )s; - - s -= reg->int_map_backward[*s]; - s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s, text_end); + while (s < end) { + p = s - tlen1; + if (str_lower_case_match(enc, case_fold_flag, target, target_end, + p, s + 1)) + return (UChar* )p; + if (s + 1 >= end) break; + s += reg->map[s[1]]; } return (UChar* )NULL; } -#endif static UChar* map_search(OnigEncoding enc, UChar map[], @@ -5124,21 +4776,7 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end, case ONIG_OPTIMIZE_EXACT_BM: case ONIG_OPTIMIZE_EXACT_BM_NOT_REV: -#ifdef USE_INT_MAP_BACKWARD - if (IS_NULL(reg->int_map_backward)) { - int r; - if (s - range < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD) - goto exact_method; - - r = set_bm_backward_skip(reg->exact, reg->exact_end, reg->enc, - &(reg->int_map_backward)); - if (r) return r; - } - p = bm_search_backward(reg, reg->exact, reg->exact_end, range, adjrange, - end, p); -#else goto exact_method; -#endif break; case ONIG_OPTIMIZE_MAP: |
