diff options
| author | K.Takata <kentkt@csc.jp> | 2017-11-21 20:30:00 +0900 |
|---|---|---|
| committer | nagachika <nagachika@ruby-lang.org> | 2025-11-02 14:09:45 +0900 |
| commit | 6809ba2fe57c2548836cf6512dae12599ced8e39 (patch) | |
| tree | 1326de8d20a61b2b793e6a8ec48a520c0f3deb40 | |
| parent | e02c892ade90ee401daf26292fbb99d32af7f619 (diff) | |
Fix performance problem with /k/i and /s/i (Close k-takata/Onigmo#97)
E.g.
For the pattern `/----k/i`, optimization was totally turned off.
Make it possible to use the characters before `k` (i.e. `----`) for
optimization.
https://github.com/k-takata/Onigmo/commit/9c13de8d0684ebde97e3709d7693997c81ca374b
| -rw-r--r-- | regcomp.c | 67 |
1 files changed, 43 insertions, 24 deletions
@@ -4217,7 +4217,7 @@ set_bm_skip(UChar* s, UChar* end, regex_t* reg, { OnigDistance i, len; int clen, flen, n, j, k; - UChar *p, buf[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM][ONIGENC_MBC_CASE_FOLD_MAXLEN]; + UChar *p, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM]; OnigEncoding enc = reg->enc; @@ -4299,7 +4299,7 @@ set_bm_skip(UChar* s, UChar* end, regex_t* reg, { OnigDistance i, len; int clen, flen, n, j, k; - UChar *p, buf[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM][ONIGENC_MBC_CASE_FOLD_MAXLEN]; + UChar *p, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM]; OnigEncoding enc = reg->enc; @@ -4307,6 +4307,34 @@ set_bm_skip(UChar* s, UChar* end, regex_t* reg, if (len < ONIG_CHAR_TABLE_SIZE) { for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = (UChar )(len + 1); + if (ignore_case) { + for (i = 0; i < len; i += clen) { + p = s + i; + n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag, + p, end, items); + clen = enclen(enc, p, end); + if (p + clen > end) + clen = (int )(end - p); + + for (j = 0; j < n; j++) { + if ((items[j].code_len != 1) || (items[j].byte_len != clen)) { + /* Different length isn't supported. Stop optimization at here. */ + end = p; + goto endcheck; + } + flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf); + if (flen != clen) { + /* Different length isn't supported. Stop optimization at here. */ + end = p; + goto endcheck; + } + } + } +endcheck: + ; + } + + len = end - s; n = 0; for (i = 0; i < len; i += clen) { p = s + i; @@ -4317,17 +4345,11 @@ set_bm_skip(UChar* s, UChar* end, regex_t* reg, if (p + clen > end) clen = (int )(end - p); - for (j = 0; j < n; j++) { - if ((items[j].code_len != 1) || (items[j].byte_len != clen)) - return 1; /* different length isn't supported. */ - flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]); - if (flen != clen) - return 1; /* different length isn't supported. */ - } for (j = 0; j < clen; j++) { skip[s[i + j]] = (UChar )(len - i - j); for (k = 0; k < n; k++) { - skip[buf[k][j]] = (UChar )(len - i - j); + ONIGENC_CODE_TO_MBC(enc, items[k].code[0], buf); + skip[buf[j]] = (UChar )(len - i - j); } } } @@ -4369,7 +4391,7 @@ set_bm_skip(UChar* s, UChar* end, regex_t* reg, } # endif } - return 0; + return (int)len; } #endif /* USE_SUNDAY_QUICK_SEARCH */ @@ -5342,7 +5364,6 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) static int set_optimize_exact_info(regex_t* reg, OptExactInfo* e) { - int r; int allow_reverse; if (e->len == 0) return 0; @@ -5357,15 +5378,18 @@ set_optimize_exact_info(regex_t* reg, OptExactInfo* e) if (e->ignore_case > 0) { if (e->len >= 3 || (e->len >= 2 && allow_reverse)) { - r = set_bm_skip(reg->exact, reg->exact_end, reg, + e->len = set_bm_skip(reg->exact, reg->exact_end, reg, reg->map, &(reg->int_map), 1); - if (r == 0) { + reg->exact_end = reg->exact + e->len; + if (e->len >= 3) { reg->optimize = (allow_reverse != 0 ? ONIG_OPTIMIZE_EXACT_BM_IC : ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC); } - else { + else if (e->len > 0) { reg->optimize = ONIG_OPTIMIZE_EXACT_IC; } + else + return 0; } else { reg->optimize = ONIG_OPTIMIZE_EXACT_IC; @@ -5373,15 +5397,10 @@ set_optimize_exact_info(regex_t* reg, OptExactInfo* e) } else { if (e->len >= 3 || (e->len >= 2 && allow_reverse)) { - r = set_bm_skip(reg->exact, reg->exact_end, reg, - reg->map, &(reg->int_map), 0); - if (r == 0) { - reg->optimize = (allow_reverse != 0 - ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV); - } - else { - reg->optimize = ONIG_OPTIMIZE_EXACT; - } + set_bm_skip(reg->exact, reg->exact_end, reg, + reg->map, &(reg->int_map), 0); + reg->optimize = (allow_reverse != 0 + ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV); } else { reg->optimize = ONIG_OPTIMIZE_EXACT; |
