summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorK.Takata <kentkt@csc.jp>2017-11-21 20:30:00 +0900
committernagachika <nagachika@ruby-lang.org>2025-11-02 14:09:45 +0900
commit6809ba2fe57c2548836cf6512dae12599ced8e39 (patch)
tree1326de8d20a61b2b793e6a8ec48a520c0f3deb40
parente02c892ade90ee401daf26292fbb99d32af7f619 (diff)
Fix performance problem with /k/i and /s/i (Close k-takata/Onigmo#97)
E.g. For the pattern `/----k/i`, optimization was totally turned off. Make it possible to use the characters before `k` (i.e. `----`) for optimization. https://github.com/k-takata/Onigmo/commit/9c13de8d0684ebde97e3709d7693997c81ca374b
-rw-r--r--regcomp.c67
1 files changed, 43 insertions, 24 deletions
diff --git a/regcomp.c b/regcomp.c
index 4521ed92b0..3cf3885d39 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -4217,7 +4217,7 @@ set_bm_skip(UChar* s, UChar* end, regex_t* reg,
{
OnigDistance i, len;
int clen, flen, n, j, k;
- UChar *p, buf[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM][ONIGENC_MBC_CASE_FOLD_MAXLEN];
+ UChar *p, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
OnigEncoding enc = reg->enc;
@@ -4299,7 +4299,7 @@ set_bm_skip(UChar* s, UChar* end, regex_t* reg,
{
OnigDistance i, len;
int clen, flen, n, j, k;
- UChar *p, buf[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM][ONIGENC_MBC_CASE_FOLD_MAXLEN];
+ UChar *p, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
OnigEncoding enc = reg->enc;
@@ -4307,6 +4307,34 @@ set_bm_skip(UChar* s, UChar* end, regex_t* reg,
if (len < ONIG_CHAR_TABLE_SIZE) {
for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = (UChar )(len + 1);
+ if (ignore_case) {
+ for (i = 0; i < len; i += clen) {
+ p = s + i;
+ n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
+ p, end, items);
+ clen = enclen(enc, p, end);
+ if (p + clen > end)
+ clen = (int )(end - p);
+
+ for (j = 0; j < n; j++) {
+ if ((items[j].code_len != 1) || (items[j].byte_len != clen)) {
+ /* Different length isn't supported. Stop optimization at here. */
+ end = p;
+ goto endcheck;
+ }
+ flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf);
+ if (flen != clen) {
+ /* Different length isn't supported. Stop optimization at here. */
+ end = p;
+ goto endcheck;
+ }
+ }
+ }
+endcheck:
+ ;
+ }
+
+ len = end - s;
n = 0;
for (i = 0; i < len; i += clen) {
p = s + i;
@@ -4317,17 +4345,11 @@ set_bm_skip(UChar* s, UChar* end, regex_t* reg,
if (p + clen > end)
clen = (int )(end - p);
- for (j = 0; j < n; j++) {
- if ((items[j].code_len != 1) || (items[j].byte_len != clen))
- return 1; /* different length isn't supported. */
- flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
- if (flen != clen)
- return 1; /* different length isn't supported. */
- }
for (j = 0; j < clen; j++) {
skip[s[i + j]] = (UChar )(len - i - j);
for (k = 0; k < n; k++) {
- skip[buf[k][j]] = (UChar )(len - i - j);
+ ONIGENC_CODE_TO_MBC(enc, items[k].code[0], buf);
+ skip[buf[j]] = (UChar )(len - i - j);
}
}
}
@@ -4369,7 +4391,7 @@ set_bm_skip(UChar* s, UChar* end, regex_t* reg,
}
# endif
}
- return 0;
+ return (int)len;
}
#endif /* USE_SUNDAY_QUICK_SEARCH */
@@ -5342,7 +5364,6 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
static int
set_optimize_exact_info(regex_t* reg, OptExactInfo* e)
{
- int r;
int allow_reverse;
if (e->len == 0) return 0;
@@ -5357,15 +5378,18 @@ set_optimize_exact_info(regex_t* reg, OptExactInfo* e)
if (e->ignore_case > 0) {
if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
- r = set_bm_skip(reg->exact, reg->exact_end, reg,
+ e->len = set_bm_skip(reg->exact, reg->exact_end, reg,
reg->map, &(reg->int_map), 1);
- if (r == 0) {
+ reg->exact_end = reg->exact + e->len;
+ if (e->len >= 3) {
reg->optimize = (allow_reverse != 0
? ONIG_OPTIMIZE_EXACT_BM_IC : ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC);
}
- else {
+ else if (e->len > 0) {
reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
}
+ else
+ return 0;
}
else {
reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
@@ -5373,15 +5397,10 @@ set_optimize_exact_info(regex_t* reg, OptExactInfo* e)
}
else {
if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
- r = set_bm_skip(reg->exact, reg->exact_end, reg,
- reg->map, &(reg->int_map), 0);
- if (r == 0) {
- reg->optimize = (allow_reverse != 0
- ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV);
- }
- else {
- reg->optimize = ONIG_OPTIMIZE_EXACT;
- }
+ set_bm_skip(reg->exact, reg->exact_end, reg,
+ reg->map, &(reg->int_map), 0);
+ reg->optimize = (allow_reverse != 0
+ ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV);
}
else {
reg->optimize = ONIG_OPTIMIZE_EXACT;