From 75ff8fdb16fa0a733512e61350c9844ea530ad35 Mon Sep 17 00:00:00 2001 From: matz Date: Tue, 6 Oct 1998 03:28:28 +0000 Subject: join 1.1c6 git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/v1_1dev@310 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- regex.c | 121 ++++++++++++++++++++++++++++++++++++---------------------------- 1 file changed, 69 insertions(+), 52 deletions(-) (limited to 'regex.c') diff --git a/regex.c b/regex.c index ae64b11af1..279297ad76 100644 --- a/regex.c +++ b/regex.c @@ -865,7 +865,7 @@ calculate_must_string(start, end) char *start; char *end; { - int mcnt, mcnt2; + int mcnt; int max = 0; char *p = start; char *pend = end; @@ -925,8 +925,7 @@ calculate_must_string(start, end) p += mcnt; mcnt = EXTRACT_UNSIGNED_AND_INCR(p); while (mcnt--) { - EXTRACT_MBC_AND_INCR(p); - EXTRACT_MBC_AND_INCR(p); + p += 4; } break; @@ -1069,6 +1068,7 @@ re_compile_pattern(pattern, size, bufp) bufp->fastmap_accurate = 0; bufp->must = 0; bufp->must_skip = 0; + bufp->stclass = 0; /* Initialize the syntax table. */ init_syntax_once(); @@ -1585,23 +1585,22 @@ re_compile_pattern(pattern, size, bufp) if ((options ^ stackp[-1]) & RE_OPTION_IGNORECASE) { BUFPUSH((options&RE_OPTION_IGNORECASE)?casefold_off:casefold_on); } - options = *--stackp; - switch (c = *--stackp) { - case '(': - case ':': - pending_exact = 0; - if (fixup_alt_jump) - { /* Push a dummy failure point at the end of the - alternative for a possible future - `finalize_jump' to pop. See comments at - `push_dummy_failure' in `re_match'. */ - BUFPUSH(push_dummy_failure); + pending_exact = 0; + if (fixup_alt_jump) + { /* Push a dummy failure point at the end of the + alternative for a possible future + `finalize_jump' to pop. See comments at + `push_dummy_failure' in `re_match'. */ + BUFPUSH(push_dummy_failure); - /* We allocated space for this jump when we assigned - to `fixup_alt_jump', in the `handle_alt' case below. 
*/ - store_jump(fixup_alt_jump, jump, b); - } - if (c == '(') { + /* We allocated space for this jump when we assigned + to `fixup_alt_jump', in the `handle_alt' case below. */ + store_jump(fixup_alt_jump, jump, b); + } + options = *--stackp; + switch (c = *--stackp) { + case '(': + { char *loc = bufp->buffer + *--stackp; *loc = regnum - stackp[-1]; BUFPUSH(stop_memory); @@ -2034,12 +2033,13 @@ re_compile_pattern(pattern, size, bufp) bufp->options |= RE_OPTIMIZE_ANCHOR; } else if (*laststart == charset || *laststart == charset_not) { - mcnt = *++laststart; - laststart += mcnt+1; - mcnt = EXTRACT_UNSIGNED_AND_INCR(laststart); - laststart += 4*mcnt; - if (*laststart == maybe_finalize_jump) { - bufp->options |= RE_OPTIMIZE_CCLASS; + p0 = laststart; + mcnt = *++p0 ; + p0 += mcnt+1; + mcnt = EXTRACT_UNSIGNED_AND_INCR(p0); + p0 += 4*mcnt; + if (*p0 == maybe_finalize_jump) { + bufp->stclass = laststart; } } } @@ -2239,7 +2239,6 @@ slow_match(little, lend, big, bend, translate) if (c == 0xff) c = *little++; if (!trans_eq(*big++, c, translate)) break; - little++; } if (little == lend) return 1; return 0; @@ -2330,11 +2329,9 @@ bm_search(little, llen, big, blen, skip, translate) unsigned char *big; int blen; int *skip; - char *translate; + unsigned char *translate; { - int next[256]; int i, j, k; - unsigned char c; i = llen-1; if (translate) { @@ -2738,27 +2735,30 @@ re_search(bufp, string, size, startpos, range, regs) } if (bufp->must) { - int r = range; int len = ((unsigned char*)bufp->must)[0]; - int pos; + int pos, pbeg, pend; - if (range >= 0) { - r = 0; + pbeg = startpos; + pend = startpos + range; + if (pbeg > pend) { /* swap pbeg,pend */ + pos = pend; pend = pbeg; pbeg = pos; } + if (pend > size) pend = size; if (bufp->options & RE_OPTIMIZE_NO_BM) { pos = slow_search(bufp->must+1, len, - string+startpos, size-startpos-r, + string+pbeg, pend-pbeg, MAY_TRANSLATE()?translate:0); } else { pos = bm_search(bufp->must+1, len, - string+startpos, size-startpos-r, + 
string+pbeg, pend-pbeg, bufp->must_skip, MAY_TRANSLATE()?translate:0); } if (pos == -1) return -1; - if (bufp->options & RE_OPTIMIZE_EXACTN) { + if (range > 0 && (bufp->options & RE_OPTIMIZE_EXACTN)) { startpos += pos; + range -= pos; } } @@ -2827,29 +2827,33 @@ re_search(bufp, string, size, startpos, range, regs) #endif /* NO_ALLOCA */ if (range > 0) { - if (anchor && startpos < size && string[startpos-1] != '\n') { + if (anchor && startpos < size && startpos > 0 && string[startpos-1] != '\n') { while (range > 0 && string[startpos] != '\n') { range--; startpos++; } } - else if (fastmap && (bufp->options & RE_OPTIMIZE_CCLASS)) { - register unsigned char *p, c; + else if (fastmap && (bufp->stclass)) { + register unsigned char *p; + register unsigned short c; int irange = range; p = (unsigned char *)string+startpos; while (range > 0) { c = *p++; - if (ismbchar(c)) { - if (!fastmap[c]) break; - c = *p++; - range--; - if (fastmap[c] != 2) break; + if (ismbchar(c) && fastmap[c] != 2) { + c = c << 8 | *p++; + } + else if (MAY_TRANSLATE()) + c = translate[c]; + if (*bufp->stclass == charset) { + if (!is_in_list(c, bufp->stclass+1)) break; + } + else { + if (is_in_list(c, bufp->stclass+1)) break; } - else - if (!fastmap[MAY_TRANSLATE() ? 
translate[c] : c]) - break; range--; + if (c > 256) range--; } startpos += irange - range; } @@ -3023,9 +3027,6 @@ typedef union #define AT_STRINGS_BEG(d) (d == string) #define AT_STRINGS_END(d) (d == dend) -#define AT_WORD_BOUNDARY(d) \ - (AT_STRINGS_BEG(d) || AT_STRINGS_END(d) || IS_A_LETTER(d - 1) != IS_A_LETTER(d)) - /* We have two special cases to check for: 1) if we're past the end of string1, we have to look at the first character in string2; @@ -3752,12 +3753,28 @@ re_match(bufp, string_arg, size, pos, regs) continue; case wordbound: - if (AT_WORD_BOUNDARY(d)) + if (AT_STRINGS_BEG(d)) { + if (IS_A_LETTER(d)) break; + else goto fail; + } + if (AT_STRINGS_END(d)) { /* at string end, test only the preceding char (mirrors notwordbound) */ + if (IS_A_LETTER(d-1)) break; + else goto fail; + } + if (IS_A_LETTER(d - 1) != IS_A_LETTER(d)) break; goto fail; case notwordbound: - if (AT_WORD_BOUNDARY(d)) + if (AT_STRINGS_BEG(d)) { + if (IS_A_LETTER(d)) goto fail; + else break; + } + if (AT_STRINGS_END(d)) { + if (IS_A_LETTER(d-1)) goto fail; + else break; + } + if (IS_A_LETTER(d - 1) != IS_A_LETTER(d)) goto fail; break; -- cgit v1.2.3