From a19d6b33d7419ed3724ee6646fa9303d9542b5e5 Mon Sep 17 00:00:00 2001 From: ksaito Date: Fri, 28 Jan 2005 15:21:48 +0000 Subject: * ascii.c, euc_jp.c, hash.c, oniggnu.h, oniguruma.h, regcomp.c, regenc.c, regenc.h, regerror.c, regexec.c, reggnu.c, regint.h, regparse.c, regparse.h, sjis.c, st.c, st.h, utf8.c: imported Oni Guruma 3.5.4. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@7846 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- regexec.c | 184 +++++++++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 123 insertions(+), 61 deletions(-) (limited to 'regexec.c') diff --git a/regexec.c b/regexec.c index 07af4fe104..795a26dd76 100644 --- a/regexec.c +++ b/regexec.c @@ -2,7 +2,7 @@ regexec.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2004 K.Kosako + * Copyright (c) 2002-2005 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -274,7 +274,7 @@ onig_region_copy(OnigRegion* to, OnigRegion* from) /** stack **/ #define INVALID_STACK_INDEX -1 -typedef int StackIndex; +typedef long StackIndex; typedef struct _StackType { unsigned int type; @@ -986,7 +986,7 @@ trap_ensure(VALUE arg) TrapEnsureArg* ta = (TrapEnsureArg* )arg; if (ta->state == 0) { /* trap_exec() is not normal return */ - ta->reg->state--; + ONIG_STATE_DEC(ta->reg); if (! IS_NULL(ta->msa->stack_p) && ta->stk_base != ta->msa->stack_p) xfree(ta->stk_base); @@ -1147,6 +1147,27 @@ onig_is_in_code_range(UChar* p, OnigCodePoint code) return ((low < n && code >= data[low * 2]) ? 1 : 0); } +static int +code_is_in_cclass_node(void* node, OnigCodePoint code, int enclen) +{ + unsigned int in_cc; + CClassNode* cc = (CClassNode* )node; + + if (enclen == 1) { + in_cc = BITSET_AT(cc->bs, code); + } + else { + UChar* p = ((BBuf* )(cc->mbuf))->p; + in_cc = onig_is_in_code_range(p, code); + } + + if (IS_CCLASS_NOT(cc)) { + return (in_cc ? 0 : 1); + } + else { + return (in_cc ? 1 : 0); + } +} /* matching region of POSIX API */ typedef int regoff_t; @@ -1340,14 +1361,31 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, case OP_EXACT1_IC: STAT_OP_IN(OP_EXACT1_IC); { int len; - UChar *q, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN]; + UChar *q, *ss, *sp, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN]; DATA_ENSURE(1); + ss = s; + sp = p; + + exact1_ic_retry: len = ONIGENC_MBC_TO_NORMALIZE(encode, ambig_flag, &s, end, lowbuf); DATA_ENSURE(0); q = lowbuf; while (len-- > 0) { - if (*p != *q) goto fail; + if (*p != *q) { +#if 1 + if ((ambig_flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { + ambig_flag &= ~ONIGENC_AMBIGUOUS_MATCH_COMPOUND; + s = ss; + p = sp; + goto exact1_ic_retry; + } + else + goto fail; +#else + goto fail; +#endif + } p++; q++; } } @@ -1424,7 +1462,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, case OP_EXACTN_IC: STAT_OP_IN(OP_EXACTN_IC); { int len; - UChar *q, *endp, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN]; + UChar *ss, *sp, *q, *endp, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN]; GET_LENGTH_INC(tlen, p); endp = p + tlen; @@ -1432,11 +1470,28 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, while (p < endp) { sprev = s; DATA_ENSURE(1); + ss = s; + sp = p; + + exactn_ic_retry: len = ONIGENC_MBC_TO_NORMALIZE(encode, ambig_flag, &s, end, lowbuf); DATA_ENSURE(0); q = lowbuf; while (len-- > 0) { - if (*p != *q) goto fail; + if (*p != *q) { +#if 1 + if ((ambig_flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { + ambig_flag &= ~ONIGENC_AMBIGUOUS_MATCH_COMPOUND; + s = ss; + p = sp; + goto exactn_ic_retry; + } + else + goto fail; +#else + goto fail; +#endif + } p++; q++; } } @@ -1655,6 +1710,24 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, STAT_OP_OUT; break; + case OP_CCLASS_NODE: STAT_OP_IN(OP_CCLASS_NODE); + { + OnigCodePoint code; + void *node; + int mb_len; + UChar *ss; + + DATA_ENSURE(1); + GET_POINTER_INC(node, p); + mb_len = enc_len(encode, s); + ss = s; + s += mb_len; + code = ONIGENC_MBC_TO_CODE(encode, ss, s); + if (code_is_in_cclass_node(node, code, mb_len) == 0) goto fail; + } + STAT_OP_OUT; + break; + case OP_ANYCHAR: STAT_OP_IN(OP_ANYCHAR); DATA_ENSURE(1); n = enc_len(encode, s); @@ -2519,13 +2592,26 @@ str_lower_case_match(OnigEncoding enc, int ambig_flag, UChar* t, UChar* tend, UChar* p, UChar* end) { int lowlen; - UChar *q, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN]; + UChar *q, *tsave, *psave, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN]; + + tsave = t; + psave = p; + retry: while (t < tend) { lowlen = ONIGENC_MBC_TO_NORMALIZE(enc, ambig_flag, &p, end, lowbuf); q = lowbuf; while (lowlen > 0) { - if (*t++ != *q++) return 0; + if (*t++ != *q++) { + if ((ambig_flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { + ambig_flag &= ~ONIGENC_AMBIGUOUS_MATCH_COMPOUND; + t = tsave; + p = psave; + goto retry; + } + else + return 0; + } lowlen--; } } @@ -2538,9 +2624,7 @@ slow_search_ic(OnigEncoding enc, int ambig_flag, UChar* target, UChar* target_end, UChar* text, UChar* text_end, UChar* text_range) { - int lowlen; - UChar *t, *p, *s, *end, *z; - UChar lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN]; + UChar *s, *end; end = text_end - (target_end - target) + 1; if (end > text_range) @@ -2549,21 +2633,10 @@ slow_search_ic(OnigEncoding enc, int ambig_flag, s = text; while (s < end) { - z = s; - lowlen = ONIGENC_MBC_TO_NORMALIZE(enc, ambig_flag, &s, text_end, lowbuf); - if (*target == *lowbuf) { - p = lowbuf + 1; - t = target + 1; - while (--lowlen > 0) { - if (*p != *t) break; - p++; t++; - } - if (lowlen == 0) { - if (str_lower_case_match(enc, ambig_flag, - t, target_end, s, text_end)) - return z; - } - } + if (str_lower_case_match(enc, ambig_flag, target, target_end, s, text_end)) + return s; + + s += enc_len(enc, s); } return (UChar* )NULL; @@ -2605,9 +2678,7 @@ slow_search_backward_ic(OnigEncoding enc, int ambig_flag, UChar* text, UChar* adjust_text, UChar* text_end, UChar* text_start) { - int len, lowlen; - UChar *t, *p, *s, *z; - UChar lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN]; + UChar *s; s = text_end - (target_end - target); if (s > text_start) @@ -2616,24 +2687,11 @@ slow_search_backward_ic(OnigEncoding enc, int ambig_flag, s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s); while (s >= text) { - len = enc_len(enc, s); - z = s; - lowlen = ONIGENC_MBC_TO_NORMALIZE(enc, ambig_flag, &s, text_end, lowbuf); - if (*target == *lowbuf) { - p = lowbuf + 1; - t = target + 1; - while (--lowlen > 0) { - if (*p != *t) break; - p++; t++; - } - if (lowlen == 0) { - if (str_lower_case_match(enc, ambig_flag, - t, target_end, s, text_end)) - return z; - } - } + if (str_lower_case_match(enc, ambig_flag, + target, target_end, s, text_end)) + return s; - s = onigenc_get_prev_char_head(enc, adjust_text, z); + s = onigenc_get_prev_char_head(enc, adjust_text, s); } return (UChar* )NULL; @@ -2828,11 +2886,12 @@ onig_match(regex_t* reg, UChar* str, UChar* end, UChar* at, OnigRegion* region, UChar *prev; MatchArg msa; - if (ONIG_STATE(reg) == ONIG_STATE_NORMAL) { - reg->state++; /* increment as search counter */ - if (IS_NOT_NULL(reg->chain)) { +#ifdef USE_MULTI_THREAD_SYSTEM + if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) { + ONIG_STATE_INC(reg); + if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) { onig_chain_reduce(reg); - reg->state++; + ONIG_STATE_INC(reg); } } else { @@ -2842,8 +2901,9 @@ onig_match(regex_t* reg, UChar* str, UChar* end, UChar* at, OnigRegion* region, return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT; THREAD_PASS; } - reg->state++; /* increment as search counter */ + ONIG_STATE_INC(reg); } +#endif /* USE_MULTI_THREAD_SYSTEM */ MATCH_ARG_INIT(msa, option, region, at); @@ -2863,7 +2923,7 @@ onig_match(regex_t* reg, UChar* str, UChar* end, UChar* at, OnigRegion* region, } MATCH_ARG_FREE(msa); - reg->state--; /* decrement as search counter */ + ONIG_STATE_DEC(reg); return r; } @@ -3098,11 +3158,12 @@ onig_search(regex_t* reg, UChar* str, UChar* end, UChar *s, *prev; MatchArg msa; - if (ONIG_STATE(reg) == ONIG_STATE_NORMAL) { - reg->state++; /* increment as search counter */ - if (IS_NOT_NULL(reg->chain)) { +#ifdef USE_MULTI_THREAD_SYSTEM + if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) { + ONIG_STATE_INC(reg); + if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) { onig_chain_reduce(reg); - reg->state++; + ONIG_STATE_INC(reg); } } else { @@ -3112,8 +3173,9 @@ onig_search(regex_t* reg, UChar* str, UChar* end, return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT; THREAD_PASS; } - reg->state++; /* increment as search counter */ + ONIG_STATE_INC(reg); } +#endif /* USE_MULTI_THREAD_SYSTEM */ #ifdef ONIG_DEBUG_SEARCH fprintf(stderr, "onig_search (entry point): str: %d, end: %d, start: %d, range: %d\n", @@ -3360,7 +3422,7 @@ onig_search(regex_t* reg, UChar* str, UChar* end, finish: MATCH_ARG_FREE(msa); - reg->state--; /* decrement as search counter */ + ONIG_STATE_DEC(reg); /* If result is mismatch and no FIND_NOT_EMPTY option, then the region is not setted in match_at(). */ @@ -3381,7 +3443,7 @@ onig_search(regex_t* reg, UChar* str, UChar* end, mismatch_no_msa: r = ONIG_MISMATCH; finish_no_msa: - reg->state--; /* decrement as search counter */ + ONIG_STATE_DEC(reg); #ifdef ONIG_DEBUG if (r != ONIG_MISMATCH) fprintf(stderr, "onig_search: error %d\n", r); @@ -3389,7 +3451,7 @@ onig_search(regex_t* reg, UChar* str, UChar* end, return r; match: - reg->state--; /* decrement as search counter */ + ONIG_STATE_DEC(reg); MATCH_ARG_FREE(msa); return s - str; } -- cgit v1.2.3