From d92db05a27d7c97386dfa8bc77fb865aba9df751 Mon Sep 17 00:00:00 2001 From: kosako Date: Sat, 5 Aug 2006 13:54:40 +0000 Subject: merge Oniguruma 4.2.2 git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@10684 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 16 +++++ oniguruma.h | 4 +- regcomp.c | 159 ++++++++++++++++++++++++++--------------- regerror.c | 2 +- regexec.c | 230 ++++++++++++++++++++++++++++++++++++++++++++---------------- regint.h | 29 ++++++-- regparse.c | 125 +++++++++++++++++++++++++++++++-- regparse.h | 5 +- 8 files changed, 437 insertions(+), 133 deletions(-) diff --git a/ChangeLog b/ChangeLog index 3c45bd4389..559de02a4c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,19 @@ +Sat Aug 5 22:53:41 2006 K.Kosako + + * oniguruma.h: Version 4.2.2 + + * regint.h: ditto. + + * regparse.h: ditto. + + * regexec.c: ditto. + + * regcomp.c ditto. + + * regerror.c: ditto. + + * regparse.c: ditto. + Sat Aug 5 13:49:43 2006 Tadayoshi Funaba * lib/date/format.rb (str[fp]time): "%\n" means "\n". diff --git a/oniguruma.h b/oniguruma.h index ed31b9e65f..4560c4eb98 100644 --- a/oniguruma.h +++ b/oniguruma.h @@ -35,8 +35,8 @@ extern "C" { #define ONIGURUMA #define ONIGURUMA_VERSION_MAJOR 4 -#define ONIGURUMA_VERSION_MINOR 0 -#define ONIGURUMA_VERSION_TEENY 3 +#define ONIGURUMA_VERSION_MINOR 2 +#define ONIGURUMA_VERSION_TEENY 2 #ifdef __cplusplus # ifndef HAVE_PROTOTYPES diff --git a/regcomp.c b/regcomp.c index 922e65b086..530237c28a 100644 --- a/regcomp.c +++ b/regcomp.c @@ -1268,6 +1268,13 @@ compile_length_tree(Node* node, regex_t* reg) { BackrefNode* br = &(NBACKREF(node)); +#ifdef USE_BACKREF_AT_LEVEL + if (IS_BACKREF_NEST_LEVEL(br)) { + r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH + + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num); + } + else +#endif if (br->back_num == 1) { r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 3) ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM)); @@ -1381,9 +1388,21 @@ compile_tree(Node* node, regex_t* reg) case N_BACKREF: { - int i; BackrefNode* br = &(NBACKREF(node)); +#ifdef USE_BACKREF_AT_LEVEL + if (IS_BACKREF_NEST_LEVEL(br)) { + r = add_opcode(reg, OP_BACKREF_AT_LEVEL); + if (r) return r; + r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE)); + if (r) return r; + r = add_length(reg, br->nest_level); + if (r) return r; + + goto add_bacref_mems; + } + else +#endif if (br->back_num == 1) { n = br->back_static[0]; if (IS_IGNORECASE(reg->options)) { @@ -1405,17 +1424,19 @@ compile_tree(Node* node, regex_t* reg) } } else { + int i; int* p; if (IS_IGNORECASE(reg->options)) { - add_opcode(reg, OP_BACKREF_MULTI_IC); + r = add_opcode(reg, OP_BACKREF_MULTI_IC); } else { - add_opcode(reg, OP_BACKREF_MULTI); + r = add_opcode(reg, OP_BACKREF_MULTI); } - if (r) return r; - add_length(reg, br->back_num); + + add_bacref_mems: + r = add_length(reg, br->back_num); if (r) return r; p = BACKREFS_P(br); for (i = br->back_num - 1; i >= 0; i--) { @@ -3088,6 +3109,11 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF; BIT_STATUS_ON_AT(env->backrefed_mem, p[i]); BIT_STATUS_ON_AT(env->bt_mem_start, p[i]); +#ifdef USE_BACKREF_AT_LEVEL + if (IS_BACKREF_NEST_LEVEL(br)) { + BIT_STATUS_ON_AT(env->bt_mem_end, p[i]); + } +#endif SET_EFFECT_STATUS(nodes[p[i]], NST_MEM_BACKREFED); } } @@ -3235,11 +3261,9 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) #define ALLOWED_EFFECT_IN_LB_NOT 0 #define ALLOWED_ANCHOR_IN_LB \ -( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF ) +( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION ) #define ALLOWED_ANCHOR_IN_LB_NOT \ -( ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF ) - /* can't allow all anchors, because \G in look-behind through Search(). - ex. /(?<=\G)zz/.match("azz") => success. */ +( ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION ) case ANCHOR_LOOK_BEHIND: { @@ -3576,9 +3600,10 @@ copy_opt_exact_info(OptExactInfo* to, OptExactInfo* from) } static void -concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add) +concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc) { - int i, n; + int i, j, len; + UChar *p, *end; OptAncInfo tanc; if (! to->ignore_case && add->ignore_case) { @@ -3587,11 +3612,17 @@ concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add) to->ignore_case = 1; } - for (i = to->len, n = 0; n < add->len && i < OPT_EXACT_MAXLEN; i++, n++) - to->s[i] = add->s[n]; + p = add->s; + end = p + add->len; + for (i = to->len; p < end; ) { + len = enc_len(enc, p); + if (i + len > OPT_EXACT_MAXLEN) break; + for (j = 0; j < len && p < end; j++) + to->s[i++] = *p++; + } to->len = i; - to->reach_end = (n == add->len ? add->reach_end : 0); + to->reach_end = (p == end ? add->reach_end : 0); concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1); if (! to->reach_end) tanc.right_anchor = 0; @@ -3606,15 +3637,10 @@ concat_opt_exact_info_str(OptExactInfo* to, UChar *p; for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) { - if (raw) { + len = enc_len(enc, p); + if (i + len > OPT_EXACT_MAXLEN) break; + for (j = 0; j < len && p < end; j++) to->s[i++] = *p++; - } - else { - len = enc_len(enc, p); - if (i + len > OPT_EXACT_MAXLEN) break; - for (j = 0; j < len; j++) - to->s[i++] = *p++; - } } to->len = i; @@ -3879,11 +3905,11 @@ concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add) if (add->exb.len > 0) { if (exb_reach) { - concat_opt_exact_info(&to->exb, &add->exb); + concat_opt_exact_info(&to->exb, &add->exb, enc); clear_opt_exact_info(&add->exb); } else if (exm_reach) { - concat_opt_exact_info(&to->exm, &add->exb); + concat_opt_exact_info(&to->exm, &add->exb, enc); clear_opt_exact_info(&add->exb); } } @@ -4182,7 +4208,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) if (nopt.exb.reach_end) { for (i = 2; i < qn->lower && ! is_full_opt_exact_info(&opt->exb); i++) { - concat_opt_exact_info(&opt->exb, &nopt.exb); + concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc); } if (i < qn->lower) { opt->exb.reach_end = 0; @@ -4593,7 +4619,6 @@ onig_chain_reduce(regex_t* reg) { regex_t *head, *prev; - THREAD_ATOMIC_START; prev = reg; head = prev->chain; if (IS_NOT_NULL(head)) { @@ -4605,7 +4630,6 @@ onig_chain_reduce(regex_t* reg) prev->chain = (regex_t* )NULL; REGEX_TRANSFER(reg, head); } - THREAD_ATOMIC_END; } #if 0 @@ -4844,6 +4868,7 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, return r; } +#ifdef USE_RECOMPILE_API extern int onig_recompile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, @@ -4862,6 +4887,7 @@ onig_recompile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, } return 0; } +#endif static int onig_inited = 0; @@ -4965,14 +4991,14 @@ onig_end() onig_print_statistics(stderr); #endif -#ifdef USE_RECYCLE_NODE - onig_free_node_list(); -#endif - #ifdef USE_SHARED_CCLASS_TABLE onig_free_shared_cclass_table(); #endif +#ifdef USE_RECYCLE_NODE + onig_free_node_list(); +#endif + onig_inited = 0; THREAD_ATOMIC_END; @@ -5026,35 +5052,36 @@ OnigOpInfoType OnigOpInfo[] = { { OP_END_LINE, "end-line", ARG_NON }, { OP_SEMI_END_BUF, "semi-end-buf", ARG_NON }, { OP_BEGIN_POSITION, "begin-position", ARG_NON }, - { OP_BACKREF1, "backref1", ARG_NON }, - { OP_BACKREF2, "backref2", ARG_NON }, - { OP_BACKREF3, "backref3", ARG_NON }, - { OP_BACKREFN, "backrefn", ARG_MEMNUM }, - { OP_BACKREFN_IC, "backrefn-ic", ARG_SPECIAL }, - { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL }, - { OP_BACKREF_MULTI_IC, "backref_multi-ic",ARG_SPECIAL }, - { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM }, - { OP_MEMORY_START, "mem-start", ARG_MEMNUM }, + { OP_BACKREF1, "backref1", ARG_NON }, + { OP_BACKREF2, "backref2", ARG_NON }, + { OP_BACKREF3, "backref3", ARG_NON }, + { OP_BACKREFN, "backrefn", ARG_MEMNUM }, + { OP_BACKREFN_IC, "backrefn-ic", ARG_SPECIAL }, + { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL }, + { OP_BACKREF_MULTI_IC, "backref_multi-ic", ARG_SPECIAL }, + { OP_BACKREF_AT_LEVEL, "backref_at_level", ARG_SPECIAL }, + { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM }, + { OP_MEMORY_START, "mem-start", ARG_MEMNUM }, { OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM }, { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec", ARG_MEMNUM }, { OP_MEMORY_END, "mem-end", ARG_MEMNUM }, { OP_MEMORY_END_REC, "mem-end-rec", ARG_MEMNUM }, - { OP_SET_OPTION_PUSH, "set-option-push", ARG_OPTION }, - { OP_SET_OPTION, "set-option", ARG_OPTION }, - { OP_FAIL, "fail", ARG_NON }, - { OP_JUMP, "jump", ARG_RELADDR }, - { OP_PUSH, "push", ARG_RELADDR }, - { OP_POP, "pop", ARG_NON }, - { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL }, - { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL }, - { OP_REPEAT, "repeat", ARG_SPECIAL }, - { OP_REPEAT_NG, "repeat-ng", ARG_SPECIAL }, - { OP_REPEAT_INC, "repeat-inc", ARG_MEMNUM }, - { OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM }, - { OP_REPEAT_INC_SG, "repeat-inc-sg", ARG_MEMNUM }, - { OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg", ARG_MEMNUM }, - { OP_NULL_CHECK_START, "null-check-start",ARG_MEMNUM }, - { OP_NULL_CHECK_END, "null-check-end", ARG_MEMNUM }, + { OP_SET_OPTION_PUSH, "set-option-push", ARG_OPTION }, + { OP_SET_OPTION, "set-option", ARG_OPTION }, + { OP_FAIL, "fail", ARG_NON }, + { OP_JUMP, "jump", ARG_RELADDR }, + { OP_PUSH, "push", ARG_RELADDR }, + { OP_POP, "pop", ARG_NON }, + { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL }, + { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL }, + { OP_REPEAT, "repeat", ARG_SPECIAL }, + { OP_REPEAT_NG, "repeat-ng", ARG_SPECIAL }, + { OP_REPEAT_INC, "repeat-inc", ARG_MEMNUM }, + { OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM }, + { OP_REPEAT_INC_SG, "repeat-inc-sg", ARG_MEMNUM }, + { OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg", ARG_MEMNUM }, + { OP_NULL_CHECK_START, "null-check-start", ARG_MEMNUM }, + { OP_NULL_CHECK_END, "null-check-end", ARG_MEMNUM }, { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM }, { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM }, { OP_PUSH_POS, "push-pos", ARG_NON }, @@ -5286,6 +5313,26 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, } break; + case OP_BACKREF_AT_LEVEL: + { + OnigOptionType option; + LengthType level; + + GET_OPTION_INC(option, bp); + fprintf(f, ":%d", option); + GET_LENGTH_INC(level, bp); + fprintf(f, ":%d", level); + + fputs(" ", f); + GET_LENGTH_INC(len, bp); + for (i = 0; i < len; i++) { + GET_MEMNUM_INC(mem, bp); + if (i > 0) fputs(", ", f); + fprintf(f, "%d", mem); + } + } + break; + case OP_REPEAT: case OP_REPEAT_NG: { diff --git a/regerror.c b/regerror.c index 811ca2b012..ad73b76c3c 100644 --- a/regerror.c +++ b/regerror.c @@ -276,7 +276,7 @@ onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist) UChar bs[6]; va_list args; - va_init_list(args, (const char* )fmt); + va_init_list(args, fmt); n = vsnprintf((char* )buf, bufsize, (const char* )fmt, args); va_end(args); diff --git a/regexec.c b/regexec.c index 90514d403c..78d8094202 100644 --- a/regexec.c +++ b/regexec.c @@ -610,15 +610,18 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end, #ifdef ONIG_DEBUG -#define STACK_BASE_CHECK(p) \ - if ((p) < stk_base) goto stack_error; +#define STACK_BASE_CHECK(p, at) \ + if ((p) < stk_base) {\ + fprintf(stderr, "at %s\n", at);\ + goto stack_error;\ + } #else -#define STACK_BASE_CHECK(p) +#define STACK_BASE_CHECK(p, at) #endif #define STACK_POP_ONE do {\ stk--;\ - STACK_BASE_CHECK(stk); \ + STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \ } while(0) #define STACK_POP do {\ @@ -626,14 +629,14 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end, case STACK_POP_LEVEL_FREE:\ while (1) {\ stk--;\ - STACK_BASE_CHECK(stk); \ + STACK_BASE_CHECK(stk, "STACK_POP"); \ if ((stk->type & STK_MASK_POP_USED) != 0) break;\ }\ break;\ case STACK_POP_LEVEL_MEM_START:\ while (1) {\ stk--;\ - STACK_BASE_CHECK(stk); \ + STACK_BASE_CHECK(stk, "STACK_POP 2"); \ if ((stk->type & STK_MASK_POP_USED) != 0) break;\ else if (stk->type == STK_MEM_START) {\ mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ @@ -644,7 +647,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end, default:\ while (1) {\ stk--;\ - STACK_BASE_CHECK(stk); \ + STACK_BASE_CHECK(stk, "STACK_POP 3"); \ if ((stk->type & STK_MASK_POP_USED) != 0) break;\ else if (stk->type == STK_MEM_START) {\ mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ @@ -665,7 +668,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end, #define STACK_POP_TIL_POS_NOT do {\ while (1) {\ stk--;\ - STACK_BASE_CHECK(stk); \ + STACK_BASE_CHECK(stk, "STACK_POP_TIL_POS_NOT"); \ if (stk->type == STK_POS_NOT) break;\ else if (stk->type == STK_MEM_START) {\ mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ @@ -684,7 +687,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end, #define STACK_POP_TIL_LOOK_BEHIND_NOT do {\ while (1) {\ stk--;\ - STACK_BASE_CHECK(stk); \ + STACK_BASE_CHECK(stk, "STACK_POP_TIL_LOOK_BEHIND_NOT"); \ if (stk->type == STK_LOOK_BEHIND_NOT) break;\ else if (stk->type == STK_MEM_START) {\ mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ @@ -704,7 +707,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end, k = stk;\ while (1) {\ k--;\ - STACK_BASE_CHECK(k); \ + STACK_BASE_CHECK(k, "STACK_POS_END"); \ if (IS_TO_VOID_TARGET(k)) {\ k->type = STK_VOID;\ }\ @@ -719,7 +722,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end, StackType *k = stk;\ while (1) {\ k--;\ - STACK_BASE_CHECK(k); \ + STACK_BASE_CHECK(k, "STACK_STOP_BT_END"); \ if (IS_TO_VOID_TARGET(k)) {\ k->type = STK_VOID;\ }\ @@ -734,7 +737,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end, StackType* k = stk;\ while (1) {\ k--;\ - STACK_BASE_CHECK(k); \ + STACK_BASE_CHECK(k, "STACK_NULL_CHECK"); \ if (k->type == STK_NULL_CHECK_START) {\ if (k->u.null_check.num == (id)) {\ (isnull) = (k->u.null_check.pstr == (s));\ @@ -749,7 +752,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end, StackType* k = stk;\ while (1) {\ k--;\ - STACK_BASE_CHECK(k); \ + STACK_BASE_CHECK(k, "STACK_NULL_CHECK_REC"); \ if (k->type == STK_NULL_CHECK_START) {\ if (k->u.null_check.num == (id)) {\ if (level == 0) {\ @@ -769,7 +772,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end, StackType* k = stk;\ while (1) {\ k--;\ - STACK_BASE_CHECK(k); \ + STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST"); \ if (k->type == STK_NULL_CHECK_START) {\ if (k->u.null_check.num == (id)) {\ if (k->u.null_check.pstr != (s)) {\ @@ -809,7 +812,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end, StackType* k = stk;\ while (1) {\ k--;\ - STACK_BASE_CHECK(k); \ + STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST_REC"); \ if (k->type == STK_NULL_CHECK_START) {\ if (k->u.null_check.num == (id)) {\ if (level == 0) {\ @@ -857,7 +860,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end, k = stk;\ while (1) {\ k--;\ - STACK_BASE_CHECK(k); \ + STACK_BASE_CHECK(k, "STACK_GET_REPEAT"); \ if (k->type == STK_REPEAT) {\ if (level == 0) {\ if (k->u.repeat.num == (id)) {\ @@ -875,7 +878,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end, StackType* k = stk;\ while (1) {\ k--;\ - STACK_BASE_CHECK(k); \ + STACK_BASE_CHECK(k, "STACK_RETURN"); \ if (k->type == STK_CALL_FRAME) {\ if (level == 0) {\ (addr) = k->u.call_frame.ret_addr;\ @@ -995,6 +998,77 @@ make_capture_history_tree(OnigCaptureTreeNode* node, StackType** kp, } #endif +#ifdef USE_BACKREF_AT_LEVEL +static int mem_is_in_memp(int mem, int num, UChar* memp) +{ + int i; + MemNumType m; + + for (i = 0; i < num; i++) { + GET_MEMNUM_INC(m, memp); + if (mem == (int )m) return 1; + } + return 0; +} + +static int backref_match_at_nested_level(regex_t* reg + , StackType* top, StackType* stk_base + , int ignore_case, int ambig_flag + , int nest, int mem_num, UChar* memp, UChar** s, const UChar* send) +{ + UChar *ss, *p, *pstart, *pend = NULL_UCHARP; + int level; + StackType* k; + + level = 0; + k = top; + k--; + while (k >= stk_base) { + if (k->type == STK_CALL_FRAME) { + level--; + } + else if (k->type == STK_RETURN) { + level++; + } + else if (level == nest) { + if (k->type == STK_MEM_START) { + if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) { + pstart = k->u.mem.pstr; + if (pend != NULL_UCHARP) { + if (pend - pstart > send - *s) return 0; /* or goto next_mem; */ + p = pstart; + ss = *s; + + if (ignore_case != 0) { + if (string_cmp_ic(reg->enc, ambig_flag, + pstart, &ss, (int )(pend - pstart)) == 0) + return 0; /* or goto next_mem; */ + } + else { + while (p < pend) { + if (*p++ != *ss++) return 0; /* or goto next_mem; */ + } + } + + *s = ss; + return 1; + } + } + } + else if (k->type == STK_MEM_END) { + if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) { + pend = k->u.mem.pstr; + } + } + } + k--; + } + + return 0; +} +#endif /* USE_BACKREF_AT_LEVEL */ + + #ifdef RUBY_PLATFORM typedef struct { @@ -1010,7 +1084,7 @@ trap_ensure(VALUE arg) TrapEnsureArg* ta = (TrapEnsureArg* )arg; if (ta->state == 0) { /* trap_exec() is not normal return */ - ONIG_STATE_DEC(ta->reg); + ONIG_STATE_DEC_THREAD(ta->reg); if (! IS_NULL(ta->msa->stack_p) && ta->stk_base != ta->msa->stack_p) xfree(ta->stk_base); @@ -2227,6 +2301,35 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, continue; } break; + +#ifdef USE_BACKREF_AT_LEVEL + case OP_BACKREF_AT_LEVEL: + { + int len; + OnigOptionType ic; + LengthType level; + + GET_OPTION_INC(ic, p); + GET_LENGTH_INC(level, p); + GET_LENGTH_INC(tlen, p); + + sprev = s; + if (backref_match_at_nested_level(reg, stk, stk_base, ic, ambig_flag + , (int )level, (int )tlen, p, &s, end)) { + while (sprev + (len = enc_len(encode, sprev)) < s) + sprev += len; + + p += (SIZE_MEMNUM * tlen); + } + else + goto fail; + + STAT_OP_OUT; + continue; + } + + break; +#endif case OP_SET_OPTION_PUSH: STAT_OP_IN(OP_SET_OPTION_PUSH); GET_OPTION_INC(option, p); @@ -2766,66 +2869,56 @@ bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end, const UChar* text, const UChar* text_end, const UChar* text_range) { - const UChar *s, *t, *p, *end; + const UChar *s, *se, *t, *p, *end; const UChar *tail; - int skip; + int skip, tlen1; #ifdef ONIG_DEBUG_SEARCH fprintf(stderr, "bm_search_notrev: text: %d, text_end: %d, text_range: %d\n", (int )text, (int )text_end, (int )text_range); #endif - end = text_range + (target_end - target) - 1; + tlen1 = (target_end - target) - 1; + end = text_range + tlen1; if (end > text_end) end = text_end; tail = target_end - 1; s = text; - while ((s - text) < target_end - target) { - s += enc_len(reg->enc, s); - } - s--; /* set to text check tail position. */ if (IS_NULL(reg->int_map)) { while (s < end) { - p = s; + p = se = s + tlen1; t = tail; - while (t >= target && *p == *t) { - p--; t--; + while (*p == *t && t >= target) { + p--; t--; } - if (t < target) return (UChar* )(p + 1); + if (t < target) return (UChar* )s; - skip = reg->map[*s]; - p = s + 1; - if (p >= text_end) return (UChar* )NULL; - t = p; + skip = reg->map[*se]; + t = s; do { - p += enc_len(reg->enc, p); - } while ((p - t) < skip && p < text_end); - - s += (p - t); + s += enc_len(reg->enc, s); + } while ((s - t) < skip && s < end); } } else { while (s < end) { - p = s; + p = se = s + tlen1; t = tail; - while (t >= target && *p == *t) { - p--; t--; + while (*p == *t && t >= target) { + p--; t--; } - if (t < target) return (UChar* )(p + 1); + if (t < target) return (UChar* )s; - skip = reg->int_map[*s]; - p = s + 1; - if (p >= text_end) return (UChar* )NULL; - t = p; + skip = reg->int_map[*se]; + t = s; do { - p += enc_len(reg->enc, p); - } while ((p - t) < skip && p < text_end); - - s += (p - t); + s += enc_len(reg->enc, s); + } while ((s - t) < skip && s < end); } } + return (UChar* )NULL; } @@ -2954,7 +3047,9 @@ onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, On UChar *prev; MatchArg msa; -#ifdef USE_MULTI_THREAD_SYSTEM +#if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM) + start: + THREAD_ATOMIC_START; if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) { ONIG_STATE_INC(reg); if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) { @@ -2963,15 +3058,19 @@ onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, On } } else { - int n = 0; + int n; + + THREAD_ATOMIC_END; + n = 0; while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) { if (++n > THREAD_PASS_LIMIT_COUNT) return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT; THREAD_PASS; } - ONIG_STATE_INC(reg); + goto start; } -#endif /* USE_MULTI_THREAD_SYSTEM */ + THREAD_ATOMIC_END; +#endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */ MATCH_ARG_INIT(msa, option, region, at); @@ -2991,7 +3090,7 @@ onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, On } MATCH_ARG_FREE(msa); - ONIG_STATE_DEC(reg); + ONIG_STATE_DEC_THREAD(reg); return r; } @@ -3234,8 +3333,11 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, int r; UChar *s, *prev; MatchArg msa; + const UChar *orig_start = start; -#ifdef USE_MULTI_THREAD_SYSTEM +#if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM) + start: + THREAD_ATOMIC_START; if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) { ONIG_STATE_INC(reg); if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) { @@ -3244,15 +3346,19 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, } } else { - int n = 0; + int n; + + THREAD_ATOMIC_END; + n = 0; while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) { if (++n > THREAD_PASS_LIMIT_COUNT) return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT; THREAD_PASS; } - ONIG_STATE_INC(reg); + goto start; } -#endif /* USE_MULTI_THREAD_SYSTEM */ + THREAD_ATOMIC_END; +#endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */ #ifdef ONIG_DEBUG_SEARCH fprintf(stderr, @@ -3380,7 +3486,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, (int )(end - str), (int )(start - str), (int )(range - str)); #endif - MATCH_ARG_INIT(msa, option, region, start); + MATCH_ARG_INIT(msa, option, region, orig_start); s = (UChar* )start; if (range > start) { /* forward search */ @@ -3512,7 +3618,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, finish: MATCH_ARG_FREE(msa); - ONIG_STATE_DEC(reg); + ONIG_STATE_DEC_THREAD(reg); /* If result is mismatch and no FIND_NOT_EMPTY option, then the region is not setted in match_at(). */ @@ -3533,7 +3639,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, mismatch_no_msa: r = ONIG_MISMATCH; finish_no_msa: - ONIG_STATE_DEC(reg); + ONIG_STATE_DEC_THREAD(reg); #ifdef ONIG_DEBUG if (r != ONIG_MISMATCH) fprintf(stderr, "onig_search: error %d\n", r); @@ -3541,7 +3647,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, return r; match: - ONIG_STATE_DEC(reg); + ONIG_STATE_DEC_THREAD(reg); MATCH_ARG_FREE(msa); return s - str; } diff --git a/regint.h b/regint.h index 9affd70aaa..11df33b7db 100644 --- a/regint.h +++ b/regint.h @@ -59,9 +59,11 @@ /* #define USE_UNICODE_FULL_RANGE_CTYPE */ /* --> move to regenc.h */ #define USE_NAMED_GROUP #define USE_SUBEXP_CALL +/* #define USE_BACKREF_AT_LEVEL */ #define USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK /* /(?:()|())*\2/ */ #define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE /* /\n$/ =~ "\n" */ #define USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR +/* #define USE_RECOMPILE_API */ /* treat \r\n as line terminator. !!! NO SUPPORT !!! use this configuration on your own responsibility */ @@ -80,6 +82,7 @@ /* interface to external system */ #ifdef NOT_RUBY /* given from Makefile */ #include "config.h" +#define USE_BACKREF_AT_LEVEL #define USE_CAPTURE_HISTORY #define USE_VARIABLE_META_CHARS #define USE_WORD_BEGIN_END /* "\<": word-begin, "\>": word-end */ @@ -129,13 +132,26 @@ #endif -#ifdef USE_MULTI_THREAD_SYSTEM -#define ONIG_STATE_INC(reg) (reg)->state++ -#define ONIG_STATE_DEC(reg) (reg)->state-- +#if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM) +#define ONIG_STATE_INC(reg) (reg)->state++ +#define ONIG_STATE_DEC(reg) (reg)->state-- + +#define ONIG_STATE_INC_THREAD(reg) do {\ + THREAD_ATOMIC_START;\ + (reg)->state++;\ + THREAD_ATOMIC_END;\ +} while(0) +#define ONIG_STATE_DEC_THREAD(reg) do {\ + THREAD_ATOMIC_START;\ + (reg)->state--;\ + THREAD_ATOMIC_END;\ +} while(0) #else -#define ONIG_STATE_INC(reg) /* Nothing */ -#define ONIG_STATE_DEC(reg) /* Nothing */ -#endif /* USE_MULTI_THREAD_SYSTEM */ +#define ONIG_STATE_INC(reg) /* Nothing */ +#define ONIG_STATE_DEC(reg) /* Nothing */ +#define ONIG_STATE_INC_THREAD(reg) /* Nothing */ +#define ONIG_STATE_DEC_THREAD(reg) /* Nothing */ +#endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */ #define onig_st_is_member st_is_member @@ -584,6 +600,7 @@ enum OpCode { OP_BACKREFN_IC, OP_BACKREF_MULTI, OP_BACKREF_MULTI_IC, + OP_BACKREF_AT_LEVEL, /* \k, \k */ OP_MEMORY_START, OP_MEMORY_START_PUSH, /* push back-tracker to stack */ diff --git a/regparse.c b/regparse.c index 21ec65e86d..d70dbb6c3b 100644 --- a/regparse.c +++ b/regparse.c @@ -1085,13 +1085,13 @@ onig_free_node_list() { FreeNode* n; - THREAD_ATOMIC_START; + /* THREAD_ATOMIC_START; */ while (IS_NOT_NULL(FreeNodeList)) { n = FreeNodeList; FreeNodeList = FreeNodeList->next; xfree(n); } - THREAD_ATOMIC_END; + /* THREAD_ATOMIC_END; */ return 0; } #endif @@ -1244,7 +1244,11 @@ onig_node_new_anchor(int type) } static Node* -node_new_backref(int back_num, int* backrefs, int by_name, ScanEnv* env) +node_new_backref(int back_num, int* backrefs, int by_name, +#ifdef USE_BACKREF_AT_LEVEL + int exist_level, int nest_level, +#endif + ScanEnv* env) { int i; Node* node = node_new(); @@ -1257,6 +1261,13 @@ node_new_backref(int back_num, int* backrefs, int by_name, ScanEnv* env) if (by_name != 0) NBACKREF(node).state |= NST_NAME_REF; +#ifdef USE_BACKREF_AT_LEVEL + if (exist_level != 0) { + NBACKREF(node).state |= NST_NEST_LEVEL; + NBACKREF(node).nest_level = nest_level; + } +#endif + for (i = 0; i < back_num; i++) { if (backrefs[i] <= env->num_mem && IS_NULL(SCANENV_MEM_NODES(env)[backrefs[i]])) { @@ -2241,6 +2252,10 @@ typedef struct { int ref1; int* refs; int by_name; +#ifdef USE_BACKREF_AT_LEVEL + int exist_level; + int level; /* \k */ +#endif } backref; struct { UChar* name; @@ -2420,6 +2435,89 @@ fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env) static int fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env); #ifdef USE_NAMED_GROUP +#ifdef USE_BACKREF_AT_LEVEL +/* + \k, \k +*/ +static int +fetch_name_with_level(UChar** src, UChar* end, UChar** rname_end + , ScanEnv* env, int* level) +{ + int r, exist_level = 0; + OnigCodePoint c = 0; + OnigCodePoint first_code; + OnigEncoding enc = env->enc; + UChar *name_end; + UChar *p = *src; + PFETCH_READY; + + name_end = end; + r = 0; + if (PEND) { + return ONIGERR_EMPTY_GROUP_NAME; + } + else { + PFETCH(c); + first_code = c; + if (c == '>') + return ONIGERR_EMPTY_GROUP_NAME; + + if (!ONIGENC_IS_CODE_WORD(enc, c)) { + r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; + } + } + + while (!PEND) { + name_end = p; + PFETCH(c); + if (c == '>' || c == ')' || c == '+' || c == '-') break; + + if (!ONIGENC_IS_CODE_WORD(enc, c)) { + r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; + } + } + + if (c != '>') { + if (c == '+' || c == '-') { + int num; + int flag = (c == '-' ? -1 : 1); + + PFETCH(c); + if (! ONIGENC_IS_CODE_DIGIT(enc, c)) goto err; + PUNFETCH; + num = onig_scan_unsigned_number(&p, end, enc); + if (num < 0) return ONIGERR_TOO_BIG_NUMBER; + *level = (num * flag); + exist_level = 1; + + PFETCH(c); + if (c == '>') + goto first_check; + } + + err: + r = ONIGERR_INVALID_GROUP_NAME; + name_end = end; + } + else { + first_check: + if (ONIGENC_IS_CODE_ASCII(first_code) && + ONIGENC_IS_CODE_UPPER(enc, first_code)) + r = ONIGERR_INVALID_GROUP_NAME; + } + + if (r == 0) { + *rname_end = name_end; + *src = p; + return (exist_level ? 1 : 0); + } + else { + onig_scan_env_set_error_string(env, r, *src, name_end); + return r; + } +} +#endif /* USE_BACKREF_AT_LEVEL */ + /* def: 0 -> define name (don't allow number name) 1 -> reference name (allow number name) @@ -3132,6 +3230,9 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) tok->u.backref.num = 1; tok->u.backref.ref1 = num; tok->u.backref.by_name = 0; +#ifdef USE_BACKREF_AT_LEVEL + tok->u.backref.exist_level = 0; +#endif break; } @@ -3170,8 +3271,17 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) int* backs; prev = p; + +#ifdef USE_BACKREF_AT_LEVEL + name_end = NULL_UCHARP; /* no need. escape gcc warning. */ + r = fetch_name_with_level(&p, end, &name_end, env, &tok->u.backref.level); + if (r == 1) tok->u.backref.exist_level = 1; + else tok->u.backref.exist_level = 0; +#else r = fetch_name(&p, end, &name_end, env, 1); +#endif if (r < 0) return r; + num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs); if (num <= 0) { onig_scan_env_set_error_string(env, @@ -5007,8 +5117,13 @@ parse_exp(Node** np, OnigToken* tok, int term, case TK_BACKREF: len = tok->u.backref.num; *np = node_new_backref(len, - (len > 1 ? tok->u.backref.refs : &(tok->u.backref.ref1)), - tok->u.backref.by_name, env); + (len > 1 ? tok->u.backref.refs : &(tok->u.backref.ref1)), + tok->u.backref.by_name, +#ifdef USE_BACKREF_AT_LEVEL + tok->u.backref.exist_level, + tok->u.backref.level, +#endif + env); CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); break; diff --git a/regparse.h b/regparse.h index 13f80b0f37..0958c909bf 100644 --- a/regparse.h +++ b/regparse.h @@ -76,7 +76,7 @@ #define NODE_STR_MARGIN 16 #define NODE_STR_BUF_SIZE 24 /* sizeof(CClassNode) - sizeof(int)*4 */ -#define NODE_BACKREFS_SIZE 7 +#define NODE_BACKREFS_SIZE 6 #define NSTR_RAW (1<<0) /* by backslashed number */ #define NSTR_AMBIG (1<<1) @@ -145,6 +145,7 @@ typedef struct { #define NST_NAMED_GROUP (1<<10) #define NST_NAME_REF (1<<11) #define NST_IN_REPEAT (1<<12) /* STK_REPEAT is nested in stack. */ +#define NST_NEST_LEVEL (1<<13) #define SET_EFFECT_STATUS(node,f) (node)->u.effect.state |= (f) #define CLEAR_EFFECT_STATUS(node,f) (node)->u.effect.state &= ~(f) @@ -165,6 +166,7 @@ typedef struct { #define IS_CALL_RECURSION(cn) (((cn)->state & NST_RECURSION) != 0) #define IS_CALL_NAME_REF(cn) (((cn)->state & NST_NAME_REF) != 0) #define IS_BACKREF_NAME_REF(bn) (((bn)->state & NST_NAME_REF) != 0) +#define IS_BACKREF_NEST_LEVEL(bn) (((bn)->state & NST_NEST_LEVEL) != 0) #define IS_QUALIFIER_IN_REPEAT(qn) (((qn)->state & NST_IN_REPEAT) != 0) typedef struct { @@ -212,6 +214,7 @@ typedef struct { int back_num; int back_static[NODE_BACKREFS_SIZE]; int* back_dynamic; + int nest_level; } BackrefNode; typedef struct { -- cgit v1.2.3