From 2873edeafb6f6df1fc99bb9b1167591b99dd378c Mon Sep 17 00:00:00 2001 From: naruse Date: Sat, 10 Dec 2016 17:47:04 +0000 Subject: Merge Onigmo 6.0.0 * https://github.com/k-takata/Onigmo/blob/Onigmo-6.0.0/HISTORY * fix for ruby 2.4: https://github.com/k-takata/Onigmo/pull/78 * suppress warning: https://github.com/k-takata/Onigmo/pull/79 * include/ruby/oniguruma.h: include onigmo.h. * template/encdb.h.tmpl: ignore duplicated definition of EUC-CN in enc/euc_kr.c. It is defined in enc/gb2313.c with CRuby macro. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@57045 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- regexec.c | 895 +++++++++++++++++++++++++++++++------------------------------- 1 file changed, 450 insertions(+), 445 deletions(-) (limited to 'regexec.c') diff --git a/regexec.c b/regexec.c index f8813875dc..b27884b32c 100644 --- a/regexec.c +++ b/regexec.c @@ -3,7 +3,7 @@ **********************************************************************/ /*- * Copyright (c) 2002-2008 K.Kosako - * Copyright (c) 2011-2014 K.Takata + * Copyright (c) 2011-2016 K.Takata * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -30,33 +30,39 @@ #include "regint.h" -/* #define USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */ +#ifdef RUBY +# undef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE +#else +# define USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE +#endif -#ifndef USE_DIRECT_THREADED_VM +#ifndef USE_TOKEN_THREADED_VM # ifdef __GNUC__ -# define USE_DIRECT_THREADED_VM 1 +# define USE_TOKEN_THREADED_VM 1 # else -# define USE_DIRECT_THREADED_VM 0 +# define USE_TOKEN_THREADED_VM 0 # endif #endif -#define ENC_DUMMY_FLAG (1<<24) +#ifdef RUBY +# define ENC_DUMMY_FLAG (1<<24) static inline int rb_enc_asciicompat(OnigEncoding enc) { - return ONIGENC_MBC_MINLEN(enc)==1 && !((enc)->ruby_encoding_index & ENC_DUMMY_FLAG); + return ONIGENC_MBC_MINLEN(enc)==1 && !((enc)->ruby_encoding_index & ENC_DUMMY_FLAG); } -#undef ONIGENC_IS_MBC_ASCII_WORD -#define ONIGENC_IS_MBC_ASCII_WORD(enc,s,end) \ +# undef ONIGENC_IS_MBC_ASCII_WORD +# define ONIGENC_IS_MBC_ASCII_WORD(enc,s,end) \ (rb_enc_asciicompat(enc) ? (ISALNUM(*s) || *s=='_') : \ onigenc_ascii_is_code_ctype( \ ONIGENC_MBC_TO_CODE(enc,s,end),ONIGENC_CTYPE_WORD,enc)) +#endif /* RUBY */ #ifdef USE_CRNL_AS_LINE_TERMINATOR -#define ONIGENC_IS_MBC_CRNL(enc,p,end) \ +# define ONIGENC_IS_MBC_CRNL(enc,p,end) \ (ONIGENC_MBC_TO_CODE(enc,p,end) == 13 && \ ONIGENC_MBC_TO_CODE(enc,(p+enclen(enc,p,end)),end) == 10) -#define ONIGENC_IS_MBC_NEWLINE_EX(enc,p,start,end,option,check_prev) \ +# define ONIGENC_IS_MBC_NEWLINE_EX(enc,p,start,end,option,check_prev) \ is_mbc_newline_ex((enc),(p),(start),(end),(option),(check_prev)) static int is_mbc_newline_ex(OnigEncoding enc, const UChar *p, const UChar *start, @@ -90,7 +96,7 @@ is_mbc_newline_ex(OnigEncoding enc, const UChar *p, const UChar *start, } } #else /* USE_CRNL_AS_LINE_TERMINATOR */ -#define ONIGENC_IS_MBC_NEWLINE_EX(enc,p,start,end,option,check_prev) \ +# define ONIGENC_IS_MBC_NEWLINE_EX(enc,p,start,end,option,check_prev) \ ONIGENC_IS_MBC_NEWLINE((enc), (p), (end)) #endif /* USE_CRNL_AS_LINE_TERMINATOR */ @@ -105,7 +111,7 @@ history_tree_clear(OnigCaptureTreeNode* node) if (IS_NOT_NULL(node)) { for (i = 0; i < node->num_childs; i++) { if (IS_NOT_NULL(node->childs[i])) { - history_tree_free(node->childs[i]); + history_tree_free(node->childs[i]); } } for (i = 0; i < node->allocated; i++) { @@ -156,7 +162,7 @@ history_node_new(void) static int history_tree_add_child(OnigCaptureTreeNode* parent, OnigCaptureTreeNode* child) { -#define HISTORY_TREE_INIT_ALLOC_SIZE 8 +# define HISTORY_TREE_INIT_ALLOC_SIZE 8 if (parent->num_childs >= parent->allocated) { int n, i; @@ -164,15 +170,15 @@ history_tree_add_child(OnigCaptureTreeNode* parent, OnigCaptureTreeNode* child) if (IS_NULL(parent->childs)) { n = HISTORY_TREE_INIT_ALLOC_SIZE; parent->childs = - (OnigCaptureTreeNode** )xmalloc(sizeof(OnigCaptureTreeNode*) * n); + (OnigCaptureTreeNode** )xmalloc(sizeof(OnigCaptureTreeNode*) * n); CHECK_NULL_RETURN_MEMERR(parent->childs); } else { OnigCaptureTreeNode** tmp; n = parent->allocated * 2; tmp = - (OnigCaptureTreeNode** )xrealloc(parent->childs, - sizeof(OnigCaptureTreeNode*) * n); + (OnigCaptureTreeNode** )xrealloc(parent->childs, + sizeof(OnigCaptureTreeNode*) * n); if (tmp == 0) { history_tree_clear(parent); return ONIGERR_MEMORY; @@ -348,7 +354,7 @@ onig_region_free(OnigRegion* r, int free_self) } extern void -onig_region_copy(OnigRegion* to, OnigRegion* from) +onig_region_copy(OnigRegion* to, const OnigRegion* from) { #define RREGC_SIZE (sizeof(int) * from->num_regs) int i, r; @@ -404,7 +410,7 @@ onig_region_copy(OnigRegion* to, OnigRegion* from) #define STK_MASK_MEM_END_OR_MARK 0x8000 /* MEM_END or MEM_END_MARK */ #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE -#define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start, arg_gpos) do {\ +# define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start, arg_gpos) do {\ (msa).stack_p = (void* )0;\ (msa).options = (arg_option);\ (msa).region = (arg_region);\ @@ -413,7 +419,7 @@ onig_region_copy(OnigRegion* to, OnigRegion* from) (msa).best_len = ONIG_MISMATCH;\ } while(0) #else -#define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start, arg_gpos) do {\ +# define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start, arg_gpos) do {\ (msa).stack_p = (void* )0;\ (msa).options = (arg_option);\ (msa).region = (arg_region);\ @@ -424,9 +430,9 @@ onig_region_copy(OnigRegion* to, OnigRegion* from) #ifdef USE_COMBINATION_EXPLOSION_CHECK -#define STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE 16 +# define STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE 16 -#define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) do { \ +# define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) do { \ if ((state_num) > 0 && str_len >= STATE_CHECK_STRING_THRESHOLD_LEN) {\ unsigned int size = (unsigned int )(((str_len) + 1) * (state_num) + 7) >> 3;\ offset = ((offset) * (state_num)) >> 3;\ @@ -452,14 +458,14 @@ onig_region_copy(OnigRegion* to, OnigRegion* from) }\ } while(0) -#define MATCH_ARG_FREE(msa) do {\ +# define MATCH_ARG_FREE(msa) do {\ if ((msa).stack_p) xfree((msa).stack_p);\ if ((msa).state_check_buff_size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) { \ if ((msa).state_check_buff) xfree((msa).state_check_buff);\ }\ } while(0) #else /* USE_COMBINATION_EXPLOSION_CHECK */ -#define MATCH_ARG_FREE(msa) if ((msa).stack_p) xfree((msa).stack_p) +# define MATCH_ARG_FREE(msa) if ((msa).stack_p) xfree((msa).stack_p) #endif /* USE_COMBINATION_EXPLOSION_CHECK */ @@ -548,9 +554,9 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, n *= 2; if (limit_size != 0 && n > limit_size) { if ((unsigned int )(stk_end - stk_base) == limit_size) - return ONIGERR_MATCH_STACK_LIMIT_OVER; + return ONIGERR_MATCH_STACK_LIMIT_OVER; else - n = limit_size; + n = limit_size; } x = (OnigStackType* )xrealloc(stk_base, sizeof(OnigStackType) * n); if (IS_NULL(x)) { @@ -587,9 +593,9 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, #define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0) #ifdef USE_COMBINATION_EXPLOSION_CHECK -#define STATE_CHECK_POS(s,snum) \ +# define STATE_CHECK_POS(s,snum) \ (((s) - str) * num_comb_exp_check + ((snum) - 1)) -#define STATE_CHECK_VAL(v,snum) do {\ +# define STATE_CHECK_VAL(v,snum) do {\ if (state_check_buff != NULL) {\ int x = STATE_CHECK_POS(s,snum);\ (v) = state_check_buff[x/8] & (1<<(x%8));\ @@ -598,13 +604,13 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, } while(0) -#define ELSE_IF_STATE_CHECK_MARK(stk) \ +# define ELSE_IF_STATE_CHECK_MARK(stk) \ else if ((stk)->type == STK_STATE_CHECK_MARK) { \ int x = STATE_CHECK_POS(stk->u.state.pstr, stk->u.state.state_check);\ state_check_buff[x/8] |= (1<<(x%8)); \ } -#define STACK_PUSH(stack_type,pat,s,sprev,keep) do {\ +# define STACK_PUSH(stack_type,pat,s,sprev,keep) do {\ STACK_ENSURE(1);\ stk->type = (stack_type);\ stk->u.state.pcode = (pat);\ @@ -615,14 +621,14 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, STACK_INC;\ } while(0) -#define STACK_PUSH_ENSURED(stack_type,pat) do {\ +# define STACK_PUSH_ENSURED(stack_type,pat) do {\ stk->type = (stack_type);\ stk->u.state.pcode = (pat);\ stk->u.state.state_check = 0;\ STACK_INC;\ } while(0) -#define STACK_PUSH_ALT_WITH_STATE_CHECK(pat,s,sprev,snum,keep) do {\ +# define STACK_PUSH_ALT_WITH_STATE_CHECK(pat,s,sprev,snum,keep) do {\ STACK_ENSURE(1);\ stk->type = STK_ALT;\ stk->u.state.pcode = (pat);\ @@ -633,7 +639,7 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, STACK_INC;\ } while(0) -#define STACK_PUSH_STATE_CHECK(s,snum) do {\ +# define STACK_PUSH_STATE_CHECK(s,snum) do {\ if (state_check_buff != NULL) {\ STACK_ENSURE(1);\ stk->type = STK_STATE_CHECK_MARK;\ @@ -645,9 +651,9 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, #else /* USE_COMBINATION_EXPLOSION_CHECK */ -#define ELSE_IF_STATE_CHECK_MARK(stk) +# define ELSE_IF_STATE_CHECK_MARK(stk) -#define STACK_PUSH(stack_type,pat,s,sprev,keep) do {\ +# define STACK_PUSH(stack_type,pat,s,sprev,keep) do {\ STACK_ENSURE(1);\ stk->type = (stack_type);\ stk->u.state.pcode = (pat);\ @@ -657,7 +663,7 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, STACK_INC;\ } while(0) -#define STACK_PUSH_ENSURED(stack_type,pat) do {\ +# define STACK_PUSH_ENSURED(stack_type,pat) do {\ stk->type = (stack_type);\ stk->u.state.pcode = (pat);\ STACK_INC;\ @@ -781,13 +787,13 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, #ifdef ONIG_DEBUG -#define STACK_BASE_CHECK(p, at) \ +# define STACK_BASE_CHECK(p, at) \ if ((p) < stk_base) {\ fprintf(stderr, "at %s\n", at);\ goto stack_error;\ } #else -#define STACK_BASE_CHECK(p, at) +# define STACK_BASE_CHECK(p, at) #endif #define STACK_POP_ONE do {\ @@ -1124,16 +1130,16 @@ static int string_cmp_ic(OnigEncoding enc, int case_fold_flag, #define IS_EMPTY_STR (str == end) -#define ON_STR_BEGIN(s) ((s) == str) -#define ON_STR_END(s) ((s) == end) +#define ON_STR_BEGIN(s) ((s) == str) +#define ON_STR_END(s) ((s) == end) #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE -#define DATA_ENSURE_CHECK1 (s < right_range) -#define DATA_ENSURE_CHECK(n) (s + (n) <= right_range) -#define DATA_ENSURE(n) if (s + (n) > right_range) goto fail +# define DATA_ENSURE_CHECK1 (s < right_range) +# define DATA_ENSURE_CHECK(n) (s + (n) <= right_range) +# define DATA_ENSURE(n) if (s + (n) > right_range) goto fail #else -#define DATA_ENSURE_CHECK1 (s < end) -#define DATA_ENSURE_CHECK(n) (s + (n) <= end) -#define DATA_ENSURE(n) if (s + (n) > end) goto fail +# define DATA_ENSURE_CHECK1 (s < end) +# define DATA_ENSURE_CHECK(n) (s + (n) <= end) +# define DATA_ENSURE(n) if (s + (n) > end) goto fail #endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */ @@ -1150,29 +1156,29 @@ make_capture_history_tree(OnigCaptureTreeNode* node, OnigStackType** kp, if (k->type == STK_MEM_START) { n = k->u.mem.num; if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP && - BIT_STATUS_AT(reg->capture_history, n) != 0) { - child = history_node_new(); - CHECK_NULL_RETURN_MEMERR(child); - child->group = n; - child->beg = k->u.mem.pstr - str; - r = history_tree_add_child(node, child); - if (r != 0) { - history_tree_free(child); - return r; - } - *kp = (k + 1); - r = make_capture_history_tree(child, kp, stk_top, str, reg); - if (r != 0) return r; - - k = *kp; - child->end = k->u.mem.pstr - str; + BIT_STATUS_AT(reg->capture_history, n) != 0) { + child = history_node_new(); + CHECK_NULL_RETURN_MEMERR(child); + child->group = n; + child->beg = k->u.mem.pstr - str; + r = history_tree_add_child(node, child); + if (r != 0) { + history_tree_free(child); + return r; + } + *kp = (k + 1); + r = make_capture_history_tree(child, kp, stk_top, str, reg); + if (r != 0) return r; + + k = *kp; + child->end = k->u.mem.pstr - str; } } else if (k->type == STK_MEM_END) { if (k->u.mem.num == node->group) { - node->end = k->u.mem.pstr - str; - *kp = k; - return 0; + node->end = k->u.mem.pstr - str; + *kp = k; + return 0; } } k++; @@ -1195,10 +1201,10 @@ static int mem_is_in_memp(int mem, int num, UChar* memp) return 0; } -static int backref_match_at_nested_level(regex_t* reg - , OnigStackType* top, OnigStackType* stk_base - , int ignore_case, int case_fold_flag - , int nest, int mem_num, UChar* memp, UChar** s, const UChar* send) +static int backref_match_at_nested_level(regex_t* reg, + OnigStackType* top, OnigStackType* stk_base, + int ignore_case, int case_fold_flag, + int nest, int mem_num, UChar* memp, UChar** s, const UChar* send) { UChar *ss, *p, *pstart, *pend = NULL_UCHARP; int level; @@ -1255,27 +1261,37 @@ static int backref_match_at_nested_level(regex_t* reg #ifdef ONIG_DEBUG_STATISTICS -#define USE_TIMEOFDAY - -#ifdef USE_TIMEOFDAY -#ifdef HAVE_SYS_TIME_H -#include -#endif -#ifdef HAVE_UNISTD_H -#include -#endif +# ifdef _WIN32 +# include +static LARGE_INTEGER ts, te, freq; +# define GETTIME(t) QueryPerformanceCounter(&(t)) +# define TIMEDIFF(te,ts) (unsigned long )(((te).QuadPart - (ts).QuadPart) \ + * 1000000 / freq.QuadPart) +# else /* _WIN32 */ + +# define USE_TIMEOFDAY + +# ifdef USE_TIMEOFDAY +# ifdef HAVE_SYS_TIME_H +# include +# endif +# ifdef HAVE_UNISTD_H +# include +# endif static struct timeval ts, te; -#define GETTIME(t) gettimeofday(&(t), (struct timezone* )0) -#define TIMEDIFF(te,ts) (((te).tv_usec - (ts).tv_usec) + \ - (((te).tv_sec - (ts).tv_sec)*1000000)) -#else /* USE_TIMEOFDAY */ -#ifdef HAVE_SYS_TIMES_H -#include -#endif +# define GETTIME(t) gettimeofday(&(t), (struct timezone* )0) +# define TIMEDIFF(te,ts) (((te).tv_usec - (ts).tv_usec) + \ + (((te).tv_sec - (ts).tv_sec)*1000000)) +# else /* USE_TIMEOFDAY */ +# ifdef HAVE_SYS_TIMES_H +# include +# endif static struct tms ts, te; -#define GETTIME(t) times(&(t)) -#define TIMEDIFF(te,ts) ((te).tms_utime - (ts).tms_utime) -#endif /* USE_TIMEOFDAY */ +# define GETTIME(t) times(&(t)) +# define TIMEDIFF(te,ts) ((te).tms_utime - (ts).tms_utime) +# endif /* USE_TIMEOFDAY */ + +# endif /* _WIN32 */ static int OpCounter[256]; static int OpPrevCounter[256]; @@ -1284,14 +1300,14 @@ static int OpCurr = OP_FINISH; static int OpPrevTarget = OP_FAIL; static int MaxStackDepth = 0; -#define MOP_IN(opcode) do {\ +# define MOP_IN(opcode) do {\ if (opcode == OpPrevTarget) OpPrevCounter[OpCurr]++;\ OpCurr = opcode;\ OpCounter[opcode]++;\ GETTIME(ts);\ } while(0) -#define MOP_OUT do {\ +# define MOP_OUT do {\ GETTIME(te);\ OpTime[OpCurr] += TIMEDIFF(te, ts);\ } while(0) @@ -1304,6 +1320,9 @@ onig_statistics_init(void) OpCounter[i] = OpPrevCounter[i] = 0; OpTime[i] = 0; } MaxStackDepth = 0; +# ifdef _WIN32 + QueryPerformanceFrequency(&freq); +# endif } extern void @@ -1318,28 +1337,45 @@ onig_print_statistics(FILE* f) fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth); } -#define STACK_INC do {\ +# define STACK_INC do {\ stk++;\ if (stk - stk_base > MaxStackDepth) \ MaxStackDepth = stk - stk_base;\ } while(0) #else /* ONIG_DEBUG_STATISTICS */ -#define STACK_INC stk++ +# define STACK_INC stk++ -#define MOP_IN(opcode) -#define MOP_OUT +# define MOP_IN(opcode) +# define MOP_OUT #endif /* ONIG_DEBUG_STATISTICS */ - -/* matching region of POSIX API */ -typedef int regoff_t; - -typedef struct { - regoff_t rm_so; - regoff_t rm_eo; -} posix_regmatch_t; +#ifdef ONIG_DEBUG_MATCH +static char * +stack_type_str(int stack_type) +{ + switch (stack_type) { + case STK_ALT: return "Alt "; + case STK_LOOK_BEHIND_NOT: return "LBNot "; + case STK_POS_NOT: return "PosNot"; + case STK_MEM_START: return "MemS "; + case STK_MEM_END: return "MemE "; + case STK_REPEAT_INC: return "RepInc"; + case STK_STATE_CHECK_MARK: return "StChMk"; + case STK_NULL_CHECK_START: return "NulChS"; + case STK_NULL_CHECK_END: return "NulChE"; + case STK_MEM_END_MARK: return "MemEMk"; + case STK_POS: return "Pos "; + case STK_STOP_BT: return "StopBt"; + case STK_REPEAT: return "Rep "; + case STK_CALL_FRAME: return "Call "; + case STK_RETURN: return "Ret "; + case STK_VOID: return "Void "; + default: return " "; + } +} +#endif /* match data(str - end) from position (sstart). */ /* if sstart == str then set sprev to NULL. */ @@ -1376,13 +1412,14 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, int num_comb_exp_check = reg->num_comb_exp_check; #endif -#if USE_DIRECT_THREADED_VM -#define VM_LOOP JUMP; -#define VM_LOOP_END -#define CASE(x) L_##x: sbegin = s; OPCODE_EXEC_HOOK; -#define DEFAULT L_DEFAULT: -#define NEXT sprev = sbegin; JUMP -#define JUMP goto *oplabels[*p++] +#if USE_TOKEN_THREADED_VM +# define OP_OFFSET 1 +# define VM_LOOP JUMP; +# define VM_LOOP_END +# define CASE(x) L_##x: sbegin = s; OPCODE_EXEC_HOOK; +# define DEFAULT L_DEFAULT: +# define NEXT sprev = sbegin; JUMP +# define JUMP goto *oplabels[*p++] static const void *oplabels[] = { &&L_OP_FINISH, /* matching process terminator (no more alternative) */ @@ -1410,7 +1447,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, &&L_OP_CCLASS_NOT, &&L_OP_CCLASS_MB_NOT, &&L_OP_CCLASS_MIX_NOT, - &&L_OP_CCLASS_NODE, /* pointer to CClassNode node */ &&L_OP_ANYCHAR, /* "." */ &&L_OP_ANYCHAR_ML, /* "." multi-line */ @@ -1423,24 +1459,24 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, &&L_OP_NOT_WORD, &&L_OP_WORD_BOUND, &&L_OP_NOT_WORD_BOUND, -#ifdef USE_WORD_BEGIN_END +# ifdef USE_WORD_BEGIN_END &&L_OP_WORD_BEGIN, &&L_OP_WORD_END, -#else +# else &&L_DEFAULT, &&L_DEFAULT, -#endif +# endif &&L_OP_ASCII_WORD, &&L_OP_NOT_ASCII_WORD, &&L_OP_ASCII_WORD_BOUND, &&L_OP_NOT_ASCII_WORD_BOUND, -#ifdef USE_WORD_BEGIN_END +# ifdef USE_WORD_BEGIN_END &&L_OP_ASCII_WORD_BEGIN, &&L_OP_ASCII_WORD_END, -#else +# else &&L_DEFAULT, &&L_DEFAULT, -#endif +# endif &&L_OP_BEGIN_BUF, &&L_OP_END_BUF, @@ -1456,25 +1492,25 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, &&L_OP_BACKREFN_IC, &&L_OP_BACKREF_MULTI, &&L_OP_BACKREF_MULTI_IC, -#ifdef USE_BACKREF_WITH_LEVEL +# ifdef USE_BACKREF_WITH_LEVEL &&L_OP_BACKREF_WITH_LEVEL, /* \k, \k */ -#else +# else &&L_DEFAULT, -#endif +# endif &&L_OP_MEMORY_START, &&L_OP_MEMORY_START_PUSH, /* push back-tracker to stack */ &&L_OP_MEMORY_END_PUSH, /* push back-tracker to stack */ -#ifdef USE_SUBEXP_CALL +# ifdef USE_SUBEXP_CALL &&L_OP_MEMORY_END_PUSH_REC, /* push back-tracker to stack */ -#else +# else &&L_DEFAULT, -#endif +# endif &&L_OP_MEMORY_END, -#ifdef USE_SUBEXP_CALL +# ifdef USE_SUBEXP_CALL &&L_OP_MEMORY_END_REC, /* push marker to stack */ -#else +# else &&L_DEFAULT, -#endif +# endif &&L_OP_KEEP, @@ -1482,7 +1518,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, &&L_OP_JUMP, &&L_OP_PUSH, &&L_OP_POP, +# ifdef USE_OP_PUSH_OR_JUMP_EXACT &&L_OP_PUSH_OR_JUMP_EXACT1, /* if match exact then push, else jump. */ +# else + &&L_DEFAULT, +# endif &&L_OP_PUSH_IF_PEEK_NEXT, /* if match exact then push, else none. */ &&L_OP_REPEAT, /* {n,m} */ &&L_OP_REPEAT_NG, /* {n,m}? (non greedy) */ @@ -1492,16 +1532,16 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, &&L_OP_REPEAT_INC_NG_SG, /* search and get in stack (non greedy) */ &&L_OP_NULL_CHECK_START, /* null loop checker start */ &&L_OP_NULL_CHECK_END, /* null loop checker end */ -#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT +# ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT &&L_OP_NULL_CHECK_END_MEMST, /* null loop checker end (with capture status) */ -#else +# else &&L_DEFAULT, -#endif -#ifdef USE_SUBEXP_CALL +# endif +# ifdef USE_SUBEXP_CALL &&L_OP_NULL_CHECK_END_MEMST_PUSH, /* with capture status and push check-end */ -#else +# else &&L_DEFAULT, -#endif +# endif &&L_OP_PUSH_POS, /* (?=...) start */ &&L_OP_POP_POS, /* (?=...) end */ @@ -1513,69 +1553,66 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, &&L_OP_PUSH_LOOK_BEHIND_NOT, /* (? */ &&L_OP_RETURN, -#else +# else &&L_DEFAULT, &&L_DEFAULT, -#endif +# endif &&L_OP_CONDITION, -#ifdef USE_COMBINATION_EXPLOSION_CHECK +# ifdef USE_COMBINATION_EXPLOSION_CHECK &&L_OP_STATE_CHECK_PUSH, /* combination explosion check and push */ &&L_OP_STATE_CHECK_PUSH_OR_JUMP, /* check ok -> push, else jump */ &&L_OP_STATE_CHECK, /* check only */ -#else +# else &&L_DEFAULT, &&L_DEFAULT, &&L_DEFAULT, -#endif -#ifdef USE_COMBINATION_EXPLOSION_CHECK +# endif +# ifdef USE_COMBINATION_EXPLOSION_CHECK &&L_OP_STATE_CHECK_ANYCHAR_STAR, &&L_OP_STATE_CHECK_ANYCHAR_ML_STAR, -#else +# else &&L_DEFAULT, &&L_DEFAULT, -#endif +# endif /* no need: IS_DYNAMIC_OPTION() == 0 */ -#if 0 /* no need: IS_DYNAMIC_OPTION() == 0 */ +# if 0 /* no need: IS_DYNAMIC_OPTION() == 0 */ &&L_OP_SET_OPTION_PUSH, /* set option and push recover option */ &&L_OP_SET_OPTION /* set option */ -#else +# else &&L_DEFAULT, &&L_DEFAULT -#endif +# endif }; -#else +#else /* USE_TOKEN_THREADED_VM */ -#define VM_LOOP \ +# define OP_OFFSET 0 +# define VM_LOOP \ while (1) { \ OPCODE_EXEC_HOOK; \ sbegin = s; \ switch (*p++) { -#define VM_LOOP_END } sprev = sbegin; } -#define CASE(x) case x: -#define DEFAULT default: -#define NEXT break -#define JUMP continue; break -#endif +# define VM_LOOP_END } sprev = sbegin; } +# define CASE(x) case x: +# define DEFAULT default: +# define NEXT break +# define JUMP continue; break +#endif /* USE_TOKEN_THREADED_VM */ #ifdef USE_SUBEXP_CALL - /* Stack #0 is used to store the pattern itself and used for (?R), \g<0>, etc. */ - n = reg->num_repeat + (reg->num_mem + 1) * 2; - - STACK_INIT(alloca_base, xmalloc_base, n, INIT_MATCH_STACK_SIZE); - pop_level = reg->stack_pop_level; - num_mem = reg->num_mem; - repeat_stk = (OnigStackIndex* )alloca_base; +/* Stack #0 is used to store the pattern itself and used for (?R), \g<0>, + etc. Additional space is required. */ +# define ADD_NUMMEM 1 +#else +/* Stack #0 not is used. */ +# define ADD_NUMMEM 0 +#endif - mem_start_stk = (OnigStackIndex* )(repeat_stk + reg->num_repeat); - mem_end_stk = mem_start_stk + (num_mem + 1); -#else /* USE_SUBEXP_CALL */ - /* Stack #0 not is used. */ - n = reg->num_repeat + reg->num_mem * 2; + n = reg->num_repeat + (reg->num_mem + ADD_NUMMEM) * 2; STACK_INIT(alloca_base, xmalloc_base, n, INIT_MATCH_STACK_SIZE); pop_level = reg->stack_pop_level; @@ -1583,25 +1620,27 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, repeat_stk = (OnigStackIndex* )alloca_base; mem_start_stk = (OnigStackIndex* )(repeat_stk + reg->num_repeat); - mem_end_stk = mem_start_stk + num_mem; + mem_end_stk = mem_start_stk + (num_mem + ADD_NUMMEM); + { + OnigStackIndex *pp = mem_start_stk; + for (; pp < repeat_stk + n; pp += 2) { + pp[0] = INVALID_STACK_INDEX; + pp[1] = INVALID_STACK_INDEX; + } + } +#ifndef USE_SUBEXP_CALL mem_start_stk--; /* for index start from 1, mem_start_stk[1]..mem_start_stk[num_mem] */ mem_end_stk--; /* for index start from 1, mem_end_stk[1]..mem_end_stk[num_mem] */ -#endif /* USE_SUBEXP_CALL */ - { - OnigStackIndex *pp = mem_start_stk; - for (; pp < (repeat_stk + n); pp+=2) { - pp[0] = INVALID_STACK_INDEX; - pp[1] = INVALID_STACK_INDEX; - } - } +#endif #ifdef ONIG_DEBUG_MATCH fprintf(stderr, "match_at: str: %"PRIdPTR" (%p), end: %"PRIdPTR" (%p), start: %"PRIdPTR" (%p), sprev: %"PRIdPTR" (%p)\n", (intptr_t )str, str, (intptr_t )end, end, (intptr_t )sstart, sstart, (intptr_t )sprev, sprev); fprintf(stderr, "size: %d, start offset: %d\n", (int )(end - str), (int )(sstart - str)); + fprintf(stderr, "\n ofs> str stk:type addr:opcode\n"); #endif STACK_PUSH_ENSURED(STK_ALT, (UChar* )FinishCode); /* bottom stack */ @@ -1611,31 +1650,34 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, #ifdef ONIG_DEBUG_MATCH -#define OPCODE_EXEC_HOOK \ +# define OPCODE_EXEC_HOOK \ if (s) { \ UChar *op, *q, *bp, buf[50]; \ int len; \ - op = p - 1; \ + op = p - OP_OFFSET; \ fprintf(stderr, "%4"PRIdPTR"> \"", (*op == OP_FINISH) ? (ptrdiff_t )-1 : s - str); \ bp = buf; \ q = s; \ if (*op != OP_FINISH) { /* s may not be a valid pointer if OP_FINISH. */ \ for (i = 0; i < 7 && q < end; i++) { \ - len = enclen(encode, q, end); \ + len = enclen(encode, q, end); \ while (len-- > 0) *bp++ = *q++; \ } \ + if (q < end) { xmemcpy(bp, "...", 3); bp += 3; } \ } \ - if (q < end) { xmemcpy(bp, "...\"", 4); bp += 4; } \ - else { xmemcpy(bp, "\"", 1); bp += 1; } \ + xmemcpy(bp, "\"", 1); bp += 1; \ *bp = 0; \ fputs((char* )buf, stderr); \ for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr); \ - fprintf(stderr, "%4"PRIdPTR":", (op == FinishCode) ? (ptrdiff_t )-1 : op - reg->p); \ + fprintf(stderr, "%4"PRIdPTR":%s %4"PRIdPTR":", \ + stk - stk_base - 1, \ + (stk > stk_base) ? stack_type_str(stk[-1].type) : " ", \ + (op == FinishCode) ? (ptrdiff_t )-1 : op - reg->p); \ onig_print_compiled_byte_code(stderr, op, reg->p+reg->used, NULL, encode); \ fprintf(stderr, "\n"); \ } #else -#define OPCODE_EXEC_HOOK ((void) 0) +# define OPCODE_EXEC_HOOK ((void) 0) #endif @@ -1652,83 +1694,56 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, } else goto end_best_len; - } + } #endif best_len = n; region = msa->region; if (region) { -#ifdef USE_POSIX_API_REGION_OPTION - if (IS_POSIX_REGION(msa->options)) { - posix_regmatch_t* rmt = (posix_regmatch_t* )region; - - rmt[0].rm_so = (regoff_t )(((pkeep > s) ? s : pkeep) - str); - rmt[0].rm_eo = (regoff_t )(s - str); - for (i = 1; i <= num_mem; i++) { - if (mem_end_stk[i] != INVALID_STACK_INDEX) { - if (BIT_STATUS_AT(reg->bt_mem_start, i)) - rmt[i].rm_so = (regoff_t )(STACK_AT(mem_start_stk[i])->u.mem.pstr - str); - else - rmt[i].rm_so = (regoff_t )((UChar* )((void* )(mem_start_stk[i])) - str); - - rmt[i].rm_eo = (regoff_t )((BIT_STATUS_AT(reg->bt_mem_end, i) + region->beg[0] = ((pkeep > s) ? s : pkeep) - str; + region->end[0] = s - str; + for (i = 1; i <= num_mem; i++) { + if (mem_end_stk[i] != INVALID_STACK_INDEX) { + if (BIT_STATUS_AT(reg->bt_mem_start, i)) + region->beg[i] = STACK_AT(mem_start_stk[i])->u.mem.pstr - str; + else + region->beg[i] = (UChar* )((void* )mem_start_stk[i]) - str; + + region->end[i] = (BIT_STATUS_AT(reg->bt_mem_end, i) ? STACK_AT(mem_end_stk[i])->u.mem.pstr - : (UChar* )((void* )mem_end_stk[i])) - str); - } - else { - rmt[i].rm_so = rmt[i].rm_eo = ONIG_REGION_NOTPOS; - } + : (UChar* )((void* )mem_end_stk[i])) - str; } - } - else { -#endif /* USE_POSIX_API_REGION_OPTION */ - region->beg[0] = ((pkeep > s) ? s : pkeep) - str; - region->end[0] = s - str; - for (i = 1; i <= num_mem; i++) { - if (mem_end_stk[i] != INVALID_STACK_INDEX) { - if (BIT_STATUS_AT(reg->bt_mem_start, i)) - region->beg[i] = STACK_AT(mem_start_stk[i])->u.mem.pstr - str; - else - region->beg[i] = (UChar* )((void* )mem_start_stk[i]) - str; - - region->end[i] = (BIT_STATUS_AT(reg->bt_mem_end, i) - ? STACK_AT(mem_end_stk[i])->u.mem.pstr - : (UChar* )((void* )mem_end_stk[i])) - str; - } - else { - region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS; - } + else { + region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS; } + } #ifdef USE_CAPTURE_HISTORY - if (reg->capture_history != 0) { - int r; - OnigCaptureTreeNode* node; - - if (IS_NULL(region->history_root)) { - region->history_root = node = history_node_new(); - CHECK_NULL_RETURN_MEMERR(node); - } - else { - node = region->history_root; - history_tree_clear(node); - } - - node->group = 0; - node->beg = ((pkeep > s) ? s : pkeep) - str; - node->end = s - str; - - stkp = stk_base; - r = make_capture_history_tree(region->history_root, &stkp, - stk, (UChar* )str, reg); - if (r < 0) { - best_len = r; /* error code */ - goto finish; - } + if (reg->capture_history != 0) { + int r; + OnigCaptureTreeNode* node; + + if (IS_NULL(region->history_root)) { + region->history_root = node = history_node_new(); + CHECK_NULL_RETURN_MEMERR(node); } + else { + node = region->history_root; + history_tree_clear(node); + } + + node->group = 0; + node->beg = ((pkeep > s) ? s : pkeep) - str; + node->end = s - str; + + stkp = stk_base; + r = make_capture_history_tree(region->history_root, &stkp, + stk, (UChar* )str, reg); + if (r < 0) { + best_len = r; /* error code */ + goto finish; + } + } #endif /* USE_CAPTURE_HISTORY */ -#ifdef USE_POSIX_API_REGION_OPTION - } /* else IS_POSIX_REGION() */ -#endif } /* if (region) */ } /* n > best_len */ @@ -1777,8 +1792,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, q = lowbuf; while (len-- > 0) { if (*p != *q) { - goto fail; - } + goto fail; + } p++; q++; } } @@ -2036,7 +2051,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, int mb_len = enclen(encode, s, end); if (! DATA_ENSURE_CHECK(mb_len)) { - DATA_ENSURE(1); + DATA_ENSURE(1); s = (UChar* )end; p += tlen; goto cc_mb_not_success; @@ -2078,25 +2093,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, MOP_OUT; NEXT; - CASE(OP_CCLASS_NODE) MOP_IN(OP_CCLASS_NODE); - { - OnigCodePoint code; - void *node; - int mb_len; - UChar *ss; - - DATA_ENSURE(1); - GET_POINTER_INC(node, p); - mb_len = enclen(encode, s, end); - ss = s; - s += mb_len; - DATA_ENSURE(0); - code = ONIGENC_MBC_TO_CODE(encode, ss, s); - if (onig_is_code_in_cc_len(mb_len, code, node) == 0) goto fail; - } - MOP_OUT; - NEXT; - CASE(OP_ANYCHAR) MOP_IN(OP_ANYCHAR); DATA_ENSURE(1); n = enclen(encode, s, end); @@ -2118,10 +2114,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, while (DATA_ENSURE_CHECK1) { STACK_PUSH_ALT(p, s, sprev, pkeep); n = enclen(encode, s, end); - DATA_ENSURE(n); - if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail; - sprev = s; - s += n; + DATA_ENSURE(n); + if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail; + sprev = s; + s += n; } MOP_OUT; NEXT; @@ -2149,10 +2145,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, STACK_PUSH_ALT(p + 1, s, sprev, pkeep); } n = enclen(encode, s, end); - DATA_ENSURE(n); - if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail; - sprev = s; - s += n; + DATA_ENSURE(n); + if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail; + sprev = s; + s += n; } p++; MOP_OUT; @@ -2187,10 +2183,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem, pkeep); n = enclen(encode, s, end); - DATA_ENSURE(n); - if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail; - sprev = s; - s += n; + DATA_ENSURE(n); + if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail; + sprev = s; + s += n; } MOP_OUT; NEXT; @@ -2681,8 +2677,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, GET_LENGTH_INC(tlen, p); sprev = s; - if (backref_match_at_nested_level(reg, stk, stk_base, ic - , case_fold_flag, (int )level, (int )tlen, p, &s, end)) { + if (backref_match_at_nested_level(reg, stk, stk_base, ic, + case_fold_flag, (int )level, (int )tlen, p, &s, end)) { while (sprev + (len = enclen(encode, sprev, end)) < s) sprev += len; @@ -2758,10 +2754,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, GET_MEMNUM_INC(mem, p); /* mem: null check id */ STACK_NULL_CHECK_MEMST(isnull, mem, s, reg); if (isnull) { -#ifdef ONIG_DEBUG_MATCH +# ifdef ONIG_DEBUG_MATCH fprintf(stderr, "NULL_CHECK_END_MEMST: skip id:%d, s:%"PRIdPTR" (%p)\n", (int )mem, (intptr_t )s, s); -#endif +# endif if (isnull == -1) goto fail; goto null_check_found; } @@ -2777,16 +2773,16 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, int isnull; GET_MEMNUM_INC(mem, p); /* mem: null check id */ -#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT +# ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT STACK_NULL_CHECK_MEMST_REC(isnull, mem, s, reg); -#else +# else STACK_NULL_CHECK_REC(isnull, mem, s); -#endif +# endif if (isnull) { -#ifdef ONIG_DEBUG_MATCH +# ifdef ONIG_DEBUG_MATCH fprintf(stderr, "NULL_CHECK_END_MEMST_PUSH: skip id:%d, s:%"PRIdPTR" (%p)\n", (int )mem, (intptr_t )s, s); -#endif +# endif if (isnull == -1) goto fail; goto null_check_found; } @@ -2850,6 +2846,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, MOP_OUT; JUMP; +#ifdef USE_OP_PUSH_OR_JUMP_EXACT CASE(OP_PUSH_OR_JUMP_EXACT1) MOP_IN(OP_PUSH_OR_JUMP_EXACT1); GET_RELADDR_INC(addr, p); if (*p == *s && DATA_ENSURE_CHECK1) { @@ -2861,6 +2858,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, p += (addr + 1); MOP_OUT; JUMP; +#endif CASE(OP_PUSH_IF_PEEK_NEXT) MOP_IN(OP_PUSH_IF_PEEK_NEXT); GET_RELADDR_INC(addr, p); @@ -2915,14 +2913,14 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, repeat_inc: stkp->u.repeat.count++; if (stkp->u.repeat.count >= reg->repeat_range[mem].upper) { - /* end of repeat. Nothing to do. */ + /* end of repeat. Nothing to do. */ } else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) { - STACK_PUSH_ALT(p, s, sprev, pkeep); - p = STACK_AT(si)->u.repeat.pcode; /* Don't use stkp after PUSH. */ + STACK_PUSH_ALT(p, s, sprev, pkeep); + p = STACK_AT(si)->u.repeat.pcode; /* Don't use stkp after PUSH. */ } else { - p = stkp->u.repeat.pcode; + p = stkp->u.repeat.pcode; } STACK_PUSH_REPEAT_INC(si); MOP_OUT; @@ -2944,19 +2942,19 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, repeat_inc_ng: stkp->u.repeat.count++; if (stkp->u.repeat.count < reg->repeat_range[mem].upper) { - if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) { - UChar* pcode = stkp->u.repeat.pcode; - - STACK_PUSH_REPEAT_INC(si); - STACK_PUSH_ALT(pcode, s, sprev, pkeep); - } - else { - p = stkp->u.repeat.pcode; - STACK_PUSH_REPEAT_INC(si); - } + if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) { + UChar* pcode = stkp->u.repeat.pcode; + + STACK_PUSH_REPEAT_INC(si); + STACK_PUSH_ALT(pcode, s, sprev, pkeep); + } + else { + p = stkp->u.repeat.pcode; + STACK_PUSH_REPEAT_INC(si); + } } else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) { - STACK_PUSH_REPEAT_INC(si); + STACK_PUSH_REPEAT_INC(si); } MOP_OUT; CHECK_INTERRUPT_IN_MATCH_AT; @@ -3067,9 +3065,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE(OP_FAIL) if (0) { - /* fall */ + /* fall */ fail: - MOP_OUT; + MOP_OUT; } MOP_IN(OP_FAIL); STACK_POP; @@ -3080,8 +3078,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, #ifdef USE_COMBINATION_EXPLOSION_CHECK if (stk->u.state.state_check != 0) { - stk->type = STK_STATE_CHECK_MARK; - stk++; + stk->type = STK_STATE_CHECK_MARK; + stk++; } #endif @@ -3158,7 +3156,7 @@ slow_search(OnigEncoding enc, UChar* target, UChar* target_end, static int str_lower_case_match(OnigEncoding enc, int case_fold_flag, - const UChar* t, const UChar* tend, + const UChar* t, const UChar* tend, const UChar* p, const UChar* end) { int lowlen; @@ -3250,7 +3248,7 @@ slow_search_backward_ic(OnigEncoding enc, int case_fold_flag, while (s >= text) { if (str_lower_case_match(enc, case_fold_flag, - target, target_end, s, text_end)) + target, target_end, s, text_end)) return s; s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s, text_end); @@ -3270,10 +3268,10 @@ bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end, const UChar *tail; ptrdiff_t skip, tlen1; -#ifdef ONIG_DEBUG_SEARCH +# ifdef ONIG_DEBUG_SEARCH fprintf(stderr, "bm_search_notrev: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n", text, text, text_end, text_end, text_range, text_range); -#endif +# endif tail = target_end - 1; tlen1 = tail - target; @@ -3294,11 +3292,12 @@ bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end, skip = reg->map[*se]; t = s; do { - s += enclen(reg->enc, s, end); + s += enclen(reg->enc, s, end); } while ((s - t) < skip && s < end); } } else { +# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE while (s < end) { p = se = s + tlen1; t = tail; @@ -3309,9 +3308,10 @@ bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end, skip = reg->int_map[*se]; t = s; do { - s += enclen(reg->enc, s, end); + s += enclen(reg->enc, s, end); } while ((s - t) < skip && s < end); } +# endif } return (UChar* )NULL; @@ -3325,10 +3325,10 @@ bm_search(regex_t* reg, const UChar* target, const UChar* target_end, const UChar *s, *t, *p, *end; const UChar *tail; -#ifdef ONIG_DEBUG_SEARCH +# ifdef ONIG_DEBUG_SEARCH fprintf(stderr, "bm_search: text: %"PRIuPTR", text_end: %"PRIuPTR", text_range: %"PRIuPTR"\n", text, text_end, text_range); -#endif +# endif end = text_range + (target_end - target) - 1; if (end > text_end) @@ -3340,10 +3340,10 @@ bm_search(regex_t* reg, const UChar* target, const UChar* target_end, while (s < end) { p = s; t = tail; -#ifdef ONIG_DEBUG_SEARCH +# ifdef ONIG_DEBUG_SEARCH fprintf(stderr, "bm_search_loop: pos: %"PRIdPTR" %s\n", (intptr_t )(s - text), s); -#endif +# endif while (*p == *t) { if (t == target) return (UChar* )p; p--; t--; @@ -3352,6 +3352,7 @@ bm_search(regex_t* reg, const UChar* target, const UChar* target_end, } } else { /* see int_map[] */ +# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE while (s < end) { p = s; t = tail; @@ -3361,6 +3362,7 @@ bm_search(regex_t* reg, const UChar* target, const UChar* target_end, } s += reg->int_map[*s]; } +# endif } return (UChar* )NULL; } @@ -3377,10 +3379,10 @@ bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end, OnigEncoding enc = reg->enc; int case_fold_flag = reg->case_fold_flag; -#ifdef ONIG_DEBUG_SEARCH +# ifdef ONIG_DEBUG_SEARCH fprintf(stderr, "bm_search_notrev_ic: text: %d (%p), text_end: %d (%p), text_range: %d (%p)\n", (int )text, text, (int )text_end, text_end, (int )text_range, text_range); -#endif +# endif tail = target_end - 1; tlen1 = tail - target; @@ -3399,11 +3401,12 @@ bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end, skip = reg->map[*se]; t = s; do { - s += enclen(reg->enc, s, end); + s += enclen(reg->enc, s, end); } while ((s - t) < skip && s < end); } } else { +# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE while (s < end) { se = s + tlen1; if (str_lower_case_match(enc, case_fold_flag, target, target_end, @@ -3412,9 +3415,10 @@ bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end, skip = reg->int_map[*se]; t = s; do { - s += enclen(reg->enc, s, end); + s += enclen(reg->enc, s, end); } while ((s - t) < skip && s < end); } +# endif } return (UChar* )NULL; @@ -3430,10 +3434,10 @@ bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end, OnigEncoding enc = reg->enc; int case_fold_flag = reg->case_fold_flag; -#ifdef ONIG_DEBUG_SEARCH +# ifdef ONIG_DEBUG_SEARCH fprintf(stderr, "bm_search_ic: text: %d (%p), text_end: %d (%p), text_range: %d (%p)\n", (int )text, text, (int )text_end, text_end, (int )text_range, text_range); -#endif +# endif end = text_range + (target_end - target) - 1; if (end > text_end) @@ -3451,6 +3455,7 @@ bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end, } } else { /* see int_map[] */ +# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE while (s < end) { p = s - (target_end - target) + 1; if (str_lower_case_match(enc, case_fold_flag, target, target_end, @@ -3458,6 +3463,7 @@ bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end, return (UChar* )p; s += reg->int_map[*s]; } +# endif } return (UChar* )NULL; } @@ -3475,10 +3481,10 @@ bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end, ptrdiff_t skip, tlen1; OnigEncoding enc = reg->enc; -#ifdef ONIG_DEBUG_SEARCH +# ifdef ONIG_DEBUG_SEARCH fprintf(stderr, "bm_search_notrev: text: %"PRIdPTR" (%p), text_end: %"PRIdPTR" (%p), text_range: %"PRIdPTR" (%p)\n", (intptr_t )text, text, (intptr_t )text_end, text_end, (intptr_t )text_range, text_range); -#endif +# endif tail = target_end - 1; tlen1 = tail - target; @@ -3500,11 +3506,12 @@ bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end, skip = reg->map[se[1]]; t = s; do { - s += enclen(enc, s, end); + s += enclen(enc, s, end); } while ((s - t) < skip && s < end); } } else { +# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE while (s < end) { p = se = s + tlen1; t = tail; @@ -3516,9 +3523,10 @@ bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end, skip = reg->int_map[se[1]]; t = s; do { - s += enclen(enc, s, end); + s += enclen(enc, s, end); } while ((s - t) < skip && s < end); } +# endif } return (UChar* )NULL; @@ -3533,6 +3541,11 @@ bm_search(regex_t* reg, const UChar* target, const UChar* target_end, const UChar *tail; ptrdiff_t tlen1; +# ifdef ONIG_DEBUG_SEARCH + fprintf(stderr, "bm_search: text: %"PRIuPTR", text_end: %"PRIuPTR", text_range: %"PRIuPTR"\n", + text, text_end, text_range); +# endif + tail = target_end - 1; tlen1 = tail - target; end = text_range + tlen1; @@ -3553,6 +3566,7 @@ bm_search(regex_t* reg, const UChar* target, const UChar* target_end, } } else { /* see int_map[] */ +# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE while (s < end) { p = s; t = tail; @@ -3563,6 +3577,7 @@ bm_search(regex_t* reg, const UChar* target, const UChar* target_end, if (s + 1 >= end) break; s += reg->int_map[s[1]]; } +# endif } return (UChar* )NULL; } @@ -3579,10 +3594,10 @@ bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end, OnigEncoding enc = reg->enc; int case_fold_flag = reg->case_fold_flag; -#ifdef ONIG_DEBUG_SEARCH +# ifdef ONIG_DEBUG_SEARCH fprintf(stderr, "bm_search_notrev_ic: text: %"PRIdPTR" (%p), text_end: %"PRIdPTR" (%p), text_range: %"PRIdPTR" (%p)\n", (intptr_t )text, text, (intptr_t )text_end, text_end, (intptr_t )text_range, text_range); -#endif +# endif tail = target_end - 1; tlen1 = tail - target; @@ -3602,11 +3617,12 @@ bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end, skip = reg->map[se[1]]; t = s; do { - s += enclen(enc, s, end); + s += enclen(enc, s, end); } while ((s - t) < skip && s < end); } } else { +# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE while (s < end) { se = s + tlen1; if (str_lower_case_match(enc, case_fold_flag, target, target_end, @@ -3616,9 +3632,10 @@ bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end, skip = reg->int_map[se[1]]; t = s; do { - s += enclen(enc, s, end); + s += enclen(enc, s, end); } while ((s - t) < skip && s < end); } +# endif } return (UChar* )NULL; @@ -3635,10 +3652,10 @@ bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end, OnigEncoding enc = reg->enc; int case_fold_flag = reg->case_fold_flag; -#ifdef ONIG_DEBUG_SEARCH +# ifdef ONIG_DEBUG_SEARCH fprintf(stderr, "bm_search_ic: text: %"PRIdPTR" (%p), text_end: %"PRIdPTR" (%p), text_range: %"PRIdPTR" (%p)\n", (intptr_t )text, text, (intptr_t )text_end, text_end, (intptr_t )text_range, text_range); -#endif +# endif tail = target_end - 1; tlen1 = tail - target; @@ -3658,6 +3675,7 @@ bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end, } } else { /* see int_map[] */ +# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE while (s < end) { p = s - tlen1; if (str_lower_case_match(enc, case_fold_flag, target, target_end, @@ -3666,11 +3684,13 @@ bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end, if (s + 1 >= end) break; s += reg->int_map[s[1]]; } +# endif } return (UChar* )NULL; } #endif /* USE_SUNDAY_QUICK_SEARCH */ +#ifdef USE_INT_MAP_BACKWARD static int set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED, int** skip) @@ -3720,6 +3740,7 @@ bm_search_backward(regex_t* reg, const UChar* target, const UChar* target_end, return (UChar* )NULL; } +#endif static UChar* map_search(OnigEncoding enc, UChar map[], @@ -3758,31 +3779,6 @@ onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, On UChar *prev; OnigMatchArg msa; -#if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM) - start: - THREAD_ATOMIC_START; - if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) { - ONIG_STATE_INC(reg); - if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) { - onig_chain_reduce(reg); - ONIG_STATE_INC(reg); - } - } - else { - int n; - - THREAD_ATOMIC_END; - n = 0; - while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) { - if (++n > THREAD_PASS_LIMIT_COUNT) - return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT; - THREAD_PASS; - } - goto start; - } - THREAD_ATOMIC_END; -#endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */ - MATCH_ARG_INIT(msa, option, region, at, at); #ifdef USE_COMBINATION_EXPLOSION_CHECK { @@ -3791,11 +3787,7 @@ onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, On } #endif - if (region -#ifdef USE_POSIX_API_REGION_OPTION - && !IS_POSIX_REGION(option) -#endif - ) { + if (region) { r = onig_region_resize_clear(region, reg->num_mem + 1); } else @@ -3811,7 +3803,6 @@ onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, On } MATCH_ARG_FREE(msa); - ONIG_STATE_DEC_THREAD(reg); return r; } @@ -3844,7 +3835,7 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, break; case ONIG_OPTIMIZE_EXACT_IC: p = slow_search_ic(reg->enc, reg->case_fold_flag, - reg->exact, reg->exact_end, p, end, range); + reg->exact, reg->exact_end, p, end, range); break; case ONIG_OPTIMIZE_EXACT_BM: @@ -3952,7 +3943,6 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, const UChar* range, UChar* adjrange, UChar** low, UChar** high) { - int r; UChar *p; range += reg->dmin; @@ -3970,13 +3960,15 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end, case ONIG_OPTIMIZE_EXACT_BM_IC: case ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC: p = slow_search_backward_ic(reg->enc, reg->case_fold_flag, - reg->exact, reg->exact_end, - range, adjrange, end, p); + reg->exact, reg->exact_end, + range, adjrange, end, p); break; case ONIG_OPTIMIZE_EXACT_BM: case ONIG_OPTIMIZE_EXACT_BM_NOT_REV: +#ifdef USE_INT_MAP_BACKWARD if (IS_NULL(reg->int_map_backward)) { + int r; if (s - range < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD) goto exact_method; @@ -3986,6 +3978,9 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end, } p = bm_search_backward(reg, reg->exact, reg->exact_end, range, adjrange, end, p); +#else + goto exact_method; +#endif break; case ONIG_OPTIMIZE_MAP: @@ -4070,42 +4065,13 @@ onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end, const UChar *orig_range = range; #endif -#if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM) - start: - THREAD_ATOMIC_START; - if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) { - ONIG_STATE_INC(reg); - if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) { - onig_chain_reduce(reg); - ONIG_STATE_INC(reg); - } - } - else { - int n; - - THREAD_ATOMIC_END; - n = 0; - while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) { - if (++n > THREAD_PASS_LIMIT_COUNT) - return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT; - THREAD_PASS; - } - goto start; - } - THREAD_ATOMIC_END; -#endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */ - #ifdef ONIG_DEBUG_SEARCH fprintf(stderr, "onig_search (entry point): str: %"PRIuPTR" (%p), end: %"PRIuPTR", start: %"PRIuPTR", range: %"PRIuPTR"\n", (intptr_t )str, str, end - str, start - str, range - str); #endif - if (region -#ifdef USE_POSIX_API_REGION_OPTION - && !IS_POSIX_REGION(option) -#endif - ) { + if (region) { r = onig_region_resize_clear(region, reg->num_mem + 1); if (r) goto finish_no_msa; } @@ -4114,8 +4080,8 @@ onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end, #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE -#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE -#define MATCH_AND_RETURN_CHECK(upper_range) \ +# ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE +# define MATCH_AND_RETURN_CHECK(upper_range) \ r = match_at(reg, str, end, (upper_range), s, prev, &msa); \ if (r != ONIG_MISMATCH) {\ if (r >= 0) {\ @@ -4125,8 +4091,8 @@ onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end, }\ else goto finish; /* error */ \ } -#else -#define MATCH_AND_RETURN_CHECK(upper_range) \ +# else +# define MATCH_AND_RETURN_CHECK(upper_range) \ r = match_at(reg, str, end, (upper_range), s, prev, &msa); \ if (r != ONIG_MISMATCH) {\ if (r >= 0) {\ @@ -4134,10 +4100,10 @@ onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end, }\ else goto finish; /* error */ \ } -#endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */ +# endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */ #else -#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE -#define MATCH_AND_RETURN_CHECK(none) \ +# ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE +# define MATCH_AND_RETURN_CHECK(none) \ r = match_at(reg, str, end, s, prev, &msa);\ if (r != ONIG_MISMATCH) {\ if (r >= 0) {\ @@ -4147,8 +4113,8 @@ onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end, }\ else goto finish; /* error */ \ } -#else -#define MATCH_AND_RETURN_CHECK(none) \ +# else +# define MATCH_AND_RETURN_CHECK(none) \ r = match_at(reg, str, end, s, prev, &msa);\ if (r != ONIG_MISMATCH) {\ if (r >= 0) {\ @@ -4156,7 +4122,7 @@ onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end, }\ else goto finish; /* error */ \ } -#endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */ +# endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */ #endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */ @@ -4168,7 +4134,15 @@ onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end, /* search start-position only */ begin_position: if (range > start) - range = start + 1; + { + if (global_pos > start) + { + if (global_pos < range) + range = global_pos + 1; + } + else + range = start + 1; + } else range = start; } @@ -4244,9 +4218,7 @@ onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end, } } else if ((reg->anchor & ANCHOR_ANYCHAR_STAR_ML)) { - if (! (reg->anchor & ANCHOR_LOOK_BEHIND)) { - goto begin_position; - } + goto begin_position; } } else if (str == end) { /* empty string */ @@ -4306,7 +4278,7 @@ onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end, } if ((end - start) < reg->threshold_len) - goto mismatch; + goto mismatch; if (reg->dmax != ONIG_INFINITE_DISTANCE) { do { @@ -4328,24 +4300,24 @@ onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end, if (! forward_search_range(reg, str, end, s, sch_range, &low, &high, (UChar** )NULL)) goto mismatch; - if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) { - do { - if ((reg->anchor & ANCHOR_BEGIN_POSITION) == 0) - msa.gpos = s; /* move \G position */ - MATCH_AND_RETURN_CHECK(orig_range); - prev = s; - s += enclen(reg->enc, s, end); - - if ((reg->anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) == 0) { - while (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0) - && s < range) { - prev = s; - s += enclen(reg->enc, s, end); - } - } - } while (s < range); - goto mismatch; - } + if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) { + do { + if ((reg->anchor & ANCHOR_BEGIN_POSITION) == 0) + msa.gpos = s; /* move \G position */ + MATCH_AND_RETURN_CHECK(orig_range); + prev = s; + s += enclen(reg->enc, s, end); + + if ((reg->anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) == 0) { + while (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0) + && s < range) { + prev = s; + s += enclen(reg->enc, s, end); + } + } + } while (s < range); + goto mismatch; + } } } @@ -4428,15 +4400,10 @@ onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end, finish: MATCH_ARG_FREE(msa); - ONIG_STATE_DEC_THREAD(reg); /* If result is mismatch and no FIND_NOT_EMPTY option, then the region is not set in match_at(). */ - if (IS_FIND_NOT_EMPTY(reg->options) && region -#ifdef USE_POSIX_API_REGION_OPTION - && !IS_POSIX_REGION(option) -#endif - ) { + if (IS_FIND_NOT_EMPTY(reg->options) && region) { onig_region_clear(region); } @@ -4449,7 +4416,6 @@ onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end, mismatch_no_msa: r = ONIG_MISMATCH; finish_no_msa: - ONIG_STATE_DEC_THREAD(reg); #ifdef ONIG_DEBUG if (r != ONIG_MISMATCH) fprintf(stderr, "onig_search: error %"PRIdPTRDIFF"\n", r); @@ -4457,43 +4423,82 @@ onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end, return r; match: - ONIG_STATE_DEC_THREAD(reg); MATCH_ARG_FREE(msa); return s - str; } +extern OnigPosition +onig_scan(regex_t* reg, const UChar* str, const UChar* end, + OnigRegion* region, OnigOptionType option, + int (*scan_callback)(OnigPosition, OnigPosition, OnigRegion*, void*), + void* callback_arg) +{ + OnigPosition r; + OnigPosition n; + int rs; + const UChar* start; + + n = 0; + start = str; + while (1) { + r = onig_search(reg, str, end, start, end, region, option); + if (r >= 0) { + rs = scan_callback(n, r, region, callback_arg); + n++; + if (rs != 0) + return rs; + + if (region->end[0] == start - str) + start++; + else + start = str + region->end[0]; + + if (start > end) + break; + } + else if (r == ONIG_MISMATCH) { + break; + } + else { /* error */ + return r; + } + } + + return n; +} + extern OnigEncoding -onig_get_encoding(regex_t* reg) +onig_get_encoding(const regex_t* reg) { return reg->enc; } extern OnigOptionType -onig_get_options(regex_t* reg) +onig_get_options(const regex_t* reg) { return reg->options; } extern OnigCaseFoldType -onig_get_case_fold_flag(regex_t* reg) +onig_get_case_fold_flag(const regex_t* reg) { return reg->case_fold_flag; } extern const OnigSyntaxType* -onig_get_syntax(regex_t* reg) +onig_get_syntax(const regex_t* reg) { return reg->syntax; } extern int -onig_number_of_captures(regex_t* reg) +onig_number_of_captures(const regex_t* reg) { return reg->num_mem; } extern int -onig_number_of_capture_histories(regex_t* reg) +onig_number_of_capture_histories(const regex_t* reg) { #ifdef USE_CAPTURE_HISTORY int i, n; -- cgit v1.2.3