summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- regcomp.c 29
-rw-r--r-- regexec.c 57
-rw-r--r-- regint.h 28
3 files changed, 73 insertions, 41 deletions
diff --git a/regcomp.c b/regcomp.c
index 24d44dd1b8..de44cfe037 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -3021,7 +3021,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
NSTRING_SET_CASE_AMBIG(node);
break;
}
- p++;
+ p += enc_len(reg->enc, *p);
}
}
break;
@@ -3950,22 +3950,17 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
}
}
- if (IS_NULL(cc->mbuf)) {
- if (cc->not) {
- for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
- add_char_opt_map_info(&opt->map, i);
- }
- mb_found = 1;
- }
- }
- else {
- for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
- z = ONIGENC_IS_MBC_HEAD(env->enc, i);
- if (z) {
- mb_found = 1;
- add_char_opt_map_info(&opt->map, i);
- }
- }
+ if (! ONIGENC_IS_SINGLEBYTE(env->enc)) {
+ if (! IS_NULL(cc->mbuf) ||
+ (cc->not != 0 && found != 0)) {
+ for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
+ z = ONIGENC_IS_MBC_HEAD(env->enc, i);
+ if (z) {
+ mb_found = 1;
+ add_char_opt_map_info(&opt->map, i);
+ }
+ }
+ }
}
if (mb_found) {
diff --git a/regexec.c b/regexec.c
index 870a6535bd..1bae0d9516 100644
--- a/regexec.c
+++ b/regexec.c
@@ -362,11 +362,26 @@ typedef struct {
};\
} while(0)
+static unsigned int MatchStackLimitSize = DEFAULT_MATCH_STACK_LIMIT_SIZE;
+
+extern unsigned int
+onig_get_match_stack_limit_size(void)
+{
+ return MatchStackLimitSize;
+}
+
+extern int
+onig_set_match_stack_limit_size(unsigned int size)
+{
+ MatchStackLimitSize = size;
+ return 0;
+}
+
static int
stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
StackType** arg_stk, StackType* stk_alloc, MatchArg* msa)
{
- int n;
+ unsigned int n;
StackType *x, *stk_base, *stk_end, *stk;
stk_base = *arg_stk_base;
@@ -385,7 +400,12 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
}
else {
n *= 2;
- if (n > MATCH_STACK_LIMIT_SIZE) return ONIGERR_MATCH_STACK_LIMIT_OVER;
+ if (MatchStackLimitSize != 0 && n > MatchStackLimitSize) {
+ if ((unsigned int )(stk_end - stk_base) == MatchStackLimitSize)
+ return ONIGERR_MATCH_STACK_LIMIT_OVER;
+ else
+ n = MatchStackLimitSize;
+ }
x = (StackType* )xrealloc(stk_base, sizeof(StackType) * n);
if (IS_NULL(x)) {
STACK_SAVE;
@@ -1171,10 +1191,9 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
goto fail; /* for retry */
}
}
- else {
- /* default behavior: return first-matching result. */
- goto finish;
- }
+
+ /* default behavior: return first-matching result. */
+ goto finish;
break;
case OP_EXACT1: STAT_OP_IN(OP_EXACT1);
@@ -2574,11 +2593,13 @@ bm_search_notrev(regex_t* reg, UChar* target, UChar* target_end,
if (t < target) return p + 1;
skip = reg->map[*s];
- p++;
+ p = s + 1;
+ if (p >= text_end) return (UChar* )NULL;
t = p;
- while ((p - t) < skip) {
+ do {
p += enc_len(reg->enc, *p);
- }
+ } while ((p - t) < skip && p < text_end);
+
s += (p - t);
}
}
@@ -2592,11 +2613,13 @@ bm_search_notrev(regex_t* reg, UChar* target, UChar* target_end,
if (t < target) return p + 1;
skip = reg->int_map[*s];
- p++;
+ p = s + 1;
+ if (p >= text_end) return (UChar* )NULL;
t = p;
- while ((p - t) < skip) {
+ do {
p += enc_len(reg->enc, *p);
- }
+ } while ((p - t) < skip && p < text_end);
+
s += (p - t);
}
}
@@ -3288,13 +3311,3 @@ onig_get_syntax(regex_t* reg)
{
return reg->syntax;
}
-
-extern const char*
-onig_version(void)
-{
-#define MSTR(a) # a
-
- return (MSTR(ONIGURUMA_VERSION_MAJOR) "."
- MSTR(ONIGURUMA_VERSION_MINOR) "."
- MSTR(ONIGURUMA_VERSION_TEENY));
-}
diff --git a/regint.h b/regint.h
index dacc0400be..bcc5fa5fc4 100644
--- a/regint.h
+++ b/regint.h
@@ -46,13 +46,12 @@
#define USE_QUALIFIER_PEEK_NEXT
#define INIT_MATCH_STACK_SIZE 160
-#define MATCH_STACK_LIMIT_SIZE 500000
+#define DEFAULT_MATCH_STACK_LIMIT_SIZE 0 /* unlimited */
/* interface to external system */
#ifdef NOT_RUBY /* gived from Makefile */
#include "config.h"
#define USE_VARIABLE_META_CHARS
-#define USE_VARIABLE_SYNTAX
#define USE_WORD_BEGIN_END /* "\<": word-begin, "\>": word-end */
#define USE_POSIX_REGION_OPTION /* needed for POSIX API support */
#define THREAD_ATOMIC_START /* depend on thread system */
@@ -654,6 +653,31 @@ extern OnigMetaCharTableType OnigMetaCharTable;
#define MC_ONE_OR_MORE_TIME OnigMetaCharTable.one_or_more_time
#define MC_ANYCHAR_ANYTIME OnigMetaCharTable.anychar_anytime
+#define SYN_POSIX_COMMON_OP \
+ ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_POSIX_BRACKET | \
+ ONIG_SYN_OP_DECIMAL_BACKREF | \
+ ONIG_SYN_OP_BRACKET_CC | ONIG_SYN_OP_ASTERISK_ZERO_INF | \
+ ONIG_SYN_OP_LINE_ANCHOR | \
+ ONIG_SYN_OP_ESC_CONTROL_CHARS )
+
+#define SYN_GNU_REGEX_OP \
+ ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC | \
+ ONIG_SYN_OP_POSIX_BRACKET | ONIG_SYN_OP_DECIMAL_BACKREF | \
+ ONIG_SYN_OP_BRACE_INTERVAL | ONIG_SYN_OP_LPAREN_SUBEXP | \
+ ONIG_SYN_OP_VBAR_ALT | \
+ ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_PLUS_ONE_INF | \
+ ONIG_SYN_OP_QMARK_ZERO_ONE | \
+ ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR | ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR | \
+ ONIG_SYN_OP_ESC_W_WORD | \
+ ONIG_SYN_OP_ESC_B_WORD_BOUND | ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END | \
+ ONIG_SYN_OP_ESC_S_WHITE_SPACE | ONIG_SYN_OP_ESC_D_DIGIT | \
+ ONIG_SYN_OP_LINE_ANCHOR )
+
+#define SYN_GNU_REGEX_BV \
+ ( ONIG_SYN_CONTEXT_INDEP_ANCHORS | ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS | \
+ ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS | ONIG_SYN_ALLOW_INVALID_INTERVAL | \
+ ONIG_SYN_BACKSLASH_ESCAPE_IN_CC | ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC )
+
#ifdef ONIG_DEBUG