summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2017-02-11 15:08:33 +0000
committer naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2017-02-11 15:08:33 +0000
commit 6b1c6e0e55707c78f7dba05e3f005269aa78fa3f (patch)
tree 69e97c4465966bc427f9569dd8664562aa456662
parent 238b9276decab770a18138ebc298fa7172f2a047 (diff)
Merge Onigmo 6.1.1
* Support absent operator https://github.com/k-takata/Onigmo/issues/82
* https://github.com/k-takata/Onigmo/blob/Onigmo-6.1.1/HISTORY

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@57603 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- NEWS 4
-rw-r--r-- include/ruby/onigmo.h 11
-rw-r--r-- regcomp.c 99
-rw-r--r-- regenc.c 4
-rw-r--r-- regexec.c 157
-rw-r--r-- regint.h 15
-rw-r--r-- regparse.c 26
-rw-r--r-- regparse.h 1
8 files changed, 227 insertions, 90 deletions
diff --git a/NEWS b/NEWS
index 03b2ee9e3b..d60d9277df 100644
--- a/NEWS
+++ b/NEWS
@@ -20,6 +20,10 @@ with all sufficient information, see the ChangeLog file or Redmine
=== Core classes updates (outstanding ones only)
+* Regexp
+ * Update Onigmo 6.1.1.
+ * Support absent operator https://github.com/k-takata/Onigmo/issues/82
+
=== Stdlib updates (outstanding ones only)
=== Compatibility issues (excluding feature bug fixes)
diff --git a/include/ruby/onigmo.h b/include/ruby/onigmo.h
index 228aa77ea5..868372494b 100644
--- a/include/ruby/onigmo.h
+++ b/include/ruby/onigmo.h
@@ -5,7 +5,7 @@
**********************************************************************/
/*-
* Copyright (c) 2002-2009 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
- * Copyright (c) 2011-2016 K.Takata <kentkt AT csc DOT jp>
+ * Copyright (c) 2011-2017 K.Takata <kentkt AT csc DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -38,8 +38,8 @@ extern "C" {
#endif
#define ONIGMO_VERSION_MAJOR 6
-#define ONIGMO_VERSION_MINOR 0
-#define ONIGMO_VERSION_TEENY 0
+#define ONIGMO_VERSION_MINOR 1
+#define ONIGMO_VERSION_TEENY 1
#ifndef ONIG_EXTERN
# ifdef RUBY_EXTERN
@@ -580,7 +580,8 @@ ONIG_EXTERN const OnigSyntaxType* OnigDefaultSyntax;
#define ONIG_SYN_OP2_QMARK_VBAR_BRANCH_RESET (1U<<28) /* (?|...) */ /* NOTIMPL */
#define ONIG_SYN_OP2_QMARK_LPAREN_CONDITION (1U<<29) /* (?(cond)yes...|no...) */
#define ONIG_SYN_OP2_QMARK_CAPITAL_P_NAMED_GROUP (1U<<30) /* (?P<name>...), (?P=name), (?P>name) -- Python/PCRE */
-#define ONIG_SYN_OP2_OPTION_JAVA (1U<<31) /* (?idmsux), (?-idmsux) */ /* NOTIMPL */
+#define ONIG_SYN_OP2_QMARK_TILDE_ABSENT (1U<<31) /* (?~...) */
+/* #define ONIG_SYN_OP2_OPTION_JAVA (1U<<xx) */ /* (?idmsux), (?-idmsux) */ /* NOTIMPL */
/* syntax (behavior) */
#define ONIG_SYN_CONTEXT_INDEP_ANCHORS (1U<<31) /* not implemented */
@@ -824,7 +825,7 @@ int onig_new(OnigRegex*, const OnigUChar* pattern, const OnigUChar* pattern_end,
ONIG_EXTERN
int onig_reg_init(OnigRegex reg, OnigOptionType option, OnigCaseFoldType case_fold_flag, OnigEncoding enc, const OnigSyntaxType* syntax);
ONIG_EXTERN
-int onig_new_without_alloc(OnigRegex, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo);
+int onig_new_without_alloc(OnigRegex, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, const OnigSyntaxType* syntax, OnigErrorInfo* einfo);
ONIG_EXTERN
int onig_new_deluxe(OnigRegex* reg, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo);
ONIG_EXTERN
diff --git a/regcomp.c b/regcomp.c
index ecf956c5dc..59b1f40d46 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -1286,6 +1286,10 @@ compile_length_enclose_node(EncloseNode* node, regex_t* reg)
}
break;
+ case ENCLOSE_ABSENT:
+ len = SIZE_OP_PUSH_ABSENT_POS + SIZE_OP_ABSENT + tlen + SIZE_OP_ABSENT_END;
+ break;
+
default:
return ONIGERR_TYPE_BUG;
break;
@@ -1430,6 +1434,19 @@ compile_enclose_node(EncloseNode* node, regex_t* reg)
}
break;
+ case ENCLOSE_ABSENT:
+ len = compile_length_tree(node->target, reg);
+ if (len < 0) return len;
+
+ r = add_opcode(reg, OP_PUSH_ABSENT_POS);
+ if (r) return r;
+ r = add_opcode_rel_addr(reg, OP_ABSENT, len + SIZE_OP_ABSENT_END);
+ if (r) return r;
+ r = compile_tree(node->target, reg);
+ if (r) return r;
+ r = add_opcode(reg, OP_ABSENT_END);
+ break;
+
default:
return ONIGERR_TYPE_BUG;
break;
@@ -1484,9 +1501,6 @@ compile_anchor_node(AnchorNode* node, regex_t* reg)
case ANCHOR_SEMI_END_BUF: r = add_opcode(reg, OP_SEMI_END_BUF); break;
case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break;
- /* used for implicit anchor optimization: /.*a/ ==> /(?:^|\G).*a/ */
- case ANCHOR_ANYCHAR_STAR: r = add_opcode(reg, OP_BEGIN_POS_OR_LINE); break;
-
case ANCHOR_WORD_BOUND:
if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_BOUND);
else r = add_opcode(reg, OP_WORD_BOUND);
@@ -2112,6 +2126,7 @@ quantifiers_memory_node_info(Node* node)
case ENCLOSE_OPTION:
case ENCLOSE_STOP_BACKTRACK:
case ENCLOSE_CONDITION:
+ case ENCLOSE_ABSENT:
r = quantifiers_memory_node_info(en->target);
break;
default:
@@ -2251,6 +2266,9 @@ get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env)
case ENCLOSE_CONDITION:
r = get_min_match_length(en->target, min, env);
break;
+
+ case ENCLOSE_ABSENT:
+ break;
}
}
break;
@@ -2374,6 +2392,9 @@ get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env)
case ENCLOSE_CONDITION:
r = get_max_match_length(en->target, max, env);
break;
+
+ case ENCLOSE_ABSENT:
+ break;
}
}
break;
@@ -2497,6 +2518,7 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
case ENCLOSE_CONDITION:
r = get_char_length_tree1(en->target, reg, len, level);
break;
+ case ENCLOSE_ABSENT:
default:
break;
}
@@ -2790,6 +2812,9 @@ get_head_value_node(Node* node, int exact, regex_t* reg)
case ENCLOSE_CONDITION:
n = get_head_value_node(en->target, exact, reg);
break;
+
+ case ENCLOSE_ABSENT:
+ break;
}
}
break;
@@ -3295,7 +3320,7 @@ setup_look_behind(Node* node, regex_t* reg, ScanEnv* env)
}
static int
-next_setup(Node* node, Node* next_node, int in_root, regex_t* reg)
+next_setup(Node* node, Node* next_node, regex_t* reg)
{
int type;
@@ -3329,32 +3354,10 @@ next_setup(Node* node, Node* next_node, int in_root, regex_t* reg)
}
}
}
-
-#ifndef ONIG_DONT_OPTIMIZE
- if (NTYPE(node) == NT_QTFR && /* the type may be changed by above block */
- in_root && /* qn->lower == 0 && */
- NTYPE(qn->target) == NT_CANY &&
- ! IS_MULTILINE(reg->options)) {
- /* implicit anchor: /.*a/ ==> /(?:^|\G).*a/ */
- Node *np;
- np = onig_node_new_list(NULL_NODE, NULL_NODE);
- CHECK_NULL_RETURN_MEMERR(np);
- swap_node(node, np);
- NCDR(node) = onig_node_new_list(np, NULL_NODE);
- if (IS_NULL(NCDR(node))) {
- onig_node_free(np);
- return ONIGERR_MEMORY;
- }
- np = onig_node_new_anchor(ANCHOR_ANYCHAR_STAR); /* (?:^|\G) */
- CHECK_NULL_RETURN_MEMERR(np);
- NCAR(node) = np;
- }
-#endif
}
}
else if (type == NT_ENCLOSE) {
EncloseNode* en = NENCLOSE(node);
- in_root = 0;
if (en->type == ENCLOSE_MEMORY) {
node = en->target;
goto retry;
@@ -3852,9 +3855,8 @@ setup_comb_exp_check(Node* node, int state, ScanEnv* env)
#define IN_NOT (1<<1)
#define IN_REPEAT (1<<2)
#define IN_VAR_REPEAT (1<<3)
-#define IN_ROOT (1<<4)
-#define IN_CALL (1<<5)
-#define IN_RECCALL (1<<6)
+#define IN_CALL (1<<4)
+#define IN_RECCALL (1<<5)
/* setup_tree does the following work.
1. check empty loop. (set qn->target_empty_info)
@@ -3869,25 +3871,19 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
{
int type;
int r = 0;
- int in_root = state & IN_ROOT;
- state &= ~IN_ROOT;
restart:
type = NTYPE(node);
switch (type) {
case NT_LIST:
{
Node* prev = NULL_NODE;
- int prev_in_root = 0;
- state |= in_root;
do {
r = setup_tree(NCAR(node), reg, state, env);
if (IS_NOT_NULL(prev) && r == 0) {
- r = next_setup(prev, NCAR(node), prev_in_root, reg);
+ r = next_setup(prev, NCAR(node), reg);
}
prev = NCAR(node);
- prev_in_root = state & IN_ROOT;
- state &= ~IN_ROOT;
} while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
}
break;
@@ -4051,7 +4047,6 @@ restart:
case ENCLOSE_OPTION:
{
OnigOptionType options = reg->options;
- state |= in_root;
reg->options = NENCLOSE(node)->option;
r = setup_tree(NENCLOSE(node)->target, reg, state, env);
reg->options = options;
@@ -4101,6 +4096,10 @@ restart:
return ONIGERR_INVALID_BACKREF;
r = setup_tree(NENCLOSE(node)->target, reg, state, env);
break;
+
+ case ENCLOSE_ABSENT:
+ r = setup_tree(NENCLOSE(node)->target, reg, state, env);
+ break;
}
}
break;
@@ -4195,6 +4194,8 @@ set_bm_skip(UChar* s, UChar* end, regex_t* reg,
n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
p, end, items);
clen = enclen(enc, p, end);
+ if (p + clen > end)
+ clen = (int )(end - p);
for (j = 0; j < n; j++) {
if ((items[j].code_len != 1) || (items[j].byte_len != clen))
@@ -4229,6 +4230,8 @@ set_bm_skip(UChar* s, UChar* end, regex_t* reg,
n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
p, end, items);
clen = enclen(enc, p, end);
+ if (p + clen > end)
+ clen = (int )(end - p);
for (j = 0; j < n; j++) {
if ((items[j].code_len != 1) || (items[j].byte_len != clen))
@@ -4273,6 +4276,8 @@ set_bm_skip(UChar* s, UChar* end, regex_t* reg,
n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
p, end, items);
clen = enclen(enc, p, end);
+ if (p + clen > end)
+ clen = (int )(end - p);
for (j = 0; j < n; j++) {
if ((items[j].code_len != 1) || (items[j].byte_len != clen))
@@ -4307,6 +4312,8 @@ set_bm_skip(UChar* s, UChar* end, regex_t* reg,
n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
p, end, items);
clen = enclen(enc, p, end);
+ if (p + clen > end)
+ clen = (int )(end - p);
for (j = 0; j < n; j++) {
if ((items[j].code_len != 1) || (items[j].byte_len != clen))
@@ -5274,6 +5281,10 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
case ENCLOSE_CONDITION:
r = optimize_node_left(en->target, opt, env);
break;
+
+ case ENCLOSE_ABSENT:
+ set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
+ break;
}
}
break;
@@ -5782,7 +5793,7 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
reg->num_call = 0;
#endif
- r = setup_tree(root, reg, IN_ROOT, &scan_env);
+ r = setup_tree(root, reg, 0, &scan_env);
if (r != 0) goto err_unset;
#ifdef ONIG_DEBUG_PARSE_TREE
@@ -5944,7 +5955,7 @@ onig_reg_init(regex_t* reg, OnigOptionType option,
extern int
onig_new_without_alloc(regex_t* reg, const UChar* pattern,
const UChar* pattern_end, OnigOptionType option, OnigEncoding enc,
- OnigSyntaxType* syntax, OnigErrorInfo* einfo)
+ const OnigSyntaxType* syntax, OnigErrorInfo* einfo)
{
int r;
@@ -6173,7 +6184,6 @@ OnigOpInfoType OnigOpInfo[] = {
{ OP_END_LINE, "end-line", ARG_NON },
{ OP_SEMI_END_BUF, "semi-end-buf", ARG_NON },
{ OP_BEGIN_POSITION, "begin-position", ARG_NON },
- { OP_BEGIN_POS_OR_LINE, "begin-pos-or-line", ARG_NON },
{ OP_BACKREF1, "backref1", ARG_NON },
{ OP_BACKREF2, "backref2", ARG_NON },
{ OP_BACKREFN, "backrefn", ARG_MEMNUM },
@@ -6215,6 +6225,9 @@ OnigOpInfoType OnigOpInfo[] = {
{ OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL },
{ OP_PUSH_LOOK_BEHIND_NOT, "push-look-behind-not", ARG_SPECIAL },
{ OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON },
+ { OP_PUSH_ABSENT_POS, "push-absent-pos", ARG_NON },
+ { OP_ABSENT, "absent", ARG_RELADDR },
+ { OP_ABSENT_END, "absent-end", ARG_NON },
{ OP_CALL, "call", ARG_ABSADDR },
{ OP_RETURN, "return", ARG_NON },
{ OP_CONDITION, "condition", ARG_SPECIAL },
@@ -6509,7 +6522,7 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp,
default:
fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n",
- *--bp);
+ bp[-1]);
}
}
fputs("]", f);
@@ -6629,7 +6642,6 @@ print_indent_tree(FILE* f, Node* node, int indent)
case ANCHOR_END_LINE: fputs("end line", f); break;
case ANCHOR_SEMI_END_BUF: fputs("semi end buf", f); break;
case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break;
- case ANCHOR_ANYCHAR_STAR: fputs("begin position/line", f); break;
case ANCHOR_WORD_BOUND: fputs("word bound", f); break;
case ANCHOR_NOT_WORD_BOUND: fputs("not word bound", f); break;
@@ -6694,6 +6706,9 @@ print_indent_tree(FILE* f, Node* node, int indent)
case ENCLOSE_CONDITION:
fprintf(f, "condition:%d", NENCLOSE(node)->regnum);
break;
+ case ENCLOSE_ABSENT:
+ fprintf(f, "absent");
+ break;
default:
break;
diff --git a/regenc.c b/regenc.c
index ca09a7fcb3..16d62fdf40 100644
--- a/regenc.c
+++ b/regenc.c
@@ -54,11 +54,11 @@ onigenc_set_default_encoding(OnigEncoding enc)
extern int
onigenc_mbclen_approximate(const OnigUChar* p,const OnigUChar* e, OnigEncoding enc)
{
- int ret = ONIGENC_PRECISE_MBC_ENC_LEN(enc,p,e);
+ int ret = ONIGENC_PRECISE_MBC_ENC_LEN(enc, p, e);
if (ONIGENC_MBCLEN_CHARFOUND_P(ret))
return ONIGENC_MBCLEN_CHARFOUND_LEN(ret);
else if (ONIGENC_MBCLEN_NEEDMORE_P(ret))
- return (int)(e-p)+ONIGENC_MBCLEN_NEEDMORE_LEN(ret);
+ return (int )(e - p) + ONIGENC_MBCLEN_NEEDMORE_LEN(ret);
return 1;
}
diff --git a/regexec.c b/regexec.c
index b27884b32c..9e5f559731 100644
--- a/regexec.c
+++ b/regexec.c
@@ -403,6 +403,8 @@ onig_region_copy(OnigRegion* to, const OnigRegion* from)
#define STK_CALL_FRAME 0x0800
#define STK_RETURN 0x0900
#define STK_VOID 0x0a00 /* for fill a blank */
+#define STK_ABSENT_POS 0x0b00 /* for absent */
+#define STK_ABSENT 0x0c00 /* absent inner loop marker */
/* stack type check mask */
#define STK_MASK_POP_USED 0x00ff
@@ -673,7 +675,8 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end,
#define STACK_PUSH_ALT(pat,s,sprev,keep) STACK_PUSH(STK_ALT,pat,s,sprev,keep)
#define STACK_PUSH_POS(s,sprev,keep) STACK_PUSH(STK_POS,NULL_UCHARP,s,sprev,keep)
#define STACK_PUSH_POS_NOT(pat,s,sprev,keep) STACK_PUSH(STK_POS_NOT,pat,s,sprev,keep)
-#define STACK_PUSH_STOP_BT STACK_PUSH_TYPE(STK_STOP_BT)
+#define STACK_PUSH_ABSENT STACK_PUSH_TYPE(STK_ABSENT)
+#define STACK_PUSH_STOP_BT STACK_PUSH_TYPE(STK_STOP_BT)
#define STACK_PUSH_LOOK_BEHIND_NOT(pat,s,sprev,keep) \
STACK_PUSH(STK_LOOK_BEHIND_NOT,pat,s,sprev,keep)
@@ -785,6 +788,14 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end,
STACK_INC;\
} while(0)
+#define STACK_PUSH_ABSENT_POS(start, end) do {\
+ STACK_ENSURE(1);\
+ stk->type = STK_ABSENT_POS;\
+ stk->u.absent_pos.abs_pstr = (start);\
+ stk->u.absent_pos.end_pstr = (end);\
+ STACK_INC;\
+} while(0)
+
#ifdef ONIG_DEBUG
# define STACK_BASE_CHECK(p, at) \
@@ -885,6 +896,33 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end,
}\
} while(0)
+#define STACK_POP_TIL_ABSENT do {\
+ while (1) {\
+ stk--;\
+ STACK_BASE_CHECK(stk, "STACK_POP_TIL_ABSENT"); \
+ if (stk->type == STK_ABSENT) break;\
+ else if (stk->type == STK_MEM_START) {\
+ mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
+ mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
+ }\
+ else if (stk->type == STK_REPEAT_INC) {\
+ STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
+ }\
+ else if (stk->type == STK_MEM_END) {\
+ mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
+ mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
+ }\
+ ELSE_IF_STATE_CHECK_MARK(stk);\
+ }\
+} while(0)
+
+#define STACK_POP_ABSENT_POS(start, end) do {\
+ stk--;\
+ STACK_BASE_CHECK(stk, "STACK_POP_ABSENT_POS"); \
+ (start) = stk->u.absent_pos.abs_pstr;\
+ (end) = stk->u.absent_pos.end_pstr;\
+} while(0)
+
#define STACK_POS_END(k) do {\
k = stk;\
while (1) {\
@@ -1136,10 +1174,12 @@ static int string_cmp_ic(OnigEncoding enc, int case_fold_flag,
# define DATA_ENSURE_CHECK1 (s < right_range)
# define DATA_ENSURE_CHECK(n) (s + (n) <= right_range)
# define DATA_ENSURE(n) if (s + (n) > right_range) goto fail
+# define ABSENT_END_POS right_range
#else
# define DATA_ENSURE_CHECK1 (s < end)
# define DATA_ENSURE_CHECK(n) (s + (n) <= end)
# define DATA_ENSURE(n) if (s + (n) > end) goto fail
+# define ABSENT_END_POS end
#endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
@@ -1372,6 +1412,8 @@ stack_type_str(int stack_type)
case STK_CALL_FRAME: return "Call ";
case STK_RETURN: return "Ret ";
case STK_VOID: return "Void ";
+ case STK_ABSENT_POS: return "AbsPos";
+ case STK_ABSENT: return "Absent";
default: return " ";
}
}
@@ -1484,7 +1526,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
&&L_OP_END_LINE,
&&L_OP_SEMI_END_BUF,
&&L_OP_BEGIN_POSITION,
- &&L_OP_BEGIN_POS_OR_LINE, /* used for implicit anchor optimization */
&&L_OP_BACKREF1,
&&L_OP_BACKREF2,
@@ -1552,6 +1593,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
&&L_OP_LOOK_BEHIND, /* (?<=...) start (no needs end opcode) */
&&L_OP_PUSH_LOOK_BEHIND_NOT, /* (?<!...) start */
&&L_OP_FAIL_LOOK_BEHIND_NOT, /* (?<!...) end */
+ &&L_OP_PUSH_ABSENT_POS, /* (?~...) start */
+ &&L_OP_ABSENT, /* (?~...) start of inner loop */
+ &&L_OP_ABSENT_END, /* (?~...) end */
# ifdef USE_SUBEXP_CALL
&&L_OP_CALL, /* \g<name> */
@@ -1636,8 +1680,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
#endif
#ifdef ONIG_DEBUG_MATCH
- fprintf(stderr, "match_at: str: %"PRIdPTR" (%p), end: %"PRIdPTR" (%p), start: %"PRIdPTR" (%p), sprev: %"PRIdPTR" (%p)\n",
- (intptr_t )str, str, (intptr_t )end, end, (intptr_t )sstart, sstart, (intptr_t )sprev, sprev);
+ fprintf(stderr, "match_at: str: %"PRIuPTR" (%p), end: %"PRIuPTR" (%p), start: %"PRIuPTR" (%p), sprev: %"PRIuPTR" (%p)\n",
+ (uintptr_t )str, str, (uintptr_t )end, end, (uintptr_t )sstart, sstart, (uintptr_t )sprev, sprev);
fprintf(stderr, "size: %d, start offset: %d\n",
(int )(end - str), (int )(sstart - str));
fprintf(stderr, "\n ofs> str stk:type addr:opcode\n");
@@ -2378,7 +2422,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
JUMP;
CASE(OP_BEGIN_LINE) MOP_IN(OP_BEGIN_LINE);
- op_begin_line:
if (ON_STR_BEGIN(s)) {
if (IS_NOTBOL(msa->options)) goto fail;
MOP_OUT;
@@ -2454,13 +2497,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
MOP_OUT;
JUMP;
- CASE(OP_BEGIN_POS_OR_LINE) MOP_IN(OP_BEGIN_POS_OR_LINE);
- if (s != msa->gpos)
- goto op_begin_line;
-
- MOP_OUT;
- JUMP;
-
CASE(OP_MEMORY_START_PUSH) MOP_IN(OP_MEMORY_START_PUSH);
GET_MEMNUM_INC(mem, p);
STACK_PUSH_MEM_START(mem, s);
@@ -2721,8 +2757,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
STACK_NULL_CHECK(isnull, mem, s);
if (isnull) {
#ifdef ONIG_DEBUG_MATCH
- fprintf(stderr, "NULL_CHECK_END: skip id:%d, s:%"PRIdPTR" (%p)\n",
- (int )mem, (intptr_t )s, s);
+ fprintf(stderr, "NULL_CHECK_END: skip id:%d, s:%"PRIuPTR" (%p)\n",
+ (int )mem, (uintptr_t )s, s);
#endif
null_check_found:
/* empty loop founded, skip next instruction */
@@ -2755,8 +2791,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
STACK_NULL_CHECK_MEMST(isnull, mem, s, reg);
if (isnull) {
# ifdef ONIG_DEBUG_MATCH
- fprintf(stderr, "NULL_CHECK_END_MEMST: skip id:%d, s:%"PRIdPTR" (%p)\n",
- (int )mem, (intptr_t )s, s);
+ fprintf(stderr, "NULL_CHECK_END_MEMST: skip id:%d, s:%"PRIuPTR" (%p)\n",
+ (int )mem, (uintptr_t )s, s);
# endif
if (isnull == -1) goto fail;
goto null_check_found;
@@ -2780,8 +2816,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
# endif
if (isnull) {
# ifdef ONIG_DEBUG_MATCH
- fprintf(stderr, "NULL_CHECK_END_MEMST_PUSH: skip id:%d, s:%"PRIdPTR" (%p)\n",
- (int )mem, (intptr_t )s, s);
+ fprintf(stderr, "NULL_CHECK_END_MEMST_PUSH: skip id:%d, s:%"PRIuPTR" (%p)\n",
+ (int )mem, (uintptr_t )s, s);
# endif
if (isnull == -1) goto fail;
goto null_check_found;
@@ -3033,6 +3069,63 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
goto fail;
NEXT;
+ CASE(OP_PUSH_ABSENT_POS) MOP_IN(OP_PUSH_ABSENT_POS);
+ /* Save the absent-start-pos and the original end-pos. */
+ STACK_PUSH_ABSENT_POS(s, ABSENT_END_POS);
+ MOP_OUT;
+ JUMP;
+
+ CASE(OP_ABSENT) MOP_IN(OP_ABSENT);
+ {
+ const UChar* aend = ABSENT_END_POS;
+ UChar* absent;
+ UChar* selfp = p - 1;
+
+ STACK_POP_ABSENT_POS(absent, ABSENT_END_POS); /* Restore end-pos. */
+ GET_RELADDR_INC(addr, p);
+#ifdef ONIG_DEBUG_MATCH
+ fprintf(stderr, "ABSENT: s:%p, end:%p, absent:%p, aend:%p\n", s, end, absent, aend);
+#endif
+ if ((absent > aend) && (s > absent)) {
+ /* An empty match occurred in (?~...) at the start point.
+ * Never match. */
+ STACK_POP;
+ goto fail;
+ }
+ else if ((s >= aend) && (s > absent)) {
+ if (s > aend) {
+ /* Only one (or less) character matched in the last iteration.
+ * This is not a possible point. */
+ goto fail;
+ }
+ /* All possible points were found. Try matching after (?~...). */
+ DATA_ENSURE(0);
+ p += addr;
+ }
+ else {
+ STACK_PUSH_ALT(p + addr, s, sprev, pkeep); /* Push possible point. */
+ n = enclen(encode, s, end);
+ STACK_PUSH_ABSENT_POS(absent, ABSENT_END_POS); /* Save the original pos. */
+ STACK_PUSH_ALT(selfp, s + n, s, pkeep); /* Next iteration. */
+ STACK_PUSH_ABSENT;
+ ABSENT_END_POS = aend;
+ }
+ }
+ MOP_OUT;
+ JUMP;
+
+ CASE(OP_ABSENT_END) MOP_IN(OP_ABSENT_END);
+ /* The pattern inside (?~...) was matched.
+ * Set the end-pos temporary and go to next iteration. */
+ if (sprev < ABSENT_END_POS)
+ ABSENT_END_POS = sprev;
+#ifdef ONIG_DEBUG_MATCH
+ fprintf(stderr, "ABSENT_END: end:%p\n", ABSENT_END_POS);
+#endif
+ STACK_POP_TIL_ABSENT;
+ goto fail;
+ NEXT;
+
#ifdef USE_SUBEXP_CALL
CASE(OP_CALL) MOP_IN(OP_CALL);
GET_ABSADDR_INC(addr, p);
@@ -3270,7 +3363,7 @@ bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
# ifdef ONIG_DEBUG_SEARCH
fprintf(stderr, "bm_search_notrev: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
- text, text, text_end, text_end, text_range, text_range);
+ (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
# endif
tail = target_end - 1;
@@ -3326,8 +3419,8 @@ bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
const UChar *tail;
# ifdef ONIG_DEBUG_SEARCH
- fprintf(stderr, "bm_search: text: %"PRIuPTR", text_end: %"PRIuPTR", text_range: %"PRIuPTR"\n",
- text, text_end, text_range);
+ fprintf(stderr, "bm_search: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
+ (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
# endif
end = text_range + (target_end - target) - 1;
@@ -3482,8 +3575,8 @@ bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
OnigEncoding enc = reg->enc;
# ifdef ONIG_DEBUG_SEARCH
- fprintf(stderr, "bm_search_notrev: text: %"PRIdPTR" (%p), text_end: %"PRIdPTR" (%p), text_range: %"PRIdPTR" (%p)\n",
- (intptr_t )text, text, (intptr_t )text_end, text_end, (intptr_t )text_range, text_range);
+ fprintf(stderr, "bm_search_notrev: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
+ (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
# endif
tail = target_end - 1;
@@ -3542,8 +3635,8 @@ bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
ptrdiff_t tlen1;
# ifdef ONIG_DEBUG_SEARCH
- fprintf(stderr, "bm_search: text: %"PRIuPTR", text_end: %"PRIuPTR", text_range: %"PRIuPTR"\n",
- text, text_end, text_range);
+ fprintf(stderr, "bm_search: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
+ (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
# endif
tail = target_end - 1;
@@ -3595,8 +3688,8 @@ bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end,
int case_fold_flag = reg->case_fold_flag;
# ifdef ONIG_DEBUG_SEARCH
- fprintf(stderr, "bm_search_notrev_ic: text: %"PRIdPTR" (%p), text_end: %"PRIdPTR" (%p), text_range: %"PRIdPTR" (%p)\n",
- (intptr_t )text, text, (intptr_t )text_end, text_end, (intptr_t )text_range, text_range);
+ fprintf(stderr, "bm_search_notrev_ic: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
+ (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
# endif
tail = target_end - 1;
@@ -3653,8 +3746,8 @@ bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end,
int case_fold_flag = reg->case_fold_flag;
# ifdef ONIG_DEBUG_SEARCH
- fprintf(stderr, "bm_search_ic: text: %"PRIdPTR" (%p), text_end: %"PRIdPTR" (%p), text_range: %"PRIdPTR" (%p)\n",
- (intptr_t )text, text, (intptr_t )text_end, text_end, (intptr_t )text_range, text_range);
+ fprintf(stderr, "bm_search_ic: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
+ (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
# endif
tail = target_end - 1;
@@ -3814,7 +3907,7 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
#ifdef ONIG_DEBUG_SEARCH
fprintf(stderr, "forward_search_range: str: %"PRIuPTR" (%p), end: %"PRIuPTR" (%p), s: %"PRIuPTR" (%p), range: %"PRIuPTR" (%p)\n",
- (intptr_t )str, str, (intptr_t )end, end, (intptr_t )s, s, (intptr_t )range, range);
+ (uintptr_t )str, str, (uintptr_t )end, end, (uintptr_t )s, s, (uintptr_t )range, range);
#endif
p = s;
@@ -4068,7 +4161,7 @@ onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end,
#ifdef ONIG_DEBUG_SEARCH
fprintf(stderr,
"onig_search (entry point): str: %"PRIuPTR" (%p), end: %"PRIuPTR", start: %"PRIuPTR", range: %"PRIuPTR"\n",
- (intptr_t )str, str, end - str, start - str, range - str);
+ (uintptr_t )str, str, end - str, start - str, range - str);
#endif
if (region) {
@@ -4302,8 +4395,6 @@ onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end,
if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) {
do {
- if ((reg->anchor & ANCHOR_BEGIN_POSITION) == 0)
- msa.gpos = s; /* move \G position */
MATCH_AND_RETURN_CHECK(orig_range);
prev = s;
s += enclen(reg->enc, s, end);
diff --git a/regint.h b/regint.h
index 624deea864..a2f5bbba1d 100644
--- a/regint.h
+++ b/regint.h
@@ -202,7 +202,9 @@
#define xmemcpy memcpy
#define xmemmove memmove
-#if defined(RUBY_MSVCRT_VERSION) && RUBY_MSVCRT_VERSION >= 90 && !defined(__GNUC__)
+#if ((defined(RUBY_MSVCRT_VERSION) && RUBY_MSVCRT_VERSION >= 90) \
+ || (!defined(RUBY_MSVCRT_VERSION) && defined(_WIN32))) \
+ && !defined(__GNUC__)
# define xalloca _alloca
# define xvsnprintf(buf,size,fmt,args) _vsnprintf_s(buf,size,_TRUNCATE,fmt,args)
# define xsnprintf sprintf_s
@@ -598,7 +600,6 @@ enum OpCode {
OP_END_LINE,
OP_SEMI_END_BUF,
OP_BEGIN_POSITION,
- OP_BEGIN_POS_OR_LINE, /* used for implicit anchor optimization */
OP_BACKREF1,
OP_BACKREF2,
@@ -643,6 +644,9 @@ enum OpCode {
OP_LOOK_BEHIND, /* (?<=...) start (no needs end opcode) */
OP_PUSH_LOOK_BEHIND_NOT, /* (?<!...) start */
OP_FAIL_LOOK_BEHIND_NOT, /* (?<!...) end */
+ OP_PUSH_ABSENT_POS, /* (?~...) start */
+ OP_ABSENT, /* (?~...) start of inner loop */
+ OP_ABSENT_END, /* (?~...) end */
OP_CALL, /* \g<name> */
OP_RETURN,
@@ -730,6 +734,9 @@ typedef void* PointerType;
#define SIZE_OP_CALL (SIZE_OPCODE + SIZE_ABSADDR)
#define SIZE_OP_RETURN SIZE_OPCODE
#define SIZE_OP_CONDITION (SIZE_OPCODE + SIZE_MEMNUM + SIZE_RELADDR)
+#define SIZE_OP_PUSH_ABSENT_POS SIZE_OPCODE
+#define SIZE_OP_ABSENT (SIZE_OPCODE + SIZE_RELADDR)
+#define SIZE_OP_ABSENT_END SIZE_OPCODE
#ifdef USE_COMBINATION_EXPLOSION_CHECK
# define SIZE_OP_STATE_CHECK (SIZE_OPCODE + SIZE_STATE_CHECK_NUM)
@@ -841,6 +848,10 @@ typedef struct _OnigStackType {
UChar *pstr; /* string position */
} call_frame;
#endif
+ struct {
+ UChar *abs_pstr; /* absent start position */
+ const UChar *end_pstr; /* end position */
+ } absent_pos;
} u;
} OnigStackType;
diff --git a/regparse.c b/regparse.c
index 204aa46ce9..a2d2fcf6a7 100644
--- a/regparse.c
+++ b/regparse.c
@@ -58,7 +58,8 @@ const OnigSyntaxType OnigSyntaxRuby = {
ONIG_SYN_OP2_ESC_CAPITAL_X_EXTENDED_GRAPHEME_CLUSTER |
ONIG_SYN_OP2_QMARK_LPAREN_CONDITION |
ONIG_SYN_OP2_ESC_CAPITAL_R_LINEBREAK |
- ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP )
+ ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP |
+ ONIG_SYN_OP2_QMARK_TILDE_ABSENT )
, ( SYN_GNU_REGEX_BV |
ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV |
ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND |
@@ -1024,14 +1025,15 @@ scan_env_add_mem_entry(ScanEnv* env)
if (IS_NULL(env->mem_nodes_dynamic)) {
alloc = INIT_SCANENV_MEMNODES_ALLOC_SIZE;
p = (Node** )xmalloc(sizeof(Node*) * alloc);
+ CHECK_NULL_RETURN_MEMERR(p);
xmemcpy(p, env->mem_nodes_static,
sizeof(Node*) * SCANENV_MEMNODES_SIZE);
}
else {
alloc = env->mem_alloc * 2;
p = (Node** )xrealloc(env->mem_nodes_dynamic, sizeof(Node*) * alloc);
+ CHECK_NULL_RETURN_MEMERR(p);
}
- CHECK_NULL_RETURN_MEMERR(p);
for (i = env->num_mem + 1; i < alloc; i++)
p[i] = NULL_NODE;
@@ -3176,7 +3178,7 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
PUNFETCH;
num = fetch_escaped_value(&p, end, env, &c2);
if (num < 0) return num;
- if ((OnigCodePoint)tok->u.c != c2) {
+ if ((OnigCodePoint )tok->u.c != c2) {
tok->u.code = (OnigCodePoint )c2;
tok->type = TK_CODE_POINT;
}
@@ -3780,7 +3782,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
num = fetch_escaped_value(&p, end, env, &c2);
if (num < 0) return num;
/* set_raw: */
- if ((OnigCodePoint)tok->u.c != c2) {
+ if ((OnigCodePoint )tok->u.c != c2) {
tok->type = TK_CODE_POINT;
tok->u.code = (OnigCodePoint )c2;
}
@@ -4989,6 +4991,14 @@ parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
case '>': /* (?>...) stop backtrack */
*np = node_new_enclose(ENCLOSE_STOP_BACKTRACK);
break;
+ case '~': /* (?~...) absent operator */
+ if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_TILDE_ABSENT)) {
+ *np = node_new_enclose(ENCLOSE_ABSENT);
+ }
+ else {
+ return ONIGERR_UNDEFINED_GROUP_OPTION;
+ }
+ break;
#ifdef USE_NAMED_GROUP
case '\'':
@@ -5030,7 +5040,9 @@ parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
named_group1:
list_capture = 0;
+# ifdef USE_CAPTURE_HISTORY
named_group2:
+# endif
name = p;
r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &num, 0);
if (r < 0) return r;
@@ -5060,9 +5072,10 @@ parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
#endif
break;
+#ifdef USE_CAPTURE_HISTORY
case '@':
if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY)) {
-#ifdef USE_NAMED_GROUP
+# ifdef USE_NAMED_GROUP
if (!PEND &&
IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
PFETCH(c);
@@ -5072,7 +5085,7 @@ parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
}
PUNFETCH;
}
-#endif
+# endif
*np = node_new_enclose_memory(env->option, 0);
CHECK_NULL_RETURN_MEMERR(*np);
num = scan_env_add_mem_entry(env);
@@ -5087,6 +5100,7 @@ parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
return ONIGERR_UNDEFINED_GROUP_OPTION;
}
break;
+#endif /* USE_CAPTURE_HISTORY */
case '(': /* conditional expression: (?(cond)yes), (?(cond)yes|no) */
if (!PEND &&
diff --git a/regparse.h b/regparse.h
index 111a840b84..888ebf4ce6 100644
--- a/regparse.h
+++ b/regparse.h
@@ -95,6 +95,7 @@ RUBY_SYMBOL_EXPORT_BEGIN
#define ENCLOSE_OPTION (1<<1)
#define ENCLOSE_STOP_BACKTRACK (1<<2)
#define ENCLOSE_CONDITION (1<<3)
+#define ENCLOSE_ABSENT (1<<4)
#define NODE_STR_MARGIN 16
#define NODE_STR_BUF_SIZE 24 /* sizeof(CClassNode) - sizeof(int)*4 */