summaryrefslogtreecommitdiff
path: root/regcomp.c
diff options
context:
space:
mode:
authornaruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2017-02-11 15:08:33 (GMT)
committernaruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2017-02-11 15:08:33 (GMT)
commit6b1c6e0e55707c78f7dba05e3f005269aa78fa3f (patch)
tree69e97c4465966bc427f9569dd8664562aa456662 /regcomp.c
parent238b9276decab770a18138ebc298fa7172f2a047 (diff)
Merge Onigmo 6.1.1
* Support absent operator https://github.com/k-takata/Onigmo/issues/82 * https://github.com/k-takata/Onigmo/blob/Onigmo-6.1.1/HISTORY git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@57603 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'regcomp.c')
-rw-r--r--regcomp.c99
1 files changed, 57 insertions, 42 deletions
diff --git a/regcomp.c b/regcomp.c
index ecf956c..59b1f40 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -1286,6 +1286,10 @@ compile_length_enclose_node(EncloseNode* node, regex_t* reg)
}
break;
+ case ENCLOSE_ABSENT:
+ len = SIZE_OP_PUSH_ABSENT_POS + SIZE_OP_ABSENT + tlen + SIZE_OP_ABSENT_END;
+ break;
+
default:
return ONIGERR_TYPE_BUG;
break;
@@ -1430,6 +1434,19 @@ compile_enclose_node(EncloseNode* node, regex_t* reg)
}
break;
+ case ENCLOSE_ABSENT:
+ len = compile_length_tree(node->target, reg);
+ if (len < 0) return len;
+
+ r = add_opcode(reg, OP_PUSH_ABSENT_POS);
+ if (r) return r;
+ r = add_opcode_rel_addr(reg, OP_ABSENT, len + SIZE_OP_ABSENT_END);
+ if (r) return r;
+ r = compile_tree(node->target, reg);
+ if (r) return r;
+ r = add_opcode(reg, OP_ABSENT_END);
+ break;
+
default:
return ONIGERR_TYPE_BUG;
break;
@@ -1484,9 +1501,6 @@ compile_anchor_node(AnchorNode* node, regex_t* reg)
case ANCHOR_SEMI_END_BUF: r = add_opcode(reg, OP_SEMI_END_BUF); break;
case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break;
- /* used for implicit anchor optimization: /.*a/ ==> /(?:^|\G).*a/ */
- case ANCHOR_ANYCHAR_STAR: r = add_opcode(reg, OP_BEGIN_POS_OR_LINE); break;
-
case ANCHOR_WORD_BOUND:
if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_BOUND);
else r = add_opcode(reg, OP_WORD_BOUND);
@@ -2112,6 +2126,7 @@ quantifiers_memory_node_info(Node* node)
case ENCLOSE_OPTION:
case ENCLOSE_STOP_BACKTRACK:
case ENCLOSE_CONDITION:
+ case ENCLOSE_ABSENT:
r = quantifiers_memory_node_info(en->target);
break;
default:
@@ -2251,6 +2266,9 @@ get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env)
case ENCLOSE_CONDITION:
r = get_min_match_length(en->target, min, env);
break;
+
+ case ENCLOSE_ABSENT:
+ break;
}
}
break;
@@ -2374,6 +2392,9 @@ get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env)
case ENCLOSE_CONDITION:
r = get_max_match_length(en->target, max, env);
break;
+
+ case ENCLOSE_ABSENT:
+ break;
}
}
break;
@@ -2497,6 +2518,7 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
case ENCLOSE_CONDITION:
r = get_char_length_tree1(en->target, reg, len, level);
break;
+ case ENCLOSE_ABSENT:
default:
break;
}
@@ -2790,6 +2812,9 @@ get_head_value_node(Node* node, int exact, regex_t* reg)
case ENCLOSE_CONDITION:
n = get_head_value_node(en->target, exact, reg);
break;
+
+ case ENCLOSE_ABSENT:
+ break;
}
}
break;
@@ -3295,7 +3320,7 @@ setup_look_behind(Node* node, regex_t* reg, ScanEnv* env)
}
static int
-next_setup(Node* node, Node* next_node, int in_root, regex_t* reg)
+next_setup(Node* node, Node* next_node, regex_t* reg)
{
int type;
@@ -3329,32 +3354,10 @@ next_setup(Node* node, Node* next_node, int in_root, regex_t* reg)
}
}
}
-
-#ifndef ONIG_DONT_OPTIMIZE
- if (NTYPE(node) == NT_QTFR && /* the type may be changed by above block */
- in_root && /* qn->lower == 0 && */
- NTYPE(qn->target) == NT_CANY &&
- ! IS_MULTILINE(reg->options)) {
- /* implicit anchor: /.*a/ ==> /(?:^|\G).*a/ */
- Node *np;
- np = onig_node_new_list(NULL_NODE, NULL_NODE);
- CHECK_NULL_RETURN_MEMERR(np);
- swap_node(node, np);
- NCDR(node) = onig_node_new_list(np, NULL_NODE);
- if (IS_NULL(NCDR(node))) {
- onig_node_free(np);
- return ONIGERR_MEMORY;
- }
- np = onig_node_new_anchor(ANCHOR_ANYCHAR_STAR); /* (?:^|\G) */
- CHECK_NULL_RETURN_MEMERR(np);
- NCAR(node) = np;
- }
-#endif
}
}
else if (type == NT_ENCLOSE) {
EncloseNode* en = NENCLOSE(node);
- in_root = 0;
if (en->type == ENCLOSE_MEMORY) {
node = en->target;
goto retry;
@@ -3852,9 +3855,8 @@ setup_comb_exp_check(Node* node, int state, ScanEnv* env)
#define IN_NOT (1<<1)
#define IN_REPEAT (1<<2)
#define IN_VAR_REPEAT (1<<3)
-#define IN_ROOT (1<<4)
-#define IN_CALL (1<<5)
-#define IN_RECCALL (1<<6)
+#define IN_CALL (1<<4)
+#define IN_RECCALL (1<<5)
/* setup_tree does the following work.
1. check empty loop. (set qn->target_empty_info)
@@ -3869,25 +3871,19 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
{
int type;
int r = 0;
- int in_root = state & IN_ROOT;
- state &= ~IN_ROOT;
restart:
type = NTYPE(node);
switch (type) {
case NT_LIST:
{
Node* prev = NULL_NODE;
- int prev_in_root = 0;
- state |= in_root;
do {
r = setup_tree(NCAR(node), reg, state, env);
if (IS_NOT_NULL(prev) && r == 0) {
- r = next_setup(prev, NCAR(node), prev_in_root, reg);
+ r = next_setup(prev, NCAR(node), reg);
}
prev = NCAR(node);
- prev_in_root = state & IN_ROOT;
- state &= ~IN_ROOT;
} while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
}
break;
@@ -4051,7 +4047,6 @@ restart:
case ENCLOSE_OPTION:
{
OnigOptionType options = reg->options;
- state |= in_root;
reg->options = NENCLOSE(node)->option;
r = setup_tree(NENCLOSE(node)->target, reg, state, env);
reg->options = options;
@@ -4101,6 +4096,10 @@ restart:
return ONIGERR_INVALID_BACKREF;
r = setup_tree(NENCLOSE(node)->target, reg, state, env);
break;
+
+ case ENCLOSE_ABSENT:
+ r = setup_tree(NENCLOSE(node)->target, reg, state, env);
+ break;
}
}
break;
@@ -4195,6 +4194,8 @@ set_bm_skip(UChar* s, UChar* end, regex_t* reg,
n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
p, end, items);
clen = enclen(enc, p, end);
+ if (p + clen > end)
+ clen = (int )(end - p);
for (j = 0; j < n; j++) {
if ((items[j].code_len != 1) || (items[j].byte_len != clen))
@@ -4229,6 +4230,8 @@ set_bm_skip(UChar* s, UChar* end, regex_t* reg,
n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
p, end, items);
clen = enclen(enc, p, end);
+ if (p + clen > end)
+ clen = (int )(end - p);
for (j = 0; j < n; j++) {
if ((items[j].code_len != 1) || (items[j].byte_len != clen))
@@ -4273,6 +4276,8 @@ set_bm_skip(UChar* s, UChar* end, regex_t* reg,
n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
p, end, items);
clen = enclen(enc, p, end);
+ if (p + clen > end)
+ clen = (int )(end - p);
for (j = 0; j < n; j++) {
if ((items[j].code_len != 1) || (items[j].byte_len != clen))
@@ -4307,6 +4312,8 @@ set_bm_skip(UChar* s, UChar* end, regex_t* reg,
n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
p, end, items);
clen = enclen(enc, p, end);
+ if (p + clen > end)
+ clen = (int )(end - p);
for (j = 0; j < n; j++) {
if ((items[j].code_len != 1) || (items[j].byte_len != clen))
@@ -5274,6 +5281,10 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
case ENCLOSE_CONDITION:
r = optimize_node_left(en->target, opt, env);
break;
+
+ case ENCLOSE_ABSENT:
+ set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
+ break;
}
}
break;
@@ -5782,7 +5793,7 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
reg->num_call = 0;
#endif
- r = setup_tree(root, reg, IN_ROOT, &scan_env);
+ r = setup_tree(root, reg, 0, &scan_env);
if (r != 0) goto err_unset;
#ifdef ONIG_DEBUG_PARSE_TREE
@@ -5944,7 +5955,7 @@ onig_reg_init(regex_t* reg, OnigOptionType option,
extern int
onig_new_without_alloc(regex_t* reg, const UChar* pattern,
const UChar* pattern_end, OnigOptionType option, OnigEncoding enc,
- OnigSyntaxType* syntax, OnigErrorInfo* einfo)
+ const OnigSyntaxType* syntax, OnigErrorInfo* einfo)
{
int r;
@@ -6173,7 +6184,6 @@ OnigOpInfoType OnigOpInfo[] = {
{ OP_END_LINE, "end-line", ARG_NON },
{ OP_SEMI_END_BUF, "semi-end-buf", ARG_NON },
{ OP_BEGIN_POSITION, "begin-position", ARG_NON },
- { OP_BEGIN_POS_OR_LINE, "begin-pos-or-line", ARG_NON },
{ OP_BACKREF1, "backref1", ARG_NON },
{ OP_BACKREF2, "backref2", ARG_NON },
{ OP_BACKREFN, "backrefn", ARG_MEMNUM },
@@ -6215,6 +6225,9 @@ OnigOpInfoType OnigOpInfo[] = {
{ OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL },
{ OP_PUSH_LOOK_BEHIND_NOT, "push-look-behind-not", ARG_SPECIAL },
{ OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON },
+ { OP_PUSH_ABSENT_POS, "push-absent-pos", ARG_NON },
+ { OP_ABSENT, "absent", ARG_RELADDR },
+ { OP_ABSENT_END, "absent-end", ARG_NON },
{ OP_CALL, "call", ARG_ABSADDR },
{ OP_RETURN, "return", ARG_NON },
{ OP_CONDITION, "condition", ARG_SPECIAL },
@@ -6509,7 +6522,7 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp,
default:
fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n",
- *--bp);
+ bp[-1]);
}
}
fputs("]", f);
@@ -6629,7 +6642,6 @@ print_indent_tree(FILE* f, Node* node, int indent)
case ANCHOR_END_LINE: fputs("end line", f); break;
case ANCHOR_SEMI_END_BUF: fputs("semi end buf", f); break;
case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break;
- case ANCHOR_ANYCHAR_STAR: fputs("begin position/line", f); break;
case ANCHOR_WORD_BOUND: fputs("word bound", f); break;
case ANCHOR_NOT_WORD_BOUND: fputs("not word bound", f); break;
@@ -6694,6 +6706,9 @@ print_indent_tree(FILE* f, Node* node, int indent)
case ENCLOSE_CONDITION:
fprintf(f, "condition:%d", NENCLOSE(node)->regnum);
break;
+ case ENCLOSE_ABSENT:
+ fprintf(f, "absent");
+ break;
default:
break;