summary refs log tree commit diff
path: root/regex.c
diff options
context:
space:
mode:
Diffstat (limited to 'regex.c')
-rw-r--r-- regex.c 152
1 files changed, 71 insertions, 81 deletions
diff --git a/regex.c b/regex.c
index 0ac881c1757..6b0ee087e5f 100644
--- a/regex.c
+++ b/regex.c
@@ -294,7 +294,6 @@ enum regexpcode
and store it in a memory register. Followed by
one byte containing the register number. Register
numbers must be in the range 0 through RE_NREGS. */
- start_paren, /* Just a mark for starting(?:). */
casefold_on, /* Turn on casefold flag. */
casefold_off, /* Turn off casefold flag. */
start_nowidth, /* Save string point to the stack. */
@@ -661,10 +660,6 @@ print_partial_compiled_pattern(start, end)
printf ("/stop_memory/%d", mcnt);
break;
- case start_paren:
- printf ("/start_paren");
- break;
-
case casefold_on:
printf ("/casefold_on");
break;
@@ -882,7 +877,6 @@ calculate_must_string(start, end)
case casefold_off:
return 0; /* should not check must_string */
- case start_paren:
case start_nowidth:
case stop_nowidth:
case pop_and_fail:
@@ -1010,6 +1004,10 @@ re_compile_pattern(pattern, size, bufp)
char *begalt = b;
+ /* Place in the uncompiled pattern (i.e., the {) to
+ which to go back if the interval is invalid. */
+ char *beg_interval;
+
/* In processing an interval, at least this many matches must be made. */
int lower_bound;
@@ -1530,8 +1528,6 @@ re_compile_pattern(pattern, size, bufp)
break;
case ':':
- if (b > bufp->buffer && b[-1] != start_paren)
- BUFPUSH(start_paren);
pending_exact = 0;
default:
break;
@@ -1616,46 +1612,35 @@ re_compile_pattern(pattern, size, bufp)
case '{':
/* If there is no previous pattern, this isn't an interval. */
- if (!laststart)
+ if (!laststart || p == pend)
{
goto normal_backsl;
}
- /* It also isn't an interval if not preceded by an re
- matching a single character or subexpression, or if
- the current type of intervals can't handle back
- references and the previous thing is a back reference. */
-
- if (! (*laststart == anychar
- || *laststart == charset
- || *laststart == charset_not
- || *laststart == wordchar
- || *laststart == notwordchar
- || *laststart == start_memory
- || *laststart == start_paren
- || (*laststart == exactn
- && (laststart[1] == 1
- || (laststart[1] == 2 && ismbchar(laststart[2]))))
- || *laststart == duplicate))
- {
- /* Posix extended syntax is handled in previous
- statement; this is for Posix basic syntax. */
- goto normal_backsl;
- }
+
+ beg_interval = p - 1;
+
lower_bound = -1; /* So can see if are set. */
upper_bound = -1;
GET_UNSIGNED_NUMBER(lower_bound);
if (c == ',') {
GET_UNSIGNED_NUMBER(upper_bound);
- if (upper_bound < 0)
- upper_bound = RE_DUP_MAX;
+ if (upper_bound < 0) upper_bound = RE_DUP_MAX;
}
- if (upper_bound < 0)
+ else
+ /* Interval such as `{1}' => match exactly once. */
upper_bound = lower_bound;
- if (c != '}' || lower_bound < 0 || upper_bound > RE_DUP_MAX
- || lower_bound > upper_bound
- || (p != pend && *p == '{')) {
- goto invalid_pattern;
- }
+
+ if (lower_bound < 0 || c != '}')
+ goto unfetch_interval;
+
+ if (lower_bound > RE_DUP_MAX || upper_bound > RE_DUP_MAX)
+ FREE_AND_RETURN(stackb, "too big quantifier in {,}");
+ if (lower_bound > upper_bound)
+ FREE_AND_RETURN(stackb, "can't do {n,m} with n > m");
+
+ beg_interval = 0;
+ pending_exact = 0;
+
greedy = 1;
if (p != pend) {
PATFETCH(c);
@@ -1663,17 +1648,6 @@ re_compile_pattern(pattern, size, bufp)
else PATUNFETCH;
}
- /* If upper_bound is zero, don't want to succeed at all;
- jump from laststart to b + 3, which will be the end of
- the buffer after this jump is inserted. */
-
- if (upper_bound == 0) {
- GET_BUFFER_SPACE(3);
- insert_jump(jump, laststart, b + 3, b);
- b += 3;
- break;
- }
-
if (lower_bound == 0) {
zero_times_ok = 1;
if (upper_bound == RE_DUP_MAX) {
@@ -1685,28 +1659,49 @@ re_compile_pattern(pattern, size, bufp)
goto repeat;
}
}
- if (lower_bound == 1 && upper_bound == RE_DUP_MAX) {
- many_times_ok = 1;
- zero_times_ok = 0;
- goto repeat;
+ if (lower_bound == 1) {
+ if (upper_bound == 1) {
+ /* No need to repeat */
+ break;
+ }
+ if (upper_bound == RE_DUP_MAX) {
+ many_times_ok = 1;
+ zero_times_ok = 0;
+ goto repeat;
+ }
}
- /* Star, etc. applied to an empty pattern is equivalent
- to an empty pattern. */
- if (!laststart)
+ /* If upper_bound is zero, don't want to succeed at all;
+ jump from laststart to b + 3, which will be the end of
+ the buffer after this jump is inserted. */
+
+ if (upper_bound == 0) {
+ GET_BUFFER_SPACE(3);
+ insert_jump(jump, laststart, b + 3, b);
+ b += 3;
break;
+ }
+ /* Otherwise, we have a nontrivial interval. When
+ we're all done, the pattern will look like:
+ set_number_at <jump count> <upper bound>
+ set_number_at <succeed_n count> <lower bound>
+ succeed_n <after jump addr> <succeed_n count>
+ <body of loop>
+ jump_n <succeed_n addr> <jump count>
+ (The upper bound and `jump_n' are omitted if
+ `upper_bound' is 1, though.) */
{ /* If the upper bound is > 1, we need to insert
more at the end of the loop. */
- unsigned slots_needed = upper_bound == 1 ? 5 : 10;
+ unsigned nbytes = upper_bound == 1 ? 10 : 20;
- GET_BUFFER_SPACE(5);
+ GET_BUFFER_SPACE(nbytes);
/* Initialize lower bound of the `succeed_n', even
though it will be set during matching by its
attendant `set_number_at' (inserted next),
because `re_compile_fastmap' needs to know.
Jump to the `jump_n' we might insert below. */
- insert_jump_n(succeed_n, laststart, b + slots_needed,
+ insert_jump_n(succeed_n, laststart, b + (nbytes/2),
b, lower_bound);
b += 5; /* Just increment for the succeed_n here. */
@@ -1714,7 +1709,6 @@ re_compile_pattern(pattern, size, bufp)
before the `succeed_n'. The `5' is the last two
bytes of this `set_number_at', plus 3 bytes of
the following `succeed_n'. */
- GET_BUFFER_SPACE(5);
insert_op_2(set_number_at, laststart, b, 5, lower_bound);
b += 5;
@@ -1727,7 +1721,8 @@ re_compile_pattern(pattern, size, bufp)
we'll have matched the interval once, so
jump back only `upper_bound - 1' times. */
GET_BUFFER_SPACE(5);
- store_jump_n(b, greedy?jump_n:finalize_push_n, laststart + 5, upper_bound - 1);
+ store_jump_n(b, greedy?jump_n:finalize_push_n, laststart + 5,
+ upper_bound - 1);
b += 5;
/* The location we want to set is the second
@@ -1744,24 +1739,22 @@ re_compile_pattern(pattern, size, bufp)
We insert this at the beginning of the loop
so that if we fail during matching, we'll
reinitialize the bounds. */
- GET_BUFFER_SPACE(5);
- insert_op_2(set_number_at, laststart, b, b - laststart, upper_bound - 1);
+ insert_op_2(set_number_at, laststart, b, b - laststart,
+ upper_bound - 1);
b += 5;
-
- GET_BUFFER_SPACE(5);
- BUFPUSH(set_number_at);
- STORE_NUMBER_AND_INCR(b, laststart - b + 11);
- STORE_NUMBER_AND_INCR(b, lower_bound);
-
- GET_BUFFER_SPACE(5);
- BUFPUSH(set_number_at);
- STORE_NUMBER_AND_INCR(b, -10);
- STORE_NUMBER_AND_INCR(b, upper_bound - 1);
}
- pending_exact = 0;
}
break;
+ unfetch_interval:
+ /* If an invalid interval, match the characters as literals. */
+ p = beg_interval;
+ beg_interval = 0;
+
+ /* normal_char and normal_backsl need `c'. */
+ PATFETCH (c);
+ goto normal_char;
+
case '\\':
if (p == pend) goto invalid_pattern;
/* Do not translate the character after the \, so that we can
@@ -2246,7 +2239,6 @@ re_compile_fastmap(bufp)
case wordbeg:
case wordend:
case pop_and_fail:
- case start_paren:
continue;
case casefold_on:
@@ -2283,6 +2275,7 @@ re_compile_fastmap(bufp)
if ((enum regexpcode) *p != on_failure_jump
&& (enum regexpcode) *p != try_next
+ && (enum regexpcode) *p != succeed_n
&& (enum regexpcode) *p != finalize_push
&& (enum regexpcode) *p != finalize_push_n)
continue;
@@ -2718,11 +2711,11 @@ typedef union
{ \
unsigned char **stackx; \
unsigned int len = stacke - stackb; \
- if (len > re_max_failures * MAX_NUM_FAILURE_ITEMS) \
+ /* if (len > re_max_failures * MAX_NUM_FAILURE_ITEMS) \
{ \
FREE_VARIABLES(); \
FREE_AND_RETURN(stackb,(-2)); \
- } \
+ }*/ \
\
/* Roughly double the size of the stack. */ \
EXPAND_FAIL_STACK(stackx, stackb, len); \
@@ -3387,7 +3380,7 @@ re_match(bufp, string_arg, size, pos, regs)
EXTRACT_NUMBER_AND_INCR(mcnt, p);
PUSH_FAILURE_POINT(p + mcnt, d);
stackp[-1] = (unsigned char*)1;
- p += 7; /* skip n and set_number_at after destination */
+ p += 2; /* skip n */
}
/* If don't have to push any more, skip over the rest of command. */
else
@@ -3399,9 +3392,6 @@ re_match(bufp, string_arg, size, pos, regs)
case unused:
continue;
- case start_paren:
- continue;
-
case casefold_on:
options |= RE_OPTION_IGNORECASE;
continue;