summaryrefslogtreecommitdiff
path: root/regex.c
diff options
context:
space:
mode:
authormatz <matz@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2000-05-24 04:34:26 +0000
committermatz <matz@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2000-05-24 04:34:26 +0000
commit8b1de0b1ad49733abeddd8be359ae816b29de59a (patch)
treea7ed03dd3f6442635f694c4367bcd94be57f6dbd /regex.c
parent24b9bdca25dc431aff935df7739b9c5ea0ee4077 (diff)
2000-05-24
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@710 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'regex.c')
-rw-r--r--regex.c197
1 files changed, 83 insertions, 114 deletions
diff --git a/regex.c b/regex.c
index d38f640..156e75b 100644
--- a/regex.c
+++ b/regex.c
@@ -232,27 +232,27 @@ re_set_casetable(table)
#endif
#ifdef isblank
-# define ISBLANK(c) (ISASCII (c) && isblank (c))
+# define ISBLANK(c) (ISASCII(c) && isblank(c))
#else
# define ISBLANK(c) ((c) == ' ' || (c) == '\t')
#endif
#ifdef isgraph
-# define ISGRAPH(c) (ISASCII (c) && isgraph (c))
+# define ISGRAPH(c) (ISASCII(c) && isgraph(c))
#else
-# define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
+# define ISGRAPH(c) (ISASCII(c) && isprint(c) && !isspace(c))
#endif
#undef ISPRINT
-#define ISPRINT(c) (ISASCII (c) && isprint (c))
-#define ISDIGIT(c) (ISASCII (c) && isdigit (c))
-#define ISALNUM(c) (ISASCII (c) && isalnum (c))
-#define ISALPHA(c) (ISASCII (c) && isalpha (c))
-#define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
-#define ISLOWER(c) (ISASCII (c) && islower (c))
-#define ISPUNCT(c) (ISASCII (c) && ispunct (c))
-#define ISSPACE(c) (ISASCII (c) && isspace (c))
-#define ISUPPER(c) (ISASCII (c) && isupper (c))
-#define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
+#define ISPRINT(c) (ISASCII(c) && isprint(c))
+#define ISDIGIT(c) (ISASCII(c) && isdigit(c))
+#define ISALNUM(c) (ISASCII(c) && isalnum(c))
+#define ISALPHA(c) (ISASCII(c) && isalpha(c))
+#define ISCNTRL(c) (ISASCII(c) && iscntrl(c))
+#define ISLOWER(c) (ISASCII(c) && islower(c))
+#define ISPUNCT(c) (ISASCII(c) && ispunct(c))
+#define ISSPACE(c) (ISASCII(c) && isspace(c))
+#define ISUPPER(c) (ISASCII(c) && isupper(c))
+#define ISXDIGIT(c) (ISASCII(c) && isxdigit(c))
#ifndef NULL
# define NULL (void *)0
@@ -264,10 +264,10 @@ re_set_casetable(table)
(Per Bothner suggested the basic approach.) */
#undef SIGN_EXTEND_CHAR
#if __STDC__
-# define SIGN_EXTEND_CHAR(c) ((signed char) (c))
+# define SIGN_EXTEND_CHAR(c) ((signed char)(c))
#else /* not __STDC__ */
/* As in Harbison and Steele. */
-# define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
+# define SIGN_EXTEND_CHAR(c) ((((unsigned char)(c)) ^ 128) - 128)
#endif
/* These are the command codes that appear in compiled regular
@@ -350,10 +350,7 @@ enum regexpcode
stop_paren, /* Place holder at the end of (?:..). */
casefold_on, /* Turn on casefold flag. */
casefold_off, /* Turn off casefold flag. */
- mline_on, /* Turn on multi line match (match with newlines). */
- mline_off, /* Turn off multi line match. */
- posix_on, /* Turn on POSIXified line match (match with newlines). */
- posix_off, /* Turn off POSIXified line match. */
+ option_set, /* Set the match options; followed by one byte holding the new option flags. */
start_nowidth, /* Save string point to the stack. */
stop_nowidth, /* Restore string place at the point start_nowidth. */
pop_and_fail, /* Fail after popping nowidth entry from stack. */
@@ -395,7 +392,7 @@ enum regexpcode
at SOURCE. */
#define EXTRACT_NUMBER(destination, source) \
do { (destination) = *(source) & 0377; \
- (destination) += SIGN_EXTEND_CHAR (*(char*)((source) + 1)) << 8; } while (0)
+ (destination) += SIGN_EXTEND_CHAR(*(char*)((source) + 1)) << 8; } while (0)
/* Same as EXTRACT_NUMBER, except increment the pointer for source to
point to second byte of SOURCE. Note that SOURCE has to be a value
@@ -538,7 +535,7 @@ print_mbc(c)
if (bufp->allocated == (1L<<16)) goto too_big; \
bufp->allocated *= 2; \
if (bufp->allocated > (1L<<16)) bufp->allocated = (1L<<16); \
- bufp->buffer = (char*)xrealloc (bufp->buffer, bufp->allocated); \
+ bufp->buffer = (char*)xrealloc(bufp->buffer, bufp->allocated); \
if (bufp->buffer == 0) \
goto memory_exhausted; \
b = (b - old_buffer) + bufp->buffer; \
@@ -572,7 +569,7 @@ print_mbc(c)
} \
} while (0)
-#define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
+#define STREQ(s1, s2) ((strcmp(s1, s2) == 0))
#define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */
@@ -766,22 +763,12 @@ print_partial_compiled_pattern(start, end)
printf("/casefold_off");
break;
- case mline_on:
- printf("/mline_on");
+ case option_set:
+ printf("/option_set/%d", *p++);
break;
- case mline_off:
- printf("/mline_off");
- break;
-
- case posix_on:
- printf("/posix_on");
-
- case posix_off:
- printf("/posix_off");
-
case start_nowidth:
- EXTRACT_NUMBER_AND_INCR (mcnt, p);
+ EXTRACT_NUMBER_AND_INCR(mcnt, p);
printf("/start_nowidth//%d", mcnt);
break;
@@ -851,12 +838,12 @@ print_partial_compiled_pattern(start, end)
break;
case on_failure_jump:
- EXTRACT_NUMBER_AND_INCR (mcnt, p);
+ EXTRACT_NUMBER_AND_INCR(mcnt, p);
printf("/on_failure_jump//%d", mcnt);
break;
case dummy_failure_jump:
- EXTRACT_NUMBER_AND_INCR (mcnt, p);
+ EXTRACT_NUMBER_AND_INCR(mcnt, p);
printf("/dummy_failure_jump//%d", mcnt);
break;
@@ -865,56 +852,56 @@ print_partial_compiled_pattern(start, end)
break;
case finalize_jump:
- EXTRACT_NUMBER_AND_INCR (mcnt, p);
+ EXTRACT_NUMBER_AND_INCR(mcnt, p);
printf("/finalize_jump//%d", mcnt);
break;
case maybe_finalize_jump:
- EXTRACT_NUMBER_AND_INCR (mcnt, p);
+ EXTRACT_NUMBER_AND_INCR(mcnt, p);
printf("/maybe_finalize_jump//%d", mcnt);
break;
case jump_past_alt:
- EXTRACT_NUMBER_AND_INCR (mcnt, p);
+ EXTRACT_NUMBER_AND_INCR(mcnt, p);
printf("/jump_past_alt//%d", mcnt);
break;
case jump:
- EXTRACT_NUMBER_AND_INCR (mcnt, p);
+ EXTRACT_NUMBER_AND_INCR(mcnt, p);
printf("/jump//%d", mcnt);
break;
case succeed_n:
- EXTRACT_NUMBER_AND_INCR (mcnt, p);
- EXTRACT_NUMBER_AND_INCR (mcnt2, p);
+ EXTRACT_NUMBER_AND_INCR(mcnt, p);
+ EXTRACT_NUMBER_AND_INCR(mcnt2, p);
printf("/succeed_n//%d//%d", mcnt, mcnt2);
break;
case jump_n:
- EXTRACT_NUMBER_AND_INCR (mcnt, p);
- EXTRACT_NUMBER_AND_INCR (mcnt2, p);
+ EXTRACT_NUMBER_AND_INCR(mcnt, p);
+ EXTRACT_NUMBER_AND_INCR(mcnt2, p);
printf("/jump_n//%d//%d", mcnt, mcnt2);
break;
case set_number_at:
- EXTRACT_NUMBER_AND_INCR (mcnt, p);
- EXTRACT_NUMBER_AND_INCR (mcnt2, p);
+ EXTRACT_NUMBER_AND_INCR(mcnt, p);
+ EXTRACT_NUMBER_AND_INCR(mcnt2, p);
printf("/set_number_at//%d//%d", mcnt, mcnt2);
break;
case try_next:
- EXTRACT_NUMBER_AND_INCR (mcnt, p);
+ EXTRACT_NUMBER_AND_INCR(mcnt, p);
printf("/try_next//%d", mcnt);
break;
case finalize_push:
- EXTRACT_NUMBER_AND_INCR (mcnt, p);
+ EXTRACT_NUMBER_AND_INCR(mcnt, p);
printf("/finalize_push//%d", mcnt);
break;
case finalize_push_n:
- EXTRACT_NUMBER_AND_INCR (mcnt, p);
- EXTRACT_NUMBER_AND_INCR (mcnt2, p);
+ EXTRACT_NUMBER_AND_INCR(mcnt, p);
+ EXTRACT_NUMBER_AND_INCR(mcnt2, p);
printf("/finalize_push_n//%d//%d", mcnt, mcnt2);
break;
@@ -971,7 +958,7 @@ print_compiled_pattern(bufp)
{
unsigned char *buffer = (unsigned char*)bufp->buffer;
- print_partial_compiled_pattern (buffer, buffer + bufp->used);
+ print_partial_compiled_pattern(buffer, buffer + bufp->used);
}
static char*
@@ -1033,10 +1020,7 @@ calculate_must_string(start, end)
case push_dummy_failure:
case start_paren:
case stop_paren:
- case mline_on:
- case mline_off:
- case posix_on:
- case posix_off:
+ case option_set:
break;
case charset:
@@ -1050,11 +1034,11 @@ calculate_must_string(start, end)
break;
case on_failure_jump:
- EXTRACT_NUMBER_AND_INCR (mcnt, p);
+ EXTRACT_NUMBER_AND_INCR(mcnt, p);
if (mcnt > 0) p += mcnt;
if ((enum regexpcode)p[-3] == jump) {
p -= 3;
- EXTRACT_NUMBER_AND_INCR (mcnt, p);
+ EXTRACT_NUMBER_AND_INCR(mcnt, p);
if (mcnt > 0) p += mcnt;
}
break;
@@ -1063,7 +1047,7 @@ calculate_must_string(start, end)
case succeed_n:
case try_next:
case jump:
- EXTRACT_NUMBER_AND_INCR (mcnt, p);
+ EXTRACT_NUMBER_AND_INCR(mcnt, p);
if (mcnt > 0) p += mcnt;
break;
@@ -1260,6 +1244,7 @@ re_compile_pattern(pattern, size, bufp)
int had_char_class = 0;
int options = bufp->options;
+ int old_options = 0;
bufp->fastmap_accurate = 0;
bufp->must = 0;
@@ -1273,7 +1258,7 @@ re_compile_pattern(pattern, size, bufp)
bufp->allocated = INIT_BUF_SIZE;
if (bufp->buffer)
/* EXTEND_BUFFER loses when bufp->allocated is 0. */
- bufp->buffer = (char*)xrealloc (bufp->buffer, INIT_BUF_SIZE);
+ bufp->buffer = (char*)xrealloc(bufp->buffer, INIT_BUF_SIZE);
else
/* Caller did not allocate a buffer. Do it for them. */
bufp->buffer = (char*)xmalloc(INIT_BUF_SIZE);
@@ -1286,7 +1271,7 @@ re_compile_pattern(pattern, size, bufp)
switch (c) {
case '$':
- if (bufp->options & RE_OPTION_POSIXLINE) {
+ if (bufp->options & RE_OPTION_SINGLELINE) {
BUFPUSH(endbuf);
}
else {
@@ -1306,7 +1291,7 @@ re_compile_pattern(pattern, size, bufp)
break;
case '^':
- if (bufp->options & RE_OPTION_POSIXLINE)
+ if (bufp->options & RE_OPTION_SINGLELINE)
BUFPUSH(begbuf);
else
BUFPUSH(begline);
@@ -1576,7 +1561,7 @@ re_compile_pattern(pattern, size, bufp)
/* Leave room for the null. */
char str[CHAR_CLASS_MAX_LENGTH + 1];
- PATFETCH_RAW (c);
+ PATFETCH_RAW(c);
c1 = 0;
/* If pattern is `[[:'. */
@@ -1610,11 +1595,11 @@ re_compile_pattern(pattern, size, bufp)
char is_upper = STREQ(str, "upper");
char is_xdigit = STREQ(str, "xdigit");
- if (!IS_CHAR_CLASS (str))
+ if (!IS_CHAR_CLASS(str))
goto invalid_pattern;
/* Throw away the ] at the end of the character class. */
- PATFETCH (c);
+ PATFETCH(c);
if (p == pend)
goto invalid_pattern;
@@ -1632,7 +1617,7 @@ re_compile_pattern(pattern, size, bufp)
|| (is_space && ISSPACE(ch))
|| (is_upper && ISUPPER(ch))
|| (is_xdigit && ISXDIGIT(ch)))
- SET_LIST_BIT (ch);
+ SET_LIST_BIT(ch);
}
had_char_class = 1;
}
@@ -1661,14 +1646,16 @@ re_compile_pattern(pattern, size, bufp)
b[-1]--;
if (b[-1] != (1 << BYTEWIDTH) / BYTEWIDTH)
memmove(&b[b[-1]], &b[(1 << BYTEWIDTH) / BYTEWIDTH],
- 2 + EXTRACT_UNSIGNED (&b[(1 << BYTEWIDTH) / BYTEWIDTH])*8);
- b += b[-1] + 2 + EXTRACT_UNSIGNED (&b[b[-1]])*8;
+ 2 + EXTRACT_UNSIGNED(&b[(1 << BYTEWIDTH) / BYTEWIDTH])*8);
+ b += b[-1] + 2 + EXTRACT_UNSIGNED(&b[b[-1]])*8;
break;
case '(':
+ old_options = options;
PATFETCH(c);
if (c == '?') {
int negative = 0;
+ int push_option = 0;
PATFETCH_RAW(c);
switch (c) {
case 'x': case 'p': case 'm': case 'i': case '-':
@@ -1691,28 +1678,26 @@ re_compile_pattern(pattern, size, bufp)
case 'p':
if (negative) {
- if (options&RE_OPTION_POSIXLINE) {
+ if ((options&RE_OPTION_POSIXLINE) == RE_OPTION_POSIXLINE) {
options &= ~RE_OPTION_POSIXLINE;
- BUFPUSH(posix_off);
}
}
- else if (!(options&RE_OPTION_POSIXLINE)) {
+ else if ((options&RE_OPTION_POSIXLINE) != RE_OPTION_POSIXLINE) {
options |= RE_OPTION_POSIXLINE;
- BUFPUSH(posix_on);
}
+ push_option = 1;
break;
case 'm':
if (negative) {
if (options&RE_OPTION_MULTILINE) {
options &= ~RE_OPTION_MULTILINE;
- BUFPUSH(mline_off);
}
}
else if (!(options&RE_OPTION_MULTILINE)) {
options |= RE_OPTION_MULTILINE;
- BUFPUSH(mline_on);
}
+ push_option = 1;
break;
case 'i':
@@ -1757,6 +1742,10 @@ re_compile_pattern(pattern, size, bufp)
default:
FREE_AND_RETURN(stackb, "undefined (?...) sequence");
}
+ if (push_option) {
+ BUFPUSH(option_set);
+ BUFPUSH(options);
+ }
}
else {
PATUNFETCH;
@@ -1813,7 +1802,7 @@ re_compile_pattern(pattern, size, bufp)
break;
}
*stackp++ = c;
- *stackp++ = options;
+ *stackp++ = old_options;
fixup_alt_jump = 0;
laststart = 0;
begalt = b;
@@ -1822,14 +1811,13 @@ re_compile_pattern(pattern, size, bufp)
case ')':
if (stackp == stackb)
FREE_AND_RETURN(stackb, "unmatched )");
- if ((options ^ stackp[-1]) & RE_OPTION_IGNORECASE) {
- BUFPUSH((options&RE_OPTION_IGNORECASE)?casefold_off:casefold_on);
- }
- if ((options ^ stackp[-1]) & RE_OPTION_POSIXLINE) {
- BUFPUSH((options&RE_OPTION_MULTILINE)?posix_off:posix_on);
- }
- if ((options ^ stackp[-1]) & RE_OPTION_MULTILINE) {
- BUFPUSH((options&RE_OPTION_MULTILINE)?mline_off:mline_on);
+
+ if (options != stackp[-1]) {
+ if ((options ^ stackp[-1]) & RE_OPTION_IGNORECASE) {
+ BUFPUSH((options&RE_OPTION_IGNORECASE)?casefold_off:casefold_on);
+ }
+ BUFPUSH(option_set);
+ BUFPUSH(stackp[-1]);
}
pending_exact = 0;
if (fixup_alt_jump) {
@@ -2111,7 +2099,7 @@ re_compile_pattern(pattern, size, bufp)
beg_interval = 0;
/* normal_char and normal_backslash need `c'. */
- PATFETCH (c);
+ PATFETCH(c);
goto normal_char;
case '\\':
@@ -2195,7 +2183,7 @@ re_compile_pattern(pattern, size, bufp)
break;
case 'Z':
- if ((bufp->options & RE_OPTION_POSIXLINE) == 0) {
+ if ((bufp->options & RE_OPTION_SINGLELINE) == 0) {
BUFPUSH(endbuf2);
break;
}
@@ -2789,14 +2777,8 @@ re_compile_fastmap(bufp)
options ^= RE_OPTION_IGNORECASE;
continue;
- case mline_on:
- case mline_off:
- options ^= RE_OPTION_MULTILINE;
- continue;
-
- case posix_on:
- case posix_off:
- options ^= RE_OPTION_POSIXLINE;
+ case option_set:
+ options = *p++;
continue;
case endline:
@@ -2804,7 +2786,7 @@ re_compile_fastmap(bufp)
fastmap[translate['\n']] = 1;
else
fastmap['\n'] = 1;
- if ((options & RE_OPTION_POSIXLINE) == 0 && bufp->can_be_null == 0)
+ if ((options & RE_OPTION_SINGLELINE) == 0 && bufp->can_be_null == 0)
bufp->can_be_null = 2;
break;
@@ -2889,7 +2871,7 @@ re_compile_fastmap(bufp)
case anychar_repeat:
case anychar:
for (j = 0; j < (1 << BYTEWIDTH); j++) {
- if (j != '\n' || (options & RE_OPTION_POSIXLINE))
+ if (j != '\n' || (options & RE_OPTION_MULTILINE))
fastmap[j] = 1;
}
if (bufp->can_be_null) {
@@ -3167,7 +3149,7 @@ re_search(bufp, string, size, startpos, range, regs)
}
}
if (bufp->options & RE_OPTIMIZE_ANCHOR) {
- if (bufp->options&RE_OPTION_POSIXLINE) {
+ if (bufp->options&RE_OPTION_SINGLELINE) {
goto begbuf_match;
}
anchor = 1;
@@ -3783,9 +3765,8 @@ re_match(bufp, string_arg, size, pos, regs)
d += mbclen(*d);
break;
}
- if (!(options&RE_OPTION_MULTILINE) &&
- !(options&RE_OPTION_POSIXLINE) &&
- (TRANSLATE_P() ? translate[*d] : *d) == '\n')
+ if (!(options&RE_OPTION_MULTILINE)
+ && (TRANSLATE_P() ? translate[*d] : *d) == '\n')
goto fail;
SET_REGS_MATCHED;
d++;
@@ -4132,20 +4113,8 @@ re_match(bufp, string_arg, size, pos, regs)
options &= ~RE_OPTION_IGNORECASE;
continue;
- case mline_on:
- options |= RE_OPTION_MULTILINE;
- continue;
-
- case mline_off:
- options &= ~RE_OPTION_MULTILINE;
- continue;
-
- case posix_on:
- options |= RE_OPTION_POSIXLINE;
- continue;
-
- case posix_off:
- options &= ~RE_OPTION_POSIXLINE;
+ case option_set:
+ options = *p++;
continue;
case wordbound:
@@ -4308,7 +4277,7 @@ re_match(bufp, string_arg, size, pos, regs)
case finalize_push:
case jump:
p1++;
- EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+ EXTRACT_NUMBER_AND_INCR(mcnt, p1);
if (mcnt >= 0) break; /* should be backward jump */
p1 += mcnt;