summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authormatz <matz@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>1998-06-05 09:54:28 +0000
committermatz <matz@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>1998-06-05 09:54:28 +0000
commit5196e8609ba576021305710610b071d31777e798 (patch)
tree375c2e7a9bedbb9996f3839418093249c75240c8
parentebfbdcf4dffb423363965f6b41e574c69794db5f (diff)
regex
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/v1_1r@234 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r--ChangeLog8
-rw-r--r--lib/date2.rb119
-rw-r--r--lib/tk.rb19
-rw-r--r--regex.c507
-rw-r--r--sample/ruby-mode.el2
-rw-r--r--string.c2
6 files changed, 555 insertions, 102 deletions
diff --git a/ChangeLog b/ChangeLog
index 517f389d19..3cf5ccf6ae 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@
+Wed Jun 3 18:07:54 1998 Yukihiro Matsumoto <matz@netlab.co.jp>
+
+ * re.c (reg_raise): check rb_in_compile, not rb_in_eval.
+
+Mon Jun 1 05:26:06 1998 WATANABE Tetsuya <tetsu@jpn.hp.com>
+
+ * string.c (trnext): casting to signed char* needed.
+
Tue Jun 2 16:00:12 1998 Yukihiro Matsumoto <matz@netlab.co.jp>
* ext/socket/socket.c (udp_addrsetup): error check enhanced.
diff --git a/lib/date2.rb b/lib/date2.rb
index 939e633fe0..50c2ccfbd9 100644
--- a/lib/date2.rb
+++ b/lib/date2.rb
@@ -1,5 +1,5 @@
# date.rb: Written by Tadayoshi Funaba 1998
-# $Id: date.rb,v 1.3 1998/03/08 09:43:54 tadf Exp $
+# $Id: date.rb,v 1.4 1998/06/01 12:52:33 tadf Exp $
class Date
@@ -11,8 +11,8 @@ class Date
DAYNAMES = [ 'Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday',
'Friday', 'Saturday' ]
- GREGORY = 2299161 # Oct 15, 1582
- ENGLAND = 2361222 # Sept 14, 1752
+ ITALY = 2299161 # Oct 15, 1582
+ ENGLAND = 2361222 # Sept 14, 1752
def Date.civil_to_jd(y, m, d, gs = true)
if m <= 2 then
@@ -20,16 +20,15 @@ class Date
m += 12
end
a = (y / 100).to_i
- b = (a / 4).to_i
- c = 2 - a + b
- e = (365.25 * (y + 4716)).to_i
- f = (30.6001 * (m + 1)).to_i
- jd = c + d + e + f - 1524
+ b = 2 - a + (a / 4).to_i
+ jd = (365.25 * (y + 4716)).to_i +
+ (30.6001 * (m + 1)).to_i +
+ d + b - 1524
unless
(if gs.kind_of? Numeric then jd >= gs else gs end) then
- jd -= c
+ jd -= b
end
- return jd
+ jd
end
def Date.jd_to_civil(jd, gs = true)
@@ -37,27 +36,22 @@ class Date
(if gs.kind_of? Numeric then jd >= gs else gs end) then
a = jd
else
- w = ((jd - 1867216.25) / 36524.25).to_i
- x = (w / 4).to_i
- a = jd + 1 + w - x
+ x = ((jd - 1867216.25) / 36524.25).to_i
+ a = jd + 1 + x - (x / 4).to_i
end
b = a + 1524
c = ((b - 122.1) / 365.25).to_i
d = (365.25 * c).to_i
e = ((b - d) / 30.6001).to_i
- f = (30.6001 * e).to_i
- day = b - d - f
+ dom = b - d - (30.6001 * e).to_i
if e <= 13 then
m = e - 1
+ y = c - 4716
else
m = e - 13
- end
- if m <= 2 then
y = c - 4715
- else
- y = c - 4716
end
- return y, m, day
+ return y, m, dom
end
def Date.mjd_to_jd(mjd)
@@ -76,22 +70,38 @@ class Date
jd - 2440000.5
end
- def initialize(jd = 0, gs = GREGORY)
- @jd = jd
- @gs = gs
+ def Date.julian_leap? (y)
+ y % 4 == 0
+ end
+
+ def Date.gregorian_leap? (y)
+ y % 4 == 0 and y % 100 != 0 or y % 400 == 0
+ end
+
+ def Date.leap? (y)
+ Date.gregorian_leap?(y)
end
- def Date.new3(y = -4712, m = 1, d = 1, gs = GREGORY)
+ def initialize(jd = 0, gs = ITALY)
+ @jd, @gs = jd, gs
+ end
+
+ def Date.exist? (y, m, d, gs = true)
jd = Date.civil_to_jd(y, m, d, gs)
- y2, m2, d2 = Date.jd_to_civil(jd, gs)
- unless y == y2 and m == m2 and d == d2 then
- raise ArgumentError, 'invalid date'
+ if [y, m, d] == Date.jd_to_civil(jd, gs) then
+ jd
+ end
+ end
+
+ def Date.new3(y = -4712, m = 1, d = 1, gs = ITALY)
+ unless jd = Date.exist?(y, m, d, gs) then
+ fail ArgumentError, 'invalid date'
end
Date.new(jd, gs)
end
- def Date.today(gs = GREGORY)
- Date.new3(*(Time.now.to_a[3..5].reverse << gs))
+ def Date.today(gs = ITALY)
+ Date.new(Date.civil_to_jd(*(Time.now.to_a[3..5].reverse << gs)), gs)
end
def jd
@@ -99,48 +109,63 @@ class Date
end
def mjd
- Date.jd_to_mjd(@jd)
+ def self.mjd; @mjd end
+ @mjd = Date.jd_to_mjd(@jd)
end
def tjd
- Date.jd_to_tjd(@jd)
+ def self.tjd; @tjd end
+ @tjd = Date.jd_to_tjd(@jd)
+ end
+
+ def civil
+ def self.year; @year end
+ def self.mon; @mon end
+ def self.mday; @mday end
+ @year, @mon, @mday = Date.jd_to_civil(@jd, @gs)
end
+ private :civil
+
def year
- Date.jd_to_civil(@jd, @gs)[0]
+ civil
+ @year
end
def yday
- gs = if @gs.kind_of? Numeric then @jd >= @gs else @gs end
- jd = Date.civil_to_jd(year - 1, 12, 31, gs)
- @jd - jd
+ def self.yday; @yday end
+ ns = if @gs.kind_of? Numeric then @jd >= @gs else @gs end
+ jd = Date.civil_to_jd(year - 1, 12, 31, ns)
+ @yday = @jd - jd
end
def mon
- Date.jd_to_civil(@jd, @gs)[1]
+ civil
+ @mon
end
def mday
- Date.jd_to_civil(@jd, @gs)[2]
+ civil
+ @mday
end
def wday
- k = (@jd + 1) % 7
- k += 7 if k < 0
- k
+ def self.wday; @wday end
+ @wday = (@jd + 1) % 7
end
def leap?
- gs = if @gs.kind_of? Numeric then @jd >= @gs else @gs end
- jd = Date.civil_to_jd(year, 2, 28, gs)
- Date.jd_to_civil(jd + 1, gs)[1] == 2
+ def self.leap?; @leap_p end
+ ns = if @gs.kind_of? Numeric then @jd >= @gs else @gs end
+ jd = Date.civil_to_jd(year, 2, 28, ns)
+ @leap_p = Date.jd_to_civil(jd + 1, ns)[1] == 2
end
def + (other)
if other.kind_of? Numeric then
return Date.new(@jd + other, @gs)
end
- raise TypeError, 'expected numeric'
+ fail TypeError, 'expected numeric'
end
def - (other)
@@ -149,7 +174,7 @@ class Date
elsif other.kind_of? Date then
return @jd - other.jd
end
- raise TypeError, 'expected numeric or date'
+ fail TypeError, 'expected numeric or date'
end
def <=> (other)
@@ -158,7 +183,7 @@ class Date
elsif other.kind_of? Date then
return @jd <=> other.jd
end
- raise TypeError, 'expected numeric or date'
+ fail TypeError, 'expected numeric or date'
end
def downto(min)
@@ -188,7 +213,7 @@ class Date
end
def to_s
- format('%04d-%02d-%02d', *Date.jd_to_civil(@jd, @gs))
+ format('%04d-%02d-%02d', year, mon, mday)
end
end
diff --git a/lib/tk.rb b/lib/tk.rb
index 213fc54de7..3891428ff7 100644
--- a/lib/tk.rb
+++ b/lib/tk.rb
@@ -910,15 +910,26 @@ class TkWindow<TkObject
ilist = list(tk_call('place', 'info', epath))
info = {}
while key = ilist.shift
- info[key[1,-1]] = ilist.shift
+ info[key[1..-1]] = ilist.shift
+ end
+ return info
+ end
+
+ def pack_slaves()
+ list(tk_call('pack', 'slaves', epath))
+ end
+
+ def pack_info()
+ ilist = list(tk_call('pack', 'info', epath))
+ info = {}
+ while key = ilist.shift
+ info[key[1..-1]] = ilist.shift
end
return info
end
def place_slaves()
- list(tk_call('place', 'slaves', epath)).collect { |w|
- window(w)
- }
+ list(tk_call('place', 'slaves', epath))
end
def focus
diff --git a/regex.c b/regex.c
index 6b0ee087e5..b07561897e 100644
--- a/regex.c
+++ b/regex.c
@@ -99,6 +99,8 @@ char *alloca();
do { \
FREE_VAR(regstart); \
FREE_VAR(regend); \
+ FREE_VAR(old_regstart) \
+ FREE_VAR(old_regend); \
FREE_VAR(best_regstart); \
FREE_VAR(best_regend); \
FREE_VAR(reg_info); \
@@ -135,6 +137,9 @@ static void insert_jump_n P((int, char *, char *, char *, unsigned));
static void insert_op P((int, char *, char *));
static void insert_op_2 P((int, char *, char *, int, int));
static int memcmp_translate P((unsigned char *, unsigned char *, int));
+static int alt_match_null_string_p ();
+static int common_op_match_null_string_p ();
+static int group_match_null_string_p ();
/* Define the syntax stuff, so we can do the \<, \>, etc. */
@@ -244,6 +249,7 @@ enum regexpcode
of string to be matched (if not). */
endbuf, /* Analogously, for end of buffer/string. */
jump, /* Followed by two bytes giving relative address to jump to. */
+ jump_past_alt,/* Same as jump, but marks the end of an alternative. */
on_failure_jump, /* Followed by two bytes giving relative address of
place to resume at in case of failure. */
finalize_jump, /* Throw away latest failure point and then jump to
@@ -262,6 +268,8 @@ enum regexpcode
makes this before the first repeat. Also
use it as an intermediary kind of jump when
compiling an or construct. */
+ push_dummy_failure, /* Push a dummy failure point and continue. Used at the end of
+ alternatives. */
succeed_n, /* Used like on_failure_jump except has to succeed n times;
then gets turned into an on_failure_jump. The relative
address following it is useless until then. The
@@ -431,8 +439,8 @@ long re_syntax_options = 0;
if (bufp->buffer == 0) \
goto memory_exhausted; \
b = (b - old_buffer) + bufp->buffer; \
- if (fixup_jump) \
- fixup_jump = (fixup_jump - old_buffer) + bufp->buffer; \
+ if (fixup_alt_jump) \
+ fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer; \
if (laststart) \
laststart = (laststart - old_buffer) + bufp->buffer; \
begalt = (begalt - old_buffer) + bufp->buffer; \
@@ -652,12 +660,12 @@ print_partial_compiled_pattern(start, end)
case start_memory:
mcnt = *p++;
- printf ("/start_memory/%d", mcnt);
+ printf ("/start_memory/%d/%d", mcnt, *p++);
break;
case stop_memory:
mcnt = *p++;
- printf ("/stop_memory/%d", mcnt);
+ printf ("/stop_memory/%d/%d", mcnt, *p++);
break;
case casefold_on:
@@ -740,6 +748,10 @@ print_partial_compiled_pattern(start, end)
printf ("/dummy_failure_jump//%d", mcnt);
break;
+ case push_dummy_failure:
+ printf ("/push_dummy_failure");
+ break;
+
case finalize_jump:
EXTRACT_NUMBER_AND_INCR (mcnt, p);
printf ("/finalize_jump//%d", mcnt);
@@ -750,6 +762,11 @@ print_partial_compiled_pattern(start, end)
printf ("/maybe_finalize_jump//%d", mcnt);
break;
+ case jump_past_alt:
+ EXTRACT_NUMBER_AND_INCR (mcnt, p);
+ printf ("/jump_past_alt//%d", mcnt);
+ break;
+
case jump:
EXTRACT_NUMBER_AND_INCR (mcnt, p);
printf ("/jump//%d", mcnt);
@@ -869,6 +886,9 @@ calculate_must_string(start, end)
case start_memory:
case stop_memory:
+ p += 2;
+ break;
+
case duplicate:
p++;
break;
@@ -891,6 +911,7 @@ calculate_must_string(start, end)
case notwordchar:
case begbuf:
case endbuf:
+ case push_dummy_failure:
break;
case charset:
@@ -981,7 +1002,7 @@ re_compile_pattern(pattern, size, bufp)
the containing expression. Each alternative of an `or', except the
last, ends with a forward-jump of this sort. */
- char *fixup_jump = 0;
+ char *fixup_alt_jump = 0;
/* Address of start of the most recently finished expression.
This tells postfix * where to find the start of its operand. */
@@ -1017,7 +1038,7 @@ re_compile_pattern(pattern, size, bufp)
/* Stack of information saved by \( and restored by \).
Five stack elements are pushed by each \(:
First, the value of b.
- Second, the value of fixup_jump.
+ Second, the value of fixup_alt_jump.
Third, the value of begalt.
Fourth, the value of regnum.
Fifth, the type of the paren. */
@@ -1487,7 +1508,7 @@ re_compile_pattern(pattern, size, bufp)
c = '(';
}
if (c == '#') break;
- if (stackp+7 >= stacke) {
+ if (stackp+8 >= stacke) {
int *stackx;
unsigned int len = stacke - stackb;
@@ -1502,13 +1523,15 @@ re_compile_pattern(pattern, size, bufp)
to push (unless the pattern has RE_NREGS or more ('s). */
/* obsolete: now RE_NREGS is just a default register size. */
*stackp++ = b - bufp->buffer;
- *stackp++ = fixup_jump ? fixup_jump - bufp->buffer + 1 : 0;
+ *stackp++ = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
*stackp++ = begalt - bufp->buffer;
switch (c) {
case '(':
BUFPUSH(start_memory);
BUFPUSH(regnum);
*stackp++ = regnum++;
+ *stackp++ = b - bufp->buffer;
+ BUFPUSH(0);
/* too many ()'s to fit in a byte. (max 254) */
if (regnum >= RE_REG_MAX) goto too_big;
break;
@@ -1534,7 +1557,7 @@ re_compile_pattern(pattern, size, bufp)
}
*stackp++ = c;
*stackp++ = options;
- fixup_jump = 0;
+ fixup_alt_jump = 0;
laststart = 0;
begalt = b;
break;
@@ -1548,11 +1571,27 @@ re_compile_pattern(pattern, size, bufp)
options = *--stackp;
switch (c = *--stackp) {
case '(':
- if (fixup_jump)
- store_jump(fixup_jump, jump, b);
- BUFPUSH(stop_memory);
- BUFPUSH(stackp[-1]);
- stackp--;
+ case ':':
+ pending_exact = 0;
+ if (fixup_alt_jump)
+ { /* Push a dummy failure point at the end of the
+ alternative for a possible future
+ `finalize_jump' to pop. See comments at
+ `push_dummy_failure' in `re_match_2'. */
+ BUFPUSH(push_dummy_failure);
+
+ /* We allocated space for this jump when we assigned
+ to `fixup_alt_jump', in the `handle_alt' case below. */
+ store_jump(fixup_alt_jump, jump, b);
+ }
+ if (c == '(') {
+ char *loc = bufp->buffer + *--stackp;
+ *loc = regnum - stackp[-1];
+ BUFPUSH(stop_memory);
+ BUFPUSH(stackp[-1]);
+ BUFPUSH(regnum - stackp[-1]);
+ stackp--;
+ }
break;
case '!':
@@ -1570,16 +1609,12 @@ re_compile_pattern(pattern, size, bufp)
stackp--;
break;
- case ':':
- if (fixup_jump)
- store_jump(fixup_jump, jump, b);
- pending_exact = 0;
default:
break;
}
begalt = *--stackp + bufp->buffer;
stackp--;
- fixup_jump = *stackp ? *stackp + bufp->buffer - 1 : 0;
+ fixup_alt_jump = *stackp ? *stackp + bufp->buffer - 1 : 0;
laststart = *--stackp + bufp->buffer;
if (c == '!' || c == '=') laststart = b;
break;
@@ -1591,19 +1626,28 @@ re_compile_pattern(pattern, size, bufp)
insert_jump(on_failure_jump, begalt, b + 6, b);
pending_exact = 0;
b += 3;
- /* The alternative before the previous alternative has a
- jump after it which gets executed if it gets matched.
- Adjust that jump so it will jump to the previous
- alternative's analogous jump (put in below, which in
- turn will jump to the next (if any) alternative's such
- jump, etc.). The last such jump jumps to the correct
- final destination. */
- if (fixup_jump)
- store_jump(fixup_jump, jump, b);
+ /* The alternative before this one has a jump after it
+ which gets executed if it gets matched. Adjust that
+ jump so it will jump to this alternative's analogous
+ jump (put in below, which in turn will jump to the next
+ (if any) alternative's such jump, etc.). The last such
+ jump jumps to the correct final destination. A picture:
+ _____ _____
+ | | | |
+ | v | v
+ a | b | c
+
+ If we are at `b', then fixup_alt_jump right now points to a
+ three-byte space after `a'. We'll put in the jump, set
+ fixup_alt_jump to right after `b', and leave behind three
+ bytes which we'll fill in when we get to after `c'. */
+
+ if (fixup_alt_jump)
+ store_jump(fixup_alt_jump, jump_past_alt, b);
/* Leave space for a jump after previous alternative---to be
filled in later. */
- fixup_jump = b;
+ fixup_alt_jump = b;
b += 3;
laststart = 0;
@@ -1947,8 +1991,8 @@ re_compile_pattern(pattern, size, bufp)
}
}
- if (fixup_jump)
- store_jump(fixup_jump, jump, b);
+ if (fixup_alt_jump)
+ store_jump(fixup_alt_jump, jump, b);
if (stackp != stackb)
FREE_AND_RETURN(stackb, "unmatched (");
@@ -2239,6 +2283,7 @@ re_compile_fastmap(bufp)
case wordbeg:
case wordend:
case pop_and_fail:
+ case push_dummy_failure:
continue;
case casefold_on:
@@ -2261,6 +2306,7 @@ re_compile_fastmap(bufp)
case finalize_jump:
case maybe_finalize_jump:
case jump:
+ case jump_past_alt:
case dummy_failure_jump:
EXTRACT_NUMBER_AND_INCR(j, p);
p += j;
@@ -2335,7 +2381,7 @@ re_compile_fastmap(bufp)
case start_memory:
case stop_memory:
- p++;
+ p += 2;
continue;
case duplicate:
@@ -2763,7 +2809,9 @@ typedef union
for (this_reg = 0; this_reg < num_regs; this_reg++) \
{ \
if (IS_ACTIVE(reg_info[this_reg])) \
- MATCHED_SOMETHING(reg_info[this_reg]) = 1; \
+ MATCHED_SOMETHING(reg_info[this_reg]) \
+ = EVER_MATCHED_SOMETHING (reg_info[this_reg]) \
+ = 1; \
else \
MATCHED_SOMETHING(reg_info[this_reg]) = 0; \
} \
@@ -2831,6 +2879,7 @@ re_match(bufp, string_arg, size, pos, regs)
struct re_registers *regs;
{
register unsigned char *p = (unsigned char *) bufp->buffer;
+ unsigned char *p1;
/* Pointer to beyond end of buffer. */
register unsigned char *pend = p + bufp->used;
@@ -2870,6 +2919,14 @@ re_match(bufp, string_arg, size, pos, regs)
unsigned char **regstart = RE_TALLOC(num_regs, unsigned char*);
unsigned char **regend = RE_TALLOC(num_regs, unsigned char*);
+ /* If a group that's operated upon by a repetition operator fails to
+ match anything, then the register for its start will need to be
+ restored because it will have been set to wherever in the string we
+ are when we last see its open-group operator. Similarly for a
+ register's end. */
+ unsigned char **old_regstart = RE_TALLOC(num_regs, unsigned char*);
+ unsigned char **old_regend = RE_TALLOC(num_regs, unsigned char*);
+
/* The is_active field of reg_info helps us keep track of which (possibly
nested) subexpressions we are currently in. The matched_something
field of reg_info[reg_num] helps us tell whether or not we have
@@ -2906,11 +2963,13 @@ re_match(bufp, string_arg, size, pos, regs)
inactive and mark them as not having matched anything or ever
failed. */
for (mcnt = 0; mcnt < num_regs; mcnt++) {
- regstart[mcnt] = regend[mcnt]
- = best_regstart[mcnt] = best_regend[mcnt] = REG_UNSET_VALUE;
- reg_info[mcnt].word = 0;
- IS_ACTIVE (reg_info[mcnt]) = 0;
- MATCHED_SOMETHING (reg_info[mcnt]) = 0;
+ regstart[mcnt] = regend[mcnt]
+ = old_regstart[mcnt] = old_regend[mcnt]
+ = best_regstart[mcnt] = best_regend[mcnt] = REG_UNSET_VALUE;
+ REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
+ IS_ACTIVE (reg_info[mcnt]) = 0;
+ MATCHED_SOMETHING (reg_info[mcnt]) = 0;
+ EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
}
/* Set up pointers to ends of strings.
@@ -3016,13 +3075,36 @@ re_match(bufp, string_arg, size, pos, regs)
a register number in the next byte. The text matched
within the \( and \) is recorded under that number. */
case start_memory:
+ /* Find out if this group can match the empty string. */
+ p1 = p; /* To send to group_match_null_string_p. */
+ if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
+ REG_MATCH_NULL_STRING_P (reg_info[*p])
+ = group_match_null_string_p (&p1, pend, reg_info);
+
+ /* Save the position in the string where we were the last time
+ we were at this open-group operator in case the group is
+ operated upon by a repetition operator, e.g., with `(a*)*b'
+ against `ab'; then we want to ignore where we are now in
+ the string in case this attempt to match fails. */
+ old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
+ ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
+ : regstart[*p];
regstart[*p] = d;
IS_ACTIVE(reg_info[*p]) = 1;
MATCHED_SOMETHING(reg_info[*p]) = 0;
- p++;
+ p += 2;
continue;
case stop_memory:
+ /* We need to save the string position the last time we were at
+ this close-group operator in case the group is operated
+ upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
+ against `aba'; then we want to ignore where we are now in
+ the string in case this attempt to match fails. */
+ old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
+ ? REG_UNSET (regend[*p]) ? d : regend[*p]
+ : regend[*p];
+
regend[*p] = d;
IS_ACTIVE(reg_info[*p]) = 0;
@@ -3037,6 +3119,7 @@ re_match(bufp, string_arg, size, pos, regs)
switch (*p2++)
{
case jump_n:
+ case finalize_push_n:
is_a_jump_n = 1;
case finalize_jump:
case maybe_finalize_jump:
@@ -3053,14 +3136,42 @@ re_match(bufp, string_arg, size, pos, regs)
to an on_failure_jump, exit from the loop by forcing a
failure after pushing on the stack the on_failure_jump's
jump in the pattern, and d. */
- if (mcnt < 0 && (enum regexpcode) *p2++ == on_failure_jump)
+ if (mcnt < 0 && (enum regexpcode) *p2++ == on_failure_jump
+ && (enum regexpcode) p1[3] == start_memory && p1[4] == *p)
{
+ /* If this group ever matched anything, then restore
+ what its registers were before trying this last
+ failed match, e.g., with `(a*)*b' against `ab' for
+ regstart[1], and, e.g., with `((a*)*(b*)*)*'
+ against `aba' for regend[3].
+
+ Also restore the registers for inner groups for,
+ e.g., `((a*)(b*))*' against `aba' (register 3 would
+ otherwise get trashed). */
+
+ if (EVER_MATCHED_SOMETHING (reg_info[*p]))
+ {
+ unsigned r;
+
+ EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
+
+ /* Restore this and inner groups' (if any) registers. */
+ for (r = *p; r < *p + *(p + 1); r++)
+ {
+ regstart[r] = old_regstart[r];
+
+ /* xx why this test? */
+ if ((int) old_regend[r] >= (int) regstart[r])
+ regend[r] = old_regend[r];
+ }
+ }
+ p2++;
EXTRACT_NUMBER_AND_INCR(mcnt, p2);
PUSH_FAILURE_POINT(p2 + mcnt, d);
goto fail;
}
}
- p++;
+ p += 2;
continue;
/* \<digit> has been turned into a `duplicate' command which is
@@ -3235,13 +3346,26 @@ re_match(bufp, string_arg, size, pos, regs)
EXTRACT_NUMBER_AND_INCR(mcnt, p);
{
register unsigned char *p2 = p;
- /* Compare what follows with the beginning of the repeat.
- If we can establish that there is nothing that they would
- both match, we can change to finalize_jump. */
- while (p2 + 1 != pend
- && (*p2 == (unsigned char)stop_memory
- || *p2 == (unsigned char)start_memory))
- p2 += 2; /* Skip over reg number. */
+
+ /* Compare the beginning of the repeat with what in the
+ pattern follows its end. If we can establish that there
+ is nothing that they would both match, i.e., that we
+ would have to backtrack because of (as in, e.g., `a*a')
+ then we can change to pop_failure_jump, because we'll
+ never have to backtrack.
+
+ This is not true in the case of alternatives: in
+ `(a|ab)*' we do need to backtrack to the `ab' alternative
+ (e.g., if the string was `ab'). But instead of trying to
+ detect that here, the alternative has put on a dummy
+ failure point which is what we will end up popping. */
+
+ /* Skip over open/close-group commands. */
+ while (p2 + 2 < pend
+ && ((enum regexpcode) *p2 == stop_memory
+ || (enum regexpcode) *p2 == start_memory))
+ p2 += 3; /* Skip over args, too. */
+
if (p2 == pend)
p[-3] = (unsigned char)finalize_jump;
else if (*p2 == (unsigned char)exactn
@@ -3294,6 +3418,11 @@ re_match(bufp, string_arg, size, pos, regs)
p += mcnt;
continue;
+ /* We need this opcode so we can detect where alternatives end
+ in `group_match_null_string_p' et al. */
+ case jump_past_alt:
+ goto nofinalize;
+
case dummy_failure_jump:
/* Normally, the on_failure_jump pushes a failure point, which
then gets popped at finalize_jump. We will end up at
@@ -3303,6 +3432,17 @@ re_match(bufp, string_arg, size, pos, regs)
PUSH_FAILURE_POINT(0, 0);
goto nofinalize;
+ /* At the end of an alternative, we need to push a dummy failure
+ point in case we are followed by a `finalize_jump', because
+ we don't want the failure point for the alternative to be
+ popped. For example, matching `(a|ab)*' against `aab'
+ requires that we match the `ab' alternative. */
+ case push_dummy_failure:
+ /* See comments just above at `dummy_failure_jump' about the
+ two zeroes. */
+ PUSH_FAILURE_POINT(0, 0);
+ break;
+
/* Have to succeed matching what follows at least n times. Then
just handle like an on_failure_jump. */
case succeed_n:
@@ -3527,7 +3667,37 @@ re_match(bufp, string_arg, size, pos, regs)
regend[this_reg] = *--stackp;
regstart[this_reg] = *--stackp;
}
- }
+
+ if (p < pend)
+ {
+ int is_a_jump_n = 0;
+ unsigned char *p1 = p;
+
+ /* If failed to a backwards jump that's part of a repetition
+ loop, need to pop this failure point and use the next one. */
+ switch ((enum regexpcode) *p1)
+ {
+ case jump_n:
+ case finalize_push_n:
+ is_a_jump_n = 1;
+ case maybe_finalize_jump:
+ case finalize_jump:
+ case finalize_push:
+ case jump:
+ p1 = p + 1;
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+ p1 += mcnt;
+
+ if ((is_a_jump_n && (enum regexpcode) *p1 == succeed_n)
+ || (!is_a_jump_n
+ && (enum regexpcode) *p1 == on_failure_jump))
+ goto fail;
+ break;
+ default:
+ /* do nothing */ ;
+ }
+ }
+ }
else
break; /* Matching at this starting point really fails. */
}
@@ -3539,6 +3709,245 @@ re_match(bufp, string_arg, size, pos, regs)
}
+/* We are passed P pointing to a register number after a start_memory.
+
+ Return true if the pattern up to the corresponding stop_memory can
+ match the empty string, and false otherwise.
+
+ If we find the matching stop_memory, sets P to point to one past its number.
+ Otherwise, sets P to an undefined byte less than or equal to END.
+
+ We don't handle duplicates properly (yet). */
+
+static int
+group_match_null_string_p (p, end, reg_info)
+ unsigned char **p, *end;
+ register_info_type *reg_info;
+{
+ int mcnt;
+ /* Point to after the args to the start_memory. */
+ unsigned char *p1 = *p + 2;
+
+ while (p1 < end)
+ {
+ /* Skip over opcodes that can match nothing, and return true or
+ false, as appropriate, when we get to one that can't, or to the
+ matching stop_memory. */
+
+ switch ((enum regexpcode) *p1)
+ {
+ /* Could be either a loop or a series of alternatives. */
+ case on_failure_jump:
+ p1++;
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+
+ /* If the next operation is not a jump backwards in the
+ pattern. */
+
+ if (mcnt >= 0)
+ {
+ /* Go through the on_failure_jumps of the alternatives,
+ seeing if any of the alternatives cannot match nothing.
+ The last alternative starts with only a jump,
+ whereas the rest start with on_failure_jump and end
+ with a jump, e.g., here is the pattern for `a|b|c':
+
+ /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
+ /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
+ /exactn/1/c
+
+ So, we have to first go through the first (n-1)
+ alternatives and then deal with the last one separately. */
+
+
+ /* Deal with the first (n-1) alternatives, which start
+ with an on_failure_jump (see above) that jumps to right
+ past a jump_past_alt. */
+
+ while ((enum regexpcode) p1[mcnt-3] == jump_past_alt)
+ {
+ /* `mcnt' holds how many bytes long the alternative
+ is, including the ending `jump_past_alt' and
+ its number. */
+
+ if (!alt_match_null_string_p (p1, p1 + mcnt - 3,
+ reg_info))
+ return 0;
+
+ /* Move to right after this alternative, including the
+ jump_past_alt. */
+ p1 += mcnt;
+
+ /* Break if it's the beginning of an n-th alternative
+ that doesn't begin with an on_failure_jump. */
+ if ((enum regexpcode) *p1 != on_failure_jump)
+ break;
+
+ /* Still have to check that it's not an n-th
+ alternative that starts with an on_failure_jump. */
+ p1++;
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+ if ((enum regexpcode) p1[mcnt-3] != jump_past_alt)
+ {
+ /* Get to the beginning of the n-th alternative. */
+ p1 -= 3;
+ break;
+ }
+ }
+
+ /* Deal with the last alternative: go back and get number
+ of the `jump_past_alt' just before it. `mcnt' contains
+ the length of the alternative. */
+ EXTRACT_NUMBER (mcnt, p1 - 2);
+
+ if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
+ return 0;
+
+ p1 += mcnt; /* Get past the n-th alternative. */
+ } /* if mcnt > 0 */
+ break;
+
+
+ case stop_memory:
+ *p = p1 + 2;
+ return 1;
+
+
+ default:
+ if (!common_op_match_null_string_p (&p1, end, reg_info))
+ return 0;
+ }
+ } /* while p1 < end */
+
+ return 0;
+} /* group_match_null_string_p */
+
+
+/* Similar to group_match_null_string_p, but doesn't deal with alternatives:
+ It expects P to be the first byte of a single alternative and END one
+ byte past the last. The alternative can contain groups. */
+
+static int
+alt_match_null_string_p (p, end, reg_info)
+ unsigned char *p, *end;
+ register_info_type *reg_info;
+{
+ int mcnt;
+ unsigned char *p1 = p;
+
+ while (p1 < end)
+ {
+ /* Skip over opcodes that can match nothing, and break when we get
+ to one that can't. */
+
+ switch ((enum regexpcode) *p1)
+ {
+ /* It's a loop. */
+ case on_failure_jump:
+ p1++;
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+ p1 += mcnt;
+ break;
+
+ default:
+ if (!common_op_match_null_string_p (&p1, end, reg_info))
+ return 0;
+ }
+ } /* while p1 < end */
+
+ return 1;
+} /* alt_match_null_string_p */
+
+
+/* Deals with the ops common to group_match_null_string_p and
+ alt_match_null_string_p.
+
+ Sets P to one after the op and its arguments, if any. */
+
+static int
+common_op_match_null_string_p (p, end, reg_info)
+ unsigned char **p, *end;
+ register_info_type *reg_info;
+{
+ int mcnt;
+ int ret;
+ int reg_no;
+ unsigned char *p1 = *p;
+
+ switch ((enum regexpcode) *p1++)
+ {
+ case unused:
+ case begline:
+ case endline:
+ case begbuf:
+ case endbuf:
+ case wordbeg:
+ case wordend:
+ case wordbound:
+ case notwordbound:
+#ifdef emacs
+ case before_dot:
+ case at_dot:
+ case after_dot:
+#endif
+ break;
+
+ case start_memory:
+ reg_no = *p1;
+ ret = group_match_null_string_p (&p1, end, reg_info);
+
+ /* Have to set this here in case we're checking a group which
+ contains a group and a back reference to it. */
+
+ if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
+ REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
+
+ if (!ret)
+ return 0;
+ break;
+
+ /* If this is an optimized succeed_n for zero times, make the jump. */
+ case jump:
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+ if (mcnt >= 0)
+ p1 += mcnt;
+ else
+ return 0;
+ break;
+
+ case succeed_n:
+ /* Get to the number of times to succeed. */
+ p1 += 2;
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+
+ if (mcnt == 0)
+ {
+ p1 -= 4;
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+ p1 += mcnt;
+ }
+ else
+ return 0;
+ break;
+
+ case duplicate:
+ if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
+ return 0;
+ break;
+
+ case set_number_at:
+ p1 += 4;
+
+ default:
+ /* All other opcodes mean we cannot match the empty string. */
+ return 0;
+ }
+
+ *p = p1;
+ return 1;
+} /* common_op_match_null_string_p */
+
+
static int
memcmp_translate(s1, s2, len)
unsigned char *s1, *s2;
diff --git a/sample/ruby-mode.el b/sample/ruby-mode.el
index c2410f1fe6..7b9549612c 100644
--- a/sample/ruby-mode.el
+++ b/sample/ruby-mode.el
@@ -660,7 +660,7 @@ An end of a defun is found by moving forward from the beginning of one."
'("\\(^\\|[^_]\\)\\b\\([A-Z]+[a-zA-Z0-9_]*\\)"
2 font-lock-type-face)
;; functions
- '("^\\s *def[ \t]+[^ \t(]*$"
+ '("^\\s *def[ \t]+[^ \t(]*"
0 font-lock-function-name-face t))
"*Additional expressions to highlight in ruby mode.")
(if (and (>= (string-to-int emacs-version) 19)
diff --git a/string.c b/string.c
index 5def049e35..5229236f3b 100644
--- a/string.c
+++ b/string.c
@@ -1708,7 +1708,7 @@ trnext(t)
for (;;) {
if (!t->gen) {
if (t->p == t->pend) return -1;
- t->now = *t->p++;
+ t->now = *(USTR)t->p++;
if (t->p < t->pend && *t->p == '-') {
t->p++;
if (t->p < t->pend) {