summaryrefslogtreecommitdiff
path: root/re.c
diff options
context:
space:
mode:
Diffstat (limited to 're.c')
-rw-r--r--re.c247
1 files changed, 166 insertions, 81 deletions
diff --git a/re.c b/re.c
index f6dde5a4eb..bd6c2fb9b8 100644
--- a/re.c
+++ b/re.c
@@ -6,13 +6,15 @@
$Date: 1995/01/10 10:42:49 $
created at: Mon Aug 9 18:24:49 JST 1993
- Copyright (C) 1993-1995 Yukihiro Matsumoto
+ Copyright (C) 1993-1996 Yukihiro Matsumoto
************************************************/
#include "ruby.h"
#include "re.h"
+static VALUE eRegxpError;
+
#define BEG(no) regs->beg[no]
#define END(no) regs->end[no]
@@ -104,6 +106,73 @@ static int reg_kcode =
# endif
#endif
+extern int rb_in_eval;
+
+static VALUE
+reg_desc(s, len, re)
+ char *s;
+ int len;
+ VALUE re;
+{
+ VALUE str = str_new2("/");
+ char *p, *pend;
+ int slash = 0;
+
+ p = s; pend = p + len;
+ while (p<pend) {
+ if (*p == '/') {
+ slash = 1;
+ break;
+ }
+ p++;
+ }
+ if (!slash) {
+ str_cat(str, s, len);
+ }
+ else {
+ p = s;
+ while (p<pend) {
+ if (*p == '/') {
+ char c = '\\';
+ str_cat(str, &c, 1);
+ str_cat(str, p, 1);
+ }
+ else {
+ str_cat(str, p, 1);
+ }
+ p++;
+ }
+ }
+ str_cat(str, "/", 1);
+ if (re && FL_TEST(re, REG_IGNORECASE)) {
+ str_cat(str, "i", 1);
+ }
+ return str;
+}
+
+static VALUE
+reg_inspect(re)
+ struct RRegexp *re;
+{
+ return reg_desc(re->str, re->len, re);
+}
+
+static void
+reg_raise(s, len, err, compile, re)
+ char *s;
+ int len;
+ char *err;
+ int compile;
+ VALUE re;
+{
+ VALUE desc = reg_desc(s, len, re);
+
+ if (!compile)
+ Raise(eRegxpError, "%s: %s", err, RSTRING(desc)->ptr);
+ else
+ Error("%s: %s", err, RSTRING(desc)->ptr);
+}
+
static Regexp*
make_regexp(s, len)
char *s;
@@ -125,17 +194,52 @@ make_regexp(s, len)
rp->allocated = 16;
rp->fastmap = ALLOC_N(char, 256);
- if ((err = re_compile_pattern(s, (size_t)len, rp)) != NULL)
- Fail("%s: /%s/", err, s);
+ if ((err = re_compile_pattern(s, (size_t)len, rp)) != NULL) {
+ reg_raise(s, len, err, !rb_in_eval, 0);
+ }
return rp;
}
+extern VALUE cData;
+static VALUE cMatch;
+
+static VALUE
+match_to_a(match)
+ struct RMatch *match;
+{
+ struct re_registers *regs = match->regs;
+ VALUE ary = ary_new(regs->num_regs);
+ int i;
+
+ for (i=0; i<regs->num_regs; i++) {
+ if (regs->beg[0] == -1) ary_push(ary, Qnil);
+ else ary_push(ary, str_new(match->ptr+regs->beg[i],
+ regs->end[i]-regs->beg[i]));
+ }
+ return ary;
+}
+
+static VALUE
+match_to_s(match)
+ struct RMatch *match;
+{
+ int beg, len;
+
+ if (match->regs->allocated == 0) return Qnil;
+
+ beg = match->regs->beg[0];
+ if (beg == -1) return Qnil;
+
+ len = match->regs->end[0] - beg;
+ return str_new(match->ptr+beg, len);
+}
+
static VALUE
match_alloc()
{
NEWOBJ(match, struct RMatch);
- OBJSETUP(match, cData, T_MATCH);
+ OBJSETUP(match, cMatch, T_MATCH);
match->ptr = 0;
match->len = 0;
@@ -155,10 +259,12 @@ reg_search(reg, str, start, regs)
struct re_registers *regs;
{
int result;
- int casefold = ignorecase;
+ int casefold = RTEST(ignorecase);
VALUE match = 0;
struct re_registers *regs0 = 0;
+ if (start > str->len) return -1;
+
/* case-flag set for the object */
if (FL_TEST(reg, REG_IGNORECASE)) {
casefold = TRUE;
@@ -174,16 +280,11 @@ reg_search(reg, str, start, regs)
reg->ptr->fastmap_accurate = 0;
}
- if (start > str->len) return -1;
-
- if (regs == (struct re_registers *)-1) {
+ if (regs == (struct re_registers*)-1) {
regs = 0;
}
- else if (match = backref_get()) {
- if (match == 1) {
- match = match_alloc();
- backref_set(match);
- }
+ else {
+ match = match_alloc();
regs0 = RMATCH(match)->regs;
}
@@ -192,8 +293,9 @@ reg_search(reg, str, start, regs)
if ((RBASIC(reg)->flags & KCODE_MASK) != reg_kcode) {
char *err;
- if ((err = re_compile_pattern(reg->str, reg->len, reg->ptr)) != NULL)
- Fail("%s: /%s/", err, reg->str);
+ if ((err = re_compile_pattern(reg->str, reg->len, reg->ptr)) != NULL) {
+ reg_raise(reg->str, reg->len, err, reg);
+ }
RBASIC(reg)->flags = RBASIC(reg)->flags & ~KCODE_MASK;
RBASIC(reg)->flags |= reg_kcode;
}
@@ -201,11 +303,18 @@ reg_search(reg, str, start, regs)
result = re_search(reg->ptr, str->ptr, str->len,
start, str->len - start, regs0);
- if (match && result >= 0) {
+ if (start == -2) {
+ reg_raise(reg->str, reg->len, "Stack overfow in regexp matcher", reg);
+ }
+ if (result < 0) {
+ backref_set(Qnil);
+ }
+ else if (match) {
RMATCH(match)->len = str->len;
REALLOC_N(RMATCH(match)->ptr, char, str->len+1);
memcpy(RMATCH(match)->ptr, str->ptr, str->len);
RMATCH(match)->ptr[str->len] = '\0';
+ backref_set(match);
}
if (regs && regs0 && regs0 != regs) re_copy_registers(regs, regs0);
@@ -217,7 +326,7 @@ reg_nth_defined(nth, match)
int nth;
struct RMatch *match;
{
- if (!match) return FALSE;
+ if (NIL_P(match)) return Qnil;
if (nth >= match->regs->num_regs) {
return FALSE;
}
@@ -232,7 +341,7 @@ reg_nth_match(nth, match)
{
int start, end, len;
- if (!match) return Qnil;
+ if (NIL_P(match)) return Qnil;
if (nth >= match->regs->num_regs) {
return Qnil;
}
@@ -254,7 +363,7 @@ VALUE
reg_match_pre(match)
struct RMatch *match;
{
- if (!match) return Qnil;
+ if (NIL_P(match)) return Qnil;
if (match->BEG(0) == -1) return Qnil;
return str_new(match->ptr, match->BEG(0));
}
@@ -263,7 +372,7 @@ VALUE
reg_match_post(match)
struct RMatch *match;
{
- if (!match) return Qnil;
+ if (NIL_P(match)) return Qnil;
if (match->BEG(0) == -1) return Qnil;
return str_new(match->ptr+match->END(0),
match->len-match->END(0));
@@ -275,7 +384,7 @@ reg_match_last(match)
{
int i;
- if (!match) return Qnil;
+ if (NIL_P(match)) return Qnil;
if (match->BEG(0) == -1) return Qnil;
for (i=match->regs->num_regs-1; match->BEG(i) == -1 && i > 0; i--)
@@ -293,13 +402,6 @@ Regexp *rp;
free(rp);
}
-void
-reg_error(s)
-const char *s;
-{
- Fail(s);
-}
-
VALUE cRegexp;
static VALUE
@@ -331,19 +433,22 @@ reg_new(s, len, ci)
return reg_new_1(cRegexp, s, len, ci);
}
-static VALUE reg_cache, ign_cache;
+int ign_cache;
+static VALUE reg_cache;
VALUE
reg_regcomp(str)
struct RString *str;
{
+ int ignc = RTEST(ignorecase);
+
if (reg_cache && RREGEXP(reg_cache)->len == str->len
- && ign_cache == ignorecase
+ && ign_cache == ignc
&& memcmp(RREGEXP(reg_cache)->str, str->ptr, str->len) == 0)
return reg_cache;
- ign_cache = ignorecase;
- return reg_cache = reg_new(str->ptr, str->len, ignorecase);
+ ign_cache = ignc;
+ return reg_cache = reg_new(str->ptr, str->len, ignc);
}
VALUE
@@ -356,7 +461,7 @@ reg_match(re, str)
if (TYPE(str) != T_STRING) return FALSE;
start = reg_search(re, str, 0, 0);
if (start < 0) {
- return Qnil;
+ return FALSE;
}
return INT2FIX(start);
}
@@ -365,15 +470,15 @@ VALUE
reg_match2(re)
struct RRegexp *re;
{
- extern VALUE rb_lastline;
int start;
+ VALUE line = lastline_get();
- if (TYPE(rb_lastline) != T_STRING)
- Fail("$_ is not a string");
+ if (TYPE(line) != T_STRING)
+ return FALSE;
- start = reg_search(re, rb_lastline, 0, 0);
- if (start == -1) {
- return Qnil;
+ start = reg_search(re, line, 0, 0);
+ if (start < 0) {
+ return FALSE;
}
return INT2FIX(start);
}
@@ -384,11 +489,11 @@ reg_s_new(argc, argv, self)
VALUE *argv;
VALUE self;
{
- VALUE src, reg;
+ VALUE src;
int ci = 0;
if (argc == 0 || argc > 2) {
- Fail("wrong # of argument");
+ ArgError("wrong # of argument");
}
if (argc == 2 && argv[1]) {
ci = 1;
@@ -397,10 +502,12 @@ reg_s_new(argc, argv, self)
src = argv[0];
switch (TYPE(src)) {
case T_STRING:
- reg = reg_new_1(self, RREGEXP(src)->ptr, RREGEXP(src)->len, ci);
+ return reg_new_1(self, RSTRING(src)->ptr, RSTRING(src)->len, ci);
+ break;
case T_REGEXP:
- reg = reg_new_1(self, RREGEXP(src)->str, RREGEXP(src)->len, ci);
+ return reg_new_1(self, RREGEXP(src)->str, RREGEXP(src)->len, ci);
+ break;
default:
Check_Type(src, T_STRING);
@@ -472,7 +579,7 @@ reg_regsub(str, src, regs)
no = -1;
if (no >= 0) {
- if (val == Qnil) {
+ if (NIL_P(val)) {
val = str_new(p, ss-p);
}
else {
@@ -490,7 +597,7 @@ reg_regsub(str, src, regs)
}
}
- if (val == Qnil) return (VALUE)str;
+ if (NIL_P(val)) return (VALUE)str;
if (p < e) {
str_cat(val, p, e-p);
}
@@ -501,20 +608,6 @@ reg_regsub(str, src, regs)
}
static VALUE
-reg_to_s(re)
- struct RRegexp *re;
-{
- VALUE str = str_new2("/");
-
- str_cat(str, re->str, re->len);
- str_cat(str, "/", 1);
- if (FL_TEST(re, REG_IGNORECASE)) {
- str_cat(str, "i", 1);
- }
- return str;
-}
-
-static VALUE
kcode_getter()
{
switch (reg_kcode) {
@@ -569,26 +662,7 @@ kcode_setter(val)
static VALUE
match_getter()
{
- VALUE match = backref_get();
-
- if (match && match != 1) {
- NEWOBJ(m, struct RMatch);
- OBJSETUP(m, cData, T_MATCH);
-
- m->len = RMATCH(match)->len;
- if (RMATCH(match)->ptr) {
- m->ptr = ALLOC_N(char, m->len+1);
- memcpy(m->ptr, RMATCH(match)->ptr, m->len);
- m->ptr[m->len] = '\0';
- }
- else {
- m->ptr = 0;
- }
- m->regs = ALLOC(struct re_registers);
- re_copy_registers(m->regs, RMATCH(match)->regs);
- return (VALUE)m;
- }
- return Qnil;
+ return backref_get();
}
static void
@@ -598,11 +672,16 @@ match_setter(val)
backref_set(val);
}
+VALUE krn_to_s();
+
void
Init_Regexp()
{
+ extern VALUE eException;
+
+ eRegxpError = rb_define_class("RegxpError", eException);
+
re_set_syntax(RE_NO_BK_PARENS | RE_NO_BK_VBAR
- | RE_AWK_CLASS_HACK
| RE_INTERVALS
| RE_NO_BK_BRACES
| RE_BACKSLASH_ESCAPE_IN_LISTS
@@ -623,8 +702,14 @@ Init_Regexp()
rb_define_method(cRegexp, "clone", reg_clone, 0);
rb_define_method(cRegexp, "=~", reg_match, 1);
+ rb_define_method(cRegexp, "===", reg_match, 1);
rb_define_method(cRegexp, "~", reg_match2, 0);
- rb_define_method(cRegexp, "to_s", reg_to_s, 0);
+ rb_define_method(cRegexp, "inspect", reg_inspect, 0);
rb_global_variable(&reg_cache);
+
+ cMatch = rb_define_class("MatchingData", cData);
+ rb_define_method(cMatch, "to_a", match_to_a, 0);
+ rb_define_method(cMatch, "to_s", match_to_s, 0);
+ rb_define_method(cMatch, "inspect", krn_to_s, 0);
}