summaryrefslogtreecommitdiff
path: root/re.c
diff options
context:
space:
mode:
Diffstat (limited to 're.c')
-rw-r--r--re.c163
1 files changed, 74 insertions, 89 deletions
diff --git a/re.c b/re.c
index bc83ec7..0814f28 100644
--- a/re.c
+++ b/re.c
@@ -13,7 +13,6 @@
#include "ruby.h"
#include "re.h"
-/* Generate compiled regular expressions */
#if 'a' == 97 /* it's ascii */
static char casetable[] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
@@ -102,22 +101,16 @@ int len;
rp = ALLOC(Regexp);
MEMZERO((char *)rp, Regexp, 1);
- rp->pat.buffer = ALLOC_N(char, 16);
- rp->pat.allocated = 16;
- rp->pat.fastmap = ALLOC_N(char, 256);
+ rp->buffer = ALLOC_N(char, 16);
+ rp->allocated = 16;
+ rp->fastmap = ALLOC_N(char, 256);
- if ((err = re_compile_pattern(s, (size_t)len, &(rp->pat))) != NULL)
+ if ((err = re_compile_pattern(s, (size_t)len, rp)) != NULL)
Fail("%s: /%s/", err, s);
return rp;
}
-struct match {
- UINT len;
- char *ptr;
- struct re_registers regs;
-};
-
struct match last_match;
VALUE ignorecase;
@@ -134,14 +127,20 @@ research(reg, str, start)
if (FL_TEST(reg, FL_USER1)) {
casefold = TRUE;
}
- if (casefold)
- reg->ptr->pat.translate = casetable;
- else
- reg->ptr->pat.translate = NULL;
+ if (casefold) {
+ if (reg->ptr->translate != casetable) {
+ reg->ptr->translate = casetable;
+ reg->ptr->fastmap_accurate = 0;
+ }
+ }
+ else if (reg->ptr->translate) {
+ reg->ptr->translate = NULL;
+ reg->ptr->fastmap_accurate = 0;
+ }
if (start > str->len) return -1;
- result = re_search(&(reg->ptr->pat), str->ptr, str->len,
- start, str->len - start, &(reg->ptr->regs));
+ result = re_search(reg->ptr, str->ptr, str->len,
+ start, str->len - start, &last_match.regs);
if (result >= 0) {
last_match.len = str->len;
@@ -153,24 +152,23 @@ research(reg, str, start)
}
memcpy(last_match.ptr, str->ptr, last_match.len);
last_match.ptr[last_match.len] = '\0';
- last_match.regs = reg->ptr->regs;
}
return result;
}
-static VALUE
-nth_match(nth)
+VALUE
+re_nth_match(nth)
int nth;
{
int start, end, len;
- if (nth >= RE_NREGS) {
- Fail("match out of range %d, %d", nth, RE_NREGS);
+ if (nth >= last_match.regs.num_regs) {
+ return Qnil;
}
- start = last_match.regs.start[nth];
+ start = BEG(nth);
if (start == -1) return Qnil;
- end = last_match.regs.end[nth];
+ end = END(nth);
len = end - start;
return str_new(last_match.ptr + start, len);
}
@@ -179,7 +177,7 @@ VALUE
re_last_match(id)
ID id;
{
- return nth_match(0);
+ return re_nth_match(0);
}
static VALUE
@@ -187,8 +185,8 @@ re_match_pre()
{
struct match *match;
- if (last_match.regs.start[0] == -1) return Qnil;
- return str_new(last_match.ptr, last_match.regs.start[0]);
+ if (BEG(0) == -1) return Qnil;
+ return str_new(last_match.ptr, BEG(0));
}
static VALUE
@@ -196,24 +194,22 @@ re_match_post()
{
struct match *match;
- if (last_match.regs.start[0] == -1) return Qnil;
- return str_new(last_match.ptr+last_match.regs.end[0],
- last_match.len-last_match.regs.end[0]);
+ if (BEG(0) == -1) return Qnil;
+ return str_new(last_match.ptr+END(0),
+ last_match.len-END(0));
}
static VALUE
re_match_last()
{
- struct match *match;
- int i;
+ int i, len;
- if (last_match.regs.start[0] == -1) return Qnil;
+ if (BEG(0) == -1) return Qnil;
- for (i=0; i<RE_NREGS; i++) {
- if (last_match.regs.start[i] == -1) break;
+ for (i=last_match.regs.num_regs-1; BEG(i) == -1 && i > 0; i--) {
}
- if (i == RE_NREGS) return Qnil;
- return nth_match(i-1);
+ if (i == 0) return Qnil;
+ return re_nth_match(i);
}
static VALUE
@@ -221,7 +217,7 @@ get_match_data(id, nth)
ID id;
int nth;
{
- return nth_match(nth);
+ return re_nth_match(nth);
}
static void
@@ -229,6 +225,10 @@ free_match(data)
struct match *data;
{
free(data->ptr);
+ if (data->regs.allocated > 0) {
+ free(data->regs.beg);
+ free(data->regs.end);
+ }
}
static VALUE
@@ -242,7 +242,8 @@ get_match()
data->len = last_match.len;
data->ptr = ALLOC_N(char, last_match.len+1);
memcpy(data->ptr, last_match.ptr, data->len+1);
- data->regs = last_match.regs;
+ data->regs.allocated = 0;
+ re_copy_registers(&data->regs, &last_match.regs);
return data_new(data, free_match, Qnil);
}
@@ -280,8 +281,8 @@ void
reg_free(rp)
Regexp *rp;
{
- free(rp->pat.buffer);
- free(rp->pat.fastmap);
+ free(rp->buffer);
+ free(rp->fastmap);
free(rp);
}
@@ -472,10 +473,6 @@ re_regsub(str)
if (c == '\\' && (*s == '\\' || *s == '&'))
p = ++s;
} else {
-
-#define BEG(no) last_match.regs.start[no]
-#define END(no) last_match.regs.end[no]
-
if (BEG(no) == -1) continue;
str_cat(val, last_match.ptr+BEG(no), END(no)-BEG(no));
}
@@ -491,6 +488,19 @@ re_regsub(str)
return val;
}
+static VALUE
+kcode()
+{
+ switch (re_syntax_options & RE_MBCTYPE_MASK) {
+ case RE_MBCTYPE_SJIS:
+ return str_new2("SJIS");
+ case RE_MBCTYPE_EUC:
+ return str_new2("EUC");
+ default:
+ return str_new2("NONE");
+ }
+}
+
void
rb_set_kanjicode(code)
char *code;
@@ -500,59 +510,46 @@ rb_set_kanjicode(code)
switch (code[0]) {
case 'E':
case 'e':
- obscure_syntax &= ~RE_MBCTYPE_MASK;
- obscure_syntax |= RE_MBCTYPE_EUC;
+ re_syntax_options &= ~RE_MBCTYPE_MASK;
+ re_syntax_options |= RE_MBCTYPE_EUC;
break;
case 'S':
case 's':
- obscure_syntax &= ~RE_MBCTYPE_MASK;
- obscure_syntax |= RE_MBCTYPE_SJIS;
+ re_syntax_options &= ~RE_MBCTYPE_MASK;
+ re_syntax_options |= RE_MBCTYPE_SJIS;
break;
default:
case 'N':
case 'n':
set_no_conversion:
- obscure_syntax &= ~RE_MBCTYPE_MASK;
+ re_syntax_options &= ~RE_MBCTYPE_MASK;
break;
}
- re_set_syntax(obscure_syntax);
+ re_set_syntax(re_syntax_options);
}
-static VALUE
-kanji_var_get()
+void
+rb_setup_kcode()
{
- switch (obscure_syntax & RE_MBCTYPE_MASK) {
- case RE_MBCTYPE_SJIS:
- return str_new2("SJIS");
- case RE_MBCTYPE_EUC:
- return str_new2("EUC");
- default:
- return Qnil;
- }
+ rb_define_const(C_Object, "KCODE", kcode());
}
-static void
-kanji_var_set(val)
- struct RString *val;
-{
- if (val == Qnil) rb_set_kanjicode(Qnil);
- Check_Type(val, T_STRING);
- rb_set_kanjicode(val->ptr);
-}
+VALUE rb_readonly_hook();
void
Init_Regexp()
{
int i;
- obscure_syntax = RE_NO_BK_PARENS | RE_NO_BK_VBAR
- | RE_CONTEXT_INDEP_OPS | RE_INTERVALS
- | RE_NO_BK_CURLY_BRACES
- | RE_MBCTYPE_EUC;
-
- for (i=0; i<RE_NREGS; i++) {
- last_match.regs.start[i] = last_match.regs.end[i] = -1;
- }
+ re_set_syntax(RE_NO_BK_PARENS | RE_NO_BK_VBAR
+ | RE_AWK_CLASS_HACK
+ | RE_INTERVALS
+ | RE_NO_BK_BRACES
+ | RE_BACKSLASH_ESCAPE_IN_LISTS
+#ifdef DEFAULT_MBCTYPE
+ | DEFAULT_MBCTYPE
+#endif
+);
rb_define_variable("$~", Qnil, get_match, set_match, 0);
@@ -561,18 +558,6 @@ Init_Regexp()
rb_define_variable("$'", Qnil, re_match_post, Qnil, 0);
rb_define_variable("$+", Qnil, re_match_last, Qnil, 0);
- rb_define_variable("$1", Qnil, get_match_data, Qnil, 1);
- rb_define_variable("$2", Qnil, get_match_data, Qnil, 2);
- rb_define_variable("$3", Qnil, get_match_data, Qnil, 3);
- rb_define_variable("$4", Qnil, get_match_data, Qnil, 4);
- rb_define_variable("$5", Qnil, get_match_data, Qnil, 5);
- rb_define_variable("$6", Qnil, get_match_data, Qnil, 6);
- rb_define_variable("$7", Qnil, get_match_data, Qnil, 7);
- rb_define_variable("$8", Qnil, get_match_data, Qnil, 8);
- rb_define_variable("$9", Qnil, get_match_data, Qnil, 9);
-
- rb_define_variable("$KANJI", Qnil, kanji_var_get, kanji_var_set, 0);
-
rb_define_variable("$=", &ignorecase, Qnil, Qnil, 0);
C_Regexp = rb_define_class("Regexp", C_Object);