From d349889e770a2078c247d9d28070e86a54b856f4 Mon Sep 17 00:00:00 2001 From: Yukihiro Matsumoto Date: Wed, 15 Mar 1995 14:59:18 +0900 Subject: version 0.69 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit https://cache.ruby-lang.org/pub/ruby/1.0/ruby-0.69.tar.gz Wed Mar 15 14:59:18 1995 Yukihiro Matsumoto (matz@ix-02) * version 0.69 * eval.c(method_missing): unknownから名称変更. * eval.c(single_method_added): 特異メソッドが定義された時に呼ばれ るメソッド.hookとして使える.実際に定義される直前に呼ばれる. Tue Mar 14 14:46:44 1995 Yukihiro Matsumoto (matz@ix-02) * ruby.c(proc_options): 引数の解析を自分でやることにより引数指定の 方法がperlに近付いた.getopt_longはもう使わない. * dir.c(glob): `{}'のネストを許すようにした. Mon Mar 13 17:56:25 1995 Yukihiro Matsumoto (matz@ix-02) * glob.c: Glob(ワイルドカードオブジェクト)はなくなった.ワイルドカー ドの展開はDir.glob(文字列)を使う.ワイルドカードのマッチは正規表 現で代用. Fri Mar 10 18:35:46 1995 Yukihiro Matsumoto (matz@ix-02) * eval.c: Mathのようなモジュールは自分自身でextendする. * eval.c: クラスやモジュールを定義した既に同名のものがあれば追加定 義となるように.ただし.superクラスの違いなどはチェックする. * regex.c: debug. * math.c: 定数PIとEを定義. Thu Mar 9 21:35:12 1995 Yukihiro Matsumoto (matz@ix-02) * regex.c: EUC,SJISモードでは0x80以上の8進,16進リテラルを禁止. * regex.c: クラス内でも数値リテラル・文字クラスが使えるようした. Wed Mar 8 17:39:05 1995 Yukihiro Matsumoto (matz@ix-02) * regex.c: \200など括弧の数以上の表現は8進リテラルと解釈する.ただ し,\1から\9までは例外. * regex.c: \9以上のリファレンスも有効にした. Tue Mar 7 14:26:01 1995 Yukihiro Matsumoto (matz@ix-02) * eval.c(public/private): スコープ制御メソッドの名称変更.静的なア クセスも出来るようにしてみたが,不採用. Mon Mar 6 19:34:32 1995 Yukihiro Matsumoto (matz@ix-02) * eval.c(inlcude): メソッド化.動的にモジュールをインクルードでき るように.さらに任意のオブジェクトにもモジュールをインクルードで きるメソッド `extend'も用意した. * parse.y: 文法からincludeを削除.メソッド化. Tue Feb 28 15:35:10 1995 Yukihiro Matsumoto (matz@ix-02) * parse.y: 配列,連想配列の最後に`,'をおけるように. --- re.c | 163 ++++++++++++++++++++++++++++++------------------------------------- 1 file changed, 74 insertions(+), 89 deletions(-) (limited to 're.c') diff --git a/re.c b/re.c index bc83ec71db..0814f287a4 100644 --- a/re.c +++ b/re.c @@ -13,7 +13,6 @@ #include "ruby.h" #include "re.h" -/* Generate compiled regular expressions */ #if 'a' == 97 /* it's ascii */ static char casetable[] = { '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', @@ -102,22 +101,16 @@ int len; rp = ALLOC(Regexp); MEMZERO((char *)rp, Regexp, 1); - rp->pat.buffer = ALLOC_N(char, 16); - rp->pat.allocated = 16; - rp->pat.fastmap = ALLOC_N(char, 256); + rp->buffer = ALLOC_N(char, 16); + rp->allocated = 16; + rp->fastmap = ALLOC_N(char, 256); - if ((err = re_compile_pattern(s, (size_t)len, &(rp->pat))) != NULL) + if ((err = re_compile_pattern(s, (size_t)len, rp)) != NULL) Fail("%s: /%s/", err, s); return rp; } -struct match { - UINT len; - char *ptr; - struct re_registers regs; -}; - struct match last_match; VALUE ignorecase; @@ -134,14 +127,20 @@ research(reg, str, start) if (FL_TEST(reg, FL_USER1)) { casefold = TRUE; } - if (casefold) - reg->ptr->pat.translate = casetable; - else - reg->ptr->pat.translate = NULL; + if (casefold) { + if (reg->ptr->translate != casetable) { + reg->ptr->translate = casetable; + reg->ptr->fastmap_accurate = 0; + } + } + else if (reg->ptr->translate) { + reg->ptr->translate = NULL; + reg->ptr->fastmap_accurate = 0; + } if (start > str->len) return -1; - result = re_search(&(reg->ptr->pat), str->ptr, str->len, - start, str->len - start, &(reg->ptr->regs)); + result = re_search(reg->ptr, str->ptr, str->len, + start, str->len - start, &last_match.regs); if (result >= 0) { last_match.len = str->len; @@ -153,24 +152,23 @@ research(reg, str, start) } memcpy(last_match.ptr, str->ptr, last_match.len); last_match.ptr[last_match.len] = '\0'; - last_match.regs = reg->ptr->regs; } return result; } -static VALUE -nth_match(nth) +VALUE +re_nth_match(nth) int nth; { int start, end, len; - if (nth >= RE_NREGS) { - Fail("match out of range %d, %d", nth, RE_NREGS); + if (nth >= last_match.regs.num_regs) { + return Qnil; } - start = last_match.regs.start[nth]; + start = BEG(nth); if (start == -1) return Qnil; - end = last_match.regs.end[nth]; + end = END(nth); len = end - start; return str_new(last_match.ptr + start, len); } @@ -179,7 +177,7 @@ VALUE re_last_match(id) ID id; { - return nth_match(0); + return re_nth_match(0); } static VALUE @@ -187,8 +185,8 @@ re_match_pre() { struct match *match; - if (last_match.regs.start[0] == -1) return Qnil; - return str_new(last_match.ptr, last_match.regs.start[0]); + if (BEG(0) == -1) return Qnil; + return str_new(last_match.ptr, BEG(0)); } static VALUE @@ -196,24 +194,22 @@ re_match_post() { struct match *match; - if (last_match.regs.start[0] == -1) return Qnil; - return str_new(last_match.ptr+last_match.regs.end[0], - last_match.len-last_match.regs.end[0]); + if (BEG(0) == -1) return Qnil; + return str_new(last_match.ptr+END(0), + last_match.len-END(0)); } static VALUE re_match_last() { - struct match *match; - int i; + int i, len; - if (last_match.regs.start[0] == -1) return Qnil; + if (BEG(0) == -1) return Qnil; - for (i=0; i 0; i--) { } - if (i == RE_NREGS) return Qnil; - return nth_match(i-1); + if (i == 0) return Qnil; + return re_nth_match(i); } static VALUE @@ -221,7 +217,7 @@ get_match_data(id, nth) ID id; int nth; { - return nth_match(nth); + return re_nth_match(nth); } static void @@ -229,6 +225,10 @@ free_match(data) struct match *data; { free(data->ptr); + if (data->regs.allocated > 0) { + free(data->regs.beg); + free(data->regs.end); + } } static VALUE @@ -242,7 +242,8 @@ get_match() data->len = last_match.len; data->ptr = ALLOC_N(char, last_match.len+1); memcpy(data->ptr, last_match.ptr, data->len+1); - data->regs = last_match.regs; + data->regs.allocated = 0; + re_copy_registers(&data->regs, &last_match.regs); return data_new(data, free_match, Qnil); } @@ -280,8 +281,8 @@ void reg_free(rp) Regexp *rp; { - free(rp->pat.buffer); - free(rp->pat.fastmap); + free(rp->buffer); + free(rp->fastmap); free(rp); } @@ -472,10 +473,6 @@ re_regsub(str) if (c == '\\' && (*s == '\\' || *s == '&')) p = ++s; } else { - -#define BEG(no) last_match.regs.start[no] -#define END(no) last_match.regs.end[no] - if (BEG(no) == -1) continue; str_cat(val, last_match.ptr+BEG(no), END(no)-BEG(no)); } @@ -491,6 +488,19 @@ re_regsub(str) return val; } +static VALUE +kcode() +{ + switch (re_syntax_options & RE_MBCTYPE_MASK) { + case RE_MBCTYPE_SJIS: + return str_new2("SJIS"); + case RE_MBCTYPE_EUC: + return str_new2("EUC"); + default: + return str_new2("NONE"); + } +} + void rb_set_kanjicode(code) char *code; @@ -500,59 +510,46 @@ rb_set_kanjicode(code) switch (code[0]) { case 'E': case 'e': - obscure_syntax &= ~RE_MBCTYPE_MASK; - obscure_syntax |= RE_MBCTYPE_EUC; + re_syntax_options &= ~RE_MBCTYPE_MASK; + re_syntax_options |= RE_MBCTYPE_EUC; break; case 'S': case 's': - obscure_syntax &= ~RE_MBCTYPE_MASK; - obscure_syntax |= RE_MBCTYPE_SJIS; + re_syntax_options &= ~RE_MBCTYPE_MASK; + re_syntax_options |= RE_MBCTYPE_SJIS; break; default: case 'N': case 'n': set_no_conversion: - obscure_syntax &= ~RE_MBCTYPE_MASK; + re_syntax_options &= ~RE_MBCTYPE_MASK; break; } - re_set_syntax(obscure_syntax); + re_set_syntax(re_syntax_options); } -static VALUE -kanji_var_get() +void +rb_setup_kcode() { - switch (obscure_syntax & RE_MBCTYPE_MASK) { - case RE_MBCTYPE_SJIS: - return str_new2("SJIS"); - case RE_MBCTYPE_EUC: - return str_new2("EUC"); - default: - return Qnil; - } + rb_define_const(C_Object, "KCODE", kcode()); } -static void -kanji_var_set(val) - struct RString *val; -{ - if (val == Qnil) rb_set_kanjicode(Qnil); - Check_Type(val, T_STRING); - rb_set_kanjicode(val->ptr); -} +VALUE rb_readonly_hook(); void Init_Regexp() { int i; - obscure_syntax = RE_NO_BK_PARENS | RE_NO_BK_VBAR - | RE_CONTEXT_INDEP_OPS | RE_INTERVALS - | RE_NO_BK_CURLY_BRACES - | RE_MBCTYPE_EUC; - - for (i=0; i