diff options
author | Yukihiro Matsumoto <matz@ruby-lang.org> | 1995-03-15 14:59:18 +0900 |
---|---|---|
committer | Takashi Kokubun <takashikkbn@gmail.com> | 2019-08-17 22:09:31 +0900 |
commit | d349889e770a2078c247d9d28070e86a54b856f4 (patch) | |
tree | 8bdbf87f3ff78ef237c4ff8efa061fbb8dc5a226 /re.c | |
parent | 881c5a9c320c637ee0f6526b40cf70c1379ab656 (diff) |
version 0.69v0_69
https://cache.ruby-lang.org/pub/ruby/1.0/ruby-0.69.tar.gz
Wed Mar 15 14:59:18 1995 Yukihiro Matsumoto (matz@ix-02)
* version 0.69
* eval.c(method_missing): unknownから名称変更.
* eval.c(single_method_added): 特異メソッドが定義された時に呼ばれ
るメソッド.hookとして使える.実際に定義される直前に呼ばれる.
Tue Mar 14 14:46:44 1995 Yukihiro Matsumoto (matz@ix-02)
* ruby.c(proc_options): 引数の解析を自分でやることにより引数指定の
方法がperlに近付いた.getopt_longはもう使わない.
* dir.c(glob): `{}'のネストを許すようにした.
Mon Mar 13 17:56:25 1995 Yukihiro Matsumoto (matz@ix-02)
* glob.c: Glob(ワイルドカードオブジェクト)はなくなった.ワイルドカー
ドの展開はDir.glob(文字列)を使う.ワイルドカードのマッチは正規表
現で代用.
Fri Mar 10 18:35:46 1995 Yukihiro Matsumoto (matz@ix-02)
* eval.c: Mathのようなモジュールは自分自身でextendする.
* eval.c: クラスやモジュールを定義した既に同名のものがあれば追加定
義となるように.ただし.superクラスの違いなどはチェックする.
* regex.c: debug.
* math.c: 定数PIとEを定義.
Thu Mar 9 21:35:12 1995 Yukihiro Matsumoto (matz@ix-02)
* regex.c: EUC,SJISモードでは0x80以上の8進,16進リテラルを禁止.
* regex.c: クラス内でも数値リテラル・文字クラスが使えるようした.
Wed Mar 8 17:39:05 1995 Yukihiro Matsumoto (matz@ix-02)
* regex.c: \200など括弧の数以上の表現は8進リテラルと解釈する.ただ
し,\1から\9までは例外.
* regex.c: \9以上のリファレンスも有効にした.
Tue Mar 7 14:26:01 1995 Yukihiro Matsumoto (matz@ix-02)
* eval.c(public/private): スコープ制御メソッドの名称変更.静的なア
クセスも出来るようにしてみたが,不採用.
Mon Mar 6 19:34:32 1995 Yukihiro Matsumoto (matz@ix-02)
* eval.c(inlcude): メソッド化.動的にモジュールをインクルードでき
るように.さらに任意のオブジェクトにもモジュールをインクルードで
きるメソッド `extend'も用意した.
* parse.y: 文法からincludeを削除.メソッド化.
Tue Feb 28 15:35:10 1995 Yukihiro Matsumoto (matz@ix-02)
* parse.y: 配列,連想配列の最後に`,'をおけるように.
Diffstat (limited to 're.c')
-rw-r--r-- | re.c | 163 |
1 files changed, 74 insertions, 89 deletions
@@ -13,7 +13,6 @@ #include "ruby.h" #include "re.h" -/* Generate compiled regular expressions */ #if 'a' == 97 /* it's ascii */ static char casetable[] = { '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', @@ -102,22 +101,16 @@ int len; rp = ALLOC(Regexp); MEMZERO((char *)rp, Regexp, 1); - rp->pat.buffer = ALLOC_N(char, 16); - rp->pat.allocated = 16; - rp->pat.fastmap = ALLOC_N(char, 256); + rp->buffer = ALLOC_N(char, 16); + rp->allocated = 16; + rp->fastmap = ALLOC_N(char, 256); - if ((err = re_compile_pattern(s, (size_t)len, &(rp->pat))) != NULL) + if ((err = re_compile_pattern(s, (size_t)len, rp)) != NULL) Fail("%s: /%s/", err, s); return rp; } -struct match { - UINT len; - char *ptr; - struct re_registers regs; -}; - struct match last_match; VALUE ignorecase; @@ -134,14 +127,20 @@ research(reg, str, start) if (FL_TEST(reg, FL_USER1)) { casefold = TRUE; } - if (casefold) - reg->ptr->pat.translate = casetable; - else - reg->ptr->pat.translate = NULL; + if (casefold) { + if (reg->ptr->translate != casetable) { + reg->ptr->translate = casetable; + reg->ptr->fastmap_accurate = 0; + } + } + else if (reg->ptr->translate) { + reg->ptr->translate = NULL; + reg->ptr->fastmap_accurate = 0; + } if (start > str->len) return -1; - result = re_search(&(reg->ptr->pat), str->ptr, str->len, - start, str->len - start, &(reg->ptr->regs)); + result = re_search(reg->ptr, str->ptr, str->len, + start, str->len - start, &last_match.regs); if (result >= 0) { last_match.len = str->len; @@ -153,24 +152,23 @@ research(reg, str, start) } memcpy(last_match.ptr, str->ptr, last_match.len); last_match.ptr[last_match.len] = '\0'; - last_match.regs = reg->ptr->regs; } return result; } -static VALUE -nth_match(nth) +VALUE +re_nth_match(nth) int nth; { int start, end, len; - if (nth >= RE_NREGS) { - Fail("match out of range %d, %d", nth, RE_NREGS); + if (nth >= last_match.regs.num_regs) { + return Qnil; } - start = last_match.regs.start[nth]; + start = BEG(nth); if (start == -1) return Qnil; - end = last_match.regs.end[nth]; + end = END(nth); len = end - start; return str_new(last_match.ptr + start, len); } @@ -179,7 +177,7 @@ VALUE re_last_match(id) ID id; { - return nth_match(0); + return re_nth_match(0); } static VALUE @@ -187,8 +185,8 @@ re_match_pre() { struct match *match; - if (last_match.regs.start[0] == -1) return Qnil; - return str_new(last_match.ptr, last_match.regs.start[0]); + if (BEG(0) == -1) return Qnil; + return str_new(last_match.ptr, BEG(0)); } static VALUE @@ -196,24 +194,22 @@ re_match_post() { struct match *match; - if (last_match.regs.start[0] == -1) return Qnil; - return str_new(last_match.ptr+last_match.regs.end[0], - last_match.len-last_match.regs.end[0]); + if (BEG(0) == -1) return Qnil; + return str_new(last_match.ptr+END(0), + last_match.len-END(0)); } static VALUE re_match_last() { - struct match *match; - int i; + int i, len; - if (last_match.regs.start[0] == -1) return Qnil; + if (BEG(0) == -1) return Qnil; - for (i=0; i<RE_NREGS; i++) { - if (last_match.regs.start[i] == -1) break; + for (i=last_match.regs.num_regs-1; BEG(i) == -1 && i > 0; i--) { } - if (i == RE_NREGS) return Qnil; - return nth_match(i-1); + if (i == 0) return Qnil; + return re_nth_match(i); } static VALUE @@ -221,7 +217,7 @@ get_match_data(id, nth) ID id; int nth; { - return nth_match(nth); + return re_nth_match(nth); } static void @@ -229,6 +225,10 @@ free_match(data) struct match *data; { free(data->ptr); + if (data->regs.allocated > 0) { + free(data->regs.beg); + free(data->regs.end); + } } static VALUE @@ -242,7 +242,8 @@ get_match() data->len = last_match.len; data->ptr = ALLOC_N(char, last_match.len+1); memcpy(data->ptr, last_match.ptr, data->len+1); - data->regs = last_match.regs; + data->regs.allocated = 0; + re_copy_registers(&data->regs, &last_match.regs); return data_new(data, free_match, Qnil); } @@ -280,8 +281,8 @@ void reg_free(rp) Regexp *rp; { - free(rp->pat.buffer); - free(rp->pat.fastmap); + free(rp->buffer); + free(rp->fastmap); free(rp); } @@ -472,10 +473,6 @@ re_regsub(str) if (c == '\\' && (*s == '\\' || *s == '&')) p = ++s; } else { - -#define BEG(no) last_match.regs.start[no] -#define END(no) last_match.regs.end[no] - if (BEG(no) == -1) continue; str_cat(val, last_match.ptr+BEG(no), END(no)-BEG(no)); } @@ -491,6 +488,19 @@ re_regsub(str) return val; } +static VALUE +kcode() +{ + switch (re_syntax_options & RE_MBCTYPE_MASK) { + case RE_MBCTYPE_SJIS: + return str_new2("SJIS"); + case RE_MBCTYPE_EUC: + return str_new2("EUC"); + default: + return str_new2("NONE"); + } +} + void rb_set_kanjicode(code) char *code; @@ -500,59 +510,46 @@ rb_set_kanjicode(code) switch (code[0]) { case 'E': case 'e': - obscure_syntax &= ~RE_MBCTYPE_MASK; - obscure_syntax |= RE_MBCTYPE_EUC; + re_syntax_options &= ~RE_MBCTYPE_MASK; + re_syntax_options |= RE_MBCTYPE_EUC; break; case 'S': case 's': - obscure_syntax &= ~RE_MBCTYPE_MASK; - obscure_syntax |= RE_MBCTYPE_SJIS; + re_syntax_options &= ~RE_MBCTYPE_MASK; + re_syntax_options |= RE_MBCTYPE_SJIS; break; default: case 'N': case 'n': set_no_conversion: - obscure_syntax &= ~RE_MBCTYPE_MASK; + re_syntax_options &= ~RE_MBCTYPE_MASK; break; } - re_set_syntax(obscure_syntax); + re_set_syntax(re_syntax_options); } -static VALUE -kanji_var_get() +void +rb_setup_kcode() { - switch (obscure_syntax & RE_MBCTYPE_MASK) { - case RE_MBCTYPE_SJIS: - return str_new2("SJIS"); - case RE_MBCTYPE_EUC: - return str_new2("EUC"); - default: - return Qnil; - } + rb_define_const(C_Object, "KCODE", kcode()); } -static void -kanji_var_set(val) - struct RString *val; -{ - if (val == Qnil) rb_set_kanjicode(Qnil); - Check_Type(val, T_STRING); - rb_set_kanjicode(val->ptr); -} +VALUE rb_readonly_hook(); void Init_Regexp() { int i; - obscure_syntax = RE_NO_BK_PARENS | RE_NO_BK_VBAR - | RE_CONTEXT_INDEP_OPS | RE_INTERVALS - | RE_NO_BK_CURLY_BRACES - | RE_MBCTYPE_EUC; - - for (i=0; i<RE_NREGS; i++) { - last_match.regs.start[i] = last_match.regs.end[i] = -1; - } + re_set_syntax(RE_NO_BK_PARENS | RE_NO_BK_VBAR + | RE_AWK_CLASS_HACK + | RE_INTERVALS + | RE_NO_BK_BRACES + | RE_BACKSLASH_ESCAPE_IN_LISTS +#ifdef DEFAULT_MBCTYPE + | DEFAULT_MBCTYPE +#endif +); rb_define_variable("$~", Qnil, get_match, set_match, 0); @@ -561,18 +558,6 @@ Init_Regexp() rb_define_variable("$'", Qnil, re_match_post, Qnil, 0); rb_define_variable("$+", Qnil, re_match_last, Qnil, 0); - rb_define_variable("$1", Qnil, get_match_data, Qnil, 1); - rb_define_variable("$2", Qnil, get_match_data, Qnil, 2); - rb_define_variable("$3", Qnil, get_match_data, Qnil, 3); - rb_define_variable("$4", Qnil, get_match_data, Qnil, 4); - rb_define_variable("$5", Qnil, get_match_data, Qnil, 5); - rb_define_variable("$6", Qnil, get_match_data, Qnil, 6); - rb_define_variable("$7", Qnil, get_match_data, Qnil, 7); - rb_define_variable("$8", Qnil, get_match_data, Qnil, 8); - rb_define_variable("$9", Qnil, get_match_data, Qnil, 9); - - rb_define_variable("$KANJI", Qnil, kanji_var_get, kanji_var_set, 0); - rb_define_variable("$=", &ignorecase, Qnil, Qnil, 0); C_Regexp = rb_define_class("Regexp", C_Object); |