From ff95039936708914c3f973ce26f1991382b2c358 Mon Sep 17 00:00:00 2001 From: aamine Date: Thu, 28 Mar 2002 08:53:24 +0000 Subject: * ext/strscan/strscan.c: add taint check. * ext/strscan/strscan.c: #getch/#get_byte should set regexp registers. * ext/strscan/strscan.c: remove useless #include directive. * ext/strscan/strscan.c: refactor struct strscanner. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@2298 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ext/strscan/strscan.c | 290 +++++++++++++++++++++++++++++--------------------- 1 file changed, 169 insertions(+), 121 deletions(-) (limited to 'ext') diff --git a/ext/strscan/strscan.c b/ext/strscan/strscan.c index 87854c773d..55c17a179a 100644 --- a/ext/strscan/strscan.c +++ b/ext/strscan/strscan.c @@ -26,37 +26,66 @@ struct strscanner { + /* multi-purpose flags */ unsigned long flags; + + /* the string to scan */ VALUE str; - long end2; - long beg1; - long end1; - long idx; + + /* scan pointers */ + long prev; /* legal only when MATCHED_P(s) */ + long curr; /* always legal */ + + /* the regexp register; legal only when last match had successed */ struct re_registers regs; }; #define S_PTR(s) (RSTRING(s->str)->ptr) #define S_LEN(s) (RSTRING(s->str)->len) #define S_END(s) (S_PTR(s) + S_LEN(s)) -#define CURPTR(s) (S_PTR(s) + s->idx) - -#define MATCH_FLAG (1UL) -#define CLEAR_MATCH_STATUS(s) s->flags &= ~MATCH_FLAG -#define MATCHED(s,i,len) do {\ - s->flags |= MATCH_FLAG; \ - s->end2 = s->idx; \ - s->beg1 = i; \ - s->end1 = i + len; \ -} while (0) -#define MATCHED_P(s) (s->flags & MATCH_FLAG) +#define CURPTR(s) (S_PTR(s) + s->curr) +#define S_RESTLEN(s) (S_LEN(s) - s->curr) + +#define FLAG_MATCHED (1UL) + +#define CLEAR_MATCH_STATUS(s) s->flags &= ~FLAG_MATCHED +#define MATCHED(s) s->flags |= FLAG_MATCHED +#define MATCHED_P(s) (s->flags & FLAG_MATCHED) #define GET_SCANNER(obj,var) Data_Get_Struct(obj, struct strscanner, var) -#define SCAN_FINISHED(s) ((s)->idx >= RSTRING(p->str)->len) +#define SCAN_FINISHED(s) ((s)->curr >= RSTRING(p->str)->len) static VALUE StringScanner; static VALUE ScanError; +/* ------------------------------------------------------------- */ + +static VALUE +infect(str, p) + VALUE str; + struct strscanner *p; +{ + OBJ_INFECT(str, p->str); + return str; +} + +static VALUE +extract_range(p, beg_i, end_i) + struct strscanner *p; + long beg_i, end_i; +{ + return infect(rb_str_new(S_PTR(p) + beg_i, end_i - beg_i), p); +} + +static VALUE +extract_beg_len(p, beg_i, len) + struct strscanner *p; + long beg_i, len; +{ + return infect(rb_str_new(S_PTR(p) + beg_i, len), p); +} + /* ------------------------------------------------------------- */ static VALUE @@ -113,7 +142,7 @@ strscan_reset(self) struct strscanner *p; GET_SCANNER(self, p); - p->idx = 0; + p->curr = 0; CLEAR_MATCH_STATUS(p); return self; } @@ -126,7 +155,7 @@ strscan_terminate(self) struct strscanner *p; GET_SCANNER(self, p); - p->idx = S_LEN(p); + p->curr = S_LEN(p); CLEAR_MATCH_STATUS(p); return self; } @@ -151,7 +180,7 @@ strscan_set_string(self, str) Check_Type(str, T_STRING); p->str = rb_str_dup(str); rb_obj_freeze(p->str); - p->idx = 0; + p->curr = 0; CLEAR_MATCH_STATUS(p); return str; } @@ -163,7 +192,7 @@ strscan_get_pos(self) struct strscanner *p; GET_SCANNER(self, p); - return INT2FIX(p->idx); + return INT2FIX(p->curr); } static VALUE @@ -178,11 +207,13 @@ strscan_set_pos(self, v) if (i < 0) i += S_LEN(p); if (i < 0) rb_raise(rb_eRangeError, "index out of range"); if (i > S_LEN(p)) rb_raise(rb_eRangeError, "index out of range"); - p->idx = i; + p->curr = i; return INT2FIX(i); } +/* I should implement this function? */ +#define strscan_prepare_re(re) /* none */ static VALUE strscan_do_scan(self, regex, succptr, getstr, headonly) @@ -191,51 +222,42 @@ strscan_do_scan(self, regex, succptr, getstr, headonly) { struct strscanner *p; int ret; - char *pbeg; - long plen; Check_Type(regex, T_REGEXP); GET_SCANNER(self, p); - pbeg = CURPTR(p); - plen = S_LEN(p) - p->idx; - CLEAR_MATCH_STATUS(p); + strscan_prepare_re(regex); if (headonly) { ret = re_match(RREGEXP(regex)->ptr, - pbeg, plen, + CURPTR(p), S_RESTLEN(p), 0, &(p->regs)); } else { ret = re_search(RREGEXP(regex)->ptr, - pbeg, plen, + CURPTR(p), S_RESTLEN(p), 0, - plen, + S_RESTLEN(p), &(p->regs)); } - if (ret == -2) { - rb_raise(ScanError, "regexp buffer overflow"); - return Qnil; /* not reach */ - } - else if (ret < 0) { + if (ret == -2) rb_raise(ScanError, "regexp buffer overflow"); + if (ret < 0) { /* not matched */ return Qnil; } + + MATCHED(p); + p->prev = p->curr; + if (succptr) { + p->curr += p->regs.end[0]; + } + if (getstr) { + return extract_beg_len(p, p->prev, p->regs.end[0]); + } else { - /* matched */ - MATCHED(p, p->idx + p->regs.beg[0], - p->regs.end[0] - p->regs.beg[0]); - if (succptr) { - p->idx += p->regs.end[0]; - } - if (getstr) { - return rb_str_new(pbeg, p->regs.end[0]); - } - else { - return INT2FIX(p->regs.end[0]); - } + return INT2FIX(p->regs.end[0]); } } @@ -308,6 +330,20 @@ strscan_search_full(self, re, s, f) return strscan_do_scan(self, re, RTEST(s), RTEST(f), 0); } +/* DANGEROUS; need to synchronize with regex.c */ +static void +adjust_registers_to_matched(p) + struct strscanner *p; +{ + if (p->regs.allocated == 0) { + p->regs.beg = ALLOC_N(int, RE_NREGS); + p->regs.end = ALLOC_N(int, RE_NREGS); + p->regs.allocated = RE_NREGS; + } + p->regs.num_regs = 1; + p->regs.beg[0] = 0; + p->regs.end[0] = p->curr - p->prev; +} static VALUE strscan_getch(self) @@ -322,11 +358,14 @@ strscan_getch(self) return Qnil; len = mbclen(*CURPTR(p)); - if (p->idx + len > S_LEN(p)) - len = S_LEN(p) - p->idx; - MATCHED(p, p->idx, len); - p->idx += len; - return rb_str_new(S_PTR(p) + p->beg1, p->end1 - p->beg1); + if (p->curr + len > S_LEN(p)) + len = S_LEN(p) - p->curr; + p->prev = p->curr; + p->curr += len; + MATCHED(p); + adjust_registers_to_matched(p); + return extract_range(p, p->prev + p->regs.beg[0], + p->prev + p->regs.end[0]); } static VALUE @@ -340,9 +379,12 @@ strscan_get_byte(self) if (SCAN_FINISHED(p)) return Qnil; - MATCHED(p, p->idx, 1); - p->idx++; - return rb_str_new(S_PTR(p) + p->beg1, 1); + p->prev = p->curr; + p->curr++; + MATCHED(p); + adjust_registers_to_matched(p); + return extract_range(p, p->prev + p->regs.beg[0], + p->prev + p->regs.end[0]); } @@ -356,71 +398,55 @@ strscan_peek(self, vlen) GET_SCANNER(self, p); len = NUM2LONG(vlen); - if (SCAN_FINISHED(p)) { - return rb_str_new("", 0); - } - else { - if (p->idx + len > S_LEN(p)) - len = S_LEN(p) - p->idx; - return rb_str_new(CURPTR(p), len); - } + if (SCAN_FINISHED(p)) + return infect(rb_str_new("", 0), p); + + if (p->curr + len > S_LEN(p)) + len = S_LEN(p) - p->curr; + return extract_beg_len(p, p->curr, len); } static VALUE -strscan_eos_p(self) +strscan_unscan(self) VALUE self; { struct strscanner *p; GET_SCANNER(self, p); - if (SCAN_FINISHED(p)) - return Qtrue; - else - return Qfalse; + if (! MATCHED_P(p)) + rb_raise(ScanError, "cannot unscan: prev match had failed"); + + p->curr = p->prev; + CLEAR_MATCH_STATUS(p); + return self; } + static VALUE -strscan_rest_p(self) +strscan_eos_p(self) VALUE self; { struct strscanner *p; GET_SCANNER(self, p); if (SCAN_FINISHED(p)) - return Qfalse; - else return Qtrue; + else + return Qfalse; } static VALUE -strscan_rest(self) +strscan_rest_p(self) VALUE self; { struct strscanner *p; GET_SCANNER(self, p); if (SCAN_FINISHED(p)) - return rb_str_new("", 0); + return Qfalse; else - return rb_str_new(CURPTR(p), S_LEN(p) - p->idx); -} - -static VALUE -strscan_rest_size(self) - VALUE self; -{ - struct strscanner *p; - - GET_SCANNER(self, p); - if (SCAN_FINISHED(p)) { - return INT2FIX(0); - } - else { - long tmp; - tmp = S_LEN(p) - p->idx; - return INT2FIX(tmp); - } + return Qtrue; } @@ -445,7 +471,9 @@ strscan_matched(self) GET_SCANNER(self, p); if (! MATCHED_P(p)) return Qnil; - return rb_str_new(S_PTR(p) + p->beg1, p->end1 - p->beg1); + + return extract_range(p, p->prev + p->regs.beg[0], + p->prev + p->regs.end[0]); } static VALUE @@ -457,22 +485,28 @@ strscan_matched_size(self) GET_SCANNER(self, p); if (! MATCHED_P(p)) return Qnil; - return INT2NUM(p->end1 - p->beg1); + return INT2NUM(p->regs.end[0] - p->regs.beg[0]); } static VALUE -strscan_unscan(self) - VALUE self; +strscan_aref(self, idx) + VALUE self, idx; { struct strscanner *p; + long i; GET_SCANNER(self, p); - if (! MATCHED_P(p)) - rb_raise(ScanError, "cannot unscan: prev match had failed"); + if (! MATCHED_P(p)) return Qnil; + + i = NUM2LONG(idx); + if (i < 0) + i += p->regs.num_regs; + if (i < 0) return Qnil; + if (i >= p->regs.num_regs) return Qnil; + if (p->regs.beg[i] == -1) return Qnil; - p->idx = p->end2; - CLEAR_MATCH_STATUS(p); - return self; + return extract_range(p, p->prev + p->regs.beg[i], + p->prev + p->regs.end[i]); } static VALUE @@ -484,7 +518,7 @@ strscan_pre_match(self) GET_SCANNER(self, p); if (! MATCHED_P(p)) return Qnil; - return rb_str_new(S_PTR(p) + p->end2, p->beg1 - p->end2); + return extract_range(p, 0, p->prev + p->regs.beg[0]); } static VALUE @@ -496,31 +530,42 @@ strscan_post_match(self) GET_SCANNER(self, p); if (! MATCHED_P(p)) return Qnil; - return rb_str_new(S_PTR(p) + p->end1, S_LEN(p) - p->end1); + return extract_range(p, p->prev + p->regs.end[0], S_LEN(p)); } + static VALUE -strscan_aref(self, idx) - VALUE self, idx; +strscan_rest(self) + VALUE self; +{ + struct strscanner *p; + + GET_SCANNER(self, p); + if (SCAN_FINISHED(p)) { + return infect(rb_str_new("", 0), p); + } + return extract_range(p, p->curr, S_LEN(p)); +} + +static VALUE +strscan_rest_size(self) + VALUE self; { struct strscanner *p; long i; GET_SCANNER(self, p); - if (! MATCHED_P(p)) return Qnil; - - i = NUM2LONG(idx); - if (i < 0) return Qnil; - if (i >= p->regs.num_regs) return Qnil; - if (p->regs.beg[i] == -1) return Qnil; + if (SCAN_FINISHED(p)) { + return INT2FIX(0); + } - return rb_str_new(S_PTR(p) + p->end2 + p->regs.beg[i], - p->regs.end[i] - p->regs.beg[i]); + i = S_LEN(p) - p->curr; + return INT2FIX(i); } static void -cat_i_char(ret, c) +catchar(ret, c) VALUE ret; int c; { @@ -544,7 +589,7 @@ strscan_inspect(self) GET_SCANNER(self, p); len = sprintf(buf, "#<%s %ld/%ld", rb_class2name(CLASS_OF(self)), - p->idx, S_LEN(p)); + p->curr, S_LEN(p)); ret = rb_str_new(buf, len); if (SCAN_FINISHED(p)) { @@ -560,7 +605,7 @@ strscan_inspect(self) if (sp > S_PTR(p)) rb_str_cat(ret, "...", 3); for (; sp < CURPTR(p); sp++) { - cat_i_char(ret, *sp); + catchar(ret, *sp); } rb_str_cat(ret, "\"", 1); } @@ -572,7 +617,7 @@ strscan_inspect(self) if (e > S_END(p)) e = S_END(p); rb_str_cat(ret, " \"", 2); for (; sp < e; sp++) { - cat_i_char(ret, *sp); + catchar(ret, *sp); } if (sp < S_END(p)) rb_str_cat(ret, "...", 3); @@ -580,9 +625,10 @@ strscan_inspect(self) } rb_str_cat(ret, ">", 1); } - return ret; + return infect(ret, p); } +/* ------------------------------------------------------------- */ void Init_strscan() @@ -636,21 +682,23 @@ Init_strscan() rb_define_method(StringScanner, "peek", strscan_peek, 1); rb_define_method(StringScanner, "peep", strscan_peek, 1); + rb_define_method(StringScanner, "unscan", strscan_unscan, 0); + rb_define_method(StringScanner, "eos?", strscan_eos_p, 0); rb_define_method(StringScanner, "empty?", strscan_eos_p, 0); rb_define_method(StringScanner, "rest?", strscan_rest_p, 0); - rb_define_method(StringScanner, "rest", strscan_rest, 0); - rb_define_method(StringScanner, "rest_size", strscan_rest_size, 0); - rb_define_method(StringScanner, "restsize", strscan_rest_size, 0); rb_define_method(StringScanner, "matched?", strscan_matched_p, 0); rb_define_method(StringScanner, "matched", strscan_matched, 0); rb_define_method(StringScanner, "matched_size", strscan_matched_size, 0); rb_define_method(StringScanner, "matchedsize", strscan_matched_size, 0); - rb_define_method(StringScanner, "unscan", strscan_unscan, 0); + rb_define_method(StringScanner, "[]", strscan_aref, 1); rb_define_method(StringScanner, "pre_match", strscan_pre_match, 0); rb_define_method(StringScanner, "post_match", strscan_post_match, 0); - rb_define_method(StringScanner, "[]", strscan_aref, 1); + + rb_define_method(StringScanner, "rest", strscan_rest, 0); + rb_define_method(StringScanner, "rest_size", strscan_rest_size, 0); + rb_define_method(StringScanner, "restsize", strscan_rest_size, 0); rb_define_method(StringScanner, "inspect", strscan_inspect, 0); } -- cgit v1.2.3