diff options
Diffstat (limited to 'string.c')
-rw-r--r-- | string.c | 189 |
1 files changed, 131 insertions, 58 deletions
@@ -6,7 +6,7 @@ $Date$ created at: Mon Aug 9 17:12:58 JST 1993 - Copyright (C) 1993-1996 Yukihiro Matsumoto + Copyright (C) 1993-1998 Yukihiro Matsumoto ************************************************/ @@ -100,6 +100,21 @@ str_new4(orig) return (VALUE)str; } +static void +str_assign(str, str2) + VALUE str, str2; +{ + if (NIL_P(str2) || str == str2) return; + if (!RSTRING(str)->orig && RSTRING(str)->ptr) + free(RSTRING(str)->ptr); + RSTRING(str)->ptr = RSTRING(str2)->ptr; + RSTRING(str)->len = RSTRING(str2)->len; + RSTRING(str)->orig = RSTRING(str2)->orig; + RSTRING(str2)->ptr = 0; /* abandon str2 */ + RSTRING(str2)->len = 0; + if (str_tainted(str2)) str_taint(str); +} + static ID pr_str; VALUE @@ -141,12 +156,12 @@ str_dup(str) } static VALUE -str_s_new(class, orig) - VALUE class; +str_s_new(klass, orig) + VALUE klass; VALUE orig; { NEWOBJ(str, struct RString); - OBJSETUP(str, class, T_STRING); + OBJSETUP(str, klass, T_STRING); orig = obj_as_string(orig); str->len = RSTRING(orig)->len; @@ -171,6 +186,15 @@ str_length(str) return INT2FIX(RSTRING(str)->len); } +static VALUE +str_empty(str) + VALUE str; +{ + if (RSTRING(str)->len == 0) + return TRUE; + return FALSE; +} + VALUE str_plus(str1, str2) VALUE str1, str2; @@ -185,7 +209,7 @@ str_plus(str1, str2) if (str_tainted(str1) || str_tainted(str2)) return str_taint(str3); - return (VALUE)str3; + return str3; } VALUE @@ -208,7 +232,7 @@ str_times(str, times) RSTRING(str2)->ptr[RSTRING(str2)->len] = '\0'; if (str_tainted(str)) { - return str_taint((VALUE)str2); + return str_taint(str2); } return str2; @@ -371,7 +395,7 @@ str_resize(str, len) RSTRING(str)->len = len; RSTRING(str)->ptr[len] = '\0'; /* sentinel */ } - return (VALUE)str; + return str; } VALUE @@ -696,6 +720,16 @@ str_succ(orig) return str; } +static VALUE +str_succ_bang(str) + VALUE str; +{ + str_modify(str); + str_assign(str, str_succ(str)); + + return str; +} + VALUE str_upto(beg, end) VALUE beg, end; @@ -735,7 +769,7 @@ str_aref(str, indx) if (idx < 0 || RSTRING(str)->len <= idx) { return Qnil; } - return (VALUE)INT2FIX(RSTRING(str)->ptr[idx] & 0xff); + return INT2FIX(RSTRING(str)->ptr[idx] & 0xff); case T_REGEXP: if (str_match(str, indx)) @@ -863,15 +897,16 @@ str_sub_s(str, pat, val, once) repl = reg_regsub(val, str, regs); str_cat(result, RSTRING(repl)->ptr, RSTRING(repl)->len); - if (END(0) == offset) { + if (BEG(0) == END(0)) { + int len = ismbchar(RSTRING(str)->ptr[END(0)])?2:1; /* * Always consume at least one character of the input string * in order to prevent infinite loops. */ - if (RSTRING(str)->len > 0) { - str_cat(result, RSTRING(str)->ptr+END(0), 1); + if (RSTRING(str)->len > END(0)) { + str_cat(result, RSTRING(str)->ptr+END(0), len); } - offset = END(0)+1; + offset = END(0)+len; } else { offset = END(0); @@ -900,13 +935,10 @@ str_sub_f(str, pat, val, once) str_modify(str); result = str_sub_s(str, pat, val, once); - if (NIL_P(result)) return Qnil; - str_resize(str, RSTRING(result)->len); - memcpy(RSTRING(str)->ptr, RSTRING(result)->ptr, RSTRING(result)->len); - if (str_tainted(result)) str_taint(str); + str_assign(str, result); - return (VALUE)str; + return str; } static VALUE @@ -939,27 +971,30 @@ str_sub_iter_s(str, pat, once) result = str_new(0,0); n = 0; offset = 0; while ((beg=reg_search(pat, str, offset, 0)) >= 0) { - n++; - null = 0; + n++; + regs = RMATCH(backref_get())->regs; str_cat(result, RSTRING(str)->ptr+offset, beg-offset); - regs = RMATCH(backref_get())->regs; - if (END(0) == offset) { - null = 1; - offset = END(0)+1; + val = obj_as_string(rb_yield(reg_nth_match(0, backref_get()))); + str_cat(result, RSTRING(val)->ptr, RSTRING(val)->len); + + if (BEG(0) == END(0)) { + int len = ismbchar(RSTRING(str)->ptr[END(0)])?2:1; + + /* + * Always consume at least one character of the input string + * in order to prevent infinite loops. + */ + if (RSTRING(str)->len > END(0)) { + str_cat(result, RSTRING(str)->ptr+END(0), len); + } + offset = END(0)+len; } else { offset = END(0); } - val = rb_yield(reg_nth_match(0, backref_get())); - val = obj_as_string(val); - str_cat(result, RSTRING(val)->ptr, RSTRING(val)->len); - if (null && RSTRING(str)->len) { - str_cat(result, RSTRING(str)->ptr+offset-1, 1); - } - if (once) break; if (offset >= STRLEN(str)) break; } @@ -981,12 +1016,10 @@ str_sub_iter_f(str, pat, once) str_modify(str); result = str_sub_iter_s(str, pat, once); - if (NIL_P(result)) return Qnil; - str_resize(str, RSTRING(result)->len); - memcpy(RSTRING(str)->ptr, RSTRING(result)->ptr, RSTRING(result)->len); + str_assign(str, result); - return (VALUE)str; + return str; } static VALUE @@ -1005,7 +1038,12 @@ str_aset(str, indx, val) if (idx < 0 || RSTRING(str)->len <= idx) { IndexError("index %d out of range [0..%d]", idx, RSTRING(str)->len-1); } - RSTRING(str)->ptr[idx] = FIX2INT(val) & 0xff; + if (TYPE(val) == T_STRING) { + str_replace(str, idx, 1, val); + } + else { + RSTRING(str)->ptr[idx] = NUM2INT(val) & 0xff; + } return val; case T_REGEXP: @@ -1131,6 +1169,20 @@ str_gsub(argc, argv, str) } static VALUE +str_replace_method(str, str2) + VALUE str, str2; +{ + Check_Type(str2, T_STRING); + + str_modify(str); + str_resize(str, RSTRING(str2)->len); + memcpy(RSTRING(str)->ptr, RSTRING(str2)->ptr, RSTRING(str2)->len); + if (str_tainted(str2)) str_taint(str); + + return str; +} + +static VALUE uscore_get() { VALUE line; @@ -1227,7 +1279,7 @@ str_reverse_bang(str) } MEMCPY(RSTRING(str)->ptr, p, char, RSTRING(str)->len); - return (VALUE)str; + return str; } static VALUE @@ -1397,13 +1449,16 @@ str_upcase_bang(str) str_modify(str); s = RSTRING(str)->ptr; send = s + RSTRING(str)->len; while (s < send) { - if (islower(*s)) { + if (ismbchar(*s)) { + s++; + } + else if (islower(*s)) { *s = toupper(*s); } s++; } - return (VALUE)str; + return str; } static VALUE @@ -1422,13 +1477,16 @@ str_downcase_bang(str) str_modify(str); s = RSTRING(str)->ptr; send = s + RSTRING(str)->len; while (s < send) { - if (isupper(*s)) { + if (ismbchar(*s)) { + s++; + } + else if (isupper(*s)) { *s = tolower(*s); } s++; } - return (VALUE)str; + return str; } static VALUE @@ -1449,11 +1507,14 @@ str_capitalize_bang(str) if (islower(*s)) *s = toupper(*s); while (++s < send) { - if (isupper(*s)) { + if (ismbchar(*s)) { + s++; + } + else if (isupper(*s)) { *s = tolower(*s); } } - return (VALUE)str; + return str; } static VALUE @@ -1472,7 +1533,10 @@ str_swapcase_bang(str) str_modify(str); s = RSTRING(str)->ptr; send = s + RSTRING(str)->len; while (s < send) { - if (isupper(*s)) { + if (ismbchar(*s)) { + s++; + } + else if (isupper(*s)) { *s = tolower(*s); } else if (islower(*s)) { @@ -1481,7 +1545,7 @@ str_swapcase_bang(str) s++; } - return (VALUE)str; + return str; } static VALUE @@ -1611,7 +1675,7 @@ tr_trans(str, src, repl, sflag) *t = '\0'; if (sflag) RSTRING(str)->len = (t - RSTRING(str)->ptr); - return (VALUE)str; + return str; } static VALUE @@ -1675,7 +1739,7 @@ str_delete_bang(str1, str2) *t = '\0'; RSTRING(str1)->len = t - RSTRING(str1)->ptr; - return (VALUE)str1; + return str1; } static VALUE @@ -1718,7 +1782,7 @@ tr_squeeze(str1, str2) *t = '\0'; RSTRING(str1)->len = t - RSTRING(str1)->ptr; - return (VALUE)str1; + return str1; } static VALUE @@ -2100,7 +2164,7 @@ str_strip_bang(str) RSTRING(str)->ptr[RSTRING(str)->len] = '\0'; } - return (VALUE)str; + return str; } static VALUE @@ -2188,7 +2252,13 @@ static VALUE str_oct(str) VALUE str; { - return str2inum(RSTRING(str)->ptr, 8); + int base = 8; + + if (RSTRING(str)->len > 2 && RSTRING(str)->ptr[0] == '0' && + (RSTRING(str)->ptr[1] == 'x' || RSTRING(str)->ptr[1] == 'X')) { + base = 16; + } + return str2inum(RSTRING(str)->ptr, base); } static VALUE @@ -2229,7 +2299,7 @@ str_sum(argc, argv, str) else bits = NUM2INT(vbits); p = RSTRING(str)->ptr; pend = p + RSTRING(str)->len; - if (bits > 32) { + if (bits > sizeof(UINT)*CHAR_BIT) { VALUE res = INT2FIX(0); VALUE mod; @@ -2238,20 +2308,23 @@ str_sum(argc, argv, str) while (p < pend) { res = rb_funcall(res, '+', 1, INT2FIX((UINT)*p)); - res = rb_funcall(res, '%', 1, mod); p++; } + res = rb_funcall(res, '&', 1, mod); return res; } else { UINT res = 0; UINT mod = (1<<bits)-1; + if (mod == 0) { + mod = -1; + } while (p < pend) { res += (UINT)*p; - res %= mod; p++; } + res &= mod; return int2inum(res); } } @@ -2265,7 +2338,7 @@ str_ljust(str, w) VALUE res; UCHAR *p, *pend; - if (RSTRING(str)->len >= width) return (VALUE)str; + if (RSTRING(str)->len >= width) return str; res = str_new(0, width); memcpy(RSTRING(res)->ptr, RSTRING(str)->ptr, RSTRING(str)->len); p = RSTRING(res)->ptr + RSTRING(str)->len; pend = RSTRING(res)->ptr + width; @@ -2284,7 +2357,7 @@ str_rjust(str, w) VALUE res; UCHAR *p, *pend; - if (RSTRING(str)->len >= width) return (VALUE)str; + if (RSTRING(str)->len >= width) return str; res = str_new(0, width); p = RSTRING(res)->ptr; pend = p + width - RSTRING(str)->len; while (p < pend) { @@ -2304,7 +2377,7 @@ str_center(str, w) UCHAR *p, *pend; int n; - if (RSTRING(str)->len >= width) return (VALUE)str; + if (RSTRING(str)->len >= width) return str; res = str_new(0, width); n = (width - RSTRING(str)->len)/2; p = RSTRING(res)->ptr; pend = p + n; @@ -2345,12 +2418,15 @@ Init_String() rb_define_method(cString, "[]=", str_aset_method, -1); rb_define_method(cString, "length", str_length, 0); rb_define_alias(cString, "size", "length"); + rb_define_method(cString, "empty?", str_empty, 0); rb_define_method(cString, "=~", str_match, 1); rb_define_method(cString, "~", str_match2, 0); rb_define_method(cString, "succ", str_succ, 0); + rb_define_method(cString, "succ!", str_succ_bang, 0); rb_define_method(cString, "upto", str_upto, 1); rb_define_method(cString, "index", str_index_method, -1); rb_define_method(cString, "rindex", str_rindex, -1); + rb_define_method(cString, "replace", str_replace_method, 1); rb_define_method(cString, "freeze", str_freeze, 0); rb_define_method(cString, "frozen?", str_frozen_p, 0); @@ -2434,7 +2510,4 @@ Init_String() rb_define_global_function("split", f_split, -1); pr_str = rb_intern("to_s"); - - /* Fix-up initialize ordering */ - RCLASS(eGlobalExit)->super = cString; } |