diff options
Diffstat (limited to 'string.c')
-rw-r--r-- | string.c | 1248 |
1 files changed, 595 insertions, 653 deletions
@@ -22,23 +22,21 @@ #include <unistd.h> #endif -VALUE cString; - -#define STRLEN(s) RSTRING(s)->len +VALUE rb_cString; #define STR_FREEZE FL_USER1 #define STR_TAINT FL_USER2 #define STR_NO_ORIG FL_USER3 -extern VALUE RS; +extern VALUE rb_rs; VALUE -str_new(ptr, len) +rb_str_new(ptr, len) char *ptr; unsigned len; { NEWOBJ(str, struct RString); - OBJSETUP(str, cString, T_STRING); + OBJSETUP(str, rb_cString, T_STRING); if (rb_safe_level() >= 3) { FL_SET(str, STR_TAINT); @@ -55,18 +53,18 @@ str_new(ptr, len) } VALUE -str_new2(ptr) +rb_str_new2(ptr) char *ptr; { - return str_new(ptr, strlen(ptr)); + return rb_str_new(ptr, strlen(ptr)); } VALUE -str_new3(str) +rb_str_new3(str) VALUE str; { NEWOBJ(str2, struct RString); - OBJSETUP(str2, cString, T_STRING); + OBJSETUP(str2, rb_cString, T_STRING); str2->len = RSTRING(str)->len; str2->ptr = RSTRING(str)->ptr; @@ -80,18 +78,18 @@ str_new3(str) } VALUE -str_new4(orig) +rb_str_new4(orig) VALUE orig; { if (FL_TEST(orig, STR_FREEZE)) { return orig; } else if (RSTRING(orig)->orig && !FL_TEST(orig, STR_NO_ORIG)) { - return str_freeze(RSTRING(orig)->orig); + return rb_str_freeze(RSTRING(orig)->orig); } else { NEWOBJ(str, struct RString); - OBJSETUP(str, cString, T_STRING); + OBJSETUP(str, rb_cString, T_STRING); str->len = RSTRING(orig)->len; str->ptr = RSTRING(orig)->ptr; @@ -105,14 +103,14 @@ str_new4(orig) } VALUE -str_to_str(str) +rb_str_to_str(str) VALUE str; { return rb_convert_type(str, T_STRING, "String", "to_str"); } static void -str_assign(str, str2) +rb_str_assign(str, str2) VALUE str, str2; { if (str == str2) return; @@ -129,13 +127,13 @@ str_assign(str, str2) RSTRING(str)->orig = RSTRING(str2)->orig; RSTRING(str2)->ptr = 0; /* abandon str2 */ RSTRING(str2)->len = 0; - if (str_tainted(str2)) str_taint(str); + if (rb_str_tainted(str2)) rb_str_taint(str); } static ID pr_str; VALUE -obj_as_string(obj) +rb_obj_as_string(obj) VALUE obj; { VALUE str; @@ -145,20 +143,20 @@ obj_as_string(obj) } str = rb_funcall(obj, pr_str, 0); if (TYPE(str) != T_STRING) - return any_to_s(obj); + return rb_any_to_s(obj); return str; } static VALUE -str_clone(orig) +rb_str_clone(orig) VALUE orig; { VALUE str; if (RSTRING(orig)->orig && !FL_TEST(orig, STR_NO_ORIG)) - str = str_new3(RSTRING(orig)->orig); + str = rb_str_new3(RSTRING(orig)->orig); else - str = str_new(RSTRING(orig)->ptr, RSTRING(orig)->len); + str = rb_str_new(RSTRING(orig)->ptr, RSTRING(orig)->len); if (RSTRING(orig)->orig && FL_TEST(orig, STR_NO_ORIG)) RSTRING(str)->orig = RSTRING(orig)->orig; CLONESETUP(str, orig); @@ -166,79 +164,80 @@ str_clone(orig) } VALUE -str_dup(str) +rb_str_dup(str) VALUE str; { VALUE s; - str = str_to_str(str); - s = str_new(RSTRING(str)->ptr, RSTRING(str)->len); - if (str_tainted(str)) s = str_taint(s); + if (TYPE(str) != T_STRING) str = rb_str_to_str(str); + s = rb_str_new(RSTRING(str)->ptr, RSTRING(str)->len); + if (rb_str_tainted(str)) s = rb_str_taint(s); if (RSTRING(str)->orig && FL_TEST(str, STR_NO_ORIG)) RSTRING(s)->orig = RSTRING(str)->orig; return s; } static VALUE -str_s_new(klass, orig) +rb_str_s_new(klass, orig) VALUE klass; VALUE orig; { NEWOBJ(str, struct RString); OBJSETUP(str, klass, T_STRING); - orig = obj_as_string(orig); + str->orig = 0; + orig = rb_obj_as_string(orig); str->len = RSTRING(orig)->len; - str->ptr = ALLOC_N(char, RSTRING(orig)->len+1); - if (str->ptr) { + if (RSTRING(orig)->ptr) { + str->ptr = ALLOC_N(char, RSTRING(orig)->len+1); memcpy(str->ptr, RSTRING(orig)->ptr, RSTRING(orig)->len); + str->ptr[RSTRING(orig)->len] = '\0'; } - str->ptr[RSTRING(orig)->len] = '\0'; - str->orig = 0; if (rb_safe_level() >= 3) { FL_SET(str, STR_TAINT); } - obj_call_init((VALUE)str); + rb_obj_call_init((VALUE)str); return (VALUE)str; } static VALUE -str_length(str) +rb_str_length(str) VALUE str; { return INT2FIX(RSTRING(str)->len); } static VALUE -str_empty(str) +rb_str_empty(str) VALUE str; { if (RSTRING(str)->len == 0) - return TRUE; - return FALSE; + return Qtrue; + return Qfalse; } VALUE -str_plus(str1, str2) +rb_str_plus(str1, str2) VALUE str1, str2; { VALUE str3; - str2 = str_to_str(str2); - str3 = str_new(0, RSTRING(str1)->len+RSTRING(str2)->len); + if (TYPE(str2) != T_STRING) str2 = rb_str_to_str(str2); + str3 = rb_str_new(0, RSTRING(str1)->len+RSTRING(str2)->len); memcpy(RSTRING(str3)->ptr, RSTRING(str1)->ptr, RSTRING(str1)->len); - memcpy(RSTRING(str3)->ptr+RSTRING(str1)->len, RSTRING(str2)->ptr, RSTRING(str2)->len); + memcpy(RSTRING(str3)->ptr + RSTRING(str1)->len, + RSTRING(str2)->ptr, RSTRING(str2)->len); RSTRING(str3)->ptr[RSTRING(str3)->len] = '\0'; - if (str_tainted(str1) || str_tainted(str2)) - return str_taint(str3); + if (rb_str_tainted(str1) || rb_str_tainted(str2)) + return rb_str_taint(str3); return str3; } VALUE -str_times(str, times) +rb_str_times(str, times) VALUE str; VALUE times; { @@ -247,24 +246,25 @@ str_times(str, times) len = NUM2INT(times); if (len < 0) { - ArgError("negative argument"); + rb_raise(rb_eArgError, "negative argument"); } - str2 = str_new(0, RSTRING(str)->len*len); + str2 = rb_str_new(0, RSTRING(str)->len*len); for (i=0; i<len; i++) { - memcpy(RSTRING(str2)->ptr+(i*RSTRING(str)->len), RSTRING(str)->ptr, RSTRING(str)->len); + memcpy(RSTRING(str2)->ptr+(i*RSTRING(str)->len), + RSTRING(str)->ptr, RSTRING(str)->len); } RSTRING(str2)->ptr[RSTRING(str2)->len] = '\0'; - if (str_tainted(str)) { - return str_taint(str2); + if (rb_str_tainted(str)) { + return rb_str_taint(str2); } return str2; } -VALUE -str_format(str, arg) +static VALUE +rb_str_format(str, arg) VALUE str, arg; { VALUE *argv; @@ -273,17 +273,17 @@ str_format(str, arg) argv = ALLOCA_N(VALUE, RARRAY(arg)->len + 1); argv[0] = str; MEMCPY(argv+1, RARRAY(arg)->ptr, VALUE, RARRAY(arg)->len); - return f_sprintf(RARRAY(arg)->len+1, argv); + return rb_f_sprintf(RARRAY(arg)->len+1, argv); } argv = ALLOCA_N(VALUE, 2); argv[0] = str; argv[1] = arg; - return f_sprintf(2, argv); + return rb_f_sprintf(2, argv); } VALUE -str_substr(str, start, len) +rb_str_substr(str, start, len) VALUE str; int start, len; { @@ -293,27 +293,27 @@ str_substr(str, start, len) start = RSTRING(str)->len + start; } if (RSTRING(str)->len <= start || len < 0) { - return str_new(0,0); + return rb_str_new(0,0); } if (RSTRING(str)->len < start + len) { len = RSTRING(str)->len - start; } - str2 = str_new(RSTRING(str)->ptr+start, len); - if (str_tainted(str)) str_taint(str2); + str2 = rb_str_new(RSTRING(str)->ptr+start, len); + if (rb_str_tainted(str)) rb_str_taint(str2); return str2; } static VALUE -str_subseq(str, beg, end) +rb_str_subseq(str, beg, end) VALUE str; int beg, end; { int len; if ((beg > 0 && end > 0 || beg < 0 && end < 0) && beg > end) { - IndexError("end smaller than beg [%d..%d]", beg, end); + rb_raise(rb_eIndexError, "end smaller than beg [%d..%d]", beg, end); } if (beg < 0) { @@ -329,7 +329,7 @@ str_subseq(str, beg, end) } if (beg >= RSTRING(str)->len) { - return str_new(0, 0); + return rb_str_new(0, 0); } len = end - beg + 1; @@ -337,22 +337,20 @@ str_subseq(str, beg, end) len = 0; } - return str_substr(str, beg, len); + return rb_str_substr(str, beg, len); } -extern VALUE ignorecase; - void -str_modify(str) +rb_str_modify(str) VALUE str; { char *ptr; if (rb_safe_level() >= 5) { - Raise(eSecurityError, "cannot change string status"); + rb_raise(rb_eSecurityError, "cannot change string status"); } if (FL_TEST(str, STR_FREEZE)) - TypeError("can't modify frozen string"); + rb_raise(rb_eTypeError, "can't modify frozen string"); if (!RSTRING(str)->orig || FL_TEST(str, STR_NO_ORIG)) return; ptr = RSTRING(str)->ptr; RSTRING(str)->ptr = ALLOC_N(char, RSTRING(str)->len+1); @@ -364,7 +362,7 @@ str_modify(str) } VALUE -str_freeze(str) +rb_str_freeze(str) VALUE str; { FL_SET(str, STR_FREEZE); @@ -372,28 +370,28 @@ str_freeze(str) } static VALUE -str_frozen_p(str) +rb_str_frozen_p(str) VALUE str; { if (FL_TEST(str, STR_FREEZE)) - return TRUE; - return FALSE; + return Qtrue; + return Qfalse; } VALUE -str_dup_frozen(str) +rb_str_dup_frozen(str) VALUE str; { if (RSTRING(str)->orig && !FL_TEST(str, STR_NO_ORIG)) { - return str_freeze(RSTRING(str)->orig); + return rb_str_freeze(RSTRING(str)->orig); } if (FL_TEST(str, STR_FREEZE)) return str; - return str_freeze(str_dup(str)); + return rb_str_freeze(rb_str_dup(str)); } VALUE -str_taint(str) +rb_str_taint(str) VALUE str; { if (TYPE(str) == T_STRING) { @@ -403,20 +401,20 @@ str_taint(str) } VALUE -str_tainted(str) +rb_str_tainted(str) VALUE str; { if (FL_TEST(str, STR_TAINT)) - return TRUE; - return FALSE; + return Qtrue; + return Qfalse; } VALUE -str_resize(str, len) +rb_str_resize(str, len) VALUE str; int len; { - str_modify(str); + rb_str_modify(str); if (len >= 0) { if (RSTRING(str)->len < len || RSTRING(str)->len - len > 1024) { @@ -429,13 +427,13 @@ str_resize(str, len) } VALUE -str_cat(str, ptr, len) +rb_str_cat(str, ptr, len) VALUE str; char *ptr; unsigned len; { if (len > 0) { - str_modify(str); + rb_str_modify(str); REALLOC_N(RSTRING(str)->ptr, char, RSTRING(str)->len + len + 1); if (ptr) memcpy(RSTRING(str)->ptr + RSTRING(str)->len, ptr, len); @@ -446,23 +444,23 @@ str_cat(str, ptr, len) } VALUE -str_concat(str1, str2) +rb_str_concat(str1, str2) VALUE str1, str2; { - str2 = str_to_str(str2); - str_cat(str1, RSTRING(str2)->ptr, RSTRING(str2)->len); + if (TYPE(str2) != T_STRING) str2 = rb_str_to_str(str2); + rb_str_cat(str1, RSTRING(str2)->ptr, RSTRING(str2)->len); return str1; } int -str_hash(str) +rb_str_hash(str) VALUE str; { register int len = RSTRING(str)->len; register char *p = RSTRING(str)->ptr; register int key = 0; - if (RTEST(ignorecase)) { + if (rb_ignorecase_p()) { while (len--) { key = key*65599 + toupper(*p); p++; @@ -478,24 +476,24 @@ str_hash(str) } static VALUE -str_hash_method(str) +rb_str_hash_method(str) VALUE str; { - int key = str_hash(str); + int key = rb_str_hash(str); return INT2FIX(key); } #define min(a,b) (((a)>(b))?(b):(a)) int -str_cmp(str1, str2) +rb_str_cmp(str1, str2) VALUE str1, str2; { unsigned int len; int retval; - if (RTEST(ignorecase)) { - return str_cicmp(str1, str2); + if (rb_ignorecase_p()) { + return rb_str_cicmp(str1, str2); } len = min(RSTRING(str1)->len, RSTRING(str2)->len); @@ -511,32 +509,32 @@ str_cmp(str1, str2) } static VALUE -str_equal(str1, str2) +rb_str_equal(str1, str2) VALUE str1, str2; { if (TYPE(str2) != T_STRING) - return FALSE; + return Qfalse; if (RSTRING(str1)->len == RSTRING(str2)->len - && str_cmp(str1, str2) == 0) { - return TRUE; + && rb_str_cmp(str1, str2) == 0) { + return Qtrue; } - return FALSE; + return Qfalse; } static VALUE -str_cmp_method(str1, str2) +rb_str_cmp_method(str1, str2) VALUE str1, str2; { int result; - str2 = str_to_str(str2); - result = str_cmp(str1, str2); + if (TYPE(str2) != T_STRING) str2 = rb_str_to_str(str2); + result = rb_str_cmp(str1, str2); return INT2FIX(result); } static VALUE -str_match(x, y) +rb_str_match(x, y) VALUE x, y; { VALUE reg; @@ -544,13 +542,13 @@ str_match(x, y) switch (TYPE(y)) { case T_REGEXP: - return reg_match(y, x); + return rb_reg_match(y, x); case T_STRING: - reg = reg_regcomp(y); - start = reg_search(reg, x, 0, 0); + reg = rb_reg_regcomp(y); + start = rb_reg_search(reg, x, 0, 0); if (start == -1) { - return FALSE; + return Qfalse; } return INT2FIX(start); @@ -560,14 +558,14 @@ str_match(x, y) } static VALUE -str_match2(str) +rb_str_match2(str) VALUE str; { - return reg_match2(reg_regcomp(str)); + return rb_reg_match2(rb_reg_regcomp(str)); } static int -str_index(str, sub, offset) +rb_str_index(str, sub, offset) VALUE str, sub; int offset; { @@ -589,7 +587,7 @@ str_index(str, sub, offset) } static VALUE -str_index_method(argc, argv, str) +rb_str_index_method(argc, argv, str) int argc; VALUE *argv; VALUE str; @@ -607,11 +605,11 @@ str_index_method(argc, argv, str) switch (TYPE(sub)) { case T_REGEXP: - pos = reg_search(sub, str, pos, 0); + pos = rb_reg_search(sub, str, pos, 0); break; case T_STRING: - pos = str_index(str, sub, pos); + pos = rb_str_index(str, sub, pos); break; case T_FIXNUM: @@ -627,7 +625,8 @@ str_index_method(argc, argv, str) } default: - TypeError("Type mismatch: %s given", rb_class2name(CLASS_OF(sub))); + rb_raise(rb_eTypeError, "Type mismatch: %s given", + rb_class2name(CLASS_OF(sub))); } if (pos == -1) return Qnil; @@ -635,7 +634,7 @@ str_index_method(argc, argv, str) } static VALUE -str_rindex(argc, argv, str) +rb_str_rindex(argc, argv, str) int argc; VALUE *argv; VALUE str; @@ -655,7 +654,7 @@ str_rindex(argc, argv, str) switch (TYPE(sub)) { case T_REGEXP: - pos = reg_search(sub, str, pos, 1); + pos = rb_reg_search(sub, str, pos, 1); if (pos >= 0) return INT2FIX(pos); break; @@ -685,7 +684,8 @@ str_rindex(argc, argv, str) } default: - TypeError("Type mismatch: %s given", rb_class2name(CLASS_OF(sub))); + rb_raise(rb_eTypeError, "Type mismatch: %s given", + rb_class2name(CLASS_OF(sub))); } return Qnil; } @@ -716,14 +716,14 @@ succ_char(s) } static VALUE -str_succ(orig) +rb_str_succ(orig) VALUE orig; { VALUE str, str2; char *sbeg, *s; char c = -1; - str = str_new(RSTRING(orig)->ptr, RSTRING(orig)->len); + str = rb_str_new(RSTRING(orig)->ptr, RSTRING(orig)->len); sbeg = RSTRING(str)->ptr; s = sbeg + RSTRING(str)->len - 1; @@ -736,45 +736,45 @@ str_succ(orig) RSTRING(str)->ptr[RSTRING(str)->len-1] += 1; } else { - str2 = str_new(0, RSTRING(str)->len+1); + str2 = rb_str_new(0, RSTRING(str)->len+1); RSTRING(str2)->ptr[0] = c; memcpy(RSTRING(str2)->ptr+1, RSTRING(str)->ptr, RSTRING(str)->len); str = str2; } } - if (str_tainted(orig)) { - return str_taint(str); + if (rb_str_tainted(orig)) { + return rb_str_taint(str); } return str; } static VALUE -str_succ_bang(str) +rb_str_succ_bang(str) VALUE str; { - str_modify(str); - str_assign(str, str_succ(str)); + rb_str_modify(str); + rb_str_assign(str, rb_str_succ(str)); return str; } VALUE -str_upto(beg, end) +rb_str_upto(beg, end) VALUE beg, end; { VALUE current; - end = str_to_str(end); + if (TYPE(end) != T_STRING) end = rb_str_to_str(end); if (RTEST(rb_funcall(beg, '>', 1, end))) return Qnil; current = beg; for (;;) { rb_yield(current); - if (str_equal(current, end)) break; - current = str_succ(current); + if (rb_str_equal(current, end)) break; + current = rb_str_succ(current); if (RSTRING(current)->len > RSTRING(end)->len) break; } @@ -783,7 +783,7 @@ str_upto(beg, end) } static VALUE -str_aref(str, indx) +rb_str_aref(str, indx) VALUE str; VALUE indx; { @@ -802,28 +802,28 @@ str_aref(str, indx) return INT2FIX(RSTRING(str)->ptr[idx] & 0xff); case T_REGEXP: - if (str_match(str, indx)) - return reg_last_match(0); + if (rb_str_match(str, indx)) + return rb_reg_last_match(0); return Qnil; case T_STRING: - if (str_index(str, indx, 0) != -1) return indx; + if (rb_str_index(str, indx, 0) != -1) return indx; return Qnil; default: /* check if indx is Range */ { int beg, end; - if (range_beg_end(indx, &beg, &end)) { - return str_subseq(str, beg, end); + if (rb_range_beg_end(indx, &beg, &end)) { + return rb_str_subseq(str, beg, end); } } - IndexError("Invalid index for string"); + rb_raise(rb_eIndexError, "Invalid index for string"); } } static VALUE -str_aref_method(argc, argv, str) +rb_str_aref_method(argc, argv, str) int argc; VALUE *argv; VALUE str; @@ -831,13 +831,13 @@ str_aref_method(argc, argv, str) VALUE arg1, arg2; if (rb_scan_args(argc, argv, "11", &arg1, &arg2) == 2) { - return str_substr(str, NUM2INT(arg1), NUM2INT(arg2)); + return rb_str_substr(str, NUM2INT(arg1), NUM2INT(arg2)); } - return str_aref(str, arg1); + return rb_str_aref(str, arg1); } static void -str_replace(str, beg, len, val) +rb_str_replace(str, beg, len, val) VALUE str, val; int beg, len; { @@ -847,28 +847,28 @@ str_replace(str, beg, len, val) } if (len != RSTRING(val)->len) { - memmove(RSTRING(str)->ptr+beg+RSTRING(val)->len, - RSTRING(str)->ptr+beg+len, - RSTRING(str)->len-(beg+len)); + memmove(RSTRING(str)->ptr + beg + RSTRING(val)->len, + RSTRING(str)->ptr + beg + len, + RSTRING(str)->len - (beg + len)); } if (RSTRING(str)->len < beg && len < 0) { - MEMZERO(RSTRING(str)->ptr+RSTRING(str)->len, char, -len); + MEMZERO(RSTRING(str)->ptr + RSTRING(str)->len, char, -len); } memcpy(RSTRING(str)->ptr+beg, RSTRING(val)->ptr, RSTRING(val)->len); RSTRING(str)->len += RSTRING(val)->len - len; RSTRING(str)->ptr[RSTRING(str)->len] = '\0'; } -/* str_replace2() understands negatice offset */ +/* rb_str_replace2() understands negatice offset */ static void -str_replace2(str, beg, end, val) +rb_str_replace2(str, beg, end, val) VALUE str, val; int beg, end; { int len; if ((beg > 0 && end > 0 || beg < 0 && end < 0) && beg > end) { - IndexError("end smaller than beg [%d..%d]", beg, end); + rb_raise(rb_eIndexError, "end smaller than beg [%d..%d]", beg, end); } if (beg < 0) { @@ -894,170 +894,13 @@ str_replace2(str, beg, end, val) len = 0; } - str_replace(str, beg, len, val); + rb_str_replace(str, beg, len, val); } -static VALUE -str_sub_s(str, pat, val, once) - VALUE str, pat, val; - int once; -{ - VALUE result, repl; - int beg, offset, n; - struct re_registers *regs; - - switch (TYPE(pat)) { - case T_REGEXP: - break; - - case T_STRING: - pat = reg_regcomp(pat); - break; - - default: - /* type failed */ - Check_Type(pat, T_REGEXP); - } - - val = obj_as_string(val); - result = str_new(0,0); - offset=0; n=0; - while ((beg=reg_search(pat, str, offset, 0)) >= 0) { - n++; - - regs = RMATCH(backref_get())->regs; - str_cat(result, RSTRING(str)->ptr+offset, beg-offset); - - repl = reg_regsub(val, str, regs); - str_cat(result, RSTRING(repl)->ptr, RSTRING(repl)->len); - if (BEG(0) == END(0)) { - int len = ismbchar(RSTRING(str)->ptr[END(0)])?2:1; - /* - * Always consume at least one character of the input string - * in order to prevent infinite loops. - */ - if (RSTRING(str)->len > END(0)) { - str_cat(result, RSTRING(str)->ptr+END(0), len); - } - offset = END(0)+len; - } - else { - offset = END(0); - } - - if (once) break; - if (offset > STRLEN(str)) break; - } - if (n == 0) return Qnil; - if (RSTRING(str)->len > offset) { - str_cat(result, RSTRING(str)->ptr+offset, RSTRING(str)->len-offset); - } - - if (str_tainted(val)) str_taint(result); - return result; -} +static VALUE rb_str_sub_bang _((int, VALUE*, VALUE)); static VALUE -str_sub_f(str, pat, val, once) - VALUE str; - VALUE pat; - VALUE val; - int once; -{ - VALUE result; - - str_modify(str); - result = str_sub_s(str, pat, val, once); - if (NIL_P(result)) return Qnil; - str_assign(str, result); - - return str; -} - -static VALUE -str_sub_iter_s(str, pat, once) - VALUE str; - VALUE pat; - int once; -{ - VALUE val, match, result; - int beg, offset, n; - struct re_registers *regs; - - if (!iterator_p()) { - ArgError("Wrong # of arguments(1 for 2)"); - } - - switch (TYPE(pat)) { - case T_REGEXP: - break; - - case T_STRING: - pat = reg_regcomp(pat); - break; - - default: - /* type failed */ - Check_Type(pat, T_REGEXP); - } - - result = str_new(0,0); - n = 0; offset = 0; - while ((beg=reg_search(pat, str, offset, 0)) >= 0) { - - n++; - match = backref_get(); - regs = RMATCH(match)->regs; - str_cat(result, RSTRING(str)->ptr+offset, beg-offset); - - val = obj_as_string(rb_yield(reg_nth_match(0, match))); - str_cat(result, RSTRING(val)->ptr, RSTRING(val)->len); - - if (BEG(0) == END(0)) { - int len = ismbchar(RSTRING(str)->ptr[END(0)])?2:1; - - /* - * Always consume at least one character of the input string - * in order to prevent infinite loops. - */ - if (RSTRING(str)->len > END(0)) { - str_cat(result, RSTRING(str)->ptr+END(0), len); - } - offset = END(0)+len; - } - else { - offset = END(0); - } - - if (once) break; - if (offset > STRLEN(str)) break; - } - if (n == 0) return Qnil; - if (RSTRING(str)->len > offset) { - str_cat(result, RSTRING(str)->ptr+offset, RSTRING(str)->len-offset); - } - - return result; -} - -static VALUE -str_sub_iter_f(str, pat, once) - VALUE str; - VALUE pat; - int once; -{ - VALUE result; - - str_modify(str); - result = str_sub_iter_s(str, pat, once); - if (NIL_P(result)) return Qnil; - str_assign(str, result); - - return str; -} - -static VALUE -str_aset(str, indx, val) +rb_str_aset(str, indx, val) VALUE str; VALUE indx, val; { @@ -1070,10 +913,11 @@ str_aset(str, indx, val) idx = RSTRING(str)->len + idx; } if (idx < 0 || RSTRING(str)->len <= idx) { - IndexError("index %d out of range [0..%d]", idx, RSTRING(str)->len-1); + rb_raise(rb_eIndexError, "index %d out of range [0..%d]", idx, + RSTRING(str)->len - 1); } if (TYPE(val) == T_STRING) { - str_replace(str, idx, 1, val); + rb_str_replace(str, idx, 1, val); } else { RSTRING(str)->ptr[idx] = NUM2INT(val) & 0xff; @@ -1081,15 +925,20 @@ str_aset(str, indx, val) return val; case T_REGEXP: - str_sub_f(str, indx, val, 0); + { + VALUE args[2]; + args[0] = indx; + args[1] = val; + rb_str_sub_bang(2, args, str); + } return val; case T_STRING: for (offset=0; - (beg=str_index(str, indx, offset)) >= 0; - offset=beg+STRLEN(val)) { - end = beg + STRLEN(indx) - 1; - str_replace2(str, beg, end, val); + (beg=rb_str_index(str, indx, offset)) >= 0; + offset=beg+RSTRING(val)->len) { + end = beg + RSTRING(indx)->len - 1; + rb_str_replace2(str, beg, end, val); } if (offset == 0) return Qnil; return val; @@ -1098,118 +947,241 @@ str_aset(str, indx, val) /* check if indx is Range */ { int beg, end; - if (range_beg_end(indx, &beg, &end)) { - str_replace2(str, beg, end, str_to_str(val)); + if (rb_range_beg_end(indx, &beg, &end)) { + if (TYPE(val) != T_STRING) val = rb_str_to_str(val); + rb_str_replace2(str, beg, end, val); return val; } } - IndexError("Invalid index for string"); + rb_raise(rb_eIndexError, "Invalid index for string"); } } static VALUE -str_aset_method(argc, argv, str) +rb_str_aset_method(argc, argv, str) int argc; VALUE *argv; VALUE str; { VALUE arg1, arg2, arg3; - str_modify(str); + rb_str_modify(str); if (rb_scan_args(argc, argv, "21", &arg1, &arg2, &arg3) == 3) { int beg, len; - arg3 = str_to_str(arg3); + if (TYPE(arg3) != T_STRING) arg3 = rb_str_to_str(arg3); beg = NUM2INT(arg1); if (beg < 0) { beg = RSTRING(str)->len + beg; if (beg < 0) beg = 0; } len = NUM2INT(arg2); - if (len < 0) IndexError("negative length %d", len); + if (len < 0) rb_raise(rb_eIndexError, "negative length %d", len); if (beg + len > RSTRING(str)->len) { len = RSTRING(str)->len - beg; } - str_replace(str, beg, len, arg3); + rb_str_replace(str, beg, len, arg3); return arg3; } - return str_aset(str, arg1, arg2); + return rb_str_aset(str, arg1, arg2); } static VALUE -str_sub_bang(argc, argv, str) - int argc; - VALUE *argv; - VALUE str; +get_pat(pat) + VALUE pat; { - VALUE pat, val; + switch (TYPE(pat)) { + case T_REGEXP: + break; - if (rb_scan_args(argc, argv, "11", &pat, &val) == 1) { - return str_sub_iter_f(str, pat, 1); + case T_STRING: + pat = rb_reg_regcomp(pat); + break; + + default: + /* type failed */ + Check_Type(pat, T_REGEXP); } - return str_sub_f(str, pat, val, 1); + return pat; } static VALUE -str_sub(argc, argv, str) +rb_str_sub_bang(argc, argv, str) int argc; VALUE *argv; VALUE str; { - VALUE pat, val, v; + VALUE pat, repl, match; + struct re_registers *regs; + int iter = 0; + int plen; - if (rb_scan_args(argc, argv, "11", &pat, &val) == 1) { - v = str_sub_iter_s(str, pat, 1); + if (rb_scan_args(argc, argv, "11", &pat, &repl) == 1) { + if (!rb_iterator_p()) { + rb_raise(rb_eArgError, "Wrong # of arguments(1 for 2)"); + } + iter = 1; } else { - v = str_sub_s(str, pat, val, 1); + repl = rb_obj_as_string(repl); } - if (NIL_P(v)) return str_dup(str); - return v; + + pat = get_pat(pat); + if (rb_reg_search(pat, str, 0, 0) >= 0) { + rb_str_modify(str); + match = rb_backref_get(); + regs = RMATCH(match)->regs; + + if (iter) { + repl = rb_obj_as_string(rb_yield(rb_reg_nth_match(0, match))); + } + else { + repl = rb_reg_regsub(repl, str, regs); + } + plen = END(0) - BEG(0); + if (RSTRING(repl)->len > plen) { + REALLOC_N(RSTRING(str)->ptr, char, + RSTRING(str)->len + RSTRING(repl)->len - plen + 1); + } + if (RSTRING(repl)->len != plen) { + memmove(RSTRING(str)->ptr + BEG(0) + RSTRING(repl)->len, + RSTRING(str)->ptr + BEG(0) + plen, + RSTRING(str)->len - BEG(0) - plen); + } + memcpy(RSTRING(str)->ptr + BEG(0), + RSTRING(repl)->ptr, RSTRING(repl)->len); + RSTRING(str)->len += RSTRING(repl)->len - plen; + return str; + } + return Qnil; } static VALUE -str_gsub_bang(argc, argv, str) +rb_str_sub(argc, argv, str) int argc; VALUE *argv; VALUE str; { - VALUE pat, val; + VALUE val = rb_str_sub_bang(argc, argv, rb_str_dup(str)); - if (rb_scan_args(argc, argv, "11", &pat, &val) == 1) { - return str_sub_iter_f(str, pat, 0); - } - return str_sub_f(str, pat, val, 0); + if (NIL_P(val)) return str; + return val; } static VALUE -str_gsub(argc, argv, str) +rb_str_gsub_bang(argc, argv, str) int argc; VALUE *argv; VALUE str; { - VALUE pat, val, v; + VALUE pat, val, repl, match; + struct re_registers *regs; + int beg, offset, n; + int iter = 0; + char *buf, *bp, *cp; + int blen, len; - if (rb_scan_args(argc, argv, "11", &pat, &val) == 1) { - v = str_sub_iter_s(str, pat, 0); + if (rb_scan_args(argc, argv, "11", &pat, &repl) == 1) { + if (!rb_iterator_p()) { + rb_raise(rb_eArgError, "Wrong # of arguments(1 for 2)"); + } + iter = 1; } else { - v = str_sub_s(str, pat, val, 0); + repl = rb_obj_as_string(repl); + } + + pat = get_pat(pat); + offset=0; n=0; + beg = rb_reg_search(pat, str, 0, 0); + if (beg < 0) return Qnil; /* no match, no substitution */ + + blen = RSTRING(str)->len + 30; /* len + margin */ + buf = ALLOC_N(char, blen); + bp = buf; + cp = RSTRING(str)->ptr; + + while (beg >= 0) { + n++; + match = rb_backref_get(); + regs = RMATCH(match)->regs; + if (iter) { + val = rb_obj_as_string(rb_yield(rb_reg_nth_match(0, match))); + } + else { + val = rb_reg_regsub(repl, str, regs); + } + len = beg + RSTRING(val)->len + 3; + if (blen < len) { + while (blen < len) blen *= 2; + len = bp - buf; + REALLOC_N(buf, char, blen); + bp = buf + len; + } + len = beg - offset; /* copy pre-match substr */ + memcpy(bp, cp, len); + bp += len; + memcpy(bp, RSTRING(val)->ptr, RSTRING(val)->len); + bp += RSTRING(val)->len; + if (BEG(0) == END(0)) { + /* + * Always consume at least one character of the input string + * in order to prevent infinite loops. + */ + len = ismbchar(RSTRING(str)->ptr[END(0)])?2:1; + if (RSTRING(str)->len > END(0)) { + memcpy(bp, RSTRING(str)->ptr, len); + bp += len; + } + offset = END(0) + len; + } + else { + offset = END(0); + } + cp = RSTRING(str)->ptr + offset; + if (offset > RSTRING(str)->len) break; + beg = rb_reg_search(pat, str, offset, 0); + } + if (RSTRING(str)->len > offset) { + len = bp - buf; + if (blen - len < RSTRING(str)->len - offset) { + REALLOC_N(buf, char, len + RSTRING(str)->len - offset + 1); + bp = buf + len; + } + memcpy(bp, cp, RSTRING(str)->len - offset); + bp += RSTRING(str)->len - offset; } - if (NIL_P(v)) return str_dup(str); - return v; + rb_str_modify(str); + free(RSTRING(str)->ptr); + RSTRING(str)->ptr = buf; + RSTRING(str)->len = len = bp - buf; + RSTRING(str)->ptr[len] = '\0'; + + return str; } static VALUE -str_replace_method(str, str2) +rb_str_gsub(argc, argv, str) + int argc; + VALUE *argv; + VALUE str; +{ + VALUE val = rb_str_gsub_bang(argc, argv, rb_str_dup(str)); + + if (NIL_P(val)) return str; + return val; +} + +static VALUE +rb_str_replace_method(str, str2) VALUE str, str2; { - str2 = str_to_str(str2); - str_modify(str); - str_resize(str, RSTRING(str2)->len); + if (TYPE(str2) != T_STRING) str2 = rb_str_to_str(str2); + rb_str_modify(str); + rb_str_resize(str, RSTRING(str2)->len); memcpy(RSTRING(str)->ptr, RSTRING(str2)->ptr, RSTRING(str2)->len); - if (str_tainted(str2)) str_taint(str); + if (rb_str_tainted(str2)) rb_str_taint(str); return str; } @@ -1219,85 +1191,64 @@ uscore_get() { VALUE line; - line = lastline_get(); + line = rb_lastline_get(); if (TYPE(line) != T_STRING) { - TypeError("$_ value need to be String (%s given)", - rb_class2name(CLASS_OF(line))); + rb_raise(rb_eTypeError, "$_ value need to be String (%s given)", + rb_class2name(CLASS_OF(line))); } return line; } static VALUE -f_sub_bang(argc, argv) +rb_f_sub_bang(argc, argv) int argc; VALUE *argv; { - VALUE pat, val, line; - - line = uscore_get(); - if (rb_scan_args(argc, argv, "11", &pat, &val) == 1) { - return str_sub_iter_f(line, pat, 1); - } - return str_sub_f(line, pat, val, 1); + return rb_str_sub_bang(argc, argv, uscore_get()); } static VALUE -f_sub(argc, argv) +rb_f_sub(argc, argv) int argc; VALUE *argv; { - VALUE pat, val, line, v; + VALUE line, v; line = uscore_get(); - if (rb_scan_args(argc, argv, "11", &pat, &val) == 1) { - v = str_sub_iter_s(line, pat, 1); - } - else { - v = str_sub_s(line, pat, val, 1); - } + v = rb_str_sub_bang(argc, argv, line); if (!NIL_P(v)) { - lastline_set(v); + rb_lastline_set(v); return v; } return line; } static VALUE -f_gsub_bang(argc, argv) +rb_f_gsub_bang(argc, argv) int argc; VALUE *argv; { - VALUE pat, val, line; - - line = uscore_get(); - if (rb_scan_args(argc, argv, "11", &pat, &val) == 1) { - return str_sub_iter_f(line, pat, 0); - } - return str_sub_f(line, pat, val, 0); + return rb_str_gsub_bang(argc, argv, uscore_get()); } static VALUE -f_gsub(argc, argv) +rb_f_gsub(argc, argv) int argc; VALUE *argv; { - VALUE pat, val, line, v; + VALUE line, v; line = uscore_get(); - if (rb_scan_args(argc, argv, "11", &pat, &val) == 1) { - v = str_sub_iter_s(line, pat, 0); - } - else { - v = str_sub_s(line, pat, val, 0); + v = rb_str_gsub_bang(argc, argv, line); + if (!NIL_P(v)) { + rb_lastline_set(v); + return v; } - if (NIL_P(v)) v = str_dup(line); - lastline_set(v); - - return v; + return line; } static VALUE -str_reverse_bang(str) +rb_str_reverse_bang(str) VALUE str; { char *s, *e, *p, *q; @@ -1315,7 +1266,7 @@ str_reverse_bang(str) } static VALUE -str_reverse(str) +rb_str_reverse(str) VALUE str; { VALUE obj; @@ -1323,7 +1274,7 @@ str_reverse(str) if (RSTRING(str)->len <= 1) return str; - obj = str_new(0, RSTRING(str)->len); + obj = rb_str_new(0, RSTRING(str)->len); s = RSTRING(str)->ptr; e = s + RSTRING(str)->len - 1; p = RSTRING(obj)->ptr; @@ -1335,7 +1286,7 @@ str_reverse(str) } static VALUE -str_include(str, arg) +rb_str_include(str, arg) VALUE str, arg; { int i; @@ -1350,40 +1301,41 @@ str_include(str, arg) return INT2FIX(i); } } - return FALSE; + return Qfalse; } - i = str_index(str, str_to_str(arg), 0); + if (TYPE(arg) != T_STRING) arg = rb_str_to_str(arg); + i = rb_str_index(str, arg, 0); - if (i == -1) return FALSE; + if (i == -1) return Qfalse; return INT2FIX(i); } static VALUE -str_to_i(str) +rb_str_to_i(str) VALUE str; { - return str2inum(RSTRING(str)->ptr, 10); + return rb_str2inum(RSTRING(str)->ptr, 10); } static VALUE -str_to_f(str) +rb_str_to_f(str) VALUE str; { double f = atof(RSTRING(str)->ptr); - return float_new(f); + return rb_float_new(f); } static VALUE -str_to_s(str) +rb_str_to_s(str) VALUE str; { return str; } VALUE -str_inspect(str) +rb_str_inspect(str) VALUE str; { #define STRMAX 80 @@ -1471,11 +1423,11 @@ str_inspect(str) } } *b++ = '"'; - return str_new(buf, b - buf); + return rb_str_new(buf, b - buf); } -VALUE -str_dump(str) +static VALUE +rb_str_dump(str) VALUE str; { int len; @@ -1506,7 +1458,7 @@ str_dump(str) } } - result = str_new(0, len); + result = rb_str_new(0, len); p = RSTRING(str)->ptr; pend = p + RSTRING(str)->len; q = RSTRING(result)->ptr; qend = q + len; @@ -1561,13 +1513,13 @@ str_dump(str) } static VALUE -str_upcase_bang(str) +rb_str_upcase_bang(str) VALUE str; { char *s, *send; int modify = 0; - str_modify(str); + rb_str_modify(str); s = RSTRING(str)->ptr; send = s + RSTRING(str)->len; while (s < send) { if (ismbchar(*s)) { @@ -1585,23 +1537,23 @@ str_upcase_bang(str) } static VALUE -str_upcase(str) +rb_str_upcase(str) VALUE str; { - VALUE val = str_upcase_bang(str_dup(str)); + VALUE val = rb_str_upcase_bang(rb_str_dup(str)); if (NIL_P(val)) return str; return val; } static VALUE -str_downcase_bang(str) +rb_str_downcase_bang(str) VALUE str; { char *s, *send; int modify = 0; - str_modify(str); + rb_str_modify(str); s = RSTRING(str)->ptr; send = s + RSTRING(str)->len; while (s < send) { if (ismbchar(*s)) { @@ -1619,23 +1571,23 @@ str_downcase_bang(str) } static VALUE -str_downcase(str) +rb_str_downcase(str) VALUE str; { - VALUE val = str_downcase_bang(str_dup(str)); + VALUE val = rb_str_downcase_bang(rb_str_dup(str)); if (NIL_P(val)) return str; return val; } static VALUE -str_capitalize_bang(str) +rb_str_capitalize_bang(str) VALUE str; { char *s, *send; int modify = 0; - str_modify(str); + rb_str_modify(str); s = RSTRING(str)->ptr; send = s + RSTRING(str)->len; if (ISLOWER(*s)) { *s = toupper(*s); @@ -1655,23 +1607,23 @@ str_capitalize_bang(str) } static VALUE -str_capitalize(str) +rb_str_capitalize(str) VALUE str; { - VALUE val = str_capitalize_bang(str_dup(str)); + VALUE val = rb_str_capitalize_bang(rb_str_dup(str)); if (NIL_P(val)) return str; return val; } static VALUE -str_swapcase_bang(str) +rb_str_swapcase_bang(str) VALUE str; { char *s, *send; int modify = 0; - str_modify(str); + rb_str_modify(str); s = RSTRING(str)->ptr; send = s + RSTRING(str)->len; while (s < send) { if (ismbchar(*s)) { @@ -1693,10 +1645,10 @@ str_swapcase_bang(str) } static VALUE -str_swapcase(str) +rb_str_swapcase(str) VALUE str; { - VALUE val = str_swapcase_bang(str_dup(str)); + VALUE val = rb_str_swapcase_bang(rb_str_dup(str)); if (NIL_P(val)) return str; return val; @@ -1740,7 +1692,7 @@ trnext(t) } } -static VALUE str_delete_bang _((VALUE,VALUE)); +static VALUE rb_str_delete_bang _((VALUE,VALUE)); static VALUE tr_trans(str, src, repl, sflag) @@ -1753,15 +1705,15 @@ tr_trans(str, src, repl, sflag) int i, c, c0, modify = 0; char *s, *send; - str_modify(str); - src = str_to_str(src); + rb_str_modify(str); + if (TYPE(src) != T_STRING) src = rb_str_to_str(src); trsrc.p = RSTRING(src)->ptr; trsrc.pend = trsrc.p + RSTRING(src)->len; if (RSTRING(src)->len > 2 && RSTRING(src)->ptr[0] == '^') { cflag++; trsrc.p++; } - repl = str_to_str(repl); - if (RSTRING(repl)->len == 0) return str_delete_bang(str, src); + if (TYPE(repl) != T_STRING) repl = rb_str_to_str(repl); + if (RSTRING(repl)->len == 0) return rb_str_delete_bang(str, src); trrepl.p = RSTRING(repl)->ptr; trrepl.pend = trrepl.p + RSTRING(repl)->len; trsrc.gen = trrepl.gen = 0; @@ -1841,17 +1793,17 @@ tr_trans(str, src, repl, sflag) } static VALUE -str_tr_bang(str, src, repl) +rb_str_tr_bang(str, src, repl) VALUE str, src, repl; { return tr_trans(str, src, repl, 0); } static VALUE -str_tr(str, src, repl) +rb_str_tr(str, src, repl) VALUE str, src, repl; { - VALUE val = tr_trans(str_dup(str), src, repl, 0); + VALUE val = tr_trans(rb_str_dup(str), src, repl, 0); if (NIL_P(val)) return str; return val; @@ -1882,17 +1834,17 @@ tr_setup_table(str, table) } static VALUE -str_delete_bang(str1, str2) +rb_str_delete_bang(str1, str2) VALUE str1, str2; { char *s, *send, *t; char squeez[256]; int modify = 0; - str2 = str_to_str(str2); + if (TYPE(str2) != T_STRING) str2 = rb_str_to_str(str2); tr_setup_table(str2, squeez); - str_modify(str1); + rb_str_modify(str1); s = t = RSTRING(str1)->ptr; send = s + RSTRING(str1)->len; @@ -1911,10 +1863,10 @@ str_delete_bang(str1, str2) } static VALUE -str_delete(str1, str2) +rb_str_delete(str1, str2) VALUE str1, str2; { - VALUE val = str_delete_bang(str_dup(str1), str2); + VALUE val = rb_str_delete_bang(rb_str_dup(str1), str2); if (NIL_P(val)) return str1; return val; @@ -1939,7 +1891,7 @@ tr_squeeze(str1, str2) } } - str_modify(str1); + rb_str_modify(str1); s = t = RSTRING(str1)->ptr; send = s + RSTRING(str1)->len; @@ -1959,55 +1911,54 @@ tr_squeeze(str1, str2) } static VALUE -str_squeeze_bang(argc, argv, str1) +rb_str_squeeze_bang(argc, argv, str1) int argc; VALUE *argv; VALUE str1; { VALUE str2; - if (rb_scan_args(argc, argv, "01", &str2) == 1) { - str2 = str_to_str(str2); + if (rb_scan_args(argc, argv, "01", &str2) == 1 && TYPE(str2) != T_STRING) { + str2 = rb_str_to_str(str2); } return tr_squeeze(str1, str2); } static VALUE -str_squeeze(argc, argv, str) +rb_str_squeeze(argc, argv, str) int argc; VALUE *argv; VALUE str; { - VALUE val = str_squeeze_bang(argc, argv, str_dup(str)); + VALUE val = rb_str_squeeze_bang(argc, argv, rb_str_dup(str)); if (NIL_P(val)) return str; return val; } static VALUE -str_tr_s_bang(str, src, repl) +rb_str_tr_s_bang(str, src, repl) VALUE str, src, repl; { return tr_trans(str, src, repl, 1); } static VALUE -str_tr_s(str, src, repl) +rb_str_tr_s(str, src, repl) VALUE str, src, repl; { - VALUE val = tr_trans(str_dup(str), src, repl, 1); + VALUE val = tr_trans(rb_str_dup(str), src, repl, 1); if (NIL_P(val)) return str; return val; } static VALUE -str_split_method(argc, argv, str) +rb_str_split_method(argc, argv, str) int argc; VALUE *argv; VALUE str; { - extern VALUE FS; VALUE spat; VALUE limit; int char_sep = -1; @@ -2017,13 +1968,13 @@ str_split_method(argc, argv, str) if (rb_scan_args(argc, argv, "02", &spat, &limit) == 2) { lim = NUM2INT(limit); if (lim == 0) limit = Qnil; - else if (lim == 1) return ary_new3(1, str); + else if (lim == 1) return rb_ary_new3(1, str); i = 1; } if (argc == 0) { - if (!NIL_P(FS)) { - spat = FS; + if (!NIL_P(rb_fs)) { + spat = rb_fs; goto fs_set; } char_sep = ' '; @@ -2032,21 +1983,21 @@ str_split_method(argc, argv, str) switch (TYPE(spat)) { case T_STRING: fs_set: - if (STRLEN(spat) == 1) { + if (RSTRING(spat)->len == 1) { char_sep = (unsigned char)RSTRING(spat)->ptr[0]; } else { - spat = reg_regcomp(spat); + spat = rb_reg_regcomp(spat); } break; case T_REGEXP: break; default: - ArgError("split(): bad separator"); + rb_raise(rb_eArgError, "split(): bad separator"); } } - result = ary_new(); + result = rb_ary_new(); beg = 0; if (char_sep >= 0) { char *ptr = RSTRING(str)->ptr; @@ -2068,7 +2019,7 @@ str_split_method(argc, argv, str) } else { if (ISSPACE(*ptr)) { - ary_push(result, str_substr(str, beg, end-beg)); + rb_ary_push(result, rb_str_substr(str, beg, end-beg)); skip = 1; beg = end + 1; if (!NIL_P(limit) && lim <= ++i) break; @@ -2082,7 +2033,7 @@ str_split_method(argc, argv, str) else { for (end = beg = 0; ptr<eptr; ptr++) { if (*ptr == (char)char_sep) { - ary_push(result, str_substr(str, beg, end-beg)); + rb_ary_push(result, rb_str_substr(str, beg, end-beg)); beg = end + 1; if (!NIL_P(limit) && lim <= ++i) break; } @@ -2096,14 +2047,14 @@ str_split_method(argc, argv, str) int idx; struct re_registers *regs; - while ((end = reg_search(spat, str, start, 0)) >= 0) { - regs = RMATCH(backref_get())->regs; + while ((end = rb_reg_search(spat, str, start, 0)) >= 0) { + regs = RMATCH(rb_backref_get())->regs; if (start == end && BEG(0) == END(0)) { if (last_null == 1) { if (ismbchar(RSTRING(str)->ptr[beg])) - ary_push(result, str_substr(str, beg, 2)); + rb_ary_push(result, rb_str_substr(str, beg, 2)); else - ary_push(result, str_substr(str, beg, 1)); + rb_ary_push(result, rb_str_substr(str, beg, 1)); beg = start; } else { @@ -2113,7 +2064,7 @@ str_split_method(argc, argv, str) } } else { - ary_push(result, str_substr(str, beg, end-beg)); + rb_ary_push(result, rb_str_substr(str, beg, end-beg)); beg = start = END(0); } last_null = 0; @@ -2121,43 +2072,43 @@ str_split_method(argc, argv, str) for (idx=1; idx < regs->num_regs; idx++) { if (BEG(idx) == -1) continue; if (BEG(idx) == END(idx)) - tmp = str_new(0, 0); + tmp = rb_str_new(0, 0); else - tmp = str_subseq(str, BEG(idx), END(idx)-1); - ary_push(result, tmp); + tmp = rb_str_subseq(str, BEG(idx), END(idx)-1); + rb_ary_push(result, tmp); } if (!NIL_P(limit) && lim <= ++i) break; } } if (RSTRING(str)->len > beg) { - ary_push(result, str_subseq(str, beg, -1)); + rb_ary_push(result, rb_str_subseq(str, beg, -1)); } return result; } VALUE -str_split(str, sep0) +rb_str_split(str, sep0) VALUE str; char *sep0; { VALUE sep; - str = str_to_str(str); - sep = str_new2(sep0); - return str_split_method(1, &sep, str); + if (TYPE(str) != T_STRING) str = rb_str_to_str(str); + sep = rb_str_new2(sep0); + return rb_str_split_method(1, &sep, str); } static VALUE -f_split(argc, argv) +rb_f_split(argc, argv) int argc; VALUE *argv; { - return str_split_method(argc, argv, uscore_get()); + return rb_str_split_method(argc, argv, uscore_get()); } static VALUE -str_each_line(argc, argv, str) +rb_str_each_line(argc, argv, str) int argc; VALUE *argv; VALUE str; @@ -2171,14 +2122,14 @@ str_each_line(argc, argv, str) VALUE line; if (rb_scan_args(argc, argv, "01", &rs) == 0) { - rs = RS; + rs = rb_rs; } if (NIL_P(rs)) { rb_yield(str); return Qnil; } - rs = str_to_str(rs); + if (TYPE(rs) != T_STRING) rs = rb_str_to_str(rs); rslen = RSTRING(rs)->len; if (rslen == 0) { @@ -2197,18 +2148,18 @@ str_each_line(argc, argv, str) if (*p == newline && (rslen <= 1 || memcmp(RSTRING(rs)->ptr, p-rslen+1, rslen) == 0)) { - line = str_new(s, p - s + 1); - lastline_set(line); + line = rb_str_new(s, p - s + 1); + rb_lastline_set(line); rb_yield(line); if (RSTRING(str)->ptr != ptr || RSTRING(str)->len != len) - Fail("string modified"); + rb_raise(rb_eArgError, "string modified"); s = p + 1; } } if (s != pend) { - line = str_new(s, p - s); - lastline_set(line); + line = rb_str_new(s, p - s); + rb_lastline_set(line); rb_yield(line); } @@ -2216,7 +2167,7 @@ str_each_line(argc, argv, str) } static VALUE -str_each_byte(str) +rb_str_each_byte(str) struct RString* str; { int i; @@ -2228,11 +2179,11 @@ str_each_byte(str) } static VALUE -str_chop_bang(str) +rb_str_chop_bang(str) VALUE str; { if (RSTRING(str)->len > 0) { - str_modify(str); + rb_str_modify(str); RSTRING(str)->len--; if (RSTRING(str)->ptr[RSTRING(str)->len] == '\n') { if (RSTRING(str)->len > 0 && @@ -2247,35 +2198,35 @@ str_chop_bang(str) } static VALUE -str_chop(str) +rb_str_chop(str) VALUE str; { - VALUE val = str_chop_bang(str_dup(str)); + VALUE val = rb_str_chop_bang(rb_str_dup(str)); if (NIL_P(val)) return str; return val; } static VALUE -f_chop_bang(str) +rb_f_chop_bang(str) VALUE str; { - return str_chop_bang(uscore_get()); + return rb_str_chop_bang(uscore_get()); } static VALUE -f_chop() +rb_f_chop() { VALUE str = uscore_get(); - str = str_chop_bang(str_dup(str)); + str = rb_str_chop_bang(rb_str_dup(str)); if (NIL_P(str)) return uscore_get(); - lastline_set(str); + rb_lastline_set(str); return str; } static VALUE -str_chomp_bang(argc, argv, str) +rb_str_chomp_bang(argc, argv, str) int argc; VALUE *argv; VALUE str; @@ -2287,11 +2238,11 @@ str_chomp_bang(argc, argv, str) int len = RSTRING(str)->len; if (rb_scan_args(argc, argv, "01", &rs) == 0) { - rs = RS; + rs = rb_rs; } if (NIL_P(rs)) return Qnil; - rs = str_to_str(rs); + if (TYPE(rs) != T_STRING) rs = rb_str_to_str(rs); rslen = RSTRING(rs)->len; if (rslen == 0) { while (len>0 && p[len-1] == '\n') { @@ -2318,44 +2269,44 @@ str_chomp_bang(argc, argv, str) } static VALUE -str_chomp(argc, argv, str) +rb_str_chomp(argc, argv, str) int argc; VALUE *argv; VALUE str; { - VALUE val = str_chomp_bang(argc, argv, str_dup(str)); + VALUE val = rb_str_chomp_bang(argc, argv, rb_str_dup(str)); if (NIL_P(val)) return str; return val; } static VALUE -f_chomp_bang(argc, argv) +rb_f_chomp_bang(argc, argv) int argc; VALUE *argv; { - return str_chomp_bang(argc, argv, uscore_get()); + return rb_str_chomp_bang(argc, argv, uscore_get()); } static VALUE -f_chomp(argc, argv) +rb_f_chomp(argc, argv) int argc; VALUE *argv; { - VALUE val = str_chomp_bang(argc, argv, str_dup(uscore_get())); + VALUE val = rb_str_chomp_bang(argc, argv, rb_str_dup(uscore_get())); if (NIL_P(val)) return uscore_get(); - lastline_set(val); + rb_lastline_set(val); return val; } static VALUE -str_strip_bang(str) +rb_str_strip_bang(str) VALUE str; { char *s, *t, *e; - str_modify(str); + rb_str_modify(str); s = RSTRING(str)->ptr; e = t = s + RSTRING(str)->len; /* remove spaces at head */ @@ -2386,10 +2337,10 @@ str_strip_bang(str) } static VALUE -str_strip(str) +rb_str_strip(str) VALUE str; { - VALUE val = str_strip_bang(str_dup(str)); + VALUE val = rb_str_strip_bang(rb_str_dup(str)); if (NIL_P(val)) return str; return val; @@ -2404,8 +2355,8 @@ scan_once(str, pat, start) struct re_registers *regs; int i; - if (reg_search(pat, str, *start, 0) >= 0) { - match = backref_get(); + if (rb_reg_search(pat, str, *start, 0) >= 0) { + match = rb_backref_get(); regs = RMATCH(match)->regs; if (END(0) == *start) { *start = END(0)+1; @@ -2414,11 +2365,11 @@ scan_once(str, pat, start) *start = END(0); } if (regs->num_regs == 1) { - return reg_nth_match(0, match); + return rb_reg_nth_match(0, match); } - result = ary_new2(regs->num_regs); + result = rb_ary_new2(regs->num_regs); for (i=1; i < regs->num_regs; i++) { - ary_push(result, reg_nth_match(i, match)); + rb_ary_push(result, rb_reg_nth_match(i, match)); } return result; @@ -2427,27 +2378,18 @@ scan_once(str, pat, start) } static VALUE -str_scan(str, pat) +rb_str_scan(str, pat) VALUE str, pat; { VALUE result; int start = 0; - switch (TYPE(pat)) { - case T_STRING: - pat = reg_regcomp(pat); - break; - case T_REGEXP: - break; - default: - Check_Type(pat, T_REGEXP); - } - - if (!iterator_p()) { - VALUE ary = ary_new(); + pat = get_pat(pat); + if (!rb_iterator_p()) { + VALUE ary = rb_ary_new(); while (!NIL_P(result = scan_once(str, pat, &start))) { - ary_push(ary, result); + rb_ary_push(ary, result); } return ary; } @@ -2459,14 +2401,14 @@ str_scan(str, pat) } static VALUE -str_hex(str) +rb_str_hex(str) VALUE str; { - return str2inum(RSTRING(str)->ptr, 16); + return rb_str2inum(RSTRING(str)->ptr, 16); } static VALUE -str_oct(str) +rb_str_oct(str) VALUE str; { int base = 8; @@ -2475,35 +2417,35 @@ str_oct(str) (RSTRING(str)->ptr[1] == 'x' || RSTRING(str)->ptr[1] == 'X')) { base = 16; } - return str2inum(RSTRING(str)->ptr, base); + return rb_str2inum(RSTRING(str)->ptr, base); } static VALUE -str_crypt(str, salt) +rb_str_crypt(str, salt) VALUE str, salt; { extern char *crypt(); - salt = str_to_str(salt); + if (TYPE(salt) != T_STRING) salt = rb_str_to_str(salt); if (RSTRING(salt)->len < 2) - ArgError("salt too short(need >2 bytes)"); - return str_new2(crypt(RSTRING(str)->ptr, RSTRING(salt)->ptr)); + rb_raise(rb_eArgError, "salt too short(need >2 bytes)"); + return rb_str_new2(crypt(RSTRING(str)->ptr, RSTRING(salt)->ptr)); } static VALUE -str_intern(str) +rb_str_intern(str) VALUE str; { ID id; if (strlen(RSTRING(str)->ptr) != RSTRING(str)->len) - ArgError("string contains `\\0'"); + rb_raise(rb_eArgError, "string contains `\\0'"); id = rb_intern(RSTRING(str)->ptr); return INT2FIX(id); } static VALUE -str_sum(argc, argv, str) +rb_str_sum(argc, argv, str) int argc; VALUE *argv; VALUE str; @@ -2544,12 +2486,12 @@ str_sum(argc, argv, str) p++; } res &= mod; - return int2inum(res); + return rb_int2inum(res); } } static VALUE -str_ljust(str, w) +rb_str_ljust(str, w) VALUE str; VALUE w; { @@ -2558,7 +2500,7 @@ str_ljust(str, w) char *p, *pend; if (width < 0 || RSTRING(str)->len >= width) return str; - res = str_new(0, width); + res = rb_str_new(0, width); memcpy(RSTRING(res)->ptr, RSTRING(str)->ptr, RSTRING(str)->len); p = RSTRING(res)->ptr + RSTRING(str)->len; pend = RSTRING(res)->ptr + width; while (p < pend) { @@ -2568,7 +2510,7 @@ str_ljust(str, w) } static VALUE -str_rjust(str, w) +rb_str_rjust(str, w) VALUE str; VALUE w; { @@ -2577,7 +2519,7 @@ str_rjust(str, w) char *p, *pend; if (width < 0 || RSTRING(str)->len >= width) return str; - res = str_new(0, width); + res = rb_str_new(0, width); p = RSTRING(res)->ptr; pend = p + width - RSTRING(str)->len; while (p < pend) { *p++ = ' '; @@ -2587,7 +2529,7 @@ str_rjust(str, w) } static VALUE -str_center(str, w) +rb_str_center(str, w) VALUE str; VALUE w; { @@ -2597,7 +2539,7 @@ str_center(str, w) int n; if (width < 0 || RSTRING(str)->len >= width) return str; - res = str_new(0, width); + res = rb_str_new(0, width); n = (width - RSTRING(str)->len)/2; p = RSTRING(res)->ptr; pend = p + n; while (p < pend) { @@ -2614,118 +2556,118 @@ str_center(str, w) void Init_String() { - cString = rb_define_class("String", cObject); - rb_include_module(cString, mComparable); - rb_include_module(cString, mEnumerable); - rb_define_singleton_method(cString, "new", str_s_new, 1); - rb_define_method(cString, "clone", str_clone, 0); - rb_define_method(cString, "dup", str_dup, 0); - rb_define_method(cString, "<=>", str_cmp_method, 1); - rb_define_method(cString, "==", str_equal, 1); - rb_define_method(cString, "===", str_equal, 1); - rb_define_method(cString, "eql?", str_equal, 1); - rb_define_method(cString, "hash", str_hash_method, 0); - rb_define_method(cString, "+", str_plus, 1); - rb_define_method(cString, "*", str_times, 1); - rb_define_method(cString, "%", str_format, 1); - rb_define_method(cString, "[]", str_aref_method, -1); - rb_define_method(cString, "[]=", str_aset_method, -1); - rb_define_method(cString, "length", str_length, 0); - rb_define_alias(cString, "size", "length"); - rb_define_method(cString, "empty?", str_empty, 0); - rb_define_method(cString, "=~", str_match, 1); - rb_define_method(cString, "~", str_match2, 0); - rb_define_method(cString, "succ", str_succ, 0); - rb_define_method(cString, "succ!", str_succ_bang, 0); - rb_define_method(cString, "next", str_succ, 0); - rb_define_method(cString, "next!", str_succ_bang, 0); - rb_define_method(cString, "upto", str_upto, 1); - rb_define_method(cString, "index", str_index_method, -1); - rb_define_method(cString, "rindex", str_rindex, -1); - rb_define_method(cString, "replace", str_replace_method, 1); - - rb_define_method(cString, "freeze", str_freeze, 0); - rb_define_method(cString, "frozen?", str_frozen_p, 0); - - rb_define_method(cString, "taint", str_taint, 0); - rb_define_method(cString, "tainted?", str_tainted, 0); - - rb_define_method(cString, "to_i", str_to_i, 0); - rb_define_method(cString, "to_f", str_to_f, 0); - rb_define_method(cString, "to_s", str_to_s, 0); - rb_define_method(cString, "to_str", str_to_s, 0); - rb_define_method(cString, "inspect", str_inspect, 0); - rb_define_method(cString, "dump", str_dump, 0); - - rb_define_method(cString, "upcase", str_upcase, 0); - rb_define_method(cString, "downcase", str_downcase, 0); - rb_define_method(cString, "capitalize", str_capitalize, 0); - rb_define_method(cString, "swapcase", str_swapcase, 0); - - rb_define_method(cString, "upcase!", str_upcase_bang, 0); - rb_define_method(cString, "downcase!", str_downcase_bang, 0); - rb_define_method(cString, "capitalize!", str_capitalize_bang, 0); - rb_define_method(cString, "swapcase!", str_swapcase_bang, 0); - - rb_define_method(cString, "hex", str_hex, 0); - rb_define_method(cString, "oct", str_oct, 0); - rb_define_method(cString, "split", str_split_method, -1); - rb_define_method(cString, "reverse", str_reverse, 0); - rb_define_method(cString, "reverse!", str_reverse_bang, 0); - rb_define_method(cString, "concat", str_concat, 1); - rb_define_method(cString, "<<", str_concat, 1); - rb_define_method(cString, "crypt", str_crypt, 1); - rb_define_method(cString, "intern", str_intern, 0); - - rb_define_method(cString, "include?", str_include, 1); - - rb_define_method(cString, "scan", str_scan, 1); - - rb_define_method(cString, "ljust", str_ljust, 1); - rb_define_method(cString, "rjust", str_rjust, 1); - rb_define_method(cString, "center", str_center, 1); - - rb_define_method(cString, "sub", str_sub, -1); - rb_define_method(cString, "gsub", str_gsub, -1); - rb_define_method(cString, "chop", str_chop, 0); - rb_define_method(cString, "chomp", str_chomp, -1); - rb_define_method(cString, "strip", str_strip, 0); - - rb_define_method(cString, "sub!", str_sub_bang, -1); - rb_define_method(cString, "gsub!", str_gsub_bang, -1); - rb_define_method(cString, "strip!", str_strip_bang, 0); - rb_define_method(cString, "chop!", str_chop_bang, 0); - rb_define_method(cString, "chomp!", str_chomp_bang, -1); - - rb_define_method(cString, "tr", str_tr, 2); - rb_define_method(cString, "tr_s", str_tr_s, 2); - rb_define_method(cString, "delete", str_delete, 1); - rb_define_method(cString, "squeeze", str_squeeze, -1); - - rb_define_method(cString, "tr!", str_tr_bang, 2); - rb_define_method(cString, "tr_s!", str_tr_s_bang, 2); - rb_define_method(cString, "delete!", str_delete_bang, 1); - rb_define_method(cString, "squeeze!", str_squeeze_bang, -1); - - rb_define_method(cString, "each_line", str_each_line, -1); - rb_define_method(cString, "each", str_each_line, -1); - rb_define_method(cString, "each_byte", str_each_byte, 0); - - rb_define_method(cString, "sum", str_sum, -1); - - rb_define_global_function("sub", f_sub, -1); - rb_define_global_function("gsub", f_gsub, -1); - - rb_define_global_function("sub!", f_sub_bang, -1); - rb_define_global_function("gsub!", f_gsub_bang, -1); - - rb_define_global_function("chop", f_chop, 0); - rb_define_global_function("chop!", f_chop_bang, 0); - - rb_define_global_function("chomp", f_chomp, -1); - rb_define_global_function("chomp!", f_chomp_bang, -1); - - rb_define_global_function("split", f_split, -1); + rb_cString = rb_define_class("String", rb_cObject); + rb_include_module(rb_cString, rb_mComparable); + rb_include_module(rb_cString, rb_mEnumerable); + rb_define_singleton_method(rb_cString, "new", rb_str_s_new, 1); + rb_define_method(rb_cString, "clone", rb_str_clone, 0); + rb_define_method(rb_cString, "dup", rb_str_dup, 0); + rb_define_method(rb_cString, "<=>", rb_str_cmp_method, 1); + rb_define_method(rb_cString, "==", rb_str_equal, 1); + rb_define_method(rb_cString, "===", rb_str_equal, 1); + rb_define_method(rb_cString, "eql?", rb_str_equal, 1); + rb_define_method(rb_cString, "hash", rb_str_hash_method, 0); + rb_define_method(rb_cString, "+", rb_str_plus, 1); + rb_define_method(rb_cString, "*", rb_str_times, 1); + rb_define_method(rb_cString, "%", rb_str_format, 1); + rb_define_method(rb_cString, "[]", rb_str_aref_method, -1); + rb_define_method(rb_cString, "[]=", rb_str_aset_method, -1); + rb_define_method(rb_cString, "length", rb_str_length, 0); + rb_define_alias(rb_cString, "size", "length"); + rb_define_method(rb_cString, "empty?", rb_str_empty, 0); + rb_define_method(rb_cString, "=~", rb_str_match, 1); + rb_define_method(rb_cString, "~", rb_str_match2, 0); + rb_define_method(rb_cString, "succ", rb_str_succ, 0); + rb_define_method(rb_cString, "succ!", rb_str_succ_bang, 0); + rb_define_method(rb_cString, "next", rb_str_succ, 0); + rb_define_method(rb_cString, "next!", rb_str_succ_bang, 0); + rb_define_method(rb_cString, "upto", rb_str_upto, 1); + rb_define_method(rb_cString, "index", rb_str_index_method, -1); + rb_define_method(rb_cString, "rindex", rb_str_rindex, -1); + rb_define_method(rb_cString, "replace", rb_str_replace_method, 1); + + rb_define_method(rb_cString, "freeze", rb_str_freeze, 0); + rb_define_method(rb_cString, "frozen?", rb_str_frozen_p, 0); + + rb_define_method(rb_cString, "taint", rb_str_taint, 0); + rb_define_method(rb_cString, "tainted?", rb_str_tainted, 0); + + rb_define_method(rb_cString, "to_i", rb_str_to_i, 0); + rb_define_method(rb_cString, "to_f", rb_str_to_f, 0); + rb_define_method(rb_cString, "to_s", rb_str_to_s, 0); + rb_define_method(rb_cString, "to_str", rb_str_to_s, 0); + rb_define_method(rb_cString, "inspect", rb_str_inspect, 0); + rb_define_method(rb_cString, "dump", rb_str_dump, 0); + + rb_define_method(rb_cString, "upcase", rb_str_upcase, 0); + rb_define_method(rb_cString, "downcase", rb_str_downcase, 0); + rb_define_method(rb_cString, "capitalize", rb_str_capitalize, 0); + rb_define_method(rb_cString, "swapcase", rb_str_swapcase, 0); + + rb_define_method(rb_cString, "upcase!", rb_str_upcase_bang, 0); + rb_define_method(rb_cString, "downcase!", rb_str_downcase_bang, 0); + rb_define_method(rb_cString, "capitalize!", rb_str_capitalize_bang, 0); + rb_define_method(rb_cString, "swapcase!", rb_str_swapcase_bang, 0); + + rb_define_method(rb_cString, "hex", rb_str_hex, 0); + rb_define_method(rb_cString, "oct", rb_str_oct, 0); + rb_define_method(rb_cString, "split", rb_str_split_method, -1); + rb_define_method(rb_cString, "reverse", rb_str_reverse, 0); + rb_define_method(rb_cString, "reverse!", rb_str_reverse_bang, 0); + rb_define_method(rb_cString, "concat", rb_str_concat, 1); + rb_define_method(rb_cString, "<<", rb_str_concat, 1); + rb_define_method(rb_cString, "crypt", rb_str_crypt, 1); + rb_define_method(rb_cString, "intern", rb_str_intern, 0); + + rb_define_method(rb_cString, "include?", rb_str_include, 1); + + rb_define_method(rb_cString, "scan", rb_str_scan, 1); + + rb_define_method(rb_cString, "ljust", rb_str_ljust, 1); + rb_define_method(rb_cString, "rjust", rb_str_rjust, 1); + rb_define_method(rb_cString, "center", rb_str_center, 1); + + rb_define_method(rb_cString, "sub", rb_str_sub, -1); + rb_define_method(rb_cString, "gsub", rb_str_gsub, -1); + rb_define_method(rb_cString, "chop", rb_str_chop, 0); + rb_define_method(rb_cString, "chomp", rb_str_chomp, -1); + rb_define_method(rb_cString, "strip", rb_str_strip, 0); + + rb_define_method(rb_cString, "sub!", rb_str_sub_bang, -1); + rb_define_method(rb_cString, "gsub!", rb_str_gsub_bang, -1); + rb_define_method(rb_cString, "strip!", rb_str_strip_bang, 0); + rb_define_method(rb_cString, "chop!", rb_str_chop_bang, 0); + rb_define_method(rb_cString, "chomp!", rb_str_chomp_bang, -1); + + rb_define_method(rb_cString, "tr", rb_str_tr, 2); + rb_define_method(rb_cString, "tr_s", rb_str_tr_s, 2); + rb_define_method(rb_cString, "delete", rb_str_delete, 1); + rb_define_method(rb_cString, "squeeze", rb_str_squeeze, -1); + + rb_define_method(rb_cString, "tr!", rb_str_tr_bang, 2); + rb_define_method(rb_cString, "tr_s!", rb_str_tr_s_bang, 2); + rb_define_method(rb_cString, "delete!", rb_str_delete_bang, 1); + rb_define_method(rb_cString, "squeeze!", rb_str_squeeze_bang, -1); + + rb_define_method(rb_cString, "each_line", rb_str_each_line, -1); + rb_define_method(rb_cString, "each", rb_str_each_line, -1); + rb_define_method(rb_cString, "each_byte", rb_str_each_byte, 0); + + rb_define_method(rb_cString, "sum", rb_str_sum, -1); + + rb_define_global_function("sub", rb_f_sub, -1); + rb_define_global_function("gsub", rb_f_gsub, -1); + + rb_define_global_function("sub!", rb_f_sub_bang, -1); + rb_define_global_function("gsub!", rb_f_gsub_bang, -1); + + rb_define_global_function("chop", rb_f_chop, 0); + rb_define_global_function("chop!", rb_f_chop_bang, 0); + + rb_define_global_function("chomp", rb_f_chomp, -1); + rb_define_global_function("chomp!", rb_f_chomp_bang, -1); + + rb_define_global_function("split", rb_f_split, -1); pr_str = rb_intern("to_s"); } |