summary | refs | log | tree | commit | diff
path: root/string.c
diff options
context:
space:
mode:
Diffstat (limited to 'string.c')
-rw-r--r-- string.c 126
1 files changed, 87 insertions, 39 deletions
diff --git a/string.c b/string.c
index 5dea1c7ab2..0de4892f50 100644
--- a/string.c
+++ b/string.c
@@ -33,12 +33,13 @@ VALUE rb_cString;
VALUE rb_fs;
VALUE
-rb_str_new(ptr, len)
+rb_str_new0(klass, ptr, len)
+ VALUE klass;
const char *ptr;
long len;
{
NEWOBJ(str, struct RString);
- OBJSETUP(str, rb_cString, T_STRING);
+ OBJSETUP(str, klass, T_STRING);
str->ptr = 0;
str->len = len;
@@ -52,6 +53,14 @@ rb_str_new(ptr, len)
}
VALUE
+rb_str_new(ptr, len)
+ const char *ptr;
+ long len;
+{
+ return rb_str_new0(rb_cString, ptr, len);
+}
+
+VALUE
rb_str_new2(ptr)
const char *ptr;
{
@@ -129,6 +138,15 @@ rb_str_new4(orig)
}
}
+VALUE
+rb_str_new5(obj, ptr, len)
+ VALUE obj;
+ const char *ptr;
+ long len;
+{
+ return rb_str_new0(rb_obj_class(obj), ptr, len);
+}
+
#define STR_BUF_MIN_SIZE 128
VALUE
@@ -276,7 +294,6 @@ rb_str_dup(str)
return str2;
}
-
static VALUE
rb_str_clone(str)
VALUE str;
@@ -295,8 +312,7 @@ rb_str_s_new(argc, argv, klass)
VALUE *argv;
VALUE klass;
{
- VALUE str = rb_str_new(0, 0);
- OBJSETUP(str, klass, T_STRING);
+ VALUE str = rb_str_new0(klass, 0, 0);
rb_obj_call_init(str, argc, argv);
return str;
@@ -360,7 +376,7 @@ rb_str_times(str, times)
long i, len;
len = NUM2LONG(times);
- if (len == 0) return rb_str_new(0,0);
+ if (len == 0) return rb_str_new5(str,0,0);
if (len < 0) {
rb_raise(rb_eArgError, "negative argument");
}
@@ -368,16 +384,14 @@ rb_str_times(str, times)
rb_raise(rb_eArgError, "argument too big");
}
- str2 = rb_str_new(0, RSTRING(str)->len*len);
+ str2 = rb_str_new5(str,0, RSTRING(str)->len*len);
for (i=0; i<len; i++) {
memcpy(RSTRING(str2)->ptr+(i*RSTRING(str)->len),
RSTRING(str)->ptr, RSTRING(str)->len);
}
RSTRING(str2)->ptr[RSTRING(str2)->len] = '\0';
- if (OBJ_TAINTED(str)) {
- OBJ_TAINT(str2);
- }
+ OBJ_INFECT(str2, str);
return str2;
}
@@ -420,10 +434,10 @@ rb_str_substr(str, beg, len)
if (len < 0) {
len = 0;
}
- if (len == 0) return rb_str_new(0,0);
+ if (len == 0) return rb_str_new5(str,0,0);
- str2 = rb_str_new(RSTRING(str)->ptr+beg, len);
- if (OBJ_TAINTED(str)) OBJ_TAINT(str2);
+ str2 = rb_str_new5(str,RSTRING(str)->ptr+beg, len);
+ OBJ_INFECT(str2, str);
return str2;
}
@@ -984,7 +998,7 @@ rb_str_succ(orig)
int c = -1;
int n = 0;
- str = rb_str_new(RSTRING(orig)->ptr, RSTRING(orig)->len);
+ str = rb_str_new5(orig,RSTRING(orig)->ptr, RSTRING(orig)->len);
OBJ_INFECT(str, orig);
if (RSTRING(str)->len == 0) return str;
@@ -1057,6 +1071,17 @@ rb_str_upto_m(beg, end)
}
static VALUE
+rb_str_subpat(str, re, offset)
+ VALUE str, re;
+ int offset;
+{
+ if (rb_reg_search(re, str, 0, 0) >= 0) {
+ return rb_reg_nth_match(offset, rb_backref_get());
+ }
+ return Qnil;
+}
+
+static VALUE
rb_str_aref(str, indx)
VALUE str;
VALUE indx;
@@ -1077,9 +1102,7 @@ rb_str_aref(str, indx)
return INT2FIX(RSTRING(str)->ptr[idx] & 0xff);
case T_REGEXP:
- if (rb_reg_search(indx, str, 0, 0) >= 0)
- return rb_reg_last_match(rb_backref_get());
- return Qnil;
+ return rb_str_subpat(str, indx, 0);
case T_STRING:
if (rb_str_index(str, indx, 0) != -1) return indx;
@@ -1111,6 +1134,9 @@ rb_str_aref_m(argc, argv, str)
VALUE str;
{
if (argc == 2) {
+ if (TYPE(argv[0]) == T_REGEXP) {
+ return rb_str_subpat(str, argv[0], NUM2INT(argv[1]));
+ }
return rb_str_substr(str, NUM2INT(argv[0]), NUM2INT(argv[1]));
}
if (argc != 1) {
@@ -1162,7 +1188,31 @@ rb_str_update(str, beg, len, val)
OBJ_INFECT(str, val);
}
-static VALUE rb_str_sub_bang _((int, VALUE*, VALUE));
+/*
+ * Replaces the part of str covered by match group `offset` of re with
+ * val, by handing the group's start and length to rb_str_update().
+ * Callers pass offset 0 for the plain str[re] = val form.
+ *
+ * Raises IndexError when:
+ *   - re does not match str,
+ *   - offset is negative or not below the match's num_regs,
+ *   - the selected group has no start position (BEG(offset) == -1).
+ */
+static void
+rb_str_subpat_set(str, re, offset, val)
+    VALUE str, re;
+    int offset;
+    VALUE val;
+{
+    VALUE match;
+    int start, end, len;
+
+    if (rb_reg_search(re, str, 0, 0) < 0) {
+        rb_raise(rb_eIndexError, "regexp not matched");
+    }
+    match = rb_backref_get();
+    /*
+     * offset comes from NUM2INT() on a user-supplied argument and may be
+     * negative.  BEG()/END() presumably index the match registers
+     * directly (macros not visible here), so a negative value must be
+     * rejected along with values past the last group.
+     */
+    if (offset < 0 || offset >= RMATCH(match)->regs->num_regs) {
+        rb_raise(rb_eIndexError, "index %d out of regexp", offset);
+    }
+
+    start = RMATCH(match)->BEG(offset);
+    if (start == -1) {
+        rb_raise(rb_eIndexError, "regexp group %d not matched", offset);
+    }
+    end = RMATCH(match)->END(offset);
+    len = end - start;
+    rb_str_update(str, start, len, val);
+}
static VALUE
rb_str_aset(str, indx, val)
@@ -1194,12 +1244,7 @@ rb_str_aset(str, indx, val)
return val;
case T_REGEXP:
- {
- VALUE args[2];
- args[0] = indx;
- args[1] = val;
- rb_str_sub_bang(2, args, str);
- }
+ rb_str_subpat_set(str, indx, 0, val);
return val;
case T_STRING:
@@ -1231,11 +1276,12 @@ rb_str_aset_m(argc, argv, str)
{
rb_str_modify(str);
if (argc == 3) {
- long beg, len;
-
- beg = NUM2INT(argv[0]);
- len = NUM2INT(argv[1]);
- rb_str_update(str, beg, len, argv[2]);
+ if (TYPE(argv[0]) == T_REGEXP) {
+ rb_str_subpat_set(str, argv[0], NUM2INT(argv[1]), argv[2]);
+ }
+ else {
+ rb_str_update(str, NUM2INT(argv[0]), NUM2INT(argv[1]), argv[2]);
+ }
return argv[2];
}
if (argc != 2) {
@@ -1479,6 +1525,7 @@ str_gsub(argc, argv, str, bang)
NEWOBJ(dup, struct RString);
OBJSETUP(dup, rb_cString, T_STRING);
OBJ_INFECT(dup, str);
+ RBASIC(dup)->klass = rb_obj_class(str);
str = (VALUE)dup;
dup->orig = 0;
}
@@ -1529,7 +1576,7 @@ rb_str_replace(str, str2)
memcpy(RSTRING(str)->ptr, RSTRING(str2)->ptr, RSTRING(str2)->len);
}
- if (OBJ_TAINTED(str2)) OBJ_TAINT(str);
+ /* str now holds str2's contents, so taint flows from str2 into str
+  * (OBJ_INFECT takes the target first, then the source). */
+ OBJ_INFECT(str, str2);
return str;
}
@@ -1616,13 +1663,14 @@ rb_str_reverse(str)
if (RSTRING(str)->len <= 1) return rb_str_dup(str);
- obj = rb_str_new(0, RSTRING(str)->len);
+ obj = rb_str_new5(str, 0, RSTRING(str)->len);
s = RSTRING(str)->ptr; e = s + RSTRING(str)->len - 1;
p = RSTRING(obj)->ptr;
while (e >= s) {
*p++ = *e--;
}
+ OBJ_INFECT(obj, str);
return obj;
}
@@ -1771,7 +1819,7 @@ rb_str_dump(str)
}
}
- result = rb_str_new(0, len);
+ result = rb_str_new5(str, 0, len);
p = RSTRING(str)->ptr; pend = p + RSTRING(str)->len;
q = RSTRING(result)->ptr; qend = q + len;
@@ -2432,7 +2480,7 @@ rb_str_split_m(argc, argv, str)
for (idx=1; idx < regs->num_regs; idx++) {
if (BEG(idx) == -1) continue;
if (BEG(idx) == END(idx))
- tmp = rb_str_new(0, 0);
+ tmp = rb_str_new5(str, 0, 0);
else
tmp = rb_str_substr(str, BEG(idx), END(idx)-BEG(idx));
rb_ary_push(result, tmp);
@@ -2442,7 +2490,7 @@ rb_str_split_m(argc, argv, str)
}
if (!NIL_P(limit) || RSTRING(str)->len > beg || lim < 0) {
if (RSTRING(str)->len == beg)
- tmp = rb_str_new(0, 0);
+ tmp = rb_str_new5(str, 0, 0);
else
tmp = rb_str_substr(str, beg, RSTRING(str)->len-beg);
rb_ary_push(result, tmp);
@@ -2515,7 +2563,7 @@ rb_str_each_line(argc, argv, str)
if (p[-1] == newline &&
(rslen <= 1 ||
rb_memcmp(RSTRING(rs)->ptr, p-rslen, rslen) == 0)) {
- line = rb_str_new(s, p - s);
+ line = rb_str_new5(str, s, p - s);
rb_yield(line);
if (RSTRING(str)->ptr != ptr || RSTRING(str)->len != len)
rb_raise(rb_eArgError, "string modified");
@@ -2525,7 +2573,7 @@ rb_str_each_line(argc, argv, str)
if (s != pend) {
if (p > pend) p = pend;
- line = rb_str_new(s, p - s);
+ line = rb_str_new5(str, s, p - s);
OBJ_INFECT(line, str);
rb_yield(line);
}
@@ -2932,7 +2980,7 @@ rb_str_ljust(str, w)
char *p, *pend;
if (width < 0 || RSTRING(str)->len >= width) return str;
- res = rb_str_new(0, width);
+ res = rb_str_new5(str, 0, width);
memcpy(RSTRING(res)->ptr, RSTRING(str)->ptr, RSTRING(str)->len);
p = RSTRING(res)->ptr + RSTRING(str)->len; pend = RSTRING(res)->ptr + width;
while (p < pend) {
@@ -2952,7 +3000,7 @@ rb_str_rjust(str, w)
char *p, *pend;
if (width < 0 || RSTRING(str)->len >= width) return str;
- res = rb_str_new(0, width);
+ res = rb_str_new5(str, 0, width);
p = RSTRING(res)->ptr; pend = p + width - RSTRING(str)->len;
while (p < pend) {
*p++ = ' ';
@@ -2973,7 +3021,7 @@ rb_str_center(str, w)
long n;
if (width < 0 || RSTRING(str)->len >= width) return str;
- res = rb_str_new(0, width);
+ res = rb_str_new5(str, 0, width);
n = (width - RSTRING(str)->len)/2;
p = RSTRING(res)->ptr; pend = p + n;
while (p < pend) {