summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: akr <akr@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2008-06-28 12:25:45 +0000
committer: akr <akr@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2008-06-28 12:25:45 +0000
commit: 340cd503a7d0f73bb1ca6446f78793f968a5d5bf (patch)
tree: 6ef0adfb397248308136bd096c7dc3241b782809
parent: 4c766e1713eda038b5118e1ebfbb6e26545966cd (diff)
* include/ruby/ruby.h (struct RRegexp): new field usecnt. replace
  str and len by src.

* gc.c (gc_mark_children): mark src field of regexp.
  (obj_free): don't free str field.

* re.c (REG_BUSY): removed.
  (rb_reg_initialize): prohibit re-initialize regexp.
  (rb_reg_search): use usecnt to prevent freeing regexp currently
  using. this prevents SEGV by:
    r = /\A((a.)*(a.)*)*b/
    r =~ "ab" + "\xc2\xa1".force_encoding("euc-jp")
    t = Thread.new { r =~ "ab"*8 + "\xc2\xa1".force_encoding("utf-8")}
    sleep 0.2
    r =~ "ab"*8 + "\xc2\xa1".force_encoding("euc-jp")

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@17635 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- ChangeLog 18
-rw-r--r-- gc.c 6
-rw-r--r-- include/ruby/ruby.h 7
-rw-r--r-- marshal.c 11
-rw-r--r-- re.c 74
-rw-r--r-- string.c 2
6 files changed, 69 insertions, 49 deletions
diff --git a/ChangeLog b/ChangeLog
index a97ce4c13f..5c8a7a2ff0 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,21 @@
+Sat Jun 28 21:25:08 2008 Tanaka Akira <akr@fsij.org>
+
+ * include/ruby/ruby.h (struct RRegexp): new field usecnt. replace
+ str and len by src.
+
+ * gc.c (gc_mark_children): mark src field of regexp.
+ (obj_free): don't free str field.
+
+ * re.c (REG_BUSY): removed.
+ (rb_reg_initialize): prohibit re-initialize regexp.
+ (rb_reg_search): use usecnt to prevent freeing regexp currently
+ using. this prevents SEGV by:
+ r = /\A((a.)*(a.)*)*b/
+ r =~ "ab" + "\xc2\xa1".force_encoding("euc-jp")
+ t = Thread.new { r =~ "ab"*8 + "\xc2\xa1".force_encoding("utf-8")}
+ sleep 0.2
+ r =~ "ab"*8 + "\xc2\xa1".force_encoding("euc-jp")
+
Sat Jun 28 21:15:43 2008 Nobuyoshi Nakada <nobu@ruby-lang.org>
* include/ruby/intern.h (rb_str_new2, rb_tainted_str_new2,
diff --git a/gc.c b/gc.c
index a888d659e3..966aaa1933 100644
--- a/gc.c
+++ b/gc.c
@@ -1267,6 +1267,9 @@ gc_mark_children(rb_objspace_t *objspace, VALUE ptr, int lev)
break;
case T_REGEXP:
+ gc_mark(objspace, obj->as.regexp.src, lev);
+ break;
+
case T_FLOAT:
case T_BIGNUM:
break;
@@ -1505,9 +1508,6 @@ obj_free(rb_objspace_t *objspace, VALUE obj)
if (RANY(obj)->as.regexp.ptr) {
onig_free(RANY(obj)->as.regexp.ptr);
}
- if (RANY(obj)->as.regexp.str) {
- xfree(RANY(obj)->as.regexp.str);
- }
break;
case T_DATA:
if (DATA_PTR(obj)) {
diff --git a/include/ruby/ruby.h b/include/ruby/ruby.h
index d2b9f4c8b3..2d602d4a46 100644
--- a/include/ruby/ruby.h
+++ b/include/ruby/ruby.h
@@ -522,9 +522,12 @@ struct RArray {
struct RRegexp {
struct RBasic basic;
struct re_pattern_buffer *ptr;
- long len;
- char *str;
+ VALUE src;
+ unsigned long usecnt;
};
+#define RREGEXP_SRC(r) RREGEXP(r)->src
+#define RREGEXP_SRC_PTR(r) RSTRING_PTR(RREGEXP(r)->src)
+#define RREGEXP_SRC_LEN(r) RSTRING_LEN(RREGEXP(r)->src)
struct RHash {
struct RBasic basic;
diff --git a/marshal.c b/marshal.c
index c9170c6f37..f2d01809ca 100644
--- a/marshal.c
+++ b/marshal.c
@@ -693,10 +693,13 @@ w_object(VALUE obj, struct dump_arg *arg, int limit)
break;
case T_REGEXP:
- w_uclass(obj, rb_cRegexp, arg);
- w_byte(TYPE_REGEXP, arg);
- w_bytes(RREGEXP(obj)->str, RREGEXP(obj)->len, arg);
- w_byte((char)rb_reg_options(obj), arg);
+ w_uclass(obj, rb_cRegexp, arg);
+ w_byte(TYPE_REGEXP, arg);
+ {
+ int opts = rb_reg_options(obj);
+ w_bytes(RREGEXP_SRC_PTR(obj), RREGEXP_SRC_LEN(obj), arg);
+ w_byte((char)opts, arg);
+ }
break;
case T_ARRAY:
diff --git a/re.c b/re.c
index 035e2486da..7734132600 100644
--- a/re.c
+++ b/re.c
@@ -238,7 +238,6 @@ rb_memsearch(const void *x0, long m, const void *y0, long n, rb_encoding *enc)
#define REG_LITERAL FL_USER5
#define REG_ENCODING_NONE FL_USER6
-#define REG_BUSY FL_USER7
#define KCODE_FIXED FL_USER4
@@ -309,7 +308,7 @@ rb_char_to_option_kcode(int c, int *option, int *kcode)
static void
rb_reg_check(VALUE re)
{
- if (!RREGEXP(re)->ptr || !RREGEXP(re)->str) {
+ if (!RREGEXP(re)->ptr || !RREGEXP_SRC(re) || !RREGEXP_SRC_PTR(re)) {
rb_raise(rb_eTypeError, "uninitialized Regexp");
}
}
@@ -416,7 +415,7 @@ rb_reg_source(VALUE re)
VALUE str;
rb_reg_check(re);
- str = rb_enc_str_new(RREGEXP(re)->str,RREGEXP(re)->len, rb_enc_get(re));
+ str = rb_enc_str_new(RREGEXP_SRC_PTR(re),RREGEXP_SRC_LEN(re), rb_enc_get(re));
if (OBJ_TAINTED(re)) OBJ_TAINT(str);
return str;
}
@@ -437,7 +436,7 @@ static VALUE
rb_reg_inspect(VALUE re)
{
rb_reg_check(re);
- return rb_reg_desc(RREGEXP(re)->str, RREGEXP(re)->len, re);
+ return rb_reg_desc(RREGEXP_SRC_PTR(re), RREGEXP_SRC_LEN(re), re);
}
@@ -475,8 +474,8 @@ rb_reg_to_s(VALUE re)
rb_enc_copy(str, re);
options = RREGEXP(re)->ptr->options;
- ptr = (UChar*)RREGEXP(re)->str;
- len = RREGEXP(re)->len;
+ ptr = (UChar*)RREGEXP_SRC_PTR(re);
+ len = RREGEXP_SRC_LEN(re);
again:
if (len >= 4 && ptr[0] == '(' && ptr[1] == '?') {
int err = 1;
@@ -528,8 +527,8 @@ rb_reg_to_s(VALUE re)
}
if (err) {
options = RREGEXP(re)->ptr->options;
- ptr = (UChar*)RREGEXP(re)->str;
- len = RREGEXP(re)->len;
+ ptr = (UChar*)RREGEXP_SRC_PTR(re);
+ len = RREGEXP_SRC_LEN(re);
}
}
@@ -1220,10 +1219,10 @@ rb_reg_prepare_re(VALUE re, VALUE str)
rb_reg_check(re);
reg = RREGEXP(re)->ptr;
- pattern = RREGEXP(re)->str;
+ pattern = RREGEXP_SRC_PTR(re);
unescaped = rb_reg_preprocess(
- pattern, pattern + RREGEXP(re)->len, enc,
+ pattern, pattern + RREGEXP_SRC_LEN(re), enc,
&fixed_enc, err);
if (unescaped == Qnil) {
@@ -1236,7 +1235,7 @@ rb_reg_prepare_re(VALUE re, VALUE str)
OnigDefaultSyntax, &einfo);
if (r) {
onig_error_code_to_str((UChar*)err, r, &einfo);
- rb_reg_raise(pattern, RREGEXP(re)->len, err, re);
+ rb_reg_raise(pattern, RREGEXP_SRC_LEN(re), err, re);
}
RB_GC_GUARD(unescaped);
@@ -1281,8 +1280,8 @@ rb_reg_search(VALUE re, VALUE str, int pos, int reverse)
VALUE match;
struct re_registers regi, *regs = &regi;
char *range = RSTRING_PTR(str);
- regex_t *reg0 = RREGEXP(re)->ptr, *reg;
- int busy = FL_TEST(re, REG_BUSY);
+ regex_t *reg;
+ int tmpreg;
if (pos > RSTRING_LEN(str) || pos < 0) {
rb_backref_set(Qnil);
@@ -1290,6 +1289,8 @@ rb_reg_search(VALUE re, VALUE str, int pos, int reverse)
}
reg = rb_reg_prepare_re(re, str);
+ tmpreg = reg != RREGEXP(re)->ptr;
+ if (!tmpreg) RREGEXP(re)->usecnt++;
match = rb_backref_get();
if (!NIL_P(match)) {
@@ -1303,7 +1304,6 @@ rb_reg_search(VALUE re, VALUE str, int pos, int reverse)
if (NIL_P(match)) {
MEMZERO(regs, struct re_registers, 1);
}
- FL_SET(re, REG_BUSY);
if (!reverse) {
range += RSTRING_LEN(str);
}
@@ -1313,17 +1313,16 @@ rb_reg_search(VALUE re, VALUE str, int pos, int reverse)
((UChar*)(RSTRING_PTR(str)) + pos),
((UChar*)range),
regs, ONIG_OPTION_NONE);
-
- if (RREGEXP(re)->ptr != reg) {
- if (busy) {
+ if (!tmpreg) RREGEXP(re)->usecnt--;
+ if (tmpreg) {
+ if (RREGEXP(re)->usecnt) {
onig_free(reg);
}
else {
- onig_free(reg0);
+ onig_free(RREGEXP(re)->ptr);
RREGEXP(re)->ptr = reg;
}
}
- if (!busy) FL_UNSET(re, REG_BUSY);
if (result < 0) {
if (regs == &regi)
onig_region_free(regs, 0);
@@ -1334,7 +1333,7 @@ rb_reg_search(VALUE re, VALUE str, int pos, int reverse)
else {
onig_errmsg_buffer err = "";
onig_error_code_to_str((UChar*)err, result);
- rb_reg_raise(RREGEXP(re)->str, RREGEXP(re)->len, err, 0);
+ rb_reg_raise(RREGEXP_SRC_PTR(re), RREGEXP_SRC_LEN(re), err, 0);
}
}
@@ -2295,10 +2294,9 @@ rb_reg_initialize(VALUE obj, const char *s, int len, rb_encoding *enc,
rb_check_frozen(obj);
if (FL_TEST(obj, REG_LITERAL))
rb_raise(rb_eSecurityError, "can't modify literal regexp");
- if (re->ptr) onig_free(re->ptr);
- if (re->str) xfree(re->str);
+ if (re->ptr)
+ rb_raise(rb_eTypeError, "already initialized regexp");
re->ptr = 0;
- re->str = 0;
unescaped = rb_reg_preprocess(s, s+len, enc, &fixed_enc, err);
if (unescaped == Qnil)
@@ -2330,10 +2328,8 @@ rb_reg_initialize(VALUE obj, const char *s, int len, rb_encoding *enc,
re->ptr = make_regexp(RSTRING_PTR(unescaped), RSTRING_LEN(unescaped), enc,
options & ARG_REG_OPTION_MASK, err);
if (!re->ptr) return -1;
- re->str = ALLOC_N(char, len+1);
- memcpy(re->str, s, len);
- re->str[len] = '\0';
- re->len = len;
+ re->src = rb_enc_str_new(s, len, enc);
+ OBJ_FREEZE(re->src);
RB_GC_GUARD(unescaped);
return 0;
}
@@ -2366,8 +2362,8 @@ rb_reg_s_alloc(VALUE klass)
OBJSETUP(re, klass, T_REGEXP);
re->ptr = 0;
- re->len = 0;
- re->str = 0;
+ re->src = 0;
+ re->usecnt = 0;
return (VALUE)re;
}
@@ -2431,9 +2427,9 @@ VALUE
rb_reg_regcomp(VALUE str)
{
volatile VALUE save_str = str;
- if (reg_cache && RREGEXP(reg_cache)->len == RSTRING_LEN(str)
+ if (reg_cache && RREGEXP_SRC_LEN(reg_cache) == RSTRING_LEN(str)
&& ENCODING_GET(reg_cache) == ENCODING_GET(str)
- && memcmp(RREGEXP(reg_cache)->str, RSTRING_PTR(str), RSTRING_LEN(str)) == 0)
+ && memcmp(RREGEXP_SRC_PTR(reg_cache), RSTRING_PTR(str), RSTRING_LEN(str)) == 0)
return reg_cache;
return reg_cache = rb_reg_new_str(save_str, 0);
@@ -2454,8 +2450,8 @@ rb_reg_hash(VALUE re)
rb_reg_check(re);
hashval = RREGEXP(re)->ptr->options;
- len = RREGEXP(re)->len;
- p = RREGEXP(re)->str;
+ len = RREGEXP_SRC_LEN(re);
+ p = RREGEXP_SRC_PTR(re);
while (len--) {
hashval = hashval * 33 + *p++;
}
@@ -2488,9 +2484,9 @@ rb_reg_equal(VALUE re1, VALUE re2)
rb_reg_check(re1); rb_reg_check(re2);
if (FL_TEST(re1, KCODE_FIXED) != FL_TEST(re2, KCODE_FIXED)) return Qfalse;
if (RREGEXP(re1)->ptr->options != RREGEXP(re2)->ptr->options) return Qfalse;
- if (RREGEXP(re1)->len != RREGEXP(re2)->len) return Qfalse;
+ if (RREGEXP_SRC_LEN(re1) != RREGEXP_SRC_LEN(re2)) return Qfalse;
if (ENCODING_GET(re1) != ENCODING_GET(re2)) return Qfalse;
- if (memcmp(RREGEXP(re1)->str, RREGEXP(re2)->str, RREGEXP(re1)->len) == 0) {
+ if (memcmp(RREGEXP_SRC_PTR(re1), RREGEXP_SRC_PTR(re2), RREGEXP_SRC_LEN(re1)) == 0) {
return Qtrue;
}
return Qfalse;
@@ -2756,8 +2752,8 @@ rb_reg_initialize_m(int argc, VALUE *argv, VALUE self)
}
rb_reg_check(re);
flags = rb_reg_options(re);
- ptr = RREGEXP(re)->str;
- len = RREGEXP(re)->len;
+ ptr = RREGEXP_SRC_PTR(re);
+ len = RREGEXP_SRC_LEN(re);
enc = rb_enc_get(re);
if (rb_reg_initialize(self, ptr, len, enc, flags, err)) {
str = rb_enc_str_new(ptr, len, enc);
@@ -3107,8 +3103,8 @@ rb_reg_init_copy(VALUE copy, VALUE re)
rb_raise(rb_eTypeError, "wrong argument type");
}
rb_reg_check(re);
- s = RREGEXP(re)->str;
- len = RREGEXP(re)->len;
+ s = RREGEXP_SRC_PTR(re);
+ len = RREGEXP_SRC_LEN(re);
if (rb_reg_initialize(copy, s, len, rb_enc_get(re), rb_reg_options(re), err) != 0) {
rb_reg_raise(s, len, err, re);
}
diff --git a/string.c b/string.c
index 4746842f5c..49f3ba9456 100644
--- a/string.c
+++ b/string.c
@@ -2247,7 +2247,7 @@ rb_str_rindex_m(int argc, VALUE *argv, VALUE str)
switch (TYPE(sub)) {
case T_REGEXP:
/* enc = rb_get_check(str, sub); */
- if (RREGEXP(sub)->len) {
+ if (!RREGEXP(sub)->ptr || RREGEXP_SRC_LEN(sub)) {
pos = rb_reg_adjust_startpos(sub, str, pos, 1);
pos = rb_reg_search(sub, str, pos, 1);
pos = rb_str_sublen(str, pos);