summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 13
-rw-r--r-- encoding.c 2
-rw-r--r-- string.c 143
-rw-r--r-- version.h 6
4 files changed, 123 insertions, 41 deletions
diff --git a/ChangeLog b/ChangeLog
index e8f971a802..6a2d6c19a9 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,16 @@
+Sun Nov 4 03:58:32 2007 Yukihiro Matsumoto <matz@ruby-lang.org>
+
+ * string.c (tr_setup_table): use C array for characters that fit
+ in a byte to gain performance.
+
+ * string.c (rb_str_delete_bang): ditto.
+
+ * string.c (rb_str_squeeze_bang): ditto.
+
+ * string.c (rb_str_count): ditto.
+
+ * string.c (tr_trans): ditto.
+
Sun Nov 4 00:06:40 2007 Tanaka Akira <akr@fsij.org>
* gc.c (count_objects): ObjectSpace.count_objects implemented.
diff --git a/encoding.c b/encoding.c
index 8f1a0822b9..97e6cfff50 100644
--- a/encoding.c
+++ b/encoding.c
@@ -261,7 +261,7 @@ rb_enc_find(const char *name)
return rb_enc_from_index(idx);
}
-static int
+static inline int
enc_capable(VALUE obj)
{
if (IMMEDIATE_P(obj)) return Qfalse;
diff --git a/string.c b/string.c
index 9c3a055dbb..472f4cca73 100644
--- a/string.c
+++ b/string.c
@@ -3251,8 +3251,8 @@ rb_str_capitalize(VALUE str)
/*
- * call-seq:
- * str.swapcase! => str or nil
+ * call-seq:
+* str.swapcase! => str or nil
*
* Equivalent to <code>String#swapcase</code>, but modifies the receiver in
* place, returning <i>str</i>, or <code>nil</code> if no changes were made.
@@ -3352,12 +3352,13 @@ static VALUE rb_str_delete_bang(int,VALUE*,VALUE);
static VALUE
tr_trans(VALUE str, VALUE src, VALUE repl, int sflag)
{
+ SIGNED_VALUE trans[256];
rb_encoding *enc;
struct tr trsrc, trrepl;
int cflag = 0;
- int c, last = 0, modify = 0;
+ int c, last = 0, modify = 0, i;
char *s, *send;
- VALUE hash;
+ VALUE hash = 0;
StringValue(src);
StringValue(repl);
@@ -3379,23 +3380,45 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag)
trsrc.gen = trrepl.gen = 0;
trsrc.now = trrepl.now = 0;
trsrc.max = trrepl.max = 0;
- hash = rb_hash_new();
if (cflag) {
+ for (i=0; i<256; i++) {
+ trans[i] = 1;
+ }
while ((c = trnext(&trsrc, enc)) >= 0) {
- rb_hash_aset(hash, INT2NUM(c), Qtrue);
+ if (c < 256) {
+ trans[c] = -1;
+ }
+ else {
+ if (!hash) hash = rb_hash_new();
+ rb_hash_aset(hash, INT2NUM(c), Qtrue);
+ }
}
while ((c = trnext(&trrepl, enc)) >= 0)
/* retrieve last replacer */;
last = trrepl.now;
+ for (i=0; i<256; i++) {
+ if (trans[i] >= 0) {
+ trans[i] = last;
+ }
+ }
}
else {
int r;
+ for (i=0; i<256; i++) {
+ trans[i] = -1;
+ }
while ((c = trnext(&trsrc, enc)) >= 0) {
r = trnext(&trrepl, enc);
if (r == -1) r = trrepl.now;
- rb_hash_aset(hash, INT2NUM(c), INT2NUM(r));
+ if (c < 256) {
+ trans[c] = INT2NUM(r);
+ }
+ else {
+ if (!hash) hash = rb_hash_new();
+ rb_hash_aset(hash, INT2NUM(c), INT2NUM(r));
+ }
}
}
@@ -3413,7 +3436,12 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag)
tlen = clen = rb_enc_codelen(c, enc);
s += clen;
- v = rb_hash_aref(hash, INT2NUM(c));
+ if (c < 256) {
+ v = trans[c] >= 0 ? trans[c] : Qnil;
+ }
+ else {
+ v = rb_hash_aref(hash, INT2NUM(c));
+ }
if (!NIL_P(v)) {
if (!cflag) {
c = NUM2INT(v);
@@ -3447,11 +3475,11 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag)
}
else if (rb_enc_mbmaxlen(enc) == 1) {
while (s < send) {
- VALUE v = rb_hash_aref(hash, INT2FIX(*s));
- if (!NIL_P(v)) {
+ c = *s;
+ if (trans[c] >= 0) {
if (!cflag) {
- c = FIX2INT(v);
- *s = c & 0xff;
+ c = FIX2INT(trans[c]);
+ *s = c;
modify = 1;
}
}
@@ -3473,7 +3501,12 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag)
c = rb_enc_codepoint(s, send, enc);
tlen = clen = rb_enc_codelen(c, enc);
- v = rb_hash_aref(hash, INT2NUM(c));
+ if (c < 256) {
+ v = trans[c] >= 0 ? trans[c] : Qnil;
+ }
+ else {
+ v = rb_hash_aref(hash, INT2NUM(c));
+ }
if (!NIL_P(v)) {
if (!cflag) {
c = NUM2INT(v);
@@ -3552,35 +3585,75 @@ rb_str_tr(VALUE str, VALUE src, VALUE repl)
}
static void
-tr_setup_table(VALUE str, VALUE *tablep, VALUE *ctablep, rb_encoding *enc)
+tr_setup_table(VALUE str, char stable[256], /* stable: caller-provided per-byte flag table, written here */
+ VALUE *tablep, VALUE *ctablep, rb_encoding *enc) /* tablep/ctablep: receive the hash built for code points >= 256 (plain spec / negated spec) */
{
+ char buf[256]; /* flags for code points below 256, derived from this spec only */
struct tr tr;
int c;
- VALUE table, ptable;
+ VALUE table = 0, ptable; /* table is created lazily; ptable is assigned at the same moment */
+ int i, cflag = 0; /* cflag: set when the spec is negated with a leading ^ */
tr.p = RSTRING_PTR(str); tr.pend = tr.p + RSTRING_LEN(str);
tr.gen = tr.now = tr.max = 0;
- table = rb_hash_new();
+
if (RSTRING_LEN(str) > 1 && RSTRING_PTR(str)[0] == '^') { /* a spec of length 1 is never treated as negation */
+ cflag = 1;
tr.p++; /* skip the negation marker */
- ptable = *ctablep;
- *ctablep = table;
}
- else {
- ptable = *tablep;
- *tablep = table;
+ for (i=0; i<256; i++) { /* NOTE(review): stable is reset to all-ones on every call; confirm this is intended when a caller passes several specs */
+ stable[i] = 1;
+ }
+ for (i=0; i<256; i++) { /* default for bytes not listed in the spec: 0 normally, 1 when negated */
+ buf[i] = cflag;
}
while ((c = trnext(&tr, enc)) >= 0) {
- VALUE key = INT2NUM(c);
+ if (c < 256) { /* listed small code point: flip it away from the default */
+ buf[c & 0xff] = !cflag;
+ }
+ else {
+ VALUE key = INT2NUM(c);
- if (!ptable || !NIL_P(rb_hash_aref(ptable, key))) {
- rb_hash_aset(table, key, Qtrue);
+ if (!table) { /* first code point >= 256: create the hash and store it through the matching out-pointer */
+ table = rb_hash_new();
+ if (cflag) {
+ ptable = *ctablep; /* keep the previously stored value before overwriting it */
+ *ctablep = table;
+ }
+ else {
+ ptable = *tablep;
+ *tablep = table;
+ }
+ }
+ if (!ptable || !NIL_P(rb_hash_aref(ptable, key))) { /* record key only if there was no earlier hash or the earlier hash contained key */
+ rb_hash_aset(table, key, Qtrue);
+ }
}
}
+ for (i=0; i<256; i++) { /* combine with stable; given the reset above this currently equals buf[i] */
+ stable[i] = stable[i] && buf[i];
+ }
}
+static int /* tr_find: report whether code point c is selected; the Qtrue/Qfalse result is returned through an int */
+tr_find(int c, char table[256], VALUE del, VALUE nodel) /* table: per-byte flags; del/nodel: hashes for larger code points, 0 when absent */
+{
+ if (c < 256) { /* small code points are answered from the byte table alone; NOTE(review): no lower-bound check, assumes c >= 0 - confirm callers */
+ return table[c] ? Qtrue : Qfalse;
+ }
+ else { /* c >= 256: fall back to the hash lookups */
+ VALUE v = INT2NUM(c); /* hash key for c */
+
+ if ((del && !NIL_P(rb_hash_aref(del, v))) && /* c must be present in del ... */
+ (!nodel || NIL_P(rb_hash_aref(nodel, v)))) { /* ... and, when nodel exists, absent from it */
+ return Qtrue;
+ }
+ return Qfalse; /* also reached when del is 0 */
+ }
+}
+
/*
* call-seq:
* str.delete!([other_str]+) => str or nil
@@ -3592,6 +3665,7 @@ tr_setup_table(VALUE str, VALUE *tablep, VALUE *ctablep, rb_encoding *enc)
static VALUE
rb_str_delete_bang(int argc, VALUE *argv, VALUE str)
{
+ char squeez[256];
rb_encoding *enc = 0;
char *s, *send, *t;
VALUE del = 0, nodel = 0;
@@ -3606,7 +3680,7 @@ rb_str_delete_bang(int argc, VALUE *argv, VALUE str)
StringValue(s);
enc = rb_enc_check(str, s);
- tr_setup_table(s, &del, &nodel, enc);
+ tr_setup_table(s, squeez, &del, &nodel, enc);
}
rb_str_modify(str);
@@ -3616,10 +3690,8 @@ rb_str_delete_bang(int argc, VALUE *argv, VALUE str)
while (s < send) {
int c = rb_enc_codepoint(s, send, enc);
int clen = rb_enc_codelen(c, enc);
- VALUE v = INT2NUM(c);
- if ((del && !NIL_P(rb_hash_aref(del, v))) &&
- (!nodel || NIL_P(rb_hash_aref(nodel, v)))) {
+ if (tr_find(c, squeez, del, nodel)) {
modify = 1;
}
else {
@@ -3670,6 +3742,7 @@ rb_str_delete(int argc, VALUE *argv, VALUE str)
static VALUE
rb_str_squeeze_bang(int argc, VALUE *argv, VALUE str)
{
+ char squeez[256];
rb_encoding *enc = 0;
VALUE del = 0, nodel = 0;
char *s, *send, *t;
@@ -3685,7 +3758,7 @@ rb_str_squeeze_bang(int argc, VALUE *argv, VALUE str)
StringValue(s);
enc = rb_enc_check(str, s);
- tr_setup_table(s, &del, &nodel, enc);
+ tr_setup_table(s, squeez, &del, &nodel, enc);
}
}
@@ -3697,11 +3770,8 @@ rb_str_squeeze_bang(int argc, VALUE *argv, VALUE str)
while (s < send) {
int c = rb_enc_codepoint(s, send, enc);
int clen = rb_enc_codelen(c, enc);
- VALUE v = INT2NUM(c);
- if (c != save ||
- ((del && NIL_P(rb_hash_aref(del, v))) &&
- (!nodel || NIL_P(rb_hash_aref(nodel, v))))) {
+ if (c != save || !tr_find(c, squeez, del, nodel)) {
if (t != s) rb_enc_mbcput(c, t, enc);
save = c;
t += clen;
@@ -3799,6 +3869,7 @@ rb_str_tr_s(VALUE str, VALUE src, VALUE repl)
static VALUE
rb_str_count(int argc, VALUE *argv, VALUE str)
{
+ char table[256];
rb_encoding *enc = 0;
VALUE del = 0, nodel = 0;
char *s, *send;
@@ -3812,7 +3883,7 @@ rb_str_count(int argc, VALUE *argv, VALUE str)
StringValue(s);
enc = rb_enc_check(str, s);
- tr_setup_table(s, &del, &nodel, enc);
+ tr_setup_table(s, table, &del, &nodel, enc);
}
s = RSTRING_PTR(str);
@@ -3822,10 +3893,8 @@ rb_str_count(int argc, VALUE *argv, VALUE str)
while (s < send) {
int c = rb_enc_codepoint(s, send, enc);
int clen = rb_enc_codelen(c, enc);
- VALUE v = INT2NUM(c);
- if ((del && !NIL_P(rb_hash_aref(del, v))) &&
- (!nodel || NIL_P(rb_hash_aref(nodel, v)))) {
+ if (tr_find(c, table, del, nodel)) {
i++;
}
s += clen;
diff --git a/version.h b/version.h
index 1168b2bfce..976bd3c274 100644
--- a/version.h
+++ b/version.h
@@ -1,7 +1,7 @@
#define RUBY_VERSION "1.9.0"
-#define RUBY_RELEASE_DATE "2007-11-03"
+#define RUBY_RELEASE_DATE "2007-11-04"
#define RUBY_VERSION_CODE 190
-#define RUBY_RELEASE_CODE 20071103
+#define RUBY_RELEASE_CODE 20071104
#define RUBY_PATCHLEVEL 0
#define RUBY_VERSION_MAJOR 1
@@ -9,7 +9,7 @@
#define RUBY_VERSION_TEENY 0
#define RUBY_RELEASE_YEAR 2007
#define RUBY_RELEASE_MONTH 11
-#define RUBY_RELEASE_DAY 3
+#define RUBY_RELEASE_DAY 4
#ifdef RUBY_EXTERN
RUBY_EXTERN const char ruby_version[];