summary refs log tree commit diff
diff options
context:
space:
mode:
author matz <matz@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2007-02-02 13:19:44 +0000
committer matz <matz@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2007-02-02 13:19:44 +0000
commit 6e0fed271c1d2e6f2b13b99d89d43e7d00e81472 (patch)
tree 791820116e48e6b69fe1f39d358374d126a4f99b
parent 0abfa1c532bfa73eccdb5e8f05957c0935f5b1d9 (diff)
* ruby.h (SYMBOL_P): make Symbol immediate again for performance.

* string.c: redesign symbol methods.

* parse.y (rb_id2str): store Strings for operator symbols.
  [ruby-dev:30235]

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@11615 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- ChangeLog 11
-rw-r--r-- compile.c 3
-rw-r--r-- parse.y 88
-rw-r--r-- ruby.h 9
-rw-r--r-- string.c 220
5 files changed, 155 insertions, 176 deletions
diff --git a/ChangeLog b/ChangeLog
index c484b395f1..a98c8f6ac8 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -12,6 +12,17 @@ Fri Feb 2 18:27:54 2007 Yukihiro Matsumoto <matz@ruby-lang.org>
* eval.c: remove duplicated global variables rb_cProc and
rb_cBinding. [ruby-dev:30242]
+Fri Feb 2 00:13:44 2007 Yukihiro Matsumoto <matz@ruby-lang.org>
+
+ * ruby.h (SYMBOL_P): make Symbol immediate again for performance.
+
+ * string.c: redesign symbol methods.
+
+Thu Feb 1 23:25:21 2007 Nobuyoshi Nakada <nobu@ruby-lang.org>
+
+ * parse.y (rb_id2str): store Strings for operator symbols.
+ [ruby-dev:30235]
+
Thu Feb 1 21:04:39 2007 Yukihiro Matsumoto <matz@ruby-lang.org>
* parse.y (assignable_gen): no need to generate NODE_CVDECL.
diff --git a/compile.c b/compile.c
index c8ddb2f80f..dc2765e92d 100644
--- a/compile.c
+++ b/compile.c
@@ -2065,8 +2065,7 @@ case_when_optimizable_literal(NODE * node)
{
if (nd_type(node) == NODE_LIT) {
VALUE v = node->nd_lit;
- VALUE klass = CLASS_OF(v);
- if (klass == rb_cSymbol || rb_obj_is_kind_of(v, rb_cNumeric)) {
+ if (SYMBOL_P(v) || rb_obj_is_kind_of(v, rb_cNumeric)) {
return v;
}
}
diff --git a/parse.y b/parse.y
index 1d93b5cd08..a08d429739 100644
--- a/parse.y
+++ b/parse.y
@@ -8318,7 +8318,8 @@ static const struct {
static struct symbols {
ID last_id;
st_table *sym_id;
- st_table *id_sym;
+ st_table *id_str;
+ VALUE op_sym[tLAST_TOKEN];
} global_symbols = {tLAST_TOKEN};
static struct st_hash_type symhash = {
@@ -8330,13 +8331,15 @@ void
Init_sym(void)
{
global_symbols.sym_id = st_init_table_with_size(&symhash, 1000);
- global_symbols.id_sym = st_init_numtable_with_size(1000);
+ global_symbols.id_str = st_init_numtable_with_size(1000);
}
void
rb_gc_mark_symbols(void)
{
- rb_mark_tbl(global_symbols.id_sym);
+ rb_mark_tbl(global_symbols.id_str);
+ rb_gc_mark_locations(global_symbols.op_sym,
+ global_symbols.op_sym + tLAST_TOKEN);
}
static ID
@@ -8440,26 +8443,15 @@ rb_symname_p(const char *name)
return *m ? Qfalse : Qtrue;
}
-int
-rb_sym_interned_p(str)
- VALUE str;
-{
- ID id;
-
- if (st_lookup(global_symbols.sym_id, (st_data_t)str, (st_data_t *)&id))
- return Qtrue;
- return Qfalse;
-}
-
ID
rb_intern2(const char *name, long len)
{
const char *m = name;
- VALUE sym = rb_str_new(name, len);
+ VALUE str = rb_str_new(name, len);
ID id;
int last;
- if (st_lookup(global_symbols.sym_id, (st_data_t)sym, (st_data_t *)&id))
+ if (st_lookup(global_symbols.sym_id, (st_data_t)str, (st_data_t *)&id))
return id;
last = len-1;
@@ -8523,10 +8515,9 @@ rb_intern2(const char *name, long len)
new_id:
id |= ++global_symbols.last_id << ID_SCOPE_SHIFT;
id_register:
- RBASIC(sym)->klass = rb_cSymbol;
- OBJ_FREEZE(sym);
- st_add_direct(global_symbols.sym_id, (st_data_t)sym, id);
- st_add_direct(global_symbols.id_sym, id, (st_data_t)sym);
+ OBJ_FREEZE(str);
+ st_add_direct(global_symbols.sym_id, (st_data_t)str, id);
+ st_add_direct(global_symbols.id_str, id, (st_data_t)str);
return id;
}
@@ -8537,31 +8528,7 @@ rb_intern(const char *name)
}
VALUE
-rb_id2sym(ID id)
-{
- VALUE data;
-
- while (!st_lookup(global_symbols.id_sym, id, &data)) {
- rb_id2name(id);
- }
- if (!RBASIC(data)->klass) {
- RBASIC(data)->klass = rb_cSymbol;
- }
- return data;
-}
-
-ID
-rb_sym2id(VALUE sym)
-{
- ID data;
-
- if (st_lookup(global_symbols.sym_id, sym, &data))
- return data;
- return rb_intern2(RSTRING_PTR(sym), RSTRING_LEN(sym));
-}
-
-const char *
-rb_id2name(ID id)
+rb_id2str(ID id)
{
const char *name;
st_data_t data;
@@ -8570,13 +8537,20 @@ rb_id2name(ID id)
int i = 0;
for (i=0; op_tbl[i].token; i++) {
- if (op_tbl[i].token == id)
- return op_tbl[i].name;
+ if (op_tbl[i].token == id) {
+ VALUE str = global_symbols.op_sym[i];
+ if (!str) {
+ str = rb_str_new2(op_tbl[i].name);
+ OBJ_FREEZE(str);
+ global_symbols.op_sym[i] = str;
+ }
+ return str;
+ }
}
}
- if (st_lookup(global_symbols.id_sym, id, &data))
- return RSTRING_PTR(data);
+ if (st_lookup(global_symbols.id_str, id, &data))
+ return (VALUE)data;
if (is_attrset_id(id)) {
ID id2 = (id & ~ID_SCOPE_MASK) | ID_LOCAL;
@@ -8589,7 +8563,7 @@ rb_id2name(ID id)
strcpy(buf, name);
strcat(buf, "=");
rb_intern(buf);
- return rb_id2name(id);
+ return rb_id2str(id);
}
if (is_local_id(id2)) {
id2 = (id & ~ID_SCOPE_MASK) | ID_CONST;
@@ -8599,13 +8573,19 @@ rb_id2name(ID id)
return 0;
}
+const char *
+rb_id2name(ID id)
+{
+ VALUE str = rb_id2str(id);
+
+ if (!str) return 0;
+ return RSTRING_PTR(str);
+}
+
static int
symbols_i(VALUE sym, ID value, VALUE ary)
{
- if (!RBASIC(sym)->klass) {
- RBASIC(sym)->klass = rb_cSymbol;
- }
- rb_ary_push(ary, sym);
+ rb_ary_push(ary, ID2SYM(value));
return ST_CONTINUE;
}
diff --git a/ruby.h b/ruby.h
index cf303f8e3c..e5b4a69697 100644
--- a/ruby.h
+++ b/ruby.h
@@ -197,11 +197,10 @@ VALUE rb_ull2inum(unsigned LONG_LONG);
#define IMMEDIATE_MASK 0x03
#define IMMEDIATE_P(x) ((VALUE)(x) & IMMEDIATE_MASK)
-#define SYMBOL_P(x) (!SPECIAL_CONST_P(x) && RBASIC(x)->klass == rb_cSymbol)
-VALUE rb_id2sym(ID);
-ID rb_sym2id(VALUE);
-#define ID2SYM(x) rb_id2sym(x)
-#define SYM2ID(x) rb_sym2id(x)
+#define SYMBOL_FLAG 0x0e
+#define SYMBOL_P(x) (((VALUE)(x)&0xff)==SYMBOL_FLAG)
+#define ID2SYM(x) ((VALUE)(((long)(x))<<8|SYMBOL_FLAG))
+#define SYM2ID(x) RSHIFT((unsigned long)x,8)
/* special contants - i.e. non-zero and non-fixnum constants */
#define Qfalse ((VALUE)0)
diff --git a/string.c b/string.c
index 7a27af8d77..faacf95428 100644
--- a/string.c
+++ b/string.c
@@ -4227,7 +4227,7 @@ rb_str_intern(VALUE s)
volatile VALUE str = s;
ID id;
- if (OBJ_TAINTED(str) && rb_safe_level() >= 1 && !rb_sym_interned_p(str)) {
+ if (OBJ_TAINTED(str) && rb_safe_level() >= 1) {
rb_raise(rb_eSecurityError, "Insecure: can't intern tainted string");
}
id = rb_intern2(RSTRING_PTR(str), RSTRING_LEN(str));
@@ -4631,7 +4631,7 @@ rb_str_setter(VALUE val, ID id, VALUE *var)
static VALUE
rb_sym_s_intern(VALUE s)
{
- if (rb_class_real(s) == rb_cSymbol) {
+ if (SYMBOL_P(s)) {
return s;
}
StringValue(s);
@@ -4651,48 +4651,7 @@ static VALUE
sym_equal(VALUE sym1, VALUE sym2)
{
if (sym1 == sym2) return Qtrue;
- if (SYMBOL_P(sym2)) return Qfalse;
- return rb_str_equal(sym1, sym2);
-}
-
-/*
- * call-seq:
- * sym.eql?(other) => true or false
- *
- * Two symbols are equal if they are exactly same symbols.
- */
-
-static VALUE
-sym_eql(VALUE sym1, VALUE sym2)
-{
- if (sym1 == sym2) return Qtrue;
- if (SYMBOL_P(sym2)) return Qfalse;
- return rb_str_eql(sym1, sym2);
-}
-
-/*
- * call-seq:
- * sym.hash => fixnum
- *
- * Return a hash based on the symbol's length and content.
- */
-static VALUE
-sym_hash(VALUE sym)
-{
- int h;
- VALUE hval;
-
- if (STR_SHARED_P(sym)) {
- /* if a symbol has shared value, that's a hash value. */
- return RSTRING(sym)->as.heap.aux.shared;
- }
- h = rb_str_hash(sym);
- hval = INT2FIX(h);
- if (!STR_EMBED_P(sym)) {
- FL_SET(sym, STR_ASSOC);
- RSTRING(sym)->as.heap.aux.shared = hval;
- }
- return hval;
+ return Qfalse;
}
@@ -4728,8 +4687,10 @@ sym_to_i(VALUE sym)
static VALUE
sym_inspect(VALUE sym)
{
- VALUE str;
+ VALUE str, klass = Qundef;
+ ID id = SYM2ID(sym);
+ sym = rb_id2str(id);
str = rb_str_new(0, RSTRING_LEN(sym)+1);
RSTRING_PTR(str)[0] = ':';
memcpy(RSTRING_PTR(str)+1, RSTRING_PTR(sym), RSTRING_LEN(sym));
@@ -4738,6 +4699,10 @@ sym_inspect(VALUE sym)
str = rb_str_dump(str);
strncpy(RSTRING_PTR(str), ":\"", 2);
}
+ if (klass != Qundef) {
+ rb_str_cat2(str, "/");
+ rb_str_append(str, rb_inspect(klass));
+ }
return str;
}
@@ -4753,10 +4718,12 @@ sym_inspect(VALUE sym)
*/
-static VALUE
-sym_to_s(VALUE sym)
+VALUE
+rb_sym_to_s(VALUE sym)
{
- return rb_str_new(RSTRING_PTR(sym), RSTRING_LEN(sym));
+ ID id = SYM2ID(sym);
+
+ return str_new3(rb_cString, rb_id2str(id));
}
@@ -4809,14 +4776,73 @@ sym_to_proc(VALUE sym)
static VALUE
sym_succ(VALUE sym)
{
- return rb_str_intern(rb_str_succ(sym));
+ return rb_str_intern(rb_str_succ(rb_sym_to_s(sym)));
+}
+
+static VALUE
+sym_cmp(VALUE sym, VALUE other)
+{
+ if (!SYMBOL_P(other)) {
+ return Qnil;
+ }
+ return rb_str_cmp_m(rb_sym_to_s(sym), rb_sym_to_s(other));
+}
+
+static VALUE
+sym_casecmp(VALUE sym, VALUE other)
+{
+ if (!SYMBOL_P(other)) {
+ return Qnil;
+ }
+ return rb_str_casecmp(rb_sym_to_s(sym), rb_sym_to_s(other));
}
-static ID
-str_to_id(VALUE str)
+static VALUE
+sym_match(VALUE sym, VALUE other)
{
- VALUE sym = rb_str_intern(str);
- return SYM2ID(sym);
+ return rb_str_match(rb_sym_to_s(sym), other);
+}
+
+static VALUE
+sym_aref(int argc, VALUE *argv, VALUE sym)
+{
+ return rb_str_aref_m(argc, argv, rb_sym_to_s(sym));
+}
+
+static VALUE
+sym_length(VALUE sym)
+{
+ return rb_str_length(rb_id2str(SYM2ID(sym)));
+}
+
+static VALUE
+sym_empty(VALUE sym)
+{
+ return rb_str_empty(rb_id2str(SYM2ID(sym)));
+}
+
+static VALUE
+sym_upcase(VALUE sym)
+{
+ return rb_str_intern(rb_str_upcase(rb_id2str(SYM2ID(sym))));
+}
+
+static VALUE
+sym_downcase(VALUE sym)
+{
+ return rb_str_intern(rb_str_downcase(rb_id2str(SYM2ID(sym))));
+}
+
+static VALUE
+sym_capitalize(VALUE sym)
+{
+ return rb_str_intern(rb_str_capitalize(rb_id2str(SYM2ID(sym))));
+}
+
+static VALUE
+sym_swapcase(VALUE sym)
+{
+ return rb_str_intern(rb_str_swapcase(rb_id2str(SYM2ID(sym))));
}
ID
@@ -4826,24 +4852,18 @@ rb_to_id(VALUE name)
ID id;
switch (TYPE(name)) {
- case T_STRING:
- return str_to_id(name);
- case T_FIXNUM:
- rb_warn("do not use Fixnums as Symbols");
- id = FIX2LONG(name);
- if (!rb_id2name(id)) {
- rb_raise(rb_eArgError, "%ld is not a symbol", id);
- }
- break;
- case T_SYMBOL:
- return SYM2ID(name);
- break;
default:
tmp = rb_check_string_type(name);
- if (!NIL_P(tmp)) {
- return str_to_id(tmp);
+ if (NIL_P(tmp)) {
+ rb_raise(rb_eTypeError, "%s is not a symbol",
+ RSTRING_PTR(rb_inspect(name)));
}
- rb_raise(rb_eTypeError, "%s is not a symbol", RSTRING_PTR(rb_inspect(name)));
+ name = tmp;
+ case T_STRING:
+ name = rb_str_intern(name);
+ /* fall through */
+ case T_SYMBOL:
+ return SYM2ID(name);
}
return id;
}
@@ -4989,60 +5009,30 @@ Init_String(void)
rb_define_singleton_method(rb_cSymbol, "intern", rb_sym_s_intern, 1);
rb_define_method(rb_cSymbol, "==", sym_equal, 1);
- rb_define_method(rb_cSymbol, "eql?", sym_eql, 1);
- rb_define_method(rb_cSymbol, "hash", sym_hash, 0);
rb_define_method(rb_cSymbol, "to_i", sym_to_i, 0);
rb_define_method(rb_cSymbol, "inspect", sym_inspect, 0);
- rb_define_method(rb_cSymbol, "to_s", sym_to_s, 0);
- rb_define_method(rb_cSymbol, "id2name", sym_to_s, 0);
+ rb_define_method(rb_cSymbol, "to_s", rb_sym_to_s, 0);
+ rb_define_method(rb_cSymbol, "id2name", rb_sym_to_s, 0);
rb_define_method(rb_cSymbol, "intern", sym_to_sym, 0);
rb_define_method(rb_cSymbol, "to_sym", sym_to_sym, 0);
rb_define_method(rb_cSymbol, "to_proc", sym_to_proc, 0);
rb_define_method(rb_cSymbol, "succ", sym_succ, 0);
rb_define_method(rb_cSymbol, "next", sym_succ, 0);
-
- rb_define_method(rb_cSymbol, "<=>", rb_str_cmp_m, 1);
- rb_define_method(rb_cSymbol, "casecmp", rb_str_casecmp, 1);
- rb_define_method(rb_cSymbol, "+", rb_str_plus, 1);
- rb_define_method(rb_cSymbol, "*", rb_str_times, 1);
- rb_define_method(rb_cSymbol, "%", rb_str_format_m, 1);
- rb_define_method(rb_cSymbol, "[]", rb_str_aref_m, -1);
- rb_define_method(rb_cSymbol, "length", rb_str_length, 0);
- rb_define_method(rb_cSymbol, "size", rb_str_length, 0);
- rb_define_method(rb_cSymbol, "empty?", rb_str_empty, 0);
- rb_define_method(rb_cSymbol, "=~", rb_str_match, 1);
- rb_define_method(rb_cSymbol, "match", rb_str_match_m, -1);
- rb_define_method(rb_cSymbol, "index", rb_str_index_m, -1);
- rb_define_method(rb_cSymbol, "rindex", rb_str_rindex_m, -1);
- rb_define_method(rb_cSymbol, "chr", rb_str_chr, 0);
-
- rb_define_method(rb_cSymbol, "to_f", rb_str_to_f, 0);
- rb_define_method(rb_cSymbol, "to_str", rb_str_to_s, 0);
rb_define_method(rb_cSymbol, "dump", rb_str_dump, 0);
- rb_define_method(rb_cSymbol, "upcase", rb_str_upcase, 0);
- rb_define_method(rb_cSymbol, "downcase", rb_str_downcase, 0);
- rb_define_method(rb_cSymbol, "capitalize", rb_str_capitalize, 0);
- rb_define_method(rb_cSymbol, "swapcase", rb_str_swapcase, 0);
-
- rb_define_method(rb_cSymbol, "ord", rb_str_ord, 0);
-
- rb_define_method(rb_cSymbol, "include?", rb_str_include, 1);
- rb_define_method(rb_cSymbol, "start_with?", rb_str_start_with, -1);
- rb_define_method(rb_cSymbol, "end_with?", rb_str_end_with, -1);
-
- rb_define_method(rb_cSymbol, "scan", rb_str_scan, 1);
-
- rb_define_method(rb_cSymbol, "sub", rb_str_sub, -1);
- rb_define_method(rb_cSymbol, "gsub", rb_str_gsub, -1);
-
- rb_define_method(rb_cSymbol, "tr", rb_str_tr, 2);
- rb_define_method(rb_cSymbol, "tr_s", rb_str_tr_s, 2);
- rb_define_method(rb_cSymbol, "delete", rb_str_delete, -1);
- rb_define_method(rb_cSymbol, "squeeze", rb_str_squeeze, -1);
- rb_define_method(rb_cSymbol, "count", rb_str_count, -1);
-
- rb_define_method(rb_cSymbol, "each_byte", rb_str_each_byte, 0);
-
- rb_define_method(rb_cSymbol, "slice", rb_str_aref_m, -1);
+ rb_define_method(rb_cSymbol, "<=>", sym_cmp, 1);
+ rb_define_method(rb_cSymbol, "casecmp", sym_casecmp, 1);
+ rb_define_method(rb_cSymbol, "=~", sym_match, 1);
+
+ rb_define_method(rb_cSymbol, "[]", sym_aref, -1);
+ rb_define_method(rb_cSymbol, "slice", sym_aref, -1);
+ rb_define_method(rb_cSymbol, "length", sym_length, 0);
+ rb_define_method(rb_cSymbol, "size", sym_length, 0);
+ rb_define_method(rb_cSymbol, "empty?", sym_empty, 0);
+ rb_define_method(rb_cSymbol, "match", sym_match, -1);
+
+ rb_define_method(rb_cSymbol, "upcase", sym_upcase, 0);
+ rb_define_method(rb_cSymbol, "downcase", sym_downcase, 0);
+ rb_define_method(rb_cSymbol, "capitalize", sym_capitalize, 0);
+ rb_define_method(rb_cSymbol, "swapcase", sym_swapcase, 0);
}