summary refs log tree commit diff
diff options
context:
space:
mode:
author matz <matz@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2006-09-02 15:05:27 +0000
committer matz <matz@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2006-09-02 15:05:27 +0000
commit ccf5372b252a229ddb6693db132edc013419ccdc (patch)
tree 2d0619ed60d5749e05e9479c5aa1cab82959266e
parent 2156870525be05f0bd769af141c3f6cff9fff8c4 (diff)
* object.c (Init_Object): move symbol related code to string.c

* string.c (Init_String): Symbol as subclass of String.

* parse.y (rb_intern2): handle symbol as strings.

* string.c (str_new): substring of symbols are mere strings, not symbols.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@10834 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- ChangeLog 18
-rw-r--r-- class.c 2
-rw-r--r-- gc.c 1
-rw-r--r-- object.c 198
-rw-r--r-- parse.y 75
-rw-r--r-- ruby.h 10
-rw-r--r-- string.c 216
7 files changed, 301 insertions, 219 deletions
diff --git a/ChangeLog b/ChangeLog
index cb8b172f8f..da69b4cf0a 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,14 @@
+Sat Sep 2 23:53:28 2006 Yukihiro Matsumoto <matz@ruby-lang.org>
+
+ * object.c (Init_Object): move symbol related code to string.c
+
+ * string.c (Init_String): Symbol as subclass of String.
+
+ * parse.y (rb_intern2): handle symbol as strings.
+
+ * string.c (str_new): substring of symbols are mere strings, not
+ symbols.
+
Sat Sep 2 23:37:29 2006 Yukihiro Matsumoto <matz@ruby-lang.org>
* ruby.h (struct RArray): embed small arrays.
@@ -17,6 +28,13 @@ Sat Sep 2 12:06:35 2006 NAKAMURA, Hiroshi <nahi@ruby-lang.org>
XML attribute which value is nil. value "" and nil both were dumped
as 'attr="value"'. [ruby-dev:29395]
+Sat Sep 2 11:47:58 2006 Yukihiro Matsumoto <matz@ruby-lang.org>
+
+ * eval.c (rb_eval): should handle when in else clause. a patch
+ from Eric Hodel <drbrain at segment7.net>. [ruby-core:08662]
+
+ * parse.y (primary): wrap with NODE_CASE. [ruby-core:08663]
+
Sat Sep 2 12:00:32 2006 NAKAMURA, Hiroshi <nahi@ruby-lang.org>
* lib/csv.rb (CSV::IOReader#initialize): use String#[](pos, len)
diff --git a/class.c b/class.c
index c7aa72344d..81956e5feb 100644
--- a/class.c
+++ b/class.c
@@ -489,7 +489,7 @@ ins_methods_push(ID name, long type, VALUE ary, long visi)
break;
}
if (visi) {
- rb_ary_push(ary, rb_str_new2(rb_id2name(name)));
+ rb_ary_push(ary, ID2SYM(name));
}
return ST_CONTINUE;
}
diff --git a/gc.c b/gc.c
index 0ae9bb62a0..8681175191 100644
--- a/gc.c
+++ b/gc.c
@@ -1381,6 +1381,7 @@ garbage_collect(void)
(VALUE*)((char*)rb_gc_stack_start + 2));
#endif
rb_gc_mark_threads();
+ rb_gc_mark_symbols();
/* mark protected global variables */
for (list = global_List; list; list = list->next) {
diff --git a/object.c b/object.c
index 51a173658b..22f9ca7da5 100644
--- a/object.c
+++ b/object.c
@@ -30,7 +30,6 @@ VALUE rb_cData;
VALUE rb_cNilClass;
VALUE rb_cTrueClass;
VALUE rb_cFalseClass;
-VALUE rb_cSymbol;
static ID id_eq, id_eql, id_inspect, id_init_copy;
@@ -938,149 +937,6 @@ rb_obj_pattern_match(VALUE obj1, VALUE obj2)
return Qnil;
}
-/**********************************************************************
- * Document-class: Symbol
- *
- * <code>Symbol</code> objects represent names and some strings
- * inside the Ruby
- * interpreter. They are generated using the <code>:name</code> and
- * <code>:"string"</code> literals
- * syntax, and by the various <code>to_sym</code> methods. The same
- * <code>Symbol</code> object will be created for a given name or string
- * for the duration of a program's execution, regardless of the context
- * or meaning of that name. Thus if <code>Fred</code> is a constant in
- * one context, a method in another, and a class in a third, the
- * <code>Symbol</code> <code>:Fred</code> will be the same object in
- * all three contexts.
- *
- * module One
- * class Fred
- * end
- * $f1 = :Fred
- * end
- * module Two
- * Fred = 1
- * $f2 = :Fred
- * end
- * def Fred()
- * end
- * $f3 = :Fred
- * $f1.id #=> 2514190
- * $f2.id #=> 2514190
- * $f3.id #=> 2514190
- *
- */
-
-/*
- * call-seq:
- * sym.to_i => fixnum
- *
- * Returns an integer that is unique for each symbol within a
- * particular execution of a program.
- *
- * :fred.to_i #=> 9809
- * "fred".to_sym.to_i #=> 9809
- */
-
-static VALUE
-sym_to_i(VALUE sym)
-{
- ID id = SYM2ID(sym);
-
- return LONG2FIX(id);
-}
-
-
-/*
- * call-seq:
- * sym.inspect => string
- *
- * Returns the representation of <i>sym</i> as a symbol literal.
- *
- * :fred.inspect #=> ":fred"
- */
-
-static VALUE
-sym_inspect(VALUE sym)
-{
- VALUE str;
- const char *name;
- ID id = SYM2ID(sym);
-
- name = rb_id2name(id);
- str = rb_str_new(0, strlen(name)+1);
- RSTRING_PTR(str)[0] = ':';
- strcpy(RSTRING_PTR(str)+1, name);
- if (!rb_symname_p(name)) {
- str = rb_str_dump(str);
- strncpy(RSTRING_PTR(str), ":\"", 2);
- }
- return str;
-}
-
-
-/*
- * call-seq:
- * sym.id2name => string
- * sym.to_s => string
- *
- * Returns the name or string corresponding to <i>sym</i>.
- *
- * :fred.id2name #=> "fred"
- */
-
-
-static VALUE
-sym_to_s(VALUE sym)
-{
- return rb_str_new2(rb_id2name(SYM2ID(sym)));
-}
-
-
-/*
- * call-seq:
- * sym.to_sym => sym
- *
- * In general, <code>to_sym</code> returns the <code>Symbol</code> corresponding
- * to an object. As <i>sym</i> is already a symbol, <code>self</code> is returned
- * in this case.
- */
-
-static VALUE
-sym_to_sym(VALUE sym)
-{
- return sym;
-}
-
-static VALUE
-sym_call(VALUE args, VALUE sym)
-{
- VALUE obj;
-
- if (RARRAY_LEN(args) < 1) {
- rb_raise(rb_eArgError, "no receiver given");
- }
- obj = RARRAY_PTR(args)[0];
- return rb_funcall3(obj, (ID)sym,
- RARRAY_LEN(args) - 1,
- RARRAY_PTR(args) + 1);
-}
-
-/*
- * call-seq:
- * sym.to_proc
- *
- * Returns a _Proc_ object which respond to the given method by _sym_.
- *
- * (1..3).collect(&:to_s) #=> ["1", "2", "3"]
- */
-
-static VALUE
-sym_to_proc(VALUE sym)
-{
- return rb_proc_new(sym_call, (VALUE)SYM2ID(sym));
-}
-
/***********************************************************************
*
@@ -1473,47 +1329,6 @@ rb_class_superclass(VALUE klass)
return super;
}
-static ID
-str_to_id(VALUE str)
-{
- if (!RSTRING_PTR(str) || RSTRING_LEN(str) == 0) {
- rb_raise(rb_eArgError, "empty symbol string");
- }
- if (RSTRING_LEN(str) != strlen(RSTRING_PTR(str))) {
- rb_raise(rb_eArgError, "Symbols should not contain NUL (\\0)");
- }
- return rb_intern(RSTRING_PTR(str));
-}
-
-ID
-rb_to_id(VALUE name)
-{
- VALUE tmp;
- ID id;
-
- switch (TYPE(name)) {
- case T_STRING:
- return str_to_id(name);
- case T_FIXNUM:
- rb_warn("do not use Fixnums as Symbols");
- id = FIX2LONG(name);
- if (!rb_id2name(id)) {
- rb_raise(rb_eArgError, "%ld is not a symbol", id);
- }
- break;
- case T_SYMBOL:
- id = SYM2ID(name);
- break;
- default:
- tmp = rb_check_string_type(name);
- if (!NIL_P(tmp)) {
- return str_to_id(tmp);
- }
- rb_raise(rb_eTypeError, "%s is not a symbol", RSTRING_PTR(rb_inspect(name)));
- }
- return id;
-}
-
/*
* call-seq:
* attr_reader(symbol, ...) => nil
@@ -2449,19 +2264,6 @@ Init_Object(void)
rb_undef_method(CLASS_OF(rb_cNilClass), "new");
rb_define_global_const("NIL", Qnil);
- rb_cSymbol = rb_define_class("Symbol", rb_cObject);
- rb_define_singleton_method(rb_cSymbol, "all_symbols", rb_sym_all_symbols, 0); /* in parse.y */
- rb_undef_alloc_func(rb_cSymbol);
- rb_undef_method(CLASS_OF(rb_cSymbol), "new");
-
- rb_define_method(rb_cSymbol, "to_i", sym_to_i, 0);
- rb_define_method(rb_cSymbol, "inspect", sym_inspect, 0);
- rb_define_method(rb_cSymbol, "to_s", sym_to_s, 0);
- rb_define_method(rb_cSymbol, "id2name", sym_to_s, 0);
- rb_define_method(rb_cSymbol, "to_sym", sym_to_sym, 0);
- rb_define_method(rb_cSymbol, "to_proc", sym_to_proc, 0);
- rb_define_method(rb_cSymbol, "===", rb_obj_equal, 1);
-
rb_define_method(rb_cModule, "freeze", rb_mod_freeze, 0);
rb_define_method(rb_cModule, "===", rb_mod_eqq, 1);
rb_define_method(rb_cModule, "==", rb_obj_equal, 1);
diff --git a/parse.y b/parse.y
index d1984259bd..34d1183777 100644
--- a/parse.y
+++ b/parse.y
@@ -8255,15 +8255,26 @@ static const struct {
static struct symbols {
ID last_id;
- st_table *tbl;
- st_table *rev;
+ st_table *sym_id;
+ st_table *id_sym;
} global_symbols = {tLAST_TOKEN};
+static struct st_hash_type symhash = {
+ rb_str_cmp,
+ rb_str_hash,
+};
+
void
Init_sym(void)
{
- global_symbols.tbl = st_init_strtable_with_size(200);
- global_symbols.rev = st_init_numtable_with_size(200);
+ global_symbols.sym_id = st_init_table_with_size(&symhash, 1000);
+ global_symbols.id_sym = st_init_numtable_with_size(1000);
+}
+
+void
+rb_gc_mark_symbols(int lev)
+{
+ rb_mark_tbl(global_symbols.id_sym);
}
static ID
@@ -8368,16 +8379,17 @@ rb_symname_p(const char *name)
}
ID
-rb_intern(const char *name)
+rb_intern2(const char *name, long len)
{
const char *m = name;
+ VALUE sym = rb_str_new(name, len);
ID id;
int last;
- if (st_lookup(global_symbols.tbl, (st_data_t)name, (st_data_t *)&id))
+ if (st_lookup(global_symbols.sym_id, (st_data_t)sym, (st_data_t *)&id))
return id;
- last = strlen(name)-1;
+ last = len-1;
id = 0;
switch (*name) {
case '$':
@@ -8438,12 +8450,42 @@ rb_intern(const char *name)
new_id:
id |= ++global_symbols.last_id << ID_SCOPE_SHIFT;
id_register:
- name = strdup(name);
- st_add_direct(global_symbols.tbl, (st_data_t)name, id);
- st_add_direct(global_symbols.rev, id, (st_data_t)name);
+ RBASIC(sym)->klass = rb_cSymbol;
+ OBJ_FREEZE(sym);
+ st_add_direct(global_symbols.sym_id, (st_data_t)sym, id);
+ st_add_direct(global_symbols.id_sym, id, (st_data_t)sym);
return id;
}
+ID
+rb_intern(const char *name)
+{
+ return rb_intern2(name, strlen(name));
+}
+
+VALUE
+rb_id2sym(ID id)
+{
+ VALUE data;
+
+ if (st_lookup(global_symbols.id_sym, id, &data)) {
+ if (!RBASIC(data)->klass) {
+ RBASIC(data)->klass = rb_cSymbol;
+ }
+ return data;
+ }
+}
+
+ID
+rb_sym2id(VALUE sym)
+{
+ ID data;
+
+ if (st_lookup(global_symbols.sym_id, sym, &data))
+ return data;
+ return rb_intern2(RSTRING_PTR(sym), RSTRING_LEN(sym));
+}
+
const char *
rb_id2name(ID id)
{
@@ -8459,8 +8501,8 @@ rb_id2name(ID id)
}
}
- if (st_lookup(global_symbols.rev, id, &data))
- return (char *)data;
+ if (st_lookup(global_symbols.id_sym, id, &data))
+ return RSTRING_PTR(data);
if (is_attrset_id(id)) {
ID id2 = (id & ~ID_SCOPE_MASK) | ID_LOCAL;
@@ -8484,9 +8526,9 @@ rb_id2name(ID id)
}
static int
-symbols_i(char *key, ID value, VALUE ary)
+symbols_i(VALUE sym, ID value, VALUE ary)
{
- rb_ary_push(ary, ID2SYM(value));
+ rb_ary_push(ary, sym);
return ST_CONTINUE;
}
@@ -8509,9 +8551,9 @@ symbols_i(char *key, ID value, VALUE ary)
VALUE
rb_sym_all_symbols(void)
{
- VALUE ary = rb_ary_new2(global_symbols.tbl->num_entries);
+ VALUE ary = rb_ary_new2(global_symbols.sym_id->num_entries);
- st_foreach(global_symbols.tbl, symbols_i, ary);
+ st_foreach(global_symbols.sym_id, symbols_i, ary);
return ary;
}
@@ -9261,3 +9303,4 @@ Init_ripper(void)
rb_intern("&&");
}
#endif /* RIPPER */
+
diff --git a/ruby.h b/ruby.h
index b9943fdd80..c6cf32186d 100644
--- a/ruby.h
+++ b/ruby.h
@@ -195,10 +195,11 @@ VALUE rb_ull2inum(unsigned LONG_LONG);
#define IMMEDIATE_MASK 0x03
#define IMMEDIATE_P(x) ((VALUE)(x) & IMMEDIATE_MASK)
-#define SYMBOL_FLAG 0x0e
-#define SYMBOL_P(x) (((VALUE)(x)&0xff)==SYMBOL_FLAG)
-#define ID2SYM(x) ((VALUE)(((long)(x))<<8|SYMBOL_FLAG))
-#define SYM2ID(x) RSHIFT((VALUE)x,8)
+#define SYMBOL_P(x) (!IMMEDIATE_P(x) && RBASIC(x)->klass == rb_cSymbol)
+VALUE rb_id2sym(ID);
+ID rb_sym2id(VALUE);
+#define ID2SYM(x) rb_id2sym(x)
+#define SYM2ID(x) rb_sym2id(x)
/* special contants - i.e. non-zero and non-fixnum constants */
#define Qfalse ((VALUE)0)
@@ -596,6 +597,7 @@ void rb_gc_register_address(VALUE*);
void rb_gc_unregister_address(VALUE*);
ID rb_intern(const char*);
+ID rb_intern2(const char*, long);
const char *rb_id2name(ID);
ID rb_to_id(VALUE);
diff --git a/string.c b/string.c
index 05d0d32325..8bb3158917 100644
--- a/string.c
+++ b/string.c
@@ -26,6 +26,7 @@
#endif
VALUE rb_cString;
+VALUE rb_cSymbol;
#define STR_TMPLOCK FL_USER7
#define STR_NOEMBED FL_USER1
@@ -134,6 +135,7 @@ str_new(VALUE klass, const char *ptr, long len)
rb_raise(rb_eArgError, "negative string size (or size too big)");
}
+ if (klass == rb_cSymbol) klass = rb_cString;
str = str_alloc(klass);
if (len > RSTRING_EMBED_LEN_MAX) {
RSTRING(str)->as.heap.aux.capa = len;
@@ -4369,6 +4371,207 @@ rb_str_setter(VALUE val, ID id, VALUE *var)
}
+/**********************************************************************
+ * Document-class: Symbol
+ *
+ * <code>Symbol</code> objects represent names and some strings
+ * inside the Ruby
+ * interpreter. They are generated using the <code>:name</code> and
+ * <code>:"string"</code> literals
+ * syntax, and by the various <code>to_sym</code> methods. The same
+ * <code>Symbol</code> object will be created for a given name or string
+ * for the duration of a program's execution, regardless of the context
+ * or meaning of that name. Thus if <code>Fred</code> is a constant in
+ * one context, a method in another, and a class in a third, the
+ * <code>Symbol</code> <code>:Fred</code> will be the same object in
+ * all three contexts.
+ *
+ * module One
+ * class Fred
+ * end
+ * $f1 = :Fred
+ * end
+ * module Two
+ * Fred = 1
+ * $f2 = :Fred
+ * end
+ * def Fred()
+ * end
+ * $f3 = :Fred
+ * $f1.id #=> 2514190
+ * $f2.id #=> 2514190
+ * $f3.id #=> 2514190
+ *
+ */
+
+
+/*
+ * call-seq:
+ * Symbol.new(str) => new_sym
+ * Symbol.intern(str) => new_sym
+ *
+ * Returns a new symbol corresponding to <i>str</i>.
+ */
+
+static VALUE
+rb_sym_s_intern(VALUE s)
+{
+ if (rb_class_real(s) == rb_cSymbol) {
+ return s;
+ }
+ StringValue(s);
+ return rb_intern2(RSTRING_PTR(s), RSTRING_LEN(s));
+}
+
+/*
+ * call-seq:
+ * sym.to_i => fixnum
+ *
+ * Returns an integer that is unique for each symbol within a
+ * particular execution of a program.
+ *
+ * :fred.to_i #=> 9809
+ * "fred".to_sym.to_i #=> 9809
+ */
+
+static VALUE
+sym_to_i(VALUE sym)
+{
+ ID id = SYM2ID(sym);
+
+ return LONG2FIX(id);
+}
+
+
+/*
+ * call-seq:
+ * sym.inspect => string
+ *
+ * Returns the representation of <i>sym</i> as a symbol literal.
+ *
+ * :fred.inspect #=> ":fred"
+ */
+
+static VALUE
+sym_inspect(VALUE sym)
+{
+ VALUE str;
+
+ str = rb_str_new(0, RSTRING_LEN(sym)+1);
+ RSTRING_PTR(str)[0] = ':';
+ memcpy(RSTRING_PTR(str)+1, RSTRING_PTR(sym), RSTRING_LEN(sym));
+ if (!rb_symname_p(RSTRING_PTR(sym))) {
+ str = rb_str_dump(str);
+ strncpy(RSTRING_PTR(str), ":\"", 2);
+ }
+ return str;
+}
+
+
+/*
+ * call-seq:
+ * sym.id2name => string
+ * sym.to_s => string
+ *
+ * Returns the name or string corresponding to <i>sym</i>.
+ *
+ * :fred.id2name #=> "fred"
+ */
+
+
+static VALUE
+sym_to_s(VALUE sym)
+{
+ return rb_str_new(RSTRING_PTR(sym), RSTRING_LEN(sym));
+}
+
+
+/*
+ * call-seq:
+ * sym.to_sym => sym
+ * sym.intern => sym
+ *
+ * In general, <code>to_sym</code> returns the <code>Symbol</code> corresponding
+ * to an object. As <i>sym</i> is already a symbol, <code>self</code> is returned
+ * in this case.
+ */
+
+static VALUE
+sym_to_sym(VALUE sym)
+{
+ return sym;
+}
+
+static VALUE
+sym_call(VALUE args, VALUE sym)
+{
+ VALUE obj;
+
+ if (RARRAY_LEN(args) < 1) {
+ rb_raise(rb_eArgError, "no receiver given");
+ }
+ obj = RARRAY_PTR(args)[0];
+ return rb_funcall3(obj, (ID)sym,
+ RARRAY_LEN(args) - 1,
+ RARRAY_PTR(args) + 1);
+}
+
+/*
+ * call-seq:
+ * sym.to_proc
+ *
+ * Returns a _Proc_ object which respond to the given method by _sym_.
+ *
+ * (1..3).collect(&:to_s) #=> ["1", "2", "3"]
+ */
+
+static VALUE
+sym_to_proc(VALUE sym)
+{
+ return rb_proc_new(sym_call, (VALUE)SYM2ID(sym));
+}
+
+
+static ID
+str_to_id(VALUE str)
+{
+ if (!RSTRING_PTR(str) || RSTRING_LEN(str) == 0) {
+ rb_raise(rb_eArgError, "empty symbol string");
+ }
+ if (RBASIC(str)->klass == rb_cSymbol)
+ return str;
+ return rb_intern2(RSTRING_PTR(str), RSTRING_LEN(str));
+}
+
+ID
+rb_to_id(VALUE name)
+{
+ VALUE tmp;
+ ID id;
+
+ switch (TYPE(name)) {
+ case T_STRING:
+ return str_to_id(name);
+ case T_FIXNUM:
+ rb_warn("do not use Fixnums as Symbols");
+ id = FIX2LONG(name);
+ if (!rb_id2name(id)) {
+ rb_raise(rb_eArgError, "%ld is not a symbol", id);
+ }
+ break;
+ case T_SYMBOL:
+ id = SYM2ID(name);
+ break;
+ default:
+ tmp = rb_check_string_type(name);
+ if (!NIL_P(tmp)) {
+ return str_to_id(tmp);
+ }
+ rb_raise(rb_eTypeError, "%s is not a symbol", RSTRING_PTR(rb_inspect(name)));
+ }
+ return id;
+}
+
/*
* A <code>String</code> object holds and manipulates an arbitrary sequence of
* bytes, typically representing characters. String objects may be created
@@ -4496,4 +4699,17 @@ Init_String(void)
rb_fs = Qnil;
rb_define_variable("$;", &rb_fs);
rb_define_variable("$-F", &rb_fs);
+
+ rb_cSymbol = rb_define_class("Symbol", rb_cString);
+ rb_define_singleton_method(rb_cSymbol, "all_symbols", rb_sym_all_symbols, 0); /* in parse.y */
+ rb_define_singleton_method(rb_cSymbol, "intern", rb_sym_s_intern, 1);
+ rb_define_singleton_method(rb_cSymbol, "new", rb_sym_s_intern, 1);
+
+ rb_define_method(rb_cSymbol, "to_i", sym_to_i, 0);
+ rb_define_method(rb_cSymbol, "inspect", sym_inspect, 0);
+ rb_define_method(rb_cSymbol, "to_s", sym_to_s, 0);
+ rb_define_method(rb_cSymbol, "id2name", sym_to_s, 0);
+ rb_define_method(rb_cSymbol, "intern", sym_to_sym, 0);
+ rb_define_method(rb_cSymbol, "to_sym", sym_to_sym, 0);
+ rb_define_method(rb_cSymbol, "to_proc", sym_to_proc, 0);
}