diff options
Diffstat (limited to 'symbol.c')
| -rw-r--r-- | symbol.c | 1019 |
1 files changed, 521 insertions, 498 deletions
@@ -9,7 +9,9 @@ **********************************************************************/ +#include "darray.h" #include "internal.h" +#include "internal/concurrent_set.h" #include "internal/error.h" #include "internal/gc.h" #include "internal/hash.h" @@ -18,18 +20,13 @@ #include "internal/vm.h" #include "probes.h" #include "ruby/encoding.h" +#include "ruby/ractor.h" #include "ruby/st.h" #include "symbol.h" #include "vm_sync.h" #include "builtin.h" +#include "ruby/internal/attr/nonstring.h" -#if defined(USE_SYMBOL_GC) && !(USE_SYMBOL_GC+0) -# undef USE_SYMBOL_GC -# define USE_SYMBOL_GC 0 -#else -# undef USE_SYMBOL_GC -# define USE_SYMBOL_GC 1 -#endif #if defined(SYMBOL_DEBUG) && (SYMBOL_DEBUG+0) # undef SYMBOL_DEBUG # define SYMBOL_DEBUG 1 @@ -41,12 +38,14 @@ # define CHECK_ID_SERIAL SYMBOL_DEBUG #endif +#define IDSET_ATTRSET_FOR_SYNTAX ((1U<<ID_LOCAL)|(1U<<ID_CONST)) +#define IDSET_ATTRSET_FOR_INTERN (~(~0U<<(1<<ID_SCOPE_SHIFT)) & ~(1U<<ID_ATTRSET)) + #define SYMBOL_PINNED_P(sym) (RSYMBOL(sym)->id&~ID_SCOPE_MASK) #define STATIC_SYM2ID(sym) RSHIFT((VALUE)(sym), RUBY_SPECIAL_SHIFT) static ID register_static_symid(ID, const char *, long, rb_encoding *); -static ID register_static_symid_str(ID, VALUE); #define REGISTER_SYMID(id, name) register_static_symid((id), (name), strlen(name), enc) #include "id.c" @@ -56,6 +55,13 @@ static ID register_static_symid_str(ID, VALUE); STATIC_ASSERT(op_tbl_name_size, sizeof(op_tbl[0].name) == 3); #define op_tbl_len(i) (!op_tbl[i].name[1] ? 1 : !op_tbl[i].name[2] ? 2 : 3) + +#define GLOBAL_SYMBOLS_LOCKING(symbols) \ + for (rb_symbols_t *symbols = &ruby_global_symbols, **locking = &symbols; \ + locking; \ + locking = NULL) \ + RB_VM_LOCKING() + static void Init_op_tbl(void) { @@ -75,29 +81,341 @@ Init_op_tbl(void) static const int ID_ENTRY_UNIT = 512; -enum id_entry_type { - ID_ENTRY_STR, - ID_ENTRY_SYM, - ID_ENTRY_SIZE +typedef struct { + rb_atomic_t next_id; + VALUE sym_set; + + VALUE ids; +} rb_symbols_t; + +rb_symbols_t ruby_global_symbols = { + .next_id = tNEXT_ID, }; -rb_symbols_t ruby_global_symbols = {tNEXT_ID-1}; +struct sym_set_static_sym_entry { + VALUE sym; + VALUE str; +}; + +#define SYM_SET_SYM_STATIC_TAG 1 + +static bool +sym_set_sym_static_p(VALUE sym) +{ + return sym & SYM_SET_SYM_STATIC_TAG; +} + +static VALUE +sym_set_static_sym_tag(struct sym_set_static_sym_entry *sym) +{ + VALUE value = (VALUE)sym | SYM_SET_SYM_STATIC_TAG; + RUBY_ASSERT(IMMEDIATE_P(value)); + RUBY_ASSERT(sym_set_sym_static_p(value)); + + return value; +} + +static struct sym_set_static_sym_entry * +sym_set_static_sym_untag(VALUE sym) +{ + RUBY_ASSERT(sym_set_sym_static_p(sym)); + + return (struct sym_set_static_sym_entry *)(sym & ~((VALUE)SYM_SET_SYM_STATIC_TAG)); +} + +static VALUE +sym_set_sym_get_str(VALUE sym) +{ + VALUE str; + if (sym_set_sym_static_p(sym)) { + str = sym_set_static_sym_untag(sym)->str; + } + else { + RUBY_ASSERT(RB_TYPE_P(sym, T_SYMBOL)); + str = RSYMBOL(sym)->fstr; + } + + RUBY_ASSERT(RB_TYPE_P(str, T_STRING)); + + return str; +} + +static VALUE +sym_set_hash(VALUE sym) +{ + if (sym_set_sym_static_p(sym)) { + return (VALUE)rb_str_hash(sym_set_static_sym_untag(sym)->str); + } + else { + return (VALUE)RSYMBOL(sym)->hashval; + } +} + +static bool +sym_set_cmp(VALUE a, VALUE b) +{ + return rb_str_hash_cmp(sym_set_sym_get_str(a), sym_set_sym_get_str(b)) == false; +} + +struct sym_id_entry { + VALUE sym; + VALUE str; +}; + +static void +sym_id_entry_list_mark(void *ptr) +{ + rb_darray(struct sym_id_entry) ary = ptr; + + struct sym_id_entry *entry; + rb_darray_foreach(ary, i, entry) { + // sym must be pinned because it may be used in places that don't + // support compaction + rb_gc_mark(entry->sym); + rb_gc_mark_movable(entry->str); + } +} + +static void +sym_id_entry_list_free(void *ptr) +{ + rb_darray_free_sized(ptr, struct sym_id_entry); +} + +static size_t +sym_id_entry_list_memsize(const void *ptr) +{ + const rb_darray(struct sym_id_entry) ary = ptr; + + return rb_darray_memsize(ary); +} + +static void +sym_id_entry_list_compact(void *ptr) +{ + rb_darray(struct sym_id_entry) ary = ptr; + + struct sym_id_entry *entry; + rb_darray_foreach(ary, i, entry) { + entry->str = rb_gc_location(entry->str); + } +} + +static const rb_data_type_t sym_id_entry_list_type = { + "symbol_id_entry_list", + { + sym_id_entry_list_mark, + sym_id_entry_list_free, + sym_id_entry_list_memsize, + sym_id_entry_list_compact, + }, + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED +}; + +static int +sym_check_asciionly(VALUE str, bool fake_str) +{ + if (!rb_enc_asciicompat(rb_enc_get(str))) return FALSE; + switch (rb_enc_str_coderange(str)) { + case ENC_CODERANGE_BROKEN: + if (fake_str) { + str = rb_enc_str_new(RSTRING_PTR(str), RSTRING_LEN(str), rb_enc_get(str)); + } + rb_raise(rb_eEncodingError, "invalid symbol in encoding %s :%+"PRIsVALUE, + rb_enc_name(rb_enc_get(str)), str); + case ENC_CODERANGE_7BIT: + return TRUE; + } + return FALSE; +} + +static VALUE +dup_string_for_create(VALUE str) +{ + rb_encoding *enc = rb_enc_get(str); + + str = rb_enc_str_new(RSTRING_PTR(str), RSTRING_LEN(str), enc); + + rb_encoding *ascii = rb_usascii_encoding(); + if (enc != ascii && sym_check_asciionly(str, false)) { + rb_enc_associate(str, ascii); + } + OBJ_FREEZE(str); + + str = rb_fstring(str); + return str; +} + +static int +rb_str_symname_type(VALUE name, unsigned int allowed_attrset) +{ + const char *ptr = StringValuePtr(name); + long len = RSTRING_LEN(name); + int type = rb_enc_symname_type(ptr, len, rb_enc_get(name), allowed_attrset); + RB_GC_GUARD(name); + return type; +} + +static ID +next_id_base(void) +{ + rb_atomic_t serial = RUBY_ATOMIC_FETCH_ADD(ruby_global_symbols.next_id, 1); + + return (ID)serial << ID_SCOPE_SHIFT; +} + +static void +set_id_entry(rb_symbols_t *symbols, rb_id_serial_t num, VALUE str, VALUE sym) +{ + ASSERT_vm_locking(); + RUBY_ASSERT_BUILTIN_TYPE(str, T_STRING); + RUBY_ASSERT_BUILTIN_TYPE(sym, T_SYMBOL); + + size_t idx = num / ID_ENTRY_UNIT; + + VALUE id_entry_list, ids = symbols->ids; + rb_darray(struct sym_id_entry) entries; + if (idx >= (size_t)RARRAY_LEN(ids) || NIL_P(id_entry_list = rb_ary_entry(ids, (long)idx))) { + rb_darray_make(&entries, ID_ENTRY_UNIT); + id_entry_list = TypedData_Wrap_Struct(0, &sym_id_entry_list_type, entries); + rb_ary_store(ids, (long)idx, id_entry_list); + } + else { + entries = RTYPEDDATA_GET_DATA(id_entry_list); + } + + idx = num % ID_ENTRY_UNIT; + struct sym_id_entry *entry = rb_darray_ref(entries, idx); + RUBY_ASSERT(entry->str == 0); + RUBY_ASSERT(entry->sym == 0); -static const struct st_hash_type symhash = { - rb_str_hash_cmp, - rb_str_hash, + RB_OBJ_WRITE(id_entry_list, &entry->str, str); + RB_OBJ_WRITE(id_entry_list, &entry->sym, sym); +} + +static VALUE +sym_set_create(VALUE sym, void *data) +{ + bool create_dynamic_symbol = (bool)data; + + struct sym_set_static_sym_entry *static_sym_entry = sym_set_static_sym_untag(sym); + + VALUE str = dup_string_for_create(static_sym_entry->str); + + if (create_dynamic_symbol) { + NEWOBJ_OF(obj, struct RSymbol, rb_cSymbol, T_SYMBOL, sizeof(struct RSymbol)); + + rb_encoding *enc = rb_enc_get(str); + rb_enc_set_index((VALUE)obj, rb_enc_to_index(enc)); + RB_OBJ_WRITE((VALUE)obj, &obj->fstr, str); + RB_OBJ_SET_FROZEN_SHAREABLE((VALUE)obj); + + int id = rb_str_symname_type(str, IDSET_ATTRSET_FOR_INTERN); + if (id < 0) id = ID_INTERNAL; + obj->id = id; + + obj->hashval = rb_str_hash(str); + RUBY_DTRACE_CREATE_HOOK(SYMBOL, RSTRING_PTR(obj->fstr)); + + return (VALUE)obj; + } + else { + struct sym_set_static_sym_entry *new_static_sym_entry = xmalloc(sizeof(struct sym_set_static_sym_entry)); + new_static_sym_entry->str = str; + + VALUE static_sym = static_sym_entry->sym; + if (static_sym == 0) { + ID id = rb_str_symname_type(str, IDSET_ATTRSET_FOR_INTERN); + if (id == (ID)-1) id = ID_INTERNAL; + + id |= next_id_base(); + id |= ID_STATIC_SYM; + + static_sym = STATIC_ID2SYM(id); + } + new_static_sym_entry->sym = static_sym; + + RB_VM_LOCKING() { + set_id_entry(&ruby_global_symbols, rb_id_to_serial(STATIC_SYM2ID(static_sym)), str, static_sym); + } + + return sym_set_static_sym_tag(new_static_sym_entry); + } +} + +static void +sym_set_free(VALUE sym) +{ + if (sym_set_sym_static_p(sym)) { + xfree(sym_set_static_sym_untag(sym)); + } +} + +static const struct rb_concurrent_set_funcs sym_set_funcs = { + .hash = sym_set_hash, + .cmp = sym_set_cmp, + .create = sym_set_create, + .free = sym_set_free, }; +static VALUE +sym_set_entry_to_sym(VALUE entry) +{ + if (sym_set_sym_static_p(entry)) { + RUBY_ASSERT(STATIC_SYM_P(sym_set_static_sym_untag(entry)->sym)); + + if (!STATIC_SYM_P(sym_set_static_sym_untag(entry)->sym)) rb_bug("not sym"); + + return sym_set_static_sym_untag(entry)->sym; + } + else { + RUBY_ASSERT(DYNAMIC_SYM_P(entry)); + if (!DYNAMIC_SYM_P(entry)) rb_bug("not sym"); + + return entry; + } +} + +static VALUE +sym_find_or_insert_dynamic_symbol(rb_symbols_t *symbols, const VALUE str) +{ + struct sym_set_static_sym_entry static_sym = { + .str = str + }; + return sym_set_entry_to_sym( + rb_concurrent_set_find_or_insert(&symbols->sym_set, sym_set_static_sym_tag(&static_sym), (void *)true) + ); +} + +static VALUE +sym_find_or_insert_static_symbol(rb_symbols_t *symbols, const VALUE str) +{ + struct sym_set_static_sym_entry static_sym = { + .str = str + }; + return sym_set_entry_to_sym( + rb_concurrent_set_find_or_insert(&symbols->sym_set, sym_set_static_sym_tag(&static_sym), (void *)false) + ); +} + +static VALUE +sym_find_or_insert_static_symbol_id(rb_symbols_t *symbols, const VALUE str, ID id) +{ + struct sym_set_static_sym_entry static_sym = { + .sym = STATIC_ID2SYM(id), + .str = str, + }; + return sym_set_entry_to_sym( + rb_concurrent_set_find_or_insert(&symbols->sym_set, sym_set_static_sym_tag(&static_sym), (void *)false) + ); +} + void Init_sym(void) { rb_symbols_t *symbols = &ruby_global_symbols; - VALUE dsym_fstrs = rb_ident_hash_new(); - symbols->dsymbol_fstr_hash = dsym_fstrs; - rb_obj_hide(dsym_fstrs); - - symbols->str_sym = st_init_table_with_size(&symhash, 1000); + symbols->sym_set = rb_concurrent_set_new(&sym_set_funcs, 1024); symbols->ids = rb_ary_hidden_new(0); Init_op_tbl(); @@ -105,38 +423,34 @@ Init_sym(void) } void -rb_sym_global_symbols_mark(void) +rb_sym_global_symbols_mark_and_move(void) { rb_symbols_t *symbols = &ruby_global_symbols; - rb_gc_mark_movable(symbols->ids); - rb_gc_mark_movable(symbols->dsymbol_fstr_hash); + rb_gc_mark_and_move(&symbols->sym_set); + rb_gc_mark_and_move(&symbols->ids); } -void -rb_sym_global_symbols_update_references(void) +static int +rb_free_global_symbol_table_i(VALUE *sym_ptr, void *data) { - rb_symbols_t *symbols = &ruby_global_symbols; + sym_set_free(*sym_ptr); - symbols->ids = rb_gc_location(symbols->ids); - symbols->dsymbol_fstr_hash = rb_gc_location(symbols->dsymbol_fstr_hash); + return ST_DELETE; } -WARN_UNUSED_RESULT(static VALUE dsymbol_alloc(rb_symbols_t *symbols, const VALUE klass, const VALUE str, rb_encoding *const enc, const ID type)); -WARN_UNUSED_RESULT(static VALUE dsymbol_check(rb_symbols_t *symbols, const VALUE sym)); -WARN_UNUSED_RESULT(static ID lookup_str_id(VALUE str)); -WARN_UNUSED_RESULT(static VALUE lookup_str_sym_with_lock(rb_symbols_t *symbols, const VALUE str)); -WARN_UNUSED_RESULT(static VALUE lookup_str_sym(const VALUE str)); -WARN_UNUSED_RESULT(static VALUE lookup_id_str(ID id)); -WARN_UNUSED_RESULT(static ID intern_str(VALUE str, int mutable)); +void +rb_free_global_symbol_table(void) +{ + rb_concurrent_set_foreach_with_replace(ruby_global_symbols.sym_set, rb_free_global_symbol_table_i, NULL); +} -#define GLOBAL_SYMBOLS_ENTER(symbols) rb_symbols_t *symbols = &ruby_global_symbols; RB_VM_LOCK_ENTER() -#define GLOBAL_SYMBOLS_LEAVE() RB_VM_LOCK_LEAVE() +WARN_UNUSED_RESULT(static ID lookup_str_id(VALUE str)); +WARN_UNUSED_RESULT(static VALUE get_id_str(ID id)); ID rb_id_attrset(ID id) { - VALUE str, sym; int scope; if (!is_notop_id(id)) { @@ -151,13 +465,14 @@ rb_id_attrset(ID id) scope = id_type(id); switch (scope) { case ID_LOCAL: case ID_INSTANCE: case ID_GLOBAL: - case ID_CONST: case ID_CLASS: case ID_JUNK: + case ID_CONST: case ID_CLASS: case ID_INTERNAL: break; case ID_ATTRSET: return id; default: { - if ((str = lookup_id_str(id)) != 0) { + VALUE str = get_id_str(id); + if (str != 0) { rb_name_error(id, "cannot make unknown type ID %d:%"PRIsVALUE" attrset", scope, str); } @@ -169,9 +484,25 @@ rb_id_attrset(ID id) } } + bool error = false; /* make new symbol and ID */ - if (!(str = lookup_id_str(id))) { - static const char id_types[][8] = { + VALUE str = get_id_str(id); + if (str) { + str = rb_str_dup(str); + rb_str_cat(str, "=", 1); + if (sym_check_asciionly(str, false)) { + rb_enc_associate(str, rb_usascii_encoding()); + } + + VALUE sym = sym_find_or_insert_static_symbol(&ruby_global_symbols, str); + id = rb_sym2id(sym); + } + else { + error = true; + } + + if (error) { + RBIMPL_ATTR_NONSTRING_ARRAY() static const char id_types[][8] = { "local", "instance", "invalid", @@ -179,15 +510,12 @@ rb_id_attrset(ID id) "attrset", "const", "class", - "junk", + "internal", }; rb_name_error(id, "cannot make anonymous %.*s ID %"PRIxVALUE" attrset", (int)sizeof(id_types[0]), id_types[scope], (VALUE)id); } - str = rb_str_dup(str); - rb_str_cat(str, "=", 1); - sym = lookup_str_sym(str); - id = sym ? rb_sym2id(sym) : intern_str(str, 1); + return id; } @@ -265,9 +593,6 @@ rb_sym_constant_char_p(const char *name, long nlen, rb_encoding *enc) return FALSE; } -#define IDSET_ATTRSET_FOR_SYNTAX ((1U<<ID_LOCAL)|(1U<<ID_CONST)) -#define IDSET_ATTRSET_FOR_INTERN (~(~0U<<(1<<ID_SCOPE_SHIFT)) & ~(1U<<ID_ATTRSET)) - struct enc_synmane_type_leading_chars_tag { const enum { invalid, stophere, needmore, } kind; const enum ruby_id_types type; @@ -311,66 +636,66 @@ enc_synmane_type_leading_chars(const char *name, long len, rb_encoding *enc, int case '<': switch (*++m) { - default: return (t) { stophere, ID_JUNK, 1, }; - case '<': return (t) { stophere, ID_JUNK, 2, }; + default: return (t) { stophere, ID_INTERNAL, 1, }; + case '<': return (t) { stophere, ID_INTERNAL, 2, }; case '=': switch (*++m) { - default: return (t) { stophere, ID_JUNK, 2, }; - case '>': return (t) { stophere, ID_JUNK, 3, }; + default: return (t) { stophere, ID_INTERNAL, 2, }; + case '>': return (t) { stophere, ID_INTERNAL, 3, }; } } case '>': switch (*++m) { - default: return (t) { stophere, ID_JUNK, 1, }; - case '>': case '=': return (t) { stophere, ID_JUNK, 2, }; + default: return (t) { stophere, ID_INTERNAL, 1, }; + case '>': case '=': return (t) { stophere, ID_INTERNAL, 2, }; } case '=': switch (*++m) { - default: return (t) { invalid, 0, 1, }; - case '~': return (t) { stophere, ID_JUNK, 2, }; + default: return (t) { invalid, 0, 1, }; + case '~': return (t) { stophere, ID_INTERNAL, 2, }; case '=': switch (*++m) { - default: return (t) { stophere, ID_JUNK, 2, }; - case '=': return (t) { stophere, ID_JUNK, 3, }; + default: return (t) { stophere, ID_INTERNAL, 2, }; + case '=': return (t) { stophere, ID_INTERNAL, 3, }; } } case '*': switch (*++m) { - default: return (t) { stophere, ID_JUNK, 1, }; - case '*': return (t) { stophere, ID_JUNK, 2, }; + default: return (t) { stophere, ID_INTERNAL, 1, }; + case '*': return (t) { stophere, ID_INTERNAL, 2, }; } case '+': case '-': switch (*++m) { - default: return (t) { stophere, ID_JUNK, 1, }; - case '@': return (t) { stophere, ID_JUNK, 2, }; + default: return (t) { stophere, ID_INTERNAL, 1, }; + case '@': return (t) { stophere, ID_INTERNAL, 2, }; } case '|': case '^': case '&': case '/': case '%': case '~': case '`': - return (t) { stophere, ID_JUNK, 1, }; + return (t) { stophere, ID_INTERNAL, 1, }; case '[': switch (*++m) { - default: return (t) { needmore, ID_JUNK, 0, }; + default: return (t) { needmore, ID_INTERNAL, 0, }; case ']': switch (*++m) { - default: return (t) { stophere, ID_JUNK, 2, }; - case '=': return (t) { stophere, ID_JUNK, 3, }; + default: return (t) { stophere, ID_INTERNAL, 2, }; + case '=': return (t) { stophere, ID_INTERNAL, 3, }; } } case '!': switch (*++m) { - case '=': case '~': return (t) { stophere, ID_JUNK, 2, }; + case '=': case '~': return (t) { stophere, ID_INTERNAL, 2, }; default: - if (allowed_attrset & (1U << ID_JUNK)) { - return (t) { needmore, ID_JUNK, 1, }; + if (allowed_attrset & (1U << ID_INTERNAL)) { + return (t) { needmore, ID_INTERNAL, 1, }; } else { - return (t) { stophere, ID_JUNK, 1, }; + return (t) { stophere, ID_INTERNAL, 1, }; } } @@ -411,7 +736,7 @@ rb_enc_symname_type(const char *name, long len, rb_encoding *enc, unsigned int a switch (*m) { case '!': case '?': if (type == ID_GLOBAL || type == ID_CLASS || type == ID_INSTANCE) return -1; - type = ID_JUNK; + type = ID_INTERNAL; ++m; if (m + 1 < e || *m != '=') break; /* fall through */ @@ -432,400 +757,131 @@ rb_enc_symname2_p(const char *name, long len, rb_encoding *enc) return rb_enc_symname_type(name, len, enc, IDSET_ATTRSET_FOR_SYNTAX) != -1; } -static int -rb_str_symname_type(VALUE name, unsigned int allowed_attrset) +static struct sym_id_entry * +get_id_serial_entry(rb_id_serial_t num) { - const char *ptr = StringValuePtr(name); - long len = RSTRING_LEN(name); - int type = rb_enc_symname_type(ptr, len, rb_enc_get(name), allowed_attrset); - RB_GC_GUARD(name); - return type; -} + struct sym_id_entry *entry = NULL; -static void -set_id_entry(rb_symbols_t *symbols, rb_id_serial_t num, VALUE str, VALUE sym) -{ - ASSERT_vm_locking(); - RUBY_ASSERT_BUILTIN_TYPE(str, T_STRING); - RUBY_ASSERT_BUILTIN_TYPE(sym, T_SYMBOL); - - size_t idx = num / ID_ENTRY_UNIT; - - VALUE ary, ids = symbols->ids; - if (idx >= (size_t)RARRAY_LEN(ids) || NIL_P(ary = rb_ary_entry(ids, (long)idx))) { - ary = rb_ary_hidden_new(ID_ENTRY_UNIT * ID_ENTRY_SIZE); - rb_ary_store(ids, (long)idx, ary); - } - idx = (num % ID_ENTRY_UNIT) * ID_ENTRY_SIZE; - rb_ary_store(ary, (long)idx + ID_ENTRY_STR, str); - rb_ary_store(ary, (long)idx + ID_ENTRY_SYM, sym); -} - -static VALUE -get_id_serial_entry(rb_id_serial_t num, ID id, const enum id_entry_type t) -{ - VALUE result = 0; - - GLOBAL_SYMBOLS_ENTER(symbols); - { - if (num && num <= symbols->last_id) { + GLOBAL_SYMBOLS_LOCKING(symbols) { + if (num && num < RUBY_ATOMIC_LOAD(symbols->next_id)) { size_t idx = num / ID_ENTRY_UNIT; VALUE ids = symbols->ids; - VALUE ary; - if (idx < (size_t)RARRAY_LEN(ids) && !NIL_P(ary = rb_ary_entry(ids, (long)idx))) { - long pos = (long)(num % ID_ENTRY_UNIT) * ID_ENTRY_SIZE; - result = rb_ary_entry(ary, pos + t); + VALUE id_entry_list; + if (idx < (size_t)RARRAY_LEN(ids) && !NIL_P(id_entry_list = rb_ary_entry(ids, (long)idx))) { + rb_darray(struct sym_id_entry) entries = RTYPEDDATA_GET_DATA(id_entry_list); - if (NIL_P(result)) { - result = 0; - } - else if (CHECK_ID_SERIAL) { - if (id) { - VALUE sym = result; - if (t != ID_ENTRY_SYM) - sym = rb_ary_entry(ary, pos + ID_ENTRY_SYM); - if (STATIC_SYM_P(sym)) { - if (STATIC_SYM2ID(sym) != id) result = 0; - } - else { - if (RSYMBOL(sym)->id != id) result = 0; - } - } - } + size_t pos = (size_t)(num % ID_ENTRY_UNIT); + RUBY_ASSERT(pos < rb_darray_size(entries)); + entry = rb_darray_ref(entries, pos); } } } - GLOBAL_SYMBOLS_LEAVE(); - if (result) { - switch (t) { - case ID_ENTRY_STR: - RUBY_ASSERT_BUILTIN_TYPE(result, T_STRING); - break; - case ID_ENTRY_SYM: - RUBY_ASSERT_BUILTIN_TYPE(result, T_SYMBOL); - break; - default: - break; - } - } + return entry; +} - return result; +static VALUE +get_id_sym(ID id) +{ + struct sym_id_entry *entry = get_id_serial_entry(rb_id_to_serial(id)); + return entry ? entry->sym : 0; } static VALUE -get_id_entry(ID id, const enum id_entry_type t) +get_id_str(ID id) { - return get_id_serial_entry(rb_id_to_serial(id), id, t); + struct sym_id_entry *entry = get_id_serial_entry(rb_id_to_serial(id)); + return entry ? entry->str : 0; } int rb_static_id_valid_p(ID id) { - return STATIC_ID2SYM(id) == get_id_entry(id, ID_ENTRY_SYM); + return STATIC_ID2SYM(id) == get_id_sym(id); } static inline ID rb_id_serial_to_id(rb_id_serial_t num) { if (is_notop_id((ID)num)) { - VALUE sym = get_id_serial_entry(num, 0, ID_ENTRY_SYM); - if (sym) return SYM2ID(sym); - return ((ID)num << ID_SCOPE_SHIFT) | ID_INTERNAL | ID_STATIC_SYM; + struct sym_id_entry *entry = get_id_serial_entry(num); + if (entry && entry->sym != 0) { + return SYM2ID(entry->sym); + } + else { + return ((ID)num << ID_SCOPE_SHIFT) | ID_INTERNAL | ID_STATIC_SYM; + } } else { return (ID)num; } } -static int -register_sym_update_callback(st_data_t *key, st_data_t *value, st_data_t arg, int existing) -{ - if (existing) { - rb_fatal("symbol :% "PRIsVALUE" is already registered with %"PRIxVALUE, - (VALUE)*key, (VALUE)*value); - } - *value = arg; - return ST_CONTINUE; -} - -static void -register_sym(rb_symbols_t *symbols, VALUE str, VALUE sym) -{ - ASSERT_vm_locking(); - - if (SYMBOL_DEBUG) { - st_update(symbols->str_sym, (st_data_t)str, - register_sym_update_callback, (st_data_t)sym); - } - else { - st_add_direct(symbols->str_sym, (st_data_t)str, (st_data_t)sym); - } -} - -void -rb_free_static_symid_str(void) -{ - GLOBAL_SYMBOLS_ENTER(symbols) - { - st_free_table(symbols->str_sym); - } - GLOBAL_SYMBOLS_LEAVE(); -} - -static void -unregister_sym(rb_symbols_t *symbols, VALUE str, VALUE sym) -{ - ASSERT_vm_locking(); - - st_data_t str_data = (st_data_t)str; - if (!st_delete(symbols->str_sym, &str_data, NULL)) { - rb_bug("%p can't remove str from str_id (%s)", (void *)sym, RSTRING_PTR(str)); - } -} - static ID register_static_symid(ID id, const char *name, long len, rb_encoding *enc) { VALUE str = rb_enc_str_new(name, len, enc); - return register_static_symid_str(id, str); -} - -static ID -register_static_symid_str(ID id, VALUE str) -{ - rb_id_serial_t num = rb_id_to_serial(id); - VALUE sym = STATIC_ID2SYM(id); - OBJ_FREEZE(str); str = rb_fstring(str); RUBY_DTRACE_CREATE_HOOK(SYMBOL, RSTRING_PTR(str)); - GLOBAL_SYMBOLS_ENTER(symbols) - { - register_sym(symbols, str, sym); - set_id_entry(symbols, num, str, sym); - } - GLOBAL_SYMBOLS_LEAVE(); + sym_find_or_insert_static_symbol_id(&ruby_global_symbols, str, id); return id; } -static int -sym_check_asciionly(VALUE str, bool fake_str) -{ - if (!rb_enc_asciicompat(rb_enc_get(str))) return FALSE; - switch (rb_enc_str_coderange(str)) { - case ENC_CODERANGE_BROKEN: - if (fake_str) { - str = rb_enc_str_new(RSTRING_PTR(str), RSTRING_LEN(str), rb_enc_get(str)); - } - rb_raise(rb_eEncodingError, "invalid symbol in encoding %s :%+"PRIsVALUE, - rb_enc_name(rb_enc_get(str)), str); - case ENC_CODERANGE_7BIT: - return TRUE; - } - return FALSE; -} - -#if 0 -/* - * _str_ itself will be registered at the global symbol table. _str_ - * can be modified before the registration, since the encoding will be - * set to ASCII-8BIT if it is a special global name. - */ - -static inline void -must_be_dynamic_symbol(VALUE x) -{ - if (UNLIKELY(!DYNAMIC_SYM_P(x))) { - if (STATIC_SYM_P(x)) { - VALUE str = lookup_id_str(RSHIFT((unsigned long)(x),RUBY_SPECIAL_SHIFT)); - - if (str) { - rb_bug("wrong argument: %s (inappropriate Symbol)", RSTRING_PTR(str)); - } - else { - rb_bug("wrong argument: inappropriate Symbol (%p)", (void *)x); - } - } - else { - rb_bug("wrong argument type %s (expected Symbol)", rb_builtin_class_name(x)); - } - } -} -#endif - static VALUE -dsymbol_alloc(rb_symbols_t *symbols, const VALUE klass, const VALUE str, rb_encoding * const enc, const ID type) +sym_find(VALUE str) { - ASSERT_vm_locking(); - - NEWOBJ_OF(obj, struct RSymbol, klass, T_SYMBOL | FL_WB_PROTECTED, sizeof(struct RSymbol), 0); - - long hashval; - - rb_enc_set_index((VALUE)obj, rb_enc_to_index(enc)); - OBJ_FREEZE((VALUE)obj); - RB_OBJ_WRITE((VALUE)obj, &obj->fstr, str); - obj->id = type; - - /* we want hashval to be in Fixnum range [ruby-core:15713] r15672 */ - hashval = (long)rb_str_hash(str); - obj->hashval = RSHIFT((long)hashval, 1); - register_sym(symbols, str, (VALUE)obj); - rb_hash_aset(symbols->dsymbol_fstr_hash, str, Qtrue); - RUBY_DTRACE_CREATE_HOOK(SYMBOL, RSTRING_PTR(obj->fstr)); + VALUE sym; - return (VALUE)obj; -} + struct sym_set_static_sym_entry static_sym = { + .str = str + }; + sym = rb_concurrent_set_find(&ruby_global_symbols.sym_set, sym_set_static_sym_tag(&static_sym)); -static inline VALUE -dsymbol_check(rb_symbols_t *symbols, const VALUE sym) -{ - ASSERT_vm_locking(); - - if (UNLIKELY(rb_objspace_garbage_object_p(sym))) { - const VALUE fstr = RSYMBOL(sym)->fstr; - const ID type = RSYMBOL(sym)->id & ID_SCOPE_MASK; - RSYMBOL(sym)->fstr = 0; - unregister_sym(symbols, fstr, sym); - return dsymbol_alloc(symbols, rb_cSymbol, fstr, rb_enc_get(fstr), type); + if (sym) { + return sym_set_entry_to_sym(sym); } else { - return sym; + return 0; } } static ID lookup_str_id(VALUE str) { - st_data_t sym_data; - int found; + VALUE sym = sym_find(str); - GLOBAL_SYMBOLS_ENTER(symbols); - { - found = st_lookup(symbols->str_sym, (st_data_t)str, &sym_data); + if (sym == 0) { + return (ID)0; } - GLOBAL_SYMBOLS_LEAVE(); - if (found) { - const VALUE sym = (VALUE)sym_data; - - if (STATIC_SYM_P(sym)) { - return STATIC_SYM2ID(sym); - } - else if (DYNAMIC_SYM_P(sym)) { - ID id = RSYMBOL(sym)->id; - if (id & ~ID_SCOPE_MASK) return id; - } - else { - rb_bug("non-symbol object %s:%"PRIxVALUE" for %"PRIsVALUE" in symbol table", - rb_builtin_class_name(sym), sym, str); - } + if (STATIC_SYM_P(sym)) { + return STATIC_SYM2ID(sym); } - return (ID)0; -} - -static VALUE -lookup_str_sym_with_lock(rb_symbols_t *symbols, const VALUE str) -{ - st_data_t sym_data; - if (st_lookup(symbols->str_sym, (st_data_t)str, &sym_data)) { - VALUE sym = (VALUE)sym_data; - if (DYNAMIC_SYM_P(sym)) { - sym = dsymbol_check(symbols, sym); - } - return sym; + else if (DYNAMIC_SYM_P(sym)) { + ID id = RSYMBOL(sym)->id; + if (id & ~ID_SCOPE_MASK) return id; } else { - return Qfalse; - } -} - -static VALUE -lookup_str_sym(const VALUE str) -{ - VALUE sym; - - GLOBAL_SYMBOLS_ENTER(symbols); - { - sym = lookup_str_sym_with_lock(symbols, str); + rb_bug("non-symbol object %s:%"PRIxVALUE" for %"PRIsVALUE" in symbol table", + rb_builtin_class_name(sym), sym, str); } - GLOBAL_SYMBOLS_LEAVE(); - return sym; -} - -static VALUE -lookup_id_str(ID id) -{ - return get_id_entry(id, ID_ENTRY_STR); + return (ID)0; } ID rb_intern3(const char *name, long len, rb_encoding *enc) { - VALUE sym; - struct RString fake_str; + struct RString fake_str = {RBASIC_INIT}; VALUE str = rb_setup_fake_str(&fake_str, name, len, enc); OBJ_FREEZE(str); - sym = lookup_str_sym(str); - if (sym) return rb_sym2id(sym); - str = rb_enc_str_new(name, len, enc); /* make true string */ - return intern_str(str, 1); -} - -static ID -next_id_base_with_lock(rb_symbols_t *symbols) -{ - ID id; - rb_id_serial_t next_serial = symbols->last_id + 1; - - if (next_serial == 0) { - id = (ID)-1; - } - else { - const size_t num = ++symbols->last_id; - id = num << ID_SCOPE_SHIFT; - } - - return id; -} - -static ID -next_id_base(void) -{ - ID id; - GLOBAL_SYMBOLS_ENTER(symbols); - { - id = next_id_base_with_lock(symbols); - } - GLOBAL_SYMBOLS_LEAVE(); - return id; -} -static ID -intern_str(VALUE str, int mutable) -{ - ID id; - ID nid; - - id = rb_str_symname_type(str, IDSET_ATTRSET_FOR_INTERN); - if (id == (ID)-1) id = ID_JUNK; - if (sym_check_asciionly(str, false)) { - if (!mutable) str = rb_str_dup(str); - rb_enc_associate(str, rb_usascii_encoding()); - } - if ((nid = next_id_base()) == (ID)-1) { - str = rb_str_ellipsize(str, 20); - rb_raise(rb_eRuntimeError, "symbol table overflow (symbol %"PRIsVALUE")", - str); - } - id |= nid; - id |= ID_STATIC_SYM; - return register_static_symid_str(id, str); + VALUE sym = sym_find_or_insert_static_symbol(&ruby_global_symbols, str); + return rb_sym2id(sym); } ID @@ -844,13 +900,48 @@ rb_intern(const char *name) ID rb_intern_str(VALUE str) { - VALUE sym = lookup_str_sym(str); + VALUE sym = sym_find_or_insert_static_symbol(&ruby_global_symbols, str); + return SYM2ID(sym); +} - if (sym) { - return SYM2ID(sym); +bool +rb_obj_is_symbol_table(VALUE obj) +{ + return obj == ruby_global_symbols.sym_set; +} + +struct global_symbol_table_foreach_weak_reference_data { + int (*callback)(VALUE *key, void *data); + void *data; +}; + +static int +rb_sym_global_symbol_table_foreach_weak_reference_i(VALUE *key, void *d) +{ + struct global_symbol_table_foreach_weak_reference_data *data = d; + VALUE sym = *key; + + if (sym_set_sym_static_p(sym)) { + struct sym_set_static_sym_entry *static_sym = sym_set_static_sym_untag(sym); + + return data->callback(&static_sym->str, data->data); + } + else { + return data->callback(key, data->data); } +} + +void +rb_sym_global_symbol_table_foreach_weak_reference(int (*callback)(VALUE *key, void *data), void *data) +{ + if (!ruby_global_symbols.sym_set) return; + + struct global_symbol_table_foreach_weak_reference_data foreach_data = { + .callback = callback, + .data = data, + }; - return intern_str(str, 0); + rb_concurrent_set_foreach_with_replace(ruby_global_symbols.sym_set, rb_sym_global_symbol_table_foreach_weak_reference_i, &foreach_data); } void @@ -859,101 +950,48 @@ rb_gc_free_dsymbol(VALUE sym) VALUE str = RSYMBOL(sym)->fstr; if (str) { - RSYMBOL(sym)->fstr = 0; + rb_concurrent_set_delete_by_identity(ruby_global_symbols.sym_set, sym); - GLOBAL_SYMBOLS_ENTER(symbols); - { - unregister_sym(symbols, str, sym); - rb_hash_delete_entry(symbols->dsymbol_fstr_hash, str); - } - GLOBAL_SYMBOLS_LEAVE(); + RSYMBOL(sym)->fstr = 0; } } /* * call-seq: - * str.intern -> symbol - * str.to_sym -> symbol - * - * Returns the +Symbol+ corresponding to <i>str</i>, creating the - * symbol if it did not previously exist. See Symbol#id2name. - * - * "Koala".intern #=> :Koala - * s = 'cat'.to_sym #=> :cat - * s == :cat #=> true - * s = '@cat'.to_sym #=> :@cat - * s == :@cat #=> true + * intern -> symbol * - * This can also be used to create symbols that cannot be represented using the - * <code>:xxx</code> notation. + * :include: doc/string/intern.rdoc * - * 'cat and dog'.to_sym #=> :"cat and dog" */ VALUE rb_str_intern(VALUE str) { - VALUE sym; - - GLOBAL_SYMBOLS_ENTER(symbols); - { - sym = lookup_str_sym_with_lock(symbols, str); - - if (sym) { - // ok - } - else if (USE_SYMBOL_GC) { - rb_encoding *enc = rb_enc_get(str); - rb_encoding *ascii = rb_usascii_encoding(); - if (enc != ascii && sym_check_asciionly(str, false)) { - str = rb_str_dup(str); - rb_enc_associate(str, ascii); - OBJ_FREEZE(str); - enc = ascii; - } - else { - str = rb_str_dup(str); - OBJ_FREEZE(str); - } - str = rb_fstring(str); - int type = rb_str_symname_type(str, IDSET_ATTRSET_FOR_INTERN); - if (type < 0) type = ID_JUNK; - sym = dsymbol_alloc(symbols, rb_cSymbol, str, enc, type); - } - else { - ID id = intern_str(str, 0); - sym = ID2SYM(id); - } - } - GLOBAL_SYMBOLS_LEAVE(); - return sym; + return sym_find_or_insert_dynamic_symbol(&ruby_global_symbols, str); } ID rb_sym2id(VALUE sym) { - ID id; + ID id = 0; if (STATIC_SYM_P(sym)) { id = STATIC_SYM2ID(sym); } else if (DYNAMIC_SYM_P(sym)) { - GLOBAL_SYMBOLS_ENTER(symbols); - { - sym = dsymbol_check(symbols, sym); + GLOBAL_SYMBOLS_LOCKING(symbols) { + RUBY_ASSERT(!rb_objspace_garbage_object_p(sym)); id = RSYMBOL(sym)->id; if (UNLIKELY(!(id & ~ID_SCOPE_MASK))) { VALUE fstr = RSYMBOL(sym)->fstr; - ID num = next_id_base_with_lock(symbols); + ID num = next_id_base(); RSYMBOL(sym)->id = id |= num; /* make it permanent object */ set_id_entry(symbols, rb_id_to_serial(num), fstr, sym); - rb_hash_delete_entry(symbols->dsymbol_fstr_hash, fstr); } } - GLOBAL_SYMBOLS_LEAVE(); } else { rb_raise(rb_eTypeError, "wrong argument type %s (expected Symbol)", @@ -967,7 +1005,7 @@ VALUE rb_id2sym(ID x) { if (!DYNAMIC_ID_P(x)) return STATIC_ID2SYM(x); - return get_id_entry(x, ID_ENTRY_SYM); + return get_id_sym(x); } /* @@ -1001,7 +1039,7 @@ rb_sym2str(VALUE sym) VALUE rb_id2str(ID id) { - return lookup_id_str(id); + return get_id_str(id); } const char * @@ -1024,34 +1062,29 @@ rb_make_temporary_id(size_t n) { const ID max_id = RB_ID_SERIAL_MAX & ~0xffff; const ID id = max_id - (ID)n; - if (id <= ruby_global_symbols.last_id) { + if (id < RUBY_ATOMIC_LOAD(ruby_global_symbols.next_id)) { rb_raise(rb_eRuntimeError, "too big to make temporary ID: %" PRIdSIZE, n); } return (id << ID_SCOPE_SHIFT) | ID_STATIC_SYM | ID_INTERNAL; } static int -symbols_i(st_data_t key, st_data_t value, st_data_t arg) +symbols_i(VALUE *key, void *data) { - VALUE ary = (VALUE)arg; - VALUE sym = (VALUE)value; + VALUE ary = (VALUE)data; + VALUE sym = (VALUE)*key; - if (STATIC_SYM_P(sym)) { - rb_ary_push(ary, sym); - return ST_CONTINUE; - } - else if (!DYNAMIC_SYM_P(sym)) { - rb_bug("invalid symbol: %s", RSTRING_PTR((VALUE)key)); + if (sym_set_sym_static_p(sym)) { + rb_ary_push(ary, sym_set_static_sym_untag(sym)->sym); } - else if (!SYMBOL_PINNED_P(sym) && rb_objspace_garbage_object_p(sym)) { - RSYMBOL(sym)->fstr = 0; + else if (rb_objspace_garbage_object_p(sym)) { return ST_DELETE; } else { rb_ary_push(ary, sym); - return ST_CONTINUE; } + return ST_CONTINUE; } VALUE @@ -1059,12 +1092,10 @@ rb_sym_all_symbols(void) { VALUE ary; - GLOBAL_SYMBOLS_ENTER(symbols); - { - ary = rb_ary_new2(symbols->str_sym->num_entries); - st_foreach(symbols->str_sym, symbols_i, ary); + GLOBAL_SYMBOLS_LOCKING(symbols) { + ary = rb_ary_new2(rb_concurrent_set_size(symbols->sym_set)); + rb_concurrent_set_foreach_with_replace(symbols->sym_set, symbols_i, (void *)ary); } - GLOBAL_SYMBOLS_LEAVE(); return ary; } @@ -1072,7 +1103,7 @@ rb_sym_all_symbols(void) size_t rb_sym_immortal_count(void) { - return (size_t)ruby_global_symbols.last_id; + return (size_t)(RUBY_ATOMIC_LOAD(ruby_global_symbols.next_id) - 1); } int @@ -1114,7 +1145,7 @@ rb_is_local_id(ID id) int rb_is_junk_id(ID id) { - return is_junk_id(id); + return is_internal_id(id); } int @@ -1197,15 +1228,7 @@ rb_check_symbol(volatile VALUE *namep) return name; } else if (DYNAMIC_SYM_P(name)) { - if (!SYMBOL_PINNED_P(name)) { - GLOBAL_SYMBOLS_ENTER(symbols); - { - name = dsymbol_check(symbols, name); - } - GLOBAL_SYMBOLS_LEAVE(); - - *namep = name; - } + RUBY_ASSERT(!rb_objspace_garbage_object_p(name)); return name; } else if (!RB_TYPE_P(name, T_STRING)) { @@ -1220,7 +1243,7 @@ rb_check_symbol(volatile VALUE *namep) sym_check_asciionly(name, false); - if ((sym = lookup_str_sym(name)) != 0) { + if ((sym = sym_find(name)) != 0) { return sym; } @@ -1230,7 +1253,7 @@ rb_check_symbol(volatile VALUE *namep) ID rb_check_id_cstr(const char *ptr, long len, rb_encoding *enc) { - struct RString fake_str; + struct RString fake_str = {RBASIC_INIT}; const VALUE name = rb_setup_fake_str(&fake_str, ptr, len, enc); sym_check_asciionly(name, true); @@ -1242,12 +1265,12 @@ VALUE rb_check_symbol_cstr(const char *ptr, long len, rb_encoding *enc) { VALUE sym; - struct RString fake_str; + struct RString fake_str = {RBASIC_INIT}; const VALUE name = rb_setup_fake_str(&fake_str, ptr, len, enc); sym_check_asciionly(name, true); - if ((sym = lookup_str_sym(name)) != 0) { + if ((sym = sym_find(name)) != 0) { return sym; } @@ -1266,7 +1289,7 @@ FUNC_MINIMIZED(VALUE rb_sym_intern_ascii_cstr(const char *ptr)); VALUE rb_sym_intern(const char *ptr, long len, rb_encoding *enc) { - struct RString fake_str; + struct RString fake_str = {RBASIC_INIT}; const VALUE name = rb_setup_fake_str(&fake_str, ptr, len, enc); return rb_str_intern(name); } |
