diff options
Diffstat (limited to 'encoding.c')
| -rw-r--r-- | encoding.c | 200 |
1 files changed, 109 insertions, 91 deletions
diff --git a/encoding.c b/encoding.c index 2416acecea..c17f118eef 100644 --- a/encoding.c +++ b/encoding.c @@ -27,6 +27,7 @@ #include "ruby/atomic.h" #include "ruby/encoding.h" #include "ruby/util.h" +#include "ruby/ractor.h" #include "ruby_assert.h" #include "vm_sync.h" #include "ruby_atomic.h" @@ -55,7 +56,7 @@ int rb_encdb_alias(const char *alias, const char *orig); #pragma GCC visibility pop #endif -static ID id_encoding; +static ID id_encoding, id_i_name; VALUE rb_cEncoding; #define ENCODING_LIST_CAPA 256 @@ -96,6 +97,9 @@ static rb_encoding *global_enc_ascii, *global_enc_utf_8, *global_enc_us_ascii; +static int filesystem_encindex = ENCINDEX_ASCII_8BIT; +static rb_atomic_t locale_alias_registered; + #define GLOBAL_ENC_TABLE_LOCKING(tbl) \ for (struct enc_table *tbl = &global_enc_table, **locking = &tbl; \ locking; \ @@ -122,8 +126,9 @@ static const rb_data_type_t encoding_data_type = { 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED }; -#define is_data_encoding(obj) (RTYPEDDATA_P(obj) && RTYPEDDATA_TYPE(obj) == &encoding_data_type) -#define is_obj_encoding(obj) (RB_TYPE_P((obj), T_DATA) && is_data_encoding(obj)) +#define is_encoding_type(obj) (RTYPEDDATA_TYPE(obj) == &encoding_data_type) +#define is_data_encoding(obj) is_encoding_type(obj) +#define is_obj_encoding(obj) (rbimpl_obj_typeddata_p(obj) && is_encoding_type(obj)) int rb_data_is_encoding(VALUE obj) @@ -135,8 +140,8 @@ static VALUE enc_new(rb_encoding *encoding) { VALUE enc = TypedData_Wrap_Struct(rb_cEncoding, &encoding_data_type, (void *)encoding); - rb_obj_freeze(enc); - FL_SET_RAW(enc, RUBY_FL_SHAREABLE); + rb_ivar_set(enc, id_i_name, rb_fstring_cstr(encoding->name)); + RB_OBJ_SET_FROZEN_SHAREABLE(enc); return enc; } @@ -152,6 +157,8 @@ enc_list_update(int index, rb_raw_encoding *encoding) RBASIC_CLEAR_CLASS(new_list); /* initialize encoding data */ rb_ary_store(new_list, index, enc_new(encoding)); + rb_ary_freeze(new_list); + FL_SET_RAW(new_list, RUBY_FL_SHAREABLE); RUBY_ATOMIC_VALUE_SET(rb_encoding_list, new_list); } } @@ -220,7 +227,7 @@ enc_check_encoding(VALUE obj) if (!is_obj_encoding(obj)) { return -1; } - return check_encoding(RDATA(obj)->data); + return check_encoding(RTYPEDDATA_GET_DATA(obj)); } NORETURN(static void not_encoding(VALUE enc)); @@ -238,7 +245,7 @@ must_encoding(VALUE enc) if (index < 0) { not_encoding(enc); } - return DATA_PTR(enc); + return RTYPEDDATA_GET_DATA(enc); } static rb_encoding * @@ -326,7 +333,7 @@ str_to_encoding(VALUE enc) rb_encoding * rb_to_encoding(VALUE enc) { - if (enc_check_encoding(enc) >= 0) return RDATA(enc)->data; + if (enc_check_encoding(enc) >= 0) return RTYPEDDATA_GET_DATA(enc); return str_to_encoding(enc); } @@ -334,7 +341,7 @@ rb_encoding * rb_find_encoding(VALUE enc) { int idx; - if (enc_check_encoding(enc) >= 0) return RDATA(enc)->data; + if (enc_check_encoding(enc) >= 0) return RTYPEDDATA_GET_DATA(enc); idx = str_find_encindex(enc); if (idx < 0) return NULL; return rb_enc_from_index(idx); @@ -349,6 +356,35 @@ enc_table_expand(struct enc_table *enc_table, int newsize) return newsize; } +/* Load an encoding using the values from base_encoding */ +static void +enc_load_from_base(struct enc_table *enc_table, int index, rb_encoding *base_encoding) +{ + ASSERT_vm_locking(); + + struct rb_encoding_entry *ent = &enc_table->list[index]; + + if (ent->loaded) { + return; + } + + rb_raw_encoding *encoding = (rb_raw_encoding *)ent->enc; + RUBY_ASSERT(encoding); + + // FIXME: Before the base is loaded, the encoding may be accessed + // concurrently by other Ractors. + // We're copying all fields from base_encoding except name and + // ruby_encoding_index which we preserve from the original. Since these are + // the only fields other threads should read it is likely safe despite + // technically being a data race. + rb_raw_encoding tmp_encoding = *base_encoding; + tmp_encoding.name = encoding->name; + tmp_encoding.ruby_encoding_index = encoding->ruby_encoding_index; + *encoding = tmp_encoding; + + RUBY_ATOMIC_SET(ent->loaded, encoding->max_enc_len); +} + static int enc_register_at(struct enc_table *enc_table, int index, const char *name, rb_encoding *base_encoding) { @@ -357,33 +393,33 @@ enc_register_at(struct enc_table *enc_table, int index, const char *name, rb_enc struct rb_encoding_entry *ent = &enc_table->list[index]; rb_raw_encoding *encoding; - if (!valid_encoding_name_p(name)) return -1; - if (!ent->name) { - ent->name = name = strdup(name); - } - else if (STRCASECMP(name, ent->name)) { - return -1; - } - encoding = (rb_raw_encoding *)ent->enc; - if (!encoding) { - encoding = xmalloc(sizeof(rb_encoding)); - } + RUBY_ASSERT(!ent->loaded); + RUBY_ASSERT(!ent->name); + RUBY_ASSERT(!ent->enc); + RUBY_ASSERT(!ent->base); - if (base_encoding) { - *encoding = *base_encoding; - } - else { - memset(encoding, 0, sizeof(*ent->enc)); - } + RUBY_ASSERT(valid_encoding_name_p(name)); + + ent->name = name = strdup(name); + + encoding = ZALLOC(rb_raw_encoding); encoding->name = name; encoding->ruby_encoding_index = index; ent->enc = encoding; - st_insert(enc_table->names, (st_data_t)name, (st_data_t)index); + + if (st_insert(enc_table->names, (st_data_t)name, (st_data_t)index)) { + rb_bug("encoding name was somehow registered twice"); + } enc_list_update(index, encoding); - // max_enc_len is used to mark a fully loaded encoding. - RUBY_ATOMIC_SET(ent->loaded, encoding->max_enc_len); + if (base_encoding) { + enc_load_from_base(enc_table, index, base_encoding); + } + else { + /* it should not be loaded yet */ + RUBY_ASSERT(!encoding->max_enc_len); + } return index; } @@ -393,6 +429,8 @@ enc_register(struct enc_table *enc_table, const char *name, rb_encoding *encodin { ASSERT_vm_locking(); + if (!valid_encoding_name_p(name)) return -1; + int index = enc_table->count; enc_table->count = enc_table_expand(enc_table, index + 1); @@ -408,7 +446,9 @@ enc_from_index(struct enc_table *enc_table, int index) if (UNLIKELY(index < 0 || enc_table->count <= (index &= ENC_INDEX_MASK))) { return 0; } - return enc_table->list[index].enc; + rb_encoding *enc = enc_table->list[index].enc; + RUBY_ASSERT(ENC_TO_ENCINDEX(enc) == index); + return enc; } rb_encoding * @@ -431,7 +471,7 @@ rb_enc_register(const char *name, rb_encoding *encoding) index = enc_register(enc_table, name, encoding); } else if (rb_enc_autoload_p(oldenc) || !ENC_DUMMY_P(oldenc)) { - enc_register_at(enc_table, index, name, encoding); + enc_load_from_base(enc_table, index, encoding); } else { rb_raise(rb_eArgError, "encoding %s is already registered", name); @@ -548,7 +588,7 @@ enc_replicate_with_index(struct enc_table *enc_table, const char *name, rb_encod idx = enc_register(enc_table, name, origenc); } else { - idx = enc_register_at(enc_table, idx, name, origenc); + enc_load_from_base(enc_table, idx, origenc); } if (idx >= 0) { set_base_encoding(enc_table, idx, origenc); @@ -802,41 +842,28 @@ enc_autoload_body(rb_encoding *enc) GLOBAL_ENC_TABLE_LOCKING(enc_table) { base = enc_table->list[ENC_TO_ENCINDEX(enc)].base; - if (base) { - do { - if (i >= enc_table->count) { - i = -1; - break; - } - } while (enc_table->list[i].enc != base && (++i, 1)); - } } - - if (i != -1) { - if (base) { - bool do_register = true; - if (rb_enc_autoload_p(base)) { - if (rb_enc_autoload(base) < 0) { - do_register = false; - i = -1; - } + if (base) { + bool do_register = true; + if (rb_enc_autoload_p(base)) { + if (rb_enc_autoload(base) < 0) { + do_register = false; + i = -1; } + } - if (do_register) { - GLOBAL_ENC_TABLE_LOCKING(enc_table) { - i = enc->ruby_encoding_index; - enc_register_at(enc_table, i & ENC_INDEX_MASK, rb_enc_name(enc), base); - ((rb_raw_encoding *)enc)->ruby_encoding_index = i; - } + if (do_register) { + GLOBAL_ENC_TABLE_LOCKING(enc_table) { + i = ENC_TO_ENCINDEX(enc); + enc_load_from_base(enc_table, i, base); + RUBY_ASSERT(((rb_raw_encoding *)enc)->ruby_encoding_index == i); } - - i &= ENC_INDEX_MASK; - } - else { - i = -2; } } + else { + i = -2; + } return i; } @@ -1320,10 +1347,10 @@ enc_inspect(VALUE self) { rb_encoding *enc; - if (!is_data_encoding(self)) { + if (!is_obj_encoding(self)) { /* do not resolve autoload */ not_encoding(self); } - if (!(enc = DATA_PTR(self)) || rb_enc_from_index(rb_enc_to_index(enc)) != enc) { + if (!(enc = RTYPEDDATA_GET_DATA(self)) || rb_enc_from_index(rb_enc_to_index(enc)) != enc) { rb_raise(rb_eTypeError, "broken Encoding"); } @@ -1334,20 +1361,6 @@ enc_inspect(VALUE self) rb_enc_autoload_p(enc) ? " (autoload)" : ""); } -/* - * call-seq: - * enc.name -> string - * enc.to_s -> string - * - * Returns the name of the encoding. - * - * Encoding::UTF_8.name #=> "UTF-8" - */ -static VALUE -enc_name(VALUE self) -{ - return rb_fstring_cstr(rb_enc_name((rb_encoding*)DATA_PTR(self))); -} static int enc_names_i(st_data_t name, st_data_t idx, st_data_t args) @@ -1491,7 +1504,7 @@ static VALUE enc_dump(int argc, VALUE *argv, VALUE self) { rb_check_arity(argc, 0, 1); - return enc_name(self); + return rb_attr_get(self, id_i_name); } /* :nodoc: */ @@ -1556,15 +1569,16 @@ rb_locale_encindex(void) if (idx < 0) idx = ENCINDEX_UTF_8; - GLOBAL_ENC_TABLE_LOCKING(enc_table) { - if (enc_registered(enc_table, "locale") < 0) { + if (!RUBY_ATOMIC_LOAD(locale_alias_registered)) { + GLOBAL_ENC_TABLE_LOCKING(enc_table) { + if (enc_registered(enc_table, "locale") < 0) { # if defined _WIN32 - void Init_w32_codepage(void); - Init_w32_codepage(); + void Init_w32_codepage(void); + Init_w32_codepage(); # endif - GLOBAL_ENC_TABLE_LOCKING(enc_table) { enc_alias_internal(enc_table, "locale", idx); } + RUBY_ATOMIC_SET(locale_alias_registered, 1); } } @@ -1580,12 +1594,7 @@ rb_locale_encoding(void) int rb_filesystem_encindex(void) { - int idx; - GLOBAL_ENC_TABLE_LOCKING(enc_table) { - idx = enc_registered(enc_table, "filesystem"); - } - if (idx < 0) idx = ENCINDEX_ASCII_8BIT; - return idx; + return filesystem_encindex; } rb_encoding * @@ -1637,7 +1646,9 @@ enc_set_default_encoding(struct default_encoding *def, VALUE encoding, const cha } if (def == &default_external) { - enc_alias_internal(enc_table, "filesystem", Init_enc_set_filesystem_encoding()); + int fs_idx = Init_enc_set_filesystem_encoding(); + enc_alias_internal(enc_table, "filesystem", fs_idx); + filesystem_encindex = fs_idx; } } @@ -1980,12 +1991,19 @@ Init_Encoding(void) VALUE list; int i; + id_i_name = rb_intern_const("@name"); rb_cEncoding = rb_define_class("Encoding", rb_cObject); rb_define_alloc_func(rb_cEncoding, enc_s_alloc); rb_undef_method(CLASS_OF(rb_cEncoding), "new"); - rb_define_method(rb_cEncoding, "to_s", enc_name, 0); + + /* The name of the encoding. + * + * Encoding::UTF_8.name #=> "UTF-8" + */ + rb_attr(rb_cEncoding, rb_intern("name"), TRUE, FALSE, Qfalse); + rb_define_alias(rb_cEncoding, "to_s", "name"); + rb_define_method(rb_cEncoding, "inspect", enc_inspect, 0); - rb_define_method(rb_cEncoding, "name", enc_name, 0); rb_define_method(rb_cEncoding, "names", enc_names, 0); rb_define_method(rb_cEncoding, "dummy?", enc_dummy_p, 0); rb_define_method(rb_cEncoding, "ascii_compatible?", enc_ascii_compatible_p, 0); |
