summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--encoding.c93
1 files changed, 55 insertions, 38 deletions
diff --git a/encoding.c b/encoding.c
index e2c061480e..4ccb29d2a1 100644
--- a/encoding.c
+++ b/encoding.c
@@ -351,6 +351,35 @@ enc_table_expand(struct enc_table *enc_table, int newsize)
return newsize;
}
+/* Load an encoding using the values from base_encoding */
+static void
+enc_load_from_base(struct enc_table *enc_table, int index, rb_encoding *base_encoding)
+{
+ ASSERT_vm_locking();
+
+ struct rb_encoding_entry *ent = &enc_table->list[index];
+
+ if (ent->loaded) {
+ return;
+ }
+
+ rb_raw_encoding *encoding = (rb_raw_encoding *)ent->enc;
+ RUBY_ASSERT(encoding);
+
+ // FIXME: Before the base is loaded, the encoding may be accessed
+ // concurrently by other Ractors.
+ // We're copying all fields from base_encoding except name and
+ // ruby_encoding_index which we preserve from the original. Since these are
+ // the only fields other threads should read it is likely safe despite
+ // technically being a data race.
+ rb_raw_encoding tmp_encoding = *base_encoding;
+ tmp_encoding.name = encoding->name;
+ tmp_encoding.ruby_encoding_index = encoding->ruby_encoding_index;
+ *encoding = tmp_encoding;
+
+ RUBY_ATOMIC_SET(ent->loaded, encoding->max_enc_len);
+}
+
static int
enc_register_at(struct enc_table *enc_table, int index, const char *name, rb_encoding *base_encoding)
{
@@ -359,45 +388,33 @@ enc_register_at(struct enc_table *enc_table, int index, const char *name, rb_enc
struct rb_encoding_entry *ent = &enc_table->list[index];
rb_raw_encoding *encoding;
- if (ent->loaded) {
- RUBY_ASSERT(ent->base == base_encoding);
- RUBY_ASSERT(!strcmp(name, ent->name));
- return index;
- }
+ RUBY_ASSERT(!ent->loaded);
+ RUBY_ASSERT(!ent->name);
+ RUBY_ASSERT(!ent->enc);
+ RUBY_ASSERT(!ent->base);
- if (!valid_encoding_name_p(name)) return -1;
- if (!ent->name) {
- ent->name = name = strdup(name);
- }
- else if (STRCASECMP(name, ent->name)) {
- return -1;
- }
- encoding = (rb_raw_encoding *)ent->enc;
- if (!encoding) {
- encoding = xmalloc(sizeof(rb_encoding));
- }
+ RUBY_ASSERT(valid_encoding_name_p(name));
- rb_raw_encoding tmp_encoding;
- if (base_encoding) {
- tmp_encoding = *base_encoding;
- }
- else {
- memset(&tmp_encoding, 0, sizeof(*ent->enc));
- }
- tmp_encoding.name = name;
- tmp_encoding.ruby_encoding_index = index;
+ ent->name = name = strdup(name);
- // FIXME: If encoding already existed, it may be concurrently accessed
- // It's technically invalid to write to this memory as it's read, but as all
- // values are set up it _probably_ works.
- *encoding = tmp_encoding;
+ encoding = ZALLOC(rb_raw_encoding);
+ encoding->name = name;
+ encoding->ruby_encoding_index = index;
ent->enc = encoding;
- st_insert(enc_table->names, (st_data_t)name, (st_data_t)index);
+
+ if (st_insert(enc_table->names, (st_data_t)name, (st_data_t)index)) {
+ rb_bug("encoding name was somehow registered twice");
+ }
enc_list_update(index, encoding);
- // max_enc_len is used to mark a fully loaded encoding.
- RUBY_ATOMIC_SET(ent->loaded, encoding->max_enc_len);
+ if (base_encoding) {
+ enc_load_from_base(enc_table, index, base_encoding);
+ }
+ else {
+ /* it should not be loaded yet */
+ RUBY_ASSERT(!encoding->max_enc_len);
+ }
return index;
}
@@ -407,6 +424,8 @@ enc_register(struct enc_table *enc_table, const char *name, rb_encoding *encodin
{
ASSERT_vm_locking();
+ if (!valid_encoding_name_p(name)) return -1;
+
int index = enc_table->count;
enc_table->count = enc_table_expand(enc_table, index + 1);
@@ -447,7 +466,7 @@ rb_enc_register(const char *name, rb_encoding *encoding)
index = enc_register(enc_table, name, encoding);
}
else if (rb_enc_autoload_p(oldenc) || !ENC_DUMMY_P(oldenc)) {
- enc_register_at(enc_table, index, name, encoding);
+ enc_load_from_base(enc_table, index, encoding);
}
else {
rb_raise(rb_eArgError, "encoding %s is already registered", name);
@@ -564,7 +583,7 @@ enc_replicate_with_index(struct enc_table *enc_table, const char *name, rb_encod
idx = enc_register(enc_table, name, origenc);
}
else {
- idx = enc_register_at(enc_table, idx, name, origenc);
+ enc_load_from_base(enc_table, idx, origenc);
}
if (idx >= 0) {
set_base_encoding(enc_table, idx, origenc);
@@ -841,13 +860,11 @@ enc_autoload_body(rb_encoding *enc)
if (do_register) {
GLOBAL_ENC_TABLE_LOCKING(enc_table) {
- i = enc->ruby_encoding_index;
- enc_register_at(enc_table, i & ENC_INDEX_MASK, rb_enc_name(enc), base);
+ i = ENC_TO_ENCINDEX(enc);
+ enc_load_from_base(enc_table, i, base);
RUBY_ASSERT(((rb_raw_encoding *)enc)->ruby_encoding_index == i);
}
}
-
- i &= ENC_INDEX_MASK;
}
else {
i = -2;