diff options
| -rw-r--r-- | encoding.c | 93 |
1 files changed, 55 insertions, 38 deletions
diff --git a/encoding.c b/encoding.c index e2c061480e..4ccb29d2a1 100644 --- a/encoding.c +++ b/encoding.c @@ -351,6 +351,35 @@ enc_table_expand(struct enc_table *enc_table, int newsize) return newsize; } +/* Load an encoding using the values from base_encoding */ +static void +enc_load_from_base(struct enc_table *enc_table, int index, rb_encoding *base_encoding) +{ + ASSERT_vm_locking(); + + struct rb_encoding_entry *ent = &enc_table->list[index]; + + if (ent->loaded) { + return; + } + + rb_raw_encoding *encoding = (rb_raw_encoding *)ent->enc; + RUBY_ASSERT(encoding); + + // FIXME: Before the base is loaded, the encoding may be accessed + // concurrently by other Ractors. + // We're copying all fields from base_encoding except name and + // ruby_encoding_index which we preserve from the original. Since these are + // the only fields other threads should read it is likely safe despite + // technically being a data race. + rb_raw_encoding tmp_encoding = *base_encoding; + tmp_encoding.name = encoding->name; + tmp_encoding.ruby_encoding_index = encoding->ruby_encoding_index; + *encoding = tmp_encoding; + + RUBY_ATOMIC_SET(ent->loaded, encoding->max_enc_len); +} + static int enc_register_at(struct enc_table *enc_table, int index, const char *name, rb_encoding *base_encoding) { @@ -359,45 +388,33 @@ enc_register_at(struct enc_table *enc_table, int index, const char *name, rb_enc struct rb_encoding_entry *ent = &enc_table->list[index]; rb_raw_encoding *encoding; - if (ent->loaded) { - RUBY_ASSERT(ent->base == base_encoding); - RUBY_ASSERT(!strcmp(name, ent->name)); - return index; - } + RUBY_ASSERT(!ent->loaded); + RUBY_ASSERT(!ent->name); + RUBY_ASSERT(!ent->enc); + RUBY_ASSERT(!ent->base); - if (!valid_encoding_name_p(name)) return -1; - if (!ent->name) { - ent->name = name = strdup(name); - } - else if (STRCASECMP(name, ent->name)) { - return -1; - } - encoding = (rb_raw_encoding *)ent->enc; - if (!encoding) { - encoding = xmalloc(sizeof(rb_encoding)); - } + RUBY_ASSERT(valid_encoding_name_p(name)); - rb_raw_encoding tmp_encoding; - if (base_encoding) { - tmp_encoding = *base_encoding; - } - else { - memset(&tmp_encoding, 0, sizeof(*ent->enc)); - } - tmp_encoding.name = name; - tmp_encoding.ruby_encoding_index = index; + ent->name = name = strdup(name); - // FIXME: If encoding already existed, it may be concurrently accessed - // It's technically invalid to write to this memory as it's read, but as all - // values are set up it _probably_ works. - *encoding = tmp_encoding; + encoding = ZALLOC(rb_raw_encoding); + encoding->name = name; + encoding->ruby_encoding_index = index; ent->enc = encoding; - st_insert(enc_table->names, (st_data_t)name, (st_data_t)index); + + if (st_insert(enc_table->names, (st_data_t)name, (st_data_t)index)) { + rb_bug("encoding name was somehow registered twice"); + } enc_list_update(index, encoding); - // max_enc_len is used to mark a fully loaded encoding. - RUBY_ATOMIC_SET(ent->loaded, encoding->max_enc_len); + if (base_encoding) { + enc_load_from_base(enc_table, index, base_encoding); + } + else { + /* it should not be loaded yet */ + RUBY_ASSERT(!encoding->max_enc_len); + } return index; } @@ -407,6 +424,8 @@ enc_register(struct enc_table *enc_table, const char *name, rb_encoding *encodin { ASSERT_vm_locking(); + if (!valid_encoding_name_p(name)) return -1; + int index = enc_table->count; enc_table->count = enc_table_expand(enc_table, index + 1); @@ -447,7 +466,7 @@ rb_enc_register(const char *name, rb_encoding *encoding) index = enc_register(enc_table, name, encoding); } else if (rb_enc_autoload_p(oldenc) || !ENC_DUMMY_P(oldenc)) { - enc_register_at(enc_table, index, name, encoding); + enc_load_from_base(enc_table, index, encoding); } else { rb_raise(rb_eArgError, "encoding %s is already registered", name); @@ -564,7 +583,7 @@ enc_replicate_with_index(struct enc_table *enc_table, const char *name, rb_encod idx = enc_register(enc_table, name, origenc); } else { - idx = enc_register_at(enc_table, idx, name, origenc); + enc_load_from_base(enc_table, idx, origenc); } if (idx >= 0) { set_base_encoding(enc_table, idx, origenc); @@ -841,13 +860,11 @@ enc_autoload_body(rb_encoding *enc) if (do_register) { GLOBAL_ENC_TABLE_LOCKING(enc_table) { - i = enc->ruby_encoding_index; - enc_register_at(enc_table, i & ENC_INDEX_MASK, rb_enc_name(enc), base); + i = ENC_TO_ENCINDEX(enc); + enc_load_from_base(enc_table, i, base); RUBY_ASSERT(((rb_raw_encoding *)enc)->ruby_encoding_index == i); } } - - i &= ENC_INDEX_MASK; } else { i = -2; |
