summaryrefslogtreecommitdiff
path: root/transcode.c
diff options
context:
space:
mode:
Diffstat (limited to 'transcode.c')
-rw-r--r--transcode.c302
1 files changed, 185 insertions, 117 deletions
diff --git a/transcode.c b/transcode.c
index bff9268e1c..86e828c479 100644
--- a/transcode.c
+++ b/transcode.c
@@ -19,7 +19,9 @@
#include "internal/object.h"
#include "internal/string.h"
#include "internal/transcode.h"
+#include "internal/encoding.h"
#include "ruby/encoding.h"
+#include "vm_sync.h"
#include "transcode_data.h"
#include "id.h"
@@ -209,19 +211,21 @@ make_transcoder_entry(const char *sname, const char *dname)
st_data_t val;
st_table *table2;
- if (!st_lookup(transcoder_table, (st_data_t)sname, &val)) {
- val = (st_data_t)st_init_strcasetable();
- st_add_direct(transcoder_table, (st_data_t)sname, val);
- }
- table2 = (st_table *)val;
- if (!st_lookup(table2, (st_data_t)dname, &val)) {
- transcoder_entry_t *entry = ALLOC(transcoder_entry_t);
- entry->sname = sname;
- entry->dname = dname;
- entry->lib = NULL;
- entry->transcoder = NULL;
- val = (st_data_t)entry;
- st_add_direct(table2, (st_data_t)dname, val);
+ RB_VM_LOCKING() {
+ if (!st_lookup(transcoder_table, (st_data_t)sname, &val)) {
+ val = (st_data_t)st_init_strcasetable();
+ st_add_direct(transcoder_table, (st_data_t)sname, val);
+ }
+ table2 = (st_table *)val;
+ if (!st_lookup(table2, (st_data_t)dname, &val)) {
+ transcoder_entry_t *entry = ALLOC(transcoder_entry_t);
+ entry->sname = sname;
+ entry->dname = dname;
+ entry->lib = NULL;
+ entry->transcoder = NULL;
+ val = (st_data_t)entry;
+ st_add_direct(table2, (st_data_t)dname, val);
+ }
}
return (transcoder_entry_t *)val;
}
@@ -229,15 +233,15 @@ make_transcoder_entry(const char *sname, const char *dname)
static transcoder_entry_t *
get_transcoder_entry(const char *sname, const char *dname)
{
- st_data_t val;
+ st_data_t val = 0;
st_table *table2;
-
- if (!st_lookup(transcoder_table, (st_data_t)sname, &val)) {
- return NULL;
- }
- table2 = (st_table *)val;
- if (!st_lookup(table2, (st_data_t)dname, &val)) {
- return NULL;
+ RB_VM_LOCKING() {
+ if (st_lookup(transcoder_table, (st_data_t)sname, &val)) {
+ table2 = (st_table *)val;
+ if (!st_lookup(table2, (st_data_t)dname, &val)) {
+ val = 0;
+ }
+ }
}
return (transcoder_entry_t *)val;
}
@@ -250,13 +254,14 @@ rb_register_transcoder(const rb_transcoder *tr)
transcoder_entry_t *entry;
- entry = make_transcoder_entry(sname, dname);
- if (entry->transcoder) {
- rb_raise(rb_eArgError, "transcoder from %s to %s has been already registered",
- sname, dname);
+ RB_VM_LOCKING() {
+ entry = make_transcoder_entry(sname, dname);
+ if (entry->transcoder) {
+ rb_raise(rb_eArgError, "transcoder from %s to %s has been already registered",
+ sname, dname);
+ }
+ entry->transcoder = tr;
}
-
- entry->transcoder = tr;
}
static void
@@ -323,8 +328,9 @@ transcode_search_path(const char *sname, const char *dname,
search_path_queue_t *q;
st_data_t val;
st_table *table2;
- int found;
int pathlen = -1;
+ bool found = false;
+ bool lookup_res;
if (encoding_equal(sname, dname))
return -1;
@@ -335,37 +341,39 @@ transcode_search_path(const char *sname, const char *dname,
bfs.queue_last_ptr = &q->next;
bfs.queue = q;
- bfs.visited = st_init_strcasetable();
+ bfs.visited = st_init_strcasetable(); // due to base encodings, we need to do search in a loop
st_add_direct(bfs.visited, (st_data_t)sname, (st_data_t)NULL);
- while (bfs.queue) {
- q = bfs.queue;
- bfs.queue = q->next;
- if (!bfs.queue)
- bfs.queue_last_ptr = &bfs.queue;
+ RB_VM_LOCKING() {
+ while (bfs.queue) {
+ q = bfs.queue;
+ bfs.queue = q->next;
+ if (!bfs.queue) {
+ bfs.queue_last_ptr = &bfs.queue;
+ }
- if (!st_lookup(transcoder_table, (st_data_t)q->enc, &val)) {
- xfree(q);
- continue;
- }
- table2 = (st_table *)val;
+ lookup_res = st_lookup(transcoder_table, (st_data_t)q->enc, &val); // src => table2
+ if (!lookup_res) {
+ xfree(q);
+ continue;
+ }
+ table2 = (st_table *)val;
- if (st_lookup(table2, (st_data_t)dname, &val)) {
- st_add_direct(bfs.visited, (st_data_t)dname, (st_data_t)q->enc);
- xfree(q);
- found = 1;
- goto cleanup;
- }
+ if (st_lookup(table2, (st_data_t)dname, &val)) { // dest => econv
+ st_add_direct(bfs.visited, (st_data_t)dname, (st_data_t)q->enc);
+ xfree(q);
+ found = true;
+ break;
+ }
- bfs.base_enc = q->enc;
- st_foreach(table2, transcode_search_path_i, (st_data_t)&bfs);
- bfs.base_enc = NULL;
+ bfs.base_enc = q->enc;
+ st_foreach(table2, transcode_search_path_i, (st_data_t)&bfs);
- xfree(q);
+ bfs.base_enc = NULL;
+ xfree(q);
+ }
}
- found = 0;
- cleanup:
while (bfs.queue) {
q = bfs.queue;
bfs.queue = q->next;
@@ -404,6 +412,7 @@ int rb_require_internal_silent(VALUE fname);
static const rb_transcoder *
load_transcoder_entry(transcoder_entry_t *entry)
{
+ ASSERT_vm_unlocking();
if (entry->transcoder)
return entry->transcoder;
@@ -418,7 +427,7 @@ load_transcoder_entry(transcoder_entry_t *entry)
memcpy(path + sizeof(transcoder_lib_prefix) - 1, lib, len);
rb_str_set_len(fn, total_len);
OBJ_FREEZE(fn);
- rb_require_internal_silent(fn);
+ rb_require_internal_silent(fn); // Sets entry->transcoder
}
if (entry->transcoder)
@@ -1017,8 +1026,7 @@ rb_econv_open0(const char *sname, const char *dname, int ecflags)
int num_trans;
rb_econv_t *ec;
- /* Just check if sname and dname are defined */
- /* (This check is needed?) */
+ // loads encodings if not loaded already
if (*sname) rb_enc_find_index(sname);
if (*dname) rb_enc_find_index(dname);
@@ -1107,18 +1115,20 @@ rb_econv_open(const char *sname, const char *dname, int ecflags)
return NULL;
ec = rb_econv_open0(sname, dname, ecflags & ECONV_ERROR_HANDLER_MASK);
- if (!ec)
- return NULL;
-
- for (i = 0; i < num_decorators; i++)
- if (rb_econv_decorate_at_last(ec, decorators[i]) == -1) {
- rb_econv_close(ec);
- return NULL;
+ if (ec) {
+ for (i = 0; i < num_decorators; i++) {
+ if (rb_econv_decorate_at_last(ec, decorators[i]) == -1) {
+ rb_econv_close(ec);
+ ec = NULL;
+ break;
+ }
}
+ }
- ec->flags |= ecflags & ~ECONV_ERROR_HANDLER_MASK;
-
- return ec;
+ if (ec) {
+ ec->flags |= ecflags & ~ECONV_ERROR_HANDLER_MASK;
+ }
+ return ec; // can be NULL
}
static int
@@ -1815,26 +1825,44 @@ rb_econv_asciicompat_encoding(const char *ascii_incompat_name)
{
st_data_t v;
st_table *table2;
- struct asciicompat_encoding_t data;
+ struct asciicompat_encoding_t data = {0};
- if (!st_lookup(transcoder_table, (st_data_t)ascii_incompat_name, &v))
- return NULL;
- table2 = (st_table *)v;
+ unsigned int lev;
+ RB_VM_LOCK_ENTER_LEV(&lev);
+ {
+ if (st_lookup(transcoder_table, (st_data_t)ascii_incompat_name, &v)) {
+ table2 = (st_table *)v;
+ /*
+ * Assumption:
+ * There is at most one transcoder for
+ * converting from ASCII incompatible encoding.
+ *
+ * For ISO-2022-JP, there is ISO-2022-JP -> stateless-ISO-2022-JP and no others.
+ */
+ if (table2->num_entries == 1) {
+ data.ascii_incompat_name = ascii_incompat_name;
+ data.ascii_compat_name = NULL;
+ if (rb_multi_ractor_p()) {
+ /*
+ * We need to unlock in case `load_transcoder_entry` actually loads the encoding
+ * and table2 could be inserted into when we unlock.
+ */
+ st_table *dup_table2 = st_copy(table2);
+ RB_VM_LOCK_LEAVE_LEV(&lev);
+ st_foreach(dup_table2, asciicompat_encoding_i, (st_data_t)&data);
+ st_free_table(dup_table2);
+ RB_VM_LOCK_ENTER_LEV(&lev);
+ }
+ else {
+ st_foreach(table2, asciicompat_encoding_i, (st_data_t)&data);
+ }
+ }
- /*
- * Assumption:
- * There is at most one transcoder for
- * converting from ASCII incompatible encoding.
- *
- * For ISO-2022-JP, there is ISO-2022-JP -> stateless-ISO-2022-JP and no others.
- */
- if (table2->num_entries != 1)
- return NULL;
+ }
+ }
+ RB_VM_LOCK_LEAVE_LEV(&lev);
- data.ascii_incompat_name = ascii_incompat_name;
- data.ascii_compat_name = NULL;
- st_foreach(table2, asciicompat_encoding_i, (st_data_t)&data);
- return data.ascii_compat_name;
+ return data.ascii_compat_name; // can be NULL
}
/*
@@ -1937,19 +1965,17 @@ static int
rb_econv_add_converter(rb_econv_t *ec, const char *sname, const char *dname, int n)
{
transcoder_entry_t *entry;
- const rb_transcoder *tr;
+ const rb_transcoder *tr = NULL;
if (ec->started != 0)
return -1;
entry = get_transcoder_entry(sname, dname);
- if (!entry)
- return -1;
-
- tr = load_transcoder_entry(entry);
- if (!tr) return -1;
+ if (entry) {
+ tr = load_transcoder_entry(entry);
+ }
- return rb_econv_add_transcoder_at(ec, tr, n);
+ return tr ? rb_econv_add_transcoder_at(ec, tr, n) : -1;
}
static int
@@ -2323,6 +2349,26 @@ aref_fallback(VALUE fallback, VALUE c)
return rb_funcallv_public(fallback, idAREF, 1, &c);
}
+struct transcode_loop_fallback_args {
+ VALUE (*fallback_func)(VALUE, VALUE);
+ VALUE fallback;
+ VALUE rep;
+};
+
+static VALUE
+transcode_loop_fallback_try(VALUE a)
+{
+ struct transcode_loop_fallback_args *args = (struct transcode_loop_fallback_args *)a;
+
+ VALUE ret = args->fallback_func(args->fallback, args->rep);
+
+ if (!UNDEF_P(ret) && !NIL_P(ret)) {
+ StringValue(ret);
+ }
+
+ return ret;
+}
+
static void
transcode_loop(const unsigned char **in_pos, unsigned char **out_pos,
const unsigned char *in_stop, unsigned char *out_stop,
@@ -2372,12 +2418,26 @@ transcode_loop(const unsigned char **in_pos, unsigned char **out_pos,
(const char *)ec->last_error.error_bytes_start,
ec->last_error.error_bytes_len,
rb_enc_find(ec->last_error.source_encoding));
- rep = (*fallback_func)(fallback, rep);
+
+
+ struct transcode_loop_fallback_args args = {
+ .fallback_func = fallback_func,
+ .fallback = fallback,
+ .rep = rep,
+ };
+
+ int state;
+ rep = rb_protect(transcode_loop_fallback_try, (VALUE)&args, &state);
+ if (state) {
+ rb_econv_close(ec);
+ rb_jump_tag(state);
+ }
+
if (!UNDEF_P(rep) && !NIL_P(rep)) {
- StringValue(rep);
ret = rb_econv_insert_output(ec, (const unsigned char *)RSTRING_PTR(rep),
RSTRING_LEN(rep), rb_enc_name(rb_enc_get(rep)));
if ((int)ret == -1) {
+ rb_econv_close(ec);
rb_raise(rb_eArgError, "too big fallback string");
}
goto resume;
@@ -2663,23 +2723,22 @@ rb_econv_open_opts(const char *source_encoding, const char *destination_encoding
}
ec = rb_econv_open(source_encoding, destination_encoding, ecflags);
- if (!ec)
- return ec;
-
- if (!NIL_P(replacement)) {
- int ret;
- rb_encoding *enc = rb_enc_get(replacement);
-
- ret = rb_econv_set_replacement(ec,
- (const unsigned char *)RSTRING_PTR(replacement),
- RSTRING_LEN(replacement),
- rb_enc_name(enc));
- if (ret == -1) {
- rb_econv_close(ec);
- return NULL;
+ if (ec) {
+ if (!NIL_P(replacement)) {
+ int ret;
+ rb_encoding *enc = rb_enc_get(replacement);
+
+ ret = rb_econv_set_replacement(ec,
+ (const unsigned char *)RSTRING_PTR(replacement),
+ RSTRING_LEN(replacement),
+ rb_enc_name(enc));
+ if (ret == -1) {
+ rb_econv_close(ec);
+ ec = NULL;
+ }
}
}
- return ec;
+ return ec; // can be NULL
}
static int
@@ -2869,6 +2928,7 @@ str_encode_associate(VALUE str, int encidx)
*
* Like #encode, but applies encoding changes to +self+; returns +self+.
*
+ * Related: see {Modifying}[rdoc-ref:String@Modifying].
*/
static VALUE
@@ -2980,8 +3040,16 @@ make_encoding(const char *name)
{
rb_encoding *enc;
enc = rb_enc_find(name);
- if (!enc)
- enc = make_dummy_encoding(name);
+ if (!enc) {
+ RB_VM_LOCKING() {
+ if (rb_enc_registered(name)) {
+ enc = NULL;
+ }
+ else {
+ enc = make_dummy_encoding(name);
+ }
+ }
+ }
return enc;
}
@@ -3014,17 +3082,15 @@ econv_s_asciicompat_encoding(VALUE klass, VALUE arg)
{
const char *arg_name, *result_name;
rb_encoding *arg_enc, *result_enc;
+ VALUE enc = Qnil;
enc_arg(&arg, &arg_name, &arg_enc);
-
result_name = rb_econv_asciicompat_encoding(arg_name);
-
- if (result_name == NULL)
- return Qnil;
-
- result_enc = make_encoding(result_name);
-
- return rb_enc_from_encoding(result_enc);
+ if (result_name) {
+ result_enc = make_encoding(result_name);
+ enc = rb_enc_from_encoding(result_enc);
+ }
+ return enc;
}
static void
@@ -3105,8 +3171,10 @@ decorate_convpath(VALUE convpath, int ecflags)
if (RB_TYPE_P(pair, T_ARRAY)) {
const char *sname = rb_enc_name(rb_to_encoding(RARRAY_AREF(pair, 0)));
const char *dname = rb_enc_name(rb_to_encoding(RARRAY_AREF(pair, 1)));
- transcoder_entry_t *entry = get_transcoder_entry(sname, dname);
- const rb_transcoder *tr = load_transcoder_entry(entry);
+ transcoder_entry_t *entry;
+ const rb_transcoder *tr;
+ entry = get_transcoder_entry(sname, dname);
+ tr = load_transcoder_entry(entry);
if (!tr)
return -1;
if (!DECORATOR_P(tr->src_encoding, tr->dst_encoding) &&