diff options
| author | Luke Gruber <luke.gruber@shopify.com> | 2025-08-14 19:21:49 -0400 |
|---|---|---|
| committer | John Hawthorn <john@hawthorn.email> | 2025-08-22 10:49:44 -0700 |
| commit | 9db54a1a98956ea37a04d6e3f83fcd6745ed96a6 (patch) | |
| tree | 7065f8b9fdf843de0c6db5b72d77087e51fe17e4 | |
| parent | 48fc41cc19235711cd9a8e5afd37af91fd78badd (diff) | |
Fixes to encoding/transcoding for ractors.
Not all ractor-related encoding issues were fixed by 1afc07e815051e2f73493f055f2130cb642ba12a.
I found more by running my test-all branch with 3 ractors for each test.
| -rw-r--r-- | depend | 2 | ||||
| -rw-r--r-- | encoding.c | 14 | ||||
| -rw-r--r-- | internal/encoding.h | 1 | ||||
| -rw-r--r-- | test/ruby/test_encoding.rb | 19 | ||||
| -rw-r--r-- | test/ruby/test_transcode.rb | 46 | ||||
| -rw-r--r-- | transcode.c | 48 |
6 files changed, 113 insertions, 17 deletions
@@ -1401,7 +1401,6 @@ compile.$(OBJEXT): $(top_srcdir)/prism/pack.h compile.$(OBJEXT): $(top_srcdir)/prism/parser.h compile.$(OBJEXT): $(top_srcdir)/prism/prettyprint.h compile.$(OBJEXT): $(top_srcdir)/prism/prism.h -compile.$(OBJEXT): $(top_srcdir)/prism/prism.h compile.$(OBJEXT): $(top_srcdir)/prism/regexp.h compile.$(OBJEXT): $(top_srcdir)/prism/static_literals.h compile.$(OBJEXT): $(top_srcdir)/prism/util/pm_buffer.h @@ -17727,6 +17726,7 @@ transcode.$(OBJEXT): $(hdrdir)/ruby/ruby.h transcode.$(OBJEXT): $(top_srcdir)/internal/array.h transcode.$(OBJEXT): $(top_srcdir)/internal/class.h transcode.$(OBJEXT): $(top_srcdir)/internal/compilers.h +transcode.$(OBJEXT): $(top_srcdir)/internal/encoding.h transcode.$(OBJEXT): $(top_srcdir)/internal/gc.h transcode.$(OBJEXT): $(top_srcdir)/internal/inits.h transcode.$(OBJEXT): $(top_srcdir)/internal/object.h diff --git a/encoding.c b/encoding.c index 0a5d61ee4a..2416acecea 100644 --- a/encoding.c +++ b/encoding.c @@ -459,6 +459,16 @@ enc_registered(struct enc_table *enc_table, const char *name) return -1; } +int +rb_enc_registered(const char *name) +{ + int idx; + GLOBAL_ENC_TABLE_LOCKING(enc_table) { + idx = enc_registered(enc_table, name); + } + return idx; +} + void rb_encdb_declare(const char *name) { @@ -1600,8 +1610,10 @@ enc_set_default_encoding(struct default_encoding *def, VALUE encoding, const cha /* Already set */ overridden = TRUE; + int index = 0; if (!NIL_P(encoding)) { enc_check_encoding(encoding); // loads it if necessary. Needs to be done outside of VM lock. + index = rb_enc_to_index(rb_to_encoding(encoding)); } GLOBAL_ENC_TABLE_LOCKING(enc_table) { @@ -1619,7 +1631,7 @@ enc_set_default_encoding(struct default_encoding *def, VALUE encoding, const cha (st_data_t)UNSPECIFIED_ENCODING); } else { - def->index = rb_enc_to_index(rb_to_encoding(encoding)); + def->index = index; def->enc = 0; enc_alias_internal(enc_table, name, def->index); } diff --git a/internal/encoding.h b/internal/encoding.h index c2ffaf4514..38bf8fc9da 100644 --- a/internal/encoding.h +++ b/internal/encoding.h @@ -29,6 +29,7 @@ void rb_encdb_declare(const char *name); void rb_enc_set_base(const char *name, const char *orig); int rb_enc_set_dummy(int index); void rb_enc_raw_set(VALUE obj, rb_encoding *enc); +int rb_enc_registered(const char *name); PUREFUNC(int rb_data_is_encoding(VALUE obj)); diff --git a/test/ruby/test_encoding.rb b/test/ruby/test_encoding.rb index 7ccbb31f50..5c1eb50bb1 100644 --- a/test/ruby/test_encoding.rb +++ b/test/ruby/test_encoding.rb @@ -157,4 +157,23 @@ class TestEncoding < Test::Unit::TestCase assert rs.empty? end; end + + def test_ractor_set_default_external_string + assert_ractor("#{<<~"begin;"}\n#{<<~'end;'}") + begin; + $-w = nil + rs = [] + 7.times do |i| + rs << Ractor.new(i) do |i| + Encoding.default_external = "us-ascii" + end + end + + while rs.any? + r, _obj = Ractor.select(*rs) + rs.delete(r) + end + assert rs.empty? + end; + end end diff --git a/test/ruby/test_transcode.rb b/test/ruby/test_transcode.rb index 938e20e9a1..71e1cc9e2a 100644 --- a/test/ruby/test_transcode.rb +++ b/test/ruby/test_transcode.rb @@ -2361,6 +2361,52 @@ class TestTranscode < Test::Unit::TestCase end; end + def test_ractor_asciicompat_encoding_exists + assert_ractor("#{<<~"begin;"}\n#{<<~'end;'}") + begin; + rs = [] + 7.times do + rs << Ractor.new do + string = "ISO-2022-JP" + encoding = Encoding.find(string) + 20_000.times do + Encoding::Converter.asciicompat_encoding(string) + Encoding::Converter.asciicompat_encoding(encoding) + end + end + end + + while rs.any? + r, _obj = Ractor.select(*rs) + rs.delete(r) + end + assert rs.empty? + end; + end + + def test_ractor_asciicompat_encoding_doesnt_exist + assert_ractor("#{<<~"begin;"}\n#{<<~'end;'}") + begin; + rs = [] + NO_EXIST = "I".freeze + 7.times do + rs << Ractor.new do + 50.times do + if (val = Encoding::Converter.asciicompat_encoding(NO_EXIST)) + raise "Got #{val}, expected nil" + end + end + end + end + + while rs.any? + r, _obj = Ractor.select(*rs) + rs.delete(r) + end + assert rs.empty? + end; + end + private def assert_conversion_both_ways_utf8(utf8, raw, encoding) diff --git a/transcode.c b/transcode.c index 507bce78e1..072e1942b1 100644 --- a/transcode.c +++ b/transcode.c @@ -19,6 +19,7 @@ #include "internal/object.h" #include "internal/string.h" #include "internal/transcode.h" +#include "internal/encoding.h" #include "ruby/encoding.h" #include "vm_sync.h" @@ -1826,7 +1827,9 @@ rb_econv_asciicompat_encoding(const char *ascii_incompat_name) st_table *table2; struct asciicompat_encoding_t data = {0}; - RB_VM_LOCKING() { + unsigned int lev; + RB_VM_LOCK_ENTER_LEV(&lev); + { if (st_lookup(transcoder_table, (st_data_t)ascii_incompat_name, &v)) { table2 = (st_table *)v; /* @@ -1839,12 +1842,25 @@ rb_econv_asciicompat_encoding(const char *ascii_incompat_name) if (table2->num_entries == 1) { data.ascii_incompat_name = ascii_incompat_name; data.ascii_compat_name = NULL; - st_foreach(table2, asciicompat_encoding_i, (st_data_t)&data); + if (rb_multi_ractor_p()) { + /* + * We need to unlock in case `load_transcoder_entry` actually loads the encoding + * and table2 could be inserted into when we unlock. + */ + st_table *dup_table2 = st_copy(table2); + RB_VM_LOCK_LEAVE_LEV(&lev); + st_foreach(dup_table2, asciicompat_encoding_i, (st_data_t)&data); + st_free_table(dup_table2); + RB_VM_LOCK_ENTER_LEV(&lev); + } + else { + st_foreach(table2, asciicompat_encoding_i, (st_data_t)&data); + } } } - } + RB_VM_LOCK_LEAVE_LEV(&lev); return data.ascii_compat_name; // can be NULL } @@ -2989,10 +3005,16 @@ static rb_encoding * make_encoding(const char *name) { rb_encoding *enc; - RB_VM_LOCKING() { - enc = rb_enc_find(name); - if (!enc) - enc = make_dummy_encoding(name); + enc = rb_enc_find(name); + if (!enc) { + RB_VM_LOCKING() { + if (rb_enc_registered(name)) { + enc = NULL; + } + else { + enc = make_dummy_encoding(name); + } + } } return enc; } @@ -3029,14 +3051,10 @@ econv_s_asciicompat_encoding(VALUE klass, VALUE arg) VALUE enc = Qnil; enc_arg(&arg, &arg_name, &arg_enc); - - RB_VM_LOCKING() { - result_name = rb_econv_asciicompat_encoding(arg_name); - - if (result_name) { - result_enc = make_encoding(result_name); - enc = rb_enc_from_encoding(result_enc); - } + result_name = rb_econv_asciicompat_encoding(arg_name); + if (result_name) { + result_enc = make_encoding(result_name); + enc = rb_enc_from_encoding(result_enc); } return enc; } |
