summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Luke Gruber <luke.gruber@shopify.com> 2025-08-14 19:21:49 -0400
committer John Hawthorn <john@hawthorn.email> 2025-08-22 10:49:44 -0700
commit 9db54a1a98956ea37a04d6e3f83fcd6745ed96a6 (patch)
tree 7065f8b9fdf843de0c6db5b72d77087e51fe17e4
parent 48fc41cc19235711cd9a8e5afd37af91fd78badd (diff)
Fixes to encoding/transcoding for ractors.
Not all ractor-related encoding issues were fixed by 1afc07e815051e2f73493f055f2130cb642ba12a. I found more by running my test-all branch with 3 ractors for each test.
-rw-r--r-- depend 2
-rw-r--r-- encoding.c 14
-rw-r--r-- internal/encoding.h 1
-rw-r--r-- test/ruby/test_encoding.rb 19
-rw-r--r-- test/ruby/test_transcode.rb 46
-rw-r--r-- transcode.c 48
6 files changed, 113 insertions, 17 deletions
diff --git a/depend b/depend
index 334bab5684..7fdf369158 100644
--- a/depend
+++ b/depend
@@ -1401,7 +1401,6 @@ compile.$(OBJEXT): $(top_srcdir)/prism/pack.h
compile.$(OBJEXT): $(top_srcdir)/prism/parser.h
compile.$(OBJEXT): $(top_srcdir)/prism/prettyprint.h
compile.$(OBJEXT): $(top_srcdir)/prism/prism.h
-compile.$(OBJEXT): $(top_srcdir)/prism/prism.h
compile.$(OBJEXT): $(top_srcdir)/prism/regexp.h
compile.$(OBJEXT): $(top_srcdir)/prism/static_literals.h
compile.$(OBJEXT): $(top_srcdir)/prism/util/pm_buffer.h
@@ -17727,6 +17726,7 @@ transcode.$(OBJEXT): $(hdrdir)/ruby/ruby.h
transcode.$(OBJEXT): $(top_srcdir)/internal/array.h
transcode.$(OBJEXT): $(top_srcdir)/internal/class.h
transcode.$(OBJEXT): $(top_srcdir)/internal/compilers.h
+transcode.$(OBJEXT): $(top_srcdir)/internal/encoding.h
transcode.$(OBJEXT): $(top_srcdir)/internal/gc.h
transcode.$(OBJEXT): $(top_srcdir)/internal/inits.h
transcode.$(OBJEXT): $(top_srcdir)/internal/object.h
diff --git a/encoding.c b/encoding.c
index 0a5d61ee4a..2416acecea 100644
--- a/encoding.c
+++ b/encoding.c
@@ -459,6 +459,16 @@ enc_registered(struct enc_table *enc_table, const char *name)
return -1;
}
+int
+rb_enc_registered(const char *name)
+{
+ int idx; /* receives the result of the lookup below */
+ GLOBAL_ENC_TABLE_LOCKING(enc_table) { /* the lookup runs inside the global encoding table locking block */
+ idx = enc_registered(enc_table, name);
+ }
+ return idx; /* enc_registered()'s result, unchanged; NOTE(review): its visible tail returns -1, presumably "name not registered", so callers should compare against 0 rather than test truthiness - confirm against the full function */
+}
+
void
rb_encdb_declare(const char *name)
{
@@ -1600,8 +1610,10 @@ enc_set_default_encoding(struct default_encoding *def, VALUE encoding, const cha
/* Already set */
overridden = TRUE;
+ int index = 0;
if (!NIL_P(encoding)) {
enc_check_encoding(encoding); // loads it if necessary. Needs to be done outside of VM lock.
+ index = rb_enc_to_index(rb_to_encoding(encoding));
}
GLOBAL_ENC_TABLE_LOCKING(enc_table) {
@@ -1619,7 +1631,7 @@ enc_set_default_encoding(struct default_encoding *def, VALUE encoding, const cha
(st_data_t)UNSPECIFIED_ENCODING);
}
else {
- def->index = rb_enc_to_index(rb_to_encoding(encoding));
+ def->index = index;
def->enc = 0;
enc_alias_internal(enc_table, name, def->index);
}
diff --git a/internal/encoding.h b/internal/encoding.h
index c2ffaf4514..38bf8fc9da 100644
--- a/internal/encoding.h
+++ b/internal/encoding.h
@@ -29,6 +29,7 @@ void rb_encdb_declare(const char *name);
void rb_enc_set_base(const char *name, const char *orig);
int rb_enc_set_dummy(int index);
void rb_enc_raw_set(VALUE obj, rb_encoding *enc);
+int rb_enc_registered(const char *name);
PUREFUNC(int rb_data_is_encoding(VALUE obj));
diff --git a/test/ruby/test_encoding.rb b/test/ruby/test_encoding.rb
index 7ccbb31f50..5c1eb50bb1 100644
--- a/test/ruby/test_encoding.rb
+++ b/test/ruby/test_encoding.rb
@@ -157,4 +157,23 @@ class TestEncoding < Test::Unit::TestCase
assert rs.empty?
end;
end
+
+ def test_ractor_set_default_external_string # Runs under assert_ractor: 7 Ractors each assign Encoding.default_external = "us-ascii" (a String), then the test drains them with Ractor.select until none remain.
+ assert_ractor("#{<<~"begin;"}\n#{<<~'end;'}")
+ begin;
+ $-w = nil
+ rs = []
+ 7.times do |i|
+ rs << Ractor.new(i) do |i|
+ Encoding.default_external = "us-ascii"
+ end
+ end
+
+ while rs.any?
+ r, _obj = Ractor.select(*rs)
+ rs.delete(r)
+ end
+ assert rs.empty?
+ end;
+ end
end
diff --git a/test/ruby/test_transcode.rb b/test/ruby/test_transcode.rb
index 938e20e9a1..71e1cc9e2a 100644
--- a/test/ruby/test_transcode.rb
+++ b/test/ruby/test_transcode.rb
@@ -2361,6 +2361,52 @@ class TestTranscode < Test::Unit::TestCase
end;
end
+ def test_ractor_asciicompat_encoding_exists # Runs under assert_ractor: 7 Ractors each call Encoding::Converter.asciicompat_encoding 20_000 times with both the name "ISO-2022-JP" and its Encoding object; the test then waits for all Ractors to finish.
+ assert_ractor("#{<<~"begin;"}\n#{<<~'end;'}")
+ begin;
+ rs = []
+ 7.times do
+ rs << Ractor.new do
+ string = "ISO-2022-JP"
+ encoding = Encoding.find(string)
+ 20_000.times do
+ Encoding::Converter.asciicompat_encoding(string)
+ Encoding::Converter.asciicompat_encoding(encoding)
+ end
+ end
+ end
+
+ while rs.any?
+ r, _obj = Ractor.select(*rs)
+ rs.delete(r)
+ end
+ assert rs.empty?
+ end;
+ end
+
+ def test_ractor_asciicompat_encoding_doesnt_exist # Runs under assert_ractor: 7 Ractors each call Encoding::Converter.asciicompat_encoding 50 times with the frozen name "I" and raise if any call returns a truthy value; the test then waits for all Ractors to finish.
+ assert_ractor("#{<<~"begin;"}\n#{<<~'end;'}")
+ begin;
+ rs = []
+ NO_EXIST = "I".freeze
+ 7.times do
+ rs << Ractor.new do
+ 50.times do
+ if (val = Encoding::Converter.asciicompat_encoding(NO_EXIST))
+ raise "Got #{val}, expected nil"
+ end
+ end
+ end
+ end
+
+ while rs.any?
+ r, _obj = Ractor.select(*rs)
+ rs.delete(r)
+ end
+ assert rs.empty?
+ end;
+ end
+
private
def assert_conversion_both_ways_utf8(utf8, raw, encoding)
diff --git a/transcode.c b/transcode.c
index 507bce78e1..072e1942b1 100644
--- a/transcode.c
+++ b/transcode.c
@@ -19,6 +19,7 @@
#include "internal/object.h"
#include "internal/string.h"
#include "internal/transcode.h"
+#include "internal/encoding.h"
#include "ruby/encoding.h"
#include "vm_sync.h"
@@ -1826,7 +1827,9 @@ rb_econv_asciicompat_encoding(const char *ascii_incompat_name)
st_table *table2;
struct asciicompat_encoding_t data = {0};
- RB_VM_LOCKING() {
+ unsigned int lev;
+ RB_VM_LOCK_ENTER_LEV(&lev);
+ {
if (st_lookup(transcoder_table, (st_data_t)ascii_incompat_name, &v)) {
table2 = (st_table *)v;
/*
@@ -1839,12 +1842,25 @@ rb_econv_asciicompat_encoding(const char *ascii_incompat_name)
if (table2->num_entries == 1) {
data.ascii_incompat_name = ascii_incompat_name;
data.ascii_compat_name = NULL;
- st_foreach(table2, asciicompat_encoding_i, (st_data_t)&data);
+ if (rb_multi_ractor_p()) {
+ /*
+ * We need to unlock in case `load_transcoder_entry` actually loads the encoding
+ * and table2 could be inserted into when we unlock.
+ */
+ st_table *dup_table2 = st_copy(table2);
+ RB_VM_LOCK_LEAVE_LEV(&lev);
+ st_foreach(dup_table2, asciicompat_encoding_i, (st_data_t)&data);
+ st_free_table(dup_table2);
+ RB_VM_LOCK_ENTER_LEV(&lev);
+ }
+ else {
+ st_foreach(table2, asciicompat_encoding_i, (st_data_t)&data);
+ }
}
}
-
}
+ RB_VM_LOCK_LEAVE_LEV(&lev);
return data.ascii_compat_name; // can be NULL
}
@@ -2989,10 +3005,16 @@ static rb_encoding *
make_encoding(const char *name)
{
rb_encoding *enc;
- RB_VM_LOCKING() {
- enc = rb_enc_find(name);
- if (!enc)
- enc = make_dummy_encoding(name);
+ enc = rb_enc_find(name); /* look the encoding up first, before entering RB_VM_LOCKING */
+ if (!enc) { /* lookup failed: decide under the VM lock whether a dummy encoding is needed */
+ RB_VM_LOCKING() {
+ if (rb_enc_registered(name) >= 0) { /* rb_enc_registered() returns an index; -1 means "not registered" and index 0 is a valid entry, so compare against 0 instead of testing truthiness */
+ enc = NULL; /* name is already registered: do not create a dummy, return NULL */
+ }
+ else {
+ enc = make_dummy_encoding(name); /* name is unknown to the encoding table: create a dummy encoding for it */
+ }
+ }
}
return enc;
}
@@ -3029,14 +3051,10 @@ econv_s_asciicompat_encoding(VALUE klass, VALUE arg)
VALUE enc = Qnil;
enc_arg(&arg, &arg_name, &arg_enc);
-
- RB_VM_LOCKING() {
- result_name = rb_econv_asciicompat_encoding(arg_name);
-
- if (result_name) {
- result_enc = make_encoding(result_name);
- enc = rb_enc_from_encoding(result_enc);
- }
+ result_name = rb_econv_asciicompat_encoding(arg_name);
+ if (result_name) {
+ result_enc = make_encoding(result_name);
+ enc = rb_enc_from_encoding(result_enc);
}
return enc;
}