summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author nagachika <nagachika@ruby-lang.org> 2021-10-09 15:08:38 +0900
committer nagachika <nagachika@ruby-lang.org> 2021-10-09 15:08:38 +0900
commit 2c947e74a0a11fe6c54253c15224dc80054c62a2 (patch)
tree a1762cf792182842d3bcbd94ba05018095c1c30e
parent fe9d33beb78d5c7932a5c2ca3953045c0ae751d5 (diff)
merge revision(s) 60d0421ca861944459f52292d65dbf0ece26e38a,b6534691a16d751d59fc572d5dddebcaeb21f007,409dbc951b9875d27bd73748c88e15386473cffb,842b0008c132dd587f09766a228041afb7fed24f: [Backport #18191]
Fix the encoding of loaded feature names [Bug #18191]

The feature names loaded from the default load paths should also
be in the file system encoding.
---
 ruby.c                    | 12 +++++++++++-
 test/ruby/test_require.rb | 22 ++++++++++++++++++++++
 2 files changed, 33 insertions(+), 1 deletion(-)

Copy path strings as interned strings
---
 ruby.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

Replace expanded load path only when modified
---
 ruby.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

Skip broken strings as the locale encoding
---
 internal/string.h |  1 +
 ruby.c            | 11 +++++++----
 string.c          |  6 ++++++
 3 files changed, 14 insertions(+), 4 deletions(-)
-rw-r--r-- internal/string.h 1
-rw-r--r-- ruby.c 29
-rw-r--r-- string.c 6
-rw-r--r-- test/ruby/test_require.rb 22
-rw-r--r-- version.h 2
5 files changed, 57 insertions, 3 deletions
diff --git a/internal/string.h b/internal/string.h
index 8907a1a6e6..adc8385cb6 100644
--- a/internal/string.h
+++ b/internal/string.h
@@ -43,6 +43,7 @@ size_t rb_str_memsize(VALUE);
char *rb_str_to_cstr(VALUE str);
const char *ruby_escaped_char(int c);
void rb_str_make_independent(VALUE str);
+int rb_enc_str_coderange_scan(VALUE str, rb_encoding *enc);
static inline bool STR_EMBED_P(VALUE str);
static inline bool STR_SHARED_P(VALUE str);
diff --git a/ruby.c b/ruby.c
index 9990882423..2251fde4e2 100644
--- a/ruby.c
+++ b/ruby.c
@@ -1667,6 +1667,17 @@ tty_enabled(void)
#endif
static VALUE
+copy_str(VALUE str, rb_encoding *enc, bool intern)
+{
+ if (!intern) {
+ if (rb_enc_str_coderange_scan(str, enc) == ENC_CODERANGE_BROKEN)
+ return 0;
+ return rb_enc_associate(rb_str_dup(str), enc);
+ }
+ return rb_enc_interned_str(RSTRING_PTR(str), RSTRING_LEN(str), enc);
+}
+
+static VALUE
process_options(int argc, char **argv, ruby_cmdline_options_t *opt)
{
rb_ast_t *ast = 0;
@@ -1682,6 +1693,8 @@ process_options(int argc, char **argv, ruby_cmdline_options_t *opt)
char fbuf[MAXPATHLEN];
int i = (int)proc_options(argc, argv, opt, 0);
unsigned int dump = opt->dump & dump_exit_bits;
+ rb_vm_t *vm = GET_VM();
+ const long loaded_before_enc = RARRAY_LEN(vm->loaded_features);
if (opt->dump & (DUMP_BIT(usage)|DUMP_BIT(help))) {
int tty = isatty(1);
@@ -1883,7 +1896,6 @@ process_options(int argc, char **argv, ruby_cmdline_options_t *opt)
rb_obj_freeze(opt->script_name);
if (IF_UTF8_PATH(uenc != lenc, 1)) {
long i;
- rb_vm_t *vm = GET_VM();
VALUE load_path = vm->load_path;
const ID id_initial_load_path_mark = INITIAL_LOAD_PATH_MARK;
int modifiable = FALSE;
@@ -1897,7 +1909,7 @@ process_options(int argc, char **argv, ruby_cmdline_options_t *opt)
if (newpath == path) continue;
path = newpath;
#else
- path = rb_enc_associate(rb_str_dup(path), lenc);
+ if (!(path = copy_str(path, lenc, !mark))) continue;
#endif
if (mark) rb_ivar_set(path, id_initial_load_path_mark, path);
if (!modifiable) {
@@ -1910,6 +1922,19 @@ process_options(int argc, char **argv, ruby_cmdline_options_t *opt)
rb_ary_replace(vm->load_path_snapshot, load_path);
}
}
+ {
+ VALUE loaded_features = vm->loaded_features;
+ bool modified = false;
+ for (long i = loaded_before_enc; i < RARRAY_LEN(loaded_features); ++i) {
+ VALUE path = RARRAY_AREF(loaded_features, i);
+ if (!(path = copy_str(path, IF_UTF8_PATH(uenc, lenc), true))) continue;
+ modified = true;
+ RARRAY_ASET(loaded_features, i, path);
+ }
+ if (modified) {
+ rb_ary_replace(vm->loaded_features_snapshot, loaded_features);
+ }
+ }
if (opt->features.mask & COMPILATION_FEATURES) {
VALUE option = rb_hash_new();
diff --git a/string.c b/string.c
index e8021ccd8d..7fb45c1f72 100644
--- a/string.c
+++ b/string.c
@@ -711,6 +711,12 @@ enc_coderange_scan(VALUE str, rb_encoding *enc, int encidx)
}
int
+rb_enc_str_coderange_scan(VALUE str, rb_encoding *enc)
+{
+ return enc_coderange_scan(str, enc, rb_enc_to_index(enc));
+}
+
+int
rb_enc_str_coderange(VALUE str)
{
int cr = ENC_CODERANGE(str);
diff --git a/test/ruby/test_require.rb b/test/ruby/test_require.rb
index 52566ecbfb..9b658286c9 100644
--- a/test/ruby/test_require.rb
+++ b/test/ruby/test_require.rb
@@ -531,6 +531,28 @@ class TestRequire < Test::Unit::TestCase
$".replace(features)
end
+ def test_default_loaded_features_encoding
+ Dir.mktmpdir {|tmp|
+ Dir.mkdir("#{tmp}/1")
+ Dir.mkdir("#{tmp}/2")
+ File.write("#{tmp}/1/bug18191-1.rb", "")
+ File.write("#{tmp}/2/bug18191-2.rb", "")
+ assert_separately(%W[-Eutf-8 -I#{tmp}/1 -], "#{<<~"begin;"}\n#{<<~'end;'}")
+ tmp = #{tmp.dump}"/2"
+ begin;
+ $:.unshift(tmp)
+ require "bug18191-1"
+ require "bug18191-2"
+ encs = [Encoding::US_ASCII, Encoding.find("filesystem")]
+ message = -> {
+ require "pp"
+ {filesystem: encs[1], **$".group_by(&:encoding)}.pretty_inspect
+ }
+ assert($".all? {|n| encs.include?(n.encoding)}, message)
+ end;
+ }
+ end
+
def test_require_changed_current_dir
bug7158 = '[ruby-core:47970]'
Dir.mktmpdir {|tmp|
diff --git a/version.h b/version.h
index 862232ba9c..22a9985392 100644
--- a/version.h
+++ b/version.h
@@ -12,7 +12,7 @@
# define RUBY_VERSION_MINOR RUBY_API_VERSION_MINOR
#define RUBY_VERSION_TEENY 3
#define RUBY_RELEASE_DATE RUBY_RELEASE_YEAR_STR"-"RUBY_RELEASE_MONTH_STR"-"RUBY_RELEASE_DAY_STR
-#define RUBY_PATCHLEVEL 141
+#define RUBY_PATCHLEVEL 142
#define RUBY_RELEASE_YEAR 2021
#define RUBY_RELEASE_MONTH 10