From ca76337a00244635faa331afd04f4b75161ce6fb Mon Sep 17 00:00:00 2001 From: Lars Kanis Date: Mon, 7 Dec 2020 18:00:39 +0100 Subject: Windows: Read ENV names and values as UTF-8 encoded Strings (#3818) * Windows: Read ENV names and values as UTF-8 encoded Strings Implements issue #12650: fix https://bugs.ruby-lang.org/issues/12650 This also removes the special encoding for ENV['PATH'] and some complexity in the code that is unnecessary now. * Windows: Improve readablity of getenv() encoding getenv() did use the expected codepage as an implicit parameter of the macro. This is mis-leading since include/ruby/win32.h has a different definition. Using the "cp" variable explicit (like the other function calls) makes it more readable and consistent. * Windows: Change external C-API macros getenv() and execv() to use UTF-8 They used to process and return strings with locale encoding, but since all ruby-internal spawn and environment functions use UTF-8, it makes sense to change the C-API equally. --- hash.c | 75 +++++----------------------- include/ruby/win32.h | 4 +- spec/ruby/core/env/element_reference_spec.rb | 1 + spec/ruby/core/env/fetch_spec.rb | 3 +- spec/ruby/core/env/shift_spec.rb | 5 +- spec/ruby/core/env/values_at_spec.rb | 3 +- test/ruby/test_env.rb | 19 ++++--- test/ruby/test_m17n.rb | 10 ++-- win32/win32.c | 1 - 9 files changed, 39 insertions(+), 82 deletions(-) diff --git a/hash.c b/hash.c index bde35d06b0..98405ec541 100644 --- a/hash.c +++ b/hash.c @@ -4815,22 +4815,7 @@ static char **my_environ; #undef environ #define environ my_environ #undef getenv -static char *(*w32_getenv)(const char*); -static char * -w32_getenv_unknown(const char *name) -{ - char *(*func)(const char*); - if (rb_locale_encindex() == rb_ascii8bit_encindex()) { - func = rb_w32_getenv; - } - else { - func = rb_w32_ugetenv; - } - /* atomic assignment in flat memory model */ - return (w32_getenv = func)(name); -} -static char *(*w32_getenv)(const char*) = w32_getenv_unknown; -#define getenv(n) w32_getenv(n) +#define getenv(n) rb_w32_ugetenv(n) #elif defined(__APPLE__) #undef environ #define environ (*_NSGetEnviron()) @@ -4849,20 +4834,20 @@ extern char **environ; #define ENVNMATCH(s1, s2, n) (memcmp((s1), (s2), (n)) == 0) #endif -static VALUE -env_enc_str_new(const char *ptr, long len, rb_encoding *enc) +static inline rb_encoding * +env_encoding() { #ifdef _WIN32 - rb_encoding *internal = rb_default_internal_encoding(); - const int ecflags = ECONV_INVALID_REPLACE | ECONV_UNDEF_REPLACE; - rb_encoding *utf8 = rb_utf8_encoding(); - VALUE str = rb_enc_str_new(NULL, 0, (internal ? internal : enc)); - if (NIL_P(rb_str_cat_conv_enc_opts(str, 0, ptr, len, utf8, ecflags, Qnil))) { - rb_str_initialize(str, ptr, len, NULL); - } + return rb_utf8_encoding(); #else - VALUE str = rb_external_str_new_with_enc(ptr, len, enc); + return rb_locale_encoding(); #endif +} + +static VALUE +env_enc_str_new(const char *ptr, long len, rb_encoding *enc) +{ + VALUE str = rb_external_str_new_with_enc(ptr, len, enc); rb_obj_freeze(str); return str; @@ -4877,7 +4862,7 @@ env_enc_str_new_cstr(const char *ptr, rb_encoding *enc) static VALUE env_str_new(const char *ptr, long len) { - return env_enc_str_new(ptr, len, rb_locale_encoding()); + return env_enc_str_new(ptr, len, env_encoding()); } static VALUE @@ -4889,46 +4874,23 @@ env_str_new2(const char *ptr) static const char TZ_ENV[] = "TZ"; -static rb_encoding * -env_encoding_for(const char *name, const char *ptr) -{ - if (ENVMATCH(name, PATH_ENV)) { - return rb_filesystem_encoding(); - } - else { - return rb_locale_encoding(); - } -} - static VALUE env_name_new(const char *name, const char *ptr) { - return env_enc_str_new_cstr(ptr, env_encoding_for(name, ptr)); + return env_enc_str_new_cstr(ptr, env_encoding()); } static void * get_env_cstr( -#ifdef _WIN32 - volatile VALUE *pstr, -#else VALUE str, -#endif const char *name) { -#ifdef _WIN32 - VALUE str = *pstr; -#endif char *var; rb_encoding *enc = rb_enc_get(str); if (!rb_enc_asciicompat(enc)) { rb_raise(rb_eArgError, "bad environment variable %s: ASCII incompatible encoding: %s", name, rb_enc_name(enc)); } -#ifdef _WIN32 - if (!rb_enc_str_asciionly_p(str)) { - *pstr = str = rb_str_conv_enc(str, NULL, rb_utf8_encoding()); - } -#endif var = RSTRING_PTR(str); if (memchr(var, '\0', RSTRING_LEN(str))) { rb_raise(rb_eArgError, "bad environment variable %s: contains null byte", name); @@ -4936,13 +4898,8 @@ get_env_cstr( return rb_str_fill_terminator(str, 1); /* ASCII compatible */ } -#ifdef _WIN32 -#define get_env_ptr(var, val) \ - (var = get_env_cstr(&(val), #var)) -#else #define get_env_ptr(var, val) \ (var = get_env_cstr(val, #var)) -#endif static inline const char * env_name(volatile VALUE *s) @@ -4983,9 +4940,6 @@ env_delete(VALUE name) VALUE value = env_str_new2(val); ruby_setenv(nam, 0); - if (ENVMATCH(nam, PATH_ENV)) { - RB_GC_GUARD(name); - } return value; } return Qnil; @@ -5407,9 +5361,6 @@ env_aset(VALUE nm, VALUE val) get_env_ptr(value, val); ruby_setenv(name, value); - if (ENVMATCH(name, PATH_ENV)) { - RB_GC_GUARD(nm); - } reset_by_modified_env(name); return val; } diff --git a/include/ruby/win32.h b/include/ruby/win32.h index b29470b0c4..b3b8b47fe3 100644 --- a/include/ruby/win32.h +++ b/include/ruby/win32.h @@ -160,7 +160,7 @@ typedef int clockid_t; #define Sleep(msec) (void)rb_w32_Sleep(msec) #undef execv -#define execv(path,argv) rb_w32_aspawn(P_OVERLAY,path,argv) +#define execv(path,argv) rb_w32_uaspawn(P_OVERLAY,path,argv) #undef isatty #define isatty(h) rb_w32_isatty(h) @@ -717,7 +717,7 @@ extern char *rb_w32_strerror(int); #define getcwd(b, s) rb_w32_getcwd(b, s) #undef getenv -#define getenv(n) rb_w32_getenv(n) +#define getenv(n) rb_w32_ugetenv(n) #undef rename #define rename(o, n) rb_w32_rename(o, n) diff --git a/spec/ruby/core/env/element_reference_spec.rb b/spec/ruby/core/env/element_reference_spec.rb index 1cd58ace54..22633e7a08 100644 --- a/spec/ruby/core/env/element_reference_spec.rb +++ b/spec/ruby/core/env/element_reference_spec.rb @@ -59,6 +59,7 @@ describe "ENV.[]" do Encoding.default_internal = nil locale = Encoding.find('locale') + locale = Encoding::UTF_8 if platform_is :windows locale = Encoding::BINARY if locale == Encoding::US_ASCII ENV[@variable] = "\xC3\xB8" ENV[@variable].encoding.should == locale diff --git a/spec/ruby/core/env/fetch_spec.rb b/spec/ruby/core/env/fetch_spec.rb index ef8f0a4ed3..f3af6f3dc2 100644 --- a/spec/ruby/core/env/fetch_spec.rb +++ b/spec/ruby/core/env/fetch_spec.rb @@ -56,7 +56,8 @@ describe "ENV.fetch" do end it "uses the locale encoding" do + encoding = platform_is(:windows) ? Encoding::UTF_8 : Encoding.find('locale') ENV["foo"] = "bar" - ENV.fetch("foo").encoding.should == Encoding.find('locale') + ENV.fetch("foo").encoding.should == encoding end end diff --git a/spec/ruby/core/env/shift_spec.rb b/spec/ruby/core/env/shift_spec.rb index 0fe08d4f6d..49d98c3729 100644 --- a/spec/ruby/core/env/shift_spec.rb +++ b/spec/ruby/core/env/shift_spec.rb @@ -42,9 +42,10 @@ describe "ENV.shift" do it "uses the locale encoding if Encoding.default_internal is nil" do Encoding.default_internal = nil + encoding = platform_is(:windows) ? Encoding::UTF_8 : Encoding.find('locale') pair = ENV.shift - pair.first.encoding.should equal(Encoding.find("locale")) - pair.last.encoding.should equal(Encoding.find("locale")) + pair.first.encoding.should equal(encoding) + pair.last.encoding.should equal(encoding) end it "transcodes from the locale encoding to Encoding.default_internal if set" do diff --git a/spec/ruby/core/env/values_at_spec.rb b/spec/ruby/core/env/values_at_spec.rb index ee970e5f65..87c8c7f663 100644 --- a/spec/ruby/core/env/values_at_spec.rb +++ b/spec/ruby/core/env/values_at_spec.rb @@ -28,7 +28,8 @@ describe "ENV.values_at" do end it "uses the locale encoding" do - ENV.values_at(ENV.keys.first).first.encoding.should == Encoding.find('locale') + encoding = platform_is(:windows) ? Encoding::UTF_8 : Encoding.find('locale') + ENV.values_at(ENV.keys.first).first.encoding.should == encoding end it "raises TypeError when a key is not coercible to String" do diff --git a/test/ruby/test_env.rb b/test/ruby/test_env.rb index 7735b53045..ddfce136a4 100644 --- a/test/ruby/test_env.rb +++ b/test/ruby/test_env.rb @@ -369,7 +369,8 @@ class TestEnv < Test::Unit::TestCase assert_equal("foo", v) end assert_invalid_env {|var| ENV.assoc(var)} - assert_equal(Encoding.find("locale"), v.encoding) + encoding = /mswin|mingw/ =~ RUBY_PLATFORM ? Encoding::UTF_8 : Encoding.find("locale") + assert_equal(encoding, v.encoding) end def test_has_value2 @@ -579,15 +580,13 @@ class TestEnv < Test::Unit::TestCase end; end - if Encoding.find("locale") == Encoding::UTF_8 - def test_utf8 - text = "testing \u{e5 e1 e2 e4 e3 101 3042}" - test = ENV["test"] - ENV["test"] = text - assert_equal text, ENV["test"] - ensure - ENV["test"] = test - end + def test_utf8 + text = "testing \u{e5 e1 e2 e4 e3 101 3042}" + test = ENV["test"] + ENV["test"] = text + assert_equal text, ENV["test"] + ensure + ENV["test"] = test end end end diff --git a/test/ruby/test_m17n.rb b/test/ruby/test_m17n.rb index 2c6dc3f8f5..3f28d55ac1 100644 --- a/test/ruby/test_m17n.rb +++ b/test/ruby/test_m17n.rb @@ -1325,10 +1325,14 @@ class TestM17N < Test::Unit::TestCase end def test_env - locale_encoding = Encoding.find("locale") + if RUBY_PLATFORM =~ /bccwin|mswin|mingw/ + env_encoding = Encoding::UTF_8 + else + env_encoding = Encoding.find("locale") + end ENV.each {|k, v| - assert_equal(locale_encoding, k.encoding, k) - assert_equal(locale_encoding, v.encoding, v) + assert_equal(env_encoding, k.encoding, k) + assert_equal(env_encoding, v.encoding, v) } end diff --git a/win32/win32.c b/win32/win32.c index 17449fad4f..ac8319529b 100644 --- a/win32/win32.c +++ b/win32/win32.c @@ -77,7 +77,6 @@ static char *w32_getenv(const char *name, UINT cp); #define DLN_FIND_EXTRA_ARG_DECL ,UINT cp #define DLN_FIND_EXTRA_ARG ,cp #define rb_w32_stati128(path, st) w32_stati128(path, st, cp, FALSE) -#define getenv(name) w32_getenv(name, cp) #undef CharNext #define CharNext(p) CharNextExA(cp, (p), 0) #define dln_find_exe_r rb_w32_udln_find_exe_r -- cgit v1.2.3