From 33ea2646b98adb49ae2e1781753bf22d33729ac0 Mon Sep 17 00:00:00 2001 From: nobu Date: Sat, 29 Nov 2014 07:53:17 +0000 Subject: win32.c: use UTF-8 for argv * ruby.c (ruby_set_argv): convert argv from UTF-8. * win32/win32.c (rb_w32_sysinit, cmdglob, w32_cmdvector): convert wide char command line to UTF-8 argv, and glob in UTF-8 so that metacharacters would match multibyte characters. [ruby-dev:48752] [Bug #10555] git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@48648 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 9 +++++++++ ruby.c | 22 +++++++++++++++++++++- test/ruby/test_rubyoptions.rb | 12 ++++++++++++ win32/win32.c | 13 +++++++------ 4 files changed, 49 insertions(+), 7 deletions(-) diff --git a/ChangeLog b/ChangeLog index 3fdb9f4961..00e94fb27c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,12 @@ +Sat Nov 29 16:53:14 2014 Nobuyoshi Nakada + + * ruby.c (ruby_set_argv): convert argv from UTF-8. + + * win32/win32.c (rb_w32_sysinit, cmdglob, w32_cmdvector): convert + wide char command line to UTF-8 argv, and glob in UTF-8 so that + metacharacters would match multibyte characters. + [ruby-dev:48752] [Bug #10555] + Sat Nov 29 16:14:50 2014 Nobuyoshi Nakada * error.c (rb_typeddata_is_kind_of, rb_check_typeddata): ditto. diff --git a/ruby.c b/ruby.c index b2f485076d..f73be51297 100644 --- a/ruby.c +++ b/ruby.c @@ -311,6 +311,7 @@ ruby_incpush_expand(const char *path) ruby_push_include(path, expand_include_path); } +#undef UTF8_PATH #if defined _WIN32 || defined __CYGWIN__ static HMODULE libruby; @@ -327,6 +328,12 @@ rb_libruby_handle(void) { return libruby; } + +# define UTF8_PATH 1 +#endif + +#ifndef UTF8_PATH +# define UTF8_PATH 0 #endif void ruby_init_loadpath_safe(int safe_level); @@ -1794,6 +1801,19 @@ set_arg0(VALUE val, ID id) rb_progname = rb_str_new_frozen(proc_setproctitle(rb_mProcess, val)); } +static inline VALUE +external_str_new_cstr(const char *p) +{ +#if UTF8_PATH + VALUE str = rb_utf8_str_new_cstr(p); + return rb_str_conv_enc_opts(str, NULL, rb_default_external_encoding(), + ECONV_UNDEF_REPLACE|ECONV_INVALID_REPLACE, + Qnil); +#else + return rb_external_str_new_cstr(p); +#endif +} + /*! Sets the current script name to this value. * * This is similar to $0 = name in Ruby level but also affects @@ -1910,7 +1930,7 @@ ruby_set_argv(int argc, char **argv) #endif rb_ary_clear(av); for (i = 0; i < argc; i++) { - VALUE arg = rb_external_str_new_cstr(argv[i]); + VALUE arg = external_str_new_cstr(argv[i]); OBJ_FREEZE(arg); rb_ary_push(av, arg); diff --git a/test/ruby/test_rubyoptions.rb b/test/ruby/test_rubyoptions.rb index 31fe9aecb0..20dbc7798f 100644 --- a/test/ruby/test_rubyoptions.rb +++ b/test/ruby/test_rubyoptions.rb @@ -694,6 +694,18 @@ class TestRubyOptions < Test::Unit::TestCase end end + if /mswin|mingw/ =~ RUBY_PLATFORM + def test_command_line_glob_nonascii + bug10555 = '[ruby-dev:48752] [Bug #10555]' + name = "\u{3042}.txt" + with_tmpchdir do |dir| + open(name, "w") {} + assert_in_out_err(["-Eutf-8", "-e", "puts ARGV", "?.txt"], "", [name], [], + bug10555, encoding: "utf-8") + end + end + end + def test_script_is_directory feature2408 = '[ruby-core:26925]' assert_in_out_err(%w[.], "", [], /Is a directory -- \./, feature2408) diff --git a/win32/win32.c b/win32/win32.c index 301c1522e3..949e918bd0 100644 --- a/win32/win32.c +++ b/win32/win32.c @@ -108,6 +108,7 @@ int rb_w32_wait_events(HANDLE *events, int num, DWORD timeout); static int rb_w32_open_osfhandle(intptr_t osfhandle, int flags); static int wstati64(const WCHAR *path, struct stati64 *st); VALUE rb_w32_conv_from_wchar(const WCHAR *wstr, rb_encoding *enc); +int ruby_brace_glob_with_enc(const char *str, int flags, ruby_glob_func *func, VALUE arg, rb_encoding *enc); #define RUBY_CRITICAL(expr) do { expr; } while (0) @@ -743,7 +744,7 @@ socklist_delete(SOCKET *sockp, int *flagp) return ret; } -static int w32_cmdvector(const WCHAR *, char ***, UINT); +static int w32_cmdvector(const WCHAR *, char ***, UINT, rb_encoding *); // // Initialization stuff // @@ -767,7 +768,7 @@ rb_w32_sysinit(int *argc, char ***argv) // // subvert cmd.exe's feeble attempt at command line parsing // - *argc = w32_cmdvector(GetCommandLineW(), argv, CP_ACP); + *argc = w32_cmdvector(GetCommandLineW(), argv, CP_UTF8, rb_utf8_encoding()); // // Now set up the correct time stuff @@ -1486,7 +1487,7 @@ insert(const char *path, VALUE vinfo, void *enc) /* License: Artistic or GPL */ static NtCmdLineElement ** -cmdglob(NtCmdLineElement *patt, NtCmdLineElement **tail, UINT cp) +cmdglob(NtCmdLineElement *patt, NtCmdLineElement **tail, UINT cp, rb_encoding *enc) { char buffer[MAXPATHLEN], *buf = buffer; NtCmdLineElement **last = tail; @@ -1498,7 +1499,7 @@ cmdglob(NtCmdLineElement *patt, NtCmdLineElement **tail, UINT cp) strlcpy(buf, patt->str, patt->len + 1); buf[patt->len] = '\0'; translate_char(buf, '\\', '/', cp); - status = ruby_brace_glob(buf, 0, insert, (VALUE)&tail); + status = ruby_brace_glob_with_enc(buf, 0, insert, (VALUE)&tail, enc); if (buf != buffer) free(buf); @@ -1574,7 +1575,7 @@ skipspace(WCHAR *ptr) /* License: Artistic or GPL */ static int -w32_cmdvector(const WCHAR *cmd, char ***vec, UINT cp) +w32_cmdvector(const WCHAR *cmd, char ***vec, UINT cp, rb_encoding *enc) { int globbing, len; int elements, strsz, done; @@ -1742,7 +1743,7 @@ w32_cmdvector(const WCHAR *cmd, char ***vec, UINT cp) curr->str = rb_w32_wstr_to_mbstr(cp, base, len, &curr->len); curr->flags |= NTMALLOC; - if (globbing && (tail = cmdglob(curr, cmdtail, cp))) { + if (globbing && (tail = cmdglob(curr, cmdtail, cp, enc))) { cmdtail = tail; } else { -- cgit v1.2.3