From 3f0ec8887f3bc75a98fd3ec8926518b635c70b7b Mon Sep 17 00:00:00 2001 From: matz Date: Sat, 18 Oct 2008 10:36:20 +0000 Subject: * string.c (rb_external_str_new): a new function to convert from external encoding to internal encoding. if something went wrong, it returns a string with the external encoding. * string.c (rb_external_str_new_with_enc): same as above besides you can specify the source encoding. * ruby.c (ruby_set_argv): use rb_external_str_new() * ruby.c (set_arg0, ruby_script): ditto. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@19834 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 13 ++++++++++++ dir.c | 16 +++------------ include/ruby/encoding.h | 2 ++ include/ruby/intern.h | 1 + ruby.c | 14 +++++-------- string.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 78 insertions(+), 22 deletions(-) diff --git a/ChangeLog b/ChangeLog index a98a63971b..bbfc75eb3f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,16 @@ +Sat Oct 18 13:30:53 2008 Yukihiro Matsumoto + + * string.c (rb_external_str_new): a new function to convert from + external encoding to internal encoding. if something went + wrong, it returns a string with the external encoding. + + * string.c (rb_external_str_new_with_enc): same as above besides + you can specify the source encoding. + + * ruby.c (ruby_set_argv): use rb_external_str_new() + + * ruby.c (set_arg0, ruby_script): ditto. 
+ Sat Oct 18 04:08:18 2008 Yukihiro Matsumoto * lib/tempfile.rb (Tempfile#initialize): now Tempfile.new takes diff --git a/dir.c b/dir.c index dd98b878cd..3de4c290be 100644 --- a/dir.c +++ b/dir.c @@ -423,16 +423,6 @@ dir_check(VALUE dir) if (dirp->dir == NULL) dir_closed();\ } while (0) -static VALUE -dir_enc_str_new(const char *p, long len, rb_encoding *enc) -{ - VALUE path = rb_tainted_str_new(p, len); - if (rb_enc_asciicompat(enc) && rb_enc_str_asciionly_p(path)) { - enc = rb_usascii_encoding(); - } - rb_enc_associate(path, enc); - return path; -} /* * call-seq: @@ -494,7 +484,7 @@ dir_read(VALUE dir) errno = 0; dp = readdir(dirp->dir); if (dp) { - return dir_enc_str_new(dp->d_name, NAMLEN(dp), dirp->enc); + return rb_external_str_new_with_enc(dp->d_name, NAMLEN(dp), dirp->enc); } else if (errno == 0) { /* end of stream */ return Qnil; @@ -532,7 +522,7 @@ dir_each(VALUE dir) GetDIR(dir, dirp); rewinddir(dirp->dir); for (dp = readdir(dirp->dir); dp != NULL; dp = readdir(dirp->dir)) { - rb_yield(dir_enc_str_new(dp->d_name, NAMLEN(dp), dirp->enc)); + rb_yield(rb_external_str_new_with_enc(dp->d_name, NAMLEN(dp), dirp->enc)); if (dirp->dir == NULL) dir_closed(); } return dir; @@ -1436,7 +1426,7 @@ rb_glob(const char *path, void (*func)(const char *, VALUE, void *), VALUE arg) static void push_pattern(const char *path, VALUE ary, void *enc) { - rb_ary_push(ary, dir_enc_str_new(path, strlen(path), enc)); + rb_ary_push(ary, rb_external_str_new_with_enc(path, strlen(path), enc)); } static int diff --git a/include/ruby/encoding.h b/include/ruby/encoding.h index 9049df3fa2..1b1cf33d58 100644 --- a/include/ruby/encoding.h +++ b/include/ruby/encoding.h @@ -92,6 +92,8 @@ char* rb_enc_nth(const char*, const char*, int, rb_encoding*); VALUE rb_obj_encoding(VALUE); VALUE rb_enc_str_buf_cat(VALUE str, const char *ptr, long len, rb_encoding *enc); +VALUE rb_external_str_new_with_enc(const char *ptr, long len, rb_encoding *enc); + /* index -> rb_encoding */ rb_encoding* 
rb_enc_from_index(int idx); diff --git a/include/ruby/intern.h b/include/ruby/intern.h index d99c39636b..8e38217aac 100644 --- a/include/ruby/intern.h +++ b/include/ruby/intern.h @@ -546,6 +546,7 @@ VALUE rb_str_new5(VALUE, const char*, long); VALUE rb_tainted_str_new_cstr(const char*); VALUE rb_tainted_str_new(const char*, long); VALUE rb_tainted_str_new2(const char*); +VALUE rb_external_str_new(const char*, long); VALUE rb_str_buf_new(long); VALUE rb_str_buf_new_cstr(const char*); VALUE rb_str_buf_new2(const char*); diff --git a/ruby.c b/ruby.c index 881ebe7fb2..ba30be6eff 100644 --- a/ruby.c +++ b/ruby.c @@ -999,7 +999,6 @@ process_options(VALUE arg) NODE *tree = 0; VALUE parser; VALUE iseq; - VALUE args; rb_encoding *enc, *lenc; const char *s; char fbuf[MAXPATHLEN]; @@ -1108,17 +1107,12 @@ process_options(VALUE arg) opt->script = RSTRING_PTR(opt->script_name); safe = rb_safe_level(); rb_set_safe_level_force(0); - ruby_set_argv(argc, argv); - process_sflag(opt); ruby_init_loadpath(); ruby_init_gems(!(opt->disable & DISABLE_BIT(gems))); lenc = rb_locale_encoding(); rb_enc_associate(rb_progname, lenc); opt->script_name = rb_str_new4(rb_progname); - for (i = 0, args = rb_argv; i < RARRAY_LEN(args); i++) { - rb_enc_associate(RARRAY_PTR(args)[i], lenc); - } parser = rb_parser_new(); if (opt->yydebug) rb_parser_set_yydebug(parser, Qtrue); if (opt->ext.enc.name != 0) { @@ -1143,6 +1137,8 @@ process_options(VALUE arg) rb_enc_set_default_internal(rb_enc_from_encoding(enc)); opt->intern.enc.index = -1; } + ruby_set_argv(argc, argv); + process_sflag(opt); rb_set_safe_level_force(safe); if (opt->e_script) { @@ -1457,14 +1453,14 @@ set_arg0(VALUE val, ID id) } } #endif - rb_progname = rb_obj_freeze(rb_tainted_str_new(s, i)); + rb_progname = rb_obj_freeze(rb_external_str_new(s, i)); } void ruby_script(const char *name) { if (name) { - rb_progname = rb_obj_freeze(rb_tainted_str_new2(name)); + rb_progname = rb_obj_freeze(rb_external_str_new(name, strlen(name))); } } @@ -1547,7 
+1543,7 @@ ruby_set_argv(int argc, char **argv) #endif rb_ary_clear(av); for (i = 0; i < argc; i++) { - VALUE arg = rb_tainted_str_new2(argv[i]); + VALUE arg = rb_external_str_new(argv[i], strlen(argv[i])); OBJ_FREEZE(arg); rb_ary_push(av, arg); diff --git a/string.c b/string.c index 601845c447..2b126934c1 100644 --- a/string.c +++ b/string.c @@ -472,6 +472,60 @@ rb_tainted_str_new_cstr(const char *ptr) RUBY_ALIAS_FUNCTION(rb_tainted_str_new2(const char *ptr), rb_tainted_str_new_cstr, (ptr)) #define rb_tainted_str_new2 rb_tainted_str_new_cstr +VALUE +rb_external_str_new_with_enc(const char *ptr, long len, rb_encoding *eenc) +{ + VALUE str; + rb_encoding *ienc; + + if (len == 0 && !ptr) ptr = ""; /* NULL with zero length means an empty string; never strlen() a NULL pointer */ + str = rb_tainted_str_new(ptr, len); + rb_enc_associate(str, eenc); + ienc = rb_default_internal_encoding(); + if (ienc) { + rb_econv_t *ec; + rb_econv_result_t ret; + VALUE newstr = rb_str_new(0, len); + long nlen = len; + const unsigned char *sp; + unsigned char *dp; + + retry: + ec = rb_econv_open_opts(eenc->name, ienc->name, 0, Qnil); + if (!ec) return str; + + sp = (unsigned char*)RSTRING_PTR(str); + dp = (unsigned char*)RSTRING_PTR(newstr); + ret = rb_econv_convert(ec, &sp, (unsigned char*)RSTRING_END(str), + &dp, (unsigned char*)RSTRING_END(newstr), 0); + rb_econv_close(ec); + switch (ret) { + case econv_destination_buffer_full: + /* destination buffer short */ + nlen *= 2; + rb_str_resize(newstr, nlen); + goto retry; + + case econv_finished: + nlen = dp - (unsigned char*)RSTRING_PTR(newstr); + rb_str_set_len(newstr, nlen); + rb_enc_associate(newstr, ienc); + return newstr; + + default: + /* some error, return original */ + return str; + } + } + return str; +} + +VALUE +rb_external_str_new(const char *ptr, long len) +{ + return rb_external_str_new_with_enc(ptr, len, rb_default_external_encoding()); +} + static VALUE str_replace_shared(VALUE str2, VALUE str) { -- cgit v1.2.3