summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author matz <matz@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2008-10-18 10:36:20 +0000
committer matz <matz@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2008-10-18 10:36:20 +0000
commit 3f0ec8887f3bc75a98fd3ec8926518b635c70b7b (patch)
tree cd0f721ca0e03ccec39353962869b9a6e8477c93
parent 88aa8632741d027d1d2e58f8073d4ec90845499d (diff)
* string.c (rb_external_str_new): a new function to convert from
  external encoding to internal encoding. If something goes wrong,
  it returns a string with the external encoding.
* string.c (rb_external_str_new_with_enc): same as above, except
  that you can specify the source encoding.
* ruby.c (ruby_set_argv): use rb_external_str_new().
* ruby.c (set_arg0, ruby_script): ditto.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@19834 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- ChangeLog 13
-rw-r--r-- dir.c 16
-rw-r--r-- include/ruby/encoding.h 2
-rw-r--r-- include/ruby/intern.h 1
-rw-r--r-- ruby.c 14
-rw-r--r-- string.c 54
6 files changed, 78 insertions, 22 deletions
diff --git a/ChangeLog b/ChangeLog
index a98a63971b..bbfc75eb3f 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,16 @@
+Sat Oct 18 13:30:53 2008 Yukihiro Matsumoto <matz@ruby-lang.org>
+
+ * string.c (rb_external_str_new): a new function to convert from
+ external encoding to internal encoding. if something went
+ wrong, it returns a string with the external encoding.
+
+ * string.c (rb_external_str_new_with_enc): same as above besides
+ you can specify the source encoding.
+
+ * ruby.c (ruby_set_argv): use rb_external_str_new()
+
+ * ruby.c (set_arg0, ruby_script): ditto.
+
Sat Oct 18 04:08:18 2008 Yukihiro Matsumoto <matz@ruby-lang.org>
* lib/tempfile.rb (Tempfile#initialize): now Tempfile.new takes
diff --git a/dir.c b/dir.c
index dd98b878cd..3de4c290be 100644
--- a/dir.c
+++ b/dir.c
@@ -423,16 +423,6 @@ dir_check(VALUE dir)
if (dirp->dir == NULL) dir_closed();\
} while (0)
-static VALUE
-dir_enc_str_new(const char *p, long len, rb_encoding *enc)
-{
- VALUE path = rb_tainted_str_new(p, len);
- if (rb_enc_asciicompat(enc) && rb_enc_str_asciionly_p(path)) {
- enc = rb_usascii_encoding();
- }
- rb_enc_associate(path, enc);
- return path;
-}
/*
* call-seq:
@@ -494,7 +484,7 @@ dir_read(VALUE dir)
errno = 0;
dp = readdir(dirp->dir);
if (dp) {
- return dir_enc_str_new(dp->d_name, NAMLEN(dp), dirp->enc);
+ return rb_external_str_new_with_enc(dp->d_name, NAMLEN(dp), dirp->enc);
}
else if (errno == 0) { /* end of stream */
return Qnil;
@@ -532,7 +522,7 @@ dir_each(VALUE dir)
GetDIR(dir, dirp);
rewinddir(dirp->dir);
for (dp = readdir(dirp->dir); dp != NULL; dp = readdir(dirp->dir)) {
- rb_yield(dir_enc_str_new(dp->d_name, NAMLEN(dp), dirp->enc));
+ rb_yield(rb_external_str_new_with_enc(dp->d_name, NAMLEN(dp), dirp->enc));
if (dirp->dir == NULL) dir_closed();
}
return dir;
@@ -1436,7 +1426,7 @@ rb_glob(const char *path, void (*func)(const char *, VALUE, void *), VALUE arg)
static void
push_pattern(const char *path, VALUE ary, void *enc)
{
- rb_ary_push(ary, dir_enc_str_new(path, strlen(path), enc));
+ rb_ary_push(ary, rb_external_str_new_with_enc(path, strlen(path), enc));
}
static int
diff --git a/include/ruby/encoding.h b/include/ruby/encoding.h
index 9049df3fa2..1b1cf33d58 100644
--- a/include/ruby/encoding.h
+++ b/include/ruby/encoding.h
@@ -92,6 +92,8 @@ char* rb_enc_nth(const char*, const char*, int, rb_encoding*);
VALUE rb_obj_encoding(VALUE);
VALUE rb_enc_str_buf_cat(VALUE str, const char *ptr, long len, rb_encoding *enc);
+VALUE rb_external_str_new_with_enc(const char *ptr, long len, rb_encoding *enc);
+
/* index -> rb_encoding */
rb_encoding* rb_enc_from_index(int idx);
diff --git a/include/ruby/intern.h b/include/ruby/intern.h
index d99c39636b..8e38217aac 100644
--- a/include/ruby/intern.h
+++ b/include/ruby/intern.h
@@ -546,6 +546,7 @@ VALUE rb_str_new5(VALUE, const char*, long);
VALUE rb_tainted_str_new_cstr(const char*);
VALUE rb_tainted_str_new(const char*, long);
VALUE rb_tainted_str_new2(const char*);
+VALUE rb_external_str_new(const char*, long);
VALUE rb_str_buf_new(long);
VALUE rb_str_buf_new_cstr(const char*);
VALUE rb_str_buf_new2(const char*);
diff --git a/ruby.c b/ruby.c
index 881ebe7fb2..ba30be6eff 100644
--- a/ruby.c
+++ b/ruby.c
@@ -999,7 +999,6 @@ process_options(VALUE arg)
NODE *tree = 0;
VALUE parser;
VALUE iseq;
- VALUE args;
rb_encoding *enc, *lenc;
const char *s;
char fbuf[MAXPATHLEN];
@@ -1108,17 +1107,12 @@ process_options(VALUE arg)
opt->script = RSTRING_PTR(opt->script_name);
safe = rb_safe_level();
rb_set_safe_level_force(0);
- ruby_set_argv(argc, argv);
- process_sflag(opt);
ruby_init_loadpath();
ruby_init_gems(!(opt->disable & DISABLE_BIT(gems)));
lenc = rb_locale_encoding();
rb_enc_associate(rb_progname, lenc);
opt->script_name = rb_str_new4(rb_progname);
- for (i = 0, args = rb_argv; i < RARRAY_LEN(args); i++) {
- rb_enc_associate(RARRAY_PTR(args)[i], lenc);
- }
parser = rb_parser_new();
if (opt->yydebug) rb_parser_set_yydebug(parser, Qtrue);
if (opt->ext.enc.name != 0) {
@@ -1143,6 +1137,8 @@ process_options(VALUE arg)
rb_enc_set_default_internal(rb_enc_from_encoding(enc));
opt->intern.enc.index = -1;
}
+ ruby_set_argv(argc, argv);
+ process_sflag(opt);
rb_set_safe_level_force(safe);
if (opt->e_script) {
@@ -1457,14 +1453,14 @@ set_arg0(VALUE val, ID id)
}
}
#endif
- rb_progname = rb_obj_freeze(rb_tainted_str_new(s, i));
+ rb_progname = rb_obj_freeze(rb_external_str_new(s, i));
}
void
ruby_script(const char *name)
{
if (name) {
- rb_progname = rb_obj_freeze(rb_tainted_str_new2(name));
+ rb_progname = rb_obj_freeze(rb_external_str_new(name, strlen(name)));
}
}
@@ -1547,7 +1543,7 @@ ruby_set_argv(int argc, char **argv)
#endif
rb_ary_clear(av);
for (i = 0; i < argc; i++) {
- VALUE arg = rb_tainted_str_new2(argv[i]);
+ VALUE arg = rb_external_str_new(argv[i], strlen(argv[i]));
OBJ_FREEZE(arg);
rb_ary_push(av, arg);
diff --git a/string.c b/string.c
index 601845c447..2b126934c1 100644
--- a/string.c
+++ b/string.c
@@ -472,6 +472,60 @@ rb_tainted_str_new_cstr(const char *ptr)
RUBY_ALIAS_FUNCTION(rb_tainted_str_new2(const char *ptr), rb_tainted_str_new_cstr, (ptr))
#define rb_tainted_str_new2 rb_tainted_str_new_cstr
+VALUE
+rb_external_str_new_with_enc(const char *ptr, long len, rb_encoding *eenc)
+{
+    /* Build a string from ptr[0,len] with rb_tainted_str_new() and tag it eenc. When a default internal
+     * encoding is set, return a copy transcoded to it; otherwise, or on failure, return the eenc string. */
+    VALUE str;
+    rb_encoding *ienc;
+
+    /* len is used as given (no strlen() fallback): ptr may be NULL when len is 0 */
+    str = rb_tainted_str_new(ptr, len);
+    rb_enc_associate(str, eenc);
+    ienc = rb_default_internal_encoding();
+    if (ienc) {
+        rb_econv_t *ec;
+        rb_econv_result_t ret;
+        VALUE newstr = rb_str_new(0, len);
+        long nlen = len;
+        const unsigned char *sp;
+        unsigned char *dp;
+
+      retry:
+        ec = rb_econv_open_opts(eenc->name, ienc->name, 0, Qnil);
+        if (!ec) return str; /* no converter could be opened for this pair */
+        sp = (unsigned char*)RSTRING_PTR(str);
+        dp = (unsigned char*)RSTRING_PTR(newstr);
+        ret = rb_econv_convert(ec, &sp, (unsigned char*)RSTRING_END(str),
+                               &dp, (unsigned char*)RSTRING_END(newstr), 0);
+        rb_econv_close(ec);
+        switch (ret) {
+          case econv_destination_buffer_full:
+            /* destination buffer short: grow from at least 2 bytes so an empty buffer grows too */
+            nlen = nlen < 2 ? 2 : nlen * 2;
+            rb_str_resize(newstr, nlen);
+            goto retry;
+          case econv_finished:
+            /* trim newstr to the bytes actually written */
+            nlen = dp - (unsigned char*)RSTRING_PTR(newstr);
+            rb_str_set_len(newstr, nlen);
+            rb_enc_associate(newstr, ienc);
+            return newstr;
+          default:
+            /* some error, return original */
+            return str;
+        }
+    }
+    return str;
+}
+
+VALUE
+rb_external_str_new(const char *ptr, long len)
+{
+    rb_encoding *eenc = rb_default_external_encoding(); /* source encoding for the conversion */
+    return rb_external_str_new_with_enc(ptr, len, eenc);
+}
static VALUE
str_replace_shared(VALUE str2, VALUE str)
{