summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ChangeLog12
-rw-r--r--ext/dbm/dbm.c14
-rw-r--r--ext/sdbm/init.c62
-rw-r--r--include/ruby/encoding.h3
-rw-r--r--include/ruby/ruby.h6
-rw-r--r--string.c97
6 files changed, 117 insertions, 77 deletions
diff --git a/ChangeLog b/ChangeLog
index 4f0f2e646c..1996c22dab 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,15 @@
+Mon Oct 20 16:48:43 2008 Yukihiro Matsumoto <matz@ruby-lang.org>
+
+ * include/ruby/ruby.h (ExportStringValue): new macro to convert
+ string in internal encoding to external to export.
+
+ * string.c (rb_str_export): new function to do conversion to
+ external encoding.
+
+ * ext/sdbm/init.c: encoding conversion support.
+
+ * ext/dbm/dbm.c: ditto.
+
Mon Oct 20 15:42:02 2008 Yukihiro Matsumoto <matz@ruby-lang.org>
* string.c (rb_locale_str_new): new function to convert string
diff --git a/ext/dbm/dbm.c b/ext/dbm/dbm.c
index 42bd4ff057..47975cffc2 100644
--- a/ext/dbm/dbm.c
+++ b/ext/dbm/dbm.c
@@ -109,7 +109,7 @@ fdbm_initialize(int argc, VALUE *argv, VALUE obj)
if (!NIL_P(vflags))
flags = NUM2INT(vflags);
- SafeStringValue(file);
+ FilePathValue(file);
if (flags & RUBY_DBM_RW_BIT) {
flags &= ~RUBY_DBM_RW_BIT;
@@ -164,7 +164,7 @@ fdbm_fetch(VALUE obj, VALUE keystr, VALUE ifnone)
struct dbmdata *dbmp;
DBM *dbm;
- StringValue(keystr);
+ ExportStringValue(keystr);
key.dptr = RSTRING_PTR(keystr);
key.dsize = RSTRING_LEN(keystr);
@@ -204,7 +204,7 @@ fdbm_index(VALUE obj, VALUE valstr)
struct dbmdata *dbmp;
DBM *dbm;
- StringValue(valstr);
+ ExportStringValue(valstr);
val.dptr = RSTRING_PTR(valstr);
val.dsize = RSTRING_LEN(valstr);
@@ -272,7 +272,7 @@ fdbm_delete(VALUE obj, VALUE keystr)
VALUE valstr;
fdbm_modify(obj);
- StringValue(keystr);
+ ExportStringValue(keystr);
key.dptr = RSTRING_PTR(keystr);
key.dsize = RSTRING_LEN(keystr);
@@ -346,7 +346,7 @@ fdbm_delete_if(VALUE obj)
for (i = 0; i < RARRAY_LEN(ary); i++) {
keystr = RARRAY_PTR(ary)[i];
- StringValue(keystr);
+ ExportStringValue(keystr);
key.dptr = RSTRING_PTR(keystr);
key.dsize = RSTRING_LEN(keystr);
if (dbm_delete(dbm, key)) {
@@ -599,7 +599,7 @@ fdbm_has_key(VALUE obj, VALUE keystr)
struct dbmdata *dbmp;
DBM *dbm;
- StringValue(keystr);
+ ExportStringValue(keystr);
key.dptr = RSTRING_PTR(keystr);
key.dsize = RSTRING_LEN(keystr);
@@ -616,7 +616,7 @@ fdbm_has_value(VALUE obj, VALUE valstr)
struct dbmdata *dbmp;
DBM *dbm;
- StringValue(valstr);
+ ExportStringValue(valstr);
val.dptr = RSTRING_PTR(valstr);
val.dsize = RSTRING_LEN(valstr);
diff --git a/ext/sdbm/init.c b/ext/sdbm/init.c
index 70480f462b..ecedd6318a 100644
--- a/ext/sdbm/init.c
+++ b/ext/sdbm/init.c
@@ -96,7 +96,7 @@ fsdbm_initialize(int argc, VALUE *argv, VALUE obj)
else {
mode = NUM2INT(vmode);
}
- SafeStringValue(file);
+ FilePathValue(file);
dbm = 0;
if (mode >= 0)
@@ -142,7 +142,7 @@ fsdbm_fetch(VALUE obj, VALUE keystr, VALUE ifnone)
struct dbmdata *dbmp;
DBM *dbm;
- StringValue(keystr);
+ ExportStringValue(keystr);
key.dptr = RSTRING_PTR(keystr);
key.dsize = RSTRING_LEN(keystr);
@@ -150,10 +150,10 @@ fsdbm_fetch(VALUE obj, VALUE keystr, VALUE ifnone)
value = sdbm_fetch(dbm, key);
if (value.dptr == 0) {
if (ifnone == Qnil && rb_block_given_p())
- return rb_yield(rb_tainted_str_new(key.dptr, key.dsize));
+ return rb_yield(rb_external_str_new(key.dptr, key.dsize));
return ifnone;
}
- return rb_tainted_str_new(value.dptr, value.dsize);
+ return rb_external_str_new(value.dptr, value.dsize);
}
static VALUE
@@ -182,7 +182,7 @@ fsdbm_index(VALUE obj, VALUE valstr)
struct dbmdata *dbmp;
DBM *dbm;
- StringValue(valstr);
+ ExportStringValue(valstr);
val.dptr = RSTRING_PTR(valstr);
val.dsize = RSTRING_LEN(valstr);
@@ -191,7 +191,7 @@ fsdbm_index(VALUE obj, VALUE valstr)
val = sdbm_fetch(dbm, key);
if (val.dsize == RSTRING_LEN(valstr) &&
memcmp(val.dptr, RSTRING_PTR(valstr), val.dsize) == 0)
- return rb_tainted_str_new(key.dptr, key.dsize);
+ return rb_external_str_new(key.dptr, key.dsize);
}
return Qnil;
}
@@ -208,8 +208,8 @@ fsdbm_select(VALUE obj)
for (key = sdbm_firstkey(dbm); key.dptr; key = sdbm_nextkey(dbm)) {
VALUE assoc, v;
val = sdbm_fetch(dbm, key);
- assoc = rb_assoc_new(rb_tainted_str_new(key.dptr, key.dsize),
- rb_tainted_str_new(val.dptr, val.dsize));
+ assoc = rb_assoc_new(rb_external_str_new(key.dptr, key.dsize),
+ rb_external_str_new(val.dptr, val.dsize));
v = rb_yield(assoc);
if (RTEST(v)) {
rb_ary_push(new, assoc);
@@ -249,7 +249,7 @@ fsdbm_delete(VALUE obj, VALUE keystr)
VALUE valstr;
fdbm_modify(obj);
- StringValue(keystr);
+ ExportStringValue(keystr);
key.dptr = RSTRING_PTR(keystr);
key.dsize = RSTRING_LEN(keystr);
@@ -263,7 +263,7 @@ fsdbm_delete(VALUE obj, VALUE keystr)
}
/* need to save value before sdbm_delete() */
- valstr = rb_tainted_str_new(value.dptr, value.dsize);
+ valstr = rb_external_str_new(value.dptr, value.dsize);
if (sdbm_delete(dbm, key)) {
dbmp->di_size = -1;
@@ -288,8 +288,8 @@ fsdbm_shift(VALUE obj)
key = sdbm_firstkey(dbm);
if (!key.dptr) return Qnil;
val = sdbm_fetch(dbm, key);
- keystr = rb_tainted_str_new(key.dptr, key.dsize);
- valstr = rb_tainted_str_new(val.dptr, val.dsize);
+ keystr = rb_external_str_new(key.dptr, key.dsize);
+ valstr = rb_external_str_new(val.dptr, val.dsize);
sdbm_delete(dbm, key);
if (dbmp->di_size >= 0) {
dbmp->di_size--;
@@ -314,8 +314,8 @@ fsdbm_delete_if(VALUE obj)
dbmp->di_size = -1;
for (key = sdbm_firstkey(dbm); key.dptr; key = sdbm_nextkey(dbm)) {
val = sdbm_fetch(dbm, key);
- keystr = rb_tainted_str_new(key.dptr, key.dsize);
- valstr = rb_tainted_str_new(val.dptr, val.dsize);
+ keystr = rb_external_str_new(key.dptr, key.dsize);
+ valstr = rb_external_str_new(val.dptr, val.dsize);
ret = rb_protect(rb_yield, rb_assoc_new(rb_str_dup(keystr), valstr), &status);
if (status != 0) break;
if (RTEST(ret)) rb_ary_push(ary, keystr);
@@ -324,7 +324,7 @@ fsdbm_delete_if(VALUE obj)
for (i = 0; i < RARRAY_LEN(ary); i++) {
keystr = RARRAY_PTR(ary)[i];
- StringValue(keystr);
+ ExportStringValue(keystr);
key.dptr = RSTRING_PTR(keystr);
key.dsize = RSTRING_LEN(keystr);
if (sdbm_delete(dbm, key)) {
@@ -369,8 +369,8 @@ fsdbm_invert(VALUE obj)
GetDBM2(obj, dbmp, dbm);
for (key = sdbm_firstkey(dbm); key.dptr; key = sdbm_nextkey(dbm)) {
val = sdbm_fetch(dbm, key);
- keystr = rb_tainted_str_new(key.dptr, key.dsize);
- valstr = rb_tainted_str_new(val.dptr, val.dsize);
+ keystr = rb_external_str_new(key.dptr, key.dsize);
+ valstr = rb_external_str_new(val.dptr, val.dsize);
rb_hash_aset(hash, valstr, keystr);
}
return hash;
@@ -389,8 +389,8 @@ fsdbm_store(VALUE obj, VALUE keystr, VALUE valstr)
}
fdbm_modify(obj);
- StringValue(keystr);
- StringValue(valstr);
+ ExportStringValue(keystr);
+ ExportStringValue(valstr);
key.dptr = RSTRING_PTR(keystr);
key.dsize = RSTRING_LEN(keystr);
@@ -491,7 +491,7 @@ fsdbm_each_value(VALUE obj)
GetDBM2(obj, dbmp, dbm);
for (key = sdbm_firstkey(dbm); key.dptr; key = sdbm_nextkey(dbm)) {
val = sdbm_fetch(dbm, key);
- rb_yield(rb_tainted_str_new(val.dptr, val.dsize));
+ rb_yield(rb_external_str_new(val.dptr, val.dsize));
GetDBM2(obj, dbmp, dbm);
}
return obj;
@@ -508,7 +508,7 @@ fsdbm_each_key(VALUE obj)
GetDBM2(obj, dbmp, dbm);
for (key = sdbm_firstkey(dbm); key.dptr; key = sdbm_nextkey(dbm)) {
- rb_yield(rb_tainted_str_new(key.dptr, key.dsize));
+ rb_yield(rb_external_str_new(key.dptr, key.dsize));
GetDBM2(obj, dbmp, dbm);
}
return obj;
@@ -527,8 +527,8 @@ fsdbm_each_pair(VALUE obj)
GetDBM2(obj, dbmp, dbm);
for (key = sdbm_firstkey(dbm); key.dptr; key = sdbm_nextkey(dbm)) {
val = sdbm_fetch(dbm, key);
- keystr = rb_tainted_str_new(key.dptr, key.dsize);
- valstr = rb_tainted_str_new(val.dptr, val.dsize);
+ keystr = rb_external_str_new(key.dptr, key.dsize);
+ valstr = rb_external_str_new(val.dptr, val.dsize);
rb_yield(rb_assoc_new(keystr, valstr));
GetDBM2(obj, dbmp, dbm);
}
@@ -547,7 +547,7 @@ fsdbm_keys(VALUE obj)
GetDBM2(obj, dbmp, dbm);
ary = rb_ary_new();
for (key = sdbm_firstkey(dbm); key.dptr; key = sdbm_nextkey(dbm)) {
- rb_ary_push(ary, rb_tainted_str_new(key.dptr, key.dsize));
+ rb_ary_push(ary, rb_external_str_new(key.dptr, key.dsize));
}
return ary;
@@ -565,7 +565,7 @@ fsdbm_values(VALUE obj)
ary = rb_ary_new();
for (key = sdbm_firstkey(dbm); key.dptr; key = sdbm_nextkey(dbm)) {
val = sdbm_fetch(dbm, key);
- rb_ary_push(ary, rb_tainted_str_new(val.dptr, val.dsize));
+ rb_ary_push(ary, rb_external_str_new(val.dptr, val.dsize));
}
return ary;
@@ -578,7 +578,7 @@ fsdbm_has_key(VALUE obj, VALUE keystr)
struct dbmdata *dbmp;
DBM *dbm;
- StringValue(keystr);
+ ExportStringValue(keystr);
key.dptr = RSTRING_PTR(keystr);
key.dsize = RSTRING_LEN(keystr);
@@ -595,7 +595,7 @@ fsdbm_has_value(VALUE obj, VALUE valstr)
struct dbmdata *dbmp;
DBM *dbm;
- StringValue(valstr);
+ ExportStringValue(valstr);
val.dptr = RSTRING_PTR(valstr);
val.dsize = RSTRING_LEN(valstr);
@@ -621,8 +621,8 @@ fsdbm_to_a(VALUE obj)
ary = rb_ary_new();
for (key = sdbm_firstkey(dbm); key.dptr; key = sdbm_nextkey(dbm)) {
val = sdbm_fetch(dbm, key);
- rb_ary_push(ary, rb_assoc_new(rb_tainted_str_new(key.dptr, key.dsize),
- rb_tainted_str_new(val.dptr, val.dsize)));
+ rb_ary_push(ary, rb_assoc_new(rb_external_str_new(key.dptr, key.dsize),
+ rb_external_str_new(val.dptr, val.dsize)));
}
return ary;
@@ -640,8 +640,8 @@ fsdbm_to_hash(VALUE obj)
hash = rb_hash_new();
for (key = sdbm_firstkey(dbm); key.dptr; key = sdbm_nextkey(dbm)) {
val = sdbm_fetch(dbm, key);
- rb_hash_aset(hash, rb_tainted_str_new(key.dptr, key.dsize),
- rb_tainted_str_new(val.dptr, val.dsize));
+ rb_hash_aset(hash, rb_external_str_new(key.dptr, key.dsize),
+ rb_external_str_new(val.dptr, val.dsize));
}
return hash;
diff --git a/include/ruby/encoding.h b/include/ruby/encoding.h
index 1b1cf33d58..8bd73fb61f 100644
--- a/include/ruby/encoding.h
+++ b/include/ruby/encoding.h
@@ -92,7 +92,8 @@ char* rb_enc_nth(const char*, const char*, int, rb_encoding*);
VALUE rb_obj_encoding(VALUE);
VALUE rb_enc_str_buf_cat(VALUE str, const char *ptr, long len, rb_encoding *enc);
-VALUE rb_external_str_new_with_enc(const char *ptr, long len, rb_encoding *enc);
+VALUE rb_external_str_new_with_enc(const char *ptr, long len, rb_encoding *);
+VALUE rb_str_export_to_enc(VALUE, rb_encoding *);
/* index -> rb_encoding */
rb_encoding* rb_enc_from_index(int idx);
diff --git a/include/ruby/ruby.h b/include/ruby/ruby.h
index 9789d31ecc..1fd48baaba 100644
--- a/include/ruby/ruby.h
+++ b/include/ruby/ruby.h
@@ -373,6 +373,12 @@ void rb_check_safe_str(VALUE);
/* obsolete macro - use SafeStringValue(v) */
#define Check_SafeStr(v) rb_check_safe_str((VALUE)(v))
+VALUE rb_str_export(VALUE); /* string.c: converts a string toward the default external encoding */
+#define ExportStringValue(v) do { /* v must be an assignable lvalue: it is expanded more than once and overwritten */\
+ SafeStringValue(v); /* runs first, before any conversion; presumably the usual safe-string check - confirm */\
+ (v) = rb_str_export(v); /* v now refers to whatever rb_str_export() returned */\
+} while (0)
+
VALUE rb_get_path(VALUE);
#define FilePathValue(v) ((v) = rb_get_path(v))
diff --git a/string.c b/string.c
index 054a1eb72b..fe6d1d600e 100644
--- a/string.c
+++ b/string.c
@@ -472,52 +472,61 @@ rb_tainted_str_new_cstr(const char *ptr)
RUBY_ALIAS_FUNCTION(rb_tainted_str_new2(const char *ptr), rb_tainted_str_new_cstr, (ptr))
#define rb_tainted_str_new2 rb_tainted_str_new_cstr
+/*
+ * Transcode str from encoding `from` to encoding `to`.
+ *
+ * Returns str itself, untouched, when:
+ *   - `to` is NULL,
+ *   - `from` and `to` are the same encoding,
+ *   - `to` is ASCII-compatible and str's code range is 7bit,
+ *   - no converter can be opened for the pair, or
+ *   - the conversion ends with anything other than "finished".
+ *
+ * Otherwise returns a new string holding the converted bytes, associated
+ * with `to`.
+ *
+ * NOTE(review): the result is allocated with rb_str_new(); presumably it
+ * does not inherit str's taint flag -- confirm whether callers such as
+ * rb_external_str_new_with_enc() rely on the result staying tainted.
+ */
+static VALUE
+str_conv_enc(VALUE str, rb_encoding *from, rb_encoding *to)
+{
+    rb_econv_t *ec;
+    rb_econv_result_t ret;
+    long len;
+    VALUE newstr;
+    const unsigned char *sp;
+    unsigned char *dp;
+
+    if (!to) return str;
+    if (from == to) return str;
+    if (rb_enc_asciicompat(to) && ENC_CODERANGE(str) == ENC_CODERANGE_7BIT)
+        return str;
+
+    /* first guess: the output is as long as the input */
+    len = RSTRING_LEN(str);
+    newstr = rb_str_new(0, len);
+
+  retry:
+    /* every attempt opens a fresh converter and restarts from the
+     * beginning of str */
+    ec = rb_econv_open_opts(from->name, to->name, 0, Qnil);
+    if (!ec) return str;
+
+    sp = (unsigned char*)RSTRING_PTR(str);
+    dp = (unsigned char*)RSTRING_PTR(newstr);
+    ret = rb_econv_convert(ec, &sp, (unsigned char*)RSTRING_END(str),
+                           &dp, (unsigned char*)RSTRING_END(newstr), 0);
+    rb_econv_close(ec);
+    switch (ret) {
+      case econv_destination_buffer_full:
+        /* destination buffer short: double it, but never stay at zero,
+         * otherwise an empty source whose conversion still needs output
+         * space would retry forever with a zero-length buffer */
+        len = (len > 0) ? len * 2 : 16;
+        rb_str_resize(newstr, len);
+        goto retry;
+
+      case econv_finished:
+        /* trim the buffer to the number of bytes actually written */
+        len = dp - (unsigned char*)RSTRING_PTR(newstr);
+        rb_str_set_len(newstr, len);
+        rb_enc_associate(newstr, to);
+        return newstr;
+
+      default:
+        /* some error, return original */
+        return str;
+    }
+}
+
+
VALUE
rb_external_str_new_with_enc(const char *ptr, long len, rb_encoding *eenc)
{
VALUE str;
- rb_encoding *ienc;
if (len == 0 && !ptr) len = strlen(ptr);
str = rb_tainted_str_new(ptr, len);
rb_enc_associate(str, eenc);
- ienc = rb_default_internal_encoding();
- if (ienc) {
- rb_econv_t *ec;
- rb_econv_result_t ret;
- VALUE newstr = rb_str_new(0, len);
- long nlen = len;
- const unsigned char *sp;
- unsigned char *dp;
-
- retry:
- ec = rb_econv_open_opts(eenc->name, ienc->name, 0, Qnil);
- if (!ec) return str;
-
- sp = (unsigned char*)RSTRING_PTR(str);
- dp = (unsigned char*)RSTRING_PTR(newstr);
- ret = rb_econv_convert(ec, &sp, (unsigned char*)RSTRING_END(str),
- &dp, (unsigned char*)RSTRING_END(newstr), 0);
- rb_econv_close(ec);
- switch (ret) {
- case econv_destination_buffer_full:
- /* destination buffer short */
- nlen *= 2;
- rb_str_resize(newstr, nlen);
- goto retry;
-
- case econv_finished:
- nlen = dp - (unsigned char*)RSTRING_PTR(newstr);
- rb_str_set_len(newstr, nlen);
- rb_enc_associate(newstr, ienc);
- return newstr;
-
- default:
- /* some error, return original */
- return str;
- }
- }
- return str;
+ return str_conv_enc(str, eenc, rb_default_internal_encoding());
}
VALUE
@@ -532,6 +541,18 @@ rb_locale_str_new(const char *ptr, long len)
return rb_external_str_new_with_enc(ptr, len, rb_locale_encoding());
}
+/*
+ * Convert str from its own encoding to the default external encoding.
+ * The result is whatever str_conv_enc() returns for that pair.
+ */
+VALUE
+rb_str_export(VALUE str)
+{
+    rb_encoding *src_enc = STR_ENC_GET(str);
+    rb_encoding *dst_enc = rb_default_external_encoding();
+
+    return str_conv_enc(str, src_enc, dst_enc);
+}
+
+/*
+ * Convert str from its own encoding to the caller-supplied encoding enc.
+ * The result is whatever str_conv_enc() returns for that pair.
+ */
+VALUE
+rb_str_export_to_enc(VALUE str, rb_encoding *enc)
+{
+    rb_encoding *src_enc = STR_ENC_GET(str);
+
+    return str_conv_enc(str, src_enc, enc);
+}
+
static VALUE
str_replace_shared(VALUE str2, VALUE str)
{