summaryrefslogtreecommitdiff
path: root/string.c
diff options
context:
space:
mode:
author: nobu <nobu@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2017-05-14 00:21:00 +0000
committer: nobu <nobu@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2017-05-14 00:21:00 +0000
commit: f3a49ebc92c01d32896002e02937ba06a88f7b55 (patch)
tree: fa9cde0f76c7d64657eeff2805e007b7b2f62c15 /string.c
parent: 7323de517d3459542ba7a6ef72152078b1f50fdf (diff)
string.c: cut down intermediate string
* string.c (rb_external_str_new_with_enc): cut down intermediate string for conversion source, by appending with conversion.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58709 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'string.c')
-rw-r--r-- string.c 33
1 files changed, 30 insertions, 3 deletions
diff --git a/string.c b/string.c
index 1d30a153cb..1d357638a5 100644
--- a/string.c
+++ b/string.c
@@ -997,10 +997,37 @@ rb_str_conv_enc(VALUE str, rb_encoding *from, rb_encoding *to)
VALUE
rb_external_str_new_with_enc(const char *ptr, long len, rb_encoding *eenc)
{
+ rb_encoding *ienc;
VALUE str;
-
- str = rb_tainted_str_new_with_enc(ptr, len, eenc);
- return rb_external_str_with_enc(str, eenc);
+ const int eidx = rb_enc_to_index(eenc);
+
+ /* ASCII-8BIT case, no conversion */
+ if ((eidx == rb_ascii8bit_encindex()) ||
+ (eidx == rb_usascii_encindex() && search_nonascii(ptr, ptr + len))) {
+ return rb_tainted_str_new(ptr, len);
+ }
+ /* no default_internal or same encoding, no conversion */
+ ienc = rb_default_internal_encoding();
+ if (!ienc || eenc == ienc) {
+ return rb_tainted_str_new_with_enc(ptr, len, eenc);
+ }
+ /* ASCII compatible, and ASCII only string, no conversion in
+ * default_internal */
+ if ((eidx == rb_ascii8bit_encindex()) ||
+ (eidx == rb_usascii_encindex()) ||
+ (rb_enc_asciicompat(eenc) && !search_nonascii(ptr, ptr + len))) {
+ return rb_tainted_str_new_with_enc(ptr, len, ienc);
+ }
+ /* convert from the given encoding to default_internal */
+ str = rb_tainted_str_new_with_enc(NULL, 0, ienc);
+ /* when the conversion failed for some reason, just ignore the
+ * default_internal and result in the given encoding as-is. */
+ if (NIL_P(rb_str_cat_conv_enc_opts(str, 0, ptr, len, eenc, 0, Qnil))) {
+ STR_SET_LEN(str, 0);
+ rb_enc_associate(str, eenc);
+ rb_str_cat(str, ptr, len);
+ }
+ return str;
}
VALUE