From a5b6598192c30187b19b892af3110a46f6a70d76 Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Thu, 26 Aug 2021 10:06:32 -0400 Subject: [Feature #18239] Implement VWA for strings This commit adds support for embedded strings with variable capacity and uses Variable Width Allocation to allocate strings. --- string.c | 323 ++++++++++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 249 insertions(+), 74 deletions(-) (limited to 'string.c') diff --git a/string.c b/string.c index 48c92072d8..b815b12c54 100644 --- a/string.c +++ b/string.c @@ -106,14 +106,26 @@ VALUE rb_cSymbol; #define STR_SET_NOEMBED(str) do {\ FL_SET((str), STR_NOEMBED);\ - STR_SET_EMBED_LEN((str), 0);\ + if (USE_RVARGC) {\ + FL_UNSET((str), STR_SHARED | STR_SHARED_ROOT | STR_BORROWED);\ + }\ + else {\ + STR_SET_EMBED_LEN((str), 0);\ + }\ } while (0) #define STR_SET_EMBED(str) FL_UNSET((str), (STR_NOEMBED|STR_NOFREE)) -#define STR_SET_EMBED_LEN(str, n) do { \ +#if USE_RVARGC +# define STR_SET_EMBED_LEN(str, n) do { \ + assert(str_embed_capa(str) > (n));\ + RSTRING(str)->as.embed.len = (n);\ +} while (0) +#else +# define STR_SET_EMBED_LEN(str, n) do { \ long tmp_n = (n);\ RBASIC(str)->flags &= ~RSTRING_EMBED_LEN_MASK;\ RBASIC(str)->flags |= (tmp_n) << RSTRING_EMBED_LEN_SHIFT;\ } while (0) +#endif #define STR_SET_LEN(str, n) do { \ if (STR_EMBED_P(str)) {\ @@ -150,7 +162,7 @@ VALUE rb_cSymbol; } while (0) #define RESIZE_CAPA_TERM(str,capacity,termlen) do {\ if (STR_EMBED_P(str)) {\ - if (!STR_EMBEDDABLE_P(capacity, termlen)) {\ + if (str_embed_capa(str) < capacity + termlen) {\ char *const tmp = ALLOC_N(char, (size_t)(capacity) + (termlen));\ const long tlen = RSTRING_LEN(str);\ memcpy(tmp, RSTRING_PTR(str), tlen);\ @@ -170,6 +182,8 @@ VALUE rb_cSymbol; #define STR_SET_SHARED(str, shared_str) do { \ if (!FL_TEST(str, STR_FAKESTR)) { \ + assert(RSTRING_PTR(shared_str) <= RSTRING_PTR(str)); \ + assert(RSTRING_PTR(str) <= RSTRING_PTR(shared_str) + RSTRING_LEN(shared_str)); \ RB_OBJ_WRITE((str), &RSTRING(str)->as.heap.aux.shared, (shared_str)); \ FL_SET((str), STR_SHARED); \ FL_SET((shared_str), STR_SHARED_ROOT); \ @@ -193,8 +207,32 @@ VALUE rb_cSymbol; #define SHARABLE_SUBSTRING_P(beg, len, end) 1 #endif -#define STR_EMBEDDABLE_P(len, termlen) \ - ((len) <= RSTRING_EMBED_LEN_MAX + 1 - (termlen)) + +static inline long +str_embed_capa(VALUE str) +{ +#if USE_RVARGC + return rb_gc_obj_slot_size(str) - offsetof(struct RString, as.embed.ary); +#else + return RSTRING_EMBED_LEN_MAX + 1; +#endif +} + +static inline size_t +str_embed_size(long capa) +{ + return offsetof(struct RString, as.embed.ary) + capa; +} + +static inline bool +STR_EMBEDDABLE_P(long len, long termlen) +{ +#if USE_RVARGC + return rb_gc_size_allocatable_p(str_embed_size(len + termlen)); +#else + return len <= RSTRING_EMBED_LEN_MAX + 1 - termlen; +#endif +} static VALUE str_replace_shared_without_enc(VALUE str2, VALUE str); static VALUE str_new_frozen(VALUE klass, VALUE orig); @@ -768,7 +806,11 @@ static size_t str_capacity(VALUE str, const int termlen) { if (STR_EMBED_P(str)) { +#if USE_RVARGC + return str_embed_capa(str) - termlen; +#else return (RSTRING_EMBED_LEN_MAX + 1 - termlen); +#endif } else if (FL_TEST(str, STR_SHARED|STR_NOFREE)) { return RSTRING(str)->as.heap.len; @@ -793,17 +835,36 @@ must_not_null(const char *ptr) } static inline VALUE -str_alloc(VALUE klass) +str_alloc(VALUE klass, size_t size) { - NEWOBJ_OF(str, struct RString, klass, T_STRING | (RGENGC_WB_PROTECTED_STRING ? FL_WB_PROTECTED : 0)); + assert(size > 0); + RVARGC_NEWOBJ_OF(str, struct RString, klass, + T_STRING | (RGENGC_WB_PROTECTED_STRING ? FL_WB_PROTECTED : 0), size); return (VALUE)str; } +static inline VALUE +str_alloc_embed(VALUE klass, size_t capa) +{ + size_t size = str_embed_size(capa); + assert(rb_gc_size_allocatable_p(size)); +#if !USE_RVARGC + assert(size <= sizeof(struct RString)); +#endif + return str_alloc(klass, size); +} + +static inline VALUE +str_alloc_heap(VALUE klass) +{ + return str_alloc(klass, sizeof(struct RString)); +} + static inline VALUE empty_str_alloc(VALUE klass) { RUBY_DTRACE_CREATE_HOOK(STRING, 0); - return str_alloc(klass); + return str_alloc_embed(klass, 0); } static VALUE @@ -817,8 +878,14 @@ str_new0(VALUE klass, const char *ptr, long len, int termlen) RUBY_DTRACE_CREATE_HOOK(STRING, len); - str = str_alloc(klass); - if (!STR_EMBEDDABLE_P(len, termlen)) { + if (STR_EMBEDDABLE_P(len, termlen)) { + str = str_alloc_embed(klass, len + termlen); + if (len == 0) { + ENC_CODERANGE_SET(str, ENC_CODERANGE_7BIT); + } + } + else { + str = str_alloc_heap(klass); RSTRING(str)->as.heap.aux.capa = len; /* :FIXME: @shyouhei guesses `len + termlen` is guaranteed to never * integer overflow. If we can STATIC_ASSERT that, the following @@ -827,9 +894,6 @@ str_new0(VALUE klass, const char *ptr, long len, int termlen) rb_xmalloc_mul_add_mul(sizeof(char), len, sizeof(char), termlen); STR_SET_NOEMBED(str); } - else if (len == 0) { - ENC_CODERANGE_SET(str, ENC_CODERANGE_7BIT); - } if (ptr) { memcpy(RSTRING_PTR(str), ptr, len); } @@ -931,7 +995,7 @@ str_new_static(VALUE klass, const char *ptr, long len, int encindex) } else { RUBY_DTRACE_CREATE_HOOK(STRING, len); - str = str_alloc(klass); + str = str_alloc_heap(klass); RSTRING(str)->as.heap.len = len; RSTRING(str)->as.heap.ptr = (char *)ptr; RSTRING(str)->as.heap.aux.capa = len; @@ -1228,8 +1292,8 @@ str_replace_shared_without_enc(VALUE str2, VALUE str) long len; RSTRING_GETMEM(str, ptr, len); - if (STR_EMBEDDABLE_P(len, termlen)) { - char *ptr2 = RSTRING(str2)->as.embed.ary; + if (str_embed_capa(str2) >= len + termlen) { + char *ptr2 = RSTRING(str2)->as.embed.ary; STR_SET_EMBED(str2); memcpy(ptr2, RSTRING_PTR(str), len); STR_SET_EMBED_LEN(str2, len); @@ -1245,6 +1309,7 @@ str_replace_shared_without_enc(VALUE str2, VALUE str) root = rb_str_new_frozen(str); RSTRING_GETMEM(root, ptr, len); } + assert(OBJ_FROZEN(root)); if (!STR_EMBED_P(str2) && !FL_TEST_RAW(str2, STR_SHARED|STR_NOFREE)) { if (FL_TEST_RAW(str2, STR_SHARED_ROOT)) { rb_fatal("about to free a possible shared root"); @@ -1273,7 +1338,7 @@ str_replace_shared(VALUE str2, VALUE str) static VALUE str_new_shared(VALUE klass, VALUE str) { - return str_replace_shared(str_alloc(klass), str); + return str_replace_shared(str_alloc_heap(klass), str); } VALUE @@ -1335,26 +1400,54 @@ str_new_frozen(VALUE klass, VALUE orig) return str_new_frozen_buffer(klass, orig, TRUE); } +static VALUE +heap_str_make_shared(VALUE klass, VALUE orig) +{ + assert(!STR_EMBED_P(orig)); + assert(!STR_SHARED_P(orig)); + + VALUE str = str_alloc_heap(klass); + STR_SET_NOEMBED(str); + RSTRING(str)->as.heap.len = RSTRING_LEN(orig); + RSTRING(str)->as.heap.ptr = RSTRING_PTR(orig); + RSTRING(str)->as.heap.aux.capa = RSTRING(orig)->as.heap.aux.capa; + RBASIC(str)->flags |= RBASIC(orig)->flags & STR_NOFREE; + RBASIC(orig)->flags &= ~STR_NOFREE; + STR_SET_SHARED(orig, str); + if (klass == 0) + FL_UNSET_RAW(str, STR_BORROWED); + return str; +} + static VALUE str_new_frozen_buffer(VALUE klass, VALUE orig, int copy_encoding) { VALUE str; - if (STR_EMBED_P(orig)) { - str = str_new(klass, RSTRING_PTR(orig), RSTRING_LEN(orig)); + long len = RSTRING_LEN(orig); + + if (STR_EMBED_P(orig) || STR_EMBEDDABLE_P(len, 1)) { + str = str_new(klass, RSTRING_PTR(orig), len); + assert(STR_EMBED_P(str)); } else { if (FL_TEST_RAW(orig, STR_SHARED)) { VALUE shared = RSTRING(orig)->as.heap.aux.shared; - long ofs = RSTRING(orig)->as.heap.ptr - RSTRING(shared)->as.heap.ptr; - long rest = RSTRING(shared)->as.heap.len - ofs - RSTRING(orig)->as.heap.len; + long ofs = RSTRING(orig)->as.heap.ptr - RSTRING_PTR(shared); + long rest = RSTRING_LEN(shared) - ofs - RSTRING(orig)->as.heap.len; + assert(ofs >= 0); + assert(rest >= 0); + assert(ofs + rest <= RSTRING_LEN(shared)); +#if !USE_RVARGC assert(!STR_EMBED_P(shared)); +#endif assert(OBJ_FROZEN(shared)); if ((ofs > 0) || (rest > 0) || (klass != RBASIC(shared)->klass) || ENCODING_GET(shared) != ENCODING_GET(orig)) { str = str_new_shared(klass, shared); + assert(!STR_EMBED_P(str)); RSTRING(str)->as.heap.ptr += ofs; RSTRING(str)->as.heap.len -= ofs + rest; } @@ -1364,24 +1457,15 @@ str_new_frozen_buffer(VALUE klass, VALUE orig, int copy_encoding) return shared; } } - else if (STR_EMBEDDABLE_P(RSTRING_LEN(orig), TERM_LEN(orig))) { - str = str_alloc(klass); + else if (STR_EMBEDDABLE_P(RSTRING_LEN(orig), TERM_LEN(orig))) { + str = str_alloc_embed(klass, RSTRING_LEN(orig) + TERM_LEN(orig)); STR_SET_EMBED(str); memcpy(RSTRING_PTR(str), RSTRING_PTR(orig), RSTRING_LEN(orig)); STR_SET_EMBED_LEN(str, RSTRING_LEN(orig)); TERM_FILL(RSTRING_END(str), TERM_LEN(orig)); } else { - str = str_alloc(klass); - STR_SET_NOEMBED(str); - RSTRING(str)->as.heap.len = RSTRING_LEN(orig); - RSTRING(str)->as.heap.ptr = RSTRING_PTR(orig); - RSTRING(str)->as.heap.aux.capa = RSTRING(orig)->as.heap.aux.capa; - RBASIC(str)->flags |= RBASIC(orig)->flags & STR_NOFREE; - RBASIC(orig)->flags &= ~STR_NOFREE; - STR_SET_SHARED(orig, str); - if (klass == 0) - FL_UNSET_RAW(str, STR_BORROWED); + str = heap_str_make_shared(klass, orig); } } @@ -1405,17 +1489,24 @@ str_new_empty_String(VALUE str) } #define STR_BUF_MIN_SIZE 63 +#if !USE_RVARGC STATIC_ASSERT(STR_BUF_MIN_SIZE, STR_BUF_MIN_SIZE > RSTRING_EMBED_LEN_MAX); +#endif VALUE rb_str_buf_new(long capa) { - VALUE str = str_alloc(rb_cString); + if (STR_EMBEDDABLE_P(capa, 1)) { + return str_alloc_embed(rb_cString, capa + 1); + } + + VALUE str = str_alloc_heap(rb_cString); - if (capa <= RSTRING_EMBED_LEN_MAX) return str; +#if !USE_RVARGC if (capa < STR_BUF_MIN_SIZE) { capa = STR_BUF_MIN_SIZE; } +#endif FL_SET(str, STR_NOEMBED); RSTRING(str)->as.heap.aux.capa = capa; RSTRING(str)->as.heap.ptr = ALLOC_N(char, (size_t)capa + 1); @@ -1508,7 +1599,7 @@ str_shared_replace(VALUE str, VALUE str2) str_discard(str); termlen = rb_enc_mbminlen(enc); - if (STR_EMBEDDABLE_P(RSTRING_LEN(str2), termlen)) { + if (str_embed_capa(str) >= RSTRING_LEN(str2) + termlen) { STR_SET_EMBED(str); memcpy(RSTRING_PTR(str), RSTRING_PTR(str2), (size_t)RSTRING_LEN(str2) + termlen); STR_SET_EMBED_LEN(str, RSTRING_LEN(str2)); @@ -1516,6 +1607,21 @@ str_shared_replace(VALUE str, VALUE str2) ENC_CODERANGE_SET(str, cr); } else { +#if USE_RVARGC + if (STR_EMBED_P(str2)) { + assert(!FL_TEST(str2, STR_SHARED)); + long len = RSTRING(str2)->as.embed.len; + assert(len + termlen <= str_embed_capa(str2)); + + char *new_ptr = ALLOC_N(char, len + termlen); + memcpy(new_ptr, RSTRING(str2)->as.embed.ary, len + termlen); + RSTRING(str2)->as.heap.ptr = new_ptr; + RSTRING(str2)->as.heap.len = len; + RSTRING(str2)->as.heap.aux.capa = len; + STR_SET_NOEMBED(str2); + } +#endif + STR_SET_NOEMBED(str); FL_UNSET(str, STR_SHARED); RSTRING(str)->as.heap.ptr = RSTRING_PTR(str2); @@ -1581,42 +1687,77 @@ str_replace(VALUE str, VALUE str2) } static inline VALUE -ec_str_alloc(struct rb_execution_context_struct *ec, VALUE klass) +ec_str_alloc(struct rb_execution_context_struct *ec, VALUE klass, size_t size) { - RB_EC_NEWOBJ_OF(ec, str, struct RString, klass, T_STRING | (RGENGC_WB_PROTECTED_STRING ? FL_WB_PROTECTED : 0)); + assert(size > 0); + RB_RVARGC_EC_NEWOBJ_OF(ec, str, struct RString, klass, + T_STRING | (RGENGC_WB_PROTECTED_STRING ? FL_WB_PROTECTED : 0), size); return (VALUE)str; } +static inline VALUE +ec_str_alloc_embed(struct rb_execution_context_struct *ec, VALUE klass, size_t capa) +{ + size_t size = str_embed_size(capa); + assert(rb_gc_size_allocatable_p(size)); +#if !USE_RVARGC + assert(size <= sizeof(struct RString)); +#endif + return ec_str_alloc(ec, klass, size); +} + +static inline VALUE +ec_str_alloc_heap(struct rb_execution_context_struct *ec, VALUE klass) +{ + return ec_str_alloc(ec, klass, sizeof(struct RString)); +} + static inline VALUE str_duplicate_setup(VALUE klass, VALUE str, VALUE dup) { - enum {embed_size = RSTRING_EMBED_LEN_MAX + 1}; const VALUE flag_mask = +#if !USE_RVARGC RSTRING_NOEMBED | RSTRING_EMBED_LEN_MASK | - ENC_CODERANGE_MASK | ENCODING_MASK | +#endif + ENC_CODERANGE_MASK | ENCODING_MASK | FL_FREEZE ; VALUE flags = FL_TEST_RAW(str, flag_mask); int encidx = 0; - MEMCPY(RSTRING(dup)->as.embed.ary, RSTRING(str)->as.embed.ary, - char, embed_size); - if (flags & STR_NOEMBED) { + if (STR_EMBED_P(str)) { + assert(str_embed_capa(dup) >= RSTRING_EMBED_LEN(str)); + STR_SET_EMBED_LEN(dup, RSTRING_EMBED_LEN(str)); + MEMCPY(RSTRING(dup)->as.embed.ary, RSTRING(str)->as.embed.ary, + char, RSTRING_EMBED_LEN(str)); + flags &= ~RSTRING_NOEMBED; + } + else { + VALUE root = str; if (FL_TEST_RAW(str, STR_SHARED)) { - str = RSTRING(str)->as.heap.aux.shared; + root = RSTRING(str)->as.heap.aux.shared; } else if (UNLIKELY(!(flags & FL_FREEZE))) { - str = str_new_frozen(klass, str); + root = str = str_new_frozen(klass, str); flags = FL_TEST_RAW(str, flag_mask); - } - if (flags & STR_NOEMBED) { - RB_OBJ_WRITE(dup, &RSTRING(dup)->as.heap.aux.shared, str); - flags |= STR_SHARED; - } - else { - MEMCPY(RSTRING(dup)->as.embed.ary, RSTRING(str)->as.embed.ary, - char, embed_size); - } + } + assert(!STR_SHARED_P(root)); + assert(RB_OBJ_FROZEN_RAW(root)); +#if USE_RVARGC + if (1) { +#else + if (STR_EMBED_P(root)) { + MEMCPY(RSTRING(dup)->as.embed.ary, RSTRING(root)->as.embed.ary, + char, RSTRING_EMBED_LEN_MAX + 1); + } + else { +#endif + RSTRING(dup)->as.heap.len = RSTRING_LEN(str); + RSTRING(dup)->as.heap.ptr = RSTRING_PTR(str); + RB_OBJ_WRITE(dup, &RSTRING(dup)->as.heap.aux.shared, root); + flags |= RSTRING_NOEMBED | STR_SHARED; + } } + if ((flags & ENCODING_MASK) == (ENCODING_INLINE_MAX<as.embed.len + 1 <= str_embed_capa(str)); + memcpy(new_ptr, RSTRING(str)->as.embed.ary, RSTRING(str)->as.embed.len + 1); +#else memcpy(new_ptr, RSTRING(str)->as.embed.ary, RSTRING_EMBED_LEN_MAX + 1); +#endif RSTRING(str)->as.heap.ptr = new_ptr; } else if (FL_TEST(str, STR_SHARED|STR_NOFREE)) { @@ -2133,7 +2293,7 @@ rb_str_times(VALUE str, VALUE times) return str_duplicate(rb_cString, str); } if (times == INT2FIX(0)) { - str2 = str_alloc(rb_cString); + str2 = str_alloc_embed(rb_cString, 0); rb_enc_copy(str2, str); return str2; } @@ -2142,8 +2302,11 @@ rb_str_times(VALUE str, VALUE times) rb_raise(rb_eArgError, "negative argument"); } if (RSTRING_LEN(str) == 1 && RSTRING_PTR(str)[0] == 0) { - str2 = str_alloc(rb_cString); - if (!STR_EMBEDDABLE_P(len, 1)) { + if (STR_EMBEDDABLE_P(len, 1)) { + str2 = str_alloc_embed(rb_cString, len + 1); + } + else { + str2 = str_alloc_heap(rb_cString); RSTRING(str2)->as.heap.aux.capa = len; RSTRING(str2)->as.heap.ptr = ZALLOC_N(char, (size_t)len + 1); STR_SET_NOEMBED(str2); @@ -2244,11 +2407,11 @@ str_make_independent_expand(VALUE str, long len, long expand, const int termlen) if (len > capa) len = capa; - if (!STR_EMBED_P(str) && STR_EMBEDDABLE_P(capa, termlen)) { + if (!STR_EMBED_P(str) && str_embed_capa(str) >= capa + termlen) { ptr = RSTRING(str)->as.heap.ptr; STR_SET_EMBED(str); - memcpy(RSTRING(str)->as.embed.ary, ptr, len); - TERM_FILL(RSTRING(str)->as.embed.ary + len, termlen); + memcpy(RSTRING(str)->as.embed.ary, ptr, len); + TERM_FILL(RSTRING(str)->as.embed.ary + len, termlen); STR_SET_EMBED_LEN(str, len); return; } @@ -2646,7 +2809,7 @@ rb_str_subseq(VALUE str, long beg, long len) } else { str2 = rb_str_new(RSTRING_PTR(str)+beg, len); - RB_GC_GUARD(str); + RB_GC_GUARD(str); } rb_enc_cr_str_copy_for_substr(str2, str); @@ -2885,19 +3048,19 @@ rb_str_resize(VALUE str, long len) const int termlen = TERM_LEN(str); if (STR_EMBED_P(str)) { if (len == slen) return str; - if (STR_EMBEDDABLE_P(len, termlen)) { + if (str_embed_capa(str) >= len + termlen) { STR_SET_EMBED_LEN(str, len); - TERM_FILL(RSTRING(str)->as.embed.ary + len, termlen); + TERM_FILL(RSTRING(str)->as.embed.ary + len, termlen); return str; } str_make_independent_expand(str, slen, len - slen, termlen); } - else if (STR_EMBEDDABLE_P(len, termlen)) { + else if (str_embed_capa(str) >= len + termlen) { char *ptr = STR_HEAP_PTR(str); STR_SET_EMBED(str); if (slen > len) slen = len; - if (slen > 0) MEMCPY(RSTRING(str)->as.embed.ary, ptr, char, slen); - TERM_FILL(RSTRING(str)->as.embed.ary + len, termlen); + if (slen > 0) MEMCPY(RSTRING(str)->as.embed.ary, ptr, char, slen); + TERM_FILL(RSTRING(str)->as.embed.ary + len, termlen); STR_SET_EMBED_LEN(str, len); if (independent) ruby_xfree(ptr); return str; @@ -2925,7 +3088,9 @@ str_buf_cat(VALUE str, const char *ptr, long len) long capa, total, olen, off = -1; char *sptr; const int termlen = TERM_LEN(str); +#if !USE_RVARGC assert(termlen < RSTRING_EMBED_LEN_MAX + 1); /* < (LONG_MAX/2) */ +#endif RSTRING_GETMEM(str, sptr, olen); if (ptr >= sptr && ptr <= sptr + olen) { @@ -2934,8 +3099,8 @@ str_buf_cat(VALUE str, const char *ptr, long len) rb_str_modify(str); if (len == 0) return 0; if (STR_EMBED_P(str)) { - capa = RSTRING_EMBED_LEN_MAX + 1 - termlen; - sptr = RSTRING(str)->as.embed.ary; + capa = str_embed_capa(str) - termlen; + sptr = RSTRING(str)->as.embed.ary; olen = RSTRING_EMBED_LEN(str); } else { @@ -4797,17 +4962,21 @@ rb_str_drop_bytes(VALUE str, long len) str_modifiable(str); if (len > olen) len = olen; nlen = olen - len; - if (STR_EMBEDDABLE_P(nlen, TERM_LEN(str))) { + if (str_embed_capa(str) >= nlen + TERM_LEN(str)) { char *oldptr = ptr; int fl = (int)(RBASIC(str)->flags & (STR_NOEMBED|STR_SHARED|STR_NOFREE)); STR_SET_EMBED(str); STR_SET_EMBED_LEN(str, nlen); - ptr = RSTRING(str)->as.embed.ary; + ptr = RSTRING(str)->as.embed.ary; memmove(ptr, oldptr + len, nlen); if (fl == STR_NOEMBED) xfree(oldptr); } else { - if (!STR_SHARED_P(str)) rb_str_new_frozen(str); + if (!STR_SHARED_P(str)) { + VALUE shared = heap_str_make_shared(rb_obj_class(str), str); + rb_enc_cr_str_exact_copy(shared, str); + OBJ_FREEZE(shared); + } ptr = RSTRING(str)->as.heap.ptr += len; RSTRING(str)->as.heap.len = nlen; } @@ -10465,7 +10634,13 @@ rb_str_force_encoding(VALUE str, VALUE enc) static VALUE rb_str_b(VALUE str) { - VALUE str2 = str_alloc(rb_cString); + VALUE str2; + if (FL_TEST(str, STR_NOEMBED)) { + str2 = str_alloc_heap(rb_cString); + } + else { + str2 = str_alloc_embed(rb_cString, RSTRING_EMBED_LEN(str) + TERM_LEN(str)); + } str_replace_shared_without_enc(str2, str); ENC_CODERANGE_CLEAR(str2); return str2; -- cgit v1.2.3