summaryrefslogtreecommitdiff
path: root/string.c
diff options
context:
space:
mode:
authorPeter Zhu <peter@peterzhu.ca>2021-08-26 10:06:32 -0400
committerPeter Zhu <peter@peterzhu.ca>2021-10-25 13:26:23 -0400
commita5b6598192c30187b19b892af3110a46f6a70d76 (patch)
tree4620f69a10659deb6f278b36c10ec7915194573e /string.c
parent6374be5a8188ff5ed2c70b9f1d76672c87a0eda7 (diff)
[Feature #18239] Implement VWA for strings
This commit adds support for embedded strings with variable capacity and uses Variable Width Allocation to allocate strings.
Notes
Notes: Merged: https://github.com/ruby/ruby/pull/4933
Diffstat (limited to 'string.c')
-rw-r--r--string.c323
1 files changed, 249 insertions, 74 deletions
diff --git a/string.c b/string.c
index 48c92072d8..b815b12c54 100644
--- a/string.c
+++ b/string.c
@@ -106,14 +106,26 @@ VALUE rb_cSymbol;
#define STR_SET_NOEMBED(str) do {\
FL_SET((str), STR_NOEMBED);\
- STR_SET_EMBED_LEN((str), 0);\
+ if (USE_RVARGC) {\
+ FL_UNSET((str), STR_SHARED | STR_SHARED_ROOT | STR_BORROWED);\
+ }\
+ else {\
+ STR_SET_EMBED_LEN((str), 0);\
+ }\
} while (0)
#define STR_SET_EMBED(str) FL_UNSET((str), (STR_NOEMBED|STR_NOFREE))
-#define STR_SET_EMBED_LEN(str, n) do { \
+#if USE_RVARGC
+# define STR_SET_EMBED_LEN(str, n) do { \
+ assert(str_embed_capa(str) > (n));\
+ RSTRING(str)->as.embed.len = (n);\
+} while (0)
+#else
+# define STR_SET_EMBED_LEN(str, n) do { \
long tmp_n = (n);\
RBASIC(str)->flags &= ~RSTRING_EMBED_LEN_MASK;\
RBASIC(str)->flags |= (tmp_n) << RSTRING_EMBED_LEN_SHIFT;\
} while (0)
+#endif
#define STR_SET_LEN(str, n) do { \
if (STR_EMBED_P(str)) {\
@@ -150,7 +162,7 @@ VALUE rb_cSymbol;
} while (0)
#define RESIZE_CAPA_TERM(str,capacity,termlen) do {\
if (STR_EMBED_P(str)) {\
- if (!STR_EMBEDDABLE_P(capacity, termlen)) {\
+ if (str_embed_capa(str) < capacity + termlen) {\
char *const tmp = ALLOC_N(char, (size_t)(capacity) + (termlen));\
const long tlen = RSTRING_LEN(str);\
memcpy(tmp, RSTRING_PTR(str), tlen);\
@@ -170,6 +182,8 @@ VALUE rb_cSymbol;
#define STR_SET_SHARED(str, shared_str) do { \
if (!FL_TEST(str, STR_FAKESTR)) { \
+ assert(RSTRING_PTR(shared_str) <= RSTRING_PTR(str)); \
+ assert(RSTRING_PTR(str) <= RSTRING_PTR(shared_str) + RSTRING_LEN(shared_str)); \
RB_OBJ_WRITE((str), &RSTRING(str)->as.heap.aux.shared, (shared_str)); \
FL_SET((str), STR_SHARED); \
FL_SET((shared_str), STR_SHARED_ROOT); \
@@ -193,8 +207,32 @@ VALUE rb_cSymbol;
#define SHARABLE_SUBSTRING_P(beg, len, end) 1
#endif
-#define STR_EMBEDDABLE_P(len, termlen) \
- ((len) <= RSTRING_EMBED_LEN_MAX + 1 - (termlen))
+
+static inline long
+str_embed_capa(VALUE str)
+{
+#if USE_RVARGC
+ return rb_gc_obj_slot_size(str) - offsetof(struct RString, as.embed.ary);
+#else
+ return RSTRING_EMBED_LEN_MAX + 1;
+#endif
+}
+
+static inline size_t
+str_embed_size(long capa)
+{
+ return offsetof(struct RString, as.embed.ary) + capa;
+}
+
+static inline bool
+STR_EMBEDDABLE_P(long len, long termlen)
+{
+#if USE_RVARGC
+ return rb_gc_size_allocatable_p(str_embed_size(len + termlen));
+#else
+ return len <= RSTRING_EMBED_LEN_MAX + 1 - termlen;
+#endif
+}
static VALUE str_replace_shared_without_enc(VALUE str2, VALUE str);
static VALUE str_new_frozen(VALUE klass, VALUE orig);
@@ -768,7 +806,11 @@ static size_t
str_capacity(VALUE str, const int termlen)
{
if (STR_EMBED_P(str)) {
+#if USE_RVARGC
+ return str_embed_capa(str) - termlen;
+#else
return (RSTRING_EMBED_LEN_MAX + 1 - termlen);
+#endif
}
else if (FL_TEST(str, STR_SHARED|STR_NOFREE)) {
return RSTRING(str)->as.heap.len;
@@ -793,17 +835,36 @@ must_not_null(const char *ptr)
}
static inline VALUE
-str_alloc(VALUE klass)
+str_alloc(VALUE klass, size_t size)
{
- NEWOBJ_OF(str, struct RString, klass, T_STRING | (RGENGC_WB_PROTECTED_STRING ? FL_WB_PROTECTED : 0));
+ assert(size > 0);
+ RVARGC_NEWOBJ_OF(str, struct RString, klass,
+ T_STRING | (RGENGC_WB_PROTECTED_STRING ? FL_WB_PROTECTED : 0), size);
return (VALUE)str;
}
static inline VALUE
+str_alloc_embed(VALUE klass, size_t capa)
+{
+ size_t size = str_embed_size(capa);
+ assert(rb_gc_size_allocatable_p(size));
+#if !USE_RVARGC
+ assert(size <= sizeof(struct RString));
+#endif
+ return str_alloc(klass, size);
+}
+
+static inline VALUE
+str_alloc_heap(VALUE klass)
+{
+ return str_alloc(klass, sizeof(struct RString));
+}
+
+static inline VALUE
empty_str_alloc(VALUE klass)
{
RUBY_DTRACE_CREATE_HOOK(STRING, 0);
- return str_alloc(klass);
+ return str_alloc_embed(klass, 0);
}
static VALUE
@@ -817,8 +878,14 @@ str_new0(VALUE klass, const char *ptr, long len, int termlen)
RUBY_DTRACE_CREATE_HOOK(STRING, len);
- str = str_alloc(klass);
- if (!STR_EMBEDDABLE_P(len, termlen)) {
+ if (STR_EMBEDDABLE_P(len, termlen)) {
+ str = str_alloc_embed(klass, len + termlen);
+ if (len == 0) {
+ ENC_CODERANGE_SET(str, ENC_CODERANGE_7BIT);
+ }
+ }
+ else {
+ str = str_alloc_heap(klass);
RSTRING(str)->as.heap.aux.capa = len;
/* :FIXME: @shyouhei guesses `len + termlen` is guaranteed to never
* integer overflow. If we can STATIC_ASSERT that, the following
@@ -827,9 +894,6 @@ str_new0(VALUE klass, const char *ptr, long len, int termlen)
rb_xmalloc_mul_add_mul(sizeof(char), len, sizeof(char), termlen);
STR_SET_NOEMBED(str);
}
- else if (len == 0) {
- ENC_CODERANGE_SET(str, ENC_CODERANGE_7BIT);
- }
if (ptr) {
memcpy(RSTRING_PTR(str), ptr, len);
}
@@ -931,7 +995,7 @@ str_new_static(VALUE klass, const char *ptr, long len, int encindex)
}
else {
RUBY_DTRACE_CREATE_HOOK(STRING, len);
- str = str_alloc(klass);
+ str = str_alloc_heap(klass);
RSTRING(str)->as.heap.len = len;
RSTRING(str)->as.heap.ptr = (char *)ptr;
RSTRING(str)->as.heap.aux.capa = len;
@@ -1228,8 +1292,8 @@ str_replace_shared_without_enc(VALUE str2, VALUE str)
long len;
RSTRING_GETMEM(str, ptr, len);
- if (STR_EMBEDDABLE_P(len, termlen)) {
- char *ptr2 = RSTRING(str2)->as.embed.ary;
+ if (str_embed_capa(str2) >= len + termlen) {
+ char *ptr2 = RSTRING(str2)->as.embed.ary;
STR_SET_EMBED(str2);
memcpy(ptr2, RSTRING_PTR(str), len);
STR_SET_EMBED_LEN(str2, len);
@@ -1245,6 +1309,7 @@ str_replace_shared_without_enc(VALUE str2, VALUE str)
root = rb_str_new_frozen(str);
RSTRING_GETMEM(root, ptr, len);
}
+ assert(OBJ_FROZEN(root));
if (!STR_EMBED_P(str2) && !FL_TEST_RAW(str2, STR_SHARED|STR_NOFREE)) {
if (FL_TEST_RAW(str2, STR_SHARED_ROOT)) {
rb_fatal("about to free a possible shared root");
@@ -1273,7 +1338,7 @@ str_replace_shared(VALUE str2, VALUE str)
static VALUE
str_new_shared(VALUE klass, VALUE str)
{
- return str_replace_shared(str_alloc(klass), str);
+ return str_replace_shared(str_alloc_heap(klass), str);
}
VALUE
@@ -1336,25 +1401,53 @@ str_new_frozen(VALUE klass, VALUE orig)
}
static VALUE
+heap_str_make_shared(VALUE klass, VALUE orig)
+{
+ assert(!STR_EMBED_P(orig));
+ assert(!STR_SHARED_P(orig));
+
+ VALUE str = str_alloc_heap(klass);
+ STR_SET_NOEMBED(str);
+ RSTRING(str)->as.heap.len = RSTRING_LEN(orig);
+ RSTRING(str)->as.heap.ptr = RSTRING_PTR(orig);
+ RSTRING(str)->as.heap.aux.capa = RSTRING(orig)->as.heap.aux.capa;
+ RBASIC(str)->flags |= RBASIC(orig)->flags & STR_NOFREE;
+ RBASIC(orig)->flags &= ~STR_NOFREE;
+ STR_SET_SHARED(orig, str);
+ if (klass == 0)
+ FL_UNSET_RAW(str, STR_BORROWED);
+ return str;
+}
+
+static VALUE
str_new_frozen_buffer(VALUE klass, VALUE orig, int copy_encoding)
{
VALUE str;
- if (STR_EMBED_P(orig)) {
- str = str_new(klass, RSTRING_PTR(orig), RSTRING_LEN(orig));
+ long len = RSTRING_LEN(orig);
+
+ if (STR_EMBED_P(orig) || STR_EMBEDDABLE_P(len, 1)) {
+ str = str_new(klass, RSTRING_PTR(orig), len);
+ assert(STR_EMBED_P(str));
}
else {
if (FL_TEST_RAW(orig, STR_SHARED)) {
VALUE shared = RSTRING(orig)->as.heap.aux.shared;
- long ofs = RSTRING(orig)->as.heap.ptr - RSTRING(shared)->as.heap.ptr;
- long rest = RSTRING(shared)->as.heap.len - ofs - RSTRING(orig)->as.heap.len;
+ long ofs = RSTRING(orig)->as.heap.ptr - RSTRING_PTR(shared);
+ long rest = RSTRING_LEN(shared) - ofs - RSTRING(orig)->as.heap.len;
+ assert(ofs >= 0);
+ assert(rest >= 0);
+ assert(ofs + rest <= RSTRING_LEN(shared));
+#if !USE_RVARGC
assert(!STR_EMBED_P(shared));
+#endif
assert(OBJ_FROZEN(shared));
if ((ofs > 0) || (rest > 0) ||
(klass != RBASIC(shared)->klass) ||
ENCODING_GET(shared) != ENCODING_GET(orig)) {
str = str_new_shared(klass, shared);
+ assert(!STR_EMBED_P(str));
RSTRING(str)->as.heap.ptr += ofs;
RSTRING(str)->as.heap.len -= ofs + rest;
}
@@ -1364,24 +1457,15 @@ str_new_frozen_buffer(VALUE klass, VALUE orig, int copy_encoding)
return shared;
}
}
- else if (STR_EMBEDDABLE_P(RSTRING_LEN(orig), TERM_LEN(orig))) {
- str = str_alloc(klass);
+ else if (STR_EMBEDDABLE_P(RSTRING_LEN(orig), TERM_LEN(orig))) {
+ str = str_alloc_embed(klass, RSTRING_LEN(orig) + TERM_LEN(orig));
STR_SET_EMBED(str);
memcpy(RSTRING_PTR(str), RSTRING_PTR(orig), RSTRING_LEN(orig));
STR_SET_EMBED_LEN(str, RSTRING_LEN(orig));
TERM_FILL(RSTRING_END(str), TERM_LEN(orig));
}
else {
- str = str_alloc(klass);
- STR_SET_NOEMBED(str);
- RSTRING(str)->as.heap.len = RSTRING_LEN(orig);
- RSTRING(str)->as.heap.ptr = RSTRING_PTR(orig);
- RSTRING(str)->as.heap.aux.capa = RSTRING(orig)->as.heap.aux.capa;
- RBASIC(str)->flags |= RBASIC(orig)->flags & STR_NOFREE;
- RBASIC(orig)->flags &= ~STR_NOFREE;
- STR_SET_SHARED(orig, str);
- if (klass == 0)
- FL_UNSET_RAW(str, STR_BORROWED);
+ str = heap_str_make_shared(klass, orig);
}
}
@@ -1405,17 +1489,24 @@ str_new_empty_String(VALUE str)
}
#define STR_BUF_MIN_SIZE 63
+#if !USE_RVARGC
STATIC_ASSERT(STR_BUF_MIN_SIZE, STR_BUF_MIN_SIZE > RSTRING_EMBED_LEN_MAX);
+#endif
VALUE
rb_str_buf_new(long capa)
{
- VALUE str = str_alloc(rb_cString);
+ if (STR_EMBEDDABLE_P(capa, 1)) {
+ return str_alloc_embed(rb_cString, capa + 1);
+ }
+
+ VALUE str = str_alloc_heap(rb_cString);
- if (capa <= RSTRING_EMBED_LEN_MAX) return str;
+#if !USE_RVARGC
if (capa < STR_BUF_MIN_SIZE) {
capa = STR_BUF_MIN_SIZE;
}
+#endif
FL_SET(str, STR_NOEMBED);
RSTRING(str)->as.heap.aux.capa = capa;
RSTRING(str)->as.heap.ptr = ALLOC_N(char, (size_t)capa + 1);
@@ -1508,7 +1599,7 @@ str_shared_replace(VALUE str, VALUE str2)
str_discard(str);
termlen = rb_enc_mbminlen(enc);
- if (STR_EMBEDDABLE_P(RSTRING_LEN(str2), termlen)) {
+ if (str_embed_capa(str) >= RSTRING_LEN(str2) + termlen) {
STR_SET_EMBED(str);
memcpy(RSTRING_PTR(str), RSTRING_PTR(str2), (size_t)RSTRING_LEN(str2) + termlen);
STR_SET_EMBED_LEN(str, RSTRING_LEN(str2));
@@ -1516,6 +1607,21 @@ str_shared_replace(VALUE str, VALUE str2)
ENC_CODERANGE_SET(str, cr);
}
else {
+#if USE_RVARGC
+ if (STR_EMBED_P(str2)) {
+ assert(!FL_TEST(str2, STR_SHARED));
+ long len = RSTRING(str2)->as.embed.len;
+ assert(len + termlen <= str_embed_capa(str2));
+
+ char *new_ptr = ALLOC_N(char, len + termlen);
+ memcpy(new_ptr, RSTRING(str2)->as.embed.ary, len + termlen);
+ RSTRING(str2)->as.heap.ptr = new_ptr;
+ RSTRING(str2)->as.heap.len = len;
+ RSTRING(str2)->as.heap.aux.capa = len;
+ STR_SET_NOEMBED(str2);
+ }
+#endif
+
STR_SET_NOEMBED(str);
FL_UNSET(str, STR_SHARED);
RSTRING(str)->as.heap.ptr = RSTRING_PTR(str2);
@@ -1581,42 +1687,77 @@ str_replace(VALUE str, VALUE str2)
}
static inline VALUE
-ec_str_alloc(struct rb_execution_context_struct *ec, VALUE klass)
+ec_str_alloc(struct rb_execution_context_struct *ec, VALUE klass, size_t size)
{
- RB_EC_NEWOBJ_OF(ec, str, struct RString, klass, T_STRING | (RGENGC_WB_PROTECTED_STRING ? FL_WB_PROTECTED : 0));
+ assert(size > 0);
+ RB_RVARGC_EC_NEWOBJ_OF(ec, str, struct RString, klass,
+ T_STRING | (RGENGC_WB_PROTECTED_STRING ? FL_WB_PROTECTED : 0), size);
return (VALUE)str;
}
static inline VALUE
+ec_str_alloc_embed(struct rb_execution_context_struct *ec, VALUE klass, size_t capa)
+{
+ size_t size = str_embed_size(capa);
+ assert(rb_gc_size_allocatable_p(size));
+#if !USE_RVARGC
+ assert(size <= sizeof(struct RString));
+#endif
+ return ec_str_alloc(ec, klass, size);
+}
+
+static inline VALUE
+ec_str_alloc_heap(struct rb_execution_context_struct *ec, VALUE klass)
+{
+ return ec_str_alloc(ec, klass, sizeof(struct RString));
+}
+
+static inline VALUE
str_duplicate_setup(VALUE klass, VALUE str, VALUE dup)
{
- enum {embed_size = RSTRING_EMBED_LEN_MAX + 1};
const VALUE flag_mask =
+#if !USE_RVARGC
RSTRING_NOEMBED | RSTRING_EMBED_LEN_MASK |
- ENC_CODERANGE_MASK | ENCODING_MASK |
+#endif
+ ENC_CODERANGE_MASK | ENCODING_MASK |
FL_FREEZE
;
VALUE flags = FL_TEST_RAW(str, flag_mask);
int encidx = 0;
- MEMCPY(RSTRING(dup)->as.embed.ary, RSTRING(str)->as.embed.ary,
- char, embed_size);
- if (flags & STR_NOEMBED) {
+ if (STR_EMBED_P(str)) {
+ assert(str_embed_capa(dup) >= RSTRING_EMBED_LEN(str));
+ STR_SET_EMBED_LEN(dup, RSTRING_EMBED_LEN(str));
+ MEMCPY(RSTRING(dup)->as.embed.ary, RSTRING(str)->as.embed.ary,
+ char, RSTRING_EMBED_LEN(str));
+ flags &= ~RSTRING_NOEMBED;
+ }
+ else {
+ VALUE root = str;
if (FL_TEST_RAW(str, STR_SHARED)) {
- str = RSTRING(str)->as.heap.aux.shared;
+ root = RSTRING(str)->as.heap.aux.shared;
}
else if (UNLIKELY(!(flags & FL_FREEZE))) {
- str = str_new_frozen(klass, str);
+ root = str = str_new_frozen(klass, str);
flags = FL_TEST_RAW(str, flag_mask);
- }
- if (flags & STR_NOEMBED) {
- RB_OBJ_WRITE(dup, &RSTRING(dup)->as.heap.aux.shared, str);
- flags |= STR_SHARED;
- }
- else {
- MEMCPY(RSTRING(dup)->as.embed.ary, RSTRING(str)->as.embed.ary,
- char, embed_size);
- }
+ }
+ assert(!STR_SHARED_P(root));
+ assert(RB_OBJ_FROZEN_RAW(root));
+#if USE_RVARGC
+ if (1) {
+#else
+ if (STR_EMBED_P(root)) {
+ MEMCPY(RSTRING(dup)->as.embed.ary, RSTRING(root)->as.embed.ary,
+ char, RSTRING_EMBED_LEN_MAX + 1);
+ }
+ else {
+#endif
+ RSTRING(dup)->as.heap.len = RSTRING_LEN(str);
+ RSTRING(dup)->as.heap.ptr = RSTRING_PTR(str);
+ RB_OBJ_WRITE(dup, &RSTRING(dup)->as.heap.aux.shared, root);
+ flags |= RSTRING_NOEMBED | STR_SHARED;
+ }
}
+
if ((flags & ENCODING_MASK) == (ENCODING_INLINE_MAX<<ENCODING_SHIFT)) {
encidx = rb_enc_get_index(str);
flags &= ~ENCODING_MASK;
@@ -1629,14 +1770,28 @@ str_duplicate_setup(VALUE klass, VALUE str, VALUE dup)
static inline VALUE
ec_str_duplicate(struct rb_execution_context_struct *ec, VALUE klass, VALUE str)
{
- VALUE dup = ec_str_alloc(ec, klass);
+ VALUE dup;
+ if (FL_TEST(str, STR_NOEMBED)) {
+ dup = ec_str_alloc_heap(ec, klass);
+ }
+ else {
+ dup = ec_str_alloc_embed(ec, klass, RSTRING_EMBED_LEN(str) + TERM_LEN(str));
+ }
+
return str_duplicate_setup(klass, str, dup);
}
static inline VALUE
str_duplicate(VALUE klass, VALUE str)
{
- VALUE dup = str_alloc(klass);
+ VALUE dup;
+ if (FL_TEST(str, STR_NOEMBED)) {
+ dup = str_alloc_heap(klass);
+ }
+ else {
+ dup = str_alloc_embed(klass, RSTRING_EMBED_LEN(str) + TERM_LEN(str));
+ }
+
return str_duplicate_setup(klass, str, dup);
}
@@ -1745,7 +1900,12 @@ rb_str_init(int argc, VALUE *argv, VALUE str)
str_modifiable(str);
if (STR_EMBED_P(str)) { /* make noembed always */
char *new_ptr = ALLOC_N(char, (size_t)capa + termlen);
+#if USE_RVARGC
+ assert(RSTRING(str)->as.embed.len + 1 <= str_embed_capa(str));
+ memcpy(new_ptr, RSTRING(str)->as.embed.ary, RSTRING(str)->as.embed.len + 1);
+#else
memcpy(new_ptr, RSTRING(str)->as.embed.ary, RSTRING_EMBED_LEN_MAX + 1);
+#endif
RSTRING(str)->as.heap.ptr = new_ptr;
}
else if (FL_TEST(str, STR_SHARED|STR_NOFREE)) {
@@ -2133,7 +2293,7 @@ rb_str_times(VALUE str, VALUE times)
return str_duplicate(rb_cString, str);
}
if (times == INT2FIX(0)) {
- str2 = str_alloc(rb_cString);
+ str2 = str_alloc_embed(rb_cString, 0);
rb_enc_copy(str2, str);
return str2;
}
@@ -2142,8 +2302,11 @@ rb_str_times(VALUE str, VALUE times)
rb_raise(rb_eArgError, "negative argument");
}
if (RSTRING_LEN(str) == 1 && RSTRING_PTR(str)[0] == 0) {
- str2 = str_alloc(rb_cString);
- if (!STR_EMBEDDABLE_P(len, 1)) {
+ if (STR_EMBEDDABLE_P(len, 1)) {
+ str2 = str_alloc_embed(rb_cString, len + 1);
+ }
+ else {
+ str2 = str_alloc_heap(rb_cString);
RSTRING(str2)->as.heap.aux.capa = len;
RSTRING(str2)->as.heap.ptr = ZALLOC_N(char, (size_t)len + 1);
STR_SET_NOEMBED(str2);
@@ -2244,11 +2407,11 @@ str_make_independent_expand(VALUE str, long len, long expand, const int termlen)
if (len > capa) len = capa;
- if (!STR_EMBED_P(str) && STR_EMBEDDABLE_P(capa, termlen)) {
+ if (!STR_EMBED_P(str) && str_embed_capa(str) >= capa + termlen) {
ptr = RSTRING(str)->as.heap.ptr;
STR_SET_EMBED(str);
- memcpy(RSTRING(str)->as.embed.ary, ptr, len);
- TERM_FILL(RSTRING(str)->as.embed.ary + len, termlen);
+ memcpy(RSTRING(str)->as.embed.ary, ptr, len);
+ TERM_FILL(RSTRING(str)->as.embed.ary + len, termlen);
STR_SET_EMBED_LEN(str, len);
return;
}
@@ -2646,7 +2809,7 @@ rb_str_subseq(VALUE str, long beg, long len)
}
else {
str2 = rb_str_new(RSTRING_PTR(str)+beg, len);
- RB_GC_GUARD(str);
+ RB_GC_GUARD(str);
}
rb_enc_cr_str_copy_for_substr(str2, str);
@@ -2885,19 +3048,19 @@ rb_str_resize(VALUE str, long len)
const int termlen = TERM_LEN(str);
if (STR_EMBED_P(str)) {
if (len == slen) return str;
- if (STR_EMBEDDABLE_P(len, termlen)) {
+ if (str_embed_capa(str) >= len + termlen) {
STR_SET_EMBED_LEN(str, len);
- TERM_FILL(RSTRING(str)->as.embed.ary + len, termlen);
+ TERM_FILL(RSTRING(str)->as.embed.ary + len, termlen);
return str;
}
str_make_independent_expand(str, slen, len - slen, termlen);
}
- else if (STR_EMBEDDABLE_P(len, termlen)) {
+ else if (str_embed_capa(str) >= len + termlen) {
char *ptr = STR_HEAP_PTR(str);
STR_SET_EMBED(str);
if (slen > len) slen = len;
- if (slen > 0) MEMCPY(RSTRING(str)->as.embed.ary, ptr, char, slen);
- TERM_FILL(RSTRING(str)->as.embed.ary + len, termlen);
+ if (slen > 0) MEMCPY(RSTRING(str)->as.embed.ary, ptr, char, slen);
+ TERM_FILL(RSTRING(str)->as.embed.ary + len, termlen);
STR_SET_EMBED_LEN(str, len);
if (independent) ruby_xfree(ptr);
return str;
@@ -2925,7 +3088,9 @@ str_buf_cat(VALUE str, const char *ptr, long len)
long capa, total, olen, off = -1;
char *sptr;
const int termlen = TERM_LEN(str);
+#if !USE_RVARGC
assert(termlen < RSTRING_EMBED_LEN_MAX + 1); /* < (LONG_MAX/2) */
+#endif
RSTRING_GETMEM(str, sptr, olen);
if (ptr >= sptr && ptr <= sptr + olen) {
@@ -2934,8 +3099,8 @@ str_buf_cat(VALUE str, const char *ptr, long len)
rb_str_modify(str);
if (len == 0) return 0;
if (STR_EMBED_P(str)) {
- capa = RSTRING_EMBED_LEN_MAX + 1 - termlen;
- sptr = RSTRING(str)->as.embed.ary;
+ capa = str_embed_capa(str) - termlen;
+ sptr = RSTRING(str)->as.embed.ary;
olen = RSTRING_EMBED_LEN(str);
}
else {
@@ -4797,17 +4962,21 @@ rb_str_drop_bytes(VALUE str, long len)
str_modifiable(str);
if (len > olen) len = olen;
nlen = olen - len;
- if (STR_EMBEDDABLE_P(nlen, TERM_LEN(str))) {
+ if (str_embed_capa(str) >= nlen + TERM_LEN(str)) {
char *oldptr = ptr;
int fl = (int)(RBASIC(str)->flags & (STR_NOEMBED|STR_SHARED|STR_NOFREE));
STR_SET_EMBED(str);
STR_SET_EMBED_LEN(str, nlen);
- ptr = RSTRING(str)->as.embed.ary;
+ ptr = RSTRING(str)->as.embed.ary;
memmove(ptr, oldptr + len, nlen);
if (fl == STR_NOEMBED) xfree(oldptr);
}
else {
- if (!STR_SHARED_P(str)) rb_str_new_frozen(str);
+ if (!STR_SHARED_P(str)) {
+ VALUE shared = heap_str_make_shared(rb_obj_class(str), str);
+ rb_enc_cr_str_exact_copy(shared, str);
+ OBJ_FREEZE(shared);
+ }
ptr = RSTRING(str)->as.heap.ptr += len;
RSTRING(str)->as.heap.len = nlen;
}
@@ -10465,7 +10634,13 @@ rb_str_force_encoding(VALUE str, VALUE enc)
static VALUE
rb_str_b(VALUE str)
{
- VALUE str2 = str_alloc(rb_cString);
+ VALUE str2;
+ if (FL_TEST(str, STR_NOEMBED)) {
+ str2 = str_alloc_heap(rb_cString);
+ }
+ else {
+ str2 = str_alloc_embed(rb_cString, RSTRING_EMBED_LEN(str) + TERM_LEN(str));
+ }
str_replace_shared_without_enc(str2, str);
ENC_CODERANGE_CLEAR(str2);
return str2;