summaryrefslogtreecommitdiff
path: root/string.c
diff options
context:
space:
mode:
authorPeter Zhu <peter@peterzhu.ca>2023-06-06 10:19:20 -0400
committerGitHub <noreply@github.com>2023-06-06 10:19:20 -0400
commit7577c101ed6452de3e72fadb43db595946acc701 (patch)
treedeed85a09fc431132145897d7e1982fd61c74e0c /string.c
parentfae2f80d06f5058b40e91f62ba27fb01f2463d12 (diff)
Unify length field for embedded and heap strings (#7908)
* Unify length field for embedded and heap strings The length field is of the same type and position in RString for both embedded and heap allocated strings, so we can unify it. * Remove RSTRING_EMBED_LEN
Notes
Notes: Merged-By: maximecb <maximecb@ruby-lang.org>
Diffstat (limited to 'string.c')
-rw-r--r--string.c104
1 files changed, 48 insertions, 56 deletions
diff --git a/string.c b/string.c
index 470bc59b76..a7d05693f1 100644
--- a/string.c
+++ b/string.c
@@ -110,18 +110,9 @@ VALUE rb_cSymbol;
FL_UNSET((str), STR_SHARED | STR_SHARED_ROOT | STR_BORROWED);\
} while (0)
#define STR_SET_EMBED(str) FL_UNSET((str), (STR_NOEMBED|STR_NOFREE))
-# define STR_SET_EMBED_LEN(str, n) do { \
- assert(str_embed_capa(str) > (n));\
- RSTRING(str)->as.embed.len = (n);\
-} while (0)
#define STR_SET_LEN(str, n) do { \
- if (STR_EMBED_P(str)) {\
- STR_SET_EMBED_LEN((str), (n));\
- }\
- else {\
- RSTRING(str)->as.heap.len = (n);\
- }\
+ RSTRING(str)->len = (n); \
} while (0)
static inline bool
@@ -158,7 +149,7 @@ str_enc_fastpath(VALUE str)
const long tlen = RSTRING_LEN(str);\
memcpy(tmp, RSTRING_PTR(str), tlen);\
RSTRING(str)->as.heap.ptr = tmp;\
- RSTRING(str)->as.heap.len = tlen;\
+ RSTRING(str)->len = tlen;\
STR_SET_NOEMBED(str);\
RSTRING(str)->as.heap.aux.capa = (capacity);\
}\
@@ -222,7 +213,7 @@ rb_str_size_as_embedded(VALUE str)
{
size_t real_size;
if (STR_EMBED_P(str)) {
- real_size = rb_str_embed_size(RSTRING(str)->as.embed.len) + TERM_LEN(str);
+ real_size = rb_str_embed_size(RSTRING(str)->len) + TERM_LEN(str);
}
/* if the string is not currently embedded, but it can be embedded, how
* much space would it require */
@@ -275,10 +266,10 @@ rb_str_make_embedded(VALUE str)
RUBY_ASSERT(!STR_EMBED_P(str));
char *buf = RSTRING(str)->as.heap.ptr;
- long len = RSTRING(str)->as.heap.len;
+ long len = RSTRING(str)->len;
STR_SET_EMBED(str);
- STR_SET_EMBED_LEN(str, len);
+ STR_SET_LEN(str, len);
if (len > 0) {
memcpy(RSTRING_PTR(str), buf, len);
@@ -382,13 +373,13 @@ fstr_update_callback(st_data_t *key, st_data_t *value, st_data_t data, int exist
else {
if (FL_TEST_RAW(str, STR_FAKESTR)) {
if (arg->copy) {
- VALUE new_str = str_new(rb_cString, RSTRING(str)->as.heap.ptr, RSTRING(str)->as.heap.len);
+ VALUE new_str = str_new(rb_cString, RSTRING(str)->as.heap.ptr, RSTRING(str)->len);
rb_enc_copy(new_str, str);
str = new_str;
}
else {
str = str_new_static(rb_cString, RSTRING(str)->as.heap.ptr,
- RSTRING(str)->as.heap.len,
+ RSTRING(str)->len,
ENCODING_GET(str));
}
OBJ_FREEZE_RAW(str);
@@ -486,7 +477,7 @@ setup_fake_str(struct RString *fake_str, const char *name, long len, int encidx)
ENCODING_SET_INLINED((VALUE)fake_str, encidx);
RBASIC_SET_CLASS_RAW((VALUE)fake_str, rb_cString);
- fake_str->as.heap.len = len;
+ fake_str->len = len;
fake_str->as.heap.ptr = (char *)name;
fake_str->as.heap.aux.capa = len;
return (VALUE)fake_str;
@@ -832,7 +823,7 @@ str_capacity(VALUE str, const int termlen)
return str_embed_capa(str) - termlen;
}
else if (FL_TEST(str, STR_SHARED|STR_NOFREE)) {
- return RSTRING(str)->as.heap.len;
+ return RSTRING(str)->len;
}
else {
return RSTRING(str)->as.heap.aux.capa;
@@ -1012,7 +1003,7 @@ str_new_static(VALUE klass, const char *ptr, long len, int encindex)
else {
RUBY_DTRACE_CREATE_HOOK(STRING, len);
str = str_alloc_heap(klass);
- RSTRING(str)->as.heap.len = len;
+ RSTRING(str)->len = len;
RSTRING(str)->as.heap.ptr = (char *)ptr;
RSTRING(str)->as.heap.aux.capa = len;
RBASIC(str)->flags |= STR_NOFREE;
@@ -1296,7 +1287,6 @@ str_replace_shared_without_enc(VALUE str2, VALUE str)
char *ptr2 = RSTRING(str2)->as.embed.ary;
STR_SET_EMBED(str2);
memcpy(ptr2, RSTRING_PTR(str), len);
- STR_SET_EMBED_LEN(str2, len);
TERM_FILL(ptr2+len, termlen);
}
else {
@@ -1320,10 +1310,12 @@ str_replace_shared_without_enc(VALUE str2, VALUE str)
}
}
FL_SET(str2, STR_NOEMBED);
- RSTRING(str2)->as.heap.len = len;
RSTRING(str2)->as.heap.ptr = ptr;
STR_SET_SHARED(str2, root);
}
+
+ STR_SET_LEN(str2, len);
+
return str2;
}
@@ -1383,7 +1375,7 @@ rb_str_tmp_frozen_release(VALUE orig, VALUE tmp)
if (shared == tmp && !FL_TEST_RAW(tmp, STR_BORROWED)) {
assert(RSTRING(orig)->as.heap.ptr == RSTRING(tmp)->as.heap.ptr);
- assert(RSTRING(orig)->as.heap.len == RSTRING(tmp)->as.heap.len);
+ assert(RSTRING_LEN(orig) == RSTRING_LEN(tmp));
/* Unshare orig since the root (tmp) only has this one child. */
FL_UNSET_RAW(orig, STR_SHARED);
@@ -1393,7 +1385,7 @@ rb_str_tmp_frozen_release(VALUE orig, VALUE tmp)
/* Make tmp embedded and empty so it is safe for sweeping. */
STR_SET_EMBED(tmp);
- STR_SET_EMBED_LEN(tmp, 0);
+ STR_SET_LEN(tmp, 0);
}
}
}
@@ -1411,7 +1403,7 @@ heap_str_make_shared(VALUE klass, VALUE orig)
assert(!STR_SHARED_P(orig));
VALUE str = str_alloc_heap(klass);
- RSTRING(str)->as.heap.len = RSTRING_LEN(orig);
+ STR_SET_LEN(str, RSTRING_LEN(orig));
RSTRING(str)->as.heap.ptr = RSTRING_PTR(orig);
RSTRING(str)->as.heap.aux.capa = RSTRING(orig)->as.heap.aux.capa;
RBASIC(str)->flags |= RBASIC(orig)->flags & STR_NOFREE;
@@ -1438,7 +1430,7 @@ str_new_frozen_buffer(VALUE klass, VALUE orig, int copy_encoding)
if (FL_TEST_RAW(orig, STR_SHARED)) {
VALUE shared = RSTRING(orig)->as.heap.aux.shared;
long ofs = RSTRING(orig)->as.heap.ptr - RSTRING_PTR(shared);
- long rest = RSTRING_LEN(shared) - ofs - RSTRING(orig)->as.heap.len;
+ long rest = RSTRING_LEN(shared) - ofs - RSTRING_LEN(orig);
assert(ofs >= 0);
assert(rest >= 0);
assert(ofs + rest <= RSTRING_LEN(shared));
@@ -1450,7 +1442,7 @@ str_new_frozen_buffer(VALUE klass, VALUE orig, int copy_encoding)
str = str_new_shared(klass, shared);
assert(!STR_EMBED_P(str));
RSTRING(str)->as.heap.ptr += ofs;
- RSTRING(str)->as.heap.len -= ofs + rest;
+ STR_SET_LEN(str, RSTRING_LEN(str) - (ofs + rest));
}
else {
if (RBASIC_CLASS(shared) == 0)
@@ -1462,7 +1454,7 @@ str_new_frozen_buffer(VALUE klass, VALUE orig, int copy_encoding)
str = str_alloc_embed(klass, RSTRING_LEN(orig) + TERM_LEN(orig));
STR_SET_EMBED(str);
memcpy(RSTRING_PTR(str), RSTRING_PTR(orig), RSTRING_LEN(orig));
- STR_SET_EMBED_LEN(str, RSTRING_LEN(orig));
+ STR_SET_LEN(str, RSTRING_LEN(orig));
TERM_FILL(RSTRING_END(str), TERM_LEN(orig));
}
else {
@@ -1591,23 +1583,24 @@ str_shared_replace(VALUE str, VALUE str2)
str_discard(str);
termlen = rb_enc_mbminlen(enc);
+ STR_SET_LEN(str, RSTRING_LEN(str2));
+
if (str_embed_capa(str) >= RSTRING_LEN(str2) + termlen) {
STR_SET_EMBED(str);
memcpy(RSTRING_PTR(str), RSTRING_PTR(str2), (size_t)RSTRING_LEN(str2) + termlen);
- STR_SET_EMBED_LEN(str, RSTRING_LEN(str2));
rb_enc_associate(str, enc);
ENC_CODERANGE_SET(str, cr);
}
else {
if (STR_EMBED_P(str2)) {
assert(!FL_TEST(str2, STR_SHARED));
- long len = RSTRING(str2)->as.embed.len;
+ long len = RSTRING_LEN(str2);
assert(len + termlen <= str_embed_capa(str2));
char *new_ptr = ALLOC_N(char, len + termlen);
memcpy(new_ptr, RSTRING(str2)->as.embed.ary, len + termlen);
RSTRING(str2)->as.heap.ptr = new_ptr;
- RSTRING(str2)->as.heap.len = len;
+ STR_SET_LEN(str2, len);
RSTRING(str2)->as.heap.aux.capa = len;
STR_SET_NOEMBED(str2);
}
@@ -1615,7 +1608,6 @@ str_shared_replace(VALUE str, VALUE str2)
STR_SET_NOEMBED(str);
FL_UNSET(str, STR_SHARED);
RSTRING(str)->as.heap.ptr = RSTRING_PTR(str2);
- RSTRING(str)->as.heap.len = RSTRING_LEN(str2);
if (FL_TEST(str2, STR_SHARED)) {
VALUE shared = RSTRING(str2)->as.heap.aux.shared;
@@ -1628,7 +1620,7 @@ str_shared_replace(VALUE str, VALUE str2)
/* abandon str2 */
STR_SET_EMBED(str2);
RSTRING_PTR(str2)[0] = 0;
- STR_SET_EMBED_LEN(str2, 0);
+ STR_SET_LEN(str2, 0);
rb_enc_associate(str, enc);
ENC_CODERANGE_SET(str, cr);
}
@@ -1664,7 +1656,7 @@ str_replace(VALUE str, VALUE str2)
VALUE shared = RSTRING(str2)->as.heap.aux.shared;
assert(OBJ_FROZEN(shared));
STR_SET_NOEMBED(str);
- RSTRING(str)->as.heap.len = len;
+ STR_SET_LEN(str, len);
RSTRING(str)->as.heap.ptr = RSTRING_PTR(str2);
STR_SET_SHARED(str, shared);
rb_enc_cr_str_exact_copy(str, str2);
@@ -1708,11 +1700,10 @@ str_duplicate_setup(VALUE klass, VALUE str, VALUE dup)
VALUE flags = FL_TEST_RAW(str, flag_mask);
int encidx = 0;
if (STR_EMBED_P(str)) {
- long len = RSTRING_EMBED_LEN(str);
+ long len = RSTRING_LEN(str);
assert(STR_EMBED_P(dup));
assert(str_embed_capa(dup) >= len + 1);
- STR_SET_EMBED_LEN(dup, len);
MEMCPY(RSTRING(dup)->as.embed.ary, RSTRING(str)->as.embed.ary, char, len + 1);
}
else {
@@ -1727,13 +1718,14 @@ str_duplicate_setup(VALUE klass, VALUE str, VALUE dup)
assert(!STR_SHARED_P(root));
assert(RB_OBJ_FROZEN_RAW(root));
- RSTRING(dup)->as.heap.len = RSTRING_LEN(str);
RSTRING(dup)->as.heap.ptr = RSTRING_PTR(str);
FL_SET(root, STR_SHARED_ROOT);
RB_OBJ_WRITE(dup, &RSTRING(dup)->as.heap.aux.shared, root);
flags |= RSTRING_NOEMBED | STR_SHARED;
}
+ STR_SET_LEN(dup, RSTRING_LEN(str));
+
if ((flags & ENCODING_MASK) == (ENCODING_INLINE_MAX<<ENCODING_SHIFT)) {
encidx = rb_enc_get_index(str);
flags &= ~ENCODING_MASK;
@@ -1751,7 +1743,7 @@ ec_str_duplicate(struct rb_execution_context_struct *ec, VALUE klass, VALUE str)
dup = ec_str_alloc_heap(ec, klass);
}
else {
- dup = ec_str_alloc_embed(ec, klass, RSTRING_EMBED_LEN(str) + TERM_LEN(str));
+ dup = ec_str_alloc_embed(ec, klass, RSTRING_LEN(str) + TERM_LEN(str));
}
return str_duplicate_setup(klass, str, dup);
@@ -1765,7 +1757,7 @@ str_duplicate(VALUE klass, VALUE str)
dup = str_alloc_heap(klass);
}
else {
- dup = str_alloc_embed(klass, RSTRING_EMBED_LEN(str) + TERM_LEN(str));
+ dup = str_alloc_embed(klass, RSTRING_LEN(str) + TERM_LEN(str));
}
return str_duplicate_setup(klass, str, dup);
@@ -1841,14 +1833,14 @@ rb_str_init(int argc, VALUE *argv, VALUE str)
str_modifiable(str);
if (STR_EMBED_P(str)) { /* make noembed always */
char *new_ptr = ALLOC_N(char, (size_t)capa + termlen);
- assert(RSTRING(str)->as.embed.len + 1 <= str_embed_capa(str));
- memcpy(new_ptr, RSTRING(str)->as.embed.ary, RSTRING(str)->as.embed.len + 1);
+ assert(RSTRING_LEN(str) + 1 <= str_embed_capa(str));
+ memcpy(new_ptr, RSTRING(str)->as.embed.ary, RSTRING_LEN(str) + 1);
RSTRING(str)->as.heap.ptr = new_ptr;
}
else if (FL_TEST(str, STR_SHARED|STR_NOFREE)) {
const size_t size = (size_t)capa + termlen;
const char *const old_ptr = RSTRING_PTR(str);
- const size_t osize = RSTRING(str)->as.heap.len + TERM_LEN(str);
+ const size_t osize = RSTRING_LEN(str) + TERM_LEN(str);
char *new_ptr = ALLOC_N(char, (size_t)capa + termlen);
memcpy(new_ptr, old_ptr, osize < size ? osize : size);
FL_UNSET_RAW(str, STR_SHARED|STR_NOFREE);
@@ -1858,7 +1850,7 @@ rb_str_init(int argc, VALUE *argv, VALUE str)
SIZED_REALLOC_N(RSTRING(str)->as.heap.ptr, char,
(size_t)capa + termlen, STR_HEAP_SIZE(str));
}
- RSTRING(str)->as.heap.len = len;
+ STR_SET_LEN(str, len);
TERM_FILL(&RSTRING(str)->as.heap.ptr[len], termlen);
if (n == 1) {
memcpy(RSTRING(str)->as.heap.ptr, RSTRING_PTR(orig), len);
@@ -2350,7 +2342,7 @@ str_make_independent_expand(VALUE str, long len, long expand, const int termlen)
STR_SET_EMBED(str);
memcpy(RSTRING(str)->as.embed.ary, ptr, len);
TERM_FILL(RSTRING(str)->as.embed.ary + len, termlen);
- STR_SET_EMBED_LEN(str, len);
+ STR_SET_LEN(str, len);
return;
}
@@ -2366,7 +2358,7 @@ str_make_independent_expand(VALUE str, long len, long expand, const int termlen)
FL_UNSET(str, STR_SHARED|STR_NOFREE);
TERM_FILL(ptr + len, termlen);
RSTRING(str)->as.heap.ptr = ptr;
- RSTRING(str)->as.heap.len = len;
+ STR_SET_LEN(str, len);
RSTRING(str)->as.heap.aux.capa = capa;
}
@@ -2418,7 +2410,7 @@ str_discard(VALUE str)
if (!STR_EMBED_P(str) && !FL_TEST(str, STR_SHARED|STR_NOFREE)) {
ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
RSTRING(str)->as.heap.ptr = 0;
- RSTRING(str)->as.heap.len = 0;
+ STR_SET_LEN(str, 0);
}
}
@@ -2751,8 +2743,8 @@ str_subseq(VALUE str, long beg, long len)
str2 = str_new_shared(rb_cString, str);
ENC_CODERANGE_CLEAR(str2);
RSTRING(str2)->as.heap.ptr += beg;
- if (RSTRING(str2)->as.heap.len > len) {
- RSTRING(str2)->as.heap.len = len;
+ if (RSTRING_LEN(str2) > len) {
+ STR_SET_LEN(str2, len);
}
}
@@ -3001,7 +2993,7 @@ rb_str_resize(VALUE str, long len)
if (STR_EMBED_P(str)) {
if (len == slen) return str;
if (str_embed_capa(str) >= len + termlen) {
- STR_SET_EMBED_LEN(str, len);
+ STR_SET_LEN(str, len);
TERM_FILL(RSTRING(str)->as.embed.ary + len, termlen);
return str;
}
@@ -3013,7 +3005,7 @@ rb_str_resize(VALUE str, long len)
if (slen > len) slen = len;
if (slen > 0) MEMCPY(RSTRING(str)->as.embed.ary, ptr, char, slen);
TERM_FILL(RSTRING(str)->as.embed.ary + len, termlen);
- STR_SET_EMBED_LEN(str, len);
+ STR_SET_LEN(str, len);
if (independent) ruby_xfree(ptr);
return str;
}
@@ -3028,7 +3020,7 @@ rb_str_resize(VALUE str, long len)
RSTRING(str)->as.heap.aux.capa = len;
}
else if (len == slen) return str;
- RSTRING(str)->as.heap.len = len;
+ STR_SET_LEN(str, len);
TERM_FILL(RSTRING(str)->as.heap.ptr + len, termlen); /* sentinel */
}
return str;
@@ -5158,7 +5150,6 @@ rb_str_drop_bytes(VALUE str, long len)
char *oldptr = ptr;
int fl = (int)(RBASIC(str)->flags & (STR_NOEMBED|STR_SHARED|STR_NOFREE));
STR_SET_EMBED(str);
- STR_SET_EMBED_LEN(str, nlen);
ptr = RSTRING(str)->as.embed.ary;
memmove(ptr, oldptr + len, nlen);
if (fl == STR_NOEMBED) xfree(oldptr);
@@ -5170,8 +5161,9 @@ rb_str_drop_bytes(VALUE str, long len)
OBJ_FREEZE(shared);
}
ptr = RSTRING(str)->as.heap.ptr += len;
- RSTRING(str)->as.heap.len = nlen;
}
+ STR_SET_LEN(str, nlen);
+
ptr[nlen] = 0;
ENC_CODERANGE_CLEAR(str);
return str;
@@ -5946,7 +5938,7 @@ rb_str_clear(VALUE str)
{
str_discard(str);
STR_SET_EMBED(str);
- STR_SET_EMBED_LEN(str, 0);
+ STR_SET_LEN(str, 0);
RSTRING_PTR(str)[0] = 0;
if (rb_enc_asciicompat(STR_ENC_GET(str)))
ENC_CODERANGE_SET(str, ENC_CODERANGE_7BIT);
@@ -7926,7 +7918,7 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag)
}
TERM_FILL((char *)t, termlen);
RSTRING(str)->as.heap.ptr = (char *)buf;
- RSTRING(str)->as.heap.len = t - buf;
+ STR_SET_LEN(str, t - buf);
STR_SET_NOEMBED(str);
RSTRING(str)->as.heap.aux.capa = max;
}
@@ -8002,7 +7994,7 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag)
}
TERM_FILL((char *)t, termlen);
RSTRING(str)->as.heap.ptr = (char *)buf;
- RSTRING(str)->as.heap.len = t - buf;
+ STR_SET_LEN(str, t - buf);
STR_SET_NOEMBED(str);
RSTRING(str)->as.heap.aux.capa = max;
}
@@ -10722,7 +10714,7 @@ rb_str_b(VALUE str)
str2 = str_alloc_heap(rb_cString);
}
else {
- str2 = str_alloc_embed(rb_cString, RSTRING_EMBED_LEN(str) + TERM_LEN(str));
+ str2 = str_alloc_embed(rb_cString, RSTRING_LEN(str) + TERM_LEN(str));
}
str_replace_shared_without_enc(str2, str);