diff options
Diffstat (limited to 'string.c')
-rw-r--r-- | string.c | 97 |
1 files changed, 69 insertions, 28 deletions
@@ -1127,12 +1127,19 @@ str_replace_shared_without_enc(VALUE str2, VALUE str) TERM_FILL(ptr2+len, termlen); } else { - str = rb_str_new_frozen(str); + VALUE root; + if (STR_SHARED_P(str)) { + root = RSTRING(str)->as.heap.aux.shared; + RSTRING_GETMEM(str, ptr, len); + } + else { + root = rb_str_new_frozen(str); + RSTRING_GETMEM(root, ptr, len); + } FL_SET(str2, STR_NOEMBED); - RSTRING_GETMEM(str, ptr, len); RSTRING(str2)->as.heap.len = len; RSTRING(str2)->as.heap.ptr = ptr; - STR_SET_SHARED(str2, str); + STR_SET_SHARED(str2, root); } return str2; } @@ -1283,6 +1290,7 @@ str_new_empty(VALUE str) } #define STR_BUF_MIN_SIZE 127 +STATIC_ASSERT(STR_BUF_MIN_SIZE, STR_BUF_MIN_SIZE > RSTRING_EMBED_LEN_MAX); VALUE rb_str_buf_new(long capa) @@ -1472,10 +1480,13 @@ str_duplicate(VALUE klass, VALUE str) MEMCPY(RSTRING(dup)->as.ary, RSTRING(str)->as.ary, char, embed_size); if (flags & STR_NOEMBED) { - if (UNLIKELY(!(flags & FL_FREEZE))) { - str = str_new_frozen(klass, str); - FL_SET_RAW(str, flags & FL_TAINT); - flags = FL_TEST_RAW(str, flag_mask); + if (FL_TEST_RAW(str, STR_SHARED)) { + str = RSTRING(str)->as.heap.aux.shared; + } + else if (UNLIKELY(!(flags & FL_FREEZE))) { + str = str_new_frozen(klass, str); + FL_SET_RAW(str, flags & FL_TAINT); + flags = FL_TEST_RAW(str, flag_mask); } if (flags & STR_NOEMBED) { RB_OBJ_WRITE(dup, &RSTRING(dup)->as.heap.aux.shared, str); @@ -1562,7 +1573,18 @@ rb_str_init(int argc, VALUE *argv, VALUE str) } str_modifiable(str); if (STR_EMBED_P(str)) { /* make noembed always */ - RSTRING(str)->as.heap.ptr = ALLOC_N(char, (size_t)capa + termlen); + char *new_ptr = ALLOC_N(char, (size_t)capa + termlen); + memcpy(new_ptr, RSTRING(str)->as.ary, RSTRING_EMBED_LEN_MAX + 1); + RSTRING(str)->as.heap.ptr = new_ptr; + } + else if (FL_TEST(str, STR_SHARED|STR_NOFREE)) { + const size_t size = (size_t)capa + termlen; + const char *const old_ptr = RSTRING_PTR(str); + const size_t osize = RSTRING(str)->as.heap.len + TERM_LEN(str); + char *new_ptr = ALLOC_N(char, (size_t)capa + termlen); + memcpy(new_ptr, old_ptr, osize < size ? osize : size); + FL_UNSET_RAW(str, STR_SHARED); + RSTRING(str)->as.heap.ptr = new_ptr; } else if (STR_HEAP_SIZE(str) != (size_t)capa + termlen) { REALLOC_N(RSTRING(str)->as.heap.ptr, char, (size_t)capa + termlen); @@ -2021,7 +2043,7 @@ static void str_make_independent_expand(VALUE str, long len, long expand, const int termlen) { char *ptr; - const char *oldptr; + char *oldptr; long capa = len + expand; if (len > capa) len = capa; @@ -2040,6 +2062,9 @@ str_make_independent_expand(VALUE str, long len, long expand, const int termlen) if (oldptr) { memcpy(ptr, oldptr, len); } + if (FL_TEST_RAW(str, STR_NOEMBED|STR_NOFREE|STR_SHARED) == STR_NOEMBED) { + xfree(oldptr); + } STR_SET_NOEMBED(str); FL_UNSET(str, STR_SHARED|STR_NOFREE); TERM_FILL(ptr + len, termlen); @@ -4978,7 +5003,7 @@ rb_str_sub_bang(int argc, VALUE *argv, VALUE str) cr = cr2; } plen = end0 - beg0; - rp = RSTRING_PTR(repl); rlen = RSTRING_LEN(repl); + rlen = RSTRING_LEN(repl); len = RSTRING_LEN(str); if (rlen > plen) { RESIZE_CAPA(str, len + rlen - plen); @@ -4987,7 +5012,8 @@ rb_str_sub_bang(int argc, VALUE *argv, VALUE str) if (rlen != plen) { memmove(p + beg0 + rlen, p + beg0 + plen, len - beg0 - plen); } - memcpy(p + beg0, rp, rlen); + rp = RSTRING_PTR(repl); + memmove(p + beg0, rp, rlen); len += rlen - plen; STR_SET_LEN(str, len); TERM_FILL(&RSTRING_PTR(str)[len], TERM_LEN(str)); @@ -6374,6 +6400,23 @@ typedef struct mapping_buffer { OnigUChar space[1]; } mapping_buffer; +static void +mapping_buffer_free(void *p) +{ + mapping_buffer *previous_buffer; + mapping_buffer *current_buffer = p; + while (current_buffer) { + previous_buffer = current_buffer; + current_buffer = current_buffer->next; + ruby_sized_xfree(previous_buffer, previous_buffer->capa); + } +} + +static const rb_data_type_t mapping_buffer_type = { + "mapping_buffer", + {0, mapping_buffer_free,} +}; + static VALUE rb_str_casemap(VALUE source, OnigCaseFoldType *flags, rb_encoding *enc) { @@ -6381,8 +6424,9 @@ rb_str_casemap(VALUE source, OnigCaseFoldType *flags, rb_encoding *enc) OnigUChar *source_current, *source_end; int target_length = 0; - mapping_buffer pre_buffer, /* only next pointer used */ - *current_buffer = &pre_buffer; + VALUE buffer_anchor; + mapping_buffer *current_buffer = 0; + mapping_buffer **pre_buffer; size_t buffer_count = 0; int buffer_length_or_invalid; @@ -6391,14 +6435,17 @@ rb_str_casemap(VALUE source, OnigCaseFoldType *flags, rb_encoding *enc) source_current = (OnigUChar*)RSTRING_PTR(source); source_end = (OnigUChar*)RSTRING_END(source); + buffer_anchor = TypedData_Wrap_Struct(0, &mapping_buffer_type, 0); + pre_buffer = (mapping_buffer **)&DATA_PTR(buffer_anchor); while (source_current < source_end) { /* increase multiplier using buffer count to converge quickly */ size_t capa = (size_t)(source_end-source_current)*++buffer_count + CASE_MAPPING_ADDITIONAL_LENGTH; if (CASEMAP_DEBUG) { fprintf(stderr, "Buffer allocation, capa is %"PRIuSIZE"\n", capa); /* for tuning */ } - current_buffer->next = xmalloc(offsetof(mapping_buffer, space) + capa); - current_buffer = current_buffer->next; + current_buffer = xmalloc(offsetof(mapping_buffer, space) + capa); + *pre_buffer = current_buffer; + pre_buffer = ¤t_buffer->next; current_buffer->next = NULL; current_buffer->capa = capa; buffer_length_or_invalid = enc->case_map(flags, @@ -6407,14 +6454,9 @@ rb_str_casemap(VALUE source, OnigCaseFoldType *flags, rb_encoding *enc) current_buffer->space+current_buffer->capa, enc); if (buffer_length_or_invalid < 0) { - mapping_buffer *previous_buffer; - - current_buffer = pre_buffer.next; - while (current_buffer) { - previous_buffer = current_buffer; - current_buffer = current_buffer->next; - xfree(previous_buffer); - } + current_buffer = DATA_PTR(buffer_anchor); + DATA_PTR(buffer_anchor) = 0; + mapping_buffer_free(current_buffer); rb_raise(rb_eArgError, "input string invalid"); } target_length += current_buffer->used = buffer_length_or_invalid; @@ -6425,23 +6467,22 @@ rb_str_casemap(VALUE source, OnigCaseFoldType *flags, rb_encoding *enc) if (buffer_count==1) { target = rb_str_new_with_class(source, (const char*)current_buffer->space, target_length); - xfree(current_buffer); } else { char *target_current; - mapping_buffer *previous_buffer; target = rb_str_new_with_class(source, 0, target_length); target_current = RSTRING_PTR(target); - current_buffer=pre_buffer.next; + current_buffer = DATA_PTR(buffer_anchor); while (current_buffer) { memcpy(target_current, current_buffer->space, current_buffer->used); target_current += current_buffer->used; - previous_buffer = current_buffer; current_buffer = current_buffer->next; - xfree(previous_buffer); } } + current_buffer = DATA_PTR(buffer_anchor); + DATA_PTR(buffer_anchor) = 0; + mapping_buffer_free(current_buffer); /* TODO: check about string terminator character */ OBJ_INFECT_RAW(target, source); |