diff options
Diffstat (limited to 'string.c')
| -rw-r--r-- | string.c | 121 |
1 files changed, 72 insertions, 49 deletions
@@ -2486,7 +2486,8 @@ rb_str_plus(VALUE str1, VALUE str2) { VALUE str3; rb_encoding *enc; - char *ptr1, *ptr2, *ptr3; + const char *ptr1, *ptr2; + char *ptr3; long len1, len2; int termlen; @@ -2912,12 +2913,14 @@ str_null_check(VALUE str, int *w) return s; } +static char *str_to_cstr(VALUE str); + const char * rb_str_null_check(VALUE str) { RUBY_ASSERT(RB_TYPE_P(str, T_STRING)); - char *s; + const char *s; long len; RSTRING_GETMEM(str, s, len); @@ -2927,14 +2930,7 @@ rb_str_null_check(VALUE str) } } else { - int w; - const char *s = str_null_check(str, &w); - if (!s) { - if (w) { - rb_raise(rb_eArgError, "string contains null char"); - } - rb_raise(rb_eArgError, "string contains null byte"); - } + str_to_cstr(str); } return s; @@ -2951,6 +2947,12 @@ char * rb_string_value_cstr(volatile VALUE *ptr) { VALUE str = rb_string_value(ptr); + return str_to_cstr(str); +} + +static char * +str_to_cstr(VALUE str) +{ int w; char *s = str_null_check(str, &w); if (!s) { @@ -3127,7 +3129,7 @@ rb_str_sublen(VALUE str, long pos) if (single_byte_optimizable(str) || pos < 0) return pos; else { - char *p = RSTRING_PTR(str); + const char *p = RSTRING_PTR(str); return enc_strlen(p, p + pos, STR_ENC_GET(str), ENC_CODERANGE(str)); } } @@ -3196,7 +3198,7 @@ rb_str_subpos(VALUE str, long beg, long *lenp) long slen = -1L; const long blen = RSTRING_LEN(str); rb_encoding *enc = STR_ENC_GET(str); - char *p, *s = RSTRING_PTR(str), *e = s + blen; + const char *p, *s = RSTRING_PTR(str), *e = s + blen; if (len < 0) return 0; if (beg < 0 && -beg < 0) return 0; @@ -3273,7 +3275,7 @@ rb_str_subpos(VALUE str, long beg, long *lenp) end: *lenp = len; RB_GC_GUARD(str); - return p; + return (char *)p; } static VALUE str_substr(VALUE str, long beg, long len, int empty); @@ -3293,7 +3295,7 @@ rb_str_substr_two_fixnums(VALUE str, VALUE beg, VALUE len, int empty) static VALUE str_substr(VALUE str, long beg, long len, int empty) { - char *p = rb_str_subpos(str, beg, &len); + const char *p = rb_str_subpos(str, beg, &len); if (!p) return Qnil; if (!len && !empty) return Qnil; @@ -4767,10 +4769,9 @@ memrchr(const char *search_str, int chr, long search_len) static long str_rindex(VALUE str, VALUE sub, const char *s, rb_encoding *enc) { - char *hit, *adjusted; + const char *hit, *adjusted, *sbeg, *e, *t; int c; long slen, searchlen; - char *sbeg, *e, *t; sbeg = RSTRING_PTR(str); slen = RSTRING_LEN(sub); @@ -4805,7 +4806,7 @@ static long rb_str_rindex(VALUE str, VALUE sub, long pos) { long len, slen; - char *sbeg, *s; + const char *sbeg, *s; rb_encoding *enc; int singlebyte; @@ -4889,7 +4890,7 @@ static long rb_str_byterindex(VALUE str, VALUE sub, long pos) { long len, slen; - char *sbeg, *s; + const char *sbeg, *s; rb_encoding *enc; enc = rb_enc_check(str, sub); @@ -7251,6 +7252,21 @@ rb_str_escape(VALUE str) return result; } +/* Lookup table for the inspect fast path. 1 marks bytes that need + * no escaping. 0 marks bytes that need escape inspection: 0x00-0x1F + * (control), 0x22 ("), 0x23 (#), 0x5C (\), 0x7F (DEL), 0x80-0xFF + * (non-ASCII). */ +static const bool inspect_no_escape[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00-0x0F */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x1F */ + 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x20-0x2F */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x30-0x3F */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x40-0x4F */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, /* 0x50-0x5F */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x60-0x6F */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* 0x70-0x7F */ +}; + /* * call-seq: * inspect -> string @@ -7266,10 +7282,11 @@ rb_str_inspect(VALUE str) rb_encoding *enc = rb_enc_from_index(encidx); const char *p, *pend, *prev; char buf[CHAR_ESC_LEN + 1]; - VALUE result = rb_str_buf_new(0); + VALUE result = rb_str_buf_new(RSTRING_LEN(str) + 2); /* string content + surrounding quotes */ rb_encoding *resenc = rb_default_internal_encoding(); int unicode_p = rb_enc_unicode_p(enc); int asciicompat = rb_enc_asciicompat(enc); + int cr = rb_enc_str_coderange(str); if (resenc == NULL) resenc = rb_default_external_encoding(); if (!rb_enc_asciicompat(resenc)) resenc = rb_usascii_encoding(); @@ -7282,6 +7299,15 @@ rb_str_inspect(VALUE str) unsigned int c, cc; int n; + /* Fast path: bulk-skip runs of safe ASCII bytes via a lookup table. + * Only well-formed strings (CR=7BIT for any encoding, or UTF-8 VALID) + * are eligible. */ + if (cr == ENC_CODERANGE_7BIT || + (encidx == ENCINDEX_UTF_8 && cr == ENC_CODERANGE_VALID)) { + while (p < pend && inspect_no_escape[(unsigned char)*p]) p++; + if (p >= pend) break; + } + n = rb_enc_precise_mbclen(p, pend, enc); if (!MBCLEN_CHARFOUND_P(n)) { if (p > prev) str_buf_cat(result, prev, p - prev); @@ -8257,7 +8283,7 @@ typedef unsigned char *USTR; struct tr { int gen; unsigned int now, max; - char *p, *pend; + const char *p, *pend; }; static unsigned int @@ -8987,7 +9013,7 @@ rb_str_count(int argc, VALUE *argv, VALUE str) char table[TR_TABLE_SIZE]; rb_encoding *enc = 0; VALUE del = 0, nodel = 0, tstr; - char *s, *send; + const char *s, *send; int i; int ascompat; size_t n = 0; @@ -9218,12 +9244,12 @@ rb_str_split_m(int argc, VALUE *argv, VALUE str) str_mod_check(str, str_start, str_len)) beg = 0; - char *ptr = RSTRING_PTR(str); - char *const str_start = ptr; + const char *ptr = RSTRING_PTR(str); + const char *const str_start = ptr; const long str_len = RSTRING_LEN(str); - char *const eptr = str_start + str_len; + const char *const eptr = str_start + str_len; if (split_type == SPLIT_TYPE_AWK) { - char *bptr = ptr; + const char *bptr = ptr; int skip = 1; unsigned int c; @@ -9282,8 +9308,8 @@ rb_str_split_m(int argc, VALUE *argv, VALUE str) } } else if (split_type == SPLIT_TYPE_STRING) { - char *substr_start = ptr; - char *sptr = RSTRING_PTR(spat); + const char *substr_start = ptr; + const char *sptr = RSTRING_PTR(spat); long slen = RSTRING_LEN(spat); if (result) result = rb_ary_new(); @@ -9292,7 +9318,7 @@ rb_str_split_m(int argc, VALUE *argv, VALUE str) while (ptr < eptr && (end = rb_memsearch(sptr, slen, ptr, eptr - ptr, enc)) >= 0) { /* Check we are at the start of a char */ - char *t = rb_enc_right_char_head(ptr, ptr + end, eptr, enc); + const char *t = rb_enc_right_char_head(ptr, ptr + end, eptr, enc); if (t != ptr + end) { ptr = t; continue; @@ -9431,8 +9457,8 @@ rb_str_enumerate_lines(int argc, VALUE *argv, VALUE str, VALUE ary) { rb_encoding *enc; VALUE line, rs, orig = str, opts = Qnil, chomp = Qfalse; - const char *ptr, *pend, *subptr, *subend, *rsptr, *hit, *adjusted; - long pos, len, rslen; + const char *pend, *subptr, *subend, *rsptr, *hit, *adjusted; + long pos, rslen; int rsnewline = 0; if (rb_scan_args(argc, argv, "01:", &rs, &opts) == 0) @@ -9457,9 +9483,9 @@ rb_str_enumerate_lines(int argc, VALUE *argv, VALUE str, VALUE ary) if (!RSTRING_LEN(str)) goto end; str = rb_str_new_frozen(str); - ptr = subptr = RSTRING_PTR(str); + const char *const ptr = subptr = RSTRING_PTR(str); + const long len = RSTRING_LEN(str); pend = RSTRING_END(str); - len = RSTRING_LEN(str); StringValue(rs); rslen = RSTRING_LEN(rs); @@ -10096,9 +10122,9 @@ chompped_length(VALUE str, VALUE rs) { rb_encoding *enc; int newline; - char *pp, *e, *rsptr; + const char *pp, *e, *rsptr; long rslen; - char *const p = RSTRING_PTR(str); + const char *const p = RSTRING_PTR(str); long len = RSTRING_LEN(str); if (len == 0) return 0; @@ -10311,7 +10337,7 @@ static VALUE rb_str_lstrip_bang(int argc, VALUE *argv, VALUE str) { rb_encoding *enc; - char *start, *s; + char *start; long olen, loffset; str_modify_keep_cr(str); @@ -10330,8 +10356,7 @@ rb_str_lstrip_bang(int argc, VALUE *argv, VALUE str) if (loffset > 0) { long len = olen-loffset; - s = start + loffset; - memmove(start, s, len); + memmove(start, start + loffset, len); STR_SET_LEN(str, len); TERM_FILL(start+len, rb_enc_mbminlen(enc)); return str; @@ -10370,7 +10395,7 @@ rb_str_lstrip_bang(int argc, VALUE *argv, VALUE str) static VALUE rb_str_lstrip(int argc, VALUE *argv, VALUE str) { - char *start; + const char *start; long len, loffset; RSTRING_GETMEM(str, start, len); @@ -10406,7 +10431,7 @@ rstrip_offset(VALUE str, const char *s, const char *e, rb_encoding *enc) while (s < t && ((c = *(t-1)) == '\0' || ascii_isspace(c))) t--; } else { - char *tp; + const char *tp; while ((tp = rb_enc_prev_char(s, t, e, enc)) != NULL) { unsigned int c = rb_enc_codepoint(tp, e, enc); @@ -10421,8 +10446,7 @@ static long rstrip_offset_table(VALUE str, const char *s, const char *e, rb_encoding *enc, char table[TR_TABLE_SIZE], VALUE del, VALUE nodel) { - const char *t; - char *tp; + const char *t, *tp; rb_str_check_dummy_enc(enc); if (rb_enc_str_coderange(str) == ENC_CODERANGE_BROKEN) { @@ -10514,7 +10538,7 @@ static VALUE rb_str_rstrip(int argc, VALUE *argv, VALUE str) { rb_encoding *enc; - char *start; + const char *start; long olen, roffset; enc = STR_ENC_GET(str); @@ -10614,7 +10638,7 @@ rb_str_strip_bang(int argc, VALUE *argv, VALUE str) static VALUE rb_str_strip(int argc, VALUE *argv, VALUE str) { - char *start; + const char *start; long olen, loffset, roffset; rb_encoding *enc = STR_ENC_GET(str); @@ -10707,7 +10731,8 @@ rb_str_scan(VALUE str, VALUE pat) VALUE result; long start = 0; long last = -1, prev = 0; - char *p = RSTRING_PTR(str); long len = RSTRING_LEN(str); + const char *p = RSTRING_PTR(str); + long len = RSTRING_LEN(str); pat = get_pat_quoted(pat, 1); mustnot_broken(str); @@ -10955,8 +10980,7 @@ rb_str_crypt(VALUE str, VALUE salt) # define CRYPT_END() rb_nativethread_lock_unlock(&crypt_mutex.lock) #endif VALUE result; - const char *s, *saltp; - char *res; + const char *s, *saltp, *res; #ifdef BROKEN_CRYPT char salt_8bit_clean[3]; #endif @@ -11001,12 +11025,11 @@ rb_str_crypt(VALUE str, VALUE salt) // before allocating a new object (the string to be returned). If we allocate while // holding the lock, we could run GC which fires the VM barrier and causes a deadlock // if other ractors are waiting on this lock. - size_t res_size = strlen(res)+1; + size_t res_size = strlen(res); tmp_buf = ALLOCA_N(char, res_size); // should be small enough to alloca memcpy(tmp_buf, res, res_size); - res = tmp_buf; CRYPT_END(); - result = rb_str_new_cstr(res); + result = rb_str_new(tmp_buf, res_size); #endif return result; } |
