summary refs log tree commit diff
path: root/string.c
diff options
context:
space:
mode:
Diffstat (limited to 'string.c')
-rw-r--r-- string.c 121
1 files changed, 72 insertions, 49 deletions
diff --git a/string.c b/string.c
index 134e125431..51f1a255b9 100644
--- a/string.c
+++ b/string.c
@@ -2486,7 +2486,8 @@ rb_str_plus(VALUE str1, VALUE str2)
{
VALUE str3;
rb_encoding *enc;
- char *ptr1, *ptr2, *ptr3;
+ const char *ptr1, *ptr2;
+ char *ptr3;
long len1, len2;
int termlen;
@@ -2912,12 +2913,14 @@ str_null_check(VALUE str, int *w)
return s;
}
+static char *str_to_cstr(VALUE str);
+
const char *
rb_str_null_check(VALUE str)
{
RUBY_ASSERT(RB_TYPE_P(str, T_STRING));
- char *s;
+ const char *s;
long len;
RSTRING_GETMEM(str, s, len);
@@ -2927,14 +2930,7 @@ rb_str_null_check(VALUE str)
}
}
else {
- int w;
- const char *s = str_null_check(str, &w);
- if (!s) {
- if (w) {
- rb_raise(rb_eArgError, "string contains null char");
- }
- rb_raise(rb_eArgError, "string contains null byte");
- }
+ s = str_to_cstr(str); /* return the checked pointer, not the one fetched before the check; NOTE(review): str_null_check may move the buffer -- confirm */
}
return s;
@@ -2951,6 +2947,12 @@ char *
rb_string_value_cstr(volatile VALUE *ptr)
{
VALUE str = rb_string_value(ptr);
+ return str_to_cstr(str);
+}
+
+static char *
+str_to_cstr(VALUE str)
+{
int w;
char *s = str_null_check(str, &w);
if (!s) {
@@ -3127,7 +3129,7 @@ rb_str_sublen(VALUE str, long pos)
if (single_byte_optimizable(str) || pos < 0)
return pos;
else {
- char *p = RSTRING_PTR(str);
+ const char *p = RSTRING_PTR(str);
return enc_strlen(p, p + pos, STR_ENC_GET(str), ENC_CODERANGE(str));
}
}
@@ -3196,7 +3198,7 @@ rb_str_subpos(VALUE str, long beg, long *lenp)
long slen = -1L;
const long blen = RSTRING_LEN(str);
rb_encoding *enc = STR_ENC_GET(str);
- char *p, *s = RSTRING_PTR(str), *e = s + blen;
+ const char *p, *s = RSTRING_PTR(str), *e = s + blen;
if (len < 0) return 0;
if (beg < 0 && -beg < 0) return 0;
@@ -3273,7 +3275,7 @@ rb_str_subpos(VALUE str, long beg, long *lenp)
end:
*lenp = len;
RB_GC_GUARD(str);
- return p;
+ return (char *)p;
}
static VALUE str_substr(VALUE str, long beg, long len, int empty);
@@ -3293,7 +3295,7 @@ rb_str_substr_two_fixnums(VALUE str, VALUE beg, VALUE len, int empty)
static VALUE
str_substr(VALUE str, long beg, long len, int empty)
{
- char *p = rb_str_subpos(str, beg, &len);
+ const char *p = rb_str_subpos(str, beg, &len);
if (!p) return Qnil;
if (!len && !empty) return Qnil;
@@ -4767,10 +4769,9 @@ memrchr(const char *search_str, int chr, long search_len)
static long
str_rindex(VALUE str, VALUE sub, const char *s, rb_encoding *enc)
{
- char *hit, *adjusted;
+ const char *hit, *adjusted, *sbeg, *e, *t;
int c;
long slen, searchlen;
- char *sbeg, *e, *t;
sbeg = RSTRING_PTR(str);
slen = RSTRING_LEN(sub);
@@ -4805,7 +4806,7 @@ static long
rb_str_rindex(VALUE str, VALUE sub, long pos)
{
long len, slen;
- char *sbeg, *s;
+ const char *sbeg, *s;
rb_encoding *enc;
int singlebyte;
@@ -4889,7 +4890,7 @@ static long
rb_str_byterindex(VALUE str, VALUE sub, long pos)
{
long len, slen;
- char *sbeg, *s;
+ const char *sbeg, *s;
rb_encoding *enc;
enc = rb_enc_check(str, sub);
@@ -7251,6 +7252,21 @@ rb_str_escape(VALUE str)
return result;
}
+/* Lookup table for the inspect fast path. 1 marks bytes that need
+ * no escaping. 0 marks bytes that need escape inspection: 0x00-0x1F
+ * (control), 0x22 ("), 0x23 (#), 0x5C (\), 0x7F (DEL), 0x80-0xFF
+ * (non-ASCII; omitted from the initializer, so implicitly zero). */
+static const bool inspect_no_escape[256] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00-0x0F */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x1F */
+ 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x20-0x2F */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x30-0x3F */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x40-0x4F */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, /* 0x50-0x5F */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x60-0x6F */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* 0x70-0x7F */
+};
+
/*
* call-seq:
* inspect -> string
@@ -7266,10 +7282,11 @@ rb_str_inspect(VALUE str)
rb_encoding *enc = rb_enc_from_index(encidx);
const char *p, *pend, *prev;
char buf[CHAR_ESC_LEN + 1];
- VALUE result = rb_str_buf_new(0);
+ VALUE result = rb_str_buf_new(RSTRING_LEN(str) + 2); /* string content + surrounding quotes */
rb_encoding *resenc = rb_default_internal_encoding();
int unicode_p = rb_enc_unicode_p(enc);
int asciicompat = rb_enc_asciicompat(enc);
+ int cr = rb_enc_str_coderange(str);
if (resenc == NULL) resenc = rb_default_external_encoding();
if (!rb_enc_asciicompat(resenc)) resenc = rb_usascii_encoding();
@@ -7282,6 +7299,15 @@ rb_str_inspect(VALUE str)
unsigned int c, cc;
int n;
+ /* Fast path: bulk-skip runs of safe ASCII bytes via a lookup table.
+ * Only well-formed strings (CR=7BIT for any encoding, or UTF-8 VALID)
+ * are eligible. */
+ if (cr == ENC_CODERANGE_7BIT ||
+ (encidx == ENCINDEX_UTF_8 && cr == ENC_CODERANGE_VALID)) {
+ while (p < pend && inspect_no_escape[(unsigned char)*p]) p++;
+ if (p >= pend) break;
+ }
+
n = rb_enc_precise_mbclen(p, pend, enc);
if (!MBCLEN_CHARFOUND_P(n)) {
if (p > prev) str_buf_cat(result, prev, p - prev);
@@ -8257,7 +8283,7 @@ typedef unsigned char *USTR;
struct tr {
int gen;
unsigned int now, max;
- char *p, *pend;
+ const char *p, *pend;
};
static unsigned int
@@ -8987,7 +9013,7 @@ rb_str_count(int argc, VALUE *argv, VALUE str)
char table[TR_TABLE_SIZE];
rb_encoding *enc = 0;
VALUE del = 0, nodel = 0, tstr;
- char *s, *send;
+ const char *s, *send;
int i;
int ascompat;
size_t n = 0;
@@ -9218,12 +9244,12 @@ rb_str_split_m(int argc, VALUE *argv, VALUE str)
str_mod_check(str, str_start, str_len))
beg = 0;
- char *ptr = RSTRING_PTR(str);
- char *const str_start = ptr;
+ const char *ptr = RSTRING_PTR(str);
+ const char *const str_start = ptr;
const long str_len = RSTRING_LEN(str);
- char *const eptr = str_start + str_len;
+ const char *const eptr = str_start + str_len;
if (split_type == SPLIT_TYPE_AWK) {
- char *bptr = ptr;
+ const char *bptr = ptr;
int skip = 1;
unsigned int c;
@@ -9282,8 +9308,8 @@ rb_str_split_m(int argc, VALUE *argv, VALUE str)
}
}
else if (split_type == SPLIT_TYPE_STRING) {
- char *substr_start = ptr;
- char *sptr = RSTRING_PTR(spat);
+ const char *substr_start = ptr;
+ const char *sptr = RSTRING_PTR(spat);
long slen = RSTRING_LEN(spat);
if (result) result = rb_ary_new();
@@ -9292,7 +9318,7 @@ rb_str_split_m(int argc, VALUE *argv, VALUE str)
while (ptr < eptr &&
(end = rb_memsearch(sptr, slen, ptr, eptr - ptr, enc)) >= 0) {
/* Check we are at the start of a char */
- char *t = rb_enc_right_char_head(ptr, ptr + end, eptr, enc);
+ const char *t = rb_enc_right_char_head(ptr, ptr + end, eptr, enc);
if (t != ptr + end) {
ptr = t;
continue;
@@ -9431,8 +9457,8 @@ rb_str_enumerate_lines(int argc, VALUE *argv, VALUE str, VALUE ary)
{
rb_encoding *enc;
VALUE line, rs, orig = str, opts = Qnil, chomp = Qfalse;
- const char *ptr, *pend, *subptr, *subend, *rsptr, *hit, *adjusted;
- long pos, len, rslen;
+ const char *pend, *subptr, *subend, *rsptr, *hit, *adjusted;
+ long pos, rslen;
int rsnewline = 0;
if (rb_scan_args(argc, argv, "01:", &rs, &opts) == 0)
@@ -9457,9 +9483,9 @@ rb_str_enumerate_lines(int argc, VALUE *argv, VALUE str, VALUE ary)
if (!RSTRING_LEN(str)) goto end;
str = rb_str_new_frozen(str);
- ptr = subptr = RSTRING_PTR(str);
+ const char *const ptr = subptr = RSTRING_PTR(str);
+ const long len = RSTRING_LEN(str);
pend = RSTRING_END(str);
- len = RSTRING_LEN(str);
StringValue(rs);
rslen = RSTRING_LEN(rs);
@@ -10096,9 +10122,9 @@ chompped_length(VALUE str, VALUE rs)
{
rb_encoding *enc;
int newline;
- char *pp, *e, *rsptr;
+ const char *pp, *e, *rsptr;
long rslen;
- char *const p = RSTRING_PTR(str);
+ const char *const p = RSTRING_PTR(str);
long len = RSTRING_LEN(str);
if (len == 0) return 0;
@@ -10311,7 +10337,7 @@ static VALUE
rb_str_lstrip_bang(int argc, VALUE *argv, VALUE str)
{
rb_encoding *enc;
- char *start, *s;
+ char *start;
long olen, loffset;
str_modify_keep_cr(str);
@@ -10330,8 +10356,7 @@ rb_str_lstrip_bang(int argc, VALUE *argv, VALUE str)
if (loffset > 0) {
long len = olen-loffset;
- s = start + loffset;
- memmove(start, s, len);
+ memmove(start, start + loffset, len);
STR_SET_LEN(str, len);
TERM_FILL(start+len, rb_enc_mbminlen(enc));
return str;
@@ -10370,7 +10395,7 @@ rb_str_lstrip_bang(int argc, VALUE *argv, VALUE str)
static VALUE
rb_str_lstrip(int argc, VALUE *argv, VALUE str)
{
- char *start;
+ const char *start;
long len, loffset;
RSTRING_GETMEM(str, start, len);
@@ -10406,7 +10431,7 @@ rstrip_offset(VALUE str, const char *s, const char *e, rb_encoding *enc)
while (s < t && ((c = *(t-1)) == '\0' || ascii_isspace(c))) t--;
}
else {
- char *tp;
+ const char *tp;
while ((tp = rb_enc_prev_char(s, t, e, enc)) != NULL) {
unsigned int c = rb_enc_codepoint(tp, e, enc);
@@ -10421,8 +10446,7 @@ static long
rstrip_offset_table(VALUE str, const char *s, const char *e, rb_encoding *enc,
char table[TR_TABLE_SIZE], VALUE del, VALUE nodel)
{
- const char *t;
- char *tp;
+ const char *t, *tp;
rb_str_check_dummy_enc(enc);
if (rb_enc_str_coderange(str) == ENC_CODERANGE_BROKEN) {
@@ -10514,7 +10538,7 @@ static VALUE
rb_str_rstrip(int argc, VALUE *argv, VALUE str)
{
rb_encoding *enc;
- char *start;
+ const char *start;
long olen, roffset;
enc = STR_ENC_GET(str);
@@ -10614,7 +10638,7 @@ rb_str_strip_bang(int argc, VALUE *argv, VALUE str)
static VALUE
rb_str_strip(int argc, VALUE *argv, VALUE str)
{
- char *start;
+ const char *start;
long olen, loffset, roffset;
rb_encoding *enc = STR_ENC_GET(str);
@@ -10707,7 +10731,8 @@ rb_str_scan(VALUE str, VALUE pat)
VALUE result;
long start = 0;
long last = -1, prev = 0;
- char *p = RSTRING_PTR(str); long len = RSTRING_LEN(str);
+ const char *p = RSTRING_PTR(str);
+ long len = RSTRING_LEN(str);
pat = get_pat_quoted(pat, 1);
mustnot_broken(str);
@@ -10955,8 +10980,7 @@ rb_str_crypt(VALUE str, VALUE salt)
# define CRYPT_END() rb_nativethread_lock_unlock(&crypt_mutex.lock)
#endif
VALUE result;
- const char *s, *saltp;
- char *res;
+ const char *s, *saltp, *res;
#ifdef BROKEN_CRYPT
char salt_8bit_clean[3];
#endif
@@ -11001,12 +11025,11 @@ rb_str_crypt(VALUE str, VALUE salt)
// before allocating a new object (the string to be returned). If we allocate while
// holding the lock, we could run GC which fires the VM barrier and causes a deadlock
// if other ractors are waiting on this lock.
- size_t res_size = strlen(res)+1;
+ size_t res_size = strlen(res);
tmp_buf = ALLOCA_N(char, res_size); // should be small enough to alloca
memcpy(tmp_buf, res, res_size);
- res = tmp_buf;
CRYPT_END();
- result = rb_str_new_cstr(res);
+ result = rb_str_new(tmp_buf, res_size);
#endif
return result;
}