diff options
Diffstat (limited to 'internal/string.h')
| -rw-r--r-- | internal/string.h | 36 |
1 files changed, 36 insertions, 0 deletions
diff --git a/internal/string.h b/internal/string.h index 50561924f2..9212ce8986 100644 --- a/internal/string.h +++ b/internal/string.h @@ -14,6 +14,7 @@ #include "ruby/internal/stdbool.h" /* for bool */ #include "ruby/encoding.h" /* for rb_encoding */ #include "ruby/ruby.h" /* for VALUE */ +#include "encindex.h" #define STR_SHARED FL_USER0 /* = ELTS_SHARED */ #define STR_NOEMBED FL_USER1 @@ -29,7 +30,41 @@ enum ruby_rstring_private_flags { # undef rb_fstring_cstr #endif +static inline bool +rb_str_encindex_fastpath(int encindex) +{ + // The overwhelming majority of strings are in one of these 3 encodings, + // which are all either ASCII or perfect ASCII supersets. + // Hence you can use fast, single byte algorithms on them, such as `memchr` etc, + // without all the overhead of fetching the rb_encoding and using functions such as + // rb_enc_mbminlen etc. + // Many other encodings could qualify, but they are expected to be rare occurences, + // so it's better to keep that list small. + switch (encindex) { + case ENCINDEX_ASCII_8BIT: + case ENCINDEX_UTF_8: + case ENCINDEX_US_ASCII: + return true; + default: + return false; + } +} + +static inline bool +rb_str_enc_fastpath(VALUE str) +{ + return rb_str_encindex_fastpath(ENCODING_GET_INLINED(str)); +} + +static inline rb_encoding * +rb_str_enc_get(VALUE str) +{ + RUBY_ASSERT(RB_TYPE_P(str, T_STRING)); + return rb_enc_from_index(ENCODING_GET(str)); +} + /* string.c */ +VALUE rb_str_dup_m(VALUE str); VALUE rb_fstring(VALUE); VALUE rb_fstring_cstr(const char *str); VALUE rb_fstring_enc_new(const char *ptr, long len, rb_encoding *enc); @@ -62,6 +97,7 @@ bool rb_str_reembeddable_p(VALUE); VALUE rb_str_upto_endless_each(VALUE, int (*each)(VALUE, VALUE), VALUE); VALUE rb_str_with_debug_created_info(VALUE, VALUE, int); VALUE rb_str_frozen_bare_string(VALUE); +const char *rb_str_null_check(VALUE); /* error.c */ void rb_warn_unchilled_literal(VALUE str); |
