diff options
Diffstat (limited to 'internal/string.h')
| -rw-r--r-- | internal/string.h | 84 |
1 files changed, 68 insertions, 16 deletions
diff --git a/internal/string.h b/internal/string.h index 3533766ffb..9212ce8986 100644 --- a/internal/string.h +++ b/internal/string.h @@ -14,10 +14,13 @@ #include "ruby/internal/stdbool.h" /* for bool */ #include "ruby/encoding.h" /* for rb_encoding */ #include "ruby/ruby.h" /* for VALUE */ +#include "encindex.h" -#define STR_NOEMBED FL_USER1 -#define STR_SHARED FL_USER2 /* = ELTS_SHARED */ -#define STR_CHILLED FL_USER3 +#define STR_SHARED FL_USER0 /* = ELTS_SHARED */ +#define STR_NOEMBED FL_USER1 +#define STR_CHILLED (FL_USER2 | FL_USER3) +#define STR_CHILLED_LITERAL FL_USER2 +#define STR_CHILLED_SYMBOL_TO_S FL_USER3 enum ruby_rstring_private_flags { RSTRING_CHILLED = STR_CHILLED, @@ -27,7 +30,41 @@ enum ruby_rstring_private_flags { # undef rb_fstring_cstr #endif +static inline bool +rb_str_encindex_fastpath(int encindex) +{ + // The overwhelming majority of strings are in one of these 3 encodings, + // which are all either ASCII or perfect ASCII supersets. + // Hence you can use fast, single byte algorithms on them, such as `memchr` etc, + // without all the overhead of fetching the rb_encoding and using functions such as + // rb_enc_mbminlen etc. + // Many other encodings could qualify, but they are expected to be rare occurences, + // so it's better to keep that list small. + switch (encindex) { + case ENCINDEX_ASCII_8BIT: + case ENCINDEX_UTF_8: + case ENCINDEX_US_ASCII: + return true; + default: + return false; + } +} + +static inline bool +rb_str_enc_fastpath(VALUE str) +{ + return rb_str_encindex_fastpath(ENCODING_GET_INLINED(str)); +} + +static inline rb_encoding * +rb_str_enc_get(VALUE str) +{ + RUBY_ASSERT(RB_TYPE_P(str, T_STRING)); + return rb_enc_from_index(ENCODING_GET(str)); +} + /* string.c */ +VALUE rb_str_dup_m(VALUE str); VALUE rb_fstring(VALUE); VALUE rb_fstring_cstr(const char *str); VALUE rb_fstring_enc_new(const char *ptr, long len, rb_encoding *enc); @@ -51,6 +88,20 @@ int rb_enc_str_coderange_scan(VALUE str, rb_encoding *enc); int rb_ascii8bit_appendable_encoding_index(rb_encoding *enc, unsigned int code); VALUE rb_str_include(VALUE str, VALUE arg); VALUE rb_str_byte_substr(VALUE str, VALUE beg, VALUE len); +VALUE rb_str_substr_two_fixnums(VALUE str, VALUE beg, VALUE len, int empty); +VALUE rb_str_tmp_frozen_no_embed_acquire(VALUE str); +void rb_str_make_embedded(VALUE); +VALUE rb_str_upto_each(VALUE, VALUE, int, int (*each)(VALUE, VALUE), VALUE); +size_t rb_str_size_as_embedded(VALUE); +bool rb_str_reembeddable_p(VALUE); +VALUE rb_str_upto_endless_each(VALUE, int (*each)(VALUE, VALUE), VALUE); +VALUE rb_str_with_debug_created_info(VALUE, VALUE, int); +VALUE rb_str_frozen_bare_string(VALUE); +const char *rb_str_null_check(VALUE); + +/* error.c */ +void rb_warn_unchilled_literal(VALUE str); +void rb_warn_unchilled_symbol_to_s(VALUE str); static inline bool STR_EMBED_P(VALUE str); static inline bool STR_SHARED_P(VALUE str); @@ -63,17 +114,14 @@ static inline VALUE rb_str_eql_internal(const VALUE str1, const VALUE str2); RUBY_SYMBOL_EXPORT_BEGIN /* string.c (export) */ VALUE rb_str_tmp_frozen_acquire(VALUE str); -VALUE rb_str_tmp_frozen_no_embed_acquire(VALUE str); void rb_str_tmp_frozen_release(VALUE str, VALUE tmp); VALUE rb_setup_fake_str(struct RString *fake_str, const char *name, long len, rb_encoding *enc); -VALUE rb_str_upto_each(VALUE, VALUE, int, int (*each)(VALUE, VALUE), VALUE); -VALUE rb_str_upto_endless_each(VALUE, int (*each)(VALUE, VALUE), VALUE); -void rb_str_make_embedded(VALUE); -size_t rb_str_size_as_embedded(VALUE); -bool rb_str_reembeddable_p(VALUE); RUBY_SYMBOL_EXPORT_END VALUE rb_fstring_new(const char *ptr, long len); +void rb_gc_free_fstring(VALUE obj); +bool rb_obj_is_fstring_table(VALUE obj); +void Init_fstring_table(); VALUE rb_obj_as_string_result(VALUE str, VALUE obj); VALUE rb_str_opt_plus(VALUE x, VALUE y); VALUE rb_str_concat_literals(size_t num, const VALUE *strary); @@ -123,14 +171,18 @@ CHILLED_STRING_P(VALUE obj) static inline void CHILLED_STRING_MUTATED(VALUE str) { + VALUE chilled_reason = RB_FL_TEST_RAW(str, STR_CHILLED); FL_UNSET_RAW(str, STR_CHILLED); - rb_category_warn(RB_WARN_CATEGORY_DEPRECATED, "literal string will be frozen in the future"); -} - -static inline void -STR_CHILL_RAW(VALUE str) -{ - FL_SET_RAW(str, STR_CHILLED); + switch (chilled_reason) { + case STR_CHILLED_SYMBOL_TO_S: + rb_warn_unchilled_symbol_to_s(str); + break; + case STR_CHILLED_LITERAL: + rb_warn_unchilled_literal(str); + break; + default: + rb_bug("RString was chilled for multiple reasons"); + } } static inline bool |
