diff options
Diffstat (limited to 'internal/string.h')
| -rw-r--r-- | internal/string.h | 239 |
1 files changed, 193 insertions, 46 deletions
diff --git a/internal/string.h b/internal/string.h index 2135490029..94a46a9657 100644 --- a/internal/string.h +++ b/internal/string.h @@ -1,67 +1,220 @@ -#ifndef INTERNAL_STRING_H /* -*- C -*- */ +#ifndef INTERNAL_STRING_H /*-*-C-*-vi:se ft=c:*/ #define INTERNAL_STRING_H /** - * @file - * @brief Internal header for String. - * @author \@shyouhei + * @author Ruby developers <ruby-core@ruby-lang.org> * @copyright This file is a part of the programming language Ruby. * Permission is hereby granted, to either redistribute and/or * modify this file, provided that the conditions mentioned in the * file COPYING are met. Consult the file for details. + * @brief Internal header for String. */ +#include "ruby/internal/config.h" +#include <stddef.h> /* for size_t */ +#include "internal/compilers.h" /* for __has_builtin */ +#include "ruby/internal/stdbool.h" /* for bool */ +#include "ruby/encoding.h" /* for rb_encoding */ +#include "ruby/ruby.h" /* for VALUE */ +#include "encindex.h" + +#define STR_SHARED FL_USER0 /* = ELTS_SHARED */ +#define STR_NOEMBED FL_USER1 +#define STR_CHILLED (FL_USER2 | FL_USER3) +#define STR_CHILLED_LITERAL FL_USER2 +#define STR_CHILLED_SYMBOL_TO_S FL_USER3 + +enum ruby_rstring_private_flags { + RSTRING_CHILLED = STR_CHILLED, +}; + +#ifdef rb_fstring_cstr +# undef rb_fstring_cstr +#endif + +static inline bool +rb_str_encindex_fastpath(int encindex) +{ + // The overwhelming majority of strings are in one of these 3 encodings, + // which are all either ASCII or perfect ASCII supersets. + // Hence you can use fast, single byte algorithms on them, such as `memchr` etc, + // without all the overhead of fetching the rb_encoding and using functions such as + // rb_enc_mbminlen etc. + // Many other encodings could qualify, but they are expected to be rare occurrences, + // so it's better to keep that list small. + switch (encindex) { + case ENCINDEX_ASCII_8BIT: + case ENCINDEX_UTF_8: + case ENCINDEX_US_ASCII: + return true; + default: + return false; + } +} + +static inline bool +rb_str_enc_fastpath(VALUE str) +{ + return rb_str_encindex_fastpath(ENCODING_GET_INLINED(str)); +} +static inline rb_encoding * +rb_str_enc_get(VALUE str) +{ + RUBY_ASSERT(RB_TYPE_P(str, T_STRING)); + return rb_enc_from_index(ENCODING_GET(str)); +} /* string.c */ +VALUE rb_str_dup_m(VALUE str); VALUE rb_fstring(VALUE); -VALUE rb_fstring_new(const char *ptr, long len); -#define rb_fstring_lit(str) rb_fstring_new((str), rb_strlen_lit(str)) -#define rb_fstring_literal(str) rb_fstring_lit(str) VALUE rb_fstring_cstr(const char *str); -#ifdef HAVE_BUILTIN___BUILTIN_CONSTANT_P -# define rb_fstring_cstr(str) RB_GNUC_EXTENSION_BLOCK( \ - (__builtin_constant_p(str)) ? \ - rb_fstring_new((str), (long)strlen(str)) : \ - rb_fstring_cstr(str) \ -) -#endif -#ifdef RUBY_ENCODING_H VALUE rb_fstring_enc_new(const char *ptr, long len, rb_encoding *enc); -#define rb_fstring_enc_lit(str, enc) rb_fstring_enc_new((str), rb_strlen_lit(str), (enc)) -#define rb_fstring_enc_literal(str, enc) rb_fstring_enc_lit(str, enc) -#endif int rb_str_buf_cat_escaped_char(VALUE result, unsigned int c, int unicode_p); int rb_str_symname_p(VALUE); VALUE rb_str_quote_unprintable(VALUE); -VALUE rb_id_quote_unprintable(ID); -#define QUOTE(str) rb_str_quote_unprintable(str) -#define QUOTE_ID(id) rb_id_quote_unprintable(id) char *rb_str_fill_terminator(VALUE str, const int termlen); void rb_str_change_terminator_length(VALUE str, const int oldtermlen, const int termlen); VALUE rb_str_locktmp_ensure(VALUE str, VALUE (*func)(VALUE), VALUE arg); VALUE rb_str_chomp_string(VALUE str, VALUE chomp); -#ifdef RUBY_ENCODING_H VALUE rb_external_str_with_enc(VALUE str, rb_encoding *eenc); VALUE rb_str_cat_conv_enc_opts(VALUE newstr, long ofs, const char *ptr, long len, rb_encoding *from, int ecflags, VALUE ecopts); VALUE rb_enc_str_scrub(rb_encoding *enc, VALUE str, VALUE repl); -VALUE rb_str_initialize(VALUE str, const char *ptr, long len, rb_encoding *enc); -#endif -#define STR_NOEMBED FL_USER1 -#define STR_SHARED FL_USER2 /* = ELTS_SHARED */ -#define STR_EMBED_P(str) (!FL_TEST_RAW((str), STR_NOEMBED)) -#define STR_SHARED_P(s) FL_ALL_RAW((s), STR_NOEMBED|ELTS_SHARED) -#define is_ascii_string(str) (rb_enc_str_coderange(str) == ENC_CODERANGE_7BIT) -#define is_broken_string(str) (rb_enc_str_coderange(str) == ENC_CODERANGE_BROKEN) +VALUE rb_str_escape(VALUE str); size_t rb_str_memsize(VALUE); -VALUE rb_sym_proc_call(ID mid, int argc, const VALUE *argv, int kw_splat, VALUE passed_proc); -VALUE rb_sym_to_proc(VALUE sym); char *rb_str_to_cstr(VALUE str); -VALUE rb_str_eql(VALUE str1, VALUE str2); -VALUE rb_obj_as_string_result(VALUE str, VALUE obj); const char *ruby_escaped_char(int c); -VALUE rb_str_opt_plus(VALUE, VALUE); +void rb_str_make_independent(VALUE str); +int rb_enc_str_coderange_scan(VALUE str, rb_encoding *enc); +int rb_ascii8bit_appendable_encoding_index(rb_encoding *enc, unsigned int code); +VALUE rb_str_include(VALUE str, VALUE arg); +VALUE rb_str_byte_substr(VALUE str, VALUE beg, VALUE len); +VALUE rb_str_substr_two_fixnums(VALUE str, VALUE beg, VALUE len, int empty); +VALUE rb_str_tmp_frozen_no_embed_acquire(VALUE str); +void rb_str_make_embedded(VALUE); +VALUE rb_str_upto_each(VALUE, VALUE, int, int (*each)(VALUE, VALUE), VALUE); +size_t rb_str_size_as_embedded(VALUE); +bool rb_str_reembeddable_p(VALUE); +VALUE rb_str_upto_endless_each(VALUE, int (*each)(VALUE, VALUE), VALUE); +VALUE rb_str_with_debug_created_info(VALUE, VALUE, int); +VALUE rb_str_frozen_bare_string(VALUE); +const char *rb_str_null_check(VALUE); +VALUE rb_str_casecmp(VALUE str1, VALUE str2); + +/* error.c */ +void rb_warn_unchilled_literal(VALUE str); +void rb_warn_unchilled_symbol_to_s(VALUE str); + +static inline bool STR_EMBED_P(VALUE str); +static inline bool STR_SHARED_P(VALUE str); +static inline VALUE QUOTE(VALUE v); +static inline VALUE QUOTE_ID(ID v); +static inline bool is_ascii_string(VALUE str); +static inline bool is_broken_string(VALUE str); +static inline VALUE rb_str_eql_internal(const VALUE str1, const VALUE str2); + +RUBY_SYMBOL_EXPORT_BEGIN +/* string.c (export) */ +VALUE rb_str_tmp_frozen_acquire(VALUE str); +void rb_str_tmp_frozen_release(VALUE str, VALUE tmp); +VALUE rb_setup_fake_str(struct RString *fake_str, const char *name, long len, rb_encoding *enc); +RUBY_SYMBOL_EXPORT_END + +VALUE rb_fstring_new(const char *ptr, long len); +void rb_gc_free_fstring(VALUE obj); +bool rb_obj_is_fstring_table(VALUE obj); +void Init_fstring_table(); +VALUE rb_obj_as_string_result(VALUE str, VALUE obj); +VALUE rb_str_opt_plus(VALUE x, VALUE y); +VALUE rb_str_concat_literals(size_t num, const VALUE *strary); +VALUE rb_str_eql(VALUE str1, VALUE str2); +VALUE rb_id_quote_unprintable(ID); +VALUE rb_sym_proc_call(ID mid, int argc, const VALUE *argv, int kw_splat, VALUE passed_proc); +VALUE rb_enc_literal_str(const char *ptr, long len, rb_encoding *enc); + +struct rb_execution_context_struct; +VALUE rb_ec_str_resurrect(struct rb_execution_context_struct *ec, VALUE str, bool chilled); + +#define rb_fstring_lit(str) rb_fstring_new((str), rb_strlen_lit(str)) +#define rb_fstring_literal(str) rb_fstring_lit(str) +#define rb_fstring_enc_lit(str, enc) rb_fstring_enc_new((str), rb_strlen_lit(str), (enc)) +#define rb_fstring_enc_literal(str, enc) rb_fstring_enc_lit(str, enc) + +static inline VALUE +QUOTE(VALUE v) +{ + return rb_str_quote_unprintable(v); +} + +static inline VALUE +QUOTE_ID(ID i) +{ + return rb_id_quote_unprintable(i); +} + +static inline bool +STR_EMBED_P(VALUE str) +{ + return ! FL_TEST_RAW(str, STR_NOEMBED); +} + +static inline bool +STR_SHARED_P(VALUE str) +{ + return FL_ALL_RAW(str, STR_NOEMBED | STR_SHARED); +} + +static inline bool +CHILLED_STRING_P(VALUE obj) +{ + return RB_TYPE_P(obj, T_STRING) && FL_TEST_RAW(obj, STR_CHILLED); +} + +static inline void +CHILLED_STRING_MUTATED(VALUE str) +{ + VALUE chilled_reason = RB_FL_TEST_RAW(str, STR_CHILLED); + FL_UNSET_RAW(str, STR_CHILLED); + switch (chilled_reason) { + case STR_CHILLED_SYMBOL_TO_S: + rb_warn_unchilled_symbol_to_s(str); + break; + case STR_CHILLED_LITERAL: + rb_warn_unchilled_literal(str); + break; + default: + rb_bug("RString was chilled for multiple reasons"); + } +} + +static inline bool +is_ascii_string(VALUE str) +{ + return rb_enc_str_coderange(str) == ENC_CODERANGE_7BIT; +} + +static inline bool +is_broken_string(VALUE str) +{ + return rb_enc_str_coderange(str) == ENC_CODERANGE_BROKEN; +} + +static inline bool +at_char_boundary(const char *s, const char *p, const char *e, rb_encoding *enc) +{ + return rb_enc_left_char_head(s, p, e, enc) == p; +} + +static inline bool +at_char_right_boundary(const char *s, const char *p, const char *e, rb_encoding *enc) +{ + RUBY_ASSERT(s <= p); + RUBY_ASSERT(p <= e); + + return rb_enc_right_char_head(s, p, e, enc) == p; +} /* expect tail call optimization */ +// YJIT needs this function to never allocate and never raise static inline VALUE rb_str_eql_internal(const VALUE str1, const VALUE str2) { @@ -77,16 +230,10 @@ rb_str_eql_internal(const VALUE str1, const VALUE str2) return Qfalse; } -RUBY_SYMBOL_EXPORT_BEGIN -/* string.c (export) */ -VALUE rb_str_tmp_frozen_acquire(VALUE str); -void rb_str_tmp_frozen_release(VALUE str, VALUE tmp); -#ifdef RUBY_ENCODING_H -/* internal use */ -VALUE rb_setup_fake_str(struct RString *fake_str, const char *name, long len, rb_encoding *enc); +#if __has_builtin(__builtin_constant_p) +# define rb_fstring_cstr(str) \ + (__builtin_constant_p(str) ? \ + rb_fstring_new((str), (long)strlen(str)) : \ + (rb_fstring_cstr)(str)) #endif -VALUE rb_str_upto_each(VALUE, VALUE, int, int (*each)(VALUE, VALUE), VALUE); -VALUE rb_str_upto_endless_each(VALUE, int (*each)(VALUE, VALUE), VALUE); -RUBY_SYMBOL_EXPORT_END - #endif /* INTERNAL_STRING_H */ |
