diff options
Diffstat (limited to 'include/ruby/internal/encoding/string.h')
| -rw-r--r-- | include/ruby/internal/encoding/string.h | 37 |
1 files changed, 33 insertions, 4 deletions
diff --git a/include/ruby/internal/encoding/string.h b/include/ruby/internal/encoding/string.h index 6ed7ca1c90..ea78cf23f3 100644 --- a/include/ruby/internal/encoding/string.h +++ b/include/ruby/internal/encoding/string.h @@ -30,7 +30,7 @@ RBIMPL_SYMBOL_EXPORT_BEGIN() /** - * Identical to rb_enc_str_new(), except it additionally takes an encoding. + * Identical to rb_str_new(), except it additionally takes an encoding. * * @param[in] ptr A memory region of `len` bytes length. * @param[in] len Length of `ptr`, in bytes, not including the @@ -264,6 +264,14 @@ VALUE rb_str_conv_enc(VALUE str, rb_encoding *from, rb_encoding *to); VALUE rb_str_conv_enc_opts(VALUE str, rb_encoding *from, rb_encoding *to, int ecflags, VALUE ecopts); /** + * @private + * + * This is an implementation detail of rb_enc_str_coderange(). Don't use this + * directly. + **/ +int rbimpl_enc_str_coderange_scan(VALUE str); + +/** * Scans the passed string to collect its code range. Because a Ruby's string * is mutable, its contents change from time to time; so does its code range. * A long-lived string tends to fall back to ::RUBY_ENC_CODERANGE_UNKNOWN. @@ -275,6 +283,27 @@ VALUE rb_str_conv_enc_opts(VALUE str, rb_encoding *from, rb_encoding *to, int ec int rb_enc_str_coderange(VALUE str); /** + * Scans the passed string to collect its code range. Because a Ruby's string + * is mutable, its contents change from time to time; so does its code range. + * A long-lived string tends to fall back to ::RUBY_ENC_CODERANGE_UNKNOWN. + * This API scans it and re-assigns a fine-grained code range constant. + * + * @param[out] str A string. + * @return An enum ::ruby_coderange_type. + */ +static inline int +rb_enc_str_coderange_inline(VALUE str) +{ + int cr = ENC_CODERANGE(str); + if (cr == ENC_CODERANGE_UNKNOWN) { + cr = rbimpl_enc_str_coderange_scan(str); + } + return cr; +} + +#define rb_enc_str_coderange rb_enc_str_coderange_inline + +/** * Scans the passed string until it finds something odd. Returns the number of * bytes scanned. As the name implies this is suitable for repeated call. One * of its application is `IO#readlines`. The method reads from its receiver's @@ -307,13 +336,13 @@ RBIMPL_ATTR_NONNULL(()) /** * Looks for the passed string in the passed buffer. * - * @param[in] x Buffer that potentially includes `y`. + * @param[in] x Query string. * @param[in] m Number of bytes of `x`. - * @param[in] y Query string. + * @param[in] y Buffer that potentially includes `x`. * @param[in] n Number of bytes of `y`. * @param[in] enc Encoding of both `x` and `y`. * @retval -1 Not found. - * @retval otherwise Found index in `x`. + * @retval otherwise Found index in `y`. * @note This API can match at a non-character-boundary. */ long rb_memsearch(const void *x, long m, const void *y, long n, rb_encoding *enc); |
