summary | refs | log | tree | commit | diff
path: root/include/ruby/internal/encoding/string.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/ruby/internal/encoding/string.h')
-rw-r--r-- include/ruby/internal/encoding/string.h 39
1 files changed, 34 insertions, 5 deletions
diff --git a/include/ruby/internal/encoding/string.h b/include/ruby/internal/encoding/string.h
index f8ce809199..ea78cf23f3 100644
--- a/include/ruby/internal/encoding/string.h
+++ b/include/ruby/internal/encoding/string.h
@@ -30,7 +30,7 @@
RBIMPL_SYMBOL_EXPORT_BEGIN()
/**
- * Identical to rb_enc_str_new(), except it additionally takes an encoding.
+ * Identical to rb_str_new(), except it additionally takes an encoding.
*
* @param[in] ptr A memory region of `len` bytes length.
* @param[in] len Length of `ptr`, in bytes, not including the
@@ -190,7 +190,7 @@ VALUE rb_enc_str_buf_cat(VALUE str, const char *ptr, long len, rb_encoding *enc)
* In other languages, APIs like this one could be seen as the primitive
* routines where encodings' "encode" feature is implemented. However in case
* of Ruby this is not the primitive one. We directly manipulate encoded
- * strings. Encoding conversion routines transocde an encoded string directly
+ * strings. Encoding conversion routines transcode an encoded string directly
* to another one; not via a code point array.
*/
VALUE rb_enc_uint_chr(unsigned int code, rb_encoding *enc);
@@ -264,6 +264,14 @@ VALUE rb_str_conv_enc(VALUE str, rb_encoding *from, rb_encoding *to);
VALUE rb_str_conv_enc_opts(VALUE str, rb_encoding *from, rb_encoding *to, int ecflags, VALUE ecopts);
/**
+ * @private
+ *
+ * This is an implementation detail of rb_enc_str_coderange(). Don't use this
+ * directly.
+ **/
+int rbimpl_enc_str_coderange_scan(VALUE str);
+
+/**
* Scans the passed string to collect its code range. Because a Ruby string
* is mutable, its contents change from time to time; so does its code range.
* A long-lived string tends to fall back to ::RUBY_ENC_CODERANGE_UNKNOWN.
@@ -275,6 +283,27 @@ VALUE rb_str_conv_enc_opts(VALUE str, rb_encoding *from, rb_encoding *to, int ec
int rb_enc_str_coderange(VALUE str);
/**
+ * Inline wrapper that obtains the code range of the passed string.  It first
+ * reads the value that `ENC_CODERANGE(str)` yields.  When that value is
+ * anything other than `ENC_CODERANGE_UNKNOWN` it is returned as-is and no
+ * further work is done.  Only when it is `ENC_CODERANGE_UNKNOWN` does this
+ * function call rbimpl_enc_str_coderange_scan() and return that function's
+ * result instead.
+ *
+ * @param[out] str A string.
+ * @return An enum ::ruby_coderange_type: either the value read through
+ *          `ENC_CODERANGE`, or the result of the scan when that value was
+ *          `ENC_CODERANGE_UNKNOWN`.
+ * @note NOTE(review): whether rbimpl_enc_str_coderange_scan() stores its
+ *       result back into `str` is not visible from this header; confirm
+ *       against its definition before relying on the `[out]` direction.
+ */
+static inline int
+rb_enc_str_coderange_inline(VALUE str)
+{
+ int cr = ENC_CODERANGE(str);
+ if (cr == ENC_CODERANGE_UNKNOWN) { /* no known code range; ask the scanner */
+ cr = rbimpl_enc_str_coderange_scan(str);
+ }
+ return cr;
+}
+/* From here on, uses of the name rb_enc_str_coderange expand to the inline wrapper above. */
+#define rb_enc_str_coderange rb_enc_str_coderange_inline
+
+/**
* Scans the passed string until it finds something odd. Returns the number of
* bytes scanned. As the name implies this is suitable for repeated calls. One
* of its applications is `IO#readlines`. The method reads from its receiver's
@@ -307,13 +336,13 @@ RBIMPL_ATTR_NONNULL(())
/**
* Looks for the passed string in the passed buffer.
*
- * @param[in] x Buffer that potentially includes `y`.
+ * @param[in] x Query string.
* @param[in] m Number of bytes of `x`.
- * @param[in] y Query string.
+ * @param[in] y Buffer that potentially includes `x`.
* @param[in] n Number of bytes of `y`.
* @param[in] enc Encoding of both `x` and `y`.
* @retval -1 Not found.
- * @retval otherwise Found index in `x`.
+ * @retval otherwise Found index in `y`.
* @note This API can match at a non-character-boundary.
*/
long rb_memsearch(const void *x, long m, const void *y, long n, rb_encoding *enc);