summaryrefslogtreecommitdiff
path: root/include/ruby/internal/encoding/string.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/ruby/internal/encoding/string.h')
-rw-r--r--include/ruby/internal/encoding/string.h72
1 files changed, 55 insertions, 17 deletions
diff --git a/include/ruby/internal/encoding/string.h b/include/ruby/internal/encoding/string.h
index 87226bec10..ea78cf23f3 100644
--- a/include/ruby/internal/encoding/string.h
+++ b/include/ruby/internal/encoding/string.h
@@ -25,11 +25,12 @@
#include "ruby/internal/value.h"
#include "ruby/internal/encoding/encoding.h"
#include "ruby/internal/attr/nonnull.h"
+#include "ruby/internal/intern/string.h" /* rbimpl_strlen */
RBIMPL_SYMBOL_EXPORT_BEGIN()
/**
- * Identical to rb_enc_str_new(), except it additionally takes an encoding.
+ * Identical to rb_str_new(), except it additionally takes an encoding.
*
* @param[in] ptr A memory region of `len` bytes length.
* @param[in] len Length of `ptr`, in bytes, not including the
@@ -189,7 +190,7 @@ VALUE rb_enc_str_buf_cat(VALUE str, const char *ptr, long len, rb_encoding *enc)
* In other languages, APIs like this one could be seen as the primitive
* routines where encodings' "encode" feature are implemented. However in case
* of Ruby this is not the primitive one. We directly manipulate encoded
- * strings. Encoding conversion routines transocde an encoded string directly
+ * strings. Encoding conversion routines transcode an encoded string directly
* to another one; not via a code point array.
*/
VALUE rb_enc_uint_chr(unsigned int code, rb_encoding *enc);
@@ -263,6 +264,14 @@ VALUE rb_str_conv_enc(VALUE str, rb_encoding *from, rb_encoding *to);
VALUE rb_str_conv_enc_opts(VALUE str, rb_encoding *from, rb_encoding *to, int ecflags, VALUE ecopts);
/**
+ * @private
+ *
+ * This is an implementation detail of rb_enc_str_coderange(). Don't use this
+ * directly.
+ **/
+int rbimpl_enc_str_coderange_scan(VALUE str);
+
+/**
* Scans the passed string to collect its code range. Because a Ruby's string
* is mutable, its contents change from time to time; so does its code range.
* A long-lived string tends to fall back to ::RUBY_ENC_CODERANGE_UNKNOWN.
@@ -274,6 +283,27 @@ VALUE rb_str_conv_enc_opts(VALUE str, rb_encoding *from, rb_encoding *to, int ec
int rb_enc_str_coderange(VALUE str);
/**
+ * Scans the passed string to collect its code range. Because a Ruby's string
+ * is mutable, its contents change from time to time; so does its code range.
+ * A long-lived string tends to fall back to ::RUBY_ENC_CODERANGE_UNKNOWN.
+ * This API scans it and re-assigns a fine-grained code range constant.
+ *
+ * @param[out] str A string.
+ * @return An enum ::ruby_coderange_type.
+ */
+static inline int
+rb_enc_str_coderange_inline(VALUE str)
+{
+ int cr = ENC_CODERANGE(str);
+ if (cr == ENC_CODERANGE_UNKNOWN) {
+ cr = rbimpl_enc_str_coderange_scan(str);
+ }
+ return cr;
+}
+
+#define rb_enc_str_coderange rb_enc_str_coderange_inline
+
+/**
* Scans the passed string until it finds something odd. Returns the number of
* bytes scanned. As the name implies this is suitable for repeated call. One
* of its application is `IO#readlines`. The method reads from its receiver's
@@ -306,30 +336,38 @@ RBIMPL_ATTR_NONNULL(())
/**
* Looks for the passed string in the passed buffer.
*
- * @param[in] x Buffer that potentially includes `y`.
+ * @param[in] x Query string.
* @param[in] m Number of bytes of `x`.
- * @param[in] y Query string.
+ * @param[in] y Buffer that potentially includes `x`.
* @param[in] n Number of bytes of `y`.
* @param[in] enc Encoding of both `x` and `y`.
* @retval -1 Not found.
- * @retval otherwise Found index in `x`.
+ * @retval otherwise Found index in `y`.
* @note This API can match at a non-character-boundary.
*/
long rb_memsearch(const void *x, long m, const void *y, long n, rb_encoding *enc);
/** @cond INTERNAL_MACRO */
-#ifdef HAVE_BUILTIN___BUILTIN_CONSTANT_P
-#define rb_enc_str_new(str, len, enc) RB_GNUC_EXTENSION_BLOCK( \
- (__builtin_constant_p(str) && __builtin_constant_p(len)) ? \
- rb_enc_str_new_static((str), (len), (enc)) : \
- rb_enc_str_new((str), (len), (enc)) \
-)
-#define rb_enc_str_new_cstr(str, enc) RB_GNUC_EXTENSION_BLOCK( \
- (__builtin_constant_p(str)) ? \
- rb_enc_str_new_static((str), (long)strlen(str), (enc)) : \
- rb_enc_str_new_cstr((str), (enc)) \
-)
-#endif
+RBIMPL_ATTR_NONNULL(())
+static inline VALUE
+rbimpl_enc_str_new_cstr(const char *str, rb_encoding *enc)
+{
+ long len = rbimpl_strlen(str);
+
+ return rb_enc_str_new_static(str, len, enc);
+}
+
+#define rb_enc_str_new(str, len, enc) \
+ ((RBIMPL_CONSTANT_P(str) && \
+ RBIMPL_CONSTANT_P(len) ? \
+ rb_enc_str_new_static: \
+ rb_enc_str_new) ((str), (len), (enc)))
+
+#define rb_enc_str_new_cstr(str, enc) \
+ ((RBIMPL_CONSTANT_P(str) ? \
+ rbimpl_enc_str_new_cstr : \
+ rb_enc_str_new_cstr) ((str), (enc)))
+
/** @endcond */
RBIMPL_SYMBOL_EXPORT_END()