From 5112a548467e04ebdb386f0cc7bacb29f38d3fd2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=8D=9C=E9=83=A8=E6=98=8C=E5=B9=B3?=
 <shyouhei@ruby-lang.org>
Date: Wed, 29 Sep 2021 12:58:26 +0900
Subject: include/ruby/encoding.h: convert macros into inline functions

Less macros == huge win.
---
 encoding.c                                 |  14 --
 include/ruby/internal/encoding/coderange.h |  88 ++++++----
 include/ruby/internal/encoding/ctype.h     |  83 +++++++--
 include/ruby/internal/encoding/encoding.h  | 267 +++++++++++++++++++++++------
 include/ruby/internal/encoding/string.h    |  33 ++--
 string.c                                   |   2 +-
 template/Doxyfile.tmpl                     |   1 +
 7 files changed, 368 insertions(+), 120 deletions(-)

diff --git a/encoding.c b/encoding.c
index a162821e64..2fb1c42047 100644
--- a/encoding.c
+++ b/encoding.c
@@ -1277,13 +1277,6 @@ rb_enc_codepoint_len(const char *p, const char *e, int *len_p, rb_encoding *enc)
     return rb_enc_mbc_to_codepoint(p, e, enc);
 }
 
-#undef rb_enc_codepoint
-unsigned int
-rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc)
-{
-    return rb_enc_codepoint_len(p, e, 0, enc);
-}
-
 int
 rb_enc_codelen(int c, rb_encoding *enc)
 {
@@ -1294,13 +1287,6 @@ rb_enc_codelen(int c, rb_encoding *enc)
     return n;
 }
 
-#undef rb_enc_code_to_mbclen
-int
-rb_enc_code_to_mbclen(int code, rb_encoding *enc)
-{
-    return ONIGENC_CODE_TO_MBCLEN(enc, code);
-}
-
 int
 rb_enc_toupper(int c, rb_encoding *enc)
 {
diff --git a/include/ruby/internal/encoding/coderange.h b/include/ruby/internal/encoding/coderange.h
index 84daddeeb3..7a81208c9e 100644
--- a/include/ruby/internal/encoding/coderange.h
+++ b/include/ruby/internal/encoding/coderange.h
@@ -22,7 +22,9 @@
  */
 
 #include "ruby/internal/attr/const.h"
+#include "ruby/internal/attr/pure.h"
 #include "ruby/internal/dllexport.h"
+#include "ruby/internal/fl_type.h"
 #include "ruby/internal/value.h"
 
 RBIMPL_SYMBOL_EXPORT_BEGIN()
@@ -65,6 +67,7 @@ rb_enc_coderange_clean_p(int cr)
     return (cr ^ (cr >> 1)) & RUBY_ENC_CODERANGE_7BIT;
 }
 
+RBIMPL_ATTR_CONST()
 /**
  * Queries if  a code range  is "clean".  "Clean" in  this context means  it is
  * known and valid.
@@ -73,8 +76,13 @@ rb_enc_coderange_clean_p(int cr)
  * @retval     1   It is.
  * @retval     0   It isn't.
  */
-#define RB_ENC_CODERANGE_CLEAN_P(cr) rb_enc_coderange_clean_p(cr)
+static inline bool
+RB_ENC_CODERANGE_CLEAN_P(enum ruby_coderange_type cr)
+{
+    return rb_enc_coderange_clean_p(cr);
+}
 
+RBIMPL_ATTR_PURE_UNLESS_DEBUG()
 /**
  * Queries the  (inline) code range of  the passed object.  The  object must be
  * capable  of   having  inline   encoding.   Using   this  macro   needs  deep
@@ -83,8 +91,15 @@ rb_enc_coderange_clean_p(int cr)
  * @param[in]  obj  Target object.
  * @return     An enum ::ruby_coderange_type.
  */
-#define RB_ENC_CODERANGE(obj) ((int)RBASIC(obj)->flags & RUBY_ENC_CODERANGE_MASK)
+static inline enum ruby_coderange_type
+RB_ENC_CODERANGE(VALUE obj)
+{
+    VALUE ret = RB_FL_TEST_RAW(obj, RUBY_ENC_CODERANGE_MASK);
+    /* The masked flag bits are the code range; convert them to the enum. */
+    return RBIMPL_CAST((enum ruby_coderange_type)ret);
+}
 
+RBIMPL_ATTR_PURE_UNLESS_DEBUG()
 /**
  * Queries   the    (inline)   code   range    of   the   passed    object   is
  * ::RUBY_ENC_CODERANGE_7BIT.   The object  must  be capable  of having  inline
@@ -95,7 +110,11 @@ rb_enc_coderange_clean_p(int cr)
  * @retval     1    It is ascii only.
  * @retval     0    Otherwise (including cases when the range is not known).
  */
-#define RB_ENC_CODERANGE_ASCIIONLY(obj) (RB_ENC_CODERANGE(obj) == RUBY_ENC_CODERANGE_7BIT)
+static inline bool
+RB_ENC_CODERANGE_ASCIIONLY(VALUE obj)
+{
+    return RB_ENC_CODERANGE(obj) == RUBY_ENC_CODERANGE_7BIT;
+}
 
 /**
  * Destructively modifies the passed object so  that its (inline) code range is
@@ -106,9 +125,12 @@ rb_enc_coderange_clean_p(int cr)
  * @param[out]  cr   An enum ::ruby_coderange_type.
  * @post        `obj`'s code range is `cr`.
  */
-#define RB_ENC_CODERANGE_SET(obj,cr) (\
-        RBASIC(obj)->flags = \
-        (RBASIC(obj)->flags & ~RUBY_ENC_CODERANGE_MASK) | (cr))
+static inline void
+RB_ENC_CODERANGE_SET(VALUE obj, enum ruby_coderange_type cr)
+{
+    RB_FL_UNSET_RAW(obj, RUBY_ENC_CODERANGE_MASK); /* drop the previous range */
+    RB_FL_SET_RAW(obj, cr);                        /* then turn on `cr`'s bits */
+}
 
 /**
  * Destructively clears  the passed object's  (inline) code range.   The object
@@ -118,8 +140,13 @@ rb_enc_coderange_clean_p(int cr)
  * @param[out]  obj  Target object.
  * @post        `obj`'s code range is ::RUBY_ENC_CODERANGE_UNKNOWN.
  */
-#define RB_ENC_CODERANGE_CLEAR(obj) RB_ENC_CODERANGE_SET((obj),0)
+static inline void
+RB_ENC_CODERANGE_CLEAR(VALUE obj)
+{
+    RB_FL_UNSET_RAW(obj, RUBY_ENC_CODERANGE_MASK); /* no bits set == UNKNOWN */
+}
 
+RBIMPL_ATTR_CONST()
 /* assumed ASCII compatibility */
 /**
  * "Mix"  two code  ranges  into one.   This  is handy  for  instance when  you
@@ -131,28 +158,22 @@ rb_enc_coderange_clean_p(int cr)
  * @param[in]  b  Another enum ::ruby_coderange_type.
  * @return     The `a` "and" `b`.
  */
-#define RB_ENC_CODERANGE_AND(a, b) \
-    ((a) == RUBY_ENC_CODERANGE_7BIT ? (b) : \
-     (a) != RUBY_ENC_CODERANGE_VALID ? RUBY_ENC_CODERANGE_UNKNOWN : \
-     (b) == RUBY_ENC_CODERANGE_7BIT ? RUBY_ENC_CODERANGE_VALID : (b))
-
-/**
- * This is #RB_ENCODING_SET  + RB_ENC_CODERANGE_SET combo.  The  object must be
- * capable  of   having  inline   encoding.   Using   this  macro   needs  deep
- * understanding of bit level object binary layout.
- *
- * @param[out]  obj       Target object.
- * @param[in]   encindex  Encoding in encindex format.
- * @param[in]   cr        An enum ::ruby_coderange_type.
- * @post        `obj`'s encoding is `encindex`.
- * @post        `obj`'s code range is `cr`.
- */
-#define RB_ENCODING_CODERANGE_SET(obj, encindex, cr) \
-    do { \
-        VALUE rb_encoding_coderange_obj = (obj); \
-        RB_ENCODING_SET(rb_encoding_coderange_obj, (encindex)); \
-        RB_ENC_CODERANGE_SET(rb_encoding_coderange_obj, (cr)); \
-    } while (0)
+static inline enum ruby_coderange_type
+RB_ENC_CODERANGE_AND(enum ruby_coderange_type a, enum ruby_coderange_type b)
+{
+    if (a == RUBY_ENC_CODERANGE_7BIT) {
+        return b;                          /* 7bit `a`: `b` alone decides */
+    }
+    else if (a != RUBY_ENC_CODERANGE_VALID) {
+        return RUBY_ENC_CODERANGE_UNKNOWN; /* `a` is neither 7bit nor valid */
+    }
+    else if (b == RUBY_ENC_CODERANGE_7BIT) {
+        return RUBY_ENC_CODERANGE_VALID;   /* valid `a` + 7bit `b` is valid */
+    }
+    else {
+        return b;                          /* valid `a`: `b` alone decides */
+    }
+}
 
 #define ENC_CODERANGE_MASK                        RUBY_ENC_CODERANGE_MASK                      /**< @old{RUBY_ENC_CODERANGE_MASK} */
 #define ENC_CODERANGE_UNKNOWN                     RUBY_ENC_CODERANGE_UNKNOWN                   /**< @old{RUBY_ENC_CODERANGE_UNKNOWN} */
@@ -167,6 +188,15 @@ rb_enc_coderange_clean_p(int cr)
 #define ENC_CODERANGE_AND(a, b)                   RB_ENC_CODERANGE_AND(a, b)                   /**< @old{RB_ENC_CODERANGE_AND} */
 #define ENCODING_CODERANGE_SET(obj, encindex, cr) RB_ENCODING_CODERANGE_SET(obj, encindex, cr) /**< @old{RB_ENCODING_CODERANGE_SET} */
 
+/** @cond INTERNAL_MACRO */
+#define RB_ENC_CODERANGE           RB_ENC_CODERANGE
+#define RB_ENC_CODERANGE_AND       RB_ENC_CODERANGE_AND
+#define RB_ENC_CODERANGE_ASCIIONLY RB_ENC_CODERANGE_ASCIIONLY
+#define RB_ENC_CODERANGE_CLEAN_P   RB_ENC_CODERANGE_CLEAN_P
+#define RB_ENC_CODERANGE_CLEAR     RB_ENC_CODERANGE_CLEAR
+#define RB_ENC_CODERANGE_SET       RB_ENC_CODERANGE_SET
+/** @endcond */
+
 RBIMPL_SYMBOL_EXPORT_END()
 
 #endif /* RUBY_INTERNAL_ENCODING_CODERANGE_H */
diff --git a/include/ruby/internal/encoding/ctype.h b/include/ruby/internal/encoding/ctype.h
index e0b95f93b2..422e2b9c2d 100644
--- a/include/ruby/internal/encoding/ctype.h
+++ b/include/ruby/internal/encoding/ctype.h
@@ -39,7 +39,14 @@ RBIMPL_SYMBOL_EXPORT_BEGIN()
  * @retval     0          It isn't.
  * @retval     otherwise  It is.
  */
-#define rb_enc_is_newline(p,end,enc)  ONIGENC_IS_MBC_NEWLINE((enc),(UChar*)(p),(UChar*)(end))
+static inline bool
+rb_enc_is_newline(const char *p, const char *e, const rb_encoding *enc)
+{
+    const OnigUChar *up = RBIMPL_CAST((const OnigUChar *)p);
+    const OnigUChar *ue = RBIMPL_CAST((const OnigUChar *)e);
+    /* Nothing here writes through `up`/`ue`, so const is kept on both. */
+    return ONIGENC_IS_MBC_NEWLINE(enc, up, ue);
+}
 
 /**
  * Queries if the passed  code point is of passed character  type in the passed
@@ -52,7 +59,11 @@ RBIMPL_SYMBOL_EXPORT_BEGIN()
  * @retval     1    `c` is of `t` in `enc`.
  * @retval     0    Otherwise.
  */
-#define rb_enc_isctype(c,t,enc) ONIGENC_IS_CODE_CTYPE((enc),(c),(t))
+static inline bool
+rb_enc_isctype(OnigCodePoint c, OnigCtype t, const rb_encoding *enc)
+{
+    return ONIGENC_IS_CODE_CTYPE(enc, c, t);
+}
 
 /**
  * Identical to rb_isascii(), except it additionally takes an encoding.
@@ -67,7 +78,11 @@ RBIMPL_SYMBOL_EXPORT_BEGIN()
  * `enc` is  ignored.  This  is at least  an intentional  implementation detail
  * (not a bug).  But there could be rooms for future extensions.
  */
-#define rb_enc_isascii(c,enc) ONIGENC_IS_CODE_ASCII(c)
+static inline bool
+rb_enc_isascii(OnigCodePoint c, const rb_encoding *enc)
+{
+    return ONIGENC_IS_CODE_ASCII(c);
+}
 
 /**
  * Identical to rb_isalpha(), except it additionally takes an encoding.
@@ -77,7 +92,11 @@ RBIMPL_SYMBOL_EXPORT_BEGIN()
  * @retval     1    `enc` classifies `c` as "ALPHA".
  * @retval     0    Otherwise.
  */
-#define rb_enc_isalpha(c,enc) ONIGENC_IS_CODE_ALPHA((enc),(c))
+static inline bool
+rb_enc_isalpha(OnigCodePoint c, const rb_encoding *enc)
+{
+    return ONIGENC_IS_CODE_ALPHA(enc, c);
+}
 
 /**
  * Identical to rb_islower(), except it additionally takes an encoding.
@@ -87,7 +106,11 @@ RBIMPL_SYMBOL_EXPORT_BEGIN()
  * @retval     1    `enc` classifies `c` as "LOWER".
  * @retval     0    Otherwise.
  */
-#define rb_enc_islower(c,enc) ONIGENC_IS_CODE_LOWER((enc),(c))
+static inline bool
+rb_enc_islower(OnigCodePoint c, const rb_encoding *enc)
+{
+    return ONIGENC_IS_CODE_LOWER(enc, c);
+}
 
 /**
  * Identical to rb_isupper(), except it additionally takes an encoding.
@@ -97,7 +120,11 @@ RBIMPL_SYMBOL_EXPORT_BEGIN()
  * @retval     1    `enc` classifies `c` as "UPPER".
  * @retval     0    Otherwise.
  */
-#define rb_enc_isupper(c,enc) ONIGENC_IS_CODE_UPPER((enc),(c))
+static inline bool
+rb_enc_isupper(OnigCodePoint c, const rb_encoding *enc)
+{
+    return ONIGENC_IS_CODE_UPPER(enc, c);
+}
 
 /**
  * Identical to rb_ispunct(), except it additionally takes an encoding.
@@ -107,7 +134,11 @@ RBIMPL_SYMBOL_EXPORT_BEGIN()
  * @retval     1    `enc` classifies `c` as "PUNCT".
  * @retval     0    Otherwise.
  */
-#define rb_enc_ispunct(c,enc) ONIGENC_IS_CODE_PUNCT((enc),(c))
+static inline bool
+rb_enc_ispunct(OnigCodePoint c, const rb_encoding *enc)
+{
+    return ONIGENC_IS_CODE_PUNCT(enc, c);
+}
 
 /**
  * Identical to rb_isalnum(), except it additionally takes an encoding.
@@ -117,7 +148,11 @@ RBIMPL_SYMBOL_EXPORT_BEGIN()
  * @retval     1    `enc` classifies `c` as "ANUM".
  * @retval     0    Otherwise.
  */
-#define rb_enc_isalnum(c,enc) ONIGENC_IS_CODE_ALNUM((enc),(c))
+static inline bool
+rb_enc_isalnum(OnigCodePoint c, const rb_encoding *enc)
+{
+    return ONIGENC_IS_CODE_ALNUM(enc, c);
+}
 
 /**
  * Identical to rb_isprint(), except it additionally takes an encoding.
@@ -127,7 +162,11 @@ RBIMPL_SYMBOL_EXPORT_BEGIN()
  * @retval     1    `enc` classifies `c` as "PRINT".
  * @retval     0    Otherwise.
  */
-#define rb_enc_isprint(c,enc) ONIGENC_IS_CODE_PRINT((enc),(c))
+static inline bool
+rb_enc_isprint(OnigCodePoint c, const rb_encoding *enc)
+{
+    return ONIGENC_IS_CODE_PRINT(enc, c);
+}
 
 /**
  * Identical to rb_isspace(), except it additionally takes an encoding.
@@ -137,7 +176,11 @@ RBIMPL_SYMBOL_EXPORT_BEGIN()
  * @retval     1    `enc` classifies `c` as "PRINT".
  * @retval     0    Otherwise.
  */
-#define rb_enc_isspace(c,enc) ONIGENC_IS_CODE_SPACE((enc),(c))
+static inline bool
+rb_enc_isspace(OnigCodePoint c, const rb_encoding *enc)
+{
+    return ONIGENC_IS_CODE_SPACE(enc, c);
+}
 
 /**
  * Identical to rb_isdigit(), except it additionally takes an encoding.
@@ -147,7 +190,11 @@ RBIMPL_SYMBOL_EXPORT_BEGIN()
  * @retval     1    `enc` classifies `c` as "DIGIT".
  * @retval     0    Otherwise.
  */
-#define rb_enc_isdigit(c,enc) ONIGENC_IS_CODE_DIGIT((enc),(c))
+static inline bool
+rb_enc_isdigit(OnigCodePoint c, const rb_encoding *enc)
+{
+    return ONIGENC_IS_CODE_DIGIT(enc, c);
+}
 
 RBIMPL_ATTR_CONST()
 /**
@@ -179,4 +226,18 @@ int rb_enc_tolower(int c, rb_encoding *enc);
 
 RBIMPL_SYMBOL_EXPORT_END()
 
+/** @cond INTERNAL_MACRO */
+#define rb_enc_is_newline rb_enc_is_newline
+#define rb_enc_isalnum    rb_enc_isalnum
+#define rb_enc_isalpha    rb_enc_isalpha
+#define rb_enc_isascii    rb_enc_isascii
+#define rb_enc_isctype    rb_enc_isctype
+#define rb_enc_isdigit    rb_enc_isdigit
+#define rb_enc_islower    rb_enc_islower
+#define rb_enc_isprint    rb_enc_isprint
+#define rb_enc_ispunct    rb_enc_ispunct
+#define rb_enc_isspace    rb_enc_isspace
+#define rb_enc_isupper    rb_enc_isupper
+/** @endcond */
+
 #endif /* RUBY_INTERNAL_ENCODING_CTYPE_H */
diff --git a/include/ruby/internal/encoding/encoding.h b/include/ruby/internal/encoding/encoding.h
index 0e6463ad78..b4274b7f55 100644
--- a/include/ruby/internal/encoding/encoding.h
+++ b/include/ruby/internal/encoding/encoding.h
@@ -76,13 +76,15 @@ enum ruby_encoding_consts {
  * @param[in]   i    Encoding in encindex format.
  * @post        `obj`'s encoding is `i`.
  */
-#define RB_ENCODING_SET_INLINED(obj,i) do {\
-    RBASIC(obj)->flags &= ~RUBY_ENCODING_MASK;\
-    RBASIC(obj)->flags |= (VALUE)(i) << RUBY_ENCODING_SHIFT;\
-} while (0)
+static inline void
+RB_ENCODING_SET_INLINED(VALUE obj, int encindex)
+{
+    VALUE f = /* upcast */ encindex;
 
-/** @alias{rb_enc_set_index} */
-#define RB_ENCODING_SET(obj,i) rb_enc_set_index((obj), (i))
+    f <<= RUBY_ENCODING_SHIFT;
+    RB_FL_UNSET_RAW(obj, RUBY_ENCODING_MASK);
+    RB_FL_SET_RAW(obj, f);
+}
 
 /**
  * Queries the  encoding of the  passed object.   The encoding must  be smaller
@@ -92,32 +94,13 @@ enum ruby_encoding_consts {
  * @param[in]  obj  Target object.
  * @return     `obj`'s encoding index.
  */
-#define RB_ENCODING_GET_INLINED(obj) \
-    (int)((RBASIC(obj)->flags & RUBY_ENCODING_MASK)>>RUBY_ENCODING_SHIFT)
-
-/**
- * @alias{rb_enc_get_index}
- *
- * @internal
- *
- * Implementation wise this is not a verbatim alias of rb_enc_get_index().  But
- * the API is consistent.  Don't bother.
- */
-#define RB_ENCODING_GET(obj) \
-    (RB_ENCODING_GET_INLINED(obj) != RUBY_ENCODING_INLINE_MAX ? \
-     RB_ENCODING_GET_INLINED(obj) : \
-     rb_enc_get_index(obj))
+static inline int
+RB_ENCODING_GET_INLINED(VALUE obj)
+{
+    VALUE ret = RB_FL_TEST_RAW(obj, RUBY_ENCODING_MASK) >> RUBY_ENCODING_SHIFT;
 
-/**
- * Queries if  the passed  object is  in ascii 8bit  (== binary)  encoding. The
- * object must  be capable of having  inline encoding.  Using this  macro needs
- * deep understanding of bit level object binary layout.
- *
- * @param[in]  obj  An object to check.
- * @retval     1    It is.
- * @retval     0    It isn't.
- */
-#define RB_ENCODING_IS_ASCII8BIT(obj) (RB_ENCODING_GET_INLINED(obj) == 0)
+    return RBIMPL_CAST((int)ret);
+}
 
 #define ENCODING_SET_INLINED(obj,i) RB_ENCODING_SET_INLINED(obj,i) /**< @old{RB_ENCODING_SET_INLINED} */
 #define ENCODING_SET(obj,i) RB_ENCODING_SET(obj,i)                 /**< @old{RB_ENCODING_SET} */
@@ -126,7 +109,6 @@ enum ruby_encoding_consts {
 #define ENCODING_IS_ASCII8BIT(obj) RB_ENCODING_IS_ASCII8BIT(obj)   /**< @old{RB_ENCODING_IS_ASCII8BIT} */
 #define ENCODING_MAXNAMELEN RUBY_ENCODING_MAXNAMELEN               /**< @old{RUBY_ENCODING_MAXNAMELEN} */
 
-
 /**
  * The  type  of encoding.   Our  design  here  is we  take  Oniguruma/Onigmo's
  * multilingualisation schema as our base data structure.
@@ -217,6 +199,27 @@ int rb_enc_to_index(rb_encoding *enc);
  */
 int rb_enc_get_index(VALUE obj);
 
+/**
+ * @alias{rb_enc_get_index}
+ *
+ * @internal
+ *
+ * Implementation wise this is not a verbatim alias of rb_enc_get_index().  But
+ * the API is consistent.  Don't bother.
+ */
+static inline int
+RB_ENCODING_GET(VALUE obj)
+{
+    int encindex = RB_ENCODING_GET_INLINED(obj);
+
+    if (encindex == RUBY_ENCODING_INLINE_MAX) {
+        return rb_enc_get_index(obj); /* inline value is at its max: ask the API */
+    }
+    else {
+        return encindex;              /* the inline value is the answer */
+    }
+}
+
 /**
  * Destructively assigns an encoding (via its index) to an object.
  *
@@ -229,6 +232,31 @@ int rb_enc_get_index(VALUE obj);
  */
 void rb_enc_set_index(VALUE obj, int encindex);
 
+/** @alias{rb_enc_set_index} */
+static inline void
+RB_ENCODING_SET(VALUE obj, int encindex)
+{
+    rb_enc_set_index(obj, encindex);
+}
+
+/**
+ * This is #RB_ENCODING_SET  + RB_ENC_CODERANGE_SET combo.  The  object must be
+ * capable  of  having  inline  encoding.   Using  this  function  needs  deep
+ * understanding of bit level object binary layout.
+ *
+ * @param[out]  obj       Target object.
+ * @param[in]   encindex  Encoding in encindex format.
+ * @param[in]   cr        An enum ::ruby_coderange_type.
+ * @post        `obj`'s encoding is `encindex`.
+ * @post        `obj`'s code range is `cr`.
+ */
+static inline void
+RB_ENCODING_CODERANGE_SET(VALUE obj, int encindex, enum ruby_coderange_type cr)
+{
+    RB_ENCODING_SET(obj, encindex);
+    RB_ENC_CODERANGE_SET(obj, cr);
+}
+
 RBIMPL_ATTR_PURE()
 /**
  * Queries if the passed object can have its encoding.
@@ -401,7 +429,11 @@ rb_encoding *rb_enc_find(const char *name);
  * @param[in]  enc  An encoding.
  * @return     Its name.
  */
-#define rb_enc_name(enc) (enc)->name
+static inline const char *
+rb_enc_name(const rb_encoding *enc)
+{
+    return enc->name;
+}
 
 /**
  * Queries  the minimum  number  of bytes  that the  passed  encoding needs  to
@@ -412,7 +444,11 @@ rb_encoding *rb_enc_find(const char *name);
  * @param[in]  enc  An encoding.
  * @return     Its least possible number of bytes except 0.
  */
-#define rb_enc_mbminlen(enc) (enc)->min_enc_len
+static inline int
+rb_enc_mbminlen(const rb_encoding *enc)
+{
+    return enc->min_enc_len;
+}
 
 /**
  * Queries  the maximum  number  of bytes  that the  passed  encoding needs  to
@@ -423,7 +459,11 @@ rb_encoding *rb_enc_find(const char *name);
  * @param[in]  enc  An encoding.
  * @return     Its maximum possible number of bytes of a character.
  */
-#define rb_enc_mbmaxlen(enc) (enc)->max_enc_len
+static inline int
+rb_enc_mbmaxlen(const rb_encoding *enc)
+{
+    return enc->max_enc_len;
+}
 
 /**
  * Queries the number of bytes of the character at the passed pointer.
@@ -525,7 +565,6 @@ int rb_enc_ascget(const char *p, const char *e, int *len, rb_encoding *enc);
  */
 unsigned int rb_enc_codepoint_len(const char *p, const char *e, int *len, rb_encoding *enc);
 
-RBIMPL_ATTR_DEPRECATED(("use rb_enc_codepoint_len instead."))
 /**
  * Queries  the  code  point  of  character  pointed  by  the  passed  pointer.
  * Exceptions happen in case of broken input.
@@ -536,12 +575,24 @@ RBIMPL_ATTR_DEPRECATED(("use rb_enc_codepoint_len instead."))
  * @param[in]   enc           Encoding of the string.
  * @exception   rb_eArgError  `p` is broken.
  * @return      Code point of the character pointed by `p`.
+ *
+ * @internal
+ *
+ * @matz says in commit  91e5ba1cb865a2385d3e1cbfacd824496898e098 that the line
+ * below  is a  "prototype for  obsolete function".   However even  today there
+ * still are some use  cases of it throughout our repository.   It seems it has
+ * its own niche.
  */
-unsigned int rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc);
+static inline unsigned int
+rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc)
+{
+    return rb_enc_codepoint_len(p, e, 0, enc);
+    /*                               ^^^
+     * This can be `NULL` in C, `nullptr` in C++, and `0` for both.
+     * We choose the most portable one here.
+     */
+}
 
-/** @cond INTERNAL_MACRO */
-#define rb_enc_codepoint(p,e,enc) rb_enc_codepoint_len((p),(e),0,(enc))
-/** @endcond */
 
 /**
  * Identical to rb_enc_codepoint(),  except it assumes the  passed character is
@@ -552,7 +603,14 @@ unsigned int rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc);
  * @param[in]   enc  Encoding of the string.
  * @return      Code point of the character pointed by `p`.
  */
-#define rb_enc_mbc_to_codepoint(p, e, enc) ONIGENC_MBC_TO_CODE((enc),(UChar*)(p),(UChar*)(e))
+static inline OnigCodePoint
+rb_enc_mbc_to_codepoint(const char *p, const char *e, const rb_encoding *enc)
+{
+    const OnigUChar *up = RBIMPL_CAST((const OnigUChar *)p);
+    const OnigUChar *ue = RBIMPL_CAST((const OnigUChar *)e);
+
+    return ONIGENC_MBC_TO_CODE(enc, up, ue);
+}
 
 /**
  * Queries the  number of bytes  requested to  represent the passed  code point
@@ -573,11 +631,13 @@ int rb_enc_codelen(int code, rb_encoding *enc);
  * @retval     0          `code` is invalid.
  * @return     otherwise  Number of bytes used for `enc` to encode `code`.
  */
-int rb_enc_code_to_mbclen(int code, rb_encoding *enc);
+static inline int
+rb_enc_code_to_mbclen(int c, const rb_encoding *enc)
+{
+    OnigCodePoint uc = RBIMPL_CAST((OnigCodePoint)c);
 
-/** @cond INTERNAL_MACRO */
-#define rb_enc_code_to_mbclen(c, enc) ONIGENC_CODE_TO_MBCLEN((enc), (c));
-/** @endcond */
+    return ONIGENC_CODE_TO_MBCLEN(enc, uc);
+}
 
 /**
  * Identical to rb_enc_uint_chr(),  except it writes back to  the passed buffer
@@ -587,8 +647,20 @@ int rb_enc_code_to_mbclen(int code, rb_encoding *enc);
  * @param[out]  buf  Return buffer.
  * @param[in]   enc  Target encoding scheme.
  * @post        `c` is encoded according to `enc`, then written to `buf`.
+ *
+ * @internal
+ *
+ * The second argument  must be typed.  But its current  usages prevent us from
+ * being any stricter than this. :FIXME:
  */
-#define rb_enc_mbcput(c,buf,enc) ONIGENC_CODE_TO_MBC((enc),(c),(UChar*)(buf))
+static inline int
+rb_enc_mbcput(unsigned int c, void *buf, const rb_encoding *enc)
+{
+    OnigCodePoint uc = RBIMPL_CAST((OnigCodePoint)c);
+    OnigUChar *ubuf = RBIMPL_CAST((OnigUChar *)buf);
+
+    return ONIGENC_CODE_TO_MBC(enc, uc, ubuf);
+}
 
 /**
  * Queries the previous (left) character.
@@ -600,7 +672,16 @@ int rb_enc_code_to_mbclen(int code, rb_encoding *enc);
  * @retval     NULL       No previous character.
  * @retval     otherwise  Pointer to the head of the previous character.
  */
-#define rb_enc_prev_char(s,p,e,enc) ((char *)onigenc_get_prev_char_head((enc),(UChar*)(s),(UChar*)(p),(UChar*)(e)))
+static inline char *
+rb_enc_prev_char(const char *s, const char *p, const char *e, const rb_encoding *enc)
+{
+    const OnigUChar *us = RBIMPL_CAST((const OnigUChar *)s);
+    const OnigUChar *up = RBIMPL_CAST((const OnigUChar *)p);
+    const OnigUChar *ue = RBIMPL_CAST((const OnigUChar *)e);
+    OnigUChar *ur = onigenc_get_prev_char_head(enc, us, up, ue);
+
+    return RBIMPL_CAST((char *)ur);
+}
 
 /**
  * Queries the  left boundary of  a character.   This function takes  a pointer
@@ -612,7 +693,16 @@ int rb_enc_code_to_mbclen(int code, rb_encoding *enc);
  * @param[in]  enc        Encoding.
  * @return     Pointer to the head of the character that contains `p`.
  */
-#define rb_enc_left_char_head(s,p,e,enc) ((char *)onigenc_get_left_adjust_char_head((enc),(UChar*)(s),(UChar*)(p),(UChar*)(e)))
+static inline char *
+rb_enc_left_char_head(const char *s, const char *p, const char *e, const rb_encoding *enc)
+{
+    const OnigUChar *us = RBIMPL_CAST((const OnigUChar *)s);
+    const OnigUChar *up = RBIMPL_CAST((const OnigUChar *)p);
+    const OnigUChar *ue = RBIMPL_CAST((const OnigUChar *)e);
+    OnigUChar *ur = onigenc_get_left_adjust_char_head(enc, us, up, ue);
+
+    return RBIMPL_CAST((char *)ur);
+}
 
 /**
  * Queries the  right boundary of a  character.  This function takes  a pointer
@@ -624,7 +714,16 @@ int rb_enc_code_to_mbclen(int code, rb_encoding *enc);
  * @param[in]  enc  Encoding.
  * @return     Pointer to the end of the character that contains `p`.
  */
-#define rb_enc_right_char_head(s,p,e,enc) ((char *)onigenc_get_right_adjust_char_head((enc),(UChar*)(s),(UChar*)(p),(UChar*)(e)))
+static inline char *
+rb_enc_right_char_head(const char *s, const char *p, const char *e, rb_encoding *enc)
+{
+    const OnigUChar *us = RBIMPL_CAST((const OnigUChar *)s);
+    const OnigUChar *up = RBIMPL_CAST((const OnigUChar *)p);
+    const OnigUChar *ue = RBIMPL_CAST((const OnigUChar *)e);
+    OnigUChar *ur = onigenc_get_right_adjust_char_head(enc, us, up, ue);
+
+    return RBIMPL_CAST((char *)ur);
+}
 
 /**
  * Scans the string backwards for n characters.
@@ -637,7 +736,16 @@ int rb_enc_code_to_mbclen(int code, rb_encoding *enc);
  * @retval     NULL       There are no `n` characters left.
  * @retval     otherwise  Pointer to `n` character before `p`.
  */
-#define rb_enc_step_back(s,p,e,n,enc) ((char *)onigenc_step_back((enc),(UChar*)(s),(UChar*)(p),(UChar*)(e),(int)(n)))
+static inline char *
+rb_enc_step_back(const char *s, const char *p, const char *e, int n, const rb_encoding *enc)
+{
+    const OnigUChar *us = RBIMPL_CAST((const OnigUChar *)s);
+    const OnigUChar *up = RBIMPL_CAST((const OnigUChar *)p);
+    const OnigUChar *ue = RBIMPL_CAST((const OnigUChar *)e);
+    const OnigUChar *ur = onigenc_step_back(enc, us, up, ue, n);
+
+    return RBIMPL_CAST((char *)ur);
+}
 
 /**
  * @private
@@ -670,8 +778,19 @@ rb_enc_asciicompat_inline(rb_encoding *enc)
  * @retval     0    It is incompatible.
  * @retval     1    It is compatible.
  */
-#define rb_enc_asciicompat(enc) rb_enc_asciicompat_inline(enc)
-
+static inline bool
+rb_enc_asciicompat(rb_encoding *enc)
+{
+    if (rb_enc_mbminlen(enc) != 1) {
+        return false; /* shortest character is not exactly one byte long */
+    }
+    else if (rb_enc_dummy_p(enc)) {
+        return false; /* dummy encodings are reported as incompatible */
+    }
+    else {
+        return true;
+    }
+}
 
 /**
  * Queries if the passed string is in an ASCII-compatible encoding.
@@ -680,7 +799,13 @@ rb_enc_asciicompat_inline(rb_encoding *enc)
  * @retval     0    `str` is not a String, or an ASCII-incompatible string.
  * @retval     1    Otherwise.
  */
-#define rb_enc_str_asciicompat_p(str) rb_enc_asciicompat(rb_enc_get(str))
+static inline bool
+rb_enc_str_asciicompat_p(VALUE str)
+{
+    rb_encoding *enc = rb_enc_get(str);
+
+    return rb_enc_asciicompat(enc);
+}
 
 /**
  * Queries  the   Ruby-level  counterpart   instance  of   ::rb_cEncoding  that
@@ -803,6 +928,21 @@ RBIMPL_ATTR_CONST()
 int rb_ascii8bit_encindex(void);
 #endif
 
+/**
+ * Queries if  the passed  object is  in ascii 8bit  (== binary)  encoding. The
+ * object must be capable of having inline encoding.  Using this function needs
+ * deep understanding of bit level object binary layout.
+ *
+ * @param[in]  obj  An object to check.
+ * @retval     1    It is.
+ * @retval     0    It isn't.
+ */
+static inline bool
+RB_ENCODING_IS_ASCII8BIT(VALUE obj)
+{
+    return RB_ENCODING_GET_INLINED(obj) == rb_ascii8bit_encindex();
+}
+
 #ifndef rb_utf8_encindex
 RBIMPL_ATTR_CONST()
 /**
@@ -894,4 +1034,25 @@ VALUE rb_locale_charmap(VALUE klass);
 
 RBIMPL_SYMBOL_EXPORT_END()
 
+/** @cond INTERNAL_MACRO */
+#define RB_ENCODING_GET          RB_ENCODING_GET
+#define RB_ENCODING_GET_INLINED  RB_ENCODING_GET_INLINED
+#define RB_ENCODING_IS_ASCII8BIT RB_ENCODING_IS_ASCII8BIT
+#define RB_ENCODING_SET          RB_ENCODING_SET
+#define RB_ENCODING_SET_INLINED  RB_ENCODING_SET_INLINED
+#define rb_enc_asciicompat       rb_enc_asciicompat
+#define rb_enc_code_to_mbclen    rb_enc_code_to_mbclen
+#define rb_enc_codepoint         rb_enc_codepoint
+#define rb_enc_left_char_head    rb_enc_left_char_head
+#define rb_enc_mbc_to_codepoint  rb_enc_mbc_to_codepoint
+#define rb_enc_mbcput            rb_enc_mbcput
+#define rb_enc_mbmaxlen          rb_enc_mbmaxlen
+#define rb_enc_mbminlen          rb_enc_mbminlen
+#define rb_enc_name              rb_enc_name
+#define rb_enc_prev_char         rb_enc_prev_char
+#define rb_enc_right_char_head   rb_enc_right_char_head
+#define rb_enc_step_back         rb_enc_step_back
+#define rb_enc_str_asciicompat_p rb_enc_str_asciicompat_p
+/** @endcond */
+
 #endif /* RUBY_INTERNAL_ENCODING_ENCODING_H */
diff --git a/include/ruby/internal/encoding/string.h b/include/ruby/internal/encoding/string.h
index 87226bec10..f8ce809199 100644
--- a/include/ruby/internal/encoding/string.h
+++ b/include/ruby/internal/encoding/string.h
@@ -25,6 +25,7 @@
 #include "ruby/internal/value.h"
 #include "ruby/internal/encoding/encoding.h"
 #include "ruby/internal/attr/nonnull.h"
+#include "ruby/internal/intern/string.h" /* rbimpl_strlen */
 
 RBIMPL_SYMBOL_EXPORT_BEGIN()
 
@@ -318,18 +319,26 @@ RBIMPL_ATTR_NONNULL(())
 long rb_memsearch(const void *x, long m, const void *y, long n, rb_encoding *enc);
 
 /** @cond INTERNAL_MACRO */
-#ifdef HAVE_BUILTIN___BUILTIN_CONSTANT_P
-#define rb_enc_str_new(str, len, enc) RB_GNUC_EXTENSION_BLOCK( \
-    (__builtin_constant_p(str) && __builtin_constant_p(len)) ? \
-        rb_enc_str_new_static((str), (len), (enc)) : \
-        rb_enc_str_new((str), (len), (enc)) \
-)
-#define rb_enc_str_new_cstr(str, enc) RB_GNUC_EXTENSION_BLOCK(  \
-    (__builtin_constant_p(str)) ?              \
-        rb_enc_str_new_static((str), (long)strlen(str), (enc)) : \
-        rb_enc_str_new_cstr((str), (enc)) \
-)
-#endif
+RBIMPL_ATTR_NONNULL(())
+static inline VALUE
+rbimpl_enc_str_new_cstr(const char *str, rb_encoding *enc)
+{
+    long len = rbimpl_strlen(str);
+    /* The rb_enc_str_new_cstr() macro picks this only for constant `str`. */
+    return rb_enc_str_new_static(str, len, enc);
+}
+
+#define rb_enc_str_new(str, len, enc)           \
+    ((RBIMPL_CONSTANT_P(str) &&                 \
+      RBIMPL_CONSTANT_P(len) ?                  \
+      rb_enc_str_new_static:                    \
+      rb_enc_str_new) ((str), (len), (enc)))
+
+#define rb_enc_str_new_cstr(str, enc)           \
+    ((RBIMPL_CONSTANT_P(str)  ?                 \
+      rbimpl_enc_str_new_cstr :                 \
+      rb_enc_str_new_cstr) ((str), (enc)))
+
 /** @endcond */
 
 RBIMPL_SYMBOL_EXPORT_END()
diff --git a/string.c b/string.c
index 953716a1ed..80f3358c2c 100644
--- a/string.c
+++ b/string.c
@@ -5738,7 +5738,7 @@ rb_str_setbyte(VALUE str, VALUE index, VALUE value)
 		ENC_CODERANGE_SET(str, ENC_CODERANGE_VALID);
 	    goto end;
 	  case ENC_CODERANGE_VALID:
-	    left = rb_enc_left_char_head(head, ptr, head+len, enc);
+            left = rb_enc_left_char_head(head, (char *)ptr, head+len, enc);
 	    width = rb_enc_precise_mbclen(left, head+len, enc);
 	    *ptr = byte;
 	    nlen = rb_enc_precise_mbclen(left, head+len, enc);
diff --git a/template/Doxyfile.tmpl b/template/Doxyfile.tmpl
index ed96cb409a..36c0b1c8d6 100644
--- a/template/Doxyfile.tmpl
+++ b/template/Doxyfile.tmpl
@@ -273,6 +273,7 @@ ALIASES               += "alias{1}=Just another name of @ref \1"
 ALIASES               += "old{1}=Old name of @ref \1.^^@deprecated Use @ref \1 instead.^^@ingroup deprecated_macros"
 ALIASES               += "shyouhei=\@shyouhei"
 ALIASES               += "ko1=\@ko1"
+ALIASES               += "matz=\@matz"
 
 # Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources
 # only. Doxygen will then generate output that is more tailored for C. For
-- 
cgit v1.2.3