summaryrefslogtreecommitdiff
path: root/include/ruby/internal
diff options
context:
space:
mode:
Diffstat (limited to 'include/ruby/internal')
-rw-r--r--include/ruby/internal/abi.h58
-rw-r--r--include/ruby/internal/anyargs.h15
-rw-r--r--include/ruby/internal/arithmetic.h3
-rw-r--r--include/ruby/internal/arithmetic/long.h2
-rw-r--r--include/ruby/internal/assume.h5
-rw-r--r--include/ruby/internal/attr/nodiscard.h2
-rw-r--r--include/ruby/internal/attr/noexcept.h2
-rw-r--r--include/ruby/internal/attr/nonnull.h2
-rw-r--r--include/ruby/internal/attr/packed_struct.h43
-rw-r--r--include/ruby/internal/config.h2
-rw-r--r--include/ruby/internal/core/rarray.h221
-rw-r--r--include/ruby/internal/core/rbasic.h18
-rw-r--r--include/ruby/internal/core/rclass.h49
-rw-r--r--include/ruby/internal/core/rdata.h24
-rw-r--r--include/ruby/internal/core/rfile.h4
-rw-r--r--include/ruby/internal/core/rhash.h13
-rw-r--r--include/ruby/internal/core/rmatch.h14
-rw-r--r--include/ruby/internal/core/robject.h57
-rw-r--r--include/ruby/internal/core/rstring.h114
-rw-r--r--include/ruby/internal/core/rtypeddata.h57
-rw-r--r--include/ruby/internal/dllexport.h34
-rw-r--r--include/ruby/internal/encoding/coderange.h202
-rw-r--r--include/ruby/internal/encoding/ctype.h258
-rw-r--r--include/ruby/internal/encoding/encoding.h1044
-rw-r--r--include/ruby/internal/encoding/pathname.h184
-rw-r--r--include/ruby/internal/encoding/re.h46
-rw-r--r--include/ruby/internal/encoding/sprintf.h78
-rw-r--r--include/ruby/internal/encoding/string.h346
-rw-r--r--include/ruby/internal/encoding/symbol.h100
-rw-r--r--include/ruby/internal/encoding/transcode.h562
-rw-r--r--include/ruby/internal/error.h36
-rw-r--r--include/ruby/internal/eval.h33
-rw-r--r--include/ruby/internal/event.h5
-rw-r--r--include/ruby/internal/fl_type.h126
-rw-r--r--include/ruby/internal/gc.h780
-rw-r--r--include/ruby/internal/globals.h3
-rw-r--r--include/ruby/internal/has/builtin.h8
-rw-r--r--include/ruby/internal/has/c_attribute.h12
-rw-r--r--include/ruby/internal/intern/array.h8
-rw-r--r--include/ruby/internal/intern/bignum.h4
-rw-r--r--include/ruby/internal/intern/class.h56
-rw-r--r--include/ruby/internal/intern/cont.h54
-rw-r--r--include/ruby/internal/intern/error.h17
-rw-r--r--include/ruby/internal/intern/file.h21
-rw-r--r--include/ruby/internal/intern/gc.h390
-rw-r--r--include/ruby/internal/intern/hash.h20
-rw-r--r--include/ruby/internal/intern/load.h37
-rw-r--r--include/ruby/internal/intern/object.h75
-rw-r--r--include/ruby/internal/intern/process.h11
-rw-r--r--include/ruby/internal/intern/re.h5
-rw-r--r--include/ruby/internal/intern/select.h2
-rw-r--r--include/ruby/internal/intern/select/largesize.h3
-rw-r--r--include/ruby/internal/intern/select/posix.h5
-rw-r--r--include/ruby/internal/intern/signal.h8
-rw-r--r--include/ruby/internal/intern/string.h95
-rw-r--r--include/ruby/internal/intern/struct.h38
-rw-r--r--include/ruby/internal/intern/thread.h4
-rw-r--r--include/ruby/internal/intern/vm.h17
-rw-r--r--include/ruby/internal/interpreter.h2
-rw-r--r--include/ruby/internal/memory.h22
-rw-r--r--include/ruby/internal/module.h16
-rw-r--r--include/ruby/internal/newobj.h53
-rw-r--r--include/ruby/internal/rgengc.h443
-rw-r--r--include/ruby/internal/scan_args.h2
-rw-r--r--include/ruby/internal/special_consts.h87
-rw-r--r--include/ruby/internal/static_assert.h2
-rw-r--r--include/ruby/internal/stdbool.h2
-rw-r--r--include/ruby/internal/stdckdint.h60
-rw-r--r--include/ruby/internal/variable.h2
69 files changed, 4368 insertions, 1755 deletions
diff --git a/include/ruby/internal/abi.h b/include/ruby/internal/abi.h
new file mode 100644
index 0000000000..e735a67564
--- /dev/null
+++ b/include/ruby/internal/abi.h
@@ -0,0 +1,58 @@
+#ifndef RUBY_ABI_H
+#define RUBY_ABI_H
+
+#ifdef RUBY_ABI_VERSION /* should match the definition in config.h */
+
+/* This number represents Ruby's ABI version.
+ *
+ * In development Ruby, it should be bumped every time an ABI incompatible
+ * change is introduced. This will force other developers to rebuild extension
+ * gems.
+ *
+ * The following cases are considered as ABI incompatible changes:
+ * - Changing any data structures.
+ * - Changing macros or inline functions causing a change in behavior.
+ * - Deprecating or removing function declarations.
+ *
+ * The following cases are NOT considered as ABI incompatible changes:
+ * - Any changes that do not involve the header files in the `include`
+ * directory.
+ * - Adding macros, inline functions, or function declarations.
+ * - Backwards compatible refactors.
+ * - Editing comments.
+ *
+ * In released versions of Ruby, this number is not defined since teeny
+ * versions of Ruby should guarantee ABI compatibility.
+ */
+#define RUBY_ABI_VERSION 0
+
+/* Windows does not support weak symbols so ruby_abi_version will not exist
+ * in the shared library. */
+#if defined(HAVE_FUNC_WEAK) && !defined(_WIN32) && !defined(__MINGW32__) && !defined(__CYGWIN__)
+# define RUBY_DLN_CHECK_ABI
+#endif
+#endif /* RUBY_ABI_VERSION */
+
+#if defined(RUBY_DLN_CHECK_ABI) && !defined(RUBY_EXPORT)
+
+# ifdef __cplusplus
+extern "C" {
+# endif
+
+RUBY_FUNC_EXPORTED unsigned long long __attribute__((weak))
+ruby_abi_version(void)
+{
+# ifdef RUBY_ABI_VERSION
+ return RUBY_ABI_VERSION;
+# else
+ return 0;
+# endif
+}
+
+# ifdef __cplusplus
+}
+# endif
+
+#endif
+
+#endif
diff --git a/include/ruby/internal/anyargs.h b/include/ruby/internal/anyargs.h
index 9d8d16fdab..e3e1b6166d 100644
--- a/include/ruby/internal/anyargs.h
+++ b/include/ruby/internal/anyargs.h
@@ -239,15 +239,16 @@
# define RBIMPL_ANYARGS_DISPATCH_rb_define_method_13(n) RBIMPL_ANYARGS_DISPATCH((n) == 13, rb_define_method_13, RBIMPL_ANYARGS_DISPATCH_rb_define_method_12(n))
# define RBIMPL_ANYARGS_DISPATCH_rb_define_method_14(n) RBIMPL_ANYARGS_DISPATCH((n) == 14, rb_define_method_14, RBIMPL_ANYARGS_DISPATCH_rb_define_method_13(n))
# define RBIMPL_ANYARGS_DISPATCH_rb_define_method_15(n) RBIMPL_ANYARGS_DISPATCH((n) == 15, rb_define_method_15, RBIMPL_ANYARGS_DISPATCH_rb_define_method_14(n))
-# define RBIMPL_ANYARGS_DISPATCH_rb_define_singleton_method(n, f) RBIMPL_ANYARGS_DISPATCH(RBIMPL_CFUNC_IS_rb_f_notimplement(f), rb_define_singleton_method_m3, RBIMPL_ANYARGS_DISPATCH_rb_define_singleton_method_15(n))
-# define RBIMPL_ANYARGS_DISPATCH_rb_define_protected_method(n, f) RBIMPL_ANYARGS_DISPATCH(RBIMPL_CFUNC_IS_rb_f_notimplement(f), rb_define_protected_method_m3, RBIMPL_ANYARGS_DISPATCH_rb_define_protected_method_15(n))
-# define RBIMPL_ANYARGS_DISPATCH_rb_define_private_method(n, f) RBIMPL_ANYARGS_DISPATCH(RBIMPL_CFUNC_IS_rb_f_notimplement(f), rb_define_private_method_m3, RBIMPL_ANYARGS_DISPATCH_rb_define_private_method_15(n))
-# define RBIMPL_ANYARGS_DISPATCH_rb_define_module_function(n, f) RBIMPL_ANYARGS_DISPATCH(RBIMPL_CFUNC_IS_rb_f_notimplement(f), rb_define_module_function_m3, RBIMPL_ANYARGS_DISPATCH_rb_define_module_function_15(n))
-# define RBIMPL_ANYARGS_DISPATCH_rb_define_global_function(n, f) RBIMPL_ANYARGS_DISPATCH(RBIMPL_CFUNC_IS_rb_f_notimplement(f), rb_define_global_function_m3, RBIMPL_ANYARGS_DISPATCH_rb_define_global_function_15(n))
-# define RBIMPL_ANYARGS_DISPATCH_rb_define_method_id(n, f) RBIMPL_ANYARGS_DISPATCH(RBIMPL_CFUNC_IS_rb_f_notimplement(f), rb_define_method_id_m3, RBIMPL_ANYARGS_DISPATCH_rb_define_method_id_15(n))
-# define RBIMPL_ANYARGS_DISPATCH_rb_define_method(n, f) RBIMPL_ANYARGS_DISPATCH(RBIMPL_CFUNC_IS_rb_f_notimplement(f), rb_define_method_m3, RBIMPL_ANYARGS_DISPATCH_rb_define_method_15(n))
+# define RBIMPL_ANYARGS_DISPATCH_rb_define_singleton_method(n, f) RBIMPL_ANYARGS_DISPATCH(RBIMPL_CFUNC_IS_rb_f_notimplement(f), rb_define_singleton_method_notimpl, RBIMPL_ANYARGS_DISPATCH_rb_define_singleton_method_15(n))
+# define RBIMPL_ANYARGS_DISPATCH_rb_define_protected_method(n, f) RBIMPL_ANYARGS_DISPATCH(RBIMPL_CFUNC_IS_rb_f_notimplement(f), rb_define_protected_method_notimpl, RBIMPL_ANYARGS_DISPATCH_rb_define_protected_method_15(n))
+# define RBIMPL_ANYARGS_DISPATCH_rb_define_private_method(n, f) RBIMPL_ANYARGS_DISPATCH(RBIMPL_CFUNC_IS_rb_f_notimplement(f), rb_define_private_method_notimpl, RBIMPL_ANYARGS_DISPATCH_rb_define_private_method_15(n))
+# define RBIMPL_ANYARGS_DISPATCH_rb_define_module_function(n, f) RBIMPL_ANYARGS_DISPATCH(RBIMPL_CFUNC_IS_rb_f_notimplement(f), rb_define_module_function_notimpl, RBIMPL_ANYARGS_DISPATCH_rb_define_module_function_15(n))
+# define RBIMPL_ANYARGS_DISPATCH_rb_define_global_function(n, f) RBIMPL_ANYARGS_DISPATCH(RBIMPL_CFUNC_IS_rb_f_notimplement(f), rb_define_global_function_notimpl, RBIMPL_ANYARGS_DISPATCH_rb_define_global_function_15(n))
+# define RBIMPL_ANYARGS_DISPATCH_rb_define_method_id(n, f) RBIMPL_ANYARGS_DISPATCH(RBIMPL_CFUNC_IS_rb_f_notimplement(f), rb_define_method_id_notimpl, RBIMPL_ANYARGS_DISPATCH_rb_define_method_id_15(n))
+# define RBIMPL_ANYARGS_DISPATCH_rb_define_method(n, f) RBIMPL_ANYARGS_DISPATCH(RBIMPL_CFUNC_IS_rb_f_notimplement(f), rb_define_method_notimpl, RBIMPL_ANYARGS_DISPATCH_rb_define_method_15(n))
# define RBIMPL_ANYARGS_ATTRSET(sym) RBIMPL_ATTR_MAYBE_UNUSED() RBIMPL_ATTR_NONNULL(()) RBIMPL_ATTR_WEAKREF(sym)
# define RBIMPL_ANYARGS_DECL(sym, ...) \
+RBIMPL_ANYARGS_ATTRSET(sym) static void sym ## _notimpl(__VA_ARGS__, VALUE(*)(int, const VALUE *, VALUE, VALUE), int); \
RBIMPL_ANYARGS_ATTRSET(sym) static void sym ## _m3(__VA_ARGS__, VALUE(*)(ANYARGS), int); \
RBIMPL_ANYARGS_ATTRSET(sym) static void sym ## _m2(__VA_ARGS__, VALUE(*)(VALUE, VALUE), int); \
RBIMPL_ANYARGS_ATTRSET(sym) static void sym ## _m1(__VA_ARGS__, VALUE(*)(int, union { VALUE *x; const VALUE *y; } __attribute__((__transparent_union__)), VALUE), int); \
diff --git a/include/ruby/internal/arithmetic.h b/include/ruby/internal/arithmetic.h
index 3f7840c384..7ebb4a86f1 100644
--- a/include/ruby/internal/arithmetic.h
+++ b/include/ruby/internal/arithmetic.h
@@ -18,7 +18,8 @@
* Do not expect for instance `__VA_ARGS__` is always available.
* We assume C99 for ruby itself but we don't assume languages of
* extension libraries. They could be written in C++98.
- * @brief Conversion between C's arithmtic types and Ruby's numeric types.
+ * @brief Conversion between C's arithmetic types and Ruby's numeric
+ * types.
*/
#include "ruby/internal/arithmetic/char.h"
#include "ruby/internal/arithmetic/double.h"
diff --git a/include/ruby/internal/arithmetic/long.h b/include/ruby/internal/arithmetic/long.h
index 792f7be179..6b8fd8ffc3 100644
--- a/include/ruby/internal/arithmetic/long.h
+++ b/include/ruby/internal/arithmetic/long.h
@@ -115,7 +115,7 @@ RB_INT2FIX(long i)
/* :NOTE: VALUE can be wider than long. As j being unsigned, 2j+1 is fully
* defined. Also it can be compiled into a single LEA instruction. */
const unsigned long j = i;
- const unsigned long k = 2 * j + RUBY_FIXNUM_FLAG;
+ const unsigned long k = (j << 1) + RUBY_FIXNUM_FLAG;
const long l = k;
const SIGNED_VALUE m = l; /* Sign extend */
const VALUE n = m;
diff --git a/include/ruby/internal/assume.h b/include/ruby/internal/assume.h
index 65d34d4ac8..4c183e8af9 100644
--- a/include/ruby/internal/assume.h
+++ b/include/ruby/internal/assume.h
@@ -32,10 +32,7 @@
#include "ruby/internal/warning_push.h"
/** @cond INTERNAL_MACRO */
-#if RBIMPL_COMPILER_SINCE(MSVC, 13, 10, 0)
-# define RBIMPL_HAVE___ASSUME
-
-#elif RBIMPL_COMPILER_SINCE(Intel, 13, 0, 0)
+#if defined(HAVE___ASSUME)
# define RBIMPL_HAVE___ASSUME
#endif
/** @endcond */
diff --git a/include/ruby/internal/attr/nodiscard.h b/include/ruby/internal/attr/nodiscard.h
index 087192a7a8..c3ae118942 100644
--- a/include/ruby/internal/attr/nodiscard.h
+++ b/include/ruby/internal/attr/nodiscard.h
@@ -26,7 +26,7 @@
/**
* Wraps (or simulates) `[[nodiscard]]`. In C++ (at least since C++20) a
- * nodiscard attribute can have a message why the result shall not be ignoed.
+ * nodiscard attribute can have a message why the result shall not be ignored.
* However GCC attribute and SAL annotation cannot take them.
*/
#if RBIMPL_HAS_CPP_ATTRIBUTE(nodiscard)
diff --git a/include/ruby/internal/attr/noexcept.h b/include/ruby/internal/attr/noexcept.h
index ea3001df2a..7c3f92f1e7 100644
--- a/include/ruby/internal/attr/noexcept.h
+++ b/include/ruby/internal/attr/noexcept.h
@@ -54,7 +54,7 @@
* get smarter and smarter. Today they can infer if it actually throws
* or not without any annotations by humans (correct me if I'm wrong).
*
- * - When an inline function attributed `noexcepr` actually _does_ throw an
+ * - When an inline function attributed `noexcept` actually _does_ throw an
* exception: they have to call `std::terminate` then (C++ standard
* mandates so). This means exception handling routines are actually
* enforced, not omitted. This doesn't impact runtime performance (The
diff --git a/include/ruby/internal/attr/nonnull.h b/include/ruby/internal/attr/nonnull.h
index 874f4236c0..778d5be208 100644
--- a/include/ruby/internal/attr/nonnull.h
+++ b/include/ruby/internal/attr/nonnull.h
@@ -25,8 +25,10 @@
/** Wraps (or simulates) `__attribute__((nonnull))` */
#if RBIMPL_HAS_ATTRIBUTE(nonnull)
# define RBIMPL_ATTR_NONNULL(list) __attribute__((__nonnull__ list))
+# define RBIMPL_NONNULL_ARG(arg) RBIMPL_ASSERT_NOTHING
#else
# define RBIMPL_ATTR_NONNULL(list) /* void */
+# define RBIMPL_NONNULL_ARG(arg) RUBY_ASSERT(arg)
#endif
#endif /* RBIMPL_ATTR_NONNULL_H */
diff --git a/include/ruby/internal/attr/packed_struct.h b/include/ruby/internal/attr/packed_struct.h
new file mode 100644
index 0000000000..0678b9acc8
--- /dev/null
+++ b/include/ruby/internal/attr/packed_struct.h
@@ -0,0 +1,43 @@
+#ifndef RBIMPL_ATTR_PACKED_STRUCT_H /*-*-C++-*-vi:se ft=cpp:*/
+#define RBIMPL_ATTR_PACKED_STRUCT_H
+/**
+ * @file
+ * @author Ruby developers <ruby-core@ruby-lang.org>
+ * @copyright This file is a part of the programming language Ruby.
+ * Permission is hereby granted, to either redistribute and/or
+ * modify this file, provided that the conditions mentioned in the
+ * file COPYING are met. Consult the file for details.
+ * @warning Symbols prefixed with either `RBIMPL` or `rbimpl` are
+ * implementation details. Don't take them as canon. They could
+ * rapidly appear then vanish. The name (path) of this header file
+ * is also an implementation detail. Do not expect it to persist
+ * at the place it is now. Developers are free to move it anywhere
+ * anytime at will.
+ * @note To ruby-core: remember that this header can be possibly
+ * recursively included from extension libraries written in C++.
+ * Do not expect for instance `__VA_ARGS__` is always available.
+ * We assume C99 for ruby itself but we don't assume languages of
+ * extension libraries. They could be written in C++98.
+ * @brief Defines #RBIMPL_ATTR_PACKED_STRUCT_BEGIN,
+ * #RBIMPL_ATTR_PACKED_STRUCT_END,
+ * #RBIMPL_ATTR_PACKED_STRUCT_UNALIGNED_BEGIN, and
+ * #RBIMPL_ATTR_PACKED_STRUCT_UNALIGNED_END.
+ */
+#include "ruby/internal/config.h"
+
+#ifndef RBIMPL_ATTR_PACKED_STRUCT_BEGIN
+# define RBIMPL_ATTR_PACKED_STRUCT_BEGIN() /* void */
+#endif
+#ifndef RBIMPL_ATTR_PACKED_STRUCT_END
+# define RBIMPL_ATTR_PACKED_STRUCT_END() /* void */
+#endif
+
+#if UNALIGNED_WORD_ACCESS
+# define RBIMPL_ATTR_PACKED_STRUCT_UNALIGNED_BEGIN() RBIMPL_ATTR_PACKED_STRUCT_BEGIN()
+# define RBIMPL_ATTR_PACKED_STRUCT_UNALIGNED_END() RBIMPL_ATTR_PACKED_STRUCT_END()
+#else
+# define RBIMPL_ATTR_PACKED_STRUCT_UNALIGNED_BEGIN() /* void */
+# define RBIMPL_ATTR_PACKED_STRUCT_UNALIGNED_END() /* void */
+#endif
+
+#endif
diff --git a/include/ruby/internal/config.h b/include/ruby/internal/config.h
index b6134c6165..da070f0979 100644
--- a/include/ruby/internal/config.h
+++ b/include/ruby/internal/config.h
@@ -113,6 +113,8 @@
# define UNALIGNED_WORD_ACCESS 1
#elif defined(__powerpc64__)
# define UNALIGNED_WORD_ACCESS 1
+#elif defined(__POWERPC__) // __POWERPC__ is defined for ppc and ppc64 on Darwin
+# define UNALIGNED_WORD_ACCESS 1
#elif defined(__aarch64__)
# define UNALIGNED_WORD_ACCESS 1
#elif defined(__mc68020__)
diff --git a/include/ruby/internal/core/rarray.h b/include/ruby/internal/core/rarray.h
index 9f1d0509ea..90690fe794 100644
--- a/include/ruby/internal/core/rarray.h
+++ b/include/ruby/internal/core/rarray.h
@@ -29,25 +29,13 @@
#include "ruby/internal/core/rbasic.h"
#include "ruby/internal/dllexport.h"
#include "ruby/internal/fl_type.h"
-#include "ruby/internal/rgengc.h"
+#include "ruby/internal/gc.h"
#include "ruby/internal/stdbool.h"
#include "ruby/internal/value.h"
#include "ruby/internal/value_type.h"
#include "ruby/assert.h"
/**
- * @private
- * @warning Do not touch this macro.
- * @warning It is an implementation detail.
- * @warning The value of this macro must match for ruby itself and all
- * extension libraries, otherwise serious memory corruption shall
- * occur.
- */
-#ifndef USE_TRANSIENT_HEAP
-# define USE_TRANSIENT_HEAP 1
-#endif
-
-/**
* Convenient casting macro.
*
* @param obj An object, which is in fact an ::RArray.
@@ -59,15 +47,9 @@
#define RARRAY_EMBED_LEN_MASK RARRAY_EMBED_LEN_MASK
#define RARRAY_EMBED_LEN_MAX RARRAY_EMBED_LEN_MAX
#define RARRAY_EMBED_LEN_SHIFT RARRAY_EMBED_LEN_SHIFT
-#if USE_TRANSIENT_HEAP
-# define RARRAY_TRANSIENT_FLAG RARRAY_TRANSIENT_FLAG
-#else
-# define RARRAY_TRANSIENT_FLAG 0
-#endif
/** @endcond */
#define RARRAY_LEN rb_array_len /**< @alias{rb_array_len} */
#define RARRAY_CONST_PTR rb_array_const_ptr /**< @alias{rb_array_const_ptr} */
-#define RARRAY_CONST_PTR_TRANSIENT rb_array_const_ptr_transient /**< @alias{rb_array_const_ptr_transient} */
/** @cond INTERNAL_MACRO */
#if defined(__fcc__) || defined(__fcc_version) || \
@@ -80,7 +62,6 @@
#define RARRAY_EMBED_LEN RARRAY_EMBED_LEN
#define RARRAY_LENINT RARRAY_LENINT
-#define RARRAY_TRANSIENT_P RARRAY_TRANSIENT_P
#define RARRAY_ASET RARRAY_ASET
#define RARRAY_PTR RARRAY_PTR
/** @endcond */
@@ -130,24 +111,8 @@ enum ruby_rarray_flags {
* 3rd parties must not be aware that there even is more than one way to
* store array elements. It was a bad idea to expose this to them.
*/
- RARRAY_EMBED_LEN_MASK = RUBY_FL_USER4 | RUBY_FL_USER3
-#if USE_TRANSIENT_HEAP
- ,
-
- /**
- * This flag has something to do with an array's "transiency". A transient
- * array is an array of young generation (of generational GC), who stores
- * its elements inside of dedicated memory pages called a transient heap.
- * Not every young generation share that storage scheme, but elder
- * generations must no join.
- *
- * @internal
- *
- * 3rd parties must not be aware that there even is more than one way to
- * store array elements. It was a bad idea to expose this to them.
- */
- RARRAY_TRANSIENT_FLAG = RUBY_FL_USER13
-#endif
+ RARRAY_EMBED_LEN_MASK = RUBY_FL_USER9 | RUBY_FL_USER8 | RUBY_FL_USER7 | RUBY_FL_USER6 |
+ RUBY_FL_USER5 | RUBY_FL_USER4 | RUBY_FL_USER3
};
/**
@@ -156,10 +121,7 @@ enum ruby_rarray_flags {
*/
enum ruby_rarray_consts {
/** Where ::RARRAY_EMBED_LEN_MASK resides. */
- RARRAY_EMBED_LEN_SHIFT = RUBY_FL_USHIFT + 3,
-
- /** Max possible number elements that can be embedded. */
- RARRAY_EMBED_LEN_MAX = RBIMPL_EMBED_LEN_MAX_OF(VALUE)
+ RARRAY_EMBED_LEN_SHIFT = RUBY_FL_USHIFT + 3
};
/** Ruby's array. */
@@ -218,7 +180,12 @@ struct RArray {
* to store its elements. In this case the length is encoded into the
* flags.
*/
- const VALUE ary[RARRAY_EMBED_LEN_MAX];
+ /* This is a length 1 array because:
+ * 1. GCC has a bug that does not optimize C flexible array members
+ * (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102452)
+ * 2. Zero length arrays are not supported by all compilers
+ */
+ const VALUE ary[1];
} as;
};
@@ -245,16 +212,6 @@ VALUE *rb_ary_ptr_use_start(VALUE ary);
*/
void rb_ary_ptr_use_end(VALUE a);
-#if USE_TRANSIENT_HEAP
-/**
- * Destructively converts an array of transient backend into ordinal one.
- *
- * @param[out] a An object of ::RArray.
- * @pre `a` must be a transient array.
- * @post `a` gets out of transient heap, destructively.
- */
-void rb_ary_detransient(VALUE a);
-#endif
RBIMPL_SYMBOL_EXPORT_END()
RBIMPL_ATTR_PURE_UNLESS_DEBUG()
@@ -327,33 +284,6 @@ RARRAY_LENINT(VALUE ary)
}
RBIMPL_ATTR_PURE_UNLESS_DEBUG()
-RBIMPL_ATTR_ARTIFICIAL()
-/**
- * Queries if the array is a transient array.
- *
- * @param[in] ary Array in question.
- * @retval true Yes it is.
- * @retval false No it isn't.
- * @pre `ary` must be an instance of ::RArray.
- *
- * @internal
- *
- * @shyouhei doesn't understand the benefit of this function called from
- * extension libraries.
- */
-static inline bool
-RARRAY_TRANSIENT_P(VALUE ary)
-{
- RBIMPL_ASSERT_TYPE(ary, RUBY_T_ARRAY);
-
-#if USE_TRANSIENT_HEAP
- return RB_FL_ANY_RAW(ary, RARRAY_TRANSIENT_FLAG);
-#else
- return false;
-#endif
-}
-
-RBIMPL_ATTR_PURE_UNLESS_DEBUG()
/**
* @private
*
@@ -364,7 +294,7 @@ RBIMPL_ATTR_PURE_UNLESS_DEBUG()
* @return Its backend storage.
*/
static inline const VALUE *
-rb_array_const_ptr_transient(VALUE a)
+rb_array_const_ptr(VALUE a)
{
RBIMPL_ASSERT_TYPE(a, RUBY_T_ARRAY);
@@ -376,110 +306,21 @@ rb_array_const_ptr_transient(VALUE a)
}
}
-#if ! USE_TRANSIENT_HEAP
-RBIMPL_ATTR_PURE_UNLESS_DEBUG()
-#endif
-/**
- * @private
- *
- * This is an implementation detail of RARRAY_PTR(). People do not use it
- * directly.
- *
- * @param[in] a An object of ::RArray.
- * @return Its backend storage.
- * @post `a` is not a transient array.
- */
-static inline const VALUE *
-rb_array_const_ptr(VALUE a)
-{
- RBIMPL_ASSERT_TYPE(a, RUBY_T_ARRAY);
-
-#if USE_TRANSIENT_HEAP
- if (RARRAY_TRANSIENT_P(a)) {
- rb_ary_detransient(a);
- }
-#endif
- return rb_array_const_ptr_transient(a);
-}
-
/**
* @private
*
* This is an implementation detail of #RARRAY_PTR_USE. People do not use it
* directly.
- *
- * @param[in] a An object of ::RArray.
- * @param[in] allow_transient Whether `a` can be transient or not.
- * @return Its backend storage.
- * @post `a` is not a transient array unless `allow_transient`.
- */
-static inline VALUE *
-rb_array_ptr_use_start(VALUE a,
- RBIMPL_ATTR_MAYBE_UNUSED()
- int allow_transient)
-{
- RBIMPL_ASSERT_TYPE(a, RUBY_T_ARRAY);
-
-#if USE_TRANSIENT_HEAP
- if (!allow_transient) {
- if (RARRAY_TRANSIENT_P(a)) {
- rb_ary_detransient(a);
- }
- }
-#endif
-
- return rb_ary_ptr_use_start(a);
-}
-
-/**
- * @private
- *
- * This is an implementation detail of #RARRAY_PTR_USE. People do not use it
- * directly.
- *
- * @param[in] a An object of ::RArray.
- * @param[in] allow_transient Whether `a` can be transient or not.
*/
-static inline void
-rb_array_ptr_use_end(VALUE a,
- RBIMPL_ATTR_MAYBE_UNUSED()
- int allow_transient)
-{
- RBIMPL_ASSERT_TYPE(a, RUBY_T_ARRAY);
- rb_ary_ptr_use_end(a);
-}
-
-/**
- * @private
- *
- * This is an implementation detail of #RARRAY_PTR_USE. People do not use it
- * directly.
- */
-#define RBIMPL_RARRAY_STMT(flag, ary, var, expr) do { \
+#define RBIMPL_RARRAY_STMT(ary, var, expr) do { \
RBIMPL_ASSERT_TYPE((ary), RUBY_T_ARRAY); \
const VALUE rbimpl_ary = (ary); \
- VALUE *var = rb_array_ptr_use_start(rbimpl_ary, (flag)); \
+ VALUE *var = rb_ary_ptr_use_start(rbimpl_ary); \
expr; \
- rb_array_ptr_use_end(rbimpl_ary, (flag)); \
+ rb_ary_ptr_use_end(rbimpl_ary); \
} while (0)
/**
- * @private
- *
- * This is an implementation detail of #RARRAY_PTR_USE. People do not use it
- * directly.
- */
-#define RARRAY_PTR_USE_START(a) rb_array_ptr_use_start(a, 0)
-
-/**
- * @private
- *
- * This is an implementation detail of #RARRAY_PTR_USE. People do not use it
- * directly.
- */
-#define RARRAY_PTR_USE_END(a) rb_array_ptr_use_end(a, 0)
-
-/**
* Declares a section of code where raw pointers are used. In case you need to
* touch the raw C array instead of polite CAPIs, then that operation shall be
* wrapped using this macro.
@@ -505,37 +346,11 @@ rb_array_ptr_use_end(VALUE a,
* them use it... Maybe some transition path can be implemented later.
*/
#define RARRAY_PTR_USE(ary, ptr_name, expr) \
- RBIMPL_RARRAY_STMT(0, ary, ptr_name, expr)
-
-/**
- * @private
- *
- * This is an implementation detail of #RARRAY_PTR_USE_TRANSIENT. People do
- * not use it directly.
- */
-#define RARRAY_PTR_USE_START_TRANSIENT(a) rb_array_ptr_use_start(a, 1)
-
-/**
- * @private
- *
- * This is an implementation detail of #RARRAY_PTR_USE_TRANSIENT. People do
- * not use it directly.
- */
-#define RARRAY_PTR_USE_END_TRANSIENT(a) rb_array_ptr_use_end(a, 1)
-
-/**
- * Identical to #RARRAY_PTR_USE, except the pointer can be a transient one.
- *
- * @param ary An object of ::RArray.
- * @param ptr_name A variable name which points the C array in `expr`.
- * @param expr The expression that touches `ptr_name`.
- */
-#define RARRAY_PTR_USE_TRANSIENT(ary, ptr_name, expr) \
- RBIMPL_RARRAY_STMT(1, ary, ptr_name, expr)
+ RBIMPL_RARRAY_STMT(ary, ptr_name, expr)
/**
* Wild use of a C pointer. This function accesses the backend storage
- * directly. This is slower than #RARRAY_PTR_USE_TRANSIENT. It exercises
+ * directly. This is slower than #RARRAY_PTR_USE. It exercises
* extra manoeuvres to protect our generational GC. Use of this function is
* considered archaic. Use a modern way instead.
*
@@ -570,7 +385,7 @@ RARRAY_PTR(VALUE ary)
static inline void
RARRAY_ASET(VALUE ary, long i, VALUE v)
{
- RARRAY_PTR_USE_TRANSIENT(ary, ptr,
+ RARRAY_PTR_USE(ary, ptr,
RB_OBJ_WRITE(ary, &ptr[i], v));
}
@@ -585,6 +400,6 @@ RARRAY_ASET(VALUE ary, long i, VALUE v)
* remains as it is due to that. If we could warn such usages we can set a
* transition path, but currently no way is found to do so.
*/
-#define RARRAY_AREF(a, i) RARRAY_CONST_PTR_TRANSIENT(a)[i]
+#define RARRAY_AREF(a, i) RARRAY_CONST_PTR(a)[i]
#endif /* RBIMPL_RARRAY_H */
diff --git a/include/ruby/internal/core/rbasic.h b/include/ruby/internal/core/rbasic.h
index 4617f743a7..a1477e2600 100644
--- a/include/ruby/internal/core/rbasic.h
+++ b/include/ruby/internal/core/rbasic.h
@@ -56,22 +56,20 @@ enum ruby_rvalue_flags {
};
/**
- * Ruby's object's, base components. Every single ruby objects have them in
- * common.
+ * Ruby object's base components. All Ruby objects have them in common.
*/
struct
RUBY_ALIGNAS(SIZEOF_VALUE)
RBasic {
/**
- * Per-object flags. Each ruby objects have their own characteristics
- * apart from their classes. For instance whether an object is frozen or
- * not is not controlled by its class. This is where such properties are
- * stored.
+ * Per-object flags. Each Ruby object has its own characteristics apart
+ * from its class. For instance, whether an object is frozen or not is not
+ * controlled by its class. This is where such properties are stored.
*
* @see enum ::ruby_fl_type
*
- * @note This is ::VALUE rather than an enum for alignment purpose. Back
+ * @note This is ::VALUE rather than an enum for alignment purposes. Back
* in the 1990s there were no such thing like `_Alignas` in C.
*/
VALUE flags;
@@ -79,10 +77,10 @@ RBasic {
/**
* Class of an object. Every object has its class. Also, everything is an
* object in Ruby. This means classes are also objects. Classes have
- * their own classes, classes of classes have their classes, too ... and
- * it recursively continues forever.
+ * their own classes, classes of classes have their classes too, and it
+ * recursively continues forever.
*
- * Also note the `const` qualifier. In ruby an object cannot "change" its
+ * Also note the `const` qualifier. In Ruby, an object cannot "change" its
* class.
*/
const VALUE klass;
diff --git a/include/ruby/internal/core/rclass.h b/include/ruby/internal/core/rclass.h
index 13a33a28bd..b0b6bfc80c 100644
--- a/include/ruby/internal/core/rclass.h
+++ b/include/ruby/internal/core/rclass.h
@@ -26,9 +26,7 @@
#include "ruby/internal/cast.h"
/** @cond INTERNAL_MACRO */
-#define RMODULE_IS_OVERLAID RMODULE_IS_OVERLAID
#define RMODULE_IS_REFINEMENT RMODULE_IS_REFINEMENT
-#define RMODULE_INCLUDED_INTO_REFINEMENT RMODULE_INCLUDED_INTO_REFINEMENT
/** @endcond */
/**
@@ -55,57 +53,12 @@
* Why is it here, given RClass itself is not?
*/
enum ruby_rmodule_flags {
-
- /**
- * This flag has something to do with refinements... I guess? It is set on
- * occasions for modules that are refined by refinements, but it seems
- * ... nobody cares about such things? Not sure but this flag could
- * perhaps be a write-only information.
- */
- RMODULE_IS_OVERLAID = RUBY_FL_USER2,
-
/**
* This flag has something to do with refinements. A module created using
* rb_mod_refine() has this flag set. This is the bit which controls
* difference between normal inclusion versus refinements.
*/
- RMODULE_IS_REFINEMENT = RUBY_FL_USER3,
-
- /**
- * This flag has something to do with refinements. This is set when a
- * (non-refinement) module is included into another module, which is a
- * refinement. This amends the way `super` searches for a super method.
- *
- * ```ruby
- * class Foo
- * def foo
- * "Foo"
- * end
- * end
- *
- * module Bar
- * def foo
- * "[#{super}]" # this
- * end
- * end
- *
- * module Baz
- * refine Foo do
- * include Bar
- * def foo
- * "<#{super}>"
- * end
- * end
- * end
- *
- * using Baz
- * Foo.new.foo # => "[<Foo>]"
- * ```
- *
- * The `super` marked with "this" comment shall look for overlaid
- * `Foo#foo`, which is not the ordinal method lookup direction.
- */
- RMODULE_INCLUDED_INTO_REFINEMENT = RUBY_FL_USER4
+ RMODULE_IS_REFINEMENT = RUBY_FL_USER3
};
struct RClass; /* Opaque, declared here for RCLASS() macro. */
diff --git a/include/ruby/internal/core/rdata.h b/include/ruby/internal/core/rdata.h
index f6656b6546..43ab3c01e7 100644
--- a/include/ruby/internal/core/rdata.h
+++ b/include/ruby/internal/core/rdata.h
@@ -369,30 +369,6 @@ rb_data_object_alloc(VALUE klass, void *data, RUBY_DATA_FUNC dmark, RUBY_DATA_FU
return rb_data_object_wrap(klass, data, dmark, dfree);
}
-RBIMPL_ATTR_DEPRECATED(("by: rb_cObject. Will be removed in 3.1."))
-RBIMPL_ATTR_PURE()
-/**
- * @private
- *
- * @deprecated There once was a variable called rb_cData, which no longer
- * exists today. This function is a function because we want
- * warnings for the usages.
- */
-static inline VALUE
-rb_cData(void)
-{
- return rb_cObject;
-}
-
-/**
- * @private
- *
- * @deprecated This macro once was a thing in the old days, but makes no sense
- * any longer today. Exists here for backwards compatibility
- * only. You can safely forget about it.
- */
-#define rb_cData rb_cData()
-
/** @cond INTERNAL_MACRO */
#define rb_data_object_wrap_0 rb_data_object_wrap
#define rb_data_object_wrap_1 rb_data_object_wrap_warning
diff --git a/include/ruby/internal/core/rfile.h b/include/ruby/internal/core/rfile.h
index f8dddde9e5..a0eb8cb833 100644
--- a/include/ruby/internal/core/rfile.h
+++ b/include/ruby/internal/core/rfile.h
@@ -25,7 +25,7 @@
/* rb_io_t is in ruby/io.h. The header file has historically not been included
* into ruby/ruby.h. We follow that tradition. */
-struct rb_io_t;
+struct rb_io;
/**
* Ruby's File and IO. Ruby's IO are not just file descriptors. They have
@@ -38,7 +38,7 @@ struct RFile {
struct RBasic basic;
/** IO's specific fields. */
- struct rb_io_t *fptr;
+ struct rb_io *fptr;
};
/**
diff --git a/include/ruby/internal/core/rhash.h b/include/ruby/internal/core/rhash.h
index 61d2c15d87..897c570794 100644
--- a/include/ruby/internal/core/rhash.h
+++ b/include/ruby/internal/core/rhash.h
@@ -54,19 +54,6 @@
*
* @internal
*
- * Declaration of rb_hash_iter_lev() is at include/ruby/backward.h.
- */
-#define RHASH_ITER_LEV(h) rb_hash_iter_lev(h)
-
-/**
- * @private
- *
- * @deprecated This macro once was a thing in the old days, but makes no sense
- * any longer today. Exists here for backwards compatibility
- * only. You can safely forget about it.
- *
- * @internal
- *
* Declaration of rb_hash_ifnone() is at include/ruby/backward.h.
*/
#define RHASH_IFNONE(h) rb_hash_ifnone(h)
diff --git a/include/ruby/internal/core/rmatch.h b/include/ruby/internal/core/rmatch.h
index 2d2fd897f5..a528c2999e 100644
--- a/include/ruby/internal/core/rmatch.h
+++ b/include/ruby/internal/core/rmatch.h
@@ -68,7 +68,7 @@ struct rmatch_offset {
};
/** Represents a match. */
-struct rmatch {
+struct rb_matchext_struct {
/**
* "Registers" of a match. This is a quasi-opaque struct that holds
* execution result of a match. Roughly resembles `&~`.
@@ -82,6 +82,8 @@ struct rmatch {
int char_offset_num_allocated;
};
+typedef struct rb_matchext_struct rb_matchext_t;
+
/**
* Regular expression execution context. When a regular expression "matches"
* to a string, it generates capture groups etc. This struct holds that info.
@@ -102,16 +104,13 @@ struct RMatch {
VALUE str;
/**
- * The result of this match.
- */
- struct rmatch *rmatch;
-
- /**
* The expression of this match.
*/
VALUE regexp; /* RRegexp */
};
+#define RMATCH_EXT(m) ((rb_matchext_t *)((char *)(m) + sizeof(struct RMatch)))
+
RBIMPL_ATTR_PURE_UNLESS_DEBUG()
RBIMPL_ATTR_ARTIFICIAL()
/**
@@ -139,8 +138,7 @@ static inline struct re_registers *
RMATCH_REGS(VALUE match)
{
RBIMPL_ASSERT_TYPE(match, RUBY_T_MATCH);
- RBIMPL_ASSERT_OR_ASSUME(RMATCH(match)->rmatch != NULL);
- return &RMATCH(match)->rmatch->regs;
+ return &RMATCH_EXT(match)->regs;
}
#endif /* RBIMPL_RMATCH_H */
diff --git a/include/ruby/internal/core/robject.h b/include/ruby/internal/core/robject.h
index f2028063a6..c2bcae6306 100644
--- a/include/ruby/internal/core/robject.h
+++ b/include/ruby/internal/core/robject.h
@@ -37,16 +37,15 @@
/**
* Convenient casting macro.
*
- * @param obj An object, which is in fact an ::RRegexp.
- * @return The passed object casted to ::RRegexp.
+ * @param obj An object, which is in fact an ::RObject.
+ * @return The passed object casted to ::RObject.
*/
#define ROBJECT(obj) RBIMPL_CAST((struct RObject *)(obj))
/** @cond INTERNAL_MACRO */
#define ROBJECT_EMBED_LEN_MAX ROBJECT_EMBED_LEN_MAX
#define ROBJECT_EMBED ROBJECT_EMBED
-#define ROBJECT_NUMIV ROBJECT_NUMIV
+#define ROBJECT_IV_CAPACITY ROBJECT_IV_CAPACITY
#define ROBJECT_IVPTR ROBJECT_IVPTR
-#define ROBJECT_IV_INDEX_TBL ROBJECT_IV_INDEX_TBL
/** @endcond */
/**
@@ -75,15 +74,6 @@ enum ruby_robject_flags {
ROBJECT_EMBED = RUBY_FL_USER1
};
-/**
- * This is an enum because GDB wants it (rather than a macro). People need not
- * bother.
- */
-enum ruby_robject_consts {
- /** Max possible number of instance variables that can be embedded. */
- ROBJECT_EMBED_LEN_MAX = RBIMPL_EMBED_LEN_MAX_OF(VALUE)
-};
-
struct st_table;
/**
@@ -103,13 +93,6 @@ struct RObject {
* this pattern.
*/
struct {
-
- /**
- * Number of instance variables. This is per object; objects might
- * differ in this field even if they have the identical classes.
- */
- uint32_t numiv;
-
/** Pointer to a C array that holds instance variables. */
VALUE *ivptr;
@@ -121,42 +104,24 @@ struct RObject {
*
* This is a shortcut for `RCLASS_IV_INDEX_TBL(rb_obj_class(obj))`.
*/
- struct st_table *iv_index_tbl;
+ struct rb_id_table *iv_index_tbl;
} heap;
- /**
- * Embedded instance variables. When an object is small enough, it
+ /* Embedded instance variables. When an object is small enough, it
* uses this area to store the instance variables.
+ *
+ * This is a length 1 array because:
+ * 1. GCC has a bug that does not optimize C flexible array members
+ * (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102452)
+ * 2. Zero length arrays are not supported by all compilers
*/
- VALUE ary[ROBJECT_EMBED_LEN_MAX];
+ VALUE ary[1];
} as;
};
RBIMPL_ATTR_PURE_UNLESS_DEBUG()
RBIMPL_ATTR_ARTIFICIAL()
/**
- * Queries the number of instance variables.
- *
- * @param[in] obj Object in question.
- * @return Its number of instance variables.
- * @pre `obj` must be an instance of ::RObject.
- */
-static inline uint32_t
-ROBJECT_NUMIV(VALUE obj)
-{
- RBIMPL_ASSERT_TYPE(obj, RUBY_T_OBJECT);
-
- if (RB_FL_ANY_RAW(obj, ROBJECT_EMBED)) {
- return ROBJECT_EMBED_LEN_MAX;
- }
- else {
- return ROBJECT(obj)->as.heap.numiv;
- }
-}
-
-RBIMPL_ATTR_PURE_UNLESS_DEBUG()
-RBIMPL_ATTR_ARTIFICIAL()
-/**
* Queries the instance variables.
*
* @param[in] obj Object in question.
diff --git a/include/ruby/internal/core/rstring.h b/include/ruby/internal/core/rstring.h
index d16a57b1c4..0bca74e688 100644
--- a/include/ruby/internal/core/rstring.h
+++ b/include/ruby/internal/core/rstring.h
@@ -42,11 +42,7 @@
/** @cond INTERNAL_MACRO */
#define RSTRING_NOEMBED RSTRING_NOEMBED
-#define RSTRING_EMBED_LEN_MASK RSTRING_EMBED_LEN_MASK
-#define RSTRING_EMBED_LEN_SHIFT RSTRING_EMBED_LEN_SHIFT
-#define RSTRING_EMBED_LEN_MAX RSTRING_EMBED_LEN_MAX
#define RSTRING_FSTR RSTRING_FSTR
-#define RSTRING_EMBED_LEN RSTRING_EMBED_LEN
#define RSTRING_LEN RSTRING_LEN
#define RSTRING_LENINT RSTRING_LENINT
#define RSTRING_PTR RSTRING_PTR
@@ -160,19 +156,6 @@ enum ruby_rstring_flags {
*/
RSTRING_NOEMBED = RUBY_FL_USER1,
- /**
- * When a string employs embedded strategy (see ::RSTRING_NOEMBED), these
- * bits are used to store the number of bytes actually filled into
- * ::RString::ary.
- *
- * @internal
- *
- * 3rd parties must not be aware that there even is more than one way to
- * store a string. Might better be hidden.
- */
- RSTRING_EMBED_LEN_MASK = RUBY_FL_USER2 | RUBY_FL_USER3 | RUBY_FL_USER4 |
- RUBY_FL_USER5 | RUBY_FL_USER6,
-
/* Actually, string encodings are also encoded into the flags, using
* remaining bits.*/
@@ -199,18 +182,6 @@ enum ruby_rstring_flags {
};
/**
- * This is an enum because GDB wants it (rather than a macro). People need not
- * bother.
- */
-enum ruby_rstring_consts {
- /** Where ::RSTRING_EMBED_LEN_MASK resides. */
- RSTRING_EMBED_LEN_SHIFT = RUBY_FL_USHIFT + 2,
-
- /** Max possible number of characters that can be embedded. */
- RSTRING_EMBED_LEN_MAX = RBIMPL_EMBED_LEN_MAX_OF(char) - 1
-};
-
-/**
* Ruby's String. A string in ruby conceptually has these information:
*
* - Encoding of the string.
@@ -227,6 +198,13 @@ struct RString {
/** Basic part, including flags and class. */
struct RBasic basic;
+ /**
+ * Length of the string, not including terminating NUL character.
+ *
+ * @note This is in bytes.
+ */
+ long len;
+
/** String's specific fields. */
union {
@@ -235,14 +213,6 @@ struct RString {
* pattern.
*/
struct {
-
- /**
- * Length of the string, not including terminating NUL character.
- *
- * @note This is in bytes.
- */
- long len;
-
/**
* Pointer to the contents of the string. In the old days each
* string had dedicated memory regions. That is no longer true
@@ -271,14 +241,15 @@ struct RString {
} aux;
} heap;
- /**
- * Embedded contents. When a string is short enough, it uses this area
- * to store the contents themselves. This was impractical in the 20th
- * century, but these days 64 bit machines can typically hold 48 bytes
- * here. Could be sufficiently large. In this case the length is
- * encoded into the flags.
- */
- char ary[RSTRING_EMBED_LEN_MAX + 1];
+ /** Embedded contents. */
+ struct {
+ /* This is a length 1 array because:
+ * 1. GCC has a bug that does not optimize C flexible array members
+ * (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102452)
+ * 2. Zero length arrays are not supported by all compilers
+ */
+ char ary[1];
+ } embed;
} as;
};
@@ -390,26 +361,12 @@ RBIMPL_ATTR_ARTIFICIAL()
*
* @param[in] str String in question.
* @return Its length, in bytes.
- * @pre `str` must be an instance of ::RString, and must has its
- * ::RSTRING_NOEMBED flag off.
- *
- * @internal
- *
- * This was a macro before. It was inevitable to be public, since macros are
- * global constructs. But should it be forever? Now that it is a function,
- * @shyouhei thinks it could just be eliminated, hidden into implementation
- * details.
+ * @pre `str` must be an instance of ::RString.
*/
static inline long
-RSTRING_EMBED_LEN(VALUE str)
+RSTRING_LEN(VALUE str)
{
- RBIMPL_ASSERT_TYPE(str, RUBY_T_STRING);
- RBIMPL_ASSERT_OR_ASSUME(! RB_FL_ANY_RAW(str, RSTRING_NOEMBED));
-
- VALUE f = RBASIC(str)->flags;
- f &= RSTRING_EMBED_LEN_MASK;
- f >>= RSTRING_EMBED_LEN_SHIFT;
- return RBIMPL_CAST((long)f);
+ return RSTRING(str)->len;
}
RBIMPL_WARNING_PUSH()
@@ -439,29 +396,14 @@ rbimpl_rstring_getmem(VALUE str)
else {
/* Expecting compilers to optimize this on-stack struct away. */
struct RString retval;
- retval.as.heap.len = RSTRING_EMBED_LEN(str);
- retval.as.heap.ptr = RSTRING(str)->as.ary;
+ retval.len = RSTRING_LEN(str);
+ retval.as.heap.ptr = RSTRING(str)->as.embed.ary;
return retval;
}
}
RBIMPL_WARNING_POP()
-RBIMPL_ATTR_PURE_UNLESS_DEBUG()
-RBIMPL_ATTR_ARTIFICIAL()
-/**
- * Queries the length of the string.
- *
- * @param[in] str String in question.
- * @return Its length, in bytes.
- * @pre `str` must be an instance of ::RString.
- */
-static inline long
-RSTRING_LEN(VALUE str)
-{
- return rbimpl_rstring_getmem(str).as.heap.len;
-}
-
RBIMPL_ATTR_ARTIFICIAL()
/**
* Queries the contents pointer of the string.
@@ -475,13 +417,9 @@ RSTRING_PTR(VALUE str)
{
char *ptr = rbimpl_rstring_getmem(str).as.heap.ptr;
- if (RB_UNLIKELY(! ptr)) {
+ if (RUBY_DEBUG && RB_UNLIKELY(! ptr)) {
/* :BEWARE: @shyouhei thinks that currently, there are rooms for this
- * function to return NULL. In the 20th century that was a pointless
- * concern. However struct RString can hold fake strings nowadays. It
- * seems no check against NULL are exercised around handling of them
- * (one of such usages is located in marshal.c, which scares
- * @shyouhei). Better check here for maximum safety.
+ * function to return NULL. Better check here for maximum safety.
*
* Also, this is not rb_warn() because RSTRING_PTR() can be called
* during GC (see what obj_info() does). rb_warn() needs to allocate
@@ -505,12 +443,12 @@ RSTRING_END(VALUE str)
{
struct RString buf = rbimpl_rstring_getmem(str);
- if (RB_UNLIKELY(! buf.as.heap.ptr)) {
+ if (RUBY_DEBUG && RB_UNLIKELY(! buf.as.heap.ptr)) {
/* Ditto. */
rb_debug_rstring_null_ptr("RSTRING_END");
}
- return &buf.as.heap.ptr[buf.as.heap.len];
+ return &buf.as.heap.ptr[buf.len];
}
RBIMPL_ATTR_ARTIFICIAL()
@@ -544,7 +482,7 @@ RSTRING_LENINT(VALUE str)
__extension__ ({ \
struct RString rbimpl_str = rbimpl_rstring_getmem(str); \
(ptrvar) = rbimpl_str.as.heap.ptr; \
- (lenvar) = rbimpl_str.as.heap.len; \
+ (lenvar) = rbimpl_str.len; \
})
#else
# define RSTRING_GETMEM(str, ptrvar, lenvar) \
diff --git a/include/ruby/internal/core/rtypeddata.h b/include/ruby/internal/core/rtypeddata.h
index bbf208867d..6c19576c20 100644
--- a/include/ruby/internal/core/rtypeddata.h
+++ b/include/ruby/internal/core/rtypeddata.h
@@ -114,6 +114,8 @@
#define RUBY_TYPED_PROMOTED1 RUBY_TYPED_PROMOTED1
/** @endcond */
+#define TYPED_DATA_EMBEDDED 2
+
/**
* @private
*
@@ -137,6 +139,8 @@ rbimpl_typeddata_flags {
*/
RUBY_TYPED_FREE_IMMEDIATELY = 1,
+ RUBY_TYPED_EMBEDDABLE = 2,
+
/**
* This flag has something to do with Ractor. Multiple Ractors run without
* protecting each other. Sharing an object among Ractors is basically
@@ -173,10 +177,16 @@ rbimpl_typeddata_flags {
RUBY_TYPED_WB_PROTECTED = RUBY_FL_WB_PROTECTED, /* THIS FLAG DEPENDS ON Ruby version */
/**
- * This flag is mysterious. It seems nobody is currently using it. The
- * intention of this flag is also unclear. We need further investigations.
+ * This flag is no longer in use.
*/
- RUBY_TYPED_PROMOTED1 = RUBY_FL_PROMOTED1 /* THIS FLAG DEPENDS ON Ruby version */
+ RUBY_TYPED_UNUSED = RUBY_FL_UNUSED6,
+
+ /**
+ * This flag determines whether marking and compaction should be carried out
+ * using the dmark/dcompact callback functions or whether we should mark
+ * declaratively using a list of references defined inside the data struct we're wrapping
+ */
+ RUBY_TYPED_DECL_MARKING = RUBY_FL_USER2
};
/**
@@ -347,16 +357,14 @@ struct RTypedData {
* data. This roughly resembles a Ruby level class (apart from method
* definition etc.)
*/
- const rb_data_type_t *type;
+ const rb_data_type_t *const type;
/**
* This has to be always 1.
*
* @internal
- *
- * Why, then, this is not a const ::VALUE?
*/
- VALUE typed_flag;
+ const VALUE typed_flag;
/** Pointer to the actual C level struct that you want to wrap. */
void *data;
@@ -456,7 +464,7 @@ RBIMPL_SYMBOL_EXPORT_END()
*/
#define TypedData_Make_Struct0(result, klass, type, size, data_type, sval) \
VALUE result = rb_data_typed_object_zalloc(klass, size, data_type); \
- (sval) = RBIMPL_CAST((type *)RTYPEDDATA_DATA(result)); \
+ (sval) = (type *)RTYPEDDATA_GET_DATA(result); \
RBIMPL_CAST(/*suppress unused variable warnings*/(void)(sval))
/**
@@ -507,6 +515,36 @@ RBIMPL_SYMBOL_EXPORT_END()
#define TypedData_Get_Struct(obj,type,data_type,sval) \
((sval) = RBIMPL_CAST((type *)rb_check_typeddata((obj), (data_type))))
+static inline bool
+RTYPEDDATA_EMBEDDED_P(VALUE obj)
+{
+#if RUBY_DEBUG
+ if (RB_UNLIKELY(!RB_TYPE_P(obj, RUBY_T_DATA))) {
+ Check_Type(obj, RUBY_T_DATA);
+ RBIMPL_UNREACHABLE_RETURN(false);
+ }
+#endif
+
+ return RTYPEDDATA(obj)->typed_flag & TYPED_DATA_EMBEDDED;
+}
+
+static inline void *
+RTYPEDDATA_GET_DATA(VALUE obj)
+{
+#if RUBY_DEBUG
+ if (RB_UNLIKELY(!RB_TYPE_P(obj, RUBY_T_DATA))) {
+ Check_Type(obj, RUBY_T_DATA);
+ RBIMPL_UNREACHABLE_RETURN(false);
+ }
+#endif
+
+ /* We reuse the data pointer in embedded TypedData. We can't use offsetof
+ * since RTypedData is a non-POD type in C++. */
+ const size_t embedded_typed_data_size = sizeof(struct RTypedData) - sizeof(void *);
+
+ return RTYPEDDATA_EMBEDDED_P(obj) ? (char *)obj + embedded_typed_data_size : RTYPEDDATA(obj)->data;
+}
+
RBIMPL_ATTR_PURE()
RBIMPL_ATTR_ARTIFICIAL()
/**
@@ -523,7 +561,8 @@ RBIMPL_ATTR_ARTIFICIAL()
static inline bool
rbimpl_rtypeddata_p(VALUE obj)
{
- return RTYPEDDATA(obj)->typed_flag == 1;
+ VALUE typed_flag = RTYPEDDATA(obj)->typed_flag;
+ return typed_flag != 0 && typed_flag <= 3;
}
RBIMPL_ATTR_PURE_UNLESS_DEBUG()
diff --git a/include/ruby/internal/dllexport.h b/include/ruby/internal/dllexport.h
index 08a262209d..71026e7100 100644
--- a/include/ruby/internal/dllexport.h
+++ b/include/ruby/internal/dllexport.h
@@ -37,9 +37,7 @@
* ```
*/
#undef RUBY_EXTERN
-#if defined(MJIT_HEADER) && defined(_WIN32)
-# define RUBY_EXTERN extern __declspec(dllimport)
-#elif defined(RUBY_EXPORT)
+#if defined(RUBY_EXPORT)
# define RUBY_EXTERN extern
#elif defined(_WIN32)
# define RUBY_EXTERN extern __declspec(dllimport)
@@ -59,36 +57,6 @@
# define RUBY_FUNC_EXPORTED /* void */
#endif
-/**
- * @cond INTERNAL_MACRO
- *
- * These MJIT related macros are placed here because translate_mjit_header can
- * need them. Extension libraries should not touch.
- */
-
-/* These macros are used for functions which are exported only for MJIT
- and NOT ensured to be exported in future versions. */
-
-#if ! defined(MJIT_HEADER)
-# define MJIT_FUNC_EXPORTED RUBY_FUNC_EXPORTED
-#elif ! RBIMPL_COMPILER_IS(MSVC)
-# define MJIT_FUNC_EXPORTED RUBY_FUNC_EXPORTED
-#else
-# define MJIT_FUNC_EXPORTED static
-#endif
-
-#define MJIT_SYMBOL_EXPORT_BEGIN RUBY_SYMBOL_EXPORT_BEGIN
-#define MJIT_SYMBOL_EXPORT_END RUBY_SYMBOL_EXPORT_END
-
-/* On mswin, MJIT header transformation can't be used since cl.exe can't output
- preprocessed output preserving macros. So this `MJIT_STATIC` is needed
- to force non-static function to static on MJIT header to avoid symbol conflict. */
-#ifdef MJIT_HEADER
-# define MJIT_STATIC static
-#else
-# define MJIT_STATIC
-#endif
-
/** @endcond */
/** Shortcut macro equivalent to `RUBY_SYMBOL_EXPORT_BEGIN extern "C" {`.
diff --git a/include/ruby/internal/encoding/coderange.h b/include/ruby/internal/encoding/coderange.h
new file mode 100644
index 0000000000..7a81208c9e
--- /dev/null
+++ b/include/ruby/internal/encoding/coderange.h
@@ -0,0 +1,202 @@
+#ifndef RUBY_INTERNAL_ENCODING_CODERANGE_H /*-*-C++-*-vi:se ft=cpp:*/
+#define RUBY_INTERNAL_ENCODING_CODERANGE_H
+/**
+ * @file
+ * @author Ruby developers <ruby-core@ruby-lang.org>
+ * @copyright This file is a part of the programming language Ruby.
+ * Permission is hereby granted, to either redistribute and/or
+ * modify this file, provided that the conditions mentioned in the
+ * file COPYING are met. Consult the file for details.
+ * @warning Symbols prefixed with either `RBIMPL` or `rbimpl` are
+ * implementation details. Don't take them as canon. They could
+ * rapidly appear then vanish. The name (path) of this header file
+ * is also an implementation detail. Do not expect it to persist
+ * at the place it is now. Developers are free to move it anywhere
+ * anytime at will.
+ * @note To ruby-core: remember that this header can be possibly
+ * recursively included from extension libraries written in C++.
+ * Do not expect for instance `__VA_ARGS__` is always available.
+ * We assume C99 for ruby itself but we don't assume languages of
+ * extension libraries. They could be written in C++98.
+ * @brief Routines for code ranges.
+ */
+
+#include "ruby/internal/attr/const.h"
+#include "ruby/internal/attr/pure.h"
+#include "ruby/internal/dllexport.h"
+#include "ruby/internal/fl_type.h"
+#include "ruby/internal/value.h"
+
+RBIMPL_SYMBOL_EXPORT_BEGIN()
+
+/** What rb_enc_str_coderange() returns. */
+enum ruby_coderange_type {
+
+ /** The object's coderange is unclear yet. */
+ RUBY_ENC_CODERANGE_UNKNOWN = 0,
+
+ /** The object holds 0 to 127 inclusive and nothing else. */
+ RUBY_ENC_CODERANGE_7BIT = ((int)RUBY_FL_USER8),
+
+ /** The object's encoding and contents are consistent with each other. */
+ RUBY_ENC_CODERANGE_VALID = ((int)RUBY_FL_USER9),
+
+ /** The object holds invalid/malformed/broken character(s). */
+ RUBY_ENC_CODERANGE_BROKEN = ((int)(RUBY_FL_USER8|RUBY_FL_USER9)),
+
+ /** Where the coderange resides. */
+ RUBY_ENC_CODERANGE_MASK = (RUBY_ENC_CODERANGE_7BIT|
+ RUBY_ENC_CODERANGE_VALID|
+ RUBY_ENC_CODERANGE_BROKEN)
+};
+
+RBIMPL_ATTR_CONST()
+/**
+ * @private
+ *
+ * This is an implementation detail of #RB_ENC_CODERANGE_CLEAN_P. People don't
+ * use it directly.
+ *
+ * @param[in] cr An enum ::ruby_coderange_type.
+ * @retval 1 It is.
+ * @retval 0 It isn't.
+ */
+static inline int
+rb_enc_coderange_clean_p(int cr)
+{
+ return (cr ^ (cr >> 1)) & RUBY_ENC_CODERANGE_7BIT;
+}
+
+RBIMPL_ATTR_CONST()
+/**
+ * Queries if a code range is "clean". "Clean" in this context means it is
+ * known and valid.
+ *
+ * @param[in] cr An enum ::ruby_coderange_type.
+ * @retval 1 It is.
+ * @retval 0 It isn't.
+ */
+static inline bool
+RB_ENC_CODERANGE_CLEAN_P(enum ruby_coderange_type cr)
+{
+ return rb_enc_coderange_clean_p(cr);
+}
+
+RBIMPL_ATTR_PURE_UNLESS_DEBUG()
+/**
+ * Queries the (inline) code range of the passed object. The object must be
+ * capable of having inline encoding. Using this macro needs deep
+ * understanding of bit level object binary layout.
+ *
+ * @param[in] obj Target object.
+ * @return An enum ::ruby_coderange_type.
+ */
+static inline enum ruby_coderange_type
+RB_ENC_CODERANGE(VALUE obj)
+{
+ VALUE ret = RB_FL_TEST_RAW(obj, RUBY_ENC_CODERANGE_MASK);
+
+ return RBIMPL_CAST((enum ruby_coderange_type)ret);
+}
+
+RBIMPL_ATTR_PURE_UNLESS_DEBUG()
+/**
+ * Queries if the (inline) code range of the passed object is
+ * ::RUBY_ENC_CODERANGE_7BIT. The object must be capable of having inline
+ * encoding. Using this macro needs deep understanding of bit level object
+ * binary layout.
+ *
+ * @param[in] obj Target object.
+ * @retval 1 It is ascii only.
+ * @retval 0 Otherwise (including cases when the range is not known).
+ */
+static inline bool
+RB_ENC_CODERANGE_ASCIIONLY(VALUE obj)
+{
+ return RB_ENC_CODERANGE(obj) == RUBY_ENC_CODERANGE_7BIT;
+}
+
+/**
+ * Destructively modifies the passed object so that its (inline) code range is
+ * the passed one. The object must be capable of having inline encoding.
+ * Using this macro needs deep understanding of bit level object binary layout.
+ *
+ * @param[out] obj Target object.
+ * @param[in] cr An enum ::ruby_coderange_type.
+ * @post `obj`'s code range is `cr`.
+ */
+static inline void
+RB_ENC_CODERANGE_SET(VALUE obj, enum ruby_coderange_type cr)
+{
+ RB_FL_UNSET_RAW(obj, RUBY_ENC_CODERANGE_MASK);
+ RB_FL_SET_RAW(obj, cr);
+}
+
+/**
+ * Destructively clears the passed object's (inline) code range. The object
+ * must be capable of having inline encoding. Using this macro needs deep
+ * understanding of bit level object binary layout.
+ *
+ * @param[out] obj Target object.
+ * @post `obj`'s code range is ::RUBY_ENC_CODERANGE_UNKNOWN.
+ */
+static inline void
+RB_ENC_CODERANGE_CLEAR(VALUE obj)
+{
+ RB_FL_UNSET_RAW(obj, RUBY_ENC_CODERANGE_MASK);
+}
+
+RBIMPL_ATTR_CONST()
+/* assumed ASCII compatibility */
+/**
+ * "Mix" two code ranges into one. This is handy for instance when you
+ * concatenate two strings into one. Consider one of them is valid but the
+ * other isn't. The result must be invalid. This macro computes that kind of
+ * mixture.
+ *
+ * @param[in] a An enum ::ruby_coderange_type.
+ * @param[in] b Another enum ::ruby_coderange_type.
+ * @return The `a` "and" `b`.
+ */
+static inline enum ruby_coderange_type
+RB_ENC_CODERANGE_AND(enum ruby_coderange_type a, enum ruby_coderange_type b)
+{
+ if (a == RUBY_ENC_CODERANGE_7BIT) {
+ return b;
+ }
+ else if (a != RUBY_ENC_CODERANGE_VALID) {
+ return RUBY_ENC_CODERANGE_UNKNOWN;
+ }
+ else if (b == RUBY_ENC_CODERANGE_7BIT) {
+ return RUBY_ENC_CODERANGE_VALID;
+ }
+ else {
+ return b;
+ }
+}
+
+#define ENC_CODERANGE_MASK RUBY_ENC_CODERANGE_MASK /**< @old{RUBY_ENC_CODERANGE_MASK} */
+#define ENC_CODERANGE_UNKNOWN RUBY_ENC_CODERANGE_UNKNOWN /**< @old{RUBY_ENC_CODERANGE_UNKNOWN} */
+#define ENC_CODERANGE_7BIT RUBY_ENC_CODERANGE_7BIT /**< @old{RUBY_ENC_CODERANGE_7BIT} */
+#define ENC_CODERANGE_VALID RUBY_ENC_CODERANGE_VALID /**< @old{RUBY_ENC_CODERANGE_VALID} */
+#define ENC_CODERANGE_BROKEN RUBY_ENC_CODERANGE_BROKEN /**< @old{RUBY_ENC_CODERANGE_BROKEN} */
+#define ENC_CODERANGE_CLEAN_P(cr) RB_ENC_CODERANGE_CLEAN_P(cr) /**< @old{RB_ENC_CODERANGE_CLEAN_P} */
+#define ENC_CODERANGE(obj) RB_ENC_CODERANGE(obj) /**< @old{RB_ENC_CODERANGE} */
+#define ENC_CODERANGE_ASCIIONLY(obj) RB_ENC_CODERANGE_ASCIIONLY(obj) /**< @old{RB_ENC_CODERANGE_ASCIIONLY} */
+#define ENC_CODERANGE_SET(obj,cr) RB_ENC_CODERANGE_SET(obj,cr) /**< @old{RB_ENC_CODERANGE_SET} */
+#define ENC_CODERANGE_CLEAR(obj) RB_ENC_CODERANGE_CLEAR(obj) /**< @old{RB_ENC_CODERANGE_CLEAR} */
+#define ENC_CODERANGE_AND(a, b) RB_ENC_CODERANGE_AND(a, b) /**< @old{RB_ENC_CODERANGE_AND} */
+#define ENCODING_CODERANGE_SET(obj, encindex, cr) RB_ENCODING_CODERANGE_SET(obj, encindex, cr) /**< @old{RB_ENCODING_CODERANGE_SET} */
+
+/** @cond INTERNAL_MACRO */
+#define RB_ENC_CODERANGE RB_ENC_CODERANGE
+#define RB_ENC_CODERANGE_AND RB_ENC_CODERANGE_AND
+#define RB_ENC_CODERANGE_ASCIIONLY RB_ENC_CODERANGE_ASCIIONLY
+#define RB_ENC_CODERANGE_CLEAN_P RB_ENC_CODERANGE_CLEAN_P
+#define RB_ENC_CODERANGE_CLEAR RB_ENC_CODERANGE_CLEAR
+#define RB_ENC_CODERANGE_SET RB_ENC_CODERANGE_SET
+/** @endcond */
+
+RBIMPL_SYMBOL_EXPORT_END()
+
+#endif /* RUBY_INTERNAL_ENCODING_CODERANGE_H */
diff --git a/include/ruby/internal/encoding/ctype.h b/include/ruby/internal/encoding/ctype.h
new file mode 100644
index 0000000000..05c314aeb3
--- /dev/null
+++ b/include/ruby/internal/encoding/ctype.h
@@ -0,0 +1,258 @@
+#ifndef RUBY_INTERNAL_ENCODING_CTYPE_H /*-*-C++-*-vi:se ft=cpp:*/
+#define RUBY_INTERNAL_ENCODING_CTYPE_H
+/**
+ * @file
+ * @author Ruby developers <ruby-core@ruby-lang.org>
+ * @copyright This file is a part of the programming language Ruby.
+ * Permission is hereby granted, to either redistribute and/or
+ * modify this file, provided that the conditions mentioned in the
+ * file COPYING are met. Consult the file for details.
+ * @warning Symbols prefixed with either `RBIMPL` or `rbimpl` are
+ * implementation details. Don't take them as canon. They could
+ * rapidly appear then vanish. The name (path) of this header file
+ * is also an implementation detail. Do not expect it to persist
+ * at the place it is now. Developers are free to move it anywhere
+ * anytime at will.
+ * @note To ruby-core: remember that this header can be possibly
+ * recursively included from extension libraries written in C++.
+ * Do not expect for instance `__VA_ARGS__` is always available.
+ * We assume C99 for ruby itself but we don't assume languages of
+ * extension libraries. They could be written in C++98.
+ * @brief Routines to query character types.
+ */
+
+#include "ruby/onigmo.h"
+#include "ruby/internal/attr/const.h"
+#include "ruby/internal/dllexport.h"
+#include "ruby/internal/encoding/encoding.h"
+#include "ruby/internal/value.h"
+
+RBIMPL_SYMBOL_EXPORT_BEGIN()
+
+/**
+ * Queries if the passed pointer points to a newline character. What is a
+ * newline and what is not depends on the passed encoding.
+ *
+ * @param[in] p Pointer to possibly the middle of a character.
+ * @param[in] e End of the string.
+ * @param[in] enc Encoding.
+ * @retval false It isn't.
+ * @retval true It is.
+ */
+static inline bool
+rb_enc_is_newline(const char *p, const char *e, rb_encoding *enc)
+{
+ OnigUChar *up = RBIMPL_CAST((OnigUChar *)p);
+ OnigUChar *ue = RBIMPL_CAST((OnigUChar *)e);
+
+ return ONIGENC_IS_MBC_NEWLINE(enc, up, ue);
+}
+
+/**
+ * Queries if the passed code point is of passed character type in the passed
+ * encoding. The "character type" here is a set of macros defined in onigmo.h,
+ * like `ONIGENC_CTYPE_PUNCT`.
+ *
+ * @param[in] c An `OnigCodePoint` value.
+ * @param[in] t An `OnigCtype` value.
+ * @param[in] enc A `rb_encoding*` value.
+ * @retval true `c` is of `t` in `enc`.
+ * @retval false Otherwise.
+ */
+static inline bool
+rb_enc_isctype(OnigCodePoint c, OnigCtype t, rb_encoding *enc)
+{
+ return ONIGENC_IS_CODE_CTYPE(enc, c, t);
+}
+
+/**
+ * Identical to rb_isascii(), except it additionally takes an encoding.
+ *
+ * @param[in] c A code point.
+ * @param[in] enc An encoding.
+ * @retval false `c` is out of range of ASCII character set in `enc`.
+ * @retval true Otherwise.
+ *
+ * @internal
+ *
+ * `enc` is ignored. This is at least an intentional implementation detail
+ * (not a bug). But there could be room for future extensions.
+ */
+static inline bool
+rb_enc_isascii(OnigCodePoint c, rb_encoding *enc)
+{
+ return ONIGENC_IS_CODE_ASCII(c);
+}
+
+/**
+ * Identical to rb_isalpha(), except it additionally takes an encoding.
+ *
+ * @param[in] c A code point.
+ * @param[in] enc An encoding.
+ * @retval true `enc` classifies `c` as "ALPHA".
+ * @retval false Otherwise.
+ */
+static inline bool
+rb_enc_isalpha(OnigCodePoint c, rb_encoding *enc)
+{
+ return ONIGENC_IS_CODE_ALPHA(enc, c);
+}
+
+/**
+ * Identical to rb_islower(), except it additionally takes an encoding.
+ *
+ * @param[in] c A code point.
+ * @param[in] enc An encoding.
+ * @retval true `enc` classifies `c` as "LOWER".
+ * @retval false Otherwise.
+ */
+static inline bool
+rb_enc_islower(OnigCodePoint c, rb_encoding *enc)
+{
+ return ONIGENC_IS_CODE_LOWER(enc, c);
+}
+
+/**
+ * Identical to rb_isupper(), except it additionally takes an encoding.
+ *
+ * @param[in] c A code point.
+ * @param[in] enc An encoding.
+ * @retval true `enc` classifies `c` as "UPPER".
+ * @retval false Otherwise.
+ */
+static inline bool
+rb_enc_isupper(OnigCodePoint c, rb_encoding *enc)
+{
+ return ONIGENC_IS_CODE_UPPER(enc, c);
+}
+
+/**
+ * Identical to rb_iscntrl(), except it additionally takes an encoding.
+ *
+ * @param[in] c A code point.
+ * @param[in] enc An encoding.
+ * @retval true `enc` classifies `c` as "CNTRL".
+ * @retval false Otherwise.
+ */
+static inline bool
+rb_enc_iscntrl(OnigCodePoint c, rb_encoding *enc)
+{
+ return ONIGENC_IS_CODE_CNTRL(enc, c);
+}
+
+/**
+ * Identical to rb_ispunct(), except it additionally takes an encoding.
+ *
+ * @param[in] c A code point.
+ * @param[in] enc An encoding.
+ * @retval true `enc` classifies `c` as "PUNCT".
+ * @retval false Otherwise.
+ */
+static inline bool
+rb_enc_ispunct(OnigCodePoint c, rb_encoding *enc)
+{
+ return ONIGENC_IS_CODE_PUNCT(enc, c);
+}
+
+/**
+ * Identical to rb_isalnum(), except it additionally takes an encoding.
+ *
+ * @param[in] c A code point.
+ * @param[in] enc An encoding.
+ * @retval true `enc` classifies `c` as "ALNUM".
+ * @retval false Otherwise.
+ */
+static inline bool
+rb_enc_isalnum(OnigCodePoint c, rb_encoding *enc)
+{
+ return ONIGENC_IS_CODE_ALNUM(enc, c);
+}
+
+/**
+ * Identical to rb_isprint(), except it additionally takes an encoding.
+ *
+ * @param[in] c A code point.
+ * @param[in] enc An encoding.
+ * @retval true `enc` classifies `c` as "PRINT".
+ * @retval false Otherwise.
+ */
+static inline bool
+rb_enc_isprint(OnigCodePoint c, rb_encoding *enc)
+{
+ return ONIGENC_IS_CODE_PRINT(enc, c);
+}
+
+/**
+ * Identical to rb_isspace(), except it additionally takes an encoding.
+ *
+ * @param[in] c A code point.
+ * @param[in] enc An encoding.
+ * @retval true `enc` classifies `c` as "SPACE".
+ * @retval false Otherwise.
+ */
+static inline bool
+rb_enc_isspace(OnigCodePoint c, rb_encoding *enc)
+{
+ return ONIGENC_IS_CODE_SPACE(enc, c);
+}
+
+/**
+ * Identical to rb_isdigit(), except it additionally takes an encoding.
+ *
+ * @param[in] c A code point.
+ * @param[in] enc An encoding.
+ * @retval true `enc` classifies `c` as "DIGIT".
+ * @retval false Otherwise.
+ */
+static inline bool
+rb_enc_isdigit(OnigCodePoint c, rb_encoding *enc)
+{
+ return ONIGENC_IS_CODE_DIGIT(enc, c);
+}
+
+RBIMPL_ATTR_CONST()
+/**
+ * Identical to rb_toupper(), except it additionally takes an encoding.
+ *
+ * @param[in] c A code point.
+ * @param[in] enc An encoding.
+ * @return `c`'s (Ruby's definition of) upper case counterpart.
+ *
+ * @internal
+ *
+ * As `RBIMPL_ATTR_CONST` implies this function ignores `enc`.
+ */
+int rb_enc_toupper(int c, rb_encoding *enc);
+
+RBIMPL_ATTR_CONST()
+/**
+ * Identical to rb_tolower(), except it additionally takes an encoding.
+ *
+ * @param[in] c A code point.
+ * @param[in] enc An encoding.
+ * @return `c`'s (Ruby's definition of) lower case counterpart.
+ *
+ * @internal
+ *
+ * As `RBIMPL_ATTR_CONST` implies this function ignores `enc`.
+ */
+int rb_enc_tolower(int c, rb_encoding *enc);
+
+RBIMPL_SYMBOL_EXPORT_END()
+
+/** @cond INTERNAL_MACRO */
+#define rb_enc_is_newline rb_enc_is_newline
+#define rb_enc_isalnum rb_enc_isalnum
+#define rb_enc_isalpha rb_enc_isalpha
+#define rb_enc_isascii rb_enc_isascii
+#define rb_enc_isctype rb_enc_isctype
+#define rb_enc_isdigit rb_enc_isdigit
+#define rb_enc_islower rb_enc_islower
+#define rb_enc_isprint rb_enc_isprint
+#define rb_enc_iscntrl rb_enc_iscntrl
+#define rb_enc_ispunct rb_enc_ispunct
+#define rb_enc_isspace rb_enc_isspace
+#define rb_enc_isupper rb_enc_isupper
+/** @endcond */
+
+#endif /* RUBY_INTERNAL_ENCODING_CTYPE_H */
diff --git a/include/ruby/internal/encoding/encoding.h b/include/ruby/internal/encoding/encoding.h
new file mode 100644
index 0000000000..a680651a81
--- /dev/null
+++ b/include/ruby/internal/encoding/encoding.h
@@ -0,0 +1,1044 @@
+#ifndef RUBY_INTERNAL_ENCODING_ENCODING_H /*-*-C++-*-vi:se ft=cpp:*/
+#define RUBY_INTERNAL_ENCODING_ENCODING_H
+/**
+ * @file
+ * @author Ruby developers <ruby-core@ruby-lang.org>
+ * @copyright This file is a part of the programming language Ruby.
+ * Permission is hereby granted, to either redistribute and/or
+ * modify this file, provided that the conditions mentioned in the
+ * file COPYING are met. Consult the file for details.
+ * @warning Symbols prefixed with either `RBIMPL` or `rbimpl` are
+ * implementation details. Don't take them as canon. They could
+ * rapidly appear then vanish. The name (path) of this header file
+ * is also an implementation detail. Do not expect it to persist
+ * at the place it is now. Developers are free to move it anywhere
+ * anytime at will.
+ * @note To ruby-core: remember that this header can be possibly
+ * recursively included from extension libraries written in C++.
+ * Do not expect for instance `__VA_ARGS__` is always available.
+ * We assume C99 for ruby itself but we don't assume languages of
+ * extension libraries. They could be written in C++98.
+ * @brief Defines ::rb_encoding
+ */
+
+#include "ruby/oniguruma.h"
+#include "ruby/internal/attr/const.h"
+#include "ruby/internal/attr/deprecated.h"
+#include "ruby/internal/attr/noalias.h"
+#include "ruby/internal/attr/pure.h"
+#include "ruby/internal/attr/returns_nonnull.h"
+#include "ruby/internal/dllexport.h"
+#include "ruby/internal/encoding/coderange.h"
+#include "ruby/internal/value.h"
+#include "ruby/internal/core/rbasic.h"
+#include "ruby/internal/fl_type.h"
+
+RBIMPL_SYMBOL_EXPORT_BEGIN()
+
+/**
+ * `Encoding` class.
+ *
+ * @ingroup object
+ */
+RUBY_EXTERN VALUE rb_cEncoding;
+
+/**
+ * @private
+ *
+ * Bit constants used when embedding encodings into ::RBasic::flags. Extension
+ * libraries must not bother with such things.
+ */
+enum ruby_encoding_consts {
+
+ /** Max possible number of embeddable encodings (7 bits' worth). */
+ RUBY_ENCODING_INLINE_MAX = 127,
+
+ /** Where inline encodings reside. */
+ RUBY_ENCODING_SHIFT = (RUBY_FL_USHIFT+10),
+
+ /** Bits we use to store inline encodings. */
+ RUBY_ENCODING_MASK = (RUBY_ENCODING_INLINE_MAX<<RUBY_ENCODING_SHIFT
+ /* RUBY_FL_USER10..RUBY_FL_USER16 */),
+
+ /** Max possible length of an encoding name. */
+ RUBY_ENCODING_MAXNAMELEN = 42
+};
+
+#define ENCODING_INLINE_MAX RUBY_ENCODING_INLINE_MAX /**< @old{RUBY_ENCODING_INLINE_MAX} */
+#define ENCODING_SHIFT RUBY_ENCODING_SHIFT /**< @old{RUBY_ENCODING_SHIFT} */
+#define ENCODING_MASK RUBY_ENCODING_MASK /**< @old{RUBY_ENCODING_MASK} */
+
+/**
+ * Destructively assigns the passed encoding to the passed object. The object
+ * must be capable of having inline encoding. Using this macro needs deep
+ * understanding of bit level object binary layout.
+ *
+ * @param[out] obj Target object to modify.
+ * @param[in] encindex Encoding in encindex format.
+ * @post `obj`'s encoding is `encindex`.
+ */
+static inline void
+RB_ENCODING_SET_INLINED(VALUE obj, int encindex)
+{
+ VALUE f = /* upcast */ encindex;
+
+ f <<= RUBY_ENCODING_SHIFT;
+ RB_FL_UNSET_RAW(obj, RUBY_ENCODING_MASK);
+ RB_FL_SET_RAW(obj, f);
+}
+
+/**
+ * Queries the encoding of the passed object. The encoding must be smaller
+ * than ::RUBY_ENCODING_INLINE_MAX, which means you have some assumption on the
+ * return value. This means the API is for internal use only.
+ *
+ * @param[in] obj Target object.
+ * @return `obj`'s encoding index.
+ */
+static inline int
+RB_ENCODING_GET_INLINED(VALUE obj)
+{
+ VALUE ret = RB_FL_TEST_RAW(obj, RUBY_ENCODING_MASK) >> RUBY_ENCODING_SHIFT;
+
+ return RBIMPL_CAST((int)ret);
+}
+
+#define ENCODING_SET_INLINED(obj,i) RB_ENCODING_SET_INLINED(obj,i) /**< @old{RB_ENCODING_SET_INLINED} */
+#define ENCODING_SET(obj,i) RB_ENCODING_SET(obj,i) /**< @old{RB_ENCODING_SET} */
+#define ENCODING_GET_INLINED(obj) RB_ENCODING_GET_INLINED(obj) /**< @old{RB_ENCODING_GET_INLINED} */
+#define ENCODING_GET(obj) RB_ENCODING_GET(obj) /**< @old{RB_ENCODING_GET} */
+#define ENCODING_IS_ASCII8BIT(obj) RB_ENCODING_IS_ASCII8BIT(obj) /**< @old{RB_ENCODING_IS_ASCII8BIT} */
+#define ENCODING_MAXNAMELEN RUBY_ENCODING_MAXNAMELEN /**< @old{RUBY_ENCODING_MAXNAMELEN} */
+
+/**
+ * The type of encoding. Our design here is we take Oniguruma/Onigmo's
+ * multilingualisation schema as our base data structure.
+ */
+typedef const OnigEncodingType rb_encoding;
+
+RBIMPL_ATTR_NOALIAS()
+/**
+ * Converts a character option to its encoding. It only supports a very
+ * limited set of Japanese encodings due to its Japanese origin. Ruby still
+ * has this in-core for backwards compatibility. But new codes must not bother
+ * such concept like one-character encoding option. Consider deprecated in
+ * practice.
+ *
+ * @param[in] c One of `['n', 'e', 's', 'u', 'i', 'x', 'm']`.
+ * @param[out] option Return buffer.
+ * @param[out] kcode Return buffer.
+ * @retval 1 `c` understood properly.
+ * @retval 0 `c` is not understood.
+ * @post `option` is a ::OnigOptionType.
+ * @post `kcode` is an enum `ruby_preserved_encindex`.
+ *
+ * @internal
+ *
+ * `kcode` is opaque because `ruby_preserved_encindex` is not visible from
+ * extension libraries. But who cares?
+ */
+int rb_char_to_option_kcode(int c, int *option, int *kcode);
+
+/**
+ * Creates a new "dummy" encoding. Roughly speaking, an encoding is dummy when
+ * it is stateful. Notable examples of dummy encodings are those defined in
+ * ISO/IEC 2022.
+ *
+ * @param[in] name Name of the creating encoding.
+ * @exception rb_eArgError Duplicated or malformed `name`.
+ * @return New dummy encoding's index.
+ * @post Encoding named `name` is created, whose index is the return
+ * value.
+ */
+int rb_define_dummy_encoding(const char *name);
+
+RBIMPL_ATTR_PURE()
+/**
+ * Queries if the passed encoding is dummy.
+ *
+ * @param[in] enc Encoding in question.
+ * @retval 1 It is.
+ * @retval 0 It isn't.
+ */
+int rb_enc_dummy_p(rb_encoding *enc);
+
+RBIMPL_ATTR_PURE()
+/**
+ * Queries the index of the encoding. An encoding's index is a Ruby-local
+ * concept. It is a (sequential) number assigned to each encoding.
+ *
+ * @param[in] enc Encoding in question.
+ * @return Its index.
+ * @note You can pass null pointers to this function. It is equivalent
+ * to rb_usascii_encindex() then.
+ */
+int rb_enc_to_index(rb_encoding *enc);
+
+/**
+ * Queries the index of the encoding of the passed object, if any.
+ *
+ * @param[in] obj Object in question.
+ * @retval -1 `obj` is incapable of having an encoding.
+ * @retval otherwise `obj`'s encoding's index.
+ */
+int rb_enc_get_index(VALUE obj);
+
+/**
+ * @alias{rb_enc_get_index}
+ *
+ * @internal
+ *
+ * Not a verbatim alias: rb_enc_get_index() is consulted only when the inline
+ * bits hold ::RUBY_ENCODING_INLINE_MAX. The API is consistent, though.
+ */
+static inline int
+RB_ENCODING_GET(VALUE obj)
+{
+ int encindex = RB_ENCODING_GET_INLINED(obj);
+
+ if (encindex == RUBY_ENCODING_INLINE_MAX) {
+ return rb_enc_get_index(obj);
+ }
+ else {
+ return encindex;
+ }
+}
+
+/**
+ * Destructively assigns an encoding (via its index) to an object.
+ *
+ * @param[out] obj Object in question.
+ * @param[in] encindex An encoding index.
+ * @exception rb_eFrozenError `obj` is frozen.
+ * @exception rb_eArgError `obj` is incapable of having an encoding.
+ * @exception rb_eEncodingError `encindex` is out of bounds.
+ * @exception rb_eLoadError Failed to load the encoding.
+ */
+void rb_enc_set_index(VALUE obj, int encindex);
+
+/** @alias{rb_enc_set_index} */
+static inline void
+RB_ENCODING_SET(VALUE obj, int encindex)
+{
+ rb_enc_set_index(obj, encindex);
+}
+
+/**
+ * This is #RB_ENCODING_SET + RB_ENC_CODERANGE_SET combo. The object must be
+ * capable of having inline encoding. Using this macro needs deep
+ * understanding of bit level object binary layout.
+ *
+ * @param[out] obj Target object.
+ * @param[in] encindex Encoding in encindex format.
+ * @param[in] cr An enum ::ruby_coderange_type.
+ * @post `obj`'s encoding is `encindex`.
+ * @post `obj`'s code range is `cr`.
+ */
+static inline void
+RB_ENCODING_CODERANGE_SET(VALUE obj, int encindex, enum ruby_coderange_type cr)
+{
+ RB_ENCODING_SET(obj, encindex);
+ RB_ENC_CODERANGE_SET(obj, cr);
+}
+
+RBIMPL_ATTR_PURE()
+/**
+ * Queries if the passed object can have its encoding.
+ *
+ * @param[in] obj Object in question.
+ * @retval 1 It can.
+ * @retval 0 It cannot.
+ */
+int rb_enc_capable(VALUE obj);
+
+/**
+ * Queries the index of the encoding.
+ *
+ * @param[in] name Name of the encoding to find.
+ * @exception rb_eArgError No such encoding named `name`.
+ * @retval -1 `name` exists, but unable to load.
+ * @retval otherwise Index of encoding named `name`.
+ */
+int rb_enc_find_index(const char *name);
+
+/**
+ * Registers an "alias" name. In the wild, an encoding can be called using
+ * multiple names. For instance an encoding known as `"CP932"` is also called
+ * `"SJIS"` on occasions. This API registers such relationships.
+ *
+ * @param[in] alias New name.
+ * @param[in] orig Old name.
+ * @exception rb_eArgError `alias` is duplicated or malformed.
+ * @retval -1 Failed to load `orig`.
+ * @retval otherwise The index of `orig` and `alias`.
+ * @post `alias` is a synonym of `orig`. They refer to the identical
+ * encoding.
+ */
+int rb_enc_alias(const char *alias, const char *orig);
+
+/**
+ * Obtains an encoding index from a wider range of objects (than
+ * rb_enc_find_index()).
+ *
+ * @param[in] obj An ::rb_cEncoding, or its name in ::rb_cString.
+ * @retval -1 `obj` is unexpected type/contents.
+ * @retval otherwise Index corresponding to `obj`.
+ */
+int rb_to_encoding_index(VALUE obj);
+
+/**
+ * Identical to rb_find_encoding(), except it raises an exception instead of
+ * returning NULL.
+ *
+ * @param[in] obj An ::rb_cEncoding, or its name in ::rb_cString.
+ * @exception rb_eTypeError `obj` is neither ::rb_cEncoding nor ::rb_cString.
+ * @exception rb_eArgError `obj` is an unknown encoding name.
+ * @return Encoding of `obj`.
+ */
+rb_encoding *rb_to_encoding(VALUE obj);
+
+/**
+ * Identical to rb_to_encoding_index(), except the return type.
+ *
+ * @param[in] obj An ::rb_cEncoding, or its name in ::rb_cString.
+ * @exception rb_eTypeError `obj` is neither ::rb_cEncoding nor ::rb_cString.
+ * @retval NULL No such encoding.
+ * @retval otherwise Encoding of `obj`.
+ */
+rb_encoding *rb_find_encoding(VALUE obj);
+
+/**
+ * Identical to rb_enc_get_index(), except the return type.
+ *
+ * @param[in] obj Object in question.
+ * @retval NULL `obj` is incapable of having an encoding.
+ * @retval otherwise `obj`'s encoding.
+ */
+rb_encoding *rb_enc_get(VALUE obj);
+
+/**
+ * Look for the "common" encoding between the two. One character can or cannot
+ * be expressed depending on an encoding. This function finds the super-set of
+ * encodings that satisfy contents of both arguments. If that is impossible
+ * returns NULL.
+ *
+ * @param[in] str1 An object.
+ * @param[in] str2 Another object.
+ * @retval NULL No encoding can satisfy both at once.
+ * @retval otherwise Common encoding between the two.
+ * @note Arguments can be non-string, e.g. Regexp.
+ */
+rb_encoding *rb_enc_compatible(VALUE str1, VALUE str2);
+
+/**
+ * Identical to rb_enc_compatible(), except it raises an exception instead of
+ * returning NULL.
+ *
+ * @param[in] str1 An object.
+ * @param[in] str2 Another object.
+ * @exception rb_eEncCompatError No encoding can satisfy both.
+ * @return Common encoding between the two.
+ * @note Arguments can be non-string, e.g. Regexp.
+ */
+rb_encoding *rb_enc_check(VALUE str1,VALUE str2);
+
+/**
+ * Identical to rb_enc_set_index(), except it additionally does contents fix-up
+ * depending on the passed object. It for instance changes the byte length of
+ * terminating `U+0000` according to the passed encoding.
+ *
+ * @param[out] obj Object in question.
+ * @param[in] encindex An encoding index.
+ * @exception rb_eFrozenError `obj` is frozen.
+ * @exception rb_eArgError `obj` is incapable of having an encoding.
+ * @exception rb_eEncodingError `encindex` is out of bounds.
+ * @exception rb_eLoadError Failed to load the encoding.
+ * @return The passed `obj`.
+ * @post `obj`'s contents might be fixed according to `encindex`.
+ */
+VALUE rb_enc_associate_index(VALUE obj, int encindex);
+
+/**
+ * Identical to rb_enc_associate_index(), except it takes an encoding itself
+ * instead of its index.
+ *
+ * @param[out] obj Object in question.
+ * @param[in] enc An encoding.
+ * @exception rb_eFrozenError `obj` is frozen.
+ * @exception rb_eArgError `obj` is incapable of having an encoding.
+ * @return The passed `obj`.
+ * @post `obj`'s contents might be fixed according to `enc`.
+ */
+VALUE rb_enc_associate(VALUE obj, rb_encoding *enc);
+
+/**
+ * Destructively copies the encoding of the latter object to that of former
+ * one. It can also be seen as a routine identical to
+ * rb_enc_associate_index(), except it takes an object's encoding instead of an
+ * encoding's index.
+ *
+ * @param[out] dst Object to modify.
+ * @param[in] src Object to reference.
+ * @exception rb_eFrozenError `dst` is frozen.
+ * @exception rb_eArgError `dst` is incapable of having an encoding.
+ * @exception rb_eEncodingError `src` is incapable of having an encoding.
+ * @post `dst`'s encoding is that of `src`'s.
+ */
+void rb_enc_copy(VALUE dst, VALUE src);
+
+
+/**
+ * Identical to rb_find_encoding(), except it takes an encoding index instead
+ * of a Ruby object.
+ *
+ * @param[in] idx An encoding index.
+ * @retval NULL No such encoding.
+ * @retval otherwise An encoding whose index is `idx`.
+ */
+rb_encoding *rb_enc_from_index(int idx);
+
+/**
+ * Identical to rb_find_encoding(), except it takes a C's string instead of
+ * Ruby's.
+ *
+ * @param[in] name Name of the encoding to query.
+ * @retval NULL No such encoding.
+ * @retval otherwise An encoding whose name is `name`.
+ */
+rb_encoding *rb_enc_find(const char *name);
+
+/**
+ * Queries the (canonical) name of the passed encoding.
+ *
+ * @param[in] enc An encoding.
+ * @return Its name.
+ */
+static inline const char *
+rb_enc_name(rb_encoding *enc)
+{
+ return enc->name;
+}
+
+/**
+ * Queries the minimum number of bytes that the passed encoding needs to
+ * represent a character. For ASCII and compatible encodings this is typically
+ * 1. There are however encodings whose minimum is not 1; they are
+ * historically called wide characters.
+ *
+ * @param[in] enc An encoding.
+ * @return Its least possible number of bytes except 0.
+ */
+static inline int
+rb_enc_mbminlen(rb_encoding *enc)
+{
+ return enc->min_enc_len;
+}
+
+/**
+ * Queries the maximum number of bytes that the passed encoding needs to
+ * represent a character. Fixed-width encodings have the same value for this
+ * one and #rb_enc_mbminlen. However there are variable-width encodings.
+ * UTF-8, for instance, takes from 1 up to 6 bytes.
+ *
+ * @param[in] enc An encoding.
+ * @return Its maximum possible number of bytes of a character.
+ */
+static inline int
+rb_enc_mbmaxlen(rb_encoding *enc)
+{
+ return enc->max_enc_len;
+}
+
+/**
+ * Queries the number of bytes of the character at the passed pointer.
+ *
+ * @param[in] p Pointer to a character's first byte.
+ * @param[in] e End of the string that has `p`.
+ * @param[in] enc Encoding of the string.
+ * @return If the character at `p` does not end until `e`, number of bytes
+ * between `p` and `e`. Otherwise the number of bytes that the
+ * character at `p` is encoded in.
+ *
+ * @internal
+ *
+ * Strictly speaking there are chances when `p` points to a middle byte of a
+ * wide character. This function returns "the number of bytes from `p` to
+ * nearest of either `e` or the next character boundary", if you go strict.
+ */
+int rb_enc_mbclen(const char *p, const char *e, rb_encoding *enc);
+
+/**
+ * Identical to rb_enc_mbclen() unless the character at `p` overruns `e`. That
+ * can happen for instance when you read from a socket and its partial read
+ * cuts a wide character in-between. In those situations this function
+ * "estimates" theoretical length of the character in question. Typically it
+ * tends to be possible to know how many bytes a character needs before
+ * actually reaching its end; for instance UTF-8 encodes a character's length
+ * in the first byte of it. This function returns that info.
+ *
+ * @note This implies that the string is not broken.
+ *
+ * @param[in] p Pointer to the character's first byte.
+ * @param[in] e End of the string that has `p`.
+ * @param[in] enc Encoding of the string.
+ * @return Number of bytes of character at `p`, measured or estimated.
+ */
+int rb_enc_fast_mbclen(const char *p, const char *e, rb_encoding *enc);
+
+/**
+ * Queries the number of bytes of the character at the passed pointer. This
+ * function returns 3 different types of information:
+ *
+ * ```CXX
+ * auto n = rb_enc_precise_mbclen(p, q, r);
+ *
+ * if (ONIGENC_MBCLEN_CHARFOUND_P(n)) {
+ * // Character found. Normal return.
+ * auto found_length = ONIGENC_MBCLEN_CHARFOUND_LEN(n);
+ * }
+ * else if (ONIGENC_MBCLEN_NEEDMORE_P(n)) {
+ * // Character overruns past `q`; needs more.
+ * auto requested_length = ONIGENC_MBCLEN_NEEDMORE_LEN(n);
+ * }
+ * else {
+ * // `p` is broken.
+ * assert(ONIGENC_MBCLEN_INVALID_P(n));
+ * }
+ * ```
+ *
+ * @param[in] p Pointer to the character's first byte.
+ * @param[in] e End of the string that has `p`.
+ * @param[in] enc Encoding of the string.
+ * @return Encoded read/needed number of bytes (see above).
+ */
+int rb_enc_precise_mbclen(const char *p, const char *e, rb_encoding *enc);
+
+#define MBCLEN_CHARFOUND_P(ret) ONIGENC_MBCLEN_CHARFOUND_P(ret) /**< @old{ONIGENC_MBCLEN_CHARFOUND_P} */
+#define MBCLEN_CHARFOUND_LEN(ret) ONIGENC_MBCLEN_CHARFOUND_LEN(ret) /**< @old{ONIGENC_MBCLEN_CHARFOUND_LEN} */
+#define MBCLEN_INVALID_P(ret) ONIGENC_MBCLEN_INVALID_P(ret) /**< @old{ONIGENC_MBCLEN_INVALID_P} */
+#define MBCLEN_NEEDMORE_P(ret) ONIGENC_MBCLEN_NEEDMORE_P(ret) /**< @old{ONIGENC_MBCLEN_NEEDMORE_P} */
+#define MBCLEN_NEEDMORE_LEN(ret) ONIGENC_MBCLEN_NEEDMORE_LEN(ret) /**< @old{ONIGENC_MBCLEN_NEEDMORE_LEN} */
+
+/**
+ * Queries the code point of character pointed by the passed pointer. If that
+ * code point is included in ASCII that code point is returned. Otherwise -1.
+ * This can be different from just looking at the first byte. For instance it
+ * reads 2 bytes in case of UTF-16BE.
+ *
+ * @param[in] p Pointer to the character's first byte.
+ * @param[in] e End of the string that has `p`.
+ * @param[out] len Return buffer.
+ * @param[in] enc Encoding of the string.
+ * @retval -1 The character at `p` is not in ASCII.
+ * @retval otherwise A code point of the character at `p`.
+ * @post `len` (if set) is the number of bytes of `p`.
+ */
+int rb_enc_ascget(const char *p, const char *e, int *len, rb_encoding *enc);
+
+/**
+ * Queries the code point of character pointed by the passed pointer.
+ * Exceptions happen in case of broken input.
+ *
+ * @param[in] p Pointer to the character's first byte.
+ * @param[in] e End of the string that has `p`.
+ * @param[out] len Return buffer.
+ * @param[in] enc Encoding of the string.
+ * @exception rb_eArgError `p` is broken.
+ * @return Code point of the character pointed by `p`.
+ * @post `len` (if set) is the number of bytes of `p`.
+ */
+unsigned int rb_enc_codepoint_len(const char *p, const char *e, int *len, rb_encoding *enc);
+
+/**
+ * Queries the code point of character pointed by the passed pointer.
+ * Exceptions happen in case of broken input.
+ *
+ * @deprecated Use rb_enc_codepoint_len() instead.
+ * @param[in] p Pointer to the character's first byte.
+ * @param[in] e End of the string that has `p`.
+ * @param[in] enc Encoding of the string.
+ * @exception rb_eArgError `p` is broken.
+ * @return Code point of the character pointed by `p`.
+ *
+ * @internal
+ *
+ * @matz says in commit 91e5ba1cb865a2385d3e1cbfacd824496898e098 that the line
+ * below is a "prototype for obsolete function". However even today there
+ * still are some use cases of it throughout our repository. It seems it has
+ * its own niche.
+ */
+static inline unsigned int
+rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc)
+{
+ return rb_enc_codepoint_len(p, e, 0, enc);
+ /* ^^^
+ * This can be `NULL` in C, `nullptr` in C++, and `0` for both.
+ * We choose the most portable one here.
+ */
+}
+
+
+/**
+ * Identical to rb_enc_codepoint(), except it assumes the passed character is
+ * not broken.
+ *
+ * @param[in] p Pointer to the character's first byte.
+ * @param[in] e End of the string that has `p`.
+ * @param[in] enc Encoding of the string.
+ * @return Code point of the character pointed by `p`.
+ */
+static inline OnigCodePoint
+rb_enc_mbc_to_codepoint(const char *p, const char *e, rb_encoding *enc)
+{
+ const OnigUChar *up = RBIMPL_CAST((const OnigUChar *)p);
+ const OnigUChar *ue = RBIMPL_CAST((const OnigUChar *)e);
+
+ return ONIGENC_MBC_TO_CODE(enc, up, ue);
+}
+
+/**
+ * Queries the number of bytes requested to represent the passed code point
+ * using the passed encoding.
+ *
+ * @param[in] code Code point in question.
+ * @param[in] enc Encoding to convert the code into a byte sequence.
+ * @exception rb_eArgError `enc` does not glean `code`.
+ * @return Number of bytes requested to represent `code` using `enc`.
+ */
+int rb_enc_codelen(int code, rb_encoding *enc);
+
+/**
+ * Identical to rb_enc_codelen(), except it returns 0 for invalid code points.
+ *
+ * @param[in] c Code point in question.
+ * @param[in] enc Encoding to convert `c` into a byte sequence.
+ * @retval 0 `c` is invalid.
+ * @retval otherwise Number of bytes needed for `enc` to encode `c`.
+ */
+static inline int
+rb_enc_code_to_mbclen(int c, rb_encoding *enc)
+{
+ OnigCodePoint uc = RBIMPL_CAST((OnigCodePoint)c);
+
+ return ONIGENC_CODE_TO_MBCLEN(enc, uc);
+}
+
+/**
+ * Identical to rb_enc_uint_chr(), except it writes back to the passed buffer
+ * instead of allocating one.
+ *
+ * @param[in] c Code point.
+ * @param[out] buf Return buffer.
+ * @param[in] enc Target encoding scheme.
+ * @retval <= 0 `c` is invalid in `enc`.
+ * @retval otherwise Number of bytes written to `buf`.
+ * @post `c` is encoded according to `enc`, then written to `buf`.
+ *
+ * @internal
+ *
+ * The second argument must be typed. But its current usages prevent us from
+ * being any stricter than this. :FIXME:
+ */
+static inline int
+rb_enc_mbcput(unsigned int c, void *buf, rb_encoding *enc)
+{
+ OnigCodePoint uc = RBIMPL_CAST((OnigCodePoint)c);
+ OnigUChar *ubuf = RBIMPL_CAST((OnigUChar *)buf);
+
+ return ONIGENC_CODE_TO_MBC(enc, uc, ubuf);
+}
+
+/**
+ * Queries the previous (left) character.
+ *
+ * @param[in] s Start of the string.
+ * @param[in] p Pointer to a character.
+ * @param[in] e End of the string.
+ * @param[in] enc Encoding.
+ * @retval NULL No previous character.
+ * @retval otherwise Pointer to the head of the previous character.
+ */
+static inline char *
+rb_enc_prev_char(const char *s, const char *p, const char *e, rb_encoding *enc)
+{
+ const OnigUChar *us = RBIMPL_CAST((const OnigUChar *)s);
+ const OnigUChar *up = RBIMPL_CAST((const OnigUChar *)p);
+ const OnigUChar *ue = RBIMPL_CAST((const OnigUChar *)e);
+ OnigUChar *ur = onigenc_get_prev_char_head(enc, us, up, ue);
+
+ return RBIMPL_CAST((char *)ur);
+}
+
+/**
+ * Queries the left boundary of a character. This function takes a pointer
+ * that is not necessarily a head of a character, and searches for its head.
+ *
+ * @param[in] s Start of the string.
+ * @param[in] p Pointer to a possibly-middle of a character.
+ * @param[in] e End of the string.
+ * @param[in] enc Encoding.
+ * @return Pointer to the head of the character that contains `p`.
+ */
+static inline char *
+rb_enc_left_char_head(const char *s, const char *p, const char *e, rb_encoding *enc)
+{
+ const OnigUChar *us = RBIMPL_CAST((const OnigUChar *)s);
+ const OnigUChar *up = RBIMPL_CAST((const OnigUChar *)p);
+ const OnigUChar *ue = RBIMPL_CAST((const OnigUChar *)e);
+ OnigUChar *ur = onigenc_get_left_adjust_char_head(enc, us, up, ue);
+
+ return RBIMPL_CAST((char *)ur);
+}
+
+/**
+ * Queries the right boundary of a character. This function takes a pointer
+ * that is not necessarily a head of a character, and searches for its tail.
+ *
+ * @param[in] s Start of the string.
+ * @param[in] p Pointer to a possibly-middle of a character.
+ * @param[in] e End of the string.
+ * @param[in] enc Encoding.
+ * @return Pointer to the end of the character that contains `p`.
+ */
+static inline char *
+rb_enc_right_char_head(const char *s, const char *p, const char *e, rb_encoding *enc)
+{
+ const OnigUChar *us = RBIMPL_CAST((const OnigUChar *)s);
+ const OnigUChar *up = RBIMPL_CAST((const OnigUChar *)p);
+ const OnigUChar *ue = RBIMPL_CAST((const OnigUChar *)e);
+ OnigUChar *ur = onigenc_get_right_adjust_char_head(enc, us, up, ue);
+
+ return RBIMPL_CAST((char *)ur);
+}
+
+/**
+ * Scans the string backwards for `n` characters.
+ *
+ * @param[in] s Start of the string.
+ * @param[in] p Pointer to a character.
+ * @param[in] e End of the string.
+ * @param[in] n Steps.
+ * @param[in] enc Encoding.
+ * @retval NULL There are no `n` characters left.
+ * @retval otherwise Pointer to the character `n` characters before `p`.
+ */
+static inline char *
+rb_enc_step_back(const char *s, const char *p, const char *e, int n, rb_encoding *enc)
+{
+ const OnigUChar *us = RBIMPL_CAST((const OnigUChar *)s);
+ const OnigUChar *up = RBIMPL_CAST((const OnigUChar *)p);
+ const OnigUChar *ue = RBIMPL_CAST((const OnigUChar *)e);
+ const OnigUChar *ur = onigenc_step_back(enc, us, up, ue, n);
+
+ return RBIMPL_CAST((char *)ur);
+}
+
+/**
+ * @private
+ *
+ * This is an implementation detail of rb_enc_asciicompat(). People don't use
+ * it directly. Just always use rb_enc_asciicompat().
+ *
+ * @param[in] enc Encoding in question.
+ * @retval 1 It is ASCII compatible.
+ * @retval 0 It isn't.
+ */
+static inline int
+rb_enc_asciicompat_inline(rb_encoding *enc)
+{
+ return rb_enc_mbminlen(enc)==1 && !rb_enc_dummy_p(enc);
+}
+
+/**
+ * Queries if the passed encoding is _in some sense_ compatible with ASCII.
+ * The concept of ASCII compatibility is nuanced, and private to our
+ * implementation. For instance SJIS is ASCII compatible to us, despite their
+ * having different characters at code point `0x5C`. This is based on some
+ * practical consideration that Japanese people tend to treat SJIS as "upper
+ * compatible" with ASCII (which is in fact a wrong idea, but we just don't go
+ * strict here). An example of ASCII incompatible encoding is UTF-16. UTF-16
+ * shares code points with ASCII, but employs a completely different encoding
+ * scheme.
+ *
+ * @param[in] enc Encoding in question.
+ * @retval 0 It is incompatible.
+ * @retval 1 It is compatible.
+ */
+static inline bool
+rb_enc_asciicompat(rb_encoding *enc)
+{
+ if (rb_enc_mbminlen(enc) != 1) { /* shortest character is not 1 byte */
+ return false;
+ }
+ else if (rb_enc_dummy_p(enc)) { /* dummy encodings never qualify */
+ return false;
+ }
+ else {
+ return true;
+ }
+}
+
+/**
+ * Queries if the passed string is in an ASCII-compatible encoding.
+ *
+ * @param[in] str A Ruby's string to query.
+ * @retval 0 `str` cannot have an encoding, or is ASCII-incompatible.
+ * @retval 1 Otherwise.
+ */
+static inline bool
+rb_enc_str_asciicompat_p(VALUE str)
+{
+ rb_encoding *enc = rb_enc_get(str);
+ /* rb_enc_get() yields NULL for encoding-incapable objects; don't deref it. */
+ return enc && rb_enc_asciicompat(enc);
+}
+
+/**
+ * Queries the Ruby-level counterpart instance of ::rb_cEncoding that
+ * corresponds to the passed encoding.
+ *
+ * @param[in] enc An encoding
+ * @retval RUBY_Qnil `enc` is a null pointer.
+ * @retval otherwise An instance of ::rb_cEncoding.
+ */
+VALUE rb_enc_from_encoding(rb_encoding *enc);
+
+RBIMPL_ATTR_PURE()
+/**
+ * Queries if the passed encoding is either one of UTF-8/16/32.
+ *
+ * @note It does not take UTF-7, which we actually support, into account.
+ *
+ * @param[in] enc Encoding in question.
+ * @retval 0 It is not a Unicode variant.
+ * @retval otherwise It is.
+ *
+ * @internal
+ *
+ * In reality it returns 1/0, but the value is abstracted as
+ * `ONIGENC_FLAG_UNICODE`.
+ */
+int rb_enc_unicode_p(rb_encoding *enc);
+
+RBIMPL_ATTR_RETURNS_NONNULL()
+/**
+ * Queries the encoding that represents ASCII-8BIT a.k.a. binary.
+ *
+ * @return The encoding that represents ASCII-8BIT.
+ *
+ * @internal
+ *
+ * This can not return NULL once the process properly boots up.
+ */
+rb_encoding *rb_ascii8bit_encoding(void);
+
+RBIMPL_ATTR_RETURNS_NONNULL()
+/**
+ * Queries the encoding that represents UTF-8.
+ *
+ * @return The encoding that represents UTF-8.
+ *
+ * @internal
+ *
+ * This can not return NULL once the process properly boots up.
+ */
+rb_encoding *rb_utf8_encoding(void);
+
+RBIMPL_ATTR_RETURNS_NONNULL()
+/**
+ * Queries the encoding that represents US-ASCII.
+ *
+ * @return The encoding that represents US-ASCII.
+ *
+ * @internal
+ *
+ * This can not return NULL once the process properly boots up.
+ */
+rb_encoding *rb_usascii_encoding(void);
+
+/**
+ * Queries the encoding that represents the current locale.
+ *
+ * @return The encoding that represents the process' locale.
+ *
+ * @internal
+ *
+ * This is dynamic. If you change the process' locale by e.g. calling
+ * `setlocale(3)`, that should also change the return value of this function.
+ *
+ * There is no official way for Ruby scripts to manipulate locales, though.
+ */
+rb_encoding *rb_locale_encoding(void);
+
+/**
+ * Queries the "filesystem" encoding. This is the encoding that ruby expects
+ * info from the OS' file system to be in. This affects for instance the
+ * return value of rb_dir_getwd(). Most notably on Windows it can be an alias
+ * of OS codepage. Most notably on Linux users can set this via default
+ * external encoding.
+ *
+ * @return The "filesystem" encoding.
+ */
+rb_encoding *rb_filesystem_encoding(void);
+
+/**
+ * Queries the "default external" encoding. This is used to interact with
+ * outer-process things such as File. Though not recommended, you can set this
+ * using rb_enc_set_default_external().
+ *
+ * @return The "default external" encoding.
+ */
+rb_encoding *rb_default_external_encoding(void);
+
+/**
+ * Queries the "default internal" encoding. This could be a null pointer.
+ * Otherwise, outer-process info are transcoded from default external encoding
+ * to this one during reading from an IO.
+ *
+ * @return The "default internal" encoding (if any).
+ */
+rb_encoding *rb_default_internal_encoding(void);
+
+#ifndef rb_ascii8bit_encindex
+RBIMPL_ATTR_CONST()
+/**
+ * Identical to rb_ascii8bit_encoding(), except it returns the encoding's index
+ * instead of the encoding itself.
+ *
+ * @return The index of encoding of ASCII-8BIT.
+ *
+ * @internal
+ *
+ * This happens to be 0.
+ */
+int rb_ascii8bit_encindex(void);
+#endif
+
+/**
+ * Queries if the passed object is in ascii 8bit (== binary) encoding. The
+ * object must be capable of having inline encoding. Using this macro needs
+ * deep understanding of bit level object binary layout.
+ *
+ * @param[in] obj An object to check.
+ * @retval 1 It is.
+ * @retval 0 It isn't.
+ */
+static inline bool
+RB_ENCODING_IS_ASCII8BIT(VALUE obj)
+{
+ return RB_ENCODING_GET_INLINED(obj) == rb_ascii8bit_encindex();
+}
+
+#ifndef rb_utf8_encindex
+RBIMPL_ATTR_CONST()
+/**
+ * Identical to rb_utf8_encoding(), except it returns the encoding's index
+ * instead of the encoding itself.
+ *
+ * @return The index of encoding of UTF-8.
+ */
+int rb_utf8_encindex(void);
+#endif
+
+#ifndef rb_usascii_encindex
+RBIMPL_ATTR_CONST()
+/**
+ * Identical to rb_usascii_encoding(), except it returns the encoding's index
+ * instead of the encoding itself.
+ *
+ * @return The index of encoding of US-ASCII.
+ */
+int rb_usascii_encindex(void);
+#endif
+
+/**
+ * Identical to rb_locale_encoding(), except it returns the encoding's index
+ * instead of the encoding itself.
+ *
+ * @return The index of the locale encoding.
+ */
+int rb_locale_encindex(void);
+
+/**
+ * Identical to rb_filesystem_encoding(), except it returns the encoding's
+ * index instead of the encoding itself.
+ *
+ * @return The index of the filesystem encoding.
+ */
+int rb_filesystem_encindex(void);
+
+/**
+ * Identical to rb_default_external_encoding(), except it returns the
+ * Ruby-level counterpart instance of ::rb_cEncoding that corresponds to the
+ * default external encoding.
+ *
+ * @return An instance of ::rb_cEncoding of default external.
+ */
+VALUE rb_enc_default_external(void);
+
+/**
+ * Identical to rb_default_internal_encoding(), except it returns the
+ * Ruby-level counterpart instance of ::rb_cEncoding that corresponds to the
+ * default internal encoding.
+ *
+ * @return An instance of ::rb_cEncoding of default internal.
+ */
+VALUE rb_enc_default_internal(void);
+
+/**
+ * Destructively assigns the passed encoding as the default external encoding.
+ * You should not use this API. It has process-global side effects. Also it
+ * doesn't change encodings of strings that have already been read.
+ *
+ * @param[in] encoding Ruby level encoding.
+ * @exception rb_eArgError `encoding` is ::RUBY_Qnil.
+ * @post The default external encoding is `encoding`.
+ */
+void rb_enc_set_default_external(VALUE encoding);
+
+/**
+ * Destructively assigns the passed encoding as the default internal encoding.
+ * You should not use this API. It has process-global side effects. Also it
+ * doesn't change encodings of strings that have already been read.
+ *
+ * @param[in] encoding Ruby level encoding.
+ * @post The default internal encoding is `encoding`.
+ * @note Unlike rb_enc_set_default_external() you can pass ::RUBY_Qnil.
+ */
+void rb_enc_set_default_internal(VALUE encoding);
+
+/**
+ * Returns a platform-dependent "charmap" of the current locale. This
+ * information is called a "Codeset name" in IEEE 1003.1 section 13
+ * (`<langinfo.h>`). This is a very low-level API. The return value can have
+ * no corresponding encoding when passed to rb_find_encoding().
+ *
+ * @param[in] klass Ignored for no reason (why...)
+ * @return The low-level locale charmap, in Ruby's String.
+ */
+VALUE rb_locale_charmap(VALUE klass);
+
+RBIMPL_SYMBOL_EXPORT_END()
+
+/** @cond INTERNAL_MACRO */
+#define RB_ENCODING_GET RB_ENCODING_GET
+#define RB_ENCODING_GET_INLINED RB_ENCODING_GET_INLINED
+#define RB_ENCODING_IS_ASCII8BIT RB_ENCODING_IS_ASCII8BIT
+#define RB_ENCODING_SET RB_ENCODING_SET
+#define RB_ENCODING_SET_INLINED RB_ENCODING_SET_INLINED
+#define rb_enc_asciicompat rb_enc_asciicompat
+#define rb_enc_code_to_mbclen rb_enc_code_to_mbclen
+#define rb_enc_codepoint rb_enc_codepoint
+#define rb_enc_left_char_head rb_enc_left_char_head
+#define rb_enc_mbc_to_codepoint rb_enc_mbc_to_codepoint
+#define rb_enc_mbcput rb_enc_mbcput
+#define rb_enc_mbmaxlen rb_enc_mbmaxlen
+#define rb_enc_mbminlen rb_enc_mbminlen
+#define rb_enc_name rb_enc_name
+#define rb_enc_prev_char rb_enc_prev_char
+#define rb_enc_right_char_head rb_enc_right_char_head
+#define rb_enc_step_back rb_enc_step_back
+#define rb_enc_str_asciicompat_p rb_enc_str_asciicompat_p
+/** @endcond */
+
+#endif /* RUBY_INTERNAL_ENCODING_ENCODING_H */
diff --git a/include/ruby/internal/encoding/pathname.h b/include/ruby/internal/encoding/pathname.h
new file mode 100644
index 0000000000..0b5e85a524
--- /dev/null
+++ b/include/ruby/internal/encoding/pathname.h
@@ -0,0 +1,184 @@
+#ifndef RUBY_INTERNAL_ENCODING_PATHNAME_H /*-*-C++-*-vi:se ft=cpp:*/
+#define RUBY_INTERNAL_ENCODING_PATHNAME_H
+/**
+ * @file
+ * @author Ruby developers <ruby-core@ruby-lang.org>
+ * @copyright This file is a part of the programming language Ruby.
+ * Permission is hereby granted, to either redistribute and/or
+ * modify this file, provided that the conditions mentioned in the
+ * file COPYING are met. Consult the file for details.
+ * @warning Symbols prefixed with either `RBIMPL` or `rbimpl` are
+ * implementation details. Don't take them as canon. They could
+ * rapidly appear then vanish. The name (path) of this header file
+ * is also an implementation detail. Do not expect it to persist
+ * at the place it is now. Developers are free to move it anywhere
+ * anytime at will.
+ * @note To ruby-core: remember that this header can be possibly
+ * recursively included from extension libraries written in C++.
+ * Do not expect for instance `__VA_ARGS__` is always available.
+ * We assume C99 for ruby itself but we don't assume languages of
+ * extension libraries. They could be written in C++98.
+ * @brief Routines to manipulate encodings of pathnames.
+ */
+
+#include "ruby/internal/attr/nonnull.h"
+#include "ruby/internal/dllexport.h"
+#include "ruby/internal/encoding/encoding.h"
+#include "ruby/internal/value.h"
+
+RBIMPL_SYMBOL_EXPORT_BEGIN()
+RBIMPL_ATTR_NONNULL(())
+/**
+ * Returns a path component directly adjacent to the passed pointer.
+ *
+ * ```
+ * "/multi/byte/encoded/pathname.txt"
+ * ^ ^ ^
+ * | | +--- end
+ * | +--- @return
+ * +--- path
+ * ```
+ *
+ * @param[in] path Where to start scanning.
+ * @param[in] end End of the path string.
+ * @param[in] enc Encoding of the string.
+ * @return A pointer in the passed string where the next path component
+ * resides, or `end` if there is no next path component.
+ */
+char *rb_enc_path_next(const char *path, const char *end, rb_encoding *enc);
+
+RBIMPL_ATTR_NONNULL(())
+/**
+ * Seeks for non-prefix part of a pathname. This can be a no-op when the OS
+ * has no such concept like a path prefix. But there are OSes where path
+ * prefixes do exist.
+ *
+ * ```
+ * "C:\multi\byte\encoded\pathname.txt"
+ * ^ ^ ^
+ * | | +--- end
+ * | +--- @return
+ * +--- path
+ * ```
+ *
+ * @param[in] path Where to start scanning.
+ * @param[in] end End of the path string.
+ * @param[in] enc Encoding of the string.
+ * @return A pointer in the passed string where non-prefix part starts, or
+ * `path` if the OS does not have path prefix.
+ */
+char *rb_enc_path_skip_prefix(const char *path, const char *end, rb_encoding *enc);
+
+RBIMPL_ATTR_NONNULL(())
+/**
+ * Returns the last path component.
+ *
+ * ```
+ * "/multi/byte/encoded/pathname.txt"
+ * ^ ^ ^
+ * | | +--- end
+ * | +--- @return
+ * +--- path
+ * ```
+ *
+ * @param[in] path Where to start scanning.
+ * @param[in] end End of the path string.
+ * @param[in] enc Encoding of the string.
+ * @return A pointer in the passed string where the last path component
+ * resides, or `end` if there is no more path component.
+ */
+char *rb_enc_path_last_separator(const char *path, const char *end, rb_encoding *enc);
+
+RBIMPL_ATTR_NONNULL(())
+/**
+ * This just returns the passed end basically. It makes a difference in case
+ * the passed string ends with tons of path separators like the following:
+ *
+ * ```
+ * "/path/that/ends/with/lots/of/slashes//////////////"
+ * ^ ^ ^
+ * | | +--- end
+ * | +--- @return
+ * +--- path
+ * ```
+ *
+ * @param[in] path Where to start scanning.
+ * @param[in] end End of the path string.
+ * @param[in] enc Encoding of the string.
+ * @return A pointer in the passed string where the trailing path
+ * separators start, or `end` if there is no trailing path
+ * separators.
+ *
+ * @internal
+ *
+ * It seems this function was introduced to mimic what POSIX says about
+ * `basename(3)`.
+ */
+char *rb_enc_path_end(const char *path, const char *end, rb_encoding *enc);
+
+RBIMPL_ATTR_NONNULL((1, 4))
+/**
+ * Our own encoding-aware version of `basename(3)`. Normally, this function
+ * returns the last path component of the given name. However in case the
+ * passed name ends with a path separator, it returns the name of the
+ * directory, not the last (empty) component. Also if the passed name is a
+ * root directory, it returns that root directory. Note however that Windows
+ * filesystems have drive letters, which this function does not return.
+ *
+ * @param[in] name Target path.
+ * @param[out] baselen Return buffer.
+ * @param[in,out] alllen Number of bytes of `name`.
+ * @param[in] enc Encoding of `name`.
+ * @return The rightmost component of `name`.
+ * @post `baselen`, if passed, is updated to be the number of bytes
+ * of the returned basename.
+ * @post `alllen`, if passed, is updated to be the number of bytes of
+ * strings not considered as the basename.
+ */
+const char *ruby_enc_find_basename(const char *name, long *baselen, long *alllen, rb_encoding *enc);
+
+RBIMPL_ATTR_NONNULL((1, 3))
+/**
+ * Our own encoding-aware version of `extname`. This function first applies
+ * rb_enc_path_last_separator() to the passed name and only concerns its return
+ * value (ignores any parent directories). This function returns complicated
+ * results:
+ *
+ * ```CXX
+ * auto path = "...";
+ * auto len = strlen(path);
+ * auto ret = ruby_enc_find_extname(path, &len, rb_ascii8bit_encoding());
+ *
+ * switch(len) {
+ * case 0:
+ * if (ret == 0) {
+ * // `path` is a file without extensions.
+ * }
+ * else {
+ * // `path` is a dotfile.
+ * // `ret` is the file's name.
+ * }
+ * break;
+ *
+ * case 1:
+ * // `path` _ends_ with a dot.
+ * // `ret` is that dot.
+ * break;
+ *
+ * default:
+ * // `path` has an extension.
+ * // `ret` is that extension.
+ * }
+ * ```
+ *
+ * @param[in] name Target path.
+ * @param[in,out] len Number of bytes of `name`.
+ * @param[in] enc Encoding of `name`.
+ * @return See above.
+ * @post `len`, if passed, is updated (see above).
+ */
+const char *ruby_enc_find_extname(const char *name, long *len, rb_encoding *enc);
+
+RBIMPL_SYMBOL_EXPORT_END()
+
+#endif /* RUBY_INTERNAL_ENCODING_PATHNAME_H */
diff --git a/include/ruby/internal/encoding/re.h b/include/ruby/internal/encoding/re.h
new file mode 100644
index 0000000000..d0de23bc83
--- /dev/null
+++ b/include/ruby/internal/encoding/re.h
@@ -0,0 +1,46 @@
+#ifndef RUBY_INTERNAL_ENCODING_RE_H /*-*-C++-*-vi:se ft=cpp:*/
+#define RUBY_INTERNAL_ENCODING_RE_H
+/**
+ * @file
+ * @author Ruby developers <ruby-core@ruby-lang.org>
+ * @copyright This file is a part of the programming language Ruby.
+ * Permission is hereby granted, to either redistribute and/or
+ * modify this file, provided that the conditions mentioned in the
+ * file COPYING are met. Consult the file for details.
+ * @warning Symbols prefixed with either `RBIMPL` or `rbimpl` are
+ * implementation details. Don't take them as canon. They could
+ * rapidly appear then vanish. The name (path) of this header file
+ * is also an implementation detail. Do not expect it to persist
+ * at the place it is now. Developers are free to move it anywhere
+ * anytime at will.
+ * @note To ruby-core: remember that this header can be possibly
+ * recursively included from extension libraries written in C++.
+ * Do not expect for instance `__VA_ARGS__` is always available.
+ * We assume C99 for ruby itself but we don't assume languages of
+ * extension libraries. They could be written in C++98.
+ * @brief Routines to manipulate encodings of regular expressions.
+ */
+
+#include "ruby/internal/dllexport.h"
+#include "ruby/internal/encoding/encoding.h"
+#include "ruby/internal/value.h"
+
+RBIMPL_SYMBOL_EXPORT_BEGIN()
+
+/**
+ * Identical to rb_reg_new(), except it additionally takes an encoding.
+ *
+ * @param[in] ptr A memory region of `len` bytes length.
+ * @param[in] len Length of `ptr`, in bytes, not including the
+ * terminating NUL character.
+ * @param[in] enc Encoding of `ptr`.
+ * @param[in] opts Options e.g. ONIG_OPTION_MULTILINE.
+ * @exception rb_eRegexpError Failed to compile `ptr`.
+ * @return An allocated new instance of ::rb_cRegexp, of `enc` encoding,
+ * whose expression is compiled according to `ptr`.
+ */
+VALUE rb_enc_reg_new(const char *ptr, long len, rb_encoding *enc, int opts);
+
+RBIMPL_SYMBOL_EXPORT_END()
+
+#endif /* RUBY_INTERNAL_ENCODING_RE_H */
diff --git a/include/ruby/internal/encoding/sprintf.h b/include/ruby/internal/encoding/sprintf.h
new file mode 100644
index 0000000000..cb8737b414
--- /dev/null
+++ b/include/ruby/internal/encoding/sprintf.h
@@ -0,0 +1,78 @@
+#ifndef RUBY_INTERNAL_ENCODING_SPRINTF_H /*-*-C++-*-vi:se ft=cpp:*/
+#define RUBY_INTERNAL_ENCODING_SPRINTF_H
+/**
+ * @file
+ * @author Ruby developers <ruby-core@ruby-lang.org>
+ * @copyright This file is a part of the programming language Ruby.
+ * Permission is hereby granted, to either redistribute and/or
+ * modify this file, provided that the conditions mentioned in the
+ * file COPYING are met. Consult the file for details.
+ * @warning Symbols prefixed with either `RBIMPL` or `rbimpl` are
+ * implementation details. Don't take them as canon. They could
+ * rapidly appear then vanish. The name (path) of this header file
+ * is also an implementation detail. Do not expect it to persist
+ * at the place it is now. Developers are free to move it anywhere
+ * anytime at will.
+ * @note To ruby-core: remember that this header can be possibly
+ * recursively included from extension libraries written in C++.
+ * Do not expect for instance `__VA_ARGS__` is always available.
+ * We assume C99 for ruby itself but we don't assume languages of
+ * extension libraries. They could be written in C++98.
+ * @brief Encoding-aware routines to format strings (`sprintf`-like).
+ */
+#include "ruby/internal/config.h"
+#include <stdarg.h>
+#include "ruby/internal/attr/format.h"
+#include "ruby/internal/attr/nonnull.h"
+#include "ruby/internal/attr/noreturn.h"
+#include "ruby/internal/dllexport.h"
+#include "ruby/internal/encoding/encoding.h"
+#include "ruby/internal/value.h"
+
+RBIMPL_SYMBOL_EXPORT_BEGIN()
+RBIMPL_ATTR_NONNULL((2))
+RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 2, 3)
+/**
+ * Identical to rb_sprintf(), except it additionally takes an encoding. The
+ * passed encoding rules both the incoming format specifier and the resulting
+ * string.
+ *
+ * @param[in] enc Encoding of `fmt`.
+ * @param[in] fmt A `printf`-like format specifier.
+ * @param[in] ... Variadic number of contents to format.
+ * @return A rendered new instance of ::rb_cString, of `enc` encoding.
+ */
+VALUE rb_enc_sprintf(rb_encoding *enc, const char *fmt, ...);
+
+RBIMPL_ATTR_NONNULL((2))
+RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 2, 0)
+/**
+ * Identical to rb_enc_sprintf(), except it takes a `va_list` instead of
+ * variadic arguments. It can also be seen as a routine identical to
+ * rb_vsprintf(), except it additionally takes an encoding.
+ *
+ * @param[in] enc Encoding of `fmt`.
+ * @param[in] fmt A `printf`-like format specifier.
+ * @param[in] ap Contents to format.
+ * @return A rendered new instance of ::rb_cString, of `enc` encoding.
+ */
+VALUE rb_enc_vsprintf(rb_encoding *enc, const char *fmt, va_list ap);
+
+RBIMPL_ATTR_NORETURN()
+RBIMPL_ATTR_NONNULL((3))
+RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 3, 4)
+/**
+ * Identical to rb_raise(), except it additionally takes an encoding.
+ *
+ * @param[in] enc Encoding of the generating exception.
+ * @param[in] exc A subclass of ::rb_eException.
+ * @param[in] fmt Format specifier string compatible with rb_sprintf().
+ * @param[in] ... Contents of the message.
+ * @exception exc The specified exception.
+ * @note It never returns.
+ */
+void rb_enc_raise(rb_encoding *enc, VALUE exc, const char *fmt, ...);
+
+RBIMPL_SYMBOL_EXPORT_END()
+
+#endif /* RUBY_INTERNAL_ENCODING_SPRINTF_H */
diff --git a/include/ruby/internal/encoding/string.h b/include/ruby/internal/encoding/string.h
new file mode 100644
index 0000000000..2b9dfe4f31
--- /dev/null
+++ b/include/ruby/internal/encoding/string.h
@@ -0,0 +1,346 @@
+#ifndef RUBY_INTERNAL_ENCODING_STRING_H /*-*-C++-*-vi:se ft=cpp:*/
+#define RUBY_INTERNAL_ENCODING_STRING_H
+/**
+ * @file
+ * @author Ruby developers <ruby-core@ruby-lang.org>
+ * @copyright This file is a part of the programming language Ruby.
+ * Permission is hereby granted, to either redistribute and/or
+ * modify this file, provided that the conditions mentioned in the
+ * file COPYING are met. Consult the file for details.
+ * @warning Symbols prefixed with either `RBIMPL` or `rbimpl` are
+ * implementation details. Don't take them as canon. They could
+ * rapidly appear then vanish. The name (path) of this header file
+ * is also an implementation detail. Do not expect it to persist
+ * at the place it is now. Developers are free to move it anywhere
+ * anytime at will.
+ * @note To ruby-core: remember that this header can be possibly
+ * recursively included from extension libraries written in C++.
+ * Do not expect for instance `__VA_ARGS__` is always available.
+ * We assume C99 for ruby itself but we don't assume languages of
+ * extension libraries. They could be written in C++98.
+ * @brief Routines to manipulate encodings of strings.
+ */
+
+#include "ruby/internal/dllexport.h"
+#include "ruby/internal/value.h"
+#include "ruby/internal/encoding/encoding.h"
+#include "ruby/internal/attr/nonnull.h"
+#include "ruby/internal/intern/string.h" /* rbimpl_strlen */
+
+RBIMPL_SYMBOL_EXPORT_BEGIN()
+
+/**
+ * Identical to rb_str_new(), except it additionally takes an encoding.
+ *
+ * @param[in] ptr A memory region of `len` bytes length.
+ * @param[in] len Length of `ptr`, in bytes, not including the
+ * terminating NUL character.
+ * @param[in] enc Encoding of `ptr`.
+ * @exception rb_eNoMemError Failed to allocate `len+1` bytes.
+ * @exception rb_eArgError `len` is negative.
+ * @return An instance of ::rb_cString, of `len` bytes length, of `enc`
+ * encoding, whose contents are verbatim copy of `ptr`.
+ * @pre At least `len` bytes of continuous memory region shall be
+ * accessible via `ptr`.
+ * @note `enc` can be a null pointer. It can also be seen as a routine
+ * identical to rb_usascii_str_new() then.
+ */
+VALUE rb_enc_str_new(const char *ptr, long len, rb_encoding *enc);
+
+RBIMPL_ATTR_NONNULL((1))
+/**
+ * Identical to rb_enc_str_new(), except it assumes the passed pointer is a
+ * pointer to a C string. It can also be seen as a routine identical to
+ * rb_str_new_cstr(), except it additionally takes an encoding.
+ *
+ * @param[in] ptr A C string.
+ * @param[in] enc Encoding of `ptr`.
+ * @exception rb_eNoMemError Failed to allocate memory.
+ * @return An instance of ::rb_cString, of `enc` encoding, whose contents
+ * are verbatim copy of `ptr`.
+ * @pre `ptr` must not be a null pointer.
+ * @pre Because `ptr` is a C string it makes no sense for `enc` to be
+ * something like UTF-32.
+ * @note `enc` can be a null pointer. It can also be seen as a routine
+ * identical to rb_usascii_str_new_cstr() then.
+ */
+VALUE rb_enc_str_new_cstr(const char *ptr, rb_encoding *enc);
+
+/**
+ * Identical to rb_enc_str_new(), except it takes a C string literal. It can
+ * also be seen as a routine identical to rb_str_new_static(), except it
+ * additionally takes an encoding.
+ *
+ * @param[in] ptr A C string literal.
+ * @param[in] len `strlen(ptr)`.
+ * @param[in] enc Encoding of `ptr`.
+ * @exception rb_eArgError `len` out of range of `size_t`.
+ * @pre `ptr` must be a C string constant.
+ * @return An instance of ::rb_cString, of `enc` encoding, whose backend
+ * storage is the passed C string literal.
+ * @warning It is a very bad idea to write to a C string literal (often
+ * immediate SEGV shall occur). Consider return values of this
+ * function be read-only.
+ * @note `enc` can be a null pointer. It can also be seen as a routine
+ * identical to rb_usascii_str_new_static() then.
+ */
+VALUE rb_enc_str_new_static(const char *ptr, long len, rb_encoding *enc);
+
+/**
+ * Identical to rb_enc_str_new(), except it returns a "f"string. It can also
+ * be seen as a routine identical to rb_interned_str(), except it additionally
+ * takes an encoding.
+ *
+ * @param[in] ptr A memory region of `len` bytes length.
+ * @param[in] len Length of `ptr`, in bytes, not including the
+ * terminating NUL character.
+ * @param[in] enc Encoding of `ptr`.
+ * @exception rb_eArgError `len` is negative.
+ * @return A found or created instance of ::rb_cString, of `len` bytes
+ * length, of `enc` encoding, whose contents are identical to that
+ * of `ptr`.
+ * @pre At least `len` bytes of continuous memory region shall be
+ * accessible via `ptr`.
+ * @note `enc` can be a null pointer.
+ */
+VALUE rb_enc_interned_str(const char *ptr, long len, rb_encoding *enc);
+
+RBIMPL_ATTR_NONNULL((1))
+/**
+ * Identical to rb_enc_str_new_cstr(), except it returns a "f"string. It can
+ * also be seen as a routine identical to rb_interned_str_cstr(), except it
+ * additionally takes an encoding.
+ *
+ * @param[in] ptr A C string.
+ * @param[in] enc Encoding of `ptr`.
+ * @return A found or created instance of ::rb_cString of `enc` encoding,
+ * whose contents are identical to that of `ptr`.
+ * @pre `ptr` must be a NUL-terminated C string; it must not be a null
+ * pointer.
+ * @note `enc` can be a null pointer.
+ */
+VALUE rb_enc_interned_str_cstr(const char *ptr, rb_encoding *enc);
+
+/**
+ * Counts the number of characters of the passed string, according to the
+ * passed encoding. This has to be complicated. The passed string could be
+ * invalid and/or broken. This routine would scan from the beginning til the
+ * end, byte by byte, to seek out character boundaries. Could be super slow.
+ *
+ * @param[in] head Leftmost pointer to the string.
+ * @param[in] tail Rightmost pointer to the string.
+ * @param[in] enc Encoding of the string.
+ * @return Number of characters exist in `head` .. `tail`. The definition
+ * of "character" depends on the passed `enc`.
+ */
+long rb_enc_strlen(const char *head, const char *tail, rb_encoding *enc);
+
+/**
+ * Queries the n-th character. Like rb_enc_strlen() this function can be fast
+ * or slow depending on the contents. Don't expect characters to be uniformly
+ * distributed across the entire string.
+ *
+ * @param[in] head Leftmost pointer to the string.
+ * @param[in] tail Rightmost pointer to the string.
+ * @param[in] nth Requested index of characters.
+ * @param[in] enc Encoding of the string.
+ * @return Pointer to the first byte of the character that is `nth`
+ * character ahead of `head`, or `tail` if there is no such
+ * character (OOB etc). The definition of "character" depends on
+ * the passed `enc`.
+ */
+char *rb_enc_nth(const char *head, const char *tail, long nth, rb_encoding *enc);
+
+/**
+ * Identical to rb_enc_get_index(), except the return type.
+ *
+ * @param[in] obj Object in question.
+ * @exception rb_eTypeError `obj` is incapable of having an encoding.
+ * @return `obj`'s encoding.
+ */
+VALUE rb_obj_encoding(VALUE obj);
+
+/**
+ * Identical to rb_str_cat(), except it additionally takes an encoding.
+ *
+ * @param[out] str Destination object.
+ * @param[in] ptr Contents to append.
+ * @param[in] len Length of `ptr`, in bytes.
+ * @param[in] enc Encoding of `ptr`.
+ * @exception rb_eArgError `len` is negative.
+ * @exception rb_eEncCompatError `enc` is not compatible with `str`.
+ * @return The passed `str`.
+ * @post The contents of `ptr` is copied, transcoded into `str`'s
+ * encoding, then pasted into `str`'s end.
+ */
+VALUE rb_enc_str_buf_cat(VALUE str, const char *ptr, long len, rb_encoding *enc);
+
+/**
+ * Encodes the passed code point into a series of bytes.
+ *
+ * @param[in] code Code point.
+ * @param[in] enc Target encoding scheme.
+ * @exception rb_eRangeError `enc` does not glean `code`.
+ * @return An instance of ::rb_cString, of `enc` encoding, whose sole
+ * contents is `code` represented in `enc`.
+ * @note No way to encode code points bigger than UINT_MAX.
+ *
+ * @internal
+ *
+ * In other languages, APIs like this one could be seen as the primitive
+ * routines where encodings' "encode" feature are implemented. However in case
+ * of Ruby this is not the primitive one. We directly manipulate encoded
+ * strings. Encoding conversion routines transcode an encoded string directly
+ * to another one; not via a code point array.
+ */
+VALUE rb_enc_uint_chr(unsigned int code, rb_encoding *enc);
+
+/**
+ * Identical to rb_external_str_new(), except it additionally takes an
+ * encoding. However the whole point of rb_external_str_new() is to encode a
+ * string into default external encoding. Being able to specify arbitrary
+ * encoding just ruins the designed purpose the function meseems.
+ *
+ * @param[in] ptr A memory region of `len` bytes length.
+ * @param[in] len Length of `ptr`, in bytes, not including the
+ * terminating NUL character.
+ * @param[in] enc Target encoding scheme.
+ * @exception rb_eArgError `len` is negative.
+ * @return An instance of ::rb_cString. In case encoding conversion from
+ * "default internal" to `enc` is fully defined over the given
+ * contents, then the return value is a string of `enc` encoding,
+ * whose contents are the converted ones. Otherwise the string is
+ * a junk.
+ * @warning It doesn't raise on a conversion failure and silently ends up in
+ * a corrupted output. You can know the failure by querying
+ * `valid_encoding?` of the result object.
+ *
+ * @internal
+ *
+ * @shyouhei has no idea why this one does not follow the naming convention
+ * that others obey. It seems to him that this should have been called
+ * `rb_enc_external_str_new`.
+ */
+VALUE rb_external_str_new_with_enc(const char *ptr, long len, rb_encoding *enc);
+
+/**
+ * Identical to rb_str_export(), except it additionally takes an encoding.
+ *
+ * @param[in] obj Target object.
+ * @param[in] enc Target encoding.
+ * @exception rb_eTypeError No implicit conversion to String.
+ * @return Converted ruby string of `enc` encoding.
+ */
+VALUE rb_str_export_to_enc(VALUE obj, rb_encoding *enc);
+
+/**
+ * Encoding conversion main routine.
+ *
+ * @param[in] str String to convert.
+ * @param[in] from Source encoding.
+ * @param[in] to Destination encoding.
+ * @return A copy of `str`, with conversion from `from` to `to` applied.
+ * @note `from` can be a null pointer. `str`'s encoding is taken then.
+ * @note `to` can be a null pointer. No-op then.
+ */
+VALUE rb_str_conv_enc(VALUE str, rb_encoding *from, rb_encoding *to);
+
+/**
+ * Identical to rb_str_conv_enc(), except it additionally takes IO encoder
+ * options. The extra arguments can be constructed using io_extract_modeenc()
+ * etc.
+ *
+ * @param[in] str String to convert.
+ * @param[in] from Source encoding.
+ * @param[in] to Destination encoding.
+ * @param[in] ecflags A set of enum ::ruby_econv_flag_type.
+ * @param[in] ecopts Optional hash.
+ * @return A copy of `str`, with conversion from `from` to `to` applied.
+ * @note `from` can be a null pointer. `str`'s encoding is taken then.
+ * @note `to` can be a null pointer. No-op then.
+ * @note `ecopts` can be ::RUBY_Qnil, which is equivalent to passing an
+ * empty hash.
+ */
+VALUE rb_str_conv_enc_opts(VALUE str, rb_encoding *from, rb_encoding *to, int ecflags, VALUE ecopts);
+
+/**
+ * Scans the passed string to collect its code range. Because a Ruby's string
+ * is mutable, its contents change from time to time; so does its code range.
+ * A long-lived string tends to fall back to ::RUBY_ENC_CODERANGE_UNKNOWN.
+ * This API scans it and re-assigns a fine-grained code range constant.
+ *
+ * @param[out] str A string.
+ * @return An enum ::ruby_coderange_type.
+ */
+int rb_enc_str_coderange(VALUE str);
+
+/**
+ * Scans the passed string until it finds something odd. Returns the number of
+ * bytes scanned. As the name implies this is suitable for repeated call. One
+ * of its application is `IO#readlines`. The method reads from its receiver's
+ * read buffer, maybe more than once, looking for newlines. But "newline" can
+ * be different among encodings. This API is used to detect broken contents to
+ * properly mark them as such.
+ *
+ * @param[in] str String to scan.
+ * @param[in] end End of `str`.
+ * @param[in] enc `str`'s encoding.
+ * @param[out] cr Return buffer.
+ * @return Distance between `str` and first such byte where broken.
+ * @post `cr` has the code range type.
+ */
+long rb_str_coderange_scan_restartable(const char *str, const char *end, rb_encoding *enc, int *cr);
+
+/**
+ * Queries if the passed string is "ASCII only". An ASCII only string is a
+ * string who doesn't have any non-ASCII characters at all. This doesn't
+ * necessarily mean the string is in ASCII encoding. For instance a String of
+ * CP932 encoding can quite much be ASCII only, depending on its contents.
+ *
+ * @param[in] str String in question.
+ * @retval 1 It doesn't have non-ASCII characters.
+ * @retval 0 It has characters that are out of ASCII.
+ */
+int rb_enc_str_asciionly_p(VALUE str);
+
+RBIMPL_ATTR_NONNULL(())
+/**
+ * Looks for the passed string in the passed buffer.
+ *
+ * @param[in] x Buffer that potentially includes `y`.
+ * @param[in] m Number of bytes of `x`.
+ * @param[in] y Query string.
+ * @param[in] n Number of bytes of `y`.
+ * @param[in] enc Encoding of both `x` and `y`.
+ * @retval -1 Not found.
+ * @retval otherwise Found index in `x`.
+ * @note This API can match at a non-character-boundary.
+ */
+long rb_memsearch(const void *x, long m, const void *y, long n, rb_encoding *enc);
+
+/** @cond INTERNAL_MACRO */
+RBIMPL_ATTR_NONNULL((1)) /* only `str` is required; `enc` may be a null pointer, matching rb_enc_str_new_cstr() */
+static inline VALUE
+rbimpl_enc_str_new_cstr(const char *str, rb_encoding *enc) /* helper behind the rb_enc_str_new_cstr() macro */
+{
+ long len = rbimpl_strlen(str); /* length of the C string `str`, as reported by rbimpl_strlen() */
+
+ return rb_enc_str_new_static(str, len, enc); /* `str` itself becomes the backend storage; nothing is copied */
+}
+
+#define rb_enc_str_new(str, len, enc) /* shadows the function of the same name */ \
+ ((RBIMPL_CONSTANT_P(str) && /* presumably a compile-time-constant test -- TODO confirm in its defining header */ \
+ RBIMPL_CONSTANT_P(len) ? /* both tests must hold to take the first branch */ \
+ rb_enc_str_new_static: /* first branch: the variant that keeps `str` as backend storage */ \
+ rb_enc_str_new) /* second branch: the real function; the name is not re-expanded here */ ((str), (len), (enc)))
+
+#define rb_enc_str_new_cstr(str, enc) /* shadows the function of the same name */ \
+ ((RBIMPL_CONSTANT_P(str) ? /* same test as above, on `str` only */ \
+ rbimpl_enc_str_new_cstr : /* first branch: inline helper that forwards to rb_enc_str_new_static() */ \
+ rb_enc_str_new_cstr) /* second branch: the real function */ ((str), (enc)))
+
+/** @endcond */
+
+RBIMPL_SYMBOL_EXPORT_END()
+
+#endif /* RUBY_INTERNAL_ENCODING_STRING_H */
diff --git a/include/ruby/internal/encoding/symbol.h b/include/ruby/internal/encoding/symbol.h
new file mode 100644
index 0000000000..9cd1b0dbf4
--- /dev/null
+++ b/include/ruby/internal/encoding/symbol.h
@@ -0,0 +1,100 @@
+#ifndef RUBY_INTERNAL_ENCODING_SYMBOL_H /*-*-C++-*-vi:se ft=cpp:*/
+#define RUBY_INTERNAL_ENCODING_SYMBOL_H
+/**
+ * @file
+ * @author Ruby developers <ruby-core@ruby-lang.org>
+ * @copyright This file is a part of the programming language Ruby.
+ * Permission is hereby granted, to either redistribute and/or
+ * modify this file, provided that the conditions mentioned in the
+ * file COPYING are met. Consult the file for details.
+ * @warning Symbols prefixed with either `RBIMPL` or `rbimpl` are
+ * implementation details. Don't take them as canon. They could
+ * rapidly appear then vanish. The name (path) of this header file
+ * is also an implementation detail. Do not expect it to persist
+ * at the place it is now. Developers are free to move it anywhere
+ * anytime at will.
+ * @note To ruby-core: remember that this header can be possibly
+ * recursively included from extension libraries written in C++.
+ * Do not expect for instance `__VA_ARGS__` is always available.
+ * We assume C99 for ruby itself but we don't assume languages of
+ * extension libraries. They could be written in C++98.
+ * @brief Routines to manipulate encodings of symbols.
+ */
+
+#include "ruby/internal/attr/nonnull.h"
+#include "ruby/internal/dllexport.h"
+#include "ruby/internal/encoding/encoding.h"
+#include "ruby/internal/value.h"
+
+RBIMPL_SYMBOL_EXPORT_BEGIN()
+
+/**
+ * Identical to rb_intern2(), except it additionally takes an encoding.
+ *
+ * @param[in] name The name of the id.
+ * @param[in] len Length of `name`.
+ * @param[in] enc `name`'s encoding.
+ * @exception rb_eRuntimeError Too many symbols.
+ * @return A (possibly new) id whose value is the given name.
+ * @note These days Ruby internally has two kinds of symbols
+ * (static/dynamic). Symbols created using this function would
+ * become static ones; i.e. would never be garbage collected. It
+ * is up to you to avoid memory leaks. Think twice before using
+ * it.
+ */
+ID rb_intern3(const char *name, long len, rb_encoding *enc);
+
+RBIMPL_ATTR_NONNULL(())
+/**
+ * Identical to rb_symname_p(), except it additionally takes an encoding.
+ *
+ * @param[in] str A C string to check.
+ * @param[in] enc `str`'s encoding.
+ * @retval 1 It is a valid symbol name.
+ * @retval 0 It is invalid as a symbol name.
+ */
+int rb_enc_symname_p(const char *str, rb_encoding *enc);
+
+/**
+ * Identical to rb_enc_symname_p(), except it additionally takes the passed
+ * string's length. This is needed for strings containing NUL bytes, like in
+ * case of UTF-32.
+ *
+ * @param[in] name A C string to check.
+ * @param[in] len Number of bytes of `name`.
+ * @param[in] enc `name`'s encoding.
+ * @retval 1 It is a valid symbol name.
+ * @retval 0 It is invalid as a symbol name.
+ */
+int rb_enc_symname2_p(const char *name, long len, rb_encoding *enc);
+
+/**
+ * Identical to rb_check_id(), except it takes a pointer to a memory region
+ * instead of Ruby's string.
+ *
+ * @param[in] ptr A pointer to a memory region.
+ * @param[in] len Number of bytes of `ptr`.
+ * @param[in] enc Encoding of `ptr`.
+ * @exception rb_eEncodingError `ptr` contains non-ASCII according to `enc`.
+ * @retval 0 No such id ever existed in the history.
+ * @retval otherwise The id that represents the given name.
+ */
+ID rb_check_id_cstr(const char *ptr, long len, rb_encoding *enc);
+
+/**
+ * Identical to rb_check_id_cstr(), except for the return type. It can also be
+ * seen as a routine identical to rb_check_symbol(), except it takes a pointer
+ * to a memory region instead of Ruby's string.
+ *
+ * @param[in] ptr A pointer to a memory region.
+ * @param[in] len Number of bytes of `ptr`.
+ * @param[in] enc Encoding of `ptr`.
+ * @exception rb_eEncodingError `ptr` contains non-ASCII according to `enc`.
+ * @retval RUBY_Qnil No such id ever existed in the history.
+ * @retval otherwise The symbol that represents the given name.
+ */
+VALUE rb_check_symbol_cstr(const char *ptr, long len, rb_encoding *enc);
+
+RBIMPL_SYMBOL_EXPORT_END()
+
+#endif /* RUBY_INTERNAL_ENCODING_SYMBOL_H */
diff --git a/include/ruby/internal/encoding/transcode.h b/include/ruby/internal/encoding/transcode.h
new file mode 100644
index 0000000000..7f26d2eae9
--- /dev/null
+++ b/include/ruby/internal/encoding/transcode.h
@@ -0,0 +1,562 @@
+#ifndef RUBY_INTERNAL_ENCODING_TRANSCODE_H /*-*-C++-*-vi:se ft=cpp:*/
+#define RUBY_INTERNAL_ENCODING_TRANSCODE_H
+/**
+ * @file
+ * @author Ruby developers <ruby-core@ruby-lang.org>
+ * @copyright This file is a part of the programming language Ruby.
+ * Permission is hereby granted, to either redistribute and/or
+ * modify this file, provided that the conditions mentioned in the
+ * file COPYING are met. Consult the file for details.
+ * @warning Symbols prefixed with either `RBIMPL` or `rbimpl` are
+ * implementation details. Don't take them as canon. They could
+ * rapidly appear then vanish. The name (path) of this header file
+ * is also an implementation detail. Do not expect it to persist
+ * at the place it is now. Developers are free to move it anywhere
+ * anytime at will.
+ * @note To ruby-core: remember that this header can be possibly
+ * recursively included from extension libraries written in C++.
+ * Do not expect for instance `__VA_ARGS__` is always available.
+ * We assume C99 for ruby itself but we don't assume languages of
+ * extension libraries. They could be written in C++98.
+ * @brief econv stuff
+ */
+
+#include "ruby/internal/dllexport.h"
+#include "ruby/internal/value.h"
+
+RBIMPL_SYMBOL_EXPORT_BEGIN()
+
+/** return value of rb_econv_convert() */
+typedef enum {
+
+ /**
+ * The conversion stopped when it found an invalid sequence.
+ */
+ econv_invalid_byte_sequence,
+
+ /**
+ * The conversion stopped when it found a character in the input which
+ * is not representable in the output.
+ */
+ econv_undefined_conversion,
+
+ /**
+ * The conversion stopped because there is no destination.
+ */
+ econv_destination_buffer_full,
+
+ /**
+ * The conversion stopped because there is no input.
+ */
+ econv_source_buffer_empty,
+
+ /**
+ * The conversion stopped after converting everything. This is arguably
+ * the expected normal end of conversion.
+ */
+ econv_finished,
+
+ /**
+ * The conversion stopped after writing something to somewhere, before
+ * reading everything.
+ */
+ econv_after_output,
+
+ /**
+ * The conversion stopped in middle of reading a character, possibly due to
+ * a partial read of a socket etc.
+ */
+ econv_incomplete_input
+} rb_econv_result_t;
+
+/** An opaque struct that represents a lowest level of encoding conversion. */
+typedef struct rb_econv_t rb_econv_t;
+
+/**
+ * Converts the contents of the passed string from its encoding to the passed
+ * one.
+ *
+ * @param[in] str Target string.
+ * @param[in] to Destination encoding.
+ * @param[in] ecflags A set of enum
+ * ::ruby_econv_flag_type.
+ * @param[in] ecopts A keyword hash, like
+ * ::rb_io_t::rb_io_enc_t::ecopts.
+ * @exception rb_eArgError Not fully converted.
+ * @exception rb_eInvalidByteSequenceError `str` is malformed.
+ * @exception rb_eUndefinedConversionError `str` has a character not
+ * representable using `to`.
+ * @exception rb_eConverterNotFoundError There is no known conversion from
+ * `str`'s encoding to `to`.
+ * @return A string whose encoding is `to`, and whose contents is converted
+ * contents of `str`.
+ * @note Use rb_econv_prepare_options() to generate `ecopts`.
+ */
+VALUE rb_str_encode(VALUE str, VALUE to, int ecflags, VALUE ecopts);
+
+/**
+ * Queries if there is at least one way to convert between the passed two
+ * encodings. Encoding conversions are has_and_belongs_to_many relationships.
+ * There could be no direct conversion defined for the passed pair. Ruby tries
+ * to find an indirect way to do so then. For instance ISO-8859-1 has no
+ * direct conversion to ISO-2022-JP. But there is ISO-8859-1 to UTF-8
+ * conversion; then there is UTF-8 to EUC-JP conversion; finally there also is
+ * EUC-JP to ISO-2022-JP conversion. So in short ISO-8859-1 can be converted
+ * to ISO-2022-JP using that path. This function returns true. Obviously not
+ * everything that can be represented using UTF-8 can also be represented using
+ * EUC-JP. Conversions in practice can fail depending on the actual input, and
+ * that renders exceptions in case of rb_str_encode().
+ *
+ * @param[in] from_encoding One encoding.
+ * @param[in] to_encoding Another encoding.
+ * @retval 0 No way to convert the two.
+ * @retval 1 At least one way to convert the two.
+ *
+ * @internal
+ *
+ * Practically @shyouhei knows no way for this function to return 0. It seems
+ * everything can eventually be converted to/from UTF-8, which connects
+ * everything.
+ */
+int rb_econv_has_convpath_p(const char* from_encoding, const char* to_encoding);
+
+/**
+ * Identical to rb_econv_prepare_opts(), except it additionally takes the
+ * initial value of flags. The extra bits are bitwise-ORed to the return
+ * value.
+ *
+ * @param[in] opthash Keyword arguments.
+ * @param[out] ecopts Return buffer.
+ * @param[in] ecflags Default set of enum ::ruby_econv_flag_type.
+ * @exception rb_eArgError Unknown/Broken values passed.
+ * @return Calculated set of enum ::ruby_econv_flag_type.
+ * @post `ecopts` holds a hash object suitable for
+ * ::rb_io_t::rb_io_enc_t::ecopts.
+ */
+int rb_econv_prepare_options(VALUE opthash, VALUE *ecopts, int ecflags);
+
+/**
+ * Splits a keyword arguments hash (that for instance `String#encode` took)
+ * into a set of enum ::ruby_econv_flag_type and a hash storing replacement
+ * characters etc.
+ *
+ * @param[in] opthash Keyword arguments.
+ * @param[out] ecopts Return buffer.
+ * @exception rb_eArgError Unknown/Broken values passed.
+ * @return Calculated set of enum ::ruby_econv_flag_type.
+ * @post `ecopts` holds a hash object suitable for
+ * ::rb_io_t::rb_io_enc_t::ecopts.
+ */
+int rb_econv_prepare_opts(VALUE opthash, VALUE *ecopts);
+
+/**
+ * Creates a new instance of struct ::rb_econv_t.
+ *
+ * @param[in] source_encoding Name of an encoding.
+ * @param[in] destination_encoding Name of another encoding.
+ * @param[in] ecflags A set of enum ::ruby_econv_flag_type.
+ * @exception rb_eArgError No such encoding.
+ * @retval NULL Failed to create a struct ::rb_econv_t.
+ * @retval otherwise Allocated struct ::rb_econv_t.
+ * @warning Return value must be passed to rb_econv_close() exactly once.
+ */
+rb_econv_t *rb_econv_open(const char *source_encoding, const char *destination_encoding, int ecflags);
+
+/**
+ * Identical to rb_econv_open(), except it additionally takes a hash of
+ * optional strings.
+ *
+ *
+ * @param[in] source_encoding Name of an encoding.
+ * @param[in] destination_encoding Name of another encoding.
+ * @param[in] ecflags A set of enum ::ruby_econv_flag_type.
+ * @param[in] ecopts Optional set of strings.
+ * @exception rb_eArgError No such encoding.
+ * @retval NULL Failed to create a struct ::rb_econv_t.
+ * @retval otherwise Allocated struct ::rb_econv_t.
+ * @warning Return value must be passed to rb_econv_close() exactly once.
+ */
+rb_econv_t *rb_econv_open_opts(const char *source_encoding, const char *destination_encoding, int ecflags, VALUE ecopts);
+
+/**
+ * Converts a string from an encoding to another.
+ *
+ * Possible flags are either ::RUBY_ECONV_PARTIAL_INPUT (means the source
+ * buffer is a part of much larger one), ::RUBY_ECONV_AFTER_OUTPUT (instructs
+ * the converter to stop after output before input), or both of them.
+ *
+ * @param[in,out] ec Conversion specification/state etc.
+ * @param[in] source_buffer_ptr Target string.
+ * @param[in] source_buffer_end End of target string.
+ * @param[out] destination_buffer_ptr Return buffer.
+ * @param[out] destination_buffer_end End of return buffer.
+ * @param[in] flags Flags (see above).
+ * @return The status of the conversion.
+ * @post `destination_buffer_ptr` holds conversion results.
+ */
+rb_econv_result_t rb_econv_convert(rb_econv_t *ec,
+ const unsigned char **source_buffer_ptr, const unsigned char *source_buffer_end,
+ unsigned char **destination_buffer_ptr, unsigned char *destination_buffer_end,
+ int flags);
+
+/**
+ * Destructs a converter. Note that a converter can have a buffer, and can be
+ * non-empty. Calling this would lose your data then.
+ *
+ * @param[out] ec The converter to destroy.
+ * @post `ec` is no longer a valid pointer.
+ */
+void rb_econv_close(rb_econv_t *ec);
+
+/**
+ * Assigns the replacement string. The string passed here would appear in
+ * converted string when it cannot represent its source counterpart. This can
+ * happen for instance when you convert an emoji to ISO-8859-1.
+ *
+ * @param[out] ec Target converter.
+ * @param[in] str Replacement string.
+ * @param[in] len Number of bytes of `str`.
+ * @param[in] encname Name of encoding of `str`.
+ * @retval 0 Success.
+ * @retval -1 Failure (ENOMEM etc.).
+ * @post `ec`'s replacement string is set to `str`.
+ */
+int rb_econv_set_replacement(rb_econv_t *ec, const unsigned char *str, size_t len, const char *encname);
+
+/**
+ * "Decorate"s a converter. There are special kinds of converters that
+ * transform the contents, like replacing CR into CRLF. You can add such
+ * decorators to a converter using this API. By using this function a
+ * decorator is prepended at the beginning of a conversion sequence: in case of
+ * CRLF conversion, newlines are converted before encodings are converted.
+ *
+ * @param[out] ec Target converter to decorate.
+ * @param[in] decorator_name Name of decorator to prepend.
+ * @retval 0 Success.
+ * @retval -1 Failure (no such decorator etc.).
+ * @post Decorator works before encoding conversion happens.
+ *
+ * @internal
+ *
+ * What is the possible value of the `decorator_name` is not public. You have
+ * to read through `transcode.c` carefully.
+ */
+int rb_econv_decorate_at_first(rb_econv_t *ec, const char *decorator_name);
+
+/**
+ * Identical to rb_econv_decorate_at_first(), except it adds to the opposite
+ * direction. For instance CRLF conversion would run _after_ encodings are
+ * converted.
+ *
+ * @param[out] ec Target converter to decorate.
+ * @param[in] decorator_name Name of decorator to append.
+ * @retval 0 Success.
+ * @retval -1 Failure (no such decorator etc.).
+ * @post Decorator works after encoding conversion happens.
+ */
+int rb_econv_decorate_at_last(rb_econv_t *ec, const char *decorator_name);
+
+/**
+ * Creates a `rb_eConverterNotFoundError` exception object (but does not
+ * raise).
+ *
+ * @param[in] senc Name of source encoding.
+ * @param[in] denc Name of destination encoding.
+ * @param[in] ecflags A set of enum ::ruby_econv_flag_type.
+ * @return An instance of `rb_eConverterNotFoundError`.
+ */
+VALUE rb_econv_open_exc(const char *senc, const char *denc, int ecflags);
+
+/**
+ * Appends the passed string to the passed converter's output buffer. This can
+ * be handy when an encoding needs bytes out of thin air; for instance
+ * ISO-2022-JP has "shift function" which does not correspond to any
+ * characters.
+ *
+ * @param[out] ec Target converter.
+ * @param[in] str String to insert.
+ * @param[in] len Number of bytes of `str`.
+ * @param[in] str_encoding Encoding of `str`.
+ * @retval 0 Success.
+ * @retval -1 Failure (conversion error etc.).
+ * @note `str_encoding` can be anything, and `str` itself is converted
+ * when necessary.
+ */
+int rb_econv_insert_output(rb_econv_t *ec,
+ const unsigned char *str, size_t len, const char *str_encoding);
+
+/**
+ * Queries an encoding name which best suits for rb_econv_insert_output()'s
+ * last parameter. Strings in this encoding need no conversion when inserted;
+ * can be both time/space efficient.
+ *
+ * @param[in] ec Target converter.
+ * @return Its encoding for insertion.
+ */
+const char *rb_econv_encoding_to_insert_output(rb_econv_t *ec);
+
+/**
+ * This is a rb_econv_make_exception() + rb_exc_raise() combo.
+ *
+ * @param[in] ec (Possibly failed) conversion.
+ * @exception rb_eInvalidByteSequenceError Invalid byte sequence.
+ * @exception rb_eUndefinedConversionError Conversion undefined.
+ * @note This function can return when no error.
+ */
+void rb_econv_check_error(rb_econv_t *ec);
+
+/**
+ * This function makes sense right after rb_econv_convert() returns. As listed
+ * in ::rb_econv_result_t, rb_econv_convert() can bail out for various reasons.
+ * This function checks the passed converter's internal state and converts it to
+ * an appropriate exception object.
+ *
+ * @param[in] ec Target converter.
+ * @retval RUBY_Qnil The converter has no error.
+ * @retval otherwise Conversion error turned into an exception.
+ */
+VALUE rb_econv_make_exception(rb_econv_t *ec);
+
+/**
+ * Queries if rb_econv_putback() makes sense, i.e. there are invalid byte
+ * sequences remaining in the buffer.
+ *
+ * @param[in] ec Target converter.
+ * @return Number of bytes that can be pushed back.
+ */
+int rb_econv_putbackable(rb_econv_t *ec);
+
+/**
+ * Puts back the bytes. In case of ::econv_invalid_byte_sequence, some of
+ * those invalid bytes are discarded and the others are buffered to be
+ * converted later. The latter bytes can be put back using this API.
+ *
+ * @param[out] ec Target converter (invalid byte sequence).
+ * @param[out] p Return buffer.
+ * @param[in] n Max number of bytes to put back.
+ * @post At most `n` bytes of what was put back is written to `p`.
+ */
+void rb_econv_putback(rb_econv_t *ec, unsigned char *p, int n);
+
+/**
+ * Queries the passed encoding's corresponding ASCII compatible encoding. "The
+ * corresponding ASCII compatible encoding" in this context is an ASCII
+ * compatible encoding which can represent exactly the same character sets as
+ * the given ASCII incompatible encoding. For instance that of UTF-16LE is
+ * UTF-8.
+ *
+ * @param[in] encname Name of an ASCII incompatible encoding.
+ * @retval NULL `encname` is already ASCII compatible.
+ * @retval otherwise The corresponding ASCII compatible encoding.
+ */
+const char *rb_econv_asciicompat_encoding(const char *encname);
+
+/**
+ * Identical to rb_econv_convert(), except it takes Ruby's string instead of
+ * C's pointer.
+ *
+ * @param[in,out] ec Target converter.
+ * @param[in] src Source string.
+ * @param[in] flags Flags (see rb_econv_convert).
+ * @exception rb_eArgError Converted string is too long.
+ * @exception rb_eInvalidByteSequenceError Invalid byte sequence.
+ * @exception rb_eUndefinedConversionError Conversion undefined.
+ * @return The conversion result.
+ */
+VALUE rb_econv_str_convert(rb_econv_t *ec, VALUE src, int flags);
+
+/**
+ * Identical to rb_econv_str_convert(), except it converts only a part of the
+ * passed string. Can be handy when you for instance want to do line-buffered
+ * conversion.
+ *
+ * @param[in,out] ec Target converter.
+ * @param[in] src Source string.
+ * @param[in] byteoff Number of bytes to seek.
+ * @param[in] bytesize Number of bytes to read.
+ * @param[in] flags Flags (see rb_econv_convert).
+ * @exception rb_eArgError Converted string is too long.
+ * @exception rb_eInvalidByteSequenceError Invalid byte sequence.
+ * @exception rb_eUndefinedConversionError Conversion undefined.
+ * @return The conversion result.
+ */
+VALUE rb_econv_substr_convert(rb_econv_t *ec, VALUE src, long byteoff, long bytesize, int flags);
+
+/**
+ * Identical to rb_econv_str_convert(), except it appends the conversion result
+ * to the additionally passed string instead of creating a new string. It can
+ * also be seen as a routine identical to rb_econv_append(), except it takes a
+ * Ruby's string instead of C's pointer.
+ *
+ * @param[in,out] ec Target converter.
+ * @param[in] src Source string.
+ * @param[in] dst Return buffer.
+ * @param[in] flags Flags (see rb_econv_convert).
+ * @exception rb_eArgError Converted string is too long.
+ * @exception rb_eInvalidByteSequenceError Invalid byte sequence.
+ * @exception rb_eUndefinedConversionError Conversion undefined.
+ * @return The conversion result.
+ */
+VALUE rb_econv_str_append(rb_econv_t *ec, VALUE src, VALUE dst, int flags);
+
+/**
+ * Identical to rb_econv_str_append(), except it appends only a part of the
+ * passed string with conversion. It can also be seen as a routine identical
+ * to rb_econv_substr_convert(), except it appends the conversion result to the
+ * additionally passed string instead of creating a new string.
+ *
+ * @param[in,out] ec Target converter.
+ * @param[in] src Source string.
+ * @param[in] byteoff Number of bytes to seek.
+ * @param[in] bytesize Number of bytes to read.
+ * @param[in] dst Return buffer.
+ * @param[in] flags Flags (see rb_econv_convert).
+ * @exception rb_eArgError Converted string is too long.
+ * @exception rb_eInvalidByteSequenceError Invalid byte sequence.
+ * @exception rb_eUndefinedConversionError Conversion undefined.
+ * @return The conversion result.
+ */
+VALUE rb_econv_substr_append(rb_econv_t *ec, VALUE src, long byteoff, long bytesize, VALUE dst, int flags);
+
+/**
+ * Converts the passed C's pointer according to the passed converter, then
+ * appends the conversion result to the passed Ruby's string. This way buffer
+ * overflow is avoided because the destination is resized as needed.
+ *
+ * @param[in,out] ec Target converter.
+ * @param[in] bytesrc Target string.
+ * @param[in] bytesize Number of bytes of `bytesrc`.
+ * @param[in] dst Return buffer.
+ * @param[in] flags Flags (see rb_econv_convert).
+ * @exception rb_eArgError Converted string is too long.
+ * @exception rb_eInvalidByteSequenceError Invalid byte sequence.
+ * @exception rb_eUndefinedConversionError Conversion undefined.
+ * @return The conversion result.
+ */
+VALUE rb_econv_append(rb_econv_t *ec, const char *bytesrc, long bytesize, VALUE dst, int flags);
+
+/**
+ * This badly named function does not set the destination encoding to binary,
+ * but instead just nullifies newline conversion decorators if any. Other
+ * ordinal character conversions still happen after this; something non-binary
+ * would still be generated.
+ *
+ * @param[out] ec Target converter to modify.
+ * @post Any newline conversions, if any, would be killed.
+ */
+void rb_econv_binmode(rb_econv_t *ec);
+
+/**
+ * This enum is kind of omnibus. Gathers various constants.
+ */
+enum ruby_econv_flag_type {
+
+ /**
+ * @name Flags for rb_econv_open()
+ *
+ * @{
+ */
+
+ /** Mask for error handling related bits. */
+ RUBY_ECONV_ERROR_HANDLER_MASK = 0x000000ff,
+
+ /** Special handling of invalid sequences are there. */
+ RUBY_ECONV_INVALID_MASK = 0x0000000f,
+
+ /** Invalid sequences shall be replaced. */
+ RUBY_ECONV_INVALID_REPLACE = 0x00000002,
+
+ /** Special handling of undefined conversion are there. */
+ RUBY_ECONV_UNDEF_MASK = 0x000000f0,
+
+ /** Undefined characters shall be replaced. */
+ RUBY_ECONV_UNDEF_REPLACE = 0x00000020,
+
+ /** Undefined characters shall be escaped. */
+ RUBY_ECONV_UNDEF_HEX_CHARREF = 0x00000030,
+
+ /** Decorators are there. */
+ RUBY_ECONV_DECORATOR_MASK = 0x0001ff00,
+
+ /** Newline converters are there. */
+ RUBY_ECONV_NEWLINE_DECORATOR_MASK = 0x00007f00,
+
+ /** (Unclear; seems unused). */
+ RUBY_ECONV_NEWLINE_DECORATOR_READ_MASK = 0x00000f00,
+
+ /** (Unclear; seems unused). */
+ RUBY_ECONV_NEWLINE_DECORATOR_WRITE_MASK = 0x00007000,
+
+ /** Universal newline mode. */
+ RUBY_ECONV_UNIVERSAL_NEWLINE_DECORATOR = 0x00000100,
+
+ /** LF to CRLF conversion shall happen. */
+ RUBY_ECONV_CRLF_NEWLINE_DECORATOR = 0x00001000,
+
+ /** LF to CR conversion shall happen. */
+ RUBY_ECONV_CR_NEWLINE_DECORATOR = 0x00002000,
+
+ /** CRLF to LF conversion shall happen. */
+ RUBY_ECONV_LF_NEWLINE_DECORATOR = 0x00004000,
+
+ /** Texts shall be XML-escaped. */
+ RUBY_ECONV_XML_TEXT_DECORATOR = 0x00008000,
+
+ /** Texts shall be AttrValue escaped. */
+ RUBY_ECONV_XML_ATTR_CONTENT_DECORATOR = 0x00010000,
+
+ /** (Unclear; seems unused). */
+ RUBY_ECONV_STATEFUL_DECORATOR_MASK = 0x00f00000,
+
+ /** Texts shall be quoted as an AttrValue. */
+ RUBY_ECONV_XML_ATTR_QUOTE_DECORATOR = 0x00100000,
+
+ /** Newline decorator's default. */
+ RUBY_ECONV_DEFAULT_NEWLINE_DECORATOR =
+#if defined(RUBY_TEST_CRLF_ENVIRONMENT) || defined(_WIN32)
+ RUBY_ECONV_CRLF_NEWLINE_DECORATOR,
+#else
+ 0,
+#endif
+
+#define ECONV_ERROR_HANDLER_MASK RUBY_ECONV_ERROR_HANDLER_MASK /**< @old{RUBY_ECONV_ERROR_HANDLER_MASK} */
+#define ECONV_INVALID_MASK RUBY_ECONV_INVALID_MASK /**< @old{RUBY_ECONV_INVALID_MASK} */
+#define ECONV_INVALID_REPLACE RUBY_ECONV_INVALID_REPLACE /**< @old{RUBY_ECONV_INVALID_REPLACE} */
+#define ECONV_UNDEF_MASK RUBY_ECONV_UNDEF_MASK /**< @old{RUBY_ECONV_UNDEF_MASK} */
+#define ECONV_UNDEF_REPLACE RUBY_ECONV_UNDEF_REPLACE /**< @old{RUBY_ECONV_UNDEF_REPLACE} */
+#define ECONV_UNDEF_HEX_CHARREF RUBY_ECONV_UNDEF_HEX_CHARREF /**< @old{RUBY_ECONV_UNDEF_HEX_CHARREF} */
+#define ECONV_DECORATOR_MASK RUBY_ECONV_DECORATOR_MASK /**< @old{RUBY_ECONV_DECORATOR_MASK} */
+#define ECONV_NEWLINE_DECORATOR_MASK RUBY_ECONV_NEWLINE_DECORATOR_MASK /**< @old{RUBY_ECONV_NEWLINE_DECORATOR_MASK} */
+#define ECONV_NEWLINE_DECORATOR_READ_MASK RUBY_ECONV_NEWLINE_DECORATOR_READ_MASK /**< @old{RUBY_ECONV_NEWLINE_DECORATOR_READ_MASK} */
+#define ECONV_NEWLINE_DECORATOR_WRITE_MASK RUBY_ECONV_NEWLINE_DECORATOR_WRITE_MASK /**< @old{RUBY_ECONV_NEWLINE_DECORATOR_WRITE_MASK} */
+#define ECONV_UNIVERSAL_NEWLINE_DECORATOR RUBY_ECONV_UNIVERSAL_NEWLINE_DECORATOR /**< @old{RUBY_ECONV_UNIVERSAL_NEWLINE_DECORATOR} */
+#define ECONV_CRLF_NEWLINE_DECORATOR RUBY_ECONV_CRLF_NEWLINE_DECORATOR /**< @old{RUBY_ECONV_CRLF_NEWLINE_DECORATOR} */
+#define ECONV_CR_NEWLINE_DECORATOR RUBY_ECONV_CR_NEWLINE_DECORATOR /**< @old{RUBY_ECONV_CR_NEWLINE_DECORATOR} */
+#define ECONV_LF_NEWLINE_DECORATOR RUBY_ECONV_LF_NEWLINE_DECORATOR /**< @old{RUBY_ECONV_LF_NEWLINE_DECORATOR} */
+#define ECONV_XML_TEXT_DECORATOR RUBY_ECONV_XML_TEXT_DECORATOR /**< @old{RUBY_ECONV_XML_TEXT_DECORATOR} */
+#define ECONV_XML_ATTR_CONTENT_DECORATOR RUBY_ECONV_XML_ATTR_CONTENT_DECORATOR /**< @old{RUBY_ECONV_XML_ATTR_CONTENT_DECORATOR} */
+#define ECONV_STATEFUL_DECORATOR_MASK RUBY_ECONV_STATEFUL_DECORATOR_MASK /**< @old{RUBY_ECONV_STATEFUL_DECORATOR_MASK} */
+#define ECONV_XML_ATTR_QUOTE_DECORATOR RUBY_ECONV_XML_ATTR_QUOTE_DECORATOR /**< @old{RUBY_ECONV_XML_ATTR_QUOTE_DECORATOR} */
+#define ECONV_DEFAULT_NEWLINE_DECORATOR RUBY_ECONV_DEFAULT_NEWLINE_DECORATOR /**< @old{RUBY_ECONV_DEFAULT_NEWLINE_DECORATOR} */
+ /** @} */
+
+ /**
+ * @name Flags for rb_econv_convert()
+ *
+ * @{
+ */
+
+ /** Indicates the input is a part of much larger one. */
+ RUBY_ECONV_PARTIAL_INPUT = 0x00020000,
+
+ /** Instructs the converter to stop after output. */
+ RUBY_ECONV_AFTER_OUTPUT = 0x00040000,
+#define ECONV_PARTIAL_INPUT RUBY_ECONV_PARTIAL_INPUT /**< @old{RUBY_ECONV_PARTIAL_INPUT} */
+#define ECONV_AFTER_OUTPUT RUBY_ECONV_AFTER_OUTPUT /**< @old{RUBY_ECONV_AFTER_OUTPUT} */
+
+ RUBY_ECONV_FLAGS_PLACEHOLDER /**< Placeholder (not used) */
+};
+
+RBIMPL_SYMBOL_EXPORT_END()
+
+#endif /* RUBY_INTERNAL_ENCODING_TRANSCODE_H */
diff --git a/include/ruby/internal/error.h b/include/ruby/internal/error.h
index 49e2276cb9..cd37f4461a 100644
--- a/include/ruby/internal/error.h
+++ b/include/ruby/internal/error.h
@@ -50,7 +50,19 @@ typedef enum {
/** Warning is for experimental features. */
RB_WARN_CATEGORY_EXPERIMENTAL,
- RB_WARN_CATEGORY_ALL_BITS = 0x6 /* no RB_WARN_CATEGORY_NONE bit */
+ /** Warning is for performance issues (not enabled by -w). */
+ RB_WARN_CATEGORY_PERFORMANCE,
+
+ RB_WARN_CATEGORY_DEFAULT_BITS = (
+ (1U << RB_WARN_CATEGORY_DEPRECATED) |
+ (1U << RB_WARN_CATEGORY_EXPERIMENTAL) |
+ 0),
+
+ RB_WARN_CATEGORY_ALL_BITS = (
+ (1U << RB_WARN_CATEGORY_DEPRECATED) |
+ (1U << RB_WARN_CATEGORY_EXPERIMENTAL) |
+ (1U << RB_WARN_CATEGORY_PERFORMANCE) |
+ 0)
} rb_warning_category_t;
/** for rb_readwrite_sys_fail first argument */
@@ -469,7 +481,7 @@ VALUE *rb_ruby_debug_ptr(void);
*/
#define ruby_debug (*rb_ruby_debug_ptr())
-/* reports if `-W' specified */
+/* reports if $VERBOSE is true */
RBIMPL_ATTR_NONNULL((1))
RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 1, 2)
/**
@@ -484,7 +496,8 @@ RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 1, 2)
* default, the method just emits its passed contents to ::rb_stderr using
* rb_io_write().
*
- * @note This function is affected by the `-W` flag.
+ * @note This function is affected by the value of $VERBOSE, it does
+ * nothing unless $VERBOSE is true.
* @param[in] fmt Format specifier string compatible with rb_sprintf().
*
* @internal
@@ -509,7 +522,7 @@ RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 3, 4)
* Issues a compile-time warning that happens at `__file__:__line__`. Purpose
* of this function being exposed to CAPI is unclear.
*
- * @note This function is affected by the `-W` flag.
+ * @note This function is affected by the value of $VERBOSE.
* @param[in] file The path corresponding to Ruby level `__FILE__`.
* @param[in] line The number corresponding to Ruby level `__LINE__`.
* @param[in] fmt Format specifier string compatible with rb_sprintf().
@@ -522,19 +535,20 @@ RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 1, 2)
* Identical to rb_sys_fail(), except it does not raise an exception to render
* a warning instead.
*
- * @note This function is affected by the `-W` flag.
+ * @note This function is affected by the value of $VERBOSE.
* @param[in] fmt Format specifier string compatible with rb_sprintf().
*/
void rb_sys_warning(const char *fmt, ...);
-/* reports always */
+/* reports if $VERBOSE is not nil (so if it is true or false) */
RBIMPL_ATTR_COLD()
RBIMPL_ATTR_NONNULL((1))
RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 1, 2)
/**
- * Identical to rb_warning(), except it reports always regardless of runtime
- * `-W` flag.
+ * Identical to rb_warning(), except it reports unless $VERBOSE is nil.
*
+ * @note This function is affected by the value of $VERBOSE, it does
+ * nothing if $VERBOSE is nil.
* @param[in] fmt Format specifier string compatible with rb_sprintf().
*/
void rb_warn(const char *fmt, ...);
@@ -543,8 +557,7 @@ RBIMPL_ATTR_COLD()
RBIMPL_ATTR_NONNULL((2))
RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 2, 3)
/**
- * Identical to rb_category_warning(), except it reports always regardless of
- * runtime `-W` flag.
+ * Identical to rb_category_warning(), except it reports unless $VERBOSE is nil.
*
* @param[in] cat Category e.g. deprecated.
* @param[in] fmt Format specifier string compatible with rb_sprintf().
@@ -554,8 +567,7 @@ void rb_category_warn(rb_warning_category_t cat, const char *fmt, ...);
RBIMPL_ATTR_NONNULL((1, 3))
RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 3, 4)
/**
- * Identical to rb_compile_warning(), except it reports always regardless of
- * runtime `-W` flag.
+ * Identical to rb_compile_warning(), except it reports unless $VERBOSE is nil.
*
* @param[in] file The path corresponding to Ruby level `__FILE__`.
* @param[in] line The number corresponding to Ruby level `__LINE__`.
diff --git a/include/ruby/internal/eval.h b/include/ruby/internal/eval.h
index 34a53849da..5bcbb97746 100644
--- a/include/ruby/internal/eval.h
+++ b/include/ruby/internal/eval.h
@@ -28,10 +28,12 @@ RBIMPL_SYMBOL_EXPORT_BEGIN()
RBIMPL_ATTR_NONNULL(())
/**
- * Evaluates the given string in an isolated binding.
+ * Evaluates the given string.
*
- * Here "isolated" means that the binding does not inherit any other
- * bindings. This behaves same as the binding for required libraries.
+ * In case it is called from within a C-backended method, the evaluation is
+ * done under the current binding. However there can be no method. In such a
+ * situation this function evaluates in an isolated binding, like `require`
+ * runs in a separate one.
*
* `__FILE__` will be `"(eval)"`, and `__LINE__` starts from 1 in the
* evaluation.
@@ -39,6 +41,31 @@ RBIMPL_ATTR_NONNULL(())
* @param[in] str Ruby code to evaluate.
* @exception rb_eException Raises an exception on error.
* @return The evaluated result.
+ *
+ * @internal
+ *
+ * @shyouhei's old tale about the birth and growth of this function:
+ *
+ * At the beginning, there was no rb_eval_string(). @shyouhei heard that
+ * @shugo, author of Apache httpd's mod_ruby module, requested @matz for this
+ * API. He wanted a way so that mod_ruby can evaluate ruby scripts one by one,
+ * separately, in each different contexts. So this function was made. It was
+ * designed to be a global interpreter entry point like ruby_run_node().
+ *
+ * The way it is implemented however allows extension libraries (not just
+ * programs like Apache httpd) to call this function. Because its name says
+ * nothing about the initial design, people started to think of it as an
+ * orthodox way to call ruby level `eval` method from their extension
+ * libraries. Even our `extension.rdoc` has had a description of this function
+ * basically according to this understanding.
+ *
+ * The old (mod_ruby like) usage still works. But over time, usages of this
+ * function from extension libraries got popular, while mod_ruby faded out; is
+ * no longer maintained now. Devs decided to actively support both. This
+ * function now auto-detects how it is called, and switches how it works
+ * depending on it.
+ *
+ * @see https://bugs.ruby-lang.org/issues/18780
*/
VALUE rb_eval_string(const char *str);
diff --git a/include/ruby/internal/event.h b/include/ruby/internal/event.h
index 04b137a193..1d194ed618 100644
--- a/include/ruby/internal/event.h
+++ b/include/ruby/internal/event.h
@@ -23,6 +23,10 @@
#include "ruby/internal/dllexport.h"
#include "ruby/internal/value.h"
+#ifdef HAVE_STDINT_H
+#include <stdint.h>
+#endif
+
/* These macros are not enums because they are wider than int.*/
/**
@@ -54,6 +58,7 @@
#define RUBY_EVENT_THREAD_END 0x0800 /**< Encountered an end of a thread. */
#define RUBY_EVENT_FIBER_SWITCH 0x1000 /**< Encountered a `Fiber#yield`. */
#define RUBY_EVENT_SCRIPT_COMPILED 0x2000 /**< Encountered an `eval`. */
+#define RUBY_EVENT_RESCUE 0x4000 /**< Encountered a `rescue` statement. */
#define RUBY_EVENT_TRACEPOINT_ALL 0xffff /**< Bitmask of extended events. */
/** @} */
diff --git a/include/ruby/internal/fl_type.h b/include/ruby/internal/fl_type.h
index 47f054256b..0a05166784 100644
--- a/include/ruby/internal/fl_type.h
+++ b/include/ruby/internal/fl_type.h
@@ -57,8 +57,7 @@
#define FL_SINGLETON RBIMPL_CAST((VALUE)RUBY_FL_SINGLETON) /**< @old{RUBY_FL_SINGLETON} */
#define FL_WB_PROTECTED RBIMPL_CAST((VALUE)RUBY_FL_WB_PROTECTED) /**< @old{RUBY_FL_WB_PROTECTED} */
-#define FL_PROMOTED0 RBIMPL_CAST((VALUE)RUBY_FL_PROMOTED0) /**< @old{RUBY_FL_PROMOTED0} */
-#define FL_PROMOTED1 RBIMPL_CAST((VALUE)RUBY_FL_PROMOTED1) /**< @old{RUBY_FL_PROMOTED1} */
+#define FL_PROMOTED RBIMPL_CAST((VALUE)RUBY_FL_PROMOTED) /**< @old{RUBY_FL_PROMOTED} */
#define FL_FINALIZE RBIMPL_CAST((VALUE)RUBY_FL_FINALIZE) /**< @old{RUBY_FL_FINALIZE} */
#define FL_TAINT RBIMPL_CAST((VALUE)RUBY_FL_TAINT) /**< @old{RUBY_FL_TAINT} */
#define FL_SHAREABLE RBIMPL_CAST((VALUE)RUBY_FL_SHAREABLE) /**< @old{RUBY_FL_SHAREABLE} */
@@ -111,13 +110,6 @@
#define RB_OBJ_FREEZE_RAW RB_OBJ_FREEZE_RAW
#define RB_OBJ_FROZEN RB_OBJ_FROZEN
#define RB_OBJ_FROZEN_RAW RB_OBJ_FROZEN_RAW
-#define RB_OBJ_INFECT RB_OBJ_INFECT
-#define RB_OBJ_INFECT_RAW RB_OBJ_INFECT_RAW
-#define RB_OBJ_TAINT RB_OBJ_TAINT
-#define RB_OBJ_TAINTABLE RB_OBJ_TAINTABLE
-#define RB_OBJ_TAINTED RB_OBJ_TAINTED
-#define RB_OBJ_TAINTED_RAW RB_OBJ_TAINTED_RAW
-#define RB_OBJ_TAINT_RAW RB_OBJ_TAINT_RAW
#define RB_OBJ_UNTRUST RB_OBJ_TAINT
#define RB_OBJ_UNTRUSTED RB_OBJ_TAINTED
/** @endcond */
@@ -183,7 +175,7 @@ RB_GNUC_EXTENSION
* @note About the `FL_USER` terminology: the "user" here does not necessarily
* mean only you. For instance struct ::RString instances use these
* bits to cache their encodings etc. Devs discussed about this topic,
- * reached their concensus that ::RUBY_T_DATA is the only valid data
+ * reached their consensus that ::RUBY_T_DATA is the only valid data
* structure that can use these bits; other data structures including
* ::RUBY_T_OBJECT use these bits for their own purpose. See also
* https://bugs.ruby-lang.org/issues/18059
@@ -207,12 +199,15 @@ ruby_fl_type {
RUBY_FL_WB_PROTECTED = (1<<5),
/**
- * This flag has something to do with our garbage collector. These days
- * ruby objects are "generational". There are those who are young and
- * those who are old. Young objects are prone to die; monitored relatively
- * extensively by the garbage collector. OTOH old objects tend to live
- * longer. They are relatively rarely considered. This flag is set when a
- * object experienced promotion i.e. survived a garbage collection.
+ * Ruby objects are "generational". There are young objects & old objects.
+ * Young objects are prone to die & monitored relatively extensively by the
+ * garbage collector. Old objects tend to live longer & are monitored less
+ * frequently. When an object survives a GC, its age is incremented. When
+ * age is equal to RVALUE_OLD_AGE, the object becomes Old. This flag is set
+ * when an object becomes old, and is used by the write barrier to check if
+ * an old object should be considered for marking more frequently - as old
+ * objects that have references added between major GCs need to be remarked
+ * to prevent the referred object being mistakenly swept.
*
* @internal
*
@@ -220,41 +215,14 @@ ruby_fl_type {
* 3rd parties. It must be an implementation detail that they should never
* know. Might better be hidden.
*/
- RUBY_FL_PROMOTED0 = (1<<5),
+ RUBY_FL_PROMOTED = (1<<5),
/**
- * This flag has something to do with our garbage collector. These days
- * ruby objects are "generational". There are those who are young and
- * those who are old. Young objects are prone to die; monitored relatively
- * extensively by the garbage collector. OTOH old objects tend to live
- * longer. They are relatively rarely considered. This flag is set when a
- * object experienced two promotions i.e. survived garbage collections
- * twice.
+ * This flag is no longer in use
*
* @internal
- *
- * But honestly, @shyouhei doesn't think this flag should be visible from
- * 3rd parties. It must be an implementation detail that they should never
- * know. Might better be hidden.
- */
- RUBY_FL_PROMOTED1 = (1<<6),
-
- /**
- * This flag has something to do with our garbage collector. These days
- * ruby objects are "generational". There are those who are young and
- * those who are old. Young objects are prone to die; monitored relatively
- * extensively by the garbage collector. OTOH old objects tend to live
- * longer. They are relatively rarely considered. This flag is set when a
- * object experienced promotions i.e. survived more than one garbage
- * collections.
- *
- * @internal
- *
- * But honestly, @shyouhei doesn't think this flag should be visible from
- * 3rd parties. It must be an implementation detail that they should never
- * know. Might better be hidden.
*/
- RUBY_FL_PROMOTED = RUBY_FL_PROMOTED0 | RUBY_FL_PROMOTED1,
+ RUBY_FL_UNUSED6 = (1<<6),
/**
* This flag has something to do with finalisers. A ruby object can have
@@ -283,7 +251,7 @@ ruby_fl_type {
# pragma deprecated(RUBY_FL_TAINT)
#endif
- = (1<<8),
+ = 0,
/**
* This flag has something to do with Ractor. Multiple Ractors run without
@@ -310,7 +278,7 @@ ruby_fl_type {
# pragma deprecated(RUBY_FL_UNTRUSTED)
#endif
- = (1<<8),
+ = 0,
/**
* This flag has something to do with object IDs. Unlike in the old days,
@@ -427,7 +395,7 @@ ruby_fl_type {
* 3rd parties. It must be an implementation detail that they should never
* know. Might better be hidden.
*/
- RUBY_FL_SINGLETON = RUBY_FL_USER0,
+ RUBY_FL_SINGLETON = RUBY_FL_USER1,
};
enum {
@@ -451,12 +419,6 @@ enum {
RBIMPL_SYMBOL_EXPORT_BEGIN()
/**
- * @deprecated Does nothing. This method is deprecated and will be removed in
- * Ruby 3.2.
- */
-void rb_obj_infect(VALUE victim, VALUE carrier);
-
-/**
* This is an implementation detail of #RB_OBJ_FREEZE(). People don't use it
* directly.
*
@@ -495,7 +457,7 @@ RB_FL_ABLE(VALUE obj)
RBIMPL_ATTR_PURE_UNLESS_DEBUG()
RBIMPL_ATTR_ARTIFICIAL()
/**
- * This is an implenentation detail of RB_FL_TEST(). 3rd parties need not use
+ * This is an implementation detail of RB_FL_TEST(). 3rd parties need not use
* this. Just always use RB_FL_TEST().
*
* @param[in] obj Object in question.
@@ -543,7 +505,7 @@ RB_FL_TEST(VALUE obj, VALUE flags)
RBIMPL_ATTR_PURE_UNLESS_DEBUG()
RBIMPL_ATTR_ARTIFICIAL()
/**
- * This is an implenentation detail of RB_FL_ANY(). 3rd parties need not use
+ * This is an implementation detail of RB_FL_ANY(). 3rd parties need not use
* this. Just always use RB_FL_ANY().
*
* @param[in] obj Object in question.
@@ -577,7 +539,7 @@ RB_FL_ANY(VALUE obj, VALUE flags)
RBIMPL_ATTR_PURE_UNLESS_DEBUG()
RBIMPL_ATTR_ARTIFICIAL()
/**
- * This is an implenentation detail of RB_FL_ALL(). 3rd parties need not use
+ * This is an implementation detail of RB_FL_ALL(). 3rd parties need not use
* this. Just always use RB_FL_ALL().
*
* @param[in] obj Object in question.
@@ -613,7 +575,7 @@ RBIMPL_ATTR_ARTIFICIAL()
/**
* @private
*
- * This is an implenentation detail of RB_FL_SET(). 3rd parties need not use
+ * This is an implementation detail of RB_FL_SET(). 3rd parties need not use
* this. Just always use RB_FL_SET().
*
* @param[out] obj Object in question.
@@ -633,7 +595,7 @@ rbimpl_fl_set_raw_raw(struct RBasic *obj, VALUE flags)
RBIMPL_ATTR_ARTIFICIAL()
/**
- * This is an implenentation detail of RB_FL_SET(). 3rd parties need not use
+ * This is an implementation detail of RB_FL_SET(). 3rd parties need not use
* this. Just always use RB_FL_SET().
*
* @param[out] obj Object in question.
@@ -673,7 +635,7 @@ RBIMPL_ATTR_ARTIFICIAL()
/**
* @private
*
- * This is an implenentation detail of RB_FL_UNSET(). 3rd parties need not use
+ * This is an implementation detail of RB_FL_UNSET(). 3rd parties need not use
* this. Just always use RB_FL_UNSET().
*
* @param[out] obj Object in question.
@@ -693,7 +655,7 @@ rbimpl_fl_unset_raw_raw(struct RBasic *obj, VALUE flags)
RBIMPL_ATTR_ARTIFICIAL()
/**
- * This is an implenentation detail of RB_FL_UNSET(). 3rd parties need not use
+ * This is an implementation detail of RB_FL_UNSET(). 3rd parties need not use
* this. Just always use RB_FL_UNSET().
*
* @param[out] obj Object in question.
@@ -728,7 +690,7 @@ RBIMPL_ATTR_ARTIFICIAL()
/**
* @private
*
- * This is an implenentation detail of RB_FL_REVERSE(). 3rd parties need not
+ * This is an implementation detail of RB_FL_REVERSE(). 3rd parties need not
* use this. Just always use RB_FL_REVERSE().
*
* @param[out] obj Object in question.
@@ -748,7 +710,7 @@ rbimpl_fl_reverse_raw_raw(struct RBasic *obj, VALUE flags)
RBIMPL_ATTR_ARTIFICIAL()
/**
- * This is an implenentation detail of RB_FL_REVERSE(). 3rd parties need not
+ * This is an implementation detail of RB_FL_REVERSE(). 3rd parties need not
* use this. Just always use RB_FL_REVERSE().
*
* @param[out] obj Object in question.
@@ -793,6 +755,7 @@ RBIMPL_ATTR_DEPRECATED(("taintedness turned out to be a wrong idea."))
static inline bool
RB_OBJ_TAINTABLE(VALUE obj)
{
+ (void)obj;
return false;
}
@@ -810,6 +773,7 @@ RBIMPL_ATTR_DEPRECATED(("taintedness turned out to be a wrong idea."))
static inline VALUE
RB_OBJ_TAINTED_RAW(VALUE obj)
{
+ (void)obj;
return false;
}
@@ -827,6 +791,7 @@ RBIMPL_ATTR_DEPRECATED(("taintedness turned out to be a wrong idea."))
static inline bool
RB_OBJ_TAINTED(VALUE obj)
{
+ (void)obj;
return false;
}
@@ -842,6 +807,7 @@ RBIMPL_ATTR_DEPRECATED(("taintedness turned out to be a wrong idea."))
static inline void
RB_OBJ_TAINT_RAW(VALUE obj)
{
+ (void)obj;
return;
}
@@ -857,6 +823,7 @@ RBIMPL_ATTR_DEPRECATED(("taintedness turned out to be a wrong idea."))
static inline void
RB_OBJ_TAINT(VALUE obj)
{
+ (void)obj;
return;
}
@@ -873,6 +840,8 @@ RBIMPL_ATTR_DEPRECATED(("taintedness turned out to be a wrong idea."))
static inline void
RB_OBJ_INFECT_RAW(VALUE dst, VALUE src)
{
+ (void)dst;
+ (void)src;
return;
}
@@ -889,13 +858,15 @@ RBIMPL_ATTR_DEPRECATED(("taintedness turned out to be a wrong idea."))
static inline void
RB_OBJ_INFECT(VALUE dst, VALUE src)
{
+ (void)dst;
+ (void)src;
return;
}
RBIMPL_ATTR_PURE_UNLESS_DEBUG()
RBIMPL_ATTR_ARTIFICIAL()
/**
- * This is an implenentation detail of RB_OBJ_FROZEN(). 3rd parties need not
+ * This is an implementation detail of RB_OBJ_FROZEN(). 3rd parties need not
* use this. Just always use RB_OBJ_FROZEN().
*
* @param[in] obj Object in question.
@@ -934,9 +905,13 @@ RB_OBJ_FROZEN(VALUE obj)
}
}
+RUBY_SYMBOL_EXPORT_BEGIN
+void rb_obj_freeze_inline(VALUE obj);
+RUBY_SYMBOL_EXPORT_END
+
RBIMPL_ATTR_ARTIFICIAL()
/**
- * This is an implenentation detail of RB_OBJ_FREEZE(). 3rd parties need not
+ * This is an implementation detail of RB_OBJ_FREEZE(). 3rd parties need not
* use this. Just always use RB_OBJ_FREEZE().
*
* @param[out] obj Object in question.
@@ -944,24 +919,7 @@ RBIMPL_ATTR_ARTIFICIAL()
static inline void
RB_OBJ_FREEZE_RAW(VALUE obj)
{
- RB_FL_SET_RAW(obj, RUBY_FL_FREEZE);
-}
-
-/**
- * Prevents further modifications to the given object. ::rb_eFrozenError shall
- * be raised if modification is attempted.
- *
- * @param[out] x Object in question.
- */
-static inline void
-rb_obj_freeze_inline(VALUE x)
-{
- if (RB_FL_ABLE(x)) {
- RB_OBJ_FREEZE_RAW(x);
- if (RBASIC_CLASS(x) && !(RBASIC(x)->flags & RUBY_FL_SINGLETON)) {
- rb_freeze_singleton_class(x);
- }
- }
+ rb_obj_freeze_inline(obj);
}
#endif /* RBIMPL_FL_TYPE_H */
diff --git a/include/ruby/internal/gc.h b/include/ruby/internal/gc.h
index 66fc14e511..462f416af2 100644
--- a/include/ruby/internal/gc.h
+++ b/include/ruby/internal/gc.h
@@ -20,16 +20,379 @@
* extension libraries. They could be written in C++98.
* @brief Registering values to the GC.
*/
+#include "ruby/internal/config.h"
+
+#ifdef STDC_HEADERS
+# include <stddef.h> /* size_t */
+#endif
+
+#ifdef HAVE_SYS_TYPES_H
+# include <sys/types.h> /* ssize_t */
+#endif
+
+#include "ruby/assert.h"
+#include "ruby/internal/attr/cold.h"
+#include "ruby/internal/attr/nonnull.h"
+#include "ruby/internal/attr/noreturn.h"
+#include "ruby/internal/attr/artificial.h"
+#include "ruby/internal/attr/maybe_unused.h"
+#include "ruby/internal/attr/pure.h"
#include "ruby/internal/dllexport.h"
+#include "ruby/internal/special_consts.h"
+#include "ruby/internal/stdbool.h"
#include "ruby/internal/value.h"
RBIMPL_SYMBOL_EXPORT_BEGIN()
+#define RUBY_REF_EDGE(s, p) offsetof(s, p)
+#define RUBY_REFS_LIST_PTR(l) (RUBY_DATA_FUNC)(l)
+#define RUBY_REF_END SIZE_MAX
+#define RUBY_REFERENCES(t) static const size_t t[]
+#define RUBY_REFERENCES_START(t) RUBY_REFERENCES(t) = {
+#define RUBY_REFERENCES_END RUBY_REF_END, };
+
+/* gc.c */
+
+RBIMPL_ATTR_COLD()
+RBIMPL_ATTR_NORETURN()
+/**
+ * Triggers out-of-memory error. If possible it raises ::rb_eNoMemError. But
+ * because we are running out of memory that is not always doable. This
+ * function tries hard to show something, but ultimately can die silently.
+ *
+ * @exception rb_eNoMemError Raises it if possible.
+ */
+void rb_memerror(void);
+
+RBIMPL_ATTR_PURE()
+/**
+ * Queries if the GC is busy.
+ *
+ * @retval 0 It isn't.
+ * @retval 1 It is.
+ */
+int rb_during_gc(void);
+
+RBIMPL_ATTR_NONNULL((1))
+/**
+ * Marks objects between the two pointers. This is one of the GC utility
+ * functions that you can call when you design your own
+ * ::rb_data_type_struct::dmark.
+ *
+ * @pre Continuous memory region from `start` to `end` shall be fully
+ * addressable.
+ * @param[out] start Pointer to an array of objects.
+ * @param[out] end Pointer that terminates the array of objects.
+ * @post Objects from `start` (included) to `end` (excluded) are marked.
+ *
+ * @internal
+ *
+ * `end` can be NULL... But that just results in no-op.
+ */
+void rb_gc_mark_locations(const VALUE *start, const VALUE *end);
+
+/**
+ * Identical to rb_mark_hash(), except it marks only values of the table and
+ * leaves their associated keys unmarked. This is one of the GC utility
+ * functions that you can call when you design your own
+ * ::rb_data_type_struct::dmark.
+ *
+ * @warning Of course it can break GC. Leave it unused if unsure.
+ * @param[in] tbl A table to mark.
+ * @post Values stored in `tbl` are marked.
+ */
+void rb_mark_tbl(struct st_table *tbl);
+
+/**
+ * Identical to rb_mark_tbl(), except it marks objects using
+ * rb_gc_mark_movable(). This is one of the GC utility functions that you can
+ * call when you design your own ::rb_data_type_struct::dmark.
+ *
+ * @warning Of course it can break GC. Leave it unused if unsure.
+ * @param[in] tbl A table to mark.
+ * @post Values stored in `tbl` are marked.
+ */
+void rb_mark_tbl_no_pin(struct st_table *tbl);
+
+/**
+ * Identical to rb_mark_hash(), except it marks only keys of the table and
+ * leaves their associated values unmarked. This is one of the GC utility
+ * functions that you can call when you design your own
+ * ::rb_data_type_struct::dmark.
+ *
+ * @warning Of course it can break GC. Leave it unused if unsure.
+ * @param[in] tbl A table to mark.
+ * @post Keys stored in `tbl` are marked.
+ */
+void rb_mark_set(struct st_table *tbl);
+
/**
- * Inform the garbage collector that `valptr` points to a live Ruby object that
- * should not be moved. Note that extensions should use this API on global
- * constants instead of assuming constants defined in Ruby are always alive.
- * Ruby code can remove global constants.
+ * Marks keys and values associated inside of the given table. This is one of
+ * the GC utility functions that you can call when you design your own
+ * ::rb_data_type_struct::dmark.
+ *
+ * @param[in] tbl A table to mark.
+ * @post Objects stored in `tbl` are marked.
+ */
+void rb_mark_hash(struct st_table *tbl);
+
+/**
+ * Updates references inside of tables. After you marked values using
+ * rb_mark_tbl_no_pin(), the objects inside of the table could of course be
+ * moved. This function is to fixup those references. You can call this from
+ * your ::rb_data_type_struct::dcompact.
+ *
+ * @param[out] ptr A table that potentially includes moved references.
+ * @post Moved references, if any, are corrected.
+ */
+void rb_gc_update_tbl_refs(st_table *ptr);
+
+/**
+ * Identical to rb_gc_mark(), except it allows the passed value to be a
+ * non-object. For instance pointers to different type of memory regions are
+ * allowed here. Such values are silently ignored. This is one of the GC
+ * utility functions that you can call when you design your own
+ * ::rb_data_type_struct::dmark.
+ *
+ * @param[out] obj A possible object.
+ * @post `obj` is marked, if possible.
+ */
+void rb_gc_mark_maybe(VALUE obj);
+
+/**
+ * Marks an object. This is one of the GC utility functions that you can call
+ * when you design your own ::rb_data_type_struct::dmark.
+ *
+ * @param[out] obj Arbitrary Ruby object.
+ * @post `obj` is marked.
+ */
+void rb_gc_mark(VALUE obj);
+
+/**
+ * Maybe this is the only function provided for C extensions to control the
+ * pinning of objects, so let us describe it in detail. These days Ruby's GC
+ * is copying. As far as an object's physical address is guaranteed unused, it
+ * can move around the object space. Our GC engine rearranges these objects
+ * after it reclaims unreachable objects from our object space, so that the
+ * space is compact (improves memory locality). This is called the
+ * "compaction" phase, and works well most of the time... as far as there are
+ * no C extensions. C extensions complicate the scenario because Ruby core
+ * cannot detect any use of the physical address of an object inside of C
+ * functions. In order to prevent memory corruptions, objects observable from
+ * C extensions are "pinned"; they stick to where they are born until they die,
+ * just in case any C extensions touch their raw pointers. This variant of
+ * scheme is called "Mostly-Copying" garbage collector. Authors of C
+ * extensions, however, can extremely carefully write them to become
+ * compaction-aware. To do so avoid referring to a Ruby object from inside of
+ * your struct in the first place. But if that is not possible, use this
+ * function from your ::rb_data_type_struct::dmark then. This way objects
+ * marked using it are considered movable. If you chose this way you have to
+ * manually fix up locations of such moved pointers using rb_gc_location().
+ *
+ * @see Bartlett, Joel F., "Compacting Garbage Collection with Ambiguous
+ * Roots", ACM SIGPLAN Lisp Pointers Volume 1 Issue 6 pp. 3-12,
+ * April-May-June, 1988. https://doi.org/10.1145/1317224.1317225
+ *
+ * @param[in] obj Object that is movable.
+ * @post `obj` is marked (and remains movable).
+ */
+void rb_gc_mark_movable(VALUE obj);
+
+/**
+ * Finds a new "location" of an object. An object can be moved on compaction.
+ * This function projects its new abode, or just returns the passed object if
+ * not moved. This is one of the GC utility functions that you can call when
+ * you design your own ::rb_data_type_struct::dcompact.
+ *
+ * @param[in] obj An object, possibly already moved to somewhere else.
+ * @return An object, which holds the current contents of former `obj`.
+ */
+VALUE rb_gc_location(VALUE obj);
+
+/**
+ * Triggers a GC process. This was the only GC entry point that we had at the
+ * beginning. Over time our GC evolved. Now what this function does is just a
+ * very simplified variation of the entire GC algorithms. A series of
+ * procedures kicked by this API is called a "full" GC.
+ *
+ * - It immediately scans the entire object space to sort the dead.
+ * - It immediately reclaims any single dead bodies to reuse later.
+ *
+ * It is worth noting that the procedures above do not include evaluations of
+ * finalisers. They run later.
+ *
+ * @internal
+ *
+ * Finalisers are deferred until we can handle interrupts. See
+ * `rb_postponed_job_flush` in vm_trace.c.
+ *
+ * Of course there are GC that are not "full". For instance this one and the
+ * GC which runs when we are running out of memory are different. See
+ * `gc_profile_record_flag` defined in gc.c for the kinds of GC.
+ *
+ * In spite of the name this is not everything that a GC can trigger. As
+ * of writing it seems this function does not trigger compaction. But this
+ * might change in future.
+ */
+void rb_gc(void);
+
+/**
+ * Copy&paste an object's finaliser to another. This is one of the GC utility
+ * functions that you can call when you design your own `initialize_copy`,
+ * `initialize_dup`, `initialize_clone`.
+ *
+ * @param[out] dst Destination object.
+ * @param[in] src Source object.
+ * @post `dst` and `src` share the same finaliser.
+ *
+ * @internal
+ *
+ * But isn't it easier for you to call super, and let `Object#initialize_copy`
+ * call this function instead?
+ */
+void rb_gc_copy_finalizer(VALUE dst, VALUE src);
+
+/**
+ * (Re-) enables GC. This makes sense only after you called rb_gc_disable().
+ *
+ * @retval RUBY_Qtrue GC was disabled before.
+ * @retval RUBY_Qfalse GC was enabled before.
+ * @post GC is enabled.
+ *
+ * @internal
+ *
+ * This is one of those exceptional functions that raise neither Ruby
+ * exceptions nor C++ exceptions.
+ */
+VALUE rb_gc_enable(void);
+
+/**
+ * Disables GC. This prevents automatic GC runs when the process is running
+ * out of memory. Such situations shall result in rb_memerror(). However this
+ * does not prevent users from manually invoking rb_gc(). That should work.
+ * People abused this by disabling GC at the beginning of an event loop,
+ * process events without GC overheads, then manually force reclaiming garbage
+ * at the bottom of the loop. However because our GC is now much smarter than
+ * just calling rb_gc(), this technique is proven to be sub-optimal these days.
+ * It is believed that there is currently practically no need for this
+ * function.
+ *
+ * @retval RUBY_Qtrue GC was disabled before.
+ * @retval RUBY_Qfalse GC was enabled before.
+ * @post GC is disabled.
+ */
+VALUE rb_gc_disable(void);
+
+/**
+ * Identical to rb_gc(), except the return value.
+ *
+ * @return Always returns ::RUBY_Qnil.
+ */
+VALUE rb_gc_start(void);
+
+/**
+ * Assigns a finaliser for an object. Each object can have objects (typically
+ * blocks) that run immediately after that object dies. They are called
+ * finalisers of an object. This function associates a finaliser object with a
+ * target object.
+ *
+ * @note Note that finalisers run _after_ the object they finalise dies. You
+ * cannot for instance call its methods.
+ * @note If your finaliser references the object it finalises that object
+ * loses any chance to become a garbage; effectively leaks memory until
+ * the end of the process.
+ *
+ * @param[in] obj Target to finalise.
+ * @param[in] block Something `call`able.
+ * @exception rb_eRuntimeError Somehow `obj` cannot have finalisers.
+ * @exception rb_eFrozenError `obj` is frozen.
+ * @exception rb_eArgError `block` doesn't respond to `call`.
+ * @return The passed `block`.
+ * @post `block` runs after `obj` dies.
+ */
+VALUE rb_define_finalizer(VALUE obj, VALUE block);
+
+/**
+ * Modifies the object so that it has no finalisers at all. This function is
+ * mainly provided for symmetry. No practical usages can be thought of.
+ *
+ * @param[out] obj Object to clear its finalisers.
+ * @exception rb_eFrozenError `obj` is frozen.
+ * @return The passed `obj`.
+ * @post `obj` has no finalisers.
+ * @note There is no way to undefine a specific part of many finalisers
+ * that `obj` could have. All you can do is to clear them all.
+ */
+VALUE rb_undefine_finalizer(VALUE obj);
+
+/**
+ * Identical to rb_gc_stat(), with "count" parameter.
+ *
+ * @return Lifetime total number of runs of GC.
+ */
+size_t rb_gc_count(void);
+
+/**
+ * Obtains various GC related profiles. The parameter can be either a Symbol
+ * or a Hash. If a Hash is passed, it is filled with everything currently
+ * available. If a Symbol is passed just that portion is returned.
+ *
+ * Possible variations of keys you can pass here change from version to
+ * version. You can get the list of known keys by passing an empty hash and
+ * let it be filled.
+ *
+ * @param[in,out] key_or_buf A Symbol, or a Hash.
+ * @exception rb_eTypeError Neither Symbol nor Hash.
+ * @exception rb_eFrozenError Frozen hash is passed.
+ * @return In case a Hash is passed it returns 0. Otherwise the
+ * profile value associated with the given key is returned.
+ * @post In case a Hash is passed it is filled with values.
+ */
+size_t rb_gc_stat(VALUE key_or_buf);
+
+/**
+ * Obtains various info regarding the most recent GC run. This includes for
+ * instance the reason of the GC. The parameter can be either a Symbol or a
+ * Hash. If a Hash is passed, it is filled with everything currently
+ * available. If a Symbol is passed just that portion is returned.
+ *
+ * Possible variations of keys you can pass here change from version to
+ * version. You can get the list of known keys by passing an empty hash and
+ * let it be filled.
+ *
+ * @param[in,out] key_or_buf A Symbol, or a Hash.
+ * @exception rb_eTypeError Neither Symbol nor Hash.
+ * @exception rb_eFrozenError Frozen hash is passed.
+ * @return In case a Hash is passed it returns that hash. Otherwise
+ * the profile value associated with the given key is returned.
+ * @post In case a Hash is passed it is filled with values.
+ */
+VALUE rb_gc_latest_gc_info(VALUE key_or_buf);
+
+/**
+ * Informs that there are external memory usages. Our GC runs when we are
+ * running out of memory. The amount of memory, however, can increase/decrease
+ * behind-the-scene. For instance DLLs can allocate memories using `mmap(2)`
+ * etc, which are opaque to us. Registering such external allocations using
+ * this function enables proper detection of how much memories an object used
+ * as a whole. That will trigger GCs more often than it would otherwise. You
+ * can also pass negative numbers here, to indicate that such external
+ * allocations are gone.
+ *
+ * @param[in] diff Amount of memory increased(+)/decreased(-).
+ */
+void rb_gc_adjust_memory_usage(ssize_t diff);
+
+/**
+ * Inform the garbage collector that the global or static variable pointed by
+ * `valptr` stores a live Ruby object that should not be moved. Note that
+ * extensions should use this API on global constants instead of assuming
+ * constants defined in Ruby are always alive. Ruby code can remove global
+ * constants.
+ *
+ * Because this registration itself has a possibility to trigger a GC, this
+ * function must be called before any GC-able object is assigned to the
+ * address pointed by `valptr`.
*/
void rb_gc_register_address(VALUE *valptr);
@@ -54,4 +417,413 @@ void rb_gc_register_mark_object(VALUE object);
RBIMPL_SYMBOL_EXPORT_END()
+/**
+ * @private
+ *
+ * @deprecated This macro once was a thing in the old days, but makes no sense
+ * any longer today. Exists here for backwards compatibility
+ * only. You can safely forget about it.
+ */
+#undef USE_RGENGC
+#define USE_RGENGC 1
+
+/**
+ * @deprecated This macro seems broken. Setting this to anything other than
+ * zero just doesn't compile. We need to KonMari.
+ */
+#ifndef USE_RGENGC_LOGGING_WB_UNPROTECT
+# define USE_RGENGC_LOGGING_WB_UNPROTECT 0
+#endif
+
+/**
+ * @private
+ *
+ * This is a compile-time flag to enable/disable write barrier for
+ * struct ::RArray. It has to be set at the time ruby itself compiles. Makes
+ * no sense for 3rd parties.
+ */
+#ifndef RGENGC_WB_PROTECTED_ARRAY
+# define RGENGC_WB_PROTECTED_ARRAY 1
+#endif
+
+/**
+ * @private
+ *
+ * This is a compile-time flag to enable/disable write barrier for
+ * struct ::RHash. It has to be set at the time ruby itself compiles. Makes
+ * no sense for 3rd parties.
+ */
+#ifndef RGENGC_WB_PROTECTED_HASH
+# define RGENGC_WB_PROTECTED_HASH 1
+#endif
+
+/**
+ * @private
+ *
+ * This is a compile-time flag to enable/disable write barrier for
+ * struct ::RStruct. It has to be set at the time ruby itself compiles. Makes
+ * no sense for 3rd parties.
+ */
+#ifndef RGENGC_WB_PROTECTED_STRUCT
+# define RGENGC_WB_PROTECTED_STRUCT 1
+#endif
+
+/**
+ * @private
+ *
+ * This is a compile-time flag to enable/disable write barrier for
+ * struct ::RString. It has to be set at the time ruby itself compiles. Makes
+ * no sense for 3rd parties.
+ */
+#ifndef RGENGC_WB_PROTECTED_STRING
+# define RGENGC_WB_PROTECTED_STRING 1
+#endif
+
+/**
+ * @private
+ *
+ * This is a compile-time flag to enable/disable write barrier for
+ * struct ::RObject. It has to be set at the time ruby itself compiles. Makes
+ * no sense for 3rd parties.
+ */
+#ifndef RGENGC_WB_PROTECTED_OBJECT
+# define RGENGC_WB_PROTECTED_OBJECT 1
+#endif
+
+/**
+ * @private
+ *
+ * This is a compile-time flag to enable/disable write barrier for
+ * struct ::RRegexp. It has to be set at the time ruby itself compiles. Makes
+ * no sense for 3rd parties.
+ */
+#ifndef RGENGC_WB_PROTECTED_REGEXP
+# define RGENGC_WB_PROTECTED_REGEXP 1
+#endif
+
+/**
+ * @private
+ *
+ * This is a compile-time flag to enable/disable write barrier for
+ * struct ::RMatch. It has to be set at the time ruby itself compiles. Makes
+ * no sense for 3rd parties.
+ */
+#ifndef RGENGC_WB_PROTECTED_MATCH
+# define RGENGC_WB_PROTECTED_MATCH 1
+#endif
+
+/**
+ * @private
+ *
+ * This is a compile-time flag to enable/disable write barrier for
+ * struct ::RClass. It has to be set at the time ruby itself compiles. Makes
+ * no sense for 3rd parties.
+ */
+#ifndef RGENGC_WB_PROTECTED_CLASS
+# define RGENGC_WB_PROTECTED_CLASS 1
+#endif
+
+/**
+ * @private
+ *
+ * This is a compile-time flag to enable/disable write barrier for
+ * struct ::RFloat. It has to be set at the time ruby itself compiles. Makes
+ * no sense for 3rd parties.
+ */
+#ifndef RGENGC_WB_PROTECTED_FLOAT
+# define RGENGC_WB_PROTECTED_FLOAT 1
+#endif
+
+/**
+ * @private
+ *
+ * This is a compile-time flag to enable/disable write barrier for
+ * struct ::RComplex. It has to be set at the time ruby itself compiles.
+ * Makes no sense for 3rd parties.
+ */
+#ifndef RGENGC_WB_PROTECTED_COMPLEX
+# define RGENGC_WB_PROTECTED_COMPLEX 1
+#endif
+
+/**
+ * @private
+ *
+ * This is a compile-time flag to enable/disable write barrier for
+ * struct ::RRational. It has to be set at the time ruby itself compiles.
+ * Makes no sense for 3rd parties.
+ */
+#ifndef RGENGC_WB_PROTECTED_RATIONAL
+# define RGENGC_WB_PROTECTED_RATIONAL 1
+#endif
+
+/**
+ * @private
+ *
+ * This is a compile-time flag to enable/disable write barrier for
+ * struct ::RBignum. It has to be set at the time ruby itself compiles. Makes
+ * no sense for 3rd parties.
+ */
+#ifndef RGENGC_WB_PROTECTED_BIGNUM
+# define RGENGC_WB_PROTECTED_BIGNUM 1
+#endif
+
+/**
+ * @private
+ *
+ * @deprecated This macro once was a thing in the old days, but makes no sense
+ * any longer today. Exists here for backwards compatibility
+ * only. You can safely forget about it.
+ *
+ * @internal
+ *
+ * @shyouhei doesn't think anybody uses this right now.
+ */
+#ifndef RGENGC_WB_PROTECTED_NODE_CREF
+# define RGENGC_WB_PROTECTED_NODE_CREF 1
+#endif
+
+/**
+ * @defgroup rgengc Write barrier (WB) interfaces:
+ *
+ * @note The following core interfaces can be changed in the future. Please
+ * catch up if you want to insert WB into C-extensions correctly.
+ *
+ * @{
+ */
+
+/**
+ * Declaration of a "back" pointer. This is a write barrier for new reference
+ * from "old" generation to "young" generation. It writes `young` into
+ * `*slot`, which is a pointer inside of `old`.
+ *
+ * @param[in] old An old object.
+ * @param[in] slot A pointer inside of `old`.
+ * @param[out] young A young object.
+ */
+#define RB_OBJ_WRITE(old, slot, young) \
+ RBIMPL_CAST(rb_obj_write((VALUE)(old), (VALUE *)(slot), (VALUE)(young), __FILE__, __LINE__))
+
+/**
+ * Identical to #RB_OBJ_WRITE(), except it doesn't write any values, but only a
+ * WB declaration. `oldv` is the value being replaced by `young` (not used in
+ * current Ruby).
+ *
+ * @param[in] old An old object.
+ * @param[in] oldv An object previously stored inside of `old`.
+ * @param[out] young A young object.
+ */
+#define RB_OBJ_WRITTEN(old, oldv, young) \
+ RBIMPL_CAST(rb_obj_written((VALUE)(old), (VALUE)(oldv), (VALUE)(young), __FILE__, __LINE__))
+/** @} */
+
+#define OBJ_PROMOTED_RAW RB_OBJ_PROMOTED_RAW /**< @old{RB_OBJ_PROMOTED_RAW} */
+#define OBJ_PROMOTED RB_OBJ_PROMOTED /**< @old{RB_OBJ_PROMOTED} */
+#define OBJ_WB_UNPROTECT RB_OBJ_WB_UNPROTECT /**< @old{RB_OBJ_WB_UNPROTECT} */
+
+/**
+ * Asserts that the passed object is not fenced by write barriers. Objects of
+ * such property do not contribute to generational GCs. They are scanned
+ * always.
+ *
+ * @param[out] x An object that would not be protected by the barrier.
+ */
+#define RB_OBJ_WB_UNPROTECT(x) rb_obj_wb_unprotect(x, __FILE__, __LINE__)
+
+/**
+ * Identical to #RB_OBJ_WB_UNPROTECT(), except it can also assert that the
+ * given object is of given type.
+ *
+ * @param[in] type One of `ARRAY`, `STRING`, etc.
+ * @param[out] obj An object of `type` that would not be protected.
+ *
+ * @internal
+ *
+ * @shyouhei doesn't understand why this has to be visible from extensions.
+ */
+#define RB_OBJ_WB_UNPROTECT_FOR(type, obj) \
+ (RGENGC_WB_PROTECTED_##type ? OBJ_WB_UNPROTECT(obj) : obj)
+
+/**
+ * @private
+ *
+ * This is an implementation detail of rb_obj_wb_unprotect(). People don't use
+ * it directly.
+ */
+#define RGENGC_LOGGING_WB_UNPROTECT rb_gc_unprotect_logging
+
+/** @cond INTERNAL_MACRO */
+#define RB_OBJ_PROMOTED_RAW RB_OBJ_PROMOTED_RAW
+#define RB_OBJ_PROMOTED RB_OBJ_PROMOTED
+/** @endcond */
+
+RBIMPL_SYMBOL_EXPORT_BEGIN()
+/**
+ * This is the implementation of #RB_OBJ_WRITE(). People don't use it
+ * directly.
+ *
+ * @param[in] old An object that points to `young`.
+ * @param[out] young An object that is referenced from `old`.
+ */
+void rb_gc_writebarrier(VALUE old, VALUE young);
+
+/**
+ * This is the implementation of #RB_OBJ_WB_UNPROTECT(). People don't use it
+ * directly.
+ *
+ * @param[out] obj An object that does not participate in WB.
+ */
+void rb_gc_writebarrier_unprotect(VALUE obj);
+
+#if USE_RGENGC_LOGGING_WB_UNPROTECT
+/**
+ * @private
+ *
+ * This is the implementation of #RGENGC_LOGGING_WB_UNPROTECT(). People
+ * don't use it directly.
+ *
+ * @param[in] objptr Don't know why this is a pointer to void but in
+ * reality this is a pointer to an object that is about
+ * to be un-protected.
+ * @param[in] filename Pass C's `__FILE__` here.
+ * @param[in] line Pass C's `__LINE__` here.
+ */
+void rb_gc_unprotect_logging(void *objptr, const char *filename, int line);
+#endif
+
+RBIMPL_SYMBOL_EXPORT_END()
+
+RBIMPL_ATTR_PURE_UNLESS_DEBUG()
+RBIMPL_ATTR_ARTIFICIAL()
+/**
+ * This is the implementation of #RB_OBJ_PROMOTED(). People don't use it
+ * directly.
+ *
+ * @param[in] obj An object to query.
+ * @retval true The object is "promoted".
+ * @retval false The object is young; it has not been promoted.
+ */
+static inline bool
+RB_OBJ_PROMOTED_RAW(VALUE obj)
+{
+ RBIMPL_ASSERT_OR_ASSUME(RB_FL_ABLE(obj));
+ return RB_FL_ANY_RAW(obj, RUBY_FL_PROMOTED);
+}
+
+RBIMPL_ATTR_PURE_UNLESS_DEBUG()
+RBIMPL_ATTR_ARTIFICIAL()
+/**
+ * Tests if the object is "promoted" -- that is, whether the object experienced
+ * one or more GC marks.
+ *
+ * @param[in] obj An object to query.
+ * @retval true The object is "promoted".
+ * @retval false The object is young; it has not been promoted.
+ * @note Hello, is anyone actively calling this function? @shyouhei has
+ * never seen any actual usages outside of the GC implementation
+ * itself.
+ */
+static inline bool
+RB_OBJ_PROMOTED(VALUE obj)
+{
+ if (! RB_FL_ABLE(obj)) {
+ return false;
+ }
+ else {
+ return RB_OBJ_PROMOTED_RAW(obj);
+ }
+}
+
+/**
+ * This is the implementation of #RB_OBJ_WB_UNPROTECT(). People don't use it
+ * directly.
+ *
+ * @param[out] x An object that does not participate in WB.
+ * @param[in] filename C's `__FILE__` of the caller function.
+ * @param[in] line C's `__LINE__` of the caller function.
+ * @return x
+ */
+static inline VALUE
+rb_obj_wb_unprotect(
+ VALUE x,
+ RBIMPL_ATTR_MAYBE_UNUSED()
+ const char *filename,
+ RBIMPL_ATTR_MAYBE_UNUSED()
+ int line)
+{
+#if USE_RGENGC_LOGGING_WB_UNPROTECT
+ RGENGC_LOGGING_WB_UNPROTECT(RBIMPL_CAST((void *)x), filename, line);
+#endif
+ rb_gc_writebarrier_unprotect(x);
+ return x;
+}
+
+/**
+ * @private
+ *
+ * This is the implementation of #RB_OBJ_WRITTEN(). People don't use it
+ * directly.
+ *
+ * @param[in] a An old object.
+ * @param[in] oldv An object previously stored inside of `a`.
+ * @param[out] b A young object.
+ * @param[in] filename C's `__FILE__` of the caller function.
+ * @param[in] line C's `__LINE__` of the caller function.
+ * @return a
+ */
+static inline VALUE
+rb_obj_written(
+ VALUE a,
+ RBIMPL_ATTR_MAYBE_UNUSED()
+ VALUE oldv,
+ VALUE b,
+ RBIMPL_ATTR_MAYBE_UNUSED()
+ const char *filename,
+ RBIMPL_ATTR_MAYBE_UNUSED()
+ int line)
+{
+#if USE_RGENGC_LOGGING_WB_UNPROTECT
+ RGENGC_LOGGING_OBJ_WRITTEN(a, oldv, b, filename, line);
+#endif
+
+ if (!RB_SPECIAL_CONST_P(b)) {
+ rb_gc_writebarrier(a, b);
+ }
+
+ return a;
+}
+
+/**
+ * @private
+ *
+ * This is the implementation of #RB_OBJ_WRITE(). People don't use it
+ * directly.
+ *
+ * @param[in] a An old object.
+ * @param[in] slot A pointer inside of `a`.
+ * @param[out] b A young object.
+ * @param[in] filename C's `__FILE__` of the caller function.
+ * @param[in] line C's `__LINE__` of the caller function.
+ * @return a
+ */
+static inline VALUE
+rb_obj_write(
+ VALUE a, VALUE *slot, VALUE b,
+ RBIMPL_ATTR_MAYBE_UNUSED()
+ const char *filename,
+ RBIMPL_ATTR_MAYBE_UNUSED()
+ int line)
+{
+#ifdef RGENGC_LOGGING_WRITE
+ RGENGC_LOGGING_WRITE(a, slot, b, filename, line);
+#endif
+
+ *slot = b;
+
+ rb_obj_written(a, RUBY_Qundef /* ignore `oldv' now */, b, filename, line);
+ return a;
+}
+
+RBIMPL_ATTR_DEPRECATED(("Will be removed soon"))
+static inline void rb_gc_force_recycle(VALUE obj){}
+
#endif /* RBIMPL_GC_H */
diff --git a/include/ruby/internal/globals.h b/include/ruby/internal/globals.h
index b478e30b04..60d8e5309a 100644
--- a/include/ruby/internal/globals.h
+++ b/include/ruby/internal/globals.h
@@ -82,6 +82,7 @@ RUBY_EXTERN VALUE rb_cInteger; /**< `Module` class. */
RUBY_EXTERN VALUE rb_cMatch; /**< `MatchData` class. */
RUBY_EXTERN VALUE rb_cMethod; /**< `Method` class. */
RUBY_EXTERN VALUE rb_cModule; /**< `Module` class. */
+RUBY_EXTERN VALUE rb_cRefinement; /**< `Refinement` class. */
RUBY_EXTERN VALUE rb_cNameErrorMesg; /**< `NameError::Message` class. */
RUBY_EXTERN VALUE rb_cNilClass; /**< `NilClass` class. */
RUBY_EXTERN VALUE rb_cNumeric; /**< `Numeric` class. */
@@ -93,7 +94,7 @@ RUBY_EXTERN VALUE rb_cRegexp; /**< `Regexp` class. */
RUBY_EXTERN VALUE rb_cStat; /**< `File::Stat` class. */
RUBY_EXTERN VALUE rb_cString; /**< `String` class. */
RUBY_EXTERN VALUE rb_cStruct; /**< `Struct` class. */
-RUBY_EXTERN VALUE rb_cSymbol; /**< `Sumbol` class. */
+RUBY_EXTERN VALUE rb_cSymbol; /**< `Symbol` class. */
RUBY_EXTERN VALUE rb_cThread; /**< `Thread` class. */
RUBY_EXTERN VALUE rb_cTime; /**< `Time` class. */
RUBY_EXTERN VALUE rb_cTrueClass; /**< `TrueClass` class. */
diff --git a/include/ruby/internal/has/builtin.h b/include/ruby/internal/has/builtin.h
index 957aff8375..243ba2a34c 100644
--- a/include/ruby/internal/has/builtin.h
+++ b/include/ruby/internal/has/builtin.h
@@ -53,8 +53,10 @@
# define RBIMPL_HAS_BUILTIN___builtin_assume 0
# /* See http://gcc.gnu.org/bugzilla/show_bug.cgi?id=52624 for bswap16. */
# define RBIMPL_HAS_BUILTIN___builtin_bswap16 RBIMPL_COMPILER_SINCE(GCC, 4, 8, 0)
+#ifndef __OpenBSD__
# define RBIMPL_HAS_BUILTIN___builtin_bswap32 RBIMPL_COMPILER_SINCE(GCC, 3, 6, 0)
# define RBIMPL_HAS_BUILTIN___builtin_bswap64 RBIMPL_COMPILER_SINCE(GCC, 3, 6, 0)
+#endif
# define RBIMPL_HAS_BUILTIN___builtin_clz RBIMPL_COMPILER_SINCE(GCC, 3, 6, 0)
# define RBIMPL_HAS_BUILTIN___builtin_clzl RBIMPL_COMPILER_SINCE(GCC, 3, 6, 0)
# define RBIMPL_HAS_BUILTIN___builtin_clzll RBIMPL_COMPILER_SINCE(GCC, 3, 6, 0)
@@ -76,10 +78,6 @@
# define RBIMPL_HAS_BUILTIN___builtin_unreachable RBIMPL_COMPILER_SINCE(GCC, 4, 5, 0)
# /* Note that "0, 0, 0" might be inaccurate. */
-#elif RBIMPL_COMPILER_IS(MSVC)
-# /* MSVC has UNREACHABLE, but that is not __builtin_unreachable. */
-# define RBIMPL_HAS_BUILTIN(_) 0
-
#else
# /* Take config.h definition when available */
# define RBIMPL_HAS_BUILTIN(_) ((RBIMPL_HAS_BUILTIN_ ## _)+0)
@@ -109,7 +107,7 @@
# define RBIMPL_HAS_BUILTIN___builtin_rotateright64 0
# define RBIMPL_HAS_BUILTIN___builtin_popcountll HAVE_BUILTIN___BUILTIN_POPCOUNTLL
# define RBIMPL_HAS_BUILTIN___builtin_sub_overflow HAVE_BUILTIN___BUILTIN_SUB_OVERFLOW
-# if defined(UNREACHABLE)
+# if defined(HAVE___BUILTIN_UNREACHABLE)
# define RBIMPL_HAS_BUILTIN___builtin_unreachable 1
# else
# define RBIMPL_HAS_BUILTIN___builtin_unreachable 0
diff --git a/include/ruby/internal/has/c_attribute.h b/include/ruby/internal/has/c_attribute.h
index c5c48867bf..69b0f402cd 100644
--- a/include/ruby/internal/has/c_attribute.h
+++ b/include/ruby/internal/has/c_attribute.h
@@ -21,11 +21,23 @@
* @brief Defines #RBIMPL_HAS_C_ATTRIBUTE.
*/
+#include "ruby/internal/has/extension.h"
+#include "ruby/internal/has/warning.h"
+
/** Wraps (or simulates) `__has_c_attribute`. */
#if defined(__cplusplus)
# /* Makes no sense. */
# define RBIMPL_HAS_C_ATTRIBUTE(_) 0
+#elif RBIMPL_HAS_EXTENSION(c_attributes)
+# /* Hmm. It seems Clang 17 has this macro defined even in -std=c99 mode,
+# * _and_ fails to compile complaining that attributes are a C2X feature. We
+# * need to work around this nonsense. */
+# define RBIMPL_HAS_C_ATTRIBUTE(_) __has_c_attribute(_)
+
+#elif RBIMPL_HAS_WARNING("-Wc2x-extensions")
+# define RBIMPL_HAS_C_ATTRIBUTE(_) 0
+
#elif defined(__has_c_attribute)
# define RBIMPL_HAS_C_ATTRIBUTE(_) __has_c_attribute(_)
diff --git a/include/ruby/internal/intern/array.h b/include/ruby/internal/intern/array.h
index 17964bf810..1909fdf17b 100644
--- a/include/ruby/internal/intern/array.h
+++ b/include/ruby/internal/intern/array.h
@@ -107,14 +107,14 @@ VALUE rb_ary_new_from_args(long n, ...);
VALUE rb_ary_new_from_values(long n, const VALUE *elts);
/**
- * Allocates a "temporary" array. This is a hidden empty array. Handy on
- * occasions.
+ * Allocates a hidden (no class) empty array.
*
* @param[in] capa Designed capacity of the array.
* @return A hidden, empty array.
* @see rb_obj_hide()
*/
-VALUE rb_ary_tmp_new(long capa);
+VALUE rb_ary_hidden_new(long capa);
+#define rb_ary_tmp_new rb_ary_hidden_new
/**
* Destroys the given array for no reason.
@@ -187,7 +187,7 @@ VALUE rb_ary_shared_with_p(VALUE lhs, VALUE rhs);
* : (int i) -> T?
* | (int beg, int len) -> ::Array[T]?
* | (Range[int] r) -> ::Array[T]?
- * | (ArithmeticSequence as) -> ::Array[T]? # This also raises RagneError.
+ * | (ArithmeticSequence as) -> ::Array[T]? # This also raises RangeError.
* end
* ```
*/
diff --git a/include/ruby/internal/intern/bignum.h b/include/ruby/internal/intern/bignum.h
index 43d68018de..c27f77a1fb 100644
--- a/include/ruby/internal/intern/bignum.h
+++ b/include/ruby/internal/intern/bignum.h
@@ -51,7 +51,7 @@ RBIMPL_SYMBOL_EXPORT_BEGIN()
VALUE rb_big_new(size_t len, int sign);
/**
- * Queries if the passed bignum instance is a "bigzro". What is a bigzero?
+ * Queries if the passed bignum instance is a "bigzero". What is a bigzero?
* Well, bignums are for very big integers, but can also represent tiny ones
* like -1, 0, 1. Bigzero are instances of bignums whose values are zero.
* Knowing if a bignum is bigzero can be handy on occasions, like for instance
@@ -793,7 +793,7 @@ size_t rb_absint_size(VALUE val, int *nlz_bits_ret);
* @exception rb_eTypeError `val` doesn't respond to `#to_int`.
* @retval (size_t)-1 Overflowed.
* @retval otherwise
- `((val_numbits * CHAR_BIT + word_numbits - 1) / word_numbits)`,
+ * `((val_numbits * CHAR_BIT + word_numbits - 1) / word_numbits)`,
* where val_numbits is the number of bits of `abs(val)`.
* @post If `nlz_bits_ret` is not `NULL` and there is no overflow,
* `(return_value * word_numbits - val_numbits)` is stored in
diff --git a/include/ruby/internal/intern/class.h b/include/ruby/internal/intern/class.h
index af0c0768b8..357af5d176 100644
--- a/include/ruby/internal/intern/class.h
+++ b/include/ruby/internal/intern/class.h
@@ -88,8 +88,8 @@ VALUE rb_define_class_id(ID id, VALUE super);
* @post `outer::id` refers the returned class.
* @note If a class named `id` is already defined and its superclass is
* `super`, the function just returns the defined class.
- * @note The compaction GC does not move classes returned by this
- * function.
+ * @note The GC does not collect nor move classes returned by this
+ * function. They are immortal.
*/
VALUE rb_define_class_id_under(VALUE outer, ID id, VALUE super);
@@ -100,6 +100,14 @@ VALUE rb_define_class_id_under(VALUE outer, ID id, VALUE super);
*/
VALUE rb_module_new(void);
+
+/**
+ * Creates a new, anonymous refinement.
+ *
+ * @return An anonymous refinement.
+ */
+VALUE rb_refinement_new(void);
+
/**
* This is a very badly designed API that creates an anonymous module.
*
@@ -119,8 +127,8 @@ VALUE rb_define_module_id(ID id);
* constant is not a module.
* @return The created module.
* @post `outer::id` refers the returned module.
- * @note The compaction GC does not move classes returned by this
- * function.
+ * @note The GC does not collect nor move modules returned by this
+ * function. They are immortal.
*/
VALUE rb_define_module_id_under(VALUE outer, ID id);
@@ -150,7 +158,7 @@ VALUE rb_mod_included_modules(VALUE mod);
VALUE rb_mod_include_p(VALUE child, VALUE parent);
/**
- * Queries the module's ancestors. This routine gathers classes and modules
+ * Queries the module's ancestors. This routine gathers classes and modules
* that the passed module either inherits, includes, or prepends, then
* recursively applies that routine again and again to the collected entries
* until the list doesn't grow up.
@@ -167,6 +175,44 @@ VALUE rb_mod_include_p(VALUE child, VALUE parent);
VALUE rb_mod_ancestors(VALUE mod);
/**
+ * Queries the class's descendants. This routine gathers classes that are
+ * subclasses of the given class (or subclasses of those subclasses, etc.),
+ * returning an array of classes that have the given class as an ancestor.
+ * The returned array does not include the given class or singleton classes.
+ *
+ * @param[in] klass A class.
+ * @return An array of classes where `klass` is an ancestor.
+ *
+ * @internal
+ */
+VALUE rb_class_descendants(VALUE klass);
+
+/**
+ * Queries the class's direct descendants. This routine gathers classes that are
+ * direct subclasses of the given class,
+ * returning an array of classes that have the given class as a superclass.
+ * The returned array does not include singleton classes.
+ *
+ * @param[in] klass A class.
+ * @return An array of classes where `klass` is the `superclass`.
+ *
+ * @internal
+ */
+VALUE rb_class_subclasses(VALUE klass);
+
+
+/**
+ * Returns the attached object for a singleton class.
+ * If the given class is not a singleton class, raises a TypeError.
+ *
+ * @param[in] klass A class.
+ * @return The object which has the singleton class `klass`.
+ *
+ * @internal
+ */
+VALUE rb_class_attached_object(VALUE klass);
+
+/**
* Generates an array of symbols, which are the list of method names defined in
* the passed class.
*
diff --git a/include/ruby/internal/intern/cont.h b/include/ruby/internal/intern/cont.h
index b0d9137dd9..32647f48aa 100644
--- a/include/ruby/internal/intern/cont.h
+++ b/include/ruby/internal/intern/cont.h
@@ -39,6 +39,28 @@ RBIMPL_SYMBOL_EXPORT_BEGIN()
VALUE rb_fiber_new(rb_block_call_func_t func, VALUE callback_obj);
/**
+ * Creates a Fiber instance from a C-backended block with the specified
+ * storage.
+ *
+ * If the given storage is Qundef or Qtrue, this function is equivalent to
+ * rb_fiber_new() which inherits storage from the current fiber.
+ *
+ * Specifying Qtrue is experimental and may be changed in the future.
+ *
+ * If the given storage is Qnil, this function will lazily initialize the
+ * internal storage, which starts off empty (without any inheritance).
+ *
+ * Otherwise, the given storage is used as the internal storage.
+ *
+ * @param[in] func A function, to become the fiber's body.
+ * @param[in] callback_obj Passed as-is to `func`.
+ * @param[in] storage The way to set up the storage for the fiber.
+ * @return An allocated new instance of rb_cFiber, which is ready to be
+ * "resume"d.
+ */
+VALUE rb_fiber_new_storage(rb_block_call_func_t func, VALUE callback_obj, VALUE storage);
+
+/**
* Queries the fiber which is calling this function. Any ruby execution
* context has its fiber, either explicitly or implicitly.
*
@@ -139,8 +161,7 @@ VALUE rb_fiber_resume_kw(VALUE fiber, int argc, const VALUE *argv, int kw_splat)
* fiber then suspends its execution until next time it is resumed.
*
* This function can also raise arbitrary exceptions injected from outside of
- * the fiber, using `Fiber#raise` Ruby level API. There is no way to do that
- * from C though.
+ * the fiber using rb_fiber_raise().
*
* ```ruby
* exc = Class.new Exception
@@ -159,12 +180,6 @@ VALUE rb_fiber_resume_kw(VALUE fiber, int argc, const VALUE *argv, int kw_splat)
* @param[in] argv Passed to rb_fiber_resume().
* @exception rb_eException (See above)
* @return (See rb_fiber_resume() for details)
- *
- * @internal
- *
- * "There is no way to do that from C" is a lie. But @shyouhei doesn't think
- * this very intentionally obfuscated way to raise arbitrary exceptions from C
- * is an official C API. Extension libraries must not know this fact.
*/
VALUE rb_fiber_yield(int argc, const VALUE *argv);
@@ -239,7 +254,28 @@ VALUE rb_fiber_transfer(VALUE fiber, int argc, const VALUE *argv);
*/
VALUE rb_fiber_transfer_kw(VALUE fiber, int argc, const VALUE *argv, int kw_splat);
-VALUE rb_fiber_raise(VALUE fiber, int argc, VALUE *argv);
+/**
+ * Identical to rb_fiber_resume() but instead of resuming normal execution of
+ * the passed fiber, it raises the given exception in it. From inside of the
+ * fiber this would be seen as if rb_fiber_yield() raised.
+ *
+ * This function does return in case the passed fiber gracefully handled the
+ * passed exception. But if it does not, the raised exception propagates out
+ * of the passed fiber; this function then does not return.
+ *
+ * Parameters are passed to rb_make_exception() to create an exception object.
+ * See its document for what are allowed here.
+ *
+ * It is a failure to call this function against a fiber which is resuming,
+ * has never run yet, or has already finished running.
+ *
+ * @param[out] fiber Where exception is raised.
+ * @param[in] argc Passed as-is to rb_make_exception().
+ * @param[in] argv Passed as-is to rb_make_exception().
+ * @exception rb_eFiberError `fiber` is terminated etc.
+ * @return (See rb_fiber_resume() for details)
+ */
+VALUE rb_fiber_raise(VALUE fiber, int argc, const VALUE *argv);
RBIMPL_SYMBOL_EXPORT_END()
diff --git a/include/ruby/internal/intern/error.h b/include/ruby/internal/intern/error.h
index 37d3b8592b..11e147a121 100644
--- a/include/ruby/internal/intern/error.h
+++ b/include/ruby/internal/intern/error.h
@@ -38,8 +38,6 @@
#define rb_exc_new3 rb_exc_new_str /**< @old{rb_exc_new_str} */
/** @cond INTERNAL_MACRO */
-#define rb_check_trusted rb_check_trusted
-#define rb_check_trusted_inline rb_check_trusted
#define rb_check_arity rb_check_arity
/** @endcond */
@@ -192,7 +190,6 @@ RBIMPL_ATTR_NONNULL(())
*/
void rb_error_frozen(const char *what);
-RBIMPL_ATTR_NORETURN()
/**
* Identical to rb_error_frozen(), except it takes arbitrary Ruby object
* instead of C's string.
@@ -204,12 +201,6 @@ RBIMPL_ATTR_NORETURN()
void rb_error_frozen_object(VALUE what);
/**
- * @deprecated Does nothing. This method is deprecated and will be removed in
- * Ruby 3.2.
- */
-void rb_error_untrusted(VALUE);
-
-/**
* Queries if the passed object is frozen.
*
* @param[in] obj Target object to test frozen-ness.
@@ -219,12 +210,6 @@ void rb_error_untrusted(VALUE);
void rb_check_frozen(VALUE obj);
/**
- * @deprecated Does nothing. This method is deprecated and will be removed in
- * Ruby 3.2.
- */
-void rb_check_trusted(VALUE);
-
-/**
* Ensures that the passed object can be `initialize_copy` relationship. When
* you implement your own one you would better call this at the right beginning
* of your implementation.
@@ -249,7 +234,7 @@ RBIMPL_ATTR_NORETURN()
* @param[in] max Maximum allowed `argc`.
* @exception rb_eArgError Always.
*/
-MJIT_STATIC void rb_error_arity(int argc, int min, int max);
+void rb_error_arity(int argc, int min, int max);
RBIMPL_SYMBOL_EXPORT_END()
diff --git a/include/ruby/internal/intern/file.h b/include/ruby/internal/intern/file.h
index 8e98ba08f8..79820fdc61 100644
--- a/include/ruby/internal/intern/file.h
+++ b/include/ruby/internal/intern/file.h
@@ -187,6 +187,27 @@ RBIMPL_ATTR_PURE()
*/
int rb_is_absolute_path(const char *path);
+/**
+ * Queries the file size of the given file. Because this function calls
+ * `fstat(2)` internally, it is a failure to pass a closed file to this
+ * function.
+ *
+ * This function flushes the passed file's buffer if any. Can take time.
+ *
+ * @param[in] file A file object.
+ * @exception rb_eFrozenError `file` is frozen.
+ * @exception rb_eIOError `file` is closed.
+ * @exception rb_eSystemCallError Permission denied etc.
+ * @exception rb_eNoMethodError The given non-file object doesn't respond
+ * to `#size`.
+ * @return The size of the passed file.
+ * @note Passing a non-regular file such as a UNIX domain socket to this
+ * function is not a failure. But the return value is
+ * unpredictable. POSIX's `<sys/stat.h>` states that "the use of
+ * this field is unspecified" then.
+ */
+rb_off_t rb_file_size(VALUE file);
+
RBIMPL_SYMBOL_EXPORT_END()
#endif /* RBIMPL_INTERN_FILE_H */
diff --git a/include/ruby/internal/intern/gc.h b/include/ruby/internal/intern/gc.h
deleted file mode 100644
index 1617a7cef6..0000000000
--- a/include/ruby/internal/intern/gc.h
+++ /dev/null
@@ -1,390 +0,0 @@
-#ifndef RBIMPL_INTERN_GC_H /*-*-C++-*-vi:se ft=cpp:*/
-#define RBIMPL_INTERN_GC_H
-/**
- * @file
- * @author Ruby developers <ruby-core@ruby-lang.org>
- * @copyright This file is a part of the programming language Ruby.
- * Permission is hereby granted, to either redistribute and/or
- * modify this file, provided that the conditions mentioned in the
- * file COPYING are met. Consult the file for details.
- * @warning Symbols prefixed with either `RBIMPL` or `rbimpl` are
- * implementation details. Don't take them as canon. They could
- * rapidly appear then vanish. The name (path) of this header file
- * is also an implementation detail. Do not expect it to persist
- * at the place it is now. Developers are free to move it anywhere
- * anytime at will.
- * @note To ruby-core: remember that this header can be possibly
- * recursively included from extension libraries written in C++.
- * Do not expect for instance `__VA_ARGS__` is always available.
- * We assume C99 for ruby itself but we don't assume languages of
- * extension libraries. They could be written in C++98.
- * @brief Public APIs related to ::rb_mGC.
- */
-#include "ruby/internal/config.h"
-
-#ifdef STDC_HEADERS
-# include <stddef.h> /* size_t */
-#endif
-
-#if HAVE_SYS_TYPES_H
-# include <sys/types.h> /* ssize_t */
-#endif
-
-#include "ruby/internal/attr/cold.h"
-#include "ruby/internal/attr/noreturn.h"
-#include "ruby/internal/attr/nonnull.h"
-#include "ruby/internal/attr/pure.h"
-#include "ruby/internal/dllexport.h"
-#include "ruby/internal/value.h"
-
-RBIMPL_SYMBOL_EXPORT_BEGIN()
-
-/* gc.c */
-
-RBIMPL_ATTR_COLD()
-RBIMPL_ATTR_NORETURN()
-/**
- * Triggers out-of-memory error. If possible it raises ::rb_eNoMemError. But
- * because we are running out of memory that is not always doable. This
- * function tries hard to show something, but ultimately can die silently.
- *
- * @exception rb_eNoMemError Raises it if possible.
- */
-void rb_memerror(void);
-
-RBIMPL_ATTR_PURE()
-/**
- * Queries if the GC is busy.
- *
- * @retval 0 It isn't.
- * @retval 1 It is.
- */
-int rb_during_gc(void);
-
-RBIMPL_ATTR_NONNULL((1))
-/**
- * Marks objects between the two pointers. This is one of the GC utility
- * functions that you can call when you design your own
- * ::rb_data_type_struct::dmark.
- *
- * @pre Continuous memory region from `start` to `end` shall be fully
- * addressable.
- * @param[out] start Pointer to an array of objects.
- * @param[out] end Pointer that terminates the array of objects.
- * @post Objects from `start` to `end`, both inclusive, are marked.
- *
- * @internal
- *
- * `end` can be NULL... But that just results in no-op.
- */
-void rb_gc_mark_locations(const VALUE *start, const VALUE *end);
-
-/**
- * Identical to rb_mark_hash(), except it marks only values of the table and
- * leave their associated keys unmarked. This is one of the GC utility
- * functions that you can call when you design your own
- * ::rb_data_type_struct::dmark.
- *
- * @warning Of course it can break GC. Leave it unused if unsure.
- * @param[in] tbl A table to mark.
- * @post Values stored in `tbl` are marked.
- */
-void rb_mark_tbl(struct st_table *tbl);
-
-/**
- * Identical to rb_mark_tbl(), except it marks objects using
- * rb_gc_mark_movable(). This is one of the GC utility functions that you can
- * call when you design your own ::rb_data_type_struct::dmark.
- *
- * @warning Of course it can break GC. Leave it unused if unsure.
- * @param[in] tbl A table to mark.
- * @post Values stored in `tbl` are marked.
- */
-void rb_mark_tbl_no_pin(struct st_table *tbl);
-
-/**
- * Identical to rb_mark_hash(), except it marks only keys of the table and
- * leave their associated values unmarked. This is one of the GC utility
- * functions that you can call when you design your own
- * ::rb_data_type_struct::dmark.
- *
- * @warning Of course it can break GC. Leave it unused if unsure.
- * @param[in] tbl A table to mark.
- * @post Keys stored in `tbl` are marked.
- */
-void rb_mark_set(struct st_table *tbl);
-
-/**
- * Marks keys and values associated inside of the given table. This is one of
- * the GC utility functions that you can call when you design your own
- * ::rb_data_type_struct::dmark.
- *
- * @param[in] tbl A table to mark.
- * @post Objects stored in `tbl` are marked.
- */
-void rb_mark_hash(struct st_table *tbl);
-
-/**
- * Updates references inside of tables. After you marked values using
- * rb_mark_tbl_no_pin(), the objects inside of the table could of course be
- * moved. This function is to fixup those references. You can call this from
- * your ::rb_data_type_struct::dcompact.
- *
- * @param[out] ptr A table that potentially includes moved references.
- * @post Moved references, if any, are corrected.
- */
-void rb_gc_update_tbl_refs(st_table *ptr);
-
-/**
- * Identical to rb_gc_mark(), except it allows the passed value be a
- * non-object. For instance pointers to different type of memory regions are
- * allowed here. Such values are silently ignored. This is one of the GC
- * utility functions that you can call when you design your own
- * ::rb_data_type_struct::dmark.
- *
- * @param[out] obj A possible object.
- * @post `obj` is marked, if possible.
- */
-void rb_gc_mark_maybe(VALUE obj);
-
-/**
- * Marks an object. This is one of the GC utility functions that you can call
- * when you design your own ::rb_data_type_struct::dmark.
- *
- * @param[out] obj Arbitrary Ruby object.
- * @post `obj` is marked.
- */
-void rb_gc_mark(VALUE obj);
-
-/**
- * Maybe this is the only function provided for C extensions to control the
- * pinning of objects, so let us describe it in detail. These days Ruby's GC
- * is copying. As far as an object's physical address is guaranteed unused, it
- * can move around the object space. Our GC engine rearranges these objects
- * after it reclaims unreachable objects from our object space, so that the
- * space is compact (improves memory locality). This is called the
- * "compaction" phase, and works well most of the time... as far as there are
- * no C extensions. C extensions complicate the scenario because Ruby core
- * cannot detect any use of the physical address of an object inside of C
- * functions. In order to prevent memory corruptions, objects observable from
- * C extensions are "pinned"; they stick to where they are born until they die,
- * just in case any C extensions touch their raw pointers. This variant of
- * scheme is called "Mostly-Copying" garbage collector. Authors of C
- * extensions, however, can extremely carefully write them to become
- * compaction-aware. To do so avoid referring to a Ruby object from inside of
- * your struct in the first place. But if that is not possible, use this
- * function from your ::rb_data_type_struct::dmark then. This way objects
- * marked using it are considered movable. If you chose this way you have to
- * manually fix up locations of such moved pointers using rb_gc_location().
- *
- * @see Bartlett, Joel F., "Compacting Garbage Collection with Ambiguous
- * Roots", ACM SIGPLAN Lisp Pointers Volume 1 Issue 6 pp. 3-12,
- * April-May-June, 1988. https://doi.org/10.1145/1317224.1317225
- *
- * @param[in] obj Object that is movable.
- * @post Values stored in `tbl` are marked.
- */
-void rb_gc_mark_movable(VALUE obj);
-
-/**
- * Finds a new "location" of an object. An object can be moved on compaction.
- * This function projects its new abode, or just returns the passed object if
- * not moved. This is one of the GC utility functions that you can call when
- * you design your own ::rb_data_type_struct::dcompact.
- *
- * @param[in] obj An object, possibly already moved to somewhere else.
- * @return An object, which holds the current contents of former `obj`.
- */
-VALUE rb_gc_location(VALUE obj);
-
-/**
- * Asserts that the passed object is no longer needed. Such objects are
- * reclaimed sooner or later so this function is not mandatory. But sometimes
- * you can know from your application knowledge that an object is surely dead
- * at some point. Calling this as a hint can be a polite way.
- *
- * @param[out] obj Object, dead.
- * @pre `obj` have never been passed to this function before.
- * @post `obj` could be invalidated.
- * @warning It is a failure to pass an object multiple times to this
- * function.
- */
-void rb_gc_force_recycle(VALUE obj);
-
-/**
- * Triggers a GC process. This was the only GC entry point that we had at the
- * beginning. Over time our GC evolved. Now what this function does is just a
- * very simplified variation of the entire GC algorithms. A series of
- * procedures kicked by this API is called a "full" GC.
- *
- * - It immediately scans the entire object space to sort the dead.
- * - It immediately reclaims any single dead bodies to reuse later.
- *
- * It is worth noting that the procedures above do not include evaluations of
- * finalisers. They run later.
- *
- * @internal
- *
- * Finalisers are deferred until we can handle interrupts. See
- * `rb_postponed_job_flush` in vm_trace.c.
- *
- * Of course there are GC that are not "full". For instance this one and the
- * GC which runs when we are running out of memory are different. See
- * `gc_profile_record_flag` defined in gc.c for the kinds of GC.
- *
- * In spite of the name this is not what everything that a GC can trigger. As
- * of writing it seems this function does not trigger compaction. But this
- * might change in future.
- */
-void rb_gc(void);
-
-/**
- * Copy&paste an object's finaliser to another. This is one of the GC utility
- * functions that you can call when you design your own `initialize_copy`,
- * `initialize_dup`, `initialize_clone`.
- *
- * @param[out] dst Destination object.
- * @param[in] src Source object.
- * @post `dst` and `src` share the same finaliser.
- *
- * @internal
- *
- * But isn't it easier for you to call super, and let `Object#intialize_copy`
- * call this function instead?
- */
-void rb_gc_copy_finalizer(VALUE dst, VALUE src);
-
-/**
- * (Re-) enables GC. This makes sense only after you called rb_gc_disable().
- *
- * @retval RUBY_Qtrue GC was disabled before.
- * @retval RUBY_Qfalse GC was enabled before.
- * @post GC is enabled.
- *
- * @internal
- *
- * This is one of such exceptional functions that does not raise both Ruby
- * exceptions and C++ exceptions.
- */
-VALUE rb_gc_enable(void);
-
-/**
- * Disables GC. This prevents automatic GC runs when the process is running
- * out of memory. Such situations shall result in rb_memerror(). However this
- * does not prevent users from manually invoking rb_gc(). That should work.
- * People abused this by disabling GC at the beginning of an event loop,
- * process events without GC overheads, then manually force reclaiming garbage
- * at the bottom of the loop. However because our GC is now much smarter than
- * just calling rb_gc(), this technique is proven to be sub-optimal these days.
- * It is believed that there is currently practically no needs of this
- * function.
- *
- * @retval RUBY_Qtrue GC was disabled before.
- * @retval RUBY_Qfalse GC was enabled before.
- * @post GC is disabled.
- */
-VALUE rb_gc_disable(void);
-
-/**
- * Identical to rb_gc(), except the return value.
- *
- * @return Always returns ::RUBY_Qnil.
- */
-VALUE rb_gc_start(void);
-
-/**
- * Assigns a finaliser for an object. Each objects can have objects (typically
- * blocks) that run immediately after that object dies. They are called
- * finalisers of an object. This function associates a finaliser object with a
- * target object.
- *
- * @note Note that finalisers run _after_ the object they finalise dies. You
- * cannot for instance call its methods.
- * @note If your finaliser references the object it finalises that object
- * loses any chance to become a garbage; effectively leaks memory until
- * the end of the process.
- *
- * @param[in] obj Target to finalise.
- * @param[in] block Something `call`able.
- * @exception rb_eRuntimeError Somehow `obj` cannot have finalisers.
- * @exception rb_eFrozenError `obj` is frozen.
- * @exception rb_eArgError `block` doesn't respond to `call`.
- * @return The passed `block`.
- * @post `block` runs after `obj` dies.
- */
-VALUE rb_define_finalizer(VALUE obj, VALUE block);
-
-/**
- * Modifies the object so that it has no finalisers at all. This function is
- * mainly provided for symmetry. No practical usages can be thought of.
- *
- * @param[out] obj Object to clear its finalisers.
- * @exception rb_eFrozenError `obj` is frozen.
- * @return The passed `obj`.
- * @post `obj` has no finalisers.
- * @note There is no way to undefine a specific part of many finalisers
- * that `obj` could have. All you can do is to clear them all.
- */
-VALUE rb_undefine_finalizer(VALUE obj);
-
-/**
- * Identical to rb_gc_stat(), with "count" parameter.
- *
- * @return Lifetime total number of runs of GC.
- */
-size_t rb_gc_count(void);
-
-/**
- * Obtains various GC related profiles. The parameter can be either a Symbol
- * or a Hash. If a Hash is passed, it is filled with everything currently
- * available. If a Symbol is passed just that portion is returned.
- *
- * Possible variations of keys you can pass here change from version to
- * version. You can get the list of known keys by passing an empty hash and
- * let it be filled.
- *
- * @param[in,out] key_or_buf A Symbol, or a Hash.
- * @exception rb_eTypeError Neither Symbol nor Hash.
- * @exception rb_eFrozenError Frozen hash is passed.
- * @return In case a Hash is passed it returns 0. Otherwise the
- * profile value associated with the given key is returned.
- * @post In case a Hash is passed it is filled with values.
- */
-size_t rb_gc_stat(VALUE key_or_buf);
-
-/**
- * Obtains various info regarding the most recent GC run. This includes for
- * instance the reason of the GC. The parameter can be either a Symbol or a
- * Hash. If a Hash is passed, it is filled with everything currently
- * available. If a Symbol is passed just that portion is returned.
- *
- * Possible variations of keys you can pass here change from version to
- * version. You can get the list of known keys by passing an empty hash and
- * let it be filled.
- *
- * @param[in,out] key_or_buf A Symbol, or a Hash.
- * @exception rb_eTypeError Neither Symbol nor Hash.
- * @exception rb_eFrozenError Frozen hash is passed.
- * @return In case a Hash is passed it returns that hash. Otherwise
- * the profile value associated with the given key is returned.
- * @post In case a Hash is passed it is filled with values.
- */
-VALUE rb_gc_latest_gc_info(VALUE key_or_buf);
-
-/**
- * Informs that there are external memory usages. Our GC runs when we are
- * running out of memory. The amount of memory, however, can increase/decrease
- * behind-the-scene. For instance DLLs can allocate memories using `mmap(2)`
- * etc, which are opaque to us. Registering such external allocations using
- * this function enables proper detection of how much memories an object used
- * as a whole. That will trigger GCs more often than it would otherwise. You
- * can also pass negative numbers here, to indicate that such external
- * allocations are gone.
- *
- * @param[in] diff Amount of memory increased(+)/decreased(-).
- */
-void rb_gc_adjust_memory_usage(ssize_t diff);
-
-RBIMPL_SYMBOL_EXPORT_END()
-
-#endif /* RBIMPL_INTERN_GC_H */
diff --git a/include/ruby/internal/intern/hash.h b/include/ruby/internal/intern/hash.h
index 9d2ce8279a..af8dfd5d8f 100644
--- a/include/ruby/internal/intern/hash.h
+++ b/include/ruby/internal/intern/hash.h
@@ -107,6 +107,17 @@ VALUE rb_hash(VALUE obj);
VALUE rb_hash_new(void);
/**
+ * Identical to rb_hash_new(), except it additionally specifies how many keys
+ * it is expected to contain. This way you can create a hash that is large enough
+ * for your need. For large hashes it means it won't need to be reallocated and
+ * rehashed as much, improving performance.
+ *
+ * @param[in] capa Designed capacity of the hash.
+ * @return An empty Hash, whose capacity is `capa`.
+ */
+VALUE rb_hash_new_capa(long capa);
+
+/**
* Duplicates a hash.
*
* @param[in] hash An instance of ::rb_cHash.
@@ -288,15 +299,6 @@ int rb_path_check(const char *path);
/* hash.c */
/**
- * @deprecated This function once was a thing in the old days, but makes no
- * sense any longer today. Exists here for backwards
- * compatibility only. You can safely forget about it.
- *
- * @return 0 always.
- */
-int rb_env_path_tainted(void);
-
-/**
* Destructively removes every environment variables of the running process.
*
* @return The `ENV` object.
diff --git a/include/ruby/internal/intern/load.h b/include/ruby/internal/intern/load.h
index 288a16c2ec..9ceb98c2e4 100644
--- a/include/ruby/internal/intern/load.h
+++ b/include/ruby/internal/intern/load.h
@@ -177,6 +177,43 @@ VALUE rb_f_require(VALUE self, VALUE feature);
VALUE rb_require_string(VALUE feature);
/**
+ * Resolves and returns a symbol of a function in the native extension
+ * specified by the feature and symbol names. Extensions will use this function
+ * to access the symbols provided by other native extensions.
+ *
+ * @param[in] feature Name of a feature, e.g. `"json"`.
+ * @param[in] symbol Name of a symbol defined by the feature.
+ * @return The resolved symbol of a function, defined and externed by the
+ * specified feature. It may be NULL if the feature is not loaded,
+ * the feature is not an extension, or the symbol is not found.
+ */
+void *rb_ext_resolve_symbol(const char *feature, const char *symbol);
+
+/**
+ * This macro is to provide backwards compatibility. It provides a way to
+ * define function prototypes and resolve function symbols in a safe way.
+ *
+ * ```CXX
+ * // prototypes
+ * #ifdef HAVE_RB_EXT_RESOLVE_SYMBOL
+ * VALUE (*other_extension_func)(VALUE,VALUE);
+ * #else
+ * VALUE other_extension_func(VALUE,VALUE);
+ * #endif
+ *
+ * // in Init_xxx()
+ * #ifdef HAVE_RB_EXT_RESOLVE_SYMBOL
+ * other_extension_func = \
+ * (VALUE(*)(VALUE,VALUE))rb_ext_resolve_symbol(fname, sym_name);
+ * if (other_extension_func == NULL) {
+ * // raise your own error
+ * }
+ * #endif
+ * ```
+ */
+#define HAVE_RB_EXT_RESOLVE_SYMBOL 1
+
+/**
* @name extension configuration
* @{
*/
diff --git a/include/ruby/internal/intern/object.h b/include/ruby/internal/intern/object.h
index 6bb4ccb2fe..9daad7d046 100644
--- a/include/ruby/internal/intern/object.h
+++ b/include/ruby/internal/intern/object.h
@@ -92,8 +92,8 @@ VALUE rb_class_new_instance_kw(int argc, const VALUE *argv, VALUE klass, int kw_
*
* @param[in] lhs Comparison left hand side.
* @param[in] rhs Comparison right hand side.
- * @retval RUBY_Qtrue They are equal.
- * @retval RUBY_Qfalse Otherwise.
+ * @retval non-zero They are equal.
+ * @retval 0 Otherwise.
* @note This function actually calls `lhs.eql?(rhs)` so you cannot
* implement your class' `#eql?` method using it.
*/
@@ -151,13 +151,12 @@ VALUE rb_obj_is_kind_of(VALUE obj, VALUE klass);
* @return An allocated, not yet initialised instance of `klass`.
* @note It calls the allocator defined by rb_define_alloc_func(). You
* cannot use this function to define an allocator. Use
- * rb_newobj_of(), #TypedData_Make_Struct or others, instead.
+ * TypedData_Make_Struct or others, instead.
* @note Usually prefer rb_class_new_instance() to rb_obj_alloc() and
* rb_obj_call_init().
* @see rb_class_new_instance()
* @see rb_obj_call_init()
* @see rb_define_alloc_func()
- * @see rb_newobj_of()
* @see #TypedData_Make_Struct
*/
VALUE rb_obj_alloc(VALUE klass);
@@ -202,74 +201,6 @@ VALUE rb_obj_dup(VALUE obj);
*/
VALUE rb_obj_init_copy(VALUE src, VALUE dst);
-RBIMPL_ATTR_DEPRECATED_EXT(("taintedness turned out to be a wrong idea."))
-/**
- * @deprecated This function once was a thing in the old days, but makes no
- * sense any longer today. Exists here for backwards
- * compatibility only. You can safely forget about it.
- *
- * @param[in] obj Object in question.
- * @return Verbatim `obj`.
- */
-VALUE rb_obj_taint(VALUE obj);
-
-RBIMPL_ATTR_PURE()
-RBIMPL_ATTR_DEPRECATED_EXT(("taintedness turned out to be a wrong idea."))
-/**
- * @deprecated This function once was a thing in the old days, but makes no
- * sense any longer today. Exists here for backwards
- * compatibility only. You can safely forget about it.
- *
- * @param[in] obj Object in question.
- * @return Always returns ::RUBY_Qfalse.
- */
-VALUE rb_obj_tainted(VALUE obj);
-
-RBIMPL_ATTR_DEPRECATED_EXT(("taintedness turned out to be a wrong idea."))
-/**
- * @deprecated This function once was a thing in the old days, but makes no
- * sense any longer today. Exists here for backwards
- * compatibility only. You can safely forget about it.
- *
- * @param[in] obj Object in question.
- * @return Verbatim `obj`.
- */
-VALUE rb_obj_untaint(VALUE obj);
-
-RBIMPL_ATTR_DEPRECATED_EXT(("trustedness turned out to be a wrong idea."))
-/**
- * @deprecated This function once was a thing in the old days, but makes no
- * sense any longer today. Exists here for backwards
- * compatibility only. You can safely forget about it.
- *
- * @param[in] obj Object in question.
- * @return Verbatim `obj`.
- */
-VALUE rb_obj_untrust(VALUE obj);
-
-RBIMPL_ATTR_PURE()
-RBIMPL_ATTR_DEPRECATED_EXT(("trustedness turned out to be a wrong idea."))
-/**
- * @deprecated This function once was a thing in the old days, but makes no
- * sense any longer today. Exists here for backwards
- * compatibility only. You can safely forget about it.
- *
- * @param[in] obj Object in question.
- * @return Always returns ::RUBY_Qfalse.
- */
-VALUE rb_obj_untrusted(VALUE obj);
-
-RBIMPL_ATTR_DEPRECATED_EXT(("trustedness turned out to be a wrong idea."))
-/**
- * @deprecated This function once was a thing in the old days, but makes no
- * sense any longer today. Exists here for backwards
- * compatibility only. You can safely forget about it.
- *
- * @param[in] obj Object in question.
- * @return Verbatim `obj`.
- */
-VALUE rb_obj_trust(VALUE obj);
-
/**
* Just calls rb_obj_freeze_inline() inside. Does this make any sens to
* extension libraries?
diff --git a/include/ruby/internal/intern/process.h b/include/ruby/internal/intern/process.h
index 7a7b24ed4b..cfa5e13162 100644
--- a/include/ruby/internal/intern/process.h
+++ b/include/ruby/internal/intern/process.h
@@ -31,6 +31,15 @@ RBIMPL_SYMBOL_EXPORT_BEGIN()
/* process.c */
/**
+ * Wait for the specified process to terminate, reap it, and return its status.
+ *
+ * @param[in] pid The process ID to wait for.
+ * @param[in] flags The flags to pass to waitpid(2).
+ * @return An instance of Process::Status.
+ */
+VALUE rb_process_status_wait(rb_pid_t pid, int flags);
+
+/**
* Sets the "last status", or the `$?`.
*
* @param[in] status The termination status, as defined in `waitpid(3posix)`.
@@ -247,7 +256,7 @@ rb_pid_t rb_spawn_err(int argc, const VALUE *argv, char *errbuf, size_t buflen);
*
* @internal
*
- * This function might or might not exist depending on `./confiugre` result.
+ * This function might or might not exist depending on `./configure` result.
* It must be a portability hell. Better not use.
*/
VALUE rb_proc_times(VALUE _);
diff --git a/include/ruby/internal/intern/re.h b/include/ruby/internal/intern/re.h
index 31f5593275..4dd58b469b 100644
--- a/include/ruby/internal/intern/re.h
+++ b/include/ruby/internal/intern/re.h
@@ -87,11 +87,6 @@ void rb_match_busy(VALUE md);
* @retval RUBY_Qfalse There is a `n`-th capture and is empty.
* @retval RUBY_Qtrue There is a `n`-th capture that has something.
*
- * @internal
- *
- * @shyouhei wonders: why there are both rb_reg_match_defined() and
- * rb_match_nth_defined, which are largely the same things, but do not share
- * their implementations at all?
*/
VALUE rb_reg_nth_defined(int n, VALUE md);
diff --git a/include/ruby/internal/intern/select.h b/include/ruby/internal/intern/select.h
index fabc287cd1..6ba84c6e63 100644
--- a/include/ruby/internal/intern/select.h
+++ b/include/ruby/internal/intern/select.h
@@ -76,7 +76,7 @@ struct timeval;
*
* Although any file descriptors are possible here, it makes completely no
* sense to pass a descriptor that is not `O_NONBLOCK`. If you want to know
- * the reason for this limitatuon in detail, you might find this thread super
+ * the reason for this limitation in detail, you might find this thread super
* interesting: https://lkml.org/lkml/2004/10/6/117
*/
int rb_thread_fd_select(int nfds, rb_fdset_t *rfds, rb_fdset_t *wfds, rb_fdset_t *efds, struct timeval *timeout);
diff --git a/include/ruby/internal/intern/select/largesize.h b/include/ruby/internal/intern/select/largesize.h
index d156f62034..d65f088c06 100644
--- a/include/ruby/internal/intern/select/largesize.h
+++ b/include/ruby/internal/intern/select/largesize.h
@@ -35,9 +35,6 @@
* `select(2)` documents how to allocate fd_set dynamically.
* http://www.openbsd.org/cgi-bin/man.cgi?query=select&manpath=OpenBSD+4.4
*
- * - HP-UX documents how to allocate fd_set dynamically.
- * http://docs.hp.com/en/B2355-60105/select.2.html
- *
* - Solaris 8 has `select_large_fdset`
*
* - Mac OS X 10.7 (Lion)
diff --git a/include/ruby/internal/intern/select/posix.h b/include/ruby/internal/intern/select/posix.h
index bfde159890..0a9b0b2e51 100644
--- a/include/ruby/internal/intern/select/posix.h
+++ b/include/ruby/internal/intern/select/posix.h
@@ -95,11 +95,10 @@ RBIMPL_ATTR_NOALIAS()
*
* @param[out] dst Target fdset.
* @param[in] src Source fdset.
- * @param[in] n Unused parameter.
* @post `dst` is a copy of `src`.
*/
static inline void
-rb_fd_dup(rb_fdset_t *dst, const fd_set *src, int n)
+rb_fd_dup(rb_fdset_t *dst, const fd_set *src)
{
*dst = *src;
}
@@ -137,7 +136,7 @@ rb_fd_max(const rb_fdset_t *f)
}
/** @cond INTERNAL_MACRO */
-/* :FIXME: What are these? They don't exist for shibling implementations. */
+/* :FIXME: What are these? They don't exist for sibling implementations. */
#define rb_fd_init_copy(d, s) (*(d) = *(s))
#define rb_fd_term(f) ((void)(f))
/** @endcond */
diff --git a/include/ruby/internal/intern/signal.h b/include/ruby/internal/intern/signal.h
index 84f7558404..4773788651 100644
--- a/include/ruby/internal/intern/signal.h
+++ b/include/ruby/internal/intern/signal.h
@@ -97,7 +97,7 @@ RBIMPL_ATTR_NONNULL(())
* - Case #11: When signo and PID are both negative, the behaviour of this
* function depends on how `killpg(3)` works. On Linux, it seems such
* attempt is strictly prohibited and `Errno::EINVAL` is raised. But on
- * macOS, it seems it tries to to send the signal actually to the process
+ * macOS, it seems it tries to send the signal actually to the process
* group.
*
* @note Above description is in fact different from how `kill(2)` works.
@@ -113,12 +113,6 @@ RBIMPL_ATTR_NONNULL(())
*/
VALUE rb_f_kill(int argc, const VALUE *argv);
-/* This must be private, @shyouhei guesses. */
-#ifdef POSIX_SIGNAL
-#define posix_signal ruby_posix_signal
-void (*posix_signal(int, void (*)(int)))(int);
-#endif
-
RBIMPL_ATTR_PURE()
/**
* Queries the name of the signal. It returns for instance `"KILL"` for
diff --git a/include/ruby/internal/intern/string.h b/include/ruby/internal/intern/string.h
index 0e2e6d6af7..6827563e8d 100644
--- a/include/ruby/internal/intern/string.h
+++ b/include/ruby/internal/intern/string.h
@@ -62,13 +62,13 @@ RBIMPL_SYMBOL_EXPORT_BEGIN()
*/
VALUE rb_str_new(const char *ptr, long len);
-RBIMPL_ATTR_NONNULL(())
/**
* Identical to rb_str_new(), except it assumes the passed pointer is a pointer
* to a C string.
*
* @param[in] ptr A C string.
* @exception rb_eNoMemError Failed to allocate memory.
+ * @exception rb_eArgError `ptr` is a null pointer.
* @return An instance of ::rb_cString, of "binary" encoding, whose
* contents are verbatim copy of `ptr`.
* @pre `ptr` must not be a null pointer.
@@ -122,37 +122,6 @@ VALUE rb_str_new_frozen(VALUE str);
*/
VALUE rb_str_new_with_class(VALUE obj, const char *ptr, long len);
-RBIMPL_ATTR_NONNULL(())
-/**
- * @deprecated This function once was a thing in the old days, but makes no
- * sense any longer today. Exists here for backwards
- * compatibility only. You can safely forget about it.
- *
- * @param[in] ptr A C string.
- * @exception rb_eNoMemError Failed to allocate memory.
- * @return An instance of ::rb_cString, of "binary" encoding, whose
- * contents are verbatim copy of `ptr`.
- * @pre `ptr` must not be a null pointer.
- */
-VALUE rb_tainted_str_new_cstr(const char *ptr);
-
-/**
- * @deprecated This function once was a thing in the old days, but makes no
- * sense any longer today. Exists here for backwards
- * compatibility only. You can safely forget about it.
- *
- * @param[in] ptr A memory region of `len` bytes length.
- * @param[in] len Length of `ptr`, in bytes, not including the
- * terminating NUL character.
- * @exception rb_eNoMemError Failed to allocate `len+1` bytes.
- * @exception rb_eArgError `len` is negative.
- * @return An instance of ::rb_cString, of `len` bytes length, of
- * "binary" encoding, whose contents are verbatim copy of `ptr`.
- * @pre At least `len` bytes of continuous memory region shall be
- * accessible via `ptr`.
- */
-VALUE rb_tainted_str_new(const char *ptr, long len);
-
/**
* Identical to rb_str_new(), except it generates a string of "default
* external" encoding.
@@ -333,7 +302,6 @@ VALUE rb_str_tmp_new(long len);
*/
VALUE rb_usascii_str_new(const char *ptr, long len);
-RBIMPL_ATTR_NONNULL(())
/**
* Identical to rb_str_new_cstr(), except it generates a string of "US ASCII"
* encoding. It can also be seen as a routine Identical to
@@ -342,6 +310,7 @@ RBIMPL_ATTR_NONNULL(())
*
* @param[in] ptr A C string.
* @exception rb_eNoMemError Failed to allocate memory.
+ * @exception rb_eArgError `ptr` is a null pointer.
* @return An instance of ::rb_cString, of "US ASCII" encoding, whose
* contents are verbatim copy of `ptr`.
* @pre `ptr` must not be a null pointer.
@@ -361,7 +330,6 @@ VALUE rb_usascii_str_new_cstr(const char *ptr);
*/
VALUE rb_utf8_str_new(const char *ptr, long len);
-RBIMPL_ATTR_NONNULL(())
/**
* Identical to rb_str_new_cstr(), except it generates a string of "UTF-8"
* encoding. It can also be seen as a routine Identical to
@@ -370,6 +338,7 @@ RBIMPL_ATTR_NONNULL(())
*
* @param[in] ptr A C string.
* @exception rb_eNoMemError Failed to allocate memory.
+ * @exception rb_eArgError `ptr` is a null pointer.
* @return An instance of ::rb_cString, of "UTF-8" encoding, whose contents
* are verbatim copy of `ptr`.
* @pre `ptr` must not be a null pointer.
@@ -443,7 +412,7 @@ VALUE rb_utf8_str_new_static(const char *ptr, long len);
/**
* Identical to rb_interned_str(), except it takes a Ruby's string instead of
- * C's. It can also be seen as a routine identical to to rb_str_new_shared(),
+ * C's. It can also be seen as a routine identical to rb_str_new_shared(),
* except it returns an infamous "f"string.
*
* @param[in] str An object of ::RString.
@@ -485,7 +454,7 @@ VALUE rb_interned_str(const char *ptr, long len);
RBIMPL_ATTR_NONNULL(())
/**
* Identical to rb_interned_str(), except it assumes the passed pointer is a
- * pointer to a C's string. It can also be seen as a routine identical to to
+ * pointer to a C's string. It can also be seen as a routine identical to
* rb_str_to_interned_str(), except it takes a C's string instead of Ruby's.
* Or it can also be seen as a routine identical to rb_str_new_cstr(), except
* it returns an infamous "f"string.
@@ -553,7 +522,6 @@ VALUE rb_str_buf_append(VALUE dst, VALUE src);
/** @alias{rb_str_cat} */
VALUE rb_str_buf_cat(VALUE, const char*, long);
-RBIMPL_ATTR_NONNULL(())
/** @alias{rb_str_cat_cstr} */
VALUE rb_str_buf_cat2(VALUE, const char*);
@@ -634,6 +602,21 @@ VALUE rb_str_dup(VALUE str);
VALUE rb_str_resurrect(VALUE str);
/**
+ * Returns whether a string is chilled or not.
+ *
+ * This function is temporary and users must check for its presence using
+ * #ifdef HAVE_RB_STR_CHILLED_P. If HAVE_RB_STR_CHILLED_P is not defined, then
+ * strings can't be chilled.
+ *
+ * @param[in] str A string.
+ * @retval 1 The string is chilled.
+ * @retval 0 Otherwise.
+ */
+bool rb_str_chilled_p(VALUE str);
+
+#define HAVE_RB_STR_CHILLED_P 1
+
+/**
* Obtains a "temporary lock" of the string. This advisory locking mechanism
* prevents other cooperating threads from tampering the receiver. The same
* thing could be done via freeze mechanism, but this one can also be unlocked
@@ -874,7 +857,6 @@ VALUE rb_str_resize(VALUE str, long len);
*/
VALUE rb_str_cat(VALUE dst, const char *src, long srclen);
-RBIMPL_ATTR_NONNULL(())
/**
* Identical to rb_str_cat(), except it assumes the passed pointer is a pointer
* to a C string.
@@ -882,6 +864,7 @@ RBIMPL_ATTR_NONNULL(())
* @param[out] dst Destination object.
* @param[in] src Contents to append.
* @exception rb_eArgError Result string too big.
+ * @exception rb_eArgError `src` is a null pointer.
* @return The passed `dst`.
* @pre `dst` must not be any arbitrary objects except ::RString.
* @pre `src` must not be a null pointer.
@@ -889,7 +872,6 @@ RBIMPL_ATTR_NONNULL(())
*/
VALUE rb_str_cat_cstr(VALUE dst, const char *src);
-RBIMPL_ATTR_NONNULL(())
/** @alias{rb_str_cat_cstr} */
VALUE rb_str_cat2(VALUE, const char*);
@@ -1153,7 +1135,6 @@ VALUE rb_str_inspect(VALUE str);
*/
VALUE rb_str_dump(VALUE str);
-RBIMPL_ATTR_NONNULL(())
/**
* Divides the given string based on the given delimiter. This is the
* 1-argument 0-block version of `String#split`.
@@ -1161,6 +1142,7 @@ RBIMPL_ATTR_NONNULL(())
* @param[in] str Object in question to split.
* @param[in] delim Delimiter, in C string.
* @exception rb_eTypeError `str` has no implicit conversion to String.
+ * @exception rb_eArgError `delim` is a null pointer.
* @return An array of strings, which are substrings of the passed `str`.
* If `delim` is an empty C string (i.e. `""`), `str` is split into
* each characters. If `delim` is a C string whose sole content is
@@ -1400,22 +1382,6 @@ rbimpl_str_new_cstr(const char *str)
return rb_str_new_static(str, len);
}
-RBIMPL_ATTR_DEPRECATED(("taintedness turned out to be a wrong idea."))
-/**
- * @private
- *
- * This is an implementation detail. Don't bother.
- *
- * @param[in] str A C string literal.
- * @return Corresponding Ruby string.
- */
-static inline VALUE
-rbimpl_tainted_str_new_cstr(const char *str)
-{
- long len = rbimpl_strlen(str);
- return rb_tainted_str_new(str, len);
-}
-
RBIMPL_ATTR_NONNULL(())
/**
* @private
@@ -1602,22 +1568,6 @@ rbimpl_exc_new_cstr(VALUE exc, const char *str)
rb_utf8_str_new) ((str), (len)))
/**
- * @deprecated This macro once was a thing in the old days, but makes no sense
- * any longer today. Exists here for backwards compatibility
- * only. You can safely forget about it.
- *
- * @param[in] str A C string.
- * @exception rb_eNoMemError Failed to allocate memory.
- * @return An instance of ::rb_cString, of "binary" encoding, whose
- * contents are verbatim copy of `str`.
- * @pre `str` must not be a null pointer.
- */
-#define rb_tainted_str_new_cstr(str) \
- ((RBIMPL_CONSTANT_P(str) ? \
- rbimpl_tainted_str_new_cstr : \
- rb_tainted_str_new_cstr) (str))
-
-/**
* Identical to #rb_str_new_cstr, except it generates a string of "US ASCII"
* encoding. It can also be seen as a routine Identical to
* #rb_usascii_str_new, except it assumes the passed pointer is a pointer to a
@@ -1741,7 +1691,6 @@ rbimpl_exc_new_cstr(VALUE exc, const char *str)
#define rb_str_new3 rb_str_new_shared /**< @old{rb_str_new_shared} */
#define rb_str_new4 rb_str_new_frozen /**< @old{rb_str_new_frozen} */
#define rb_str_new5 rb_str_new_with_class /**< @old{rb_str_new_with_class} */
-#define rb_tainted_str_new2 rb_tainted_str_new_cstr /**< @old{rb_tainted_str_new_cstr} */
#define rb_str_buf_new2 rb_str_buf_new_cstr /**< @old{rb_str_buf_new_cstr} */
#define rb_usascii_str_new2 rb_usascii_str_new_cstr /**< @old{rb_usascii_str_new_cstr} */
#define rb_str_buf_cat rb_str_cat /**< @alias{rb_str_cat} */
diff --git a/include/ruby/internal/intern/struct.h b/include/ruby/internal/intern/struct.h
index 312cf444e2..16b3fad4e0 100644
--- a/include/ruby/internal/intern/struct.h
+++ b/include/ruby/internal/intern/struct.h
@@ -46,14 +46,16 @@ VALUE rb_struct_new(VALUE klass, ...);
*
* @param[in] name Name of the class.
* @param[in] ... Arbitrary number of `const char*`, terminated by
- * zero. Each of which are the name of fields.
+ * NULL. Each of which are the name of fields.
* @exception rb_eNameError `name` is not a constant name.
* @exception rb_eTypeError `name` is already taken.
- * @exception rb_eArgError Duplicated field name.
+ * @exception rb_eArgError Duplicated field name.
* @return The defined class.
* @post Global toplevel constant `name` is defined.
* @note `name` is allowed to be a null pointer. This function creates
* an anonymous struct class then.
+ * @note The GC does not collect nor move classes returned by this
+ * function. They are immortal.
*
* @internal
*
@@ -70,14 +72,16 @@ RBIMPL_ATTR_NONNULL((2))
* @param[out] space Namespace that the defining class shall reside.
* @param[in] name Name of the class.
* @param[in] ... Arbitrary number of `const char*`, terminated by
- * zero. Each of which are the name of fields.
+ * NULL. Each of which are the name of fields.
* @exception rb_eNameError `name` is not a constant name.
* @exception rb_eTypeError `name` is already taken.
- * @exception rb_eArgError Duplicated field name.
+ * @exception rb_eArgError Duplicated field name.
* @return The defined class.
* @post `name` is a constant under `space`.
* @note In contrast to rb_struct_define(), it doesn't make any sense to
* pass a null pointer to this function.
+ * @note The GC does not collect nor move classes returned by this
+ * function. They are immortal.
*/
VALUE rb_struct_define_under(VALUE space, const char *name, ...);
@@ -164,10 +168,10 @@ VALUE rb_struct_alloc_noinit(VALUE klass);
* @param[in] super Superclass of the defining class.
* @param[in] func Must be 0 for extension libraries.
* @param[in] ... Arbitrary number of `const char*`, terminated by
- * zero. Each of which are the name of fields.
+ * NULL. Each of which are the name of fields.
* @exception rb_eNameError `name` is not a constant name.
* @exception rb_eTypeError `name` is already taken.
- * @exception rb_eArgError Duplicated field name.
+ * @exception rb_eArgError Duplicated field name.
* @return The defined class.
* @post Global toplevel constant `name` is defined.
* @note `name` is allowed to be a null pointer. This function creates
@@ -187,17 +191,35 @@ RBIMPL_ATTR_NONNULL((2))
* @param[in] super Superclass of the defining class.
* @param[in] alloc Must be 0 for extension libraries.
* @param[in] ... Arbitrary number of `const char*`, terminated by
- * zero. Each of which are the name of fields.
+ * NULL. Each of which are the name of fields.
* @exception rb_eNameError `class_name` is not a constant name.
* @exception rb_eTypeError `class_name` is already taken.
- * @exception rb_eArgError Duplicated field name.
+ * @exception rb_eArgError Duplicated field name.
* @return The defined class.
* @post `class_name` is a constant under `outer`.
* @note In contrast to rb_struct_define_without_accessor(), it doesn't
* make any sense to pass a null name.
+ * @note The GC does not collect nor move classes returned by this
+ * function. They are immortal.
*/
VALUE rb_struct_define_without_accessor_under(VALUE outer, const char *class_name, VALUE super, rb_alloc_func_t alloc, ...);
+/**
+ * Defines an anonymous data class.
+ *
+ * @endinternal
+ *
+ * @param[in] super Superclass of the defining class. Must be a
+ * descendant of ::rb_cData, or 0 as ::rb_cData.
+ * @param[in] ... Arbitrary number of `const char*`, terminated by
+ * NULL. Each of which are the name of fields.
+ * @exception rb_eArgError Duplicated field name.
+ * @return The defined class.
+ * @note The GC does not collect nor move classes returned by this
+ * function. They are immortal.
+ */
+VALUE rb_data_define(VALUE super, ...);
+
RBIMPL_SYMBOL_EXPORT_END()
#endif /* RBIMPL_INTERN_STRUCT_H */
diff --git a/include/ruby/internal/intern/thread.h b/include/ruby/internal/intern/thread.h
index 294e552fe9..716375acd7 100644
--- a/include/ruby/internal/intern/thread.h
+++ b/include/ruby/internal/intern/thread.h
@@ -46,7 +46,7 @@ void rb_thread_schedule(void);
*
* @param[in] fd A file descriptor.
* @exception rb_eIOError Closed stream.
- * @exception rb_eSystemCalleError Situations like EBADF.
+ * @exception rb_eSystemCallError Situations like EBADF.
*/
int rb_thread_wait_fd(int fd);
@@ -56,7 +56,7 @@ int rb_thread_wait_fd(int fd);
*
* @param[in] fd A file descriptor.
* @exception rb_eIOError Closed stream.
- * @exception rb_eSystemCalleError Situations like EBADF.
+ * @exception rb_eSystemCallError Situations like EBADF.
*/
int rb_thread_fd_writable(int fd);
diff --git a/include/ruby/internal/intern/vm.h b/include/ruby/internal/intern/vm.h
index 562d30a6fe..29e0c7f534 100644
--- a/include/ruby/internal/intern/vm.h
+++ b/include/ruby/internal/intern/vm.h
@@ -229,8 +229,7 @@ void rb_define_alloc_func(VALUE klass, rb_alloc_func_t func);
* restrict creation of an instance of a class. For example it rarely makes
* sense for a DB adaptor class to allow programmers creating DB row objects
* without querying the DB itself. You can kill sporadic creation of such
- * objects then, by nullifying the allocator function using this API. Your
- * object shall be allocated using #RB_NEWOBJ_OF() directly.
+ * objects then, by nullifying the allocator function using this API.
*
* @param[out] klass The class to modify.
* @pre `klass` must be an instance of Class.
@@ -247,21 +246,17 @@ void rb_undef_alloc_func(VALUE klass);
*
* @internal
*
- * Who cares? @shyouhei fins no practical usage of the return value. Maybe we
+ * Who cares? @shyouhei finds no practical usage of the return value. Maybe we
* need KonMari.
*/
rb_alloc_func_t rb_get_alloc_func(VALUE klass);
/**
- * Clears the constant cache. Extension libraries should not bother such
- * things. Just forget about this API (or even, the presence of constant
- * cache).
- *
- * @internal
- *
- * Completely no idea why this function is defined in vm_method.c.
+ * Clears the inline constant caches associated with a particular ID. Extension
+ * libraries should not bother with such things. Just forget about this API (or
+ * even, the presence of constant caches).
*/
-void rb_clear_constant_cache(void);
+void rb_clear_constant_cache_for_id(ID id);
/**
* Resembles `alias`.
diff --git a/include/ruby/internal/interpreter.h b/include/ruby/internal/interpreter.h
index 662d39c0ec..a10e7ad2d8 100644
--- a/include/ruby/internal/interpreter.h
+++ b/include/ruby/internal/interpreter.h
@@ -141,7 +141,7 @@ void ruby_show_copyright(void);
*
* @param[in] addr A pointer somewhere on the stack, near its bottom.
*/
-void ruby_init_stack(volatile VALUE *addr);
+void ruby_init_stack(void *addr);
/**
* Initializes the VM and builtin libraries.
diff --git a/include/ruby/internal/memory.h b/include/ruby/internal/memory.h
index aa3464465d..270cc1ac8b 100644
--- a/include/ruby/internal/memory.h
+++ b/include/ruby/internal/memory.h
@@ -38,7 +38,7 @@
# include <alloca.h>
#endif
-#if defined(_MSC_VER) && defined(_WIN64)
+#if defined(_MSC_VER) && defined(_M_AMD64)
# include <intrin.h>
# pragma intrinsic(_umul128)
#endif
@@ -56,13 +56,14 @@
#include "ruby/internal/has/builtin.h"
#include "ruby/internal/stdalign.h"
#include "ruby/internal/stdbool.h"
+#include "ruby/internal/stdckdint.h"
#include "ruby/internal/xmalloc.h"
#include "ruby/backward/2/limits.h"
#include "ruby/backward/2/long_long.h"
#include "ruby/backward/2/assume.h"
#include "ruby/defines.h"
-/** @cond INTENAL_MACRO */
+/** @cond INTERNAL_MACRO */
/* Make alloca work the best possible way. */
#if defined(alloca)
@@ -287,12 +288,12 @@ typedef uint128_t DSIZE_T;
RBIMPL_CAST((type *)alloca(rbimpl_size_mul_or_raise(sizeof(type), (n))))
/**
- * Identical to #RB_ALLOCV_N(), except it implicitly assumes the type of array
- * is ::VALUE.
+ * Identical to #RB_ALLOCV_N(), except that it allocates a number of bytes and
+ * returns a `void *`.
*
* @param v A variable to hold the just-in-case opaque Ruby object.
* @param n Size of allocation, in bytes.
- * @return An array of `n` bytes of ::VALUE.
+ * @return A void pointer to `n` bytes storage.
* @note `n` may be evaluated twice.
*/
#define RB_ALLOCV(v, n) \
@@ -363,7 +364,7 @@ typedef uint128_t DSIZE_T;
* @return `p1`.
* @post First `n` elements of `p2` are copied into `p1`.
*/
-#define MEMCPY(p1,p2,type,n) memcpy((p1), (p2), rbimpl_size_mul_or_raise(sizeof(type), (n)))
+#define MEMCPY(p1,p2,type,n) ruby_nonempty_memcpy((p1), (p2), rbimpl_size_mul_or_raise(sizeof(type), (n)))
/**
* Handy macro to call memmove.
@@ -567,7 +568,10 @@ rbimpl_size_mul_overflow(size_t x, size_t y)
{
struct rbimpl_size_mul_overflow_tag ret = { false, 0, };
-#if RBIMPL_HAS_BUILTIN(__builtin_mul_overflow)
+#if defined(ckd_mul)
+ ret.left = ckd_mul(&ret.right, x, y);
+
+#elif RBIMPL_HAS_BUILTIN(__builtin_mul_overflow)
ret.left = __builtin_mul_overflow(x, y, &ret.right);
#elif defined(DSIZE_T)
@@ -644,7 +648,6 @@ rb_alloc_tmp_buffer2(volatile VALUE *store, long count, size_t elsize)
return rb_alloc_tmp_buffer_with_count(store, total_size, cnt);
}
-#if ! defined(__MINGW32__) && ! defined(__DOXYGEN__)
RBIMPL_SYMBOL_EXPORT_BEGIN()
RBIMPL_ATTR_NOALIAS()
RBIMPL_ATTR_NONNULL((1))
@@ -663,8 +666,5 @@ ruby_nonempty_memcpy(void *dest, const void *src, size_t n)
}
}
RBIMPL_SYMBOL_EXPORT_END()
-#undef memcpy
-#define memcpy ruby_nonempty_memcpy
-#endif
#endif /* RBIMPL_MEMORY_H */
diff --git a/include/ruby/internal/module.h b/include/ruby/internal/module.h
index d678dd2102..97b0b2b8b0 100644
--- a/include/ruby/internal/module.h
+++ b/include/ruby/internal/module.h
@@ -56,8 +56,8 @@ RBIMPL_ATTR_NONNULL(())
* @post Top-level constant named `name` refers the returned class.
* @note If a class named `name` is already defined and its superclass is
* `super`, the function just returns the defined class.
- * @note The compaction GC does not move classes returned by this
- * function.
+ * @note The GC does not collect nor move classes returned by this
+ * function. They are immortal.
*
* @internal
*
@@ -75,8 +75,8 @@ RBIMPL_ATTR_NONNULL(())
* constant is not a module.
* @return The created module.
* @post Top-level constant named `name` refers the returned module.
- * @note The compaction GC does not move classes returned by this
- * function.
+ * @note The GC does not collect nor move modules returned by this
+ * function. They are immortal.
*
* @internal
*
@@ -103,8 +103,8 @@ RBIMPL_ATTR_NONNULL(())
* @post `outer::name` refers the returned class.
* @note If a class named `name` is already defined and its superclass
* is `super`, the function just returns the defined class.
- * @note The compaction GC does not move classes returned by this
- * function.
+ * @note The GC does not collect nor move classes returned by this
+ * function. They are immortal.
*/
VALUE rb_define_class_under(VALUE outer, const char *name, VALUE super);
@@ -118,8 +118,8 @@ RBIMPL_ATTR_NONNULL(())
* the constant is not a class.
* @return The created module.
* @post `outer::name` refers the returned module.
- * @note The compaction GC does not move classes returned by this
- * function.
+ * @note The GC does not collect nor move modules returned by this
+ * function. They are immortal.
*/
VALUE rb_define_module_under(VALUE outer, const char *name);
diff --git a/include/ruby/internal/newobj.h b/include/ruby/internal/newobj.h
index a8a5557a25..6eee2fa5fa 100644
--- a/include/ruby/internal/newobj.h
+++ b/include/ruby/internal/newobj.h
@@ -29,63 +29,14 @@
#include "ruby/internal/value.h"
#include "ruby/assert.h"
-/**
- * Declares, allocates, then assigns a new object to the given variable.
- *
- * @param obj Variable name.
- * @param type Variable type.
- * @exception rb_eNoMemError No space left.
- * @return An allocated object, not initialised.
- * @note Modern programs tend to use #NEWOBJ_OF instead.
- *
- * @internal
- *
- * :FIXME: Should we deprecate it?
- */
-#define RB_NEWOBJ(obj,type) type *(obj) = RBIMPL_CAST((type *)rb_newobj())
-
-/**
- * Identical to #RB_NEWOBJ, except it also accepts the allocating object's
- * class and flags.
- *
- * @param obj Variable name.
- * @param type Variable type.
- * @param klass Object's class.
- * @param flags Object's flags.
- * @exception rb_eNoMemError No space left.
- * @return An allocated object, filled with the arguments.
- */
-#define RB_NEWOBJ_OF(obj,type,klass,flags) type *(obj) = RBIMPL_CAST((type *)rb_newobj_of(klass, flags))
-
-#define NEWOBJ RB_NEWOBJ /**< @old{RB_NEWOBJ} */
-#define NEWOBJ_OF RB_NEWOBJ_OF /**< @old{RB_NEWOBJ_OF} */
#define OBJSETUP rb_obj_setup /**< @old{rb_obj_setup} */
#define CLONESETUP rb_clone_setup /**< @old{rb_clone_setup} */
#define DUPSETUP rb_dup_setup /**< @old{rb_dup_setup} */
RBIMPL_SYMBOL_EXPORT_BEGIN()
/**
- * This is the implementation detail of #RB_NEWOBJ.
- *
- * @exception rb_eNoMemError No space left.
- * @return An allocated object, not initialised.
- */
-VALUE rb_newobj(void);
-
-/**
- * This is the implementation detail of #RB_NEWOBJ_OF.
- *
- * @param klass Object's class.
- * @param flags Object's flags.
- * @exception rb_eNoMemError No space left.
- * @return An allocated object, filled with the arguments.
- */
-VALUE rb_newobj_of(VALUE klass, VALUE flags);
-
-/**
* Fills common fields in the object.
*
- * @note Prefer rb_newobj_of() to this function.
* @param[in,out] obj A Ruby object to be set up.
* @param[in] klass `obj` will belong to this class.
* @param[in] type One of ::ruby_value_type.
@@ -172,6 +123,8 @@ RBIMPL_ATTR_DEPRECATED(("This is no longer how Object#clone works."))
static inline void
rb_clone_setup(VALUE clone, VALUE obj)
{
+ (void)clone;
+ (void)obj;
return;
}
@@ -189,6 +142,8 @@ RBIMPL_ATTR_DEPRECATED(("This is no longer how Object#dup works."))
static inline void
rb_dup_setup(VALUE dup, VALUE obj)
{
+ (void)dup;
+ (void)obj;
return;
}
diff --git a/include/ruby/internal/rgengc.h b/include/ruby/internal/rgengc.h
deleted file mode 100644
index 7ea04442f6..0000000000
--- a/include/ruby/internal/rgengc.h
+++ /dev/null
@@ -1,443 +0,0 @@
-#ifndef RBIMPL_RGENGC_H /*-*-C++-*-vi:se ft=cpp:*/
-#define RBIMPL_RGENGC_H
-/**
- * @file
- * @author Ruby developers <ruby-core@ruby-lang.org>
- * @copyright This file is a part of the programming language Ruby.
- * Permission is hereby granted, to either redistribute and/or
- * modify this file, provided that the conditions mentioned in the
- * file COPYING are met. Consult the file for details.
- * @warning Symbols prefixed with either `RBIMPL` or `rbimpl` are
- * implementation details. Don't take them as canon. They could
- * rapidly appear then vanish. The name (path) of this header file
- * is also an implementation detail. Do not expect it to persist
- * at the place it is now. Developers are free to move it anywhere
- * anytime at will.
- * @note To ruby-core: remember that this header can be possibly
- * recursively included from extension libraries written in C++.
- * Do not expect for instance `__VA_ARGS__` is always available.
- * We assume C99 for ruby itself but we don't assume languages of
- * extension libraries. They could be written in C++98.
- * @brief RGENGC write-barrier APIs.
- * @see Sasada, K., "Gradual write-barrier insertion into a Ruby
- * interpreter", in proceedings of the 2019 ACM SIGPLAN
- * International Symposium on Memory Management (ISMM 2019), pp
- * 115-121, 2019. https://doi.org/10.1145/3315573.3329986
- */
-#include "ruby/internal/attr/artificial.h"
-#include "ruby/internal/attr/maybe_unused.h"
-#include "ruby/internal/attr/pure.h"
-#include "ruby/internal/dllexport.h"
-#include "ruby/internal/special_consts.h"
-#include "ruby/internal/stdbool.h"
-#include "ruby/internal/value.h"
-#include "ruby/assert.h"
-
-/**
- * @private
- *
- * @deprecated This macro once was a thing in the old days, but makes no sense
- * any longer today. Exists here for backwards compatibility
- * only. You can safely forget about it.
- */
-#undef USE_RGENGC
-#define USE_RGENGC 1
-
-/**
- * @private
- *
- * This is a compile-time flag to enable/disable incremental GC feature. It
- * has to be set at the time ruby itself compiles. Makes no sense for 3rd
- * parties. It is safe for them to set this though; that just doesn't change
- * anything.
- */
-#ifndef USE_RINCGC
-# define USE_RINCGC 1
-#endif
-
-/**
- * @deprecated This macro seems broken. Setting this to anything other than
- * zero just doesn't compile. We need to KonMari.
- */
-#ifndef USE_RGENGC_LOGGING_WB_UNPROTECT
-# define USE_RGENGC_LOGGING_WB_UNPROTECT 0
-#endif
-
-/**
- * @private
- *
- * This is a compile-time flag to enable/disable write barrier for
- * struct ::RArray. It has to be set at the time ruby itself compiles. Makes
- * no sense for 3rd parties.
- */
-#ifndef RGENGC_WB_PROTECTED_ARRAY
-# define RGENGC_WB_PROTECTED_ARRAY 1
-#endif
-
-/**
- * @private
- *
- * This is a compile-time flag to enable/disable write barrier for
- * struct ::RHash. It has to be set at the time ruby itself compiles. Makes
- * no sense for 3rd parties.
- */
-#ifndef RGENGC_WB_PROTECTED_HASH
-# define RGENGC_WB_PROTECTED_HASH 1
-#endif
-
-/**
- * @private
- *
- * This is a compile-time flag to enable/disable write barrier for
- * struct ::RStruct. It has to be set at the time ruby itself compiles. Makes
- * no sense for 3rd parties.
- */
-#ifndef RGENGC_WB_PROTECTED_STRUCT
-# define RGENGC_WB_PROTECTED_STRUCT 1
-#endif
-
-/**
- * @private
- *
- * This is a compile-time flag to enable/disable write barrier for
- * struct ::RString. It has to be set at the time ruby itself compiles. Makes
- * no sense for 3rd parties.
- */
-#ifndef RGENGC_WB_PROTECTED_STRING
-# define RGENGC_WB_PROTECTED_STRING 1
-#endif
-
-/**
- * @private
- *
- * This is a compile-time flag to enable/disable write barrier for
- * struct ::RObject. It has to be set at the time ruby itself compiles. Makes
- * no sense for 3rd parties.
- */
-#ifndef RGENGC_WB_PROTECTED_OBJECT
-# define RGENGC_WB_PROTECTED_OBJECT 1
-#endif
-
-/**
- * @private
- *
- * This is a compile-time flag to enable/disable write barrier for
- * struct ::RRegexp. It has to be set at the time ruby itself compiles. Makes
- * no sense for 3rd parties.
- */
-#ifndef RGENGC_WB_PROTECTED_REGEXP
-# define RGENGC_WB_PROTECTED_REGEXP 1
-#endif
-
-/**
- * @private
- *
- * This is a compile-time flag to enable/disable write barrier for
- * struct ::RClass. It has to be set at the time ruby itself compiles. Makes
- * no sense for 3rd parties.
- */
-#ifndef RGENGC_WB_PROTECTED_CLASS
-# define RGENGC_WB_PROTECTED_CLASS 1
-#endif
-
-/**
- * @private
- *
- * This is a compile-time flag to enable/disable write barrier for
- * struct ::RFloat. It has to be set at the time ruby itself compiles. Makes
- * no sense for 3rd parties.
- */
-#ifndef RGENGC_WB_PROTECTED_FLOAT
-# define RGENGC_WB_PROTECTED_FLOAT 1
-#endif
-
-/**
- * @private
- *
- * This is a compile-time flag to enable/disable write barrier for
- * struct ::RComplex. It has to be set at the time ruby itself compiles.
- * Makes no sense for 3rd parties.
- */
-#ifndef RGENGC_WB_PROTECTED_COMPLEX
-# define RGENGC_WB_PROTECTED_COMPLEX 1
-#endif
-
-/**
- * @private
- *
- * This is a compile-time flag to enable/disable write barrier for
- * struct ::RRational. It has to be set at the time ruby itself compiles.
- * Makes no sense for 3rd parties.
- */
-#ifndef RGENGC_WB_PROTECTED_RATIONAL
-# define RGENGC_WB_PROTECTED_RATIONAL 1
-#endif
-
-/**
- * @private
- *
- * This is a compile-time flag to enable/disable write barrier for
- * struct ::RBignum. It has to be set at the time ruby itself compiles. Makes
- * no sense for 3rd parties.
- */
-#ifndef RGENGC_WB_PROTECTED_BIGNUM
-# define RGENGC_WB_PROTECTED_BIGNUM 1
-#endif
-
-/**
- * @private
- *
- * @deprecated This macro once was a thing in the old days, but makes no sense
- * any longer today. Exists here for backwards compatibility
- * only. You can safely forget about it.
- *
- * @internal
- *
- * @shyouhei doesn't think anybody uses this right now.
- */
-#ifndef RGENGC_WB_PROTECTED_NODE_CREF
-# define RGENGC_WB_PROTECTED_NODE_CREF 1
-#endif
-
-/**
- * @defgroup rgengc Write barrier (WB) interfaces:
- *
- * @note The following core interfaces can be changed in the future. Please
- * catch up if you want to insert WB into C-extensions correctly.
- *
- * @{
- */
-
-/**
- * Declaration of a "back" pointer. This is a write barrier for new reference
- * from "old" generation to "young" generation. It writes `young` into
- * `*slot`, which is a pointer inside of `old`.
- *
- * @param[in] old An old object.
- * @param[in] slot A pointer inside of `old`.
- * @param[out] young A young object.
- */
-#define RB_OBJ_WRITE(old, slot, young) \
- RBIMPL_CAST(rb_obj_write((VALUE)(old), (VALUE *)(slot), (VALUE)(young), __FILE__, __LINE__))
-
-/**
- * Identical to #RB_OBJ_WRITE(), except it doesn't write any values, but only a
- * WB declaration. `oldv` is the value that `young` replaces (not used in
- * current Ruby).
- *
- * @param[in] old An old object.
- * @param[in] oldv An object previously stored inside of `old`.
- * @param[out] young A young object.
- */
-#define RB_OBJ_WRITTEN(old, oldv, young) \
- RBIMPL_CAST(rb_obj_written((VALUE)(old), (VALUE)(oldv), (VALUE)(young), __FILE__, __LINE__))
-/** @} */
-
-#define OBJ_PROMOTED_RAW RB_OBJ_PROMOTED_RAW /**< @old{RB_OBJ_PROMOTED_RAW} */
-#define OBJ_PROMOTED RB_OBJ_PROMOTED /**< @old{RB_OBJ_PROMOTED} */
-#define OBJ_WB_UNPROTECT RB_OBJ_WB_UNPROTECT /**< @old{RB_OBJ_WB_UNPROTECT} */
-
-/**
- * Asserts that the passed object is not fenced by write barriers. Objects of
- * such property do not contribute to generational GCs. They are scanned
- * always.
- *
- * @param[out] x An object that would not be protected by the barrier.
- */
-#define RB_OBJ_WB_UNPROTECT(x) rb_obj_wb_unprotect(x, __FILE__, __LINE__)
-
-/**
- * Identical to #RB_OBJ_WB_UNPROTECT(), except it can also assert that the
- * given object is of given type.
- *
- * @param[in] type One of `ARRAY`, `STRING`, etc.
- * @param[out] obj An object of `type` that would not be protected.
- *
- * @internal
- *
- * @shyouhei doesn't understand why this has to be visible from extensions.
- */
-#define RB_OBJ_WB_UNPROTECT_FOR(type, obj) \
- (RGENGC_WB_PROTECTED_##type ? OBJ_WB_UNPROTECT(obj) : obj)
-
-/**
- * @private
- *
- * This is an implementation detail of rb_obj_wb_unprotect(). People don't use
- * it directly.
- */
-#define RGENGC_LOGGING_WB_UNPROTECT rb_gc_unprotect_logging
-
-/** @cond INTERNAL_MACRO */
-#define RB_OBJ_PROMOTED_RAW RB_OBJ_PROMOTED_RAW
-#define RB_OBJ_PROMOTED RB_OBJ_PROMOTED
-/** @endcond */
-
-RBIMPL_SYMBOL_EXPORT_BEGIN()
-/**
- * This is the implementation of #RB_OBJ_WRITE(). People don't use it
- * directly.
- *
- * @param[in] old An object that points to `young`.
- * @param[out] young An object that is referenced from `old`.
- */
-void rb_gc_writebarrier(VALUE old, VALUE young);
-
-/**
- * This is the implementation of #RB_OBJ_WB_UNPROTECT(). People don't use it
- * directly.
- *
- * @param[out] obj An object that does not participate in WB.
- */
-void rb_gc_writebarrier_unprotect(VALUE obj);
-
-#if USE_RGENGC_LOGGING_WB_UNPROTECT
-/**
- * @private
- *
- * This is the implementation of #RGENGC_LOGGING_WB_UNPROTECT(). People
- * don't use it directly.
- *
- * @param[in] objptr Don't know why this is a pointer to void but in
- * reality this is a pointer to an object that is about
- * to be un-protected.
- * @param[in] filename Pass C's `__FILE__` here.
- * @param[in] line Pass C's `__LINE__` here.
- */
-void rb_gc_unprotect_logging(void *objptr, const char *filename, int line);
-#endif
-
-RBIMPL_SYMBOL_EXPORT_END()
-
-RBIMPL_ATTR_PURE_UNLESS_DEBUG()
-RBIMPL_ATTR_ARTIFICIAL()
-/**
- * This is the implementation of #RB_OBJ_PROMOTED(). People don't use it
- * directly.
- *
- * @param[in] obj An object to query.
- * @retval true The object is "promoted".
- * @retval false The object is young. It has not experienced GC at all.
- */
-static inline bool
-RB_OBJ_PROMOTED_RAW(VALUE obj)
-{
- RBIMPL_ASSERT_OR_ASSUME(RB_FL_ABLE(obj));
- return RB_FL_ANY_RAW(obj, RUBY_FL_PROMOTED);
-}
-
-RBIMPL_ATTR_PURE_UNLESS_DEBUG()
-RBIMPL_ATTR_ARTIFICIAL()
-/**
- * Tests if the object is "promoted" -- that is, whether the object experienced
- * one or more GC marks.
- *
- * @param[in] obj An object to query.
- * @retval true The object is "promoted".
- * @retval false The object is young. It has not experienced GC at all.
- * @note Hello, is anyone actively calling this function? @shyouhei have
- * never seen any actual usages outside of the GC implementation
- * itself.
- */
-static inline bool
-RB_OBJ_PROMOTED(VALUE obj)
-{
- if (! RB_FL_ABLE(obj)) {
- return false;
- }
- else {
- return RB_OBJ_PROMOTED_RAW(obj);
- }
-}
-
-/**
- * This is the implementation of #RB_OBJ_WB_UNPROTECT(). People don't use it
- * directly.
- *
- * @param[out] x An object that does not participate in WB.
- * @param[in] filename C's `__FILE__` of the caller function.
- * @param[in] line C's `__LINE__` of the caller function.
- * @return x
- */
-static inline VALUE
-rb_obj_wb_unprotect(
- VALUE x,
- RBIMPL_ATTR_MAYBE_UNUSED()
- const char *filename,
- RBIMPL_ATTR_MAYBE_UNUSED()
- int line)
-{
-#if USE_RGENGC_LOGGING_WB_UNPROTECT
- RGENGC_LOGGING_WB_UNPROTECT(RBIMPL_CAST((void *)x), filename, line);
-#endif
- rb_gc_writebarrier_unprotect(x);
- return x;
-}
-
-/**
- * @private
- *
- * This is the implementation of #RB_OBJ_WRITTEN(). People don't use it
- * directly.
- *
- * @param[in] a An old object.
- * @param[in] oldv An object previously stored inside of `old`.
- * @param[out] b A young object.
- * @param[in] filename C's `__FILE__` of the caller function.
- * @param[in] line C's `__LINE__` of the caller function.
- * @return a
- */
-static inline VALUE
-rb_obj_written(
- VALUE a,
- RBIMPL_ATTR_MAYBE_UNUSED()
- VALUE oldv,
- VALUE b,
- RBIMPL_ATTR_MAYBE_UNUSED()
- const char *filename,
- RBIMPL_ATTR_MAYBE_UNUSED()
- int line)
-{
-#if USE_RGENGC_LOGGING_WB_UNPROTECT
- RGENGC_LOGGING_OBJ_WRITTEN(a, oldv, b, filename, line);
-#endif
-
- if (!RB_SPECIAL_CONST_P(b)) {
- rb_gc_writebarrier(a, b);
- }
-
- return a;
-}
-
-/**
- * @private
- *
- * This is the implementation of #RB_OBJ_WRITE(). People don't use it
- * directly.
- *
- * @param[in] a An old object.
- * @param[in] slot A pointer inside of `old`.
- * @param[out] b A young object.
- * @param[in] filename C's `__FILE__` of the caller function.
- * @param[in] line C's `__LINE__` of the caller function.
- * @return a
- */
-static inline VALUE
-rb_obj_write(
- VALUE a, VALUE *slot, VALUE b,
- RBIMPL_ATTR_MAYBE_UNUSED()
- const char *filename,
- RBIMPL_ATTR_MAYBE_UNUSED()
- int line)
-{
-#ifdef RGENGC_LOGGING_WRITE
- RGENGC_LOGGING_WRITE(a, slot, b, filename, line);
-#endif
-
- *slot = b;
-
- rb_obj_written(a, RUBY_Qundef /* ignore `oldv' now */, b, filename, line);
- return a;
-}
-
-#endif /* RBIMPL_RGENGC_H */
diff --git a/include/ruby/internal/scan_args.h b/include/ruby/internal/scan_args.h
index cf5b18f77d..1ed2bf6368 100644
--- a/include/ruby/internal/scan_args.h
+++ b/include/ruby/internal/scan_args.h
@@ -100,7 +100,7 @@ RBIMPL_ATTR_NONNULL((2, 3))
* param-arg-spec := pre-arg-spec [post-arg-spec] / post-arg-spec /
* pre-opt-post-arg-spec
* pre-arg-spec := num-of-leading-mandatory-args
- [num-of-optional-args]
+ * [num-of-optional-args]
* post-arg-spec := sym-for-variable-length-args
* [num-of-trailing-mandatory-args]
* pre-opt-post-arg-spec := num-of-leading-mandatory-args num-of-optional-args
diff --git a/include/ruby/internal/special_consts.h b/include/ruby/internal/special_consts.h
index 38934e4da3..dc0a6b41d6 100644
--- a/include/ruby/internal/special_consts.h
+++ b/include/ruby/internal/special_consts.h
@@ -76,6 +76,8 @@
#define RB_SPECIAL_CONST_P RB_SPECIAL_CONST_P
#define RB_STATIC_SYM_P RB_STATIC_SYM_P
#define RB_TEST RB_TEST
+#define RB_UNDEF_P RB_UNDEF_P
+#define RB_NIL_OR_UNDEF_P RB_NIL_OR_UNDEF_P
/** @endcond */
/** special constants - i.e. non-zero and non-fixnum constants */
@@ -94,9 +96,9 @@ ruby_special_consts {
RUBY_SYMBOL_FLAG, /**< Flag to denote a static symbol. */
#elif USE_FLONUM
RUBY_Qfalse = 0x00, /* ...0000 0000 */
+ RUBY_Qnil = 0x04, /* ...0000 0100 */
RUBY_Qtrue = 0x14, /* ...0001 0100 */
- RUBY_Qnil = 0x08, /* ...0000 1000 */
- RUBY_Qundef = 0x34, /* ...0011 0100 */
+ RUBY_Qundef = 0x24, /* ...0010 0100 */
RUBY_IMMEDIATE_MASK = 0x07, /* ...0000 0111 */
RUBY_FIXNUM_FLAG = 0x01, /* ...xxxx xxx1 */
RUBY_FLONUM_MASK = 0x03, /* ...0000 0011 */
@@ -104,14 +106,14 @@ ruby_special_consts {
RUBY_SYMBOL_FLAG = 0x0c, /* ...xxxx 1100 */
#else
RUBY_Qfalse = 0x00, /* ...0000 0000 */
- RUBY_Qtrue = 0x02, /* ...0000 0010 */
- RUBY_Qnil = 0x04, /* ...0000 0100 */
- RUBY_Qundef = 0x06, /* ...0000 0110 */
+ RUBY_Qnil = 0x02, /* ...0000 0010 */
+ RUBY_Qtrue = 0x06, /* ...0000 0110 */
+ RUBY_Qundef = 0x0a, /* ...0000 1010 */
RUBY_IMMEDIATE_MASK = 0x03, /* ...0000 0011 */
RUBY_FIXNUM_FLAG = 0x01, /* ...xxxx xxx1 */
RUBY_FLONUM_MASK = 0x00, /* any values ANDed with FLONUM_MASK cannot be FLONUM_FLAG */
RUBY_FLONUM_FLAG = 0x02, /* ...0000 0010 */
- RUBY_SYMBOL_FLAG = 0x0e, /* ...0000 1110 */
+ RUBY_SYMBOL_FLAG = 0x0e, /* ...xxxx 1110 */
#endif
RUBY_SPECIAL_SHIFT = 8 /**< Least significant 8 bits are reserved. */
@@ -136,12 +138,21 @@ static inline bool
RB_TEST(VALUE obj)
{
/*
+ * if USE_FLONUM
* Qfalse: ....0000 0000
- * Qnil: ....0000 1000
- * ~Qnil: ....1111 0111
+ * Qnil: ....0000 0100
+ * ~Qnil: ....1111 1011
* v ....xxxx xxxx
* ----------------------------
- * RTEST(v) ....xxxx 0xxx
+ * RTEST(v) ....xxxx x0xx
+ *
+ * if ! USE_FLONUM
+ * Qfalse: ....0000 0000
+ * Qnil: ....0000 0010
+ * ~Qnil: ....1111 1101
+ * v ....xxxx xxxx
+ * ----------------------------
+ * RTEST(v) ....xxxx xx0x
*
* RTEST(v) can be 0 if and only if (v == Qfalse || v == Qnil).
*/
@@ -168,6 +179,62 @@ RBIMPL_ATTR_CONST()
RBIMPL_ATTR_CONSTEXPR(CXX11)
RBIMPL_ATTR_ARTIFICIAL()
/**
+ * Checks if the given object is undef.
+ *
+ * @param[in] obj An arbitrary ruby object.
+ * @retval true `obj` is ::RUBY_Qundef.
+ * @retval false Anything else.
+ */
+static inline bool
+RB_UNDEF_P(VALUE obj)
+{
+ return obj == RUBY_Qundef;
+}
+
+RBIMPL_ATTR_CONST()
+RBIMPL_ATTR_CONSTEXPR(CXX14)
+RBIMPL_ATTR_ARTIFICIAL()
+/**
+ * Checks if the given object is nil or undef. Can be used to see if
+ * a keyword argument is not given or given `nil`.
+ *
+ * @param[in] obj An arbitrary ruby object.
+ * @retval true `obj` is ::RUBY_Qnil or ::RUBY_Qundef.
+ * @retval false Anything else.
+ */
+static inline bool
+RB_NIL_OR_UNDEF_P(VALUE obj)
+{
+ /*
+ * if USE_FLONUM
+ * Qundef: ....0010 0100
+ * Qnil: ....0000 0100
+ * mask: ....1101 1111
+ * common_bits: ....0000 0100
+ * ---------------------------------
+ * Qnil & mask ....0000 0100
+ * Qundef & mask ....0000 0100
+ *
+ * if ! USE_FLONUM
+ * Qundef: ....0000 1010
+ * Qnil: ....0000 0010
+ * mask: ....1111 0111
+ * common_bits: ....0000 0010
+ * ----------------------------
+ * Qnil & mask ....0000 0010
+ * Qundef & mask ....0000 0010
+ *
+ * NIL_OR_UNDEF_P(v) can be true only when v is Qundef or Qnil.
+ */
+ const VALUE mask = ~(RUBY_Qundef ^ RUBY_Qnil);
+ const VALUE common_bits = RUBY_Qundef & RUBY_Qnil;
+ return (obj & mask) == common_bits;
+}
+
+RBIMPL_ATTR_CONST()
+RBIMPL_ATTR_CONSTEXPR(CXX11)
+RBIMPL_ATTR_ARTIFICIAL()
+/**
* Checks if the given object is a so-called Fixnum.
*
* @param[in] obj An arbitrary ruby object.
@@ -259,7 +326,7 @@ RBIMPL_ATTR_ARTIFICIAL()
static inline bool
RB_SPECIAL_CONST_P(VALUE obj)
{
- return RB_IMMEDIATE_P(obj) || ! RB_TEST(obj);
+ return RB_IMMEDIATE_P(obj) || obj == RUBY_Qfalse;
}
RBIMPL_ATTR_CONST()
diff --git a/include/ruby/internal/static_assert.h b/include/ruby/internal/static_assert.h
index 594c2b2917..b9ff6646e7 100644
--- a/include/ruby/internal/static_assert.h
+++ b/include/ruby/internal/static_assert.h
@@ -71,7 +71,7 @@
#else
# define RBIMPL_STATIC_ASSERT(name, expr) \
- typedef int static_assert_ ## name ## _check[1 - 2 * !(expr)]
+ MAYBE_UNUSED(typedef int static_assert_ ## name ## _check[1 - 2 * !(expr)])
#endif
#endif /* RBIMPL_STATIC_ASSERT_H */
diff --git a/include/ruby/internal/stdbool.h b/include/ruby/internal/stdbool.h
index b15321cb00..1ca61136ba 100644
--- a/include/ruby/internal/stdbool.h
+++ b/include/ruby/internal/stdbool.h
@@ -39,7 +39,7 @@
# /* Take stdbool.h definition. */
# include <stdbool.h>
-#else
+#elif !defined(HAVE__BOOL)
typedef unsigned char _Bool;
# /* See also http://www.open-std.org/jtc1/sc22/wg14/www/docs/n2229.htm */
# define bool _Bool
diff --git a/include/ruby/internal/stdckdint.h b/include/ruby/internal/stdckdint.h
new file mode 100644
index 0000000000..d02530136e
--- /dev/null
+++ b/include/ruby/internal/stdckdint.h
@@ -0,0 +1,60 @@
+#ifndef RBIMPL_STDCKDINT_H /*-*-C++-*-vi:se ft=cpp:*/
+#define RBIMPL_STDCKDINT_H
+/**
+ * @author Ruby developers <ruby-core@ruby-lang.org>
+ * @copyright This file is a part of the programming language Ruby.
+ * Permission is hereby granted, to either redistribute and/or
+ * modify this file, provided that the conditions mentioned in the
+ * file COPYING are met. Consult the file for details.
+ * @warning Symbols prefixed with either `RBIMPL` or `rbimpl` are
+ * implementation details. Don't take them as canon. They could
+ * rapidly appear then vanish. The name (path) of this header file
+ * is also an implementation detail. Do not expect it to persist
+ * at the place it is now. Developers are free to move it anywhere
+ * anytime at will.
+ * @note To ruby-core: remember that this header can be possibly
+ * recursively included from extension libraries written in C++.
+ * Do not expect for instance `__VA_ARGS__` is always available.
+ * We assume C99 for ruby itself but we don't assume languages of
+ * extension libraries. They could be written in C++98.
+ * @brief C23 shim for <stdckdint.h>
+ */
+#include "ruby/internal/config.h"
+#include "ruby/internal/has/builtin.h"
+#include "ruby/internal/stdbool.h"
+
+#ifdef __has_include
+# if __has_include(<stdckdint.h>)
+# /* Conforming C23 situation; e.g. recent clang */
+# define RBIMPL_HAVE_STDCKDINT_H
+# endif
+#endif
+
+#ifdef HAVE_STDCKDINT_H
+# /* Some OSes (most notably FreeBSD) have this file. */
+# define RBIMPL_HAVE_STDCKDINT_H
+#endif
+
+#ifdef RBIMPL_HAVE_STDCKDINT_H
+# /* Take that. */
+# include <stdckdint.h>
+
+#elif RBIMPL_HAS_BUILTIN(__builtin_add_overflow)
+# define ckd_add(x, y, z) ((bool)__builtin_add_overflow((y), (z), (x)))
+# define ckd_sub(x, y, z) ((bool)__builtin_sub_overflow((y), (z), (x)))
+# define ckd_mul(x, y, z) ((bool)__builtin_mul_overflow((y), (z), (x)))
+# define __STDC_VERSION_STDCKDINT_H__ 202311L
+
+#/* elif defined(__cplusplus) */
+#/* :TODO: if we assume C++11 we can use `<type_traits>` to implement them. */
+
+#else
+# /* intentionally leave them undefined */
+# /* to make `#ifdef ckd_add` etc. work as intended. */
+# undef ckd_add
+# undef ckd_sub
+# undef ckd_mul
+# undef __STDC_VERSION_STDCKDINT_H__
+#endif
+
+#endif /* RBIMPL_STDCKDINT_H */
diff --git a/include/ruby/internal/variable.h b/include/ruby/internal/variable.h
index 1f84b92db0..c017ffe3f7 100644
--- a/include/ruby/internal/variable.h
+++ b/include/ruby/internal/variable.h
@@ -147,7 +147,7 @@ RBIMPL_ATTR_NONNULL(())
* init_Foo(void)
* {
* foo = rb_eval_string("...");
- * rb_define_global_variable("$foo", &foo);
+ * rb_define_variable("$foo", &foo);
* }
* ```
*