diff options
Diffstat (limited to 'include/ruby/internal')
69 files changed, 4368 insertions, 1755 deletions
diff --git a/include/ruby/internal/abi.h b/include/ruby/internal/abi.h new file mode 100644 index 0000000000..e735a67564 --- /dev/null +++ b/include/ruby/internal/abi.h @@ -0,0 +1,58 @@ +#ifndef RUBY_ABI_H +#define RUBY_ABI_H + +#ifdef RUBY_ABI_VERSION /* should match the definition in config.h */ + +/* This number represents Ruby's ABI version. + * + * In development Ruby, it should be bumped every time an ABI incompatible + * change is introduced. This will force other developers to rebuild extension + * gems. + * + * The following cases are considered as ABI incompatible changes: + * - Changing any data structures. + * - Changing macros or inline functions causing a change in behavior. + * - Deprecating or removing function declarations. + * + * The following cases are NOT considered as ABI incompatible changes: + * - Any changes that does not involve the header files in the `include` + * directory. + * - Adding macros, inline functions, or function declarations. + * - Backwards compatible refactors. + * - Editing comments. + * + * In released versions of Ruby, this number is not defined since teeny + * versions of Ruby should guarantee ABI compatibility. + */ +#define RUBY_ABI_VERSION 0 + +/* Windows does not support weak symbols so ruby_abi_version will not exist + * in the shared library. */ +#if defined(HAVE_FUNC_WEAK) && !defined(_WIN32) && !defined(__MINGW32__) && !defined(__CYGWIN__) +# define RUBY_DLN_CHECK_ABI +#endif +#endif /* RUBY_ABI_VERSION */ + +#if defined(RUBY_DLN_CHECK_ABI) && !defined(RUBY_EXPORT) + +# ifdef __cplusplus +extern "C" { +# endif + +RUBY_FUNC_EXPORTED unsigned long long __attribute__((weak)) +ruby_abi_version(void) +{ +# ifdef RUBY_ABI_VERSION + return RUBY_ABI_VERSION; +# else + return 0; +# endif +} + +# ifdef __cplusplus +} +# endif + +#endif + +#endif diff --git a/include/ruby/internal/anyargs.h b/include/ruby/internal/anyargs.h index 9d8d16fdab..e3e1b6166d 100644 --- a/include/ruby/internal/anyargs.h +++ b/include/ruby/internal/anyargs.h @@ -239,15 +239,16 @@ # define RBIMPL_ANYARGS_DISPATCH_rb_define_method_13(n) RBIMPL_ANYARGS_DISPATCH((n) == 13, rb_define_method_13, RBIMPL_ANYARGS_DISPATCH_rb_define_method_12(n)) # define RBIMPL_ANYARGS_DISPATCH_rb_define_method_14(n) RBIMPL_ANYARGS_DISPATCH((n) == 14, rb_define_method_14, RBIMPL_ANYARGS_DISPATCH_rb_define_method_13(n)) # define RBIMPL_ANYARGS_DISPATCH_rb_define_method_15(n) RBIMPL_ANYARGS_DISPATCH((n) == 15, rb_define_method_15, RBIMPL_ANYARGS_DISPATCH_rb_define_method_14(n)) -# define RBIMPL_ANYARGS_DISPATCH_rb_define_singleton_method(n, f) RBIMPL_ANYARGS_DISPATCH(RBIMPL_CFUNC_IS_rb_f_notimplement(f), rb_define_singleton_method_m3, RBIMPL_ANYARGS_DISPATCH_rb_define_singleton_method_15(n)) -# define RBIMPL_ANYARGS_DISPATCH_rb_define_protected_method(n, f) RBIMPL_ANYARGS_DISPATCH(RBIMPL_CFUNC_IS_rb_f_notimplement(f), rb_define_protected_method_m3, RBIMPL_ANYARGS_DISPATCH_rb_define_protected_method_15(n)) -# define RBIMPL_ANYARGS_DISPATCH_rb_define_private_method(n, f) RBIMPL_ANYARGS_DISPATCH(RBIMPL_CFUNC_IS_rb_f_notimplement(f), rb_define_private_method_m3, RBIMPL_ANYARGS_DISPATCH_rb_define_private_method_15(n)) -# define RBIMPL_ANYARGS_DISPATCH_rb_define_module_function(n, f) RBIMPL_ANYARGS_DISPATCH(RBIMPL_CFUNC_IS_rb_f_notimplement(f), rb_define_module_function_m3, RBIMPL_ANYARGS_DISPATCH_rb_define_module_function_15(n)) -# define RBIMPL_ANYARGS_DISPATCH_rb_define_global_function(n, f) RBIMPL_ANYARGS_DISPATCH(RBIMPL_CFUNC_IS_rb_f_notimplement(f), rb_define_global_function_m3, RBIMPL_ANYARGS_DISPATCH_rb_define_global_function_15(n)) -# define RBIMPL_ANYARGS_DISPATCH_rb_define_method_id(n, f) RBIMPL_ANYARGS_DISPATCH(RBIMPL_CFUNC_IS_rb_f_notimplement(f), rb_define_method_id_m3, RBIMPL_ANYARGS_DISPATCH_rb_define_method_id_15(n)) -# define RBIMPL_ANYARGS_DISPATCH_rb_define_method(n, f) RBIMPL_ANYARGS_DISPATCH(RBIMPL_CFUNC_IS_rb_f_notimplement(f), rb_define_method_m3, RBIMPL_ANYARGS_DISPATCH_rb_define_method_15(n)) +# define RBIMPL_ANYARGS_DISPATCH_rb_define_singleton_method(n, f) RBIMPL_ANYARGS_DISPATCH(RBIMPL_CFUNC_IS_rb_f_notimplement(f), rb_define_singleton_method_notimpl, RBIMPL_ANYARGS_DISPATCH_rb_define_singleton_method_15(n)) +# define RBIMPL_ANYARGS_DISPATCH_rb_define_protected_method(n, f) RBIMPL_ANYARGS_DISPATCH(RBIMPL_CFUNC_IS_rb_f_notimplement(f), rb_define_protected_method_notimpl, RBIMPL_ANYARGS_DISPATCH_rb_define_protected_method_15(n)) +# define RBIMPL_ANYARGS_DISPATCH_rb_define_private_method(n, f) RBIMPL_ANYARGS_DISPATCH(RBIMPL_CFUNC_IS_rb_f_notimplement(f), rb_define_private_method_notimpl, RBIMPL_ANYARGS_DISPATCH_rb_define_private_method_15(n)) +# define RBIMPL_ANYARGS_DISPATCH_rb_define_module_function(n, f) RBIMPL_ANYARGS_DISPATCH(RBIMPL_CFUNC_IS_rb_f_notimplement(f), rb_define_module_function_notimpl, RBIMPL_ANYARGS_DISPATCH_rb_define_module_function_15(n)) +# define RBIMPL_ANYARGS_DISPATCH_rb_define_global_function(n, f) RBIMPL_ANYARGS_DISPATCH(RBIMPL_CFUNC_IS_rb_f_notimplement(f), rb_define_global_function_notimpl, RBIMPL_ANYARGS_DISPATCH_rb_define_global_function_15(n)) +# define RBIMPL_ANYARGS_DISPATCH_rb_define_method_id(n, f) RBIMPL_ANYARGS_DISPATCH(RBIMPL_CFUNC_IS_rb_f_notimplement(f), rb_define_method_id_notimpl, RBIMPL_ANYARGS_DISPATCH_rb_define_method_id_15(n)) +# define RBIMPL_ANYARGS_DISPATCH_rb_define_method(n, f) RBIMPL_ANYARGS_DISPATCH(RBIMPL_CFUNC_IS_rb_f_notimplement(f), rb_define_method_notimpl, RBIMPL_ANYARGS_DISPATCH_rb_define_method_15(n)) # define RBIMPL_ANYARGS_ATTRSET(sym) RBIMPL_ATTR_MAYBE_UNUSED() RBIMPL_ATTR_NONNULL(()) RBIMPL_ATTR_WEAKREF(sym) # define RBIMPL_ANYARGS_DECL(sym, ...) \ +RBIMPL_ANYARGS_ATTRSET(sym) static void sym ## _notimpl(__VA_ARGS__, VALUE(*)(int, const VALUE *, VALUE, VALUE), int); \ RBIMPL_ANYARGS_ATTRSET(sym) static void sym ## _m3(__VA_ARGS__, VALUE(*)(ANYARGS), int); \ RBIMPL_ANYARGS_ATTRSET(sym) static void sym ## _m2(__VA_ARGS__, VALUE(*)(VALUE, VALUE), int); \ RBIMPL_ANYARGS_ATTRSET(sym) static void sym ## _m1(__VA_ARGS__, VALUE(*)(int, union { VALUE *x; const VALUE *y; } __attribute__((__transparent_union__)), VALUE), int); \ diff --git a/include/ruby/internal/arithmetic.h b/include/ruby/internal/arithmetic.h index 3f7840c384..7ebb4a86f1 100644 --- a/include/ruby/internal/arithmetic.h +++ b/include/ruby/internal/arithmetic.h @@ -18,7 +18,8 @@ * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of * extension libraries. They could be written in C++98. - * @brief Conversion between C's arithmtic types and Ruby's numeric types. + * @brief Conversion between C's arithmetic types and Ruby's numeric + * types. */ #include "ruby/internal/arithmetic/char.h" #include "ruby/internal/arithmetic/double.h" diff --git a/include/ruby/internal/arithmetic/long.h b/include/ruby/internal/arithmetic/long.h index 792f7be179..6b8fd8ffc3 100644 --- a/include/ruby/internal/arithmetic/long.h +++ b/include/ruby/internal/arithmetic/long.h @@ -115,7 +115,7 @@ RB_INT2FIX(long i) /* :NOTE: VALUE can be wider than long. As j being unsigned, 2j+1 is fully * defined. Also it can be compiled into a single LEA instruction. */ const unsigned long j = i; - const unsigned long k = 2 * j + RUBY_FIXNUM_FLAG; + const unsigned long k = (j << 1) + RUBY_FIXNUM_FLAG; const long l = k; const SIGNED_VALUE m = l; /* Sign extend */ const VALUE n = m; diff --git a/include/ruby/internal/assume.h b/include/ruby/internal/assume.h index 65d34d4ac8..4c183e8af9 100644 --- a/include/ruby/internal/assume.h +++ b/include/ruby/internal/assume.h @@ -32,10 +32,7 @@ #include "ruby/internal/warning_push.h" /** @cond INTERNAL_MACRO */ -#if RBIMPL_COMPILER_SINCE(MSVC, 13, 10, 0) -# define RBIMPL_HAVE___ASSUME - -#elif RBIMPL_COMPILER_SINCE(Intel, 13, 0, 0) +#if defined(HAVE___ASSUME) # define RBIMPL_HAVE___ASSUME #endif /** @endcond */ diff --git a/include/ruby/internal/attr/nodiscard.h b/include/ruby/internal/attr/nodiscard.h index 087192a7a8..c3ae118942 100644 --- a/include/ruby/internal/attr/nodiscard.h +++ b/include/ruby/internal/attr/nodiscard.h @@ -26,7 +26,7 @@ /** * Wraps (or simulates) `[[nodiscard]]`. In C++ (at least since C++20) a - * nodiscard attribute can have a message why the result shall not be ignoed. + * nodiscard attribute can have a message why the result shall not be ignored. * However GCC attribute and SAL annotation cannot take them. */ #if RBIMPL_HAS_CPP_ATTRIBUTE(nodiscard) diff --git a/include/ruby/internal/attr/noexcept.h b/include/ruby/internal/attr/noexcept.h index ea3001df2a..7c3f92f1e7 100644 --- a/include/ruby/internal/attr/noexcept.h +++ b/include/ruby/internal/attr/noexcept.h @@ -54,7 +54,7 @@ * get smarter and smarter. Today they can infer if it actually throws * or not without any annotations by humans (correct me if I'm wrong). * - * - When an inline function attributed `noexcepr` actually _does_ throw an + * - When an inline function attributed `noexcept` actually _does_ throw an * exception: they have to call `std::terminate` then (C++ standard * mandates so). This means exception handling routines are actually * enforced, not omitted. This doesn't impact runtime performance (The diff --git a/include/ruby/internal/attr/nonnull.h b/include/ruby/internal/attr/nonnull.h index 874f4236c0..778d5be208 100644 --- a/include/ruby/internal/attr/nonnull.h +++ b/include/ruby/internal/attr/nonnull.h @@ -25,8 +25,10 @@ /** Wraps (or simulates) `__attribute__((nonnull))` */ #if RBIMPL_HAS_ATTRIBUTE(nonnull) # define RBIMPL_ATTR_NONNULL(list) __attribute__((__nonnull__ list)) +# define RBIMPL_NONNULL_ARG(arg) RBIMPL_ASSERT_NOTHING #else # define RBIMPL_ATTR_NONNULL(list) /* void */ +# define RBIMPL_NONNULL_ARG(arg) RUBY_ASSERT(arg) #endif #endif /* RBIMPL_ATTR_NONNULL_H */ diff --git a/include/ruby/internal/attr/packed_struct.h b/include/ruby/internal/attr/packed_struct.h new file mode 100644 index 0000000000..0678b9acc8 --- /dev/null +++ b/include/ruby/internal/attr/packed_struct.h @@ -0,0 +1,43 @@ +#ifndef RBIMPL_ATTR_PACKED_STRUCT_H /*-*-C++-*-vi:se ft=cpp:*/ +#define RBIMPL_ATTR_PACKED_STRUCT_H +/** + * @file + * @author Ruby developers <ruby-core@ruby-lang.org> + * @copyright This file is a part of the programming language Ruby. + * Permission is hereby granted, to either redistribute and/or + * modify this file, provided that the conditions mentioned in the + * file COPYING are met. Consult the file for details. + * @warning Symbols prefixed with either `RBIMPL` or `rbimpl` are + * implementation details. Don't take them as canon. They could + * rapidly appear then vanish. The name (path) of this header file + * is also an implementation detail. Do not expect it to persist + * at the place it is now. Developers are free to move it anywhere + * anytime at will. + * @note To ruby-core: remember that this header can be possibly + * recursively included from extension libraries written in C++. + * Do not expect for instance `__VA_ARGS__` is always available. + * We assume C99 for ruby itself but we don't assume languages of + * extension libraries. They could be written in C++98. + * @brief Defines #RBIMPL_ATTR_PACKED_STRUCT_BEGIN, + * #RBIMPL_ATTR_PACKED_STRUCT_END, + * #RBIMPL_ATTR_PACKED_STRUCT_UNALIGNED_BEGIN, and + * #RBIMPL_ATTR_PACKED_STRUCT_UNALIGNED_END. + */ +#include "ruby/internal/config.h" + +#ifndef RBIMPL_ATTR_PACKED_STRUCT_BEGIN +# define RBIMPL_ATTR_PACKED_STRUCT_BEGIN() /* void */ +#endif +#ifndef RBIMPL_ATTR_PACKED_STRUCT_END +# define RBIMPL_ATTR_PACKED_STRUCT_END() /* void */ +#endif + +#if UNALIGNED_WORD_ACCESS +# define RBIMPL_ATTR_PACKED_STRUCT_UNALIGNED_BEGIN() RBIMPL_ATTR_PACKED_STRUCT_BEGIN() +# define RBIMPL_ATTR_PACKED_STRUCT_UNALIGNED_END() RBIMPL_ATTR_PACKED_STRUCT_END() +#else +# define RBIMPL_ATTR_PACKED_STRUCT_UNALIGNED_BEGIN() /* void */ +# define RBIMPL_ATTR_PACKED_STRUCT_UNALIGNED_END() /* void */ +#endif + +#endif diff --git a/include/ruby/internal/config.h b/include/ruby/internal/config.h index b6134c6165..da070f0979 100644 --- a/include/ruby/internal/config.h +++ b/include/ruby/internal/config.h @@ -113,6 +113,8 @@ # define UNALIGNED_WORD_ACCESS 1 #elif defined(__powerpc64__) # define UNALIGNED_WORD_ACCESS 1 +#elif defined(__POWERPC__) // __POWERPC__ is defined for ppc and ppc64 on Darwin +# define UNALIGNED_WORD_ACCESS 1 #elif defined(__aarch64__) # define UNALIGNED_WORD_ACCESS 1 #elif defined(__mc68020__) diff --git a/include/ruby/internal/core/rarray.h b/include/ruby/internal/core/rarray.h index 9f1d0509ea..90690fe794 100644 --- a/include/ruby/internal/core/rarray.h +++ b/include/ruby/internal/core/rarray.h @@ -29,25 +29,13 @@ #include "ruby/internal/core/rbasic.h" #include "ruby/internal/dllexport.h" #include "ruby/internal/fl_type.h" -#include "ruby/internal/rgengc.h" +#include "ruby/internal/gc.h" #include "ruby/internal/stdbool.h" #include "ruby/internal/value.h" #include "ruby/internal/value_type.h" #include "ruby/assert.h" /** - * @private - * @warning Do not touch this macro. - * @warning It is an implementation detail. - * @warning The value of this macro must match for ruby itself and all - * extension libraries, otherwise serious memory corruption shall - * occur. - */ -#ifndef USE_TRANSIENT_HEAP -# define USE_TRANSIENT_HEAP 1 -#endif - -/** * Convenient casting macro. * * @param obj An object, which is in fact an ::RArray. @@ -59,15 +47,9 @@ #define RARRAY_EMBED_LEN_MASK RARRAY_EMBED_LEN_MASK #define RARRAY_EMBED_LEN_MAX RARRAY_EMBED_LEN_MAX #define RARRAY_EMBED_LEN_SHIFT RARRAY_EMBED_LEN_SHIFT -#if USE_TRANSIENT_HEAP -# define RARRAY_TRANSIENT_FLAG RARRAY_TRANSIENT_FLAG -#else -# define RARRAY_TRANSIENT_FLAG 0 -#endif /** @endcond */ #define RARRAY_LEN rb_array_len /**< @alias{rb_array_len} */ #define RARRAY_CONST_PTR rb_array_const_ptr /**< @alias{rb_array_const_ptr} */ -#define RARRAY_CONST_PTR_TRANSIENT rb_array_const_ptr_transient /**< @alias{rb_array_const_ptr_transient} */ /** @cond INTERNAL_MACRO */ #if defined(__fcc__) || defined(__fcc_version) || \ @@ -80,7 +62,6 @@ #define RARRAY_EMBED_LEN RARRAY_EMBED_LEN #define RARRAY_LENINT RARRAY_LENINT -#define RARRAY_TRANSIENT_P RARRAY_TRANSIENT_P #define RARRAY_ASET RARRAY_ASET #define RARRAY_PTR RARRAY_PTR /** @endcond */ @@ -130,24 +111,8 @@ enum ruby_rarray_flags { * 3rd parties must not be aware that there even is more than one way to * store array elements. It was a bad idea to expose this to them. */ - RARRAY_EMBED_LEN_MASK = RUBY_FL_USER4 | RUBY_FL_USER3 -#if USE_TRANSIENT_HEAP - , - - /** - * This flag has something to do with an array's "transiency". A transient - * array is an array of young generation (of generational GC), who stores - * its elements inside of dedicated memory pages called a transient heap. - * Not every young generation share that storage scheme, but elder - * generations must no join. - * - * @internal - * - * 3rd parties must not be aware that there even is more than one way to - * store array elements. It was a bad idea to expose this to them. - */ - RARRAY_TRANSIENT_FLAG = RUBY_FL_USER13 -#endif + RARRAY_EMBED_LEN_MASK = RUBY_FL_USER9 | RUBY_FL_USER8 | RUBY_FL_USER7 | RUBY_FL_USER6 | + RUBY_FL_USER5 | RUBY_FL_USER4 | RUBY_FL_USER3 }; /** @@ -156,10 +121,7 @@ enum ruby_rarray_flags { */ enum ruby_rarray_consts { /** Where ::RARRAY_EMBED_LEN_MASK resides. */ - RARRAY_EMBED_LEN_SHIFT = RUBY_FL_USHIFT + 3, - - /** Max possible number elements that can be embedded. */ - RARRAY_EMBED_LEN_MAX = RBIMPL_EMBED_LEN_MAX_OF(VALUE) + RARRAY_EMBED_LEN_SHIFT = RUBY_FL_USHIFT + 3 }; /** Ruby's array. */ @@ -218,7 +180,12 @@ struct RArray { * to store its elements. In this case the length is encoded into the * flags. */ - const VALUE ary[RARRAY_EMBED_LEN_MAX]; + /* This is a length 1 array because: + * 1. GCC has a bug that does not optimize C flexible array members + * (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102452) + * 2. Zero length arrays are not supported by all compilers + */ + const VALUE ary[1]; } as; }; @@ -245,16 +212,6 @@ VALUE *rb_ary_ptr_use_start(VALUE ary); */ void rb_ary_ptr_use_end(VALUE a); -#if USE_TRANSIENT_HEAP -/** - * Destructively converts an array of transient backend into ordinal one. - * - * @param[out] a An object of ::RArray. - * @pre `a` must be a transient array. - * @post `a` gets out of transient heap, destructively. - */ -void rb_ary_detransient(VALUE a); -#endif RBIMPL_SYMBOL_EXPORT_END() RBIMPL_ATTR_PURE_UNLESS_DEBUG() @@ -327,33 +284,6 @@ RARRAY_LENINT(VALUE ary) } RBIMPL_ATTR_PURE_UNLESS_DEBUG() -RBIMPL_ATTR_ARTIFICIAL() -/** - * Queries if the array is a transient array. - * - * @param[in] ary Array in question. - * @retval true Yes it is. - * @retval false No it isn't. - * @pre `ary` must be an instance of ::RArray. - * - * @internal - * - * @shyouhei doesn't understand the benefit of this function called from - * extension libraries. - */ -static inline bool -RARRAY_TRANSIENT_P(VALUE ary) -{ - RBIMPL_ASSERT_TYPE(ary, RUBY_T_ARRAY); - -#if USE_TRANSIENT_HEAP - return RB_FL_ANY_RAW(ary, RARRAY_TRANSIENT_FLAG); -#else - return false; -#endif -} - -RBIMPL_ATTR_PURE_UNLESS_DEBUG() /** * @private * @@ -364,7 +294,7 @@ RBIMPL_ATTR_PURE_UNLESS_DEBUG() * @return Its backend storage. */ static inline const VALUE * -rb_array_const_ptr_transient(VALUE a) +rb_array_const_ptr(VALUE a) { RBIMPL_ASSERT_TYPE(a, RUBY_T_ARRAY); @@ -376,110 +306,21 @@ rb_array_const_ptr_transient(VALUE a) } } -#if ! USE_TRANSIENT_HEAP -RBIMPL_ATTR_PURE_UNLESS_DEBUG() -#endif -/** - * @private - * - * This is an implementation detail of RARRAY_PTR(). People do not use it - * directly. - * - * @param[in] a An object of ::RArray. - * @return Its backend storage. - * @post `a` is not a transient array. - */ -static inline const VALUE * -rb_array_const_ptr(VALUE a) -{ - RBIMPL_ASSERT_TYPE(a, RUBY_T_ARRAY); - -#if USE_TRANSIENT_HEAP - if (RARRAY_TRANSIENT_P(a)) { - rb_ary_detransient(a); - } -#endif - return rb_array_const_ptr_transient(a); -} - /** * @private * * This is an implementation detail of #RARRAY_PTR_USE. People do not use it * directly. - * - * @param[in] a An object of ::RArray. - * @param[in] allow_transient Whether `a` can be transient or not. - * @return Its backend storage. - * @post `a` is not a transient array unless `allow_transient`. - */ -static inline VALUE * -rb_array_ptr_use_start(VALUE a, - RBIMPL_ATTR_MAYBE_UNUSED() - int allow_transient) -{ - RBIMPL_ASSERT_TYPE(a, RUBY_T_ARRAY); - -#if USE_TRANSIENT_HEAP - if (!allow_transient) { - if (RARRAY_TRANSIENT_P(a)) { - rb_ary_detransient(a); - } - } -#endif - - return rb_ary_ptr_use_start(a); -} - -/** - * @private - * - * This is an implementation detail of #RARRAY_PTR_USE. People do not use it - * directly. - * - * @param[in] a An object of ::RArray. - * @param[in] allow_transient Whether `a` can be transient or not. */ -static inline void -rb_array_ptr_use_end(VALUE a, - RBIMPL_ATTR_MAYBE_UNUSED() - int allow_transient) -{ - RBIMPL_ASSERT_TYPE(a, RUBY_T_ARRAY); - rb_ary_ptr_use_end(a); -} - -/** - * @private - * - * This is an implementation detail of #RARRAY_PTR_USE. People do not use it - * directly. - */ -#define RBIMPL_RARRAY_STMT(flag, ary, var, expr) do { \ +#define RBIMPL_RARRAY_STMT(ary, var, expr) do { \ RBIMPL_ASSERT_TYPE((ary), RUBY_T_ARRAY); \ const VALUE rbimpl_ary = (ary); \ - VALUE *var = rb_array_ptr_use_start(rbimpl_ary, (flag)); \ + VALUE *var = rb_ary_ptr_use_start(rbimpl_ary); \ expr; \ - rb_array_ptr_use_end(rbimpl_ary, (flag)); \ + rb_ary_ptr_use_end(rbimpl_ary); \ } while (0) /** - * @private - * - * This is an implementation detail of #RARRAY_PTR_USE. People do not use it - * directly. - */ -#define RARRAY_PTR_USE_START(a) rb_array_ptr_use_start(a, 0) - -/** - * @private - * - * This is an implementation detail of #RARRAY_PTR_USE. People do not use it - * directly. - */ -#define RARRAY_PTR_USE_END(a) rb_array_ptr_use_end(a, 0) - -/** * Declares a section of code where raw pointers are used. In case you need to * touch the raw C array instead of polite CAPIs, then that operation shall be * wrapped using this macro. @@ -505,37 +346,11 @@ rb_array_ptr_use_end(VALUE a, * them use it... Maybe some transition path can be implemented later. */ #define RARRAY_PTR_USE(ary, ptr_name, expr) \ - RBIMPL_RARRAY_STMT(0, ary, ptr_name, expr) - -/** - * @private - * - * This is an implementation detail of #RARRAY_PTR_USE_TRANSIENT. People do - * not use it directly. - */ -#define RARRAY_PTR_USE_START_TRANSIENT(a) rb_array_ptr_use_start(a, 1) - -/** - * @private - * - * This is an implementation detail of #RARRAY_PTR_USE_TRANSIENT. People do - * not use it directly. - */ -#define RARRAY_PTR_USE_END_TRANSIENT(a) rb_array_ptr_use_end(a, 1) - -/** - * Identical to #RARRAY_PTR_USE, except the pointer can be a transient one. - * - * @param ary An object of ::RArray. - * @param ptr_name A variable name which points the C array in `expr`. - * @param expr The expression that touches `ptr_name`. - */ -#define RARRAY_PTR_USE_TRANSIENT(ary, ptr_name, expr) \ - RBIMPL_RARRAY_STMT(1, ary, ptr_name, expr) + RBIMPL_RARRAY_STMT(ary, ptr_name, expr) /** * Wild use of a C pointer. This function accesses the backend storage - * directly. This is slower than #RARRAY_PTR_USE_TRANSIENT. It exercises + * directly. This is slower than #RARRAY_PTR_USE. It exercises * extra manoeuvres to protect our generational GC. Use of this function is * considered archaic. Use a modern way instead. * @@ -570,7 +385,7 @@ RARRAY_PTR(VALUE ary) static inline void RARRAY_ASET(VALUE ary, long i, VALUE v) { - RARRAY_PTR_USE_TRANSIENT(ary, ptr, + RARRAY_PTR_USE(ary, ptr, RB_OBJ_WRITE(ary, &ptr[i], v)); } @@ -585,6 +400,6 @@ RARRAY_ASET(VALUE ary, long i, VALUE v) * remains as it is due to that. If we could warn such usages we can set a * transition path, but currently no way is found to do so. */ -#define RARRAY_AREF(a, i) RARRAY_CONST_PTR_TRANSIENT(a)[i] +#define RARRAY_AREF(a, i) RARRAY_CONST_PTR(a)[i] #endif /* RBIMPL_RARRAY_H */ diff --git a/include/ruby/internal/core/rbasic.h b/include/ruby/internal/core/rbasic.h index 4617f743a7..a1477e2600 100644 --- a/include/ruby/internal/core/rbasic.h +++ b/include/ruby/internal/core/rbasic.h @@ -56,22 +56,20 @@ enum ruby_rvalue_flags { }; /** - * Ruby's object's, base components. Every single ruby objects have them in - * common. + * Ruby object's base components. All Ruby objects have them in common. */ struct RUBY_ALIGNAS(SIZEOF_VALUE) RBasic { /** - * Per-object flags. Each ruby objects have their own characteristics - * apart from their classes. For instance whether an object is frozen or - * not is not controlled by its class. This is where such properties are - * stored. + * Per-object flags. Each Ruby object has its own characteristics apart + * from its class. For instance, whether an object is frozen or not is not + * controlled by its class. This is where such properties are stored. * * @see enum ::ruby_fl_type * - * @note This is ::VALUE rather than an enum for alignment purpose. Back + * @note This is ::VALUE rather than an enum for alignment purposes. Back * in the 1990s there were no such thing like `_Alignas` in C. */ VALUE flags; @@ -79,10 +77,10 @@ RBasic { /** * Class of an object. Every object has its class. Also, everything is an * object in Ruby. This means classes are also objects. Classes have - * their own classes, classes of classes have their classes, too ... and - * it recursively continues forever. + * their own classes, classes of classes have their classes too, and it + * recursively continues forever. * - * Also note the `const` qualifier. In ruby an object cannot "change" its + * Also note the `const` qualifier. In Ruby, an object cannot "change" its * class. */ const VALUE klass; diff --git a/include/ruby/internal/core/rclass.h b/include/ruby/internal/core/rclass.h index 13a33a28bd..b0b6bfc80c 100644 --- a/include/ruby/internal/core/rclass.h +++ b/include/ruby/internal/core/rclass.h @@ -26,9 +26,7 @@ #include "ruby/internal/cast.h" /** @cond INTERNAL_MACRO */ -#define RMODULE_IS_OVERLAID RMODULE_IS_OVERLAID #define RMODULE_IS_REFINEMENT RMODULE_IS_REFINEMENT -#define RMODULE_INCLUDED_INTO_REFINEMENT RMODULE_INCLUDED_INTO_REFINEMENT /** @endcond */ /** @@ -55,57 +53,12 @@ * Why is it here, given RClass itself is not? */ enum ruby_rmodule_flags { - - /** - * This flag has something to do with refinements... I guess? It is set on - * occasions for modules that are refined by refinements, but it seems - * ... nobody cares about such things? Not sure but this flag could - * perhaps be a write-only information. - */ - RMODULE_IS_OVERLAID = RUBY_FL_USER2, - /** * This flag has something to do with refinements. A module created using * rb_mod_refine() has this flag set. This is the bit which controls * difference between normal inclusion versus refinements. */ - RMODULE_IS_REFINEMENT = RUBY_FL_USER3, - - /** - * This flag has something to do with refinements. This is set when a - * (non-refinement) module is included into another module, which is a - * refinement. This amends the way `super` searches for a super method. - * - * ```ruby - * class Foo - * def foo - * "Foo" - * end - * end - * - * module Bar - * def foo - * "[#{super}]" # this - * end - * end - * - * module Baz - * refine Foo do - * include Bar - * def foo - * "<#{super}>" - * end - * end - * end - * - * using Baz - * Foo.new.foo # => "[<Foo>]" - * ``` - * - * The `super` marked with "this" comment shall look for overlaid - * `Foo#foo`, which is not the ordinal method lookup direction. - */ - RMODULE_INCLUDED_INTO_REFINEMENT = RUBY_FL_USER4 + RMODULE_IS_REFINEMENT = RUBY_FL_USER3 }; struct RClass; /* Opaque, declared here for RCLASS() macro. */ diff --git a/include/ruby/internal/core/rdata.h b/include/ruby/internal/core/rdata.h index f6656b6546..43ab3c01e7 100644 --- a/include/ruby/internal/core/rdata.h +++ b/include/ruby/internal/core/rdata.h @@ -369,30 +369,6 @@ rb_data_object_alloc(VALUE klass, void *data, RUBY_DATA_FUNC dmark, RUBY_DATA_FU return rb_data_object_wrap(klass, data, dmark, dfree); } -RBIMPL_ATTR_DEPRECATED(("by: rb_cObject. Will be removed in 3.1.")) -RBIMPL_ATTR_PURE() -/** - * @private - * - * @deprecated There once was a variable called rb_cData, which no longer - * exists today. This function is a function because we want - * warnings for the usages. - */ -static inline VALUE -rb_cData(void) -{ - return rb_cObject; -} - -/** - * @private - * - * @deprecated This macro once was a thing in the old days, but makes no sense - * any longer today. Exists here for backwards compatibility - * only. You can safely forget about it. - */ -#define rb_cData rb_cData() - /** @cond INTERNAL_MACRO */ #define rb_data_object_wrap_0 rb_data_object_wrap #define rb_data_object_wrap_1 rb_data_object_wrap_warning diff --git a/include/ruby/internal/core/rfile.h b/include/ruby/internal/core/rfile.h index f8dddde9e5..a0eb8cb833 100644 --- a/include/ruby/internal/core/rfile.h +++ b/include/ruby/internal/core/rfile.h @@ -25,7 +25,7 @@ /* rb_io_t is in ruby/io.h. The header file has historically not been included * into ruby/ruby.h. We follow that tradition. */ -struct rb_io_t; +struct rb_io; /** * Ruby's File and IO. Ruby's IO are not just file descriptors. They have @@ -38,7 +38,7 @@ struct RFile { struct RBasic basic; /** IO's specific fields. */ - struct rb_io_t *fptr; + struct rb_io *fptr; }; /** diff --git a/include/ruby/internal/core/rhash.h b/include/ruby/internal/core/rhash.h index 61d2c15d87..897c570794 100644 --- a/include/ruby/internal/core/rhash.h +++ b/include/ruby/internal/core/rhash.h @@ -54,19 +54,6 @@ * * @internal * - * Declaration of rb_hash_iter_lev() is at include/ruby/backward.h. - */ -#define RHASH_ITER_LEV(h) rb_hash_iter_lev(h) - -/** - * @private - * - * @deprecated This macro once was a thing in the old days, but makes no sense - * any longer today. Exists here for backwards compatibility - * only. You can safely forget about it. - * - * @internal - * * Declaration of rb_hash_ifnone() is at include/ruby/backward.h. */ #define RHASH_IFNONE(h) rb_hash_ifnone(h) diff --git a/include/ruby/internal/core/rmatch.h b/include/ruby/internal/core/rmatch.h index 2d2fd897f5..a528c2999e 100644 --- a/include/ruby/internal/core/rmatch.h +++ b/include/ruby/internal/core/rmatch.h @@ -68,7 +68,7 @@ struct rmatch_offset { }; /** Represents a match. */ -struct rmatch { +struct rb_matchext_struct { /** * "Registers" of a match. This is a quasi-opaque struct that holds * execution result of a match. Roughly resembles `&~`. @@ -82,6 +82,8 @@ struct rmatch { int char_offset_num_allocated; }; +typedef struct rb_matchext_struct rb_matchext_t; + /** * Regular expression execution context. When a regular expression "matches" * to a string, it generates capture groups etc. This struct holds that info. @@ -102,16 +104,13 @@ struct RMatch { VALUE str; /** - * The result of this match. - */ - struct rmatch *rmatch; - - /** * The expression of this match. */ VALUE regexp; /* RRegexp */ }; +#define RMATCH_EXT(m) ((rb_matchext_t *)((char *)(m) + sizeof(struct RMatch))) + RBIMPL_ATTR_PURE_UNLESS_DEBUG() RBIMPL_ATTR_ARTIFICIAL() /** @@ -139,8 +138,7 @@ static inline struct re_registers * RMATCH_REGS(VALUE match) { RBIMPL_ASSERT_TYPE(match, RUBY_T_MATCH); - RBIMPL_ASSERT_OR_ASSUME(RMATCH(match)->rmatch != NULL); - return &RMATCH(match)->rmatch->regs; + return &RMATCH_EXT(match)->regs; } #endif /* RBIMPL_RMATCH_H */ diff --git a/include/ruby/internal/core/robject.h b/include/ruby/internal/core/robject.h index f2028063a6..c2bcae6306 100644 --- a/include/ruby/internal/core/robject.h +++ b/include/ruby/internal/core/robject.h @@ -37,16 +37,15 @@ /** * Convenient casting macro. * - * @param obj An object, which is in fact an ::RRegexp. - * @return The passed object casted to ::RRegexp. + * @param obj An object, which is in fact an ::RObject. + * @return The passed object casted to ::RObject. */ #define ROBJECT(obj) RBIMPL_CAST((struct RObject *)(obj)) /** @cond INTERNAL_MACRO */ #define ROBJECT_EMBED_LEN_MAX ROBJECT_EMBED_LEN_MAX #define ROBJECT_EMBED ROBJECT_EMBED -#define ROBJECT_NUMIV ROBJECT_NUMIV +#define ROBJECT_IV_CAPACITY ROBJECT_IV_CAPACITY #define ROBJECT_IVPTR ROBJECT_IVPTR -#define ROBJECT_IV_INDEX_TBL ROBJECT_IV_INDEX_TBL /** @endcond */ /** @@ -75,15 +74,6 @@ enum ruby_robject_flags { ROBJECT_EMBED = RUBY_FL_USER1 }; -/** - * This is an enum because GDB wants it (rather than a macro). People need not - * bother. - */ -enum ruby_robject_consts { - /** Max possible number of instance variables that can be embedded. */ - ROBJECT_EMBED_LEN_MAX = RBIMPL_EMBED_LEN_MAX_OF(VALUE) -}; - struct st_table; /** @@ -103,13 +93,6 @@ struct RObject { * this pattern. */ struct { - - /** - * Number of instance variables. This is per object; objects might - * differ in this field even if they have the identical classes. - */ - uint32_t numiv; - /** Pointer to a C array that holds instance variables. */ VALUE *ivptr; @@ -121,42 +104,24 @@ struct RObject { * * This is a shortcut for `RCLASS_IV_INDEX_TBL(rb_obj_class(obj))`. */ - struct st_table *iv_index_tbl; + struct rb_id_table *iv_index_tbl; } heap; - /** - * Embedded instance variables. When an object is small enough, it + /* Embedded instance variables. When an object is small enough, it * uses this area to store the instance variables. + * + * This is a length 1 array because: + * 1. GCC has a bug that does not optimize C flexible array members + * (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102452) + * 2. Zero length arrays are not supported by all compilers */ - VALUE ary[ROBJECT_EMBED_LEN_MAX]; + VALUE ary[1]; } as; }; RBIMPL_ATTR_PURE_UNLESS_DEBUG() RBIMPL_ATTR_ARTIFICIAL() /** - * Queries the number of instance variables. - * - * @param[in] obj Object in question. - * @return Its number of instance variables. - * @pre `obj` must be an instance of ::RObject. - */ -static inline uint32_t -ROBJECT_NUMIV(VALUE obj) -{ - RBIMPL_ASSERT_TYPE(obj, RUBY_T_OBJECT); - - if (RB_FL_ANY_RAW(obj, ROBJECT_EMBED)) { - return ROBJECT_EMBED_LEN_MAX; - } - else { - return ROBJECT(obj)->as.heap.numiv; - } -} - -RBIMPL_ATTR_PURE_UNLESS_DEBUG() -RBIMPL_ATTR_ARTIFICIAL() -/** * Queries the instance variables. * * @param[in] obj Object in question. diff --git a/include/ruby/internal/core/rstring.h b/include/ruby/internal/core/rstring.h index d16a57b1c4..0bca74e688 100644 --- a/include/ruby/internal/core/rstring.h +++ b/include/ruby/internal/core/rstring.h @@ -42,11 +42,7 @@ /** @cond INTERNAL_MACRO */ #define RSTRING_NOEMBED RSTRING_NOEMBED -#define RSTRING_EMBED_LEN_MASK RSTRING_EMBED_LEN_MASK -#define RSTRING_EMBED_LEN_SHIFT RSTRING_EMBED_LEN_SHIFT -#define RSTRING_EMBED_LEN_MAX RSTRING_EMBED_LEN_MAX #define RSTRING_FSTR RSTRING_FSTR -#define RSTRING_EMBED_LEN RSTRING_EMBED_LEN #define RSTRING_LEN RSTRING_LEN #define RSTRING_LENINT RSTRING_LENINT #define RSTRING_PTR RSTRING_PTR @@ -160,19 +156,6 @@ enum ruby_rstring_flags { */ RSTRING_NOEMBED = RUBY_FL_USER1, - /** - * When a string employs embedded strategy (see ::RSTRING_NOEMBED), these - * bits are used to store the number of bytes actually filled into - * ::RString::ary. - * - * @internal - * - * 3rd parties must not be aware that there even is more than one way to - * store a string. Might better be hidden. - */ - RSTRING_EMBED_LEN_MASK = RUBY_FL_USER2 | RUBY_FL_USER3 | RUBY_FL_USER4 | - RUBY_FL_USER5 | RUBY_FL_USER6, - /* Actually, string encodings are also encoded into the flags, using * remaining bits.*/ @@ -199,18 +182,6 @@ enum ruby_rstring_flags { }; /** - * This is an enum because GDB wants it (rather than a macro). People need not - * bother. - */ -enum ruby_rstring_consts { - /** Where ::RSTRING_EMBED_LEN_MASK resides. */ - RSTRING_EMBED_LEN_SHIFT = RUBY_FL_USHIFT + 2, - - /** Max possible number of characters that can be embedded. */ - RSTRING_EMBED_LEN_MAX = RBIMPL_EMBED_LEN_MAX_OF(char) - 1 -}; - -/** * Ruby's String. A string in ruby conceptually has these information: * * - Encoding of the string. @@ -227,6 +198,13 @@ struct RString { /** Basic part, including flags and class. */ struct RBasic basic; + /** + * Length of the string, not including terminating NUL character. + * + * @note This is in bytes. + */ + long len; + /** String's specific fields. */ union { @@ -235,14 +213,6 @@ struct RString { * pattern. */ struct { - - /** - * Length of the string, not including terminating NUL character. - * - * @note This is in bytes. - */ - long len; - /** * Pointer to the contents of the string. In the old days each * string had dedicated memory regions. That is no longer true @@ -271,14 +241,15 @@ struct RString { } aux; } heap; - /** - * Embedded contents. When a string is short enough, it uses this area - * to store the contents themselves. This was impractical in the 20th - * century, but these days 64 bit machines can typically hold 48 bytes - * here. Could be sufficiently large. In this case the length is - * encoded into the flags. - */ - char ary[RSTRING_EMBED_LEN_MAX + 1]; + /** Embedded contents. */ + struct { + /* This is a length 1 array because: + * 1. GCC has a bug that does not optimize C flexible array members + * (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102452) + * 2. Zero length arrays are not supported by all compilers + */ + char ary[1]; + } embed; } as; }; @@ -390,26 +361,12 @@ RBIMPL_ATTR_ARTIFICIAL() * * @param[in] str String in question. * @return Its length, in bytes. - * @pre `str` must be an instance of ::RString, and must has its - * ::RSTRING_NOEMBED flag off. - * - * @internal - * - * This was a macro before. It was inevitable to be public, since macros are - * global constructs. But should it be forever? Now that it is a function, - * @shyouhei thinks it could just be eliminated, hidden into implementation - * details. + * @pre `str` must be an instance of ::RString. */ static inline long -RSTRING_EMBED_LEN(VALUE str) +RSTRING_LEN(VALUE str) { - RBIMPL_ASSERT_TYPE(str, RUBY_T_STRING); - RBIMPL_ASSERT_OR_ASSUME(! RB_FL_ANY_RAW(str, RSTRING_NOEMBED)); - - VALUE f = RBASIC(str)->flags; - f &= RSTRING_EMBED_LEN_MASK; - f >>= RSTRING_EMBED_LEN_SHIFT; - return RBIMPL_CAST((long)f); + return RSTRING(str)->len; } RBIMPL_WARNING_PUSH() @@ -439,29 +396,14 @@ rbimpl_rstring_getmem(VALUE str) else { /* Expecting compilers to optimize this on-stack struct away. */ struct RString retval; - retval.as.heap.len = RSTRING_EMBED_LEN(str); - retval.as.heap.ptr = RSTRING(str)->as.ary; + retval.len = RSTRING_LEN(str); + retval.as.heap.ptr = RSTRING(str)->as.embed.ary; return retval; } } RBIMPL_WARNING_POP() -RBIMPL_ATTR_PURE_UNLESS_DEBUG() -RBIMPL_ATTR_ARTIFICIAL() -/** - * Queries the length of the string. - * - * @param[in] str String in question. - * @return Its length, in bytes. - * @pre `str` must be an instance of ::RString. - */ -static inline long -RSTRING_LEN(VALUE str) -{ - return rbimpl_rstring_getmem(str).as.heap.len; -} - RBIMPL_ATTR_ARTIFICIAL() /** * Queries the contents pointer of the string. @@ -475,13 +417,9 @@ RSTRING_PTR(VALUE str) { char *ptr = rbimpl_rstring_getmem(str).as.heap.ptr; - if (RB_UNLIKELY(! ptr)) { + if (RUBY_DEBUG && RB_UNLIKELY(! ptr)) { /* :BEWARE: @shyouhei thinks that currently, there are rooms for this - * function to return NULL. In the 20th century that was a pointless - * concern. However struct RString can hold fake strings nowadays. It - * seems no check against NULL are exercised around handling of them - * (one of such usages is located in marshal.c, which scares - * @shyouhei). Better check here for maximum safety. + * function to return NULL. Better check here for maximum safety. * * Also, this is not rb_warn() because RSTRING_PTR() can be called * during GC (see what obj_info() does). rb_warn() needs to allocate @@ -505,12 +443,12 @@ RSTRING_END(VALUE str) { struct RString buf = rbimpl_rstring_getmem(str); - if (RB_UNLIKELY(! buf.as.heap.ptr)) { + if (RUBY_DEBUG && RB_UNLIKELY(! buf.as.heap.ptr)) { /* Ditto. */ rb_debug_rstring_null_ptr("RSTRING_END"); } - return &buf.as.heap.ptr[buf.as.heap.len]; + return &buf.as.heap.ptr[buf.len]; } RBIMPL_ATTR_ARTIFICIAL() @@ -544,7 +482,7 @@ RSTRING_LENINT(VALUE str) __extension__ ({ \ struct RString rbimpl_str = rbimpl_rstring_getmem(str); \ (ptrvar) = rbimpl_str.as.heap.ptr; \ - (lenvar) = rbimpl_str.as.heap.len; \ + (lenvar) = rbimpl_str.len; \ }) #else # define RSTRING_GETMEM(str, ptrvar, lenvar) \ diff --git a/include/ruby/internal/core/rtypeddata.h b/include/ruby/internal/core/rtypeddata.h index bbf208867d..6c19576c20 100644 --- a/include/ruby/internal/core/rtypeddata.h +++ b/include/ruby/internal/core/rtypeddata.h @@ -114,6 +114,8 @@ #define RUBY_TYPED_PROMOTED1 RUBY_TYPED_PROMOTED1 /** @endcond */ +#define TYPED_DATA_EMBEDDED 2 + /** * @private * @@ -137,6 +139,8 @@ rbimpl_typeddata_flags { */ RUBY_TYPED_FREE_IMMEDIATELY = 1, + RUBY_TYPED_EMBEDDABLE = 2, + /** * This flag has something to do with Ractor. Multiple Ractors run without * protecting each other. Sharing an object among Ractors is basically @@ -173,10 +177,16 @@ rbimpl_typeddata_flags { RUBY_TYPED_WB_PROTECTED = RUBY_FL_WB_PROTECTED, /* THIS FLAG DEPENDS ON Ruby version */ /** - * This flag is mysterious. It seems nobody is currently using it. The - * intention of this flag is also unclear. We need further investigations. + * This flag no longer in use */ - RUBY_TYPED_PROMOTED1 = RUBY_FL_PROMOTED1 /* THIS FLAG DEPENDS ON Ruby version */ + RUBY_TYPED_UNUSED = RUBY_FL_UNUSED6, + + /** + * This flag determines whether marking and compaction should be carried out + * using the dmark/dcompact callback functions or whether we should mark + * declaratively using a list of references defined inside the data struct we're wrapping + */ + RUBY_TYPED_DECL_MARKING = RUBY_FL_USER2 }; /** @@ -347,16 +357,14 @@ struct RTypedData { * data. This roughly resembles a Ruby level class (apart from method * definition etc.) */ - const rb_data_type_t *type; + const rb_data_type_t *const type; /** * This has to be always 1. * * @internal - * - * Why, then, this is not a const ::VALUE? */ - VALUE typed_flag; + const VALUE typed_flag; /** Pointer to the actual C level struct that you want to wrap. */ void *data; @@ -456,7 +464,7 @@ RBIMPL_SYMBOL_EXPORT_END() */ #define TypedData_Make_Struct0(result, klass, type, size, data_type, sval) \ VALUE result = rb_data_typed_object_zalloc(klass, size, data_type); \ - (sval) = RBIMPL_CAST((type *)RTYPEDDATA_DATA(result)); \ + (sval) = (type *)RTYPEDDATA_GET_DATA(result); \ RBIMPL_CAST(/*suppress unused variable warnings*/(void)(sval)) /** @@ -507,6 +515,36 @@ RBIMPL_SYMBOL_EXPORT_END() #define TypedData_Get_Struct(obj,type,data_type,sval) \ ((sval) = RBIMPL_CAST((type *)rb_check_typeddata((obj), (data_type)))) +static inline bool +RTYPEDDATA_EMBEDDED_P(VALUE obj) +{ +#if RUBY_DEBUG + if (RB_UNLIKELY(!RB_TYPE_P(obj, RUBY_T_DATA))) { + Check_Type(obj, RUBY_T_DATA); + RBIMPL_UNREACHABLE_RETURN(false); + } +#endif + + return RTYPEDDATA(obj)->typed_flag & TYPED_DATA_EMBEDDED; +} + +static inline void * +RTYPEDDATA_GET_DATA(VALUE obj) +{ +#if RUBY_DEBUG + if (RB_UNLIKELY(!RB_TYPE_P(obj, RUBY_T_DATA))) { + Check_Type(obj, RUBY_T_DATA); + RBIMPL_UNREACHABLE_RETURN(false); + } +#endif + + /* We reuse the data pointer in embedded TypedData. We can't use offsetof + * since RTypedData a non-POD type in C++. */ + const size_t embedded_typed_data_size = sizeof(struct RTypedData) - sizeof(void *); + + return RTYPEDDATA_EMBEDDED_P(obj) ? (char *)obj + embedded_typed_data_size : RTYPEDDATA(obj)->data; +} + RBIMPL_ATTR_PURE() RBIMPL_ATTR_ARTIFICIAL() /** @@ -523,7 +561,8 @@ RBIMPL_ATTR_ARTIFICIAL() static inline bool rbimpl_rtypeddata_p(VALUE obj) { - return RTYPEDDATA(obj)->typed_flag == 1; + VALUE typed_flag = RTYPEDDATA(obj)->typed_flag; + return typed_flag != 0 && typed_flag <= 3; } RBIMPL_ATTR_PURE_UNLESS_DEBUG() diff --git a/include/ruby/internal/dllexport.h b/include/ruby/internal/dllexport.h index 08a262209d..71026e7100 100644 --- a/include/ruby/internal/dllexport.h +++ b/include/ruby/internal/dllexport.h @@ -37,9 +37,7 @@ * ``` */ #undef RUBY_EXTERN -#if defined(MJIT_HEADER) && defined(_WIN32) -# define RUBY_EXTERN extern __declspec(dllimport) -#elif defined(RUBY_EXPORT) +#if defined(RUBY_EXPORT) # define RUBY_EXTERN extern #elif defined(_WIN32) # define RUBY_EXTERN extern __declspec(dllimport) @@ -59,36 +57,6 @@ # define RUBY_FUNC_EXPORTED /* void */ #endif -/** - * @cond INTERNAL_MACRO - * - * These MJIT related macros are placed here because translate_mjit_header can - * need them. Extension libraries should not touch. - */ - -/* These macros are used for functions which are exported only for MJIT - and NOT ensured to be exported in future versions. */ - -#if ! defined(MJIT_HEADER) -# define MJIT_FUNC_EXPORTED RUBY_FUNC_EXPORTED -#elif ! RBIMPL_COMPILER_IS(MSVC) -# define MJIT_FUNC_EXPORTED RUBY_FUNC_EXPORTED -#else -# define MJIT_FUNC_EXPORTED static -#endif - -#define MJIT_SYMBOL_EXPORT_BEGIN RUBY_SYMBOL_EXPORT_BEGIN -#define MJIT_SYMBOL_EXPORT_END RUBY_SYMBOL_EXPORT_END - -/* On mswin, MJIT header transformation can't be used since cl.exe can't output - preprocessed output preserving macros. So this `MJIT_STATIC` is needed - to force non-static function to static on MJIT header to avoid symbol conflict. */ -#ifdef MJIT_HEADER -# define MJIT_STATIC static -#else -# define MJIT_STATIC -#endif - /** @endcond */ /** Shortcut macro equivalent to `RUBY_SYMBOL_EXPORT_BEGIN extern "C" {`. diff --git a/include/ruby/internal/encoding/coderange.h b/include/ruby/internal/encoding/coderange.h new file mode 100644 index 0000000000..7a81208c9e --- /dev/null +++ b/include/ruby/internal/encoding/coderange.h @@ -0,0 +1,202 @@ +#ifndef RUBY_INTERNAL_ENCODING_CODERANGE_H /*-*-C++-*-vi:se ft=cpp:*/ +#define RUBY_INTERNAL_ENCODING_CODERANGE_H +/** + * @file + * @author Ruby developers <ruby-core@ruby-lang.org> + * @copyright This file is a part of the programming language Ruby. + * Permission is hereby granted, to either redistribute and/or + * modify this file, provided that the conditions mentioned in the + * file COPYING are met. Consult the file for details. + * @warning Symbols prefixed with either `RBIMPL` or `rbimpl` are + * implementation details. Don't take them as canon. They could + * rapidly appear then vanish. The name (path) of this header file + * is also an implementation detail. Do not expect it to persist + * at the place it is now. Developers are free to move it anywhere + * anytime at will. + * @note To ruby-core: remember that this header can be possibly + * recursively included from extension libraries written in C++. + * Do not expect for instance `__VA_ARGS__` is always available. + * We assume C99 for ruby itself but we don't assume languages of + * extension libraries. They could be written in C++98. + * @brief Routines for code ranges. + */ + +#include "ruby/internal/attr/const.h" +#include "ruby/internal/attr/pure.h" +#include "ruby/internal/dllexport.h" +#include "ruby/internal/fl_type.h" +#include "ruby/internal/value.h" + +RBIMPL_SYMBOL_EXPORT_BEGIN() + +/** What rb_enc_str_coderange() returns. */ +enum ruby_coderange_type { + + /** The object's coderange is unclear yet. */ + RUBY_ENC_CODERANGE_UNKNOWN = 0, + + /** The object holds 0 to 127 inclusive and nothing else. */ + RUBY_ENC_CODERANGE_7BIT = ((int)RUBY_FL_USER8), + + /** The object's encoding and contents are consistent each other */ + RUBY_ENC_CODERANGE_VALID = ((int)RUBY_FL_USER9), + + /** The object holds invalid/malformed/broken character(s). */ + RUBY_ENC_CODERANGE_BROKEN = ((int)(RUBY_FL_USER8|RUBY_FL_USER9)), + + /** Where the coderange resides. */ + RUBY_ENC_CODERANGE_MASK = (RUBY_ENC_CODERANGE_7BIT| + RUBY_ENC_CODERANGE_VALID| + RUBY_ENC_CODERANGE_BROKEN) +}; + +RBIMPL_ATTR_CONST() +/** + * @private + * + * This is an implementation detail of #RB_ENC_CODERANGE_CLEAN_P. People don't + * use it directly. + * + * @param[in] cr An enum ::ruby_coderange_type. + * @retval 1 It is. + * @retval 0 It isn't. + */ +static inline int +rb_enc_coderange_clean_p(int cr) +{ + return (cr ^ (cr >> 1)) & RUBY_ENC_CODERANGE_7BIT; +} + +RBIMPL_ATTR_CONST() +/** + * Queries if a code range is "clean". "Clean" in this context means it is + * known and valid. + * + * @param[in] cr An enum ::ruby_coderange_type. + * @retval 1 It is. + * @retval 0 It isn't. + */ +static inline bool +RB_ENC_CODERANGE_CLEAN_P(enum ruby_coderange_type cr) +{ + return rb_enc_coderange_clean_p(cr); +} + +RBIMPL_ATTR_PURE_UNLESS_DEBUG() +/** + * Queries the (inline) code range of the passed object. The object must be + * capable of having inline encoding. Using this macro needs deep + * understanding of bit level object binary layout. + * + * @param[in] obj Target object. + * @return An enum ::ruby_coderange_type. + */ +static inline enum ruby_coderange_type +RB_ENC_CODERANGE(VALUE obj) +{ + VALUE ret = RB_FL_TEST_RAW(obj, RUBY_ENC_CODERANGE_MASK); + + return RBIMPL_CAST((enum ruby_coderange_type)ret); +} + +RBIMPL_ATTR_PURE_UNLESS_DEBUG() +/** + * Queries the (inline) code range of the passed object is + * ::RUBY_ENC_CODERANGE_7BIT. The object must be capable of having inline + * encoding. Using this macro needs deep understanding of bit level object + * binary layout. + * + * @param[in] obj Target object. + * @retval 1 It is ascii only. + * @retval 0 Otherwise (including cases when the range is not known). + */ +static inline bool +RB_ENC_CODERANGE_ASCIIONLY(VALUE obj) +{ + return RB_ENC_CODERANGE(obj) == RUBY_ENC_CODERANGE_7BIT; +} + +/** + * Destructively modifies the passed object so that its (inline) code range is + * the passed one. The object must be capable of having inline encoding. + * Using this macro needs deep understanding of bit level object binary layout. + * + * @param[out] obj Target object. + * @param[out] cr An enum ::ruby_coderange_type. + * @post `obj`'s code range is `cr`. + */ +static inline void +RB_ENC_CODERANGE_SET(VALUE obj, enum ruby_coderange_type cr) +{ + RB_FL_UNSET_RAW(obj, RUBY_ENC_CODERANGE_MASK); + RB_FL_SET_RAW(obj, cr); +} + +/** + * Destructively clears the passed object's (inline) code range. The object + * must be capable of having inline encoding. Using this macro needs deep + * understanding of bit level object binary layout. + * + * @param[out] obj Target object. + * @post `obj`'s code range is ::RUBY_ENC_CODERANGE_UNKNOWN. + */ +static inline void +RB_ENC_CODERANGE_CLEAR(VALUE obj) +{ + RB_FL_UNSET_RAW(obj, RUBY_ENC_CODERANGE_MASK); +} + +RBIMPL_ATTR_CONST() +/* assumed ASCII compatibility */ +/** + * "Mix" two code ranges into one. This is handy for instance when you + * concatenate two strings into one. Consider one of then is valid but the + * other isn't. The result must be invalid. This macro computes that kind of + * mixture. + * + * @param[in] a An enum ::ruby_coderange_type. + * @param[in] b Another enum ::ruby_coderange_type. + * @return The `a` "and" `b`. + */ +static inline enum ruby_coderange_type +RB_ENC_CODERANGE_AND(enum ruby_coderange_type a, enum ruby_coderange_type b) +{ + if (a == RUBY_ENC_CODERANGE_7BIT) { + return b; + } + else if (a != RUBY_ENC_CODERANGE_VALID) { + return RUBY_ENC_CODERANGE_UNKNOWN; + } + else if (b == RUBY_ENC_CODERANGE_7BIT) { + return RUBY_ENC_CODERANGE_VALID; + } + else { + return b; + } +} + +#define ENC_CODERANGE_MASK RUBY_ENC_CODERANGE_MASK /**< @old{RUBY_ENC_CODERANGE_MASK} */ +#define ENC_CODERANGE_UNKNOWN RUBY_ENC_CODERANGE_UNKNOWN /**< @old{RUBY_ENC_CODERANGE_UNKNOWN} */ +#define ENC_CODERANGE_7BIT RUBY_ENC_CODERANGE_7BIT /**< @old{RUBY_ENC_CODERANGE_7BIT} */ +#define ENC_CODERANGE_VALID RUBY_ENC_CODERANGE_VALID /**< @old{RUBY_ENC_CODERANGE_VALID} */ +#define ENC_CODERANGE_BROKEN RUBY_ENC_CODERANGE_BROKEN /**< @old{RUBY_ENC_CODERANGE_BROKEN} */ +#define ENC_CODERANGE_CLEAN_P(cr) RB_ENC_CODERANGE_CLEAN_P(cr) /**< @old{RB_ENC_CODERANGE_CLEAN_P} */ +#define ENC_CODERANGE(obj) RB_ENC_CODERANGE(obj) /**< @old{RB_ENC_CODERANGE} */ +#define ENC_CODERANGE_ASCIIONLY(obj) RB_ENC_CODERANGE_ASCIIONLY(obj) /**< @old{RB_ENC_CODERANGE_ASCIIONLY} */ +#define ENC_CODERANGE_SET(obj,cr) RB_ENC_CODERANGE_SET(obj,cr) /**< @old{RB_ENC_CODERANGE_SET} */ +#define ENC_CODERANGE_CLEAR(obj) RB_ENC_CODERANGE_CLEAR(obj) /**< @old{RB_ENC_CODERANGE_CLEAR} */ +#define ENC_CODERANGE_AND(a, b) RB_ENC_CODERANGE_AND(a, b) /**< @old{RB_ENC_CODERANGE_AND} */ +#define ENCODING_CODERANGE_SET(obj, encindex, cr) RB_ENCODING_CODERANGE_SET(obj, encindex, cr) /**< @old{RB_ENCODING_CODERANGE_SET} */ + +/** @cond INTERNAL_MACRO */ +#define RB_ENC_CODERANGE RB_ENC_CODERANGE +#define RB_ENC_CODERANGE_AND RB_ENC_CODERANGE_AND +#define RB_ENC_CODERANGE_ASCIIONLY RB_ENC_CODERANGE_ASCIIONLY +#define RB_ENC_CODERANGE_CLEAN_P RB_ENC_CODERANGE_CLEAN_P +#define RB_ENC_CODERANGE_CLEAR RB_ENC_CODERANGE_CLEAR +#define RB_ENC_CODERANGE_SET RB_ENC_CODERANGE_SET +/** @endcond */ + +RBIMPL_SYMBOL_EXPORT_END() + +#endif /* RUBY_INTERNAL_ENCODING_CODERANGE_H */ diff --git a/include/ruby/internal/encoding/ctype.h b/include/ruby/internal/encoding/ctype.h new file mode 100644 index 0000000000..05c314aeb3 --- /dev/null +++ b/include/ruby/internal/encoding/ctype.h @@ -0,0 +1,258 @@ +#ifndef RUBY_INTERNAL_ENCODING_CTYPE_H /*-*-C++-*-vi:se ft=cpp:*/ +#define RUBY_INTERNAL_ENCODING_CTYPE_H +/** + * @file + * @author Ruby developers <ruby-core@ruby-lang.org> + * @copyright This file is a part of the programming language Ruby. + * Permission is hereby granted, to either redistribute and/or + * modify this file, provided that the conditions mentioned in the + * file COPYING are met. Consult the file for details. + * @warning Symbols prefixed with either `RBIMPL` or `rbimpl` are + * implementation details. Don't take them as canon. They could + * rapidly appear then vanish. The name (path) of this header file + * is also an implementation detail. Do not expect it to persist + * at the place it is now. Developers are free to move it anywhere + * anytime at will. + * @note To ruby-core: remember that this header can be possibly + * recursively included from extension libraries written in C++. + * Do not expect for instance `__VA_ARGS__` is always available. + * We assume C99 for ruby itself but we don't assume languages of + * extension libraries. They could be written in C++98. + * @brief Routines to query chacater types. + */ + +#include "ruby/onigmo.h" +#include "ruby/internal/attr/const.h" +#include "ruby/internal/dllexport.h" +#include "ruby/internal/encoding/encoding.h" +#include "ruby/internal/value.h" + +RBIMPL_SYMBOL_EXPORT_BEGIN() + +/** + * Queries if the passed pointer points to a newline character. What is a + * newline and what is not depends on the passed encoding. + * + * @param[in] p Pointer to a possibly-middle of a character. + * @param[in] end End of the string. + * @param[in] enc Encoding. + * @retval false It isn't. + * @retval true It is. + */ +static inline bool +rb_enc_is_newline(const char *p, const char *e, rb_encoding *enc) +{ + OnigUChar *up = RBIMPL_CAST((OnigUChar *)p); + OnigUChar *ue = RBIMPL_CAST((OnigUChar *)e); + + return ONIGENC_IS_MBC_NEWLINE(enc, up, ue); +} + +/** + * Queries if the passed code point is of passed character type in the passed + * encoding. The "character type" here is a set of macros defined in onigmo.h, + * like `ONIGENC_CTYPE_PUNCT`. + * + * @param[in] c An `OnigCodePoint` value. + * @param[in] t An `OnigCtype` value. + * @param[in] enc A `rb_encoding*` value. + * @retval true `c` is of `t` in `enc`. + * @retval false Otherwise. + */ +static inline bool +rb_enc_isctype(OnigCodePoint c, OnigCtype t, rb_encoding *enc) +{ + return ONIGENC_IS_CODE_CTYPE(enc, c, t); +} + +/** + * Identical to rb_isascii(), except it additionally takes an encoding. + * + * @param[in] c A code point. + * @param[in] enc An encoding. + * @retval false `c` is out of range of ASCII character set in `enc`. + * @retval true Otherwise. + * + * @internal + * + * `enc` is ignored. This is at least an intentional implementation detail + * (not a bug). But there could be rooms for future extensions. + */ +static inline bool +rb_enc_isascii(OnigCodePoint c, rb_encoding *enc) +{ + return ONIGENC_IS_CODE_ASCII(c); +} + +/** + * Identical to rb_isalpha(), except it additionally takes an encoding. + * + * @param[in] c A code point. + * @param[in] enc An encoding. + * @retval true `enc` classifies `c` as "ALPHA". + * @retval false Otherwise. + */ +static inline bool +rb_enc_isalpha(OnigCodePoint c, rb_encoding *enc) +{ + return ONIGENC_IS_CODE_ALPHA(enc, c); +} + +/** + * Identical to rb_islower(), except it additionally takes an encoding. + * + * @param[in] c A code point. + * @param[in] enc An encoding. + * @retval true `enc` classifies `c` as "LOWER". + * @retval false Otherwise. + */ +static inline bool +rb_enc_islower(OnigCodePoint c, rb_encoding *enc) +{ + return ONIGENC_IS_CODE_LOWER(enc, c); +} + +/** + * Identical to rb_isupper(), except it additionally takes an encoding. + * + * @param[in] c A code point. + * @param[in] enc An encoding. + * @retval true `enc` classifies `c` as "UPPER". + * @retval false Otherwise. + */ +static inline bool +rb_enc_isupper(OnigCodePoint c, rb_encoding *enc) +{ + return ONIGENC_IS_CODE_UPPER(enc, c); +} + +/** + * Identical to rb_iscntrl(), except it additionally takes an encoding. + * + * @param[in] c A code point. + * @param[in] enc An encoding. + * @retval true `enc` classifies `c` as "CNTRL". + * @retval false Otherwise. + */ +static inline bool +rb_enc_iscntrl(OnigCodePoint c, rb_encoding *enc) +{ + return ONIGENC_IS_CODE_CNTRL(enc, c); +} + +/** + * Identical to rb_ispunct(), except it additionally takes an encoding. + * + * @param[in] c A code point. + * @param[in] enc An encoding. + * @retval true `enc` classifies `c` as "PUNCT". + * @retval false Otherwise. + */ +static inline bool +rb_enc_ispunct(OnigCodePoint c, rb_encoding *enc) +{ + return ONIGENC_IS_CODE_PUNCT(enc, c); +} + +/** + * Identical to rb_isalnum(), except it additionally takes an encoding. + * + * @param[in] c A code point. + * @param[in] enc An encoding. + * @retval true `enc` classifies `c` as "ANUM". + * @retval false Otherwise. + */ +static inline bool +rb_enc_isalnum(OnigCodePoint c, rb_encoding *enc) +{ + return ONIGENC_IS_CODE_ALNUM(enc, c); +} + +/** + * Identical to rb_isprint(), except it additionally takes an encoding. + * + * @param[in] c A code point. + * @param[in] enc An encoding. + * @retval true `enc` classifies `c` as "PRINT". + * @retval false Otherwise. + */ +static inline bool +rb_enc_isprint(OnigCodePoint c, rb_encoding *enc) +{ + return ONIGENC_IS_CODE_PRINT(enc, c); +} + +/** + * Identical to rb_isspace(), except it additionally takes an encoding. + * + * @param[in] c A code point. + * @param[in] enc An encoding. + * @retval true `enc` classifies `c` as "PRINT". + * @retval false Otherwise. + */ +static inline bool +rb_enc_isspace(OnigCodePoint c, rb_encoding *enc) +{ + return ONIGENC_IS_CODE_SPACE(enc, c); +} + +/** + * Identical to rb_isdigit(), except it additionally takes an encoding. + * + * @param[in] c A code point. + * @param[in] enc An encoding. + * @retval true `enc` classifies `c` as "DIGIT". + * @retval false Otherwise. + */ +static inline bool +rb_enc_isdigit(OnigCodePoint c, rb_encoding *enc) +{ + return ONIGENC_IS_CODE_DIGIT(enc, c); +} + +RBIMPL_ATTR_CONST() +/** + * Identical to rb_toupper(), except it additionally takes an encoding. + * + * @param[in] c A code point. + * @param[in] enc An encoding. + * @return `c`'s (Ruby's definition of) upper case counterpart. + * + * @internal + * + * As `RBIMPL_ATTR_CONST` implies this function ignores `enc`. + */ +int rb_enc_toupper(int c, rb_encoding *enc); + +RBIMPL_ATTR_CONST() +/** + * Identical to rb_tolower(), except it additionally takes an encoding. + * + * @param[in] c A code point. + * @param[in] enc An encoding. + * @return `c`'s (Ruby's definition of) lower case counterpart. + * + * @internal + * + * As `RBIMPL_ATTR_CONST` implies this function ignores `enc`. + */ +int rb_enc_tolower(int c, rb_encoding *enc); + +RBIMPL_SYMBOL_EXPORT_END() + +/** @cond INTERNAL_MACRO */ +#define rb_enc_is_newline rb_enc_is_newline +#define rb_enc_isalnum rb_enc_isalnum +#define rb_enc_isalpha rb_enc_isalpha +#define rb_enc_isascii rb_enc_isascii +#define rb_enc_isctype rb_enc_isctype +#define rb_enc_isdigit rb_enc_isdigit +#define rb_enc_islower rb_enc_islower +#define rb_enc_isprint rb_enc_isprint +#define rb_enc_iscntrl rb_enc_iscntrl +#define rb_enc_ispunct rb_enc_ispunct +#define rb_enc_isspace rb_enc_isspace +#define rb_enc_isupper rb_enc_isupper +/** @endcond */ + +#endif /* RUBY_INTERNAL_ENCODING_CTYPE_H */ diff --git a/include/ruby/internal/encoding/encoding.h b/include/ruby/internal/encoding/encoding.h new file mode 100644 index 0000000000..a680651a81 --- /dev/null +++ b/include/ruby/internal/encoding/encoding.h @@ -0,0 +1,1044 @@ +#ifndef RUBY_INTERNAL_ENCODING_ENCODING_H /*-*-C++-*-vi:se ft=cpp:*/ +#define RUBY_INTERNAL_ENCODING_ENCODING_H +/** + * @file + * @author Ruby developers <ruby-core@ruby-lang.org> + * @copyright This file is a part of the programming language Ruby. + * Permission is hereby granted, to either redistribute and/or + * modify this file, provided that the conditions mentioned in the + * file COPYING are met. Consult the file for details. + * @warning Symbols prefixed with either `RBIMPL` or `rbimpl` are + * implementation details. Don't take them as canon. They could + * rapidly appear then vanish. The name (path) of this header file + * is also an implementation detail. Do not expect it to persist + * at the place it is now. Developers are free to move it anywhere + * anytime at will. + * @note To ruby-core: remember that this header can be possibly + * recursively included from extension libraries written in C++. + * Do not expect for instance `__VA_ARGS__` is always available. + * We assume C99 for ruby itself but we don't assume languages of + * extension libraries. They could be written in C++98. + * @brief Defines ::rb_encoding + */ + +#include "ruby/oniguruma.h" +#include "ruby/internal/attr/const.h" +#include "ruby/internal/attr/deprecated.h" +#include "ruby/internal/attr/noalias.h" +#include "ruby/internal/attr/pure.h" +#include "ruby/internal/attr/returns_nonnull.h" +#include "ruby/internal/dllexport.h" +#include "ruby/internal/encoding/coderange.h" +#include "ruby/internal/value.h" +#include "ruby/internal/core/rbasic.h" +#include "ruby/internal/fl_type.h" + +RBIMPL_SYMBOL_EXPORT_BEGIN() + +/** + * `Encoding` class. + * + * @ingroup object + */ +RUBY_EXTERN VALUE rb_cEncoding; + +/** + * @private + * + * Bit constants used when embedding encodings into ::RBasic::flags. Extension + * libraries must not bother such things. + */ +enum ruby_encoding_consts { + + /** Max possible number of embeddable encodings. */ + RUBY_ENCODING_INLINE_MAX = 127, + + /** Where inline encodings reside. */ + RUBY_ENCODING_SHIFT = (RUBY_FL_USHIFT+10), + + /** Bits we use to store inline encodings. */ + RUBY_ENCODING_MASK = (RUBY_ENCODING_INLINE_MAX<<RUBY_ENCODING_SHIFT + /* RUBY_FL_USER10..RUBY_FL_USER16 */), + + /** Max possible length of an encoding name. */ + RUBY_ENCODING_MAXNAMELEN = 42 +}; + +#define ENCODING_INLINE_MAX RUBY_ENCODING_INLINE_MAX /**< @old{RUBY_ENCODING_INLINE_MAX} */ +#define ENCODING_SHIFT RUBY_ENCODING_SHIFT /**< @old{RUBY_ENCODING_SHIFT} */ +#define ENCODING_MASK RUBY_ENCODING_MASK /**< @old{RUBY_ENCODING_MASK} */ + +/** + * Destructively assigns the passed encoding to the passed object. The object + * must be capable of having inline encoding. Using this macro needs deep + * understanding of bit level object binary layout. + * + * @param[out] obj Target object to modify. + * @param[in] ecindex Encoding in encindex format. + * @post `obj`'s encoding is `encindex`. + */ +static inline void +RB_ENCODING_SET_INLINED(VALUE obj, int encindex) +{ + VALUE f = /* upcast */ encindex; + + f <<= RUBY_ENCODING_SHIFT; + RB_FL_UNSET_RAW(obj, RUBY_ENCODING_MASK); + RB_FL_SET_RAW(obj, f); +} + +/** + * Queries the encoding of the passed object. The encoding must be smaller + * than ::RUBY_ENCODING_INLINE_MAX, which means you have some assumption on the + * return value. This means the API is for internal use only. + * + * @param[in] obj Target object. + * @return `obj`'s encoding index. + */ +static inline int +RB_ENCODING_GET_INLINED(VALUE obj) +{ + VALUE ret = RB_FL_TEST_RAW(obj, RUBY_ENCODING_MASK) >> RUBY_ENCODING_SHIFT; + + return RBIMPL_CAST((int)ret); +} + +#define ENCODING_SET_INLINED(obj,i) RB_ENCODING_SET_INLINED(obj,i) /**< @old{RB_ENCODING_SET_INLINED} */ +#define ENCODING_SET(obj,i) RB_ENCODING_SET(obj,i) /**< @old{RB_ENCODING_SET} */ +#define ENCODING_GET_INLINED(obj) RB_ENCODING_GET_INLINED(obj) /**< @old{RB_ENCODING_GET_INLINED} */ +#define ENCODING_GET(obj) RB_ENCODING_GET(obj) /**< @old{RB_ENCODING_GET} */ +#define ENCODING_IS_ASCII8BIT(obj) RB_ENCODING_IS_ASCII8BIT(obj) /**< @old{RB_ENCODING_IS_ASCII8BIT} */ +#define ENCODING_MAXNAMELEN RUBY_ENCODING_MAXNAMELEN /**< @old{RUBY_ENCODING_MAXNAMELEN} */ + +/** + * The type of encoding. Our design here is we take Oniguruma/Onigmo's + * multilingualisation schema as our base data structure. + */ +typedef const OnigEncodingType rb_encoding; + +RBIMPL_ATTR_NOALIAS() +/** + * Converts a character option to its encoding. It only supports a very + * limited set of Japanese encodings due to its Japanese origin. Ruby still + * has this in-core for backwards compatibility. But new codes must not bother + * such concept like one-character encoding option. Consider deprecated in + * practice. + * + * @param[in] c One of `['n', 'e', 's', 'u', 'i', 'x', 'm']`. + * @param[out] option Return buffer. + * @param[out] kcode Return buffer. + * @retval 1 `c` understood properly. + * @retval 0 `c` is not understood. + * @post `option` is a ::OnigOptionType. + * @post `kcode` is an enum `ruby_preserved_encindex`. + * + * @internal + * + * `kcode` is opaque because `ruby_preserved_encindex` is not visible from + * extension libraries. But who cares? + */ +int rb_char_to_option_kcode(int c, int *option, int *kcode); + +/** + * Creates a new "dummy" encoding. Roughly speaking, an encoding is dummy when + * it is stateful. Notable example of dummy encoding are those defined in + * ISO/IEC 2022 + * + * @param[in] name Name of the creating encoding. + * @exception rb_eArgError Duplicated or malformed `name`. + * @return New dummy encoding's index. + * @post Encoding named `name` is created, whose index is the return + * value. + */ +int rb_define_dummy_encoding(const char *name); + +RBIMPL_ATTR_PURE() +/** + * Queries if the passed encoding is dummy. + * + * @param[in] enc Encoding in question. + * @retval 1 It is. + * @retval 0 It isn't. + */ +int rb_enc_dummy_p(rb_encoding *enc); + +RBIMPL_ATTR_PURE() +/** + * Queries the index of the encoding. An encoding's index is a Ruby-local + * concept. It is a (sequential) number assigned to each encoding. + * + * @param[in] enc Encoding in question. + * @return Its index. + * @note You can pass null pointers to this function. It is equivalent + * to rb_usascii_encindex() then. + */ +int rb_enc_to_index(rb_encoding *enc); + +/** + * Queries the index of the encoding of the passed object, if any. + * + * @param[in] obj Object in question. + * @retval -1 `obj` is incapable of having an encoding. + * @retval otherwise `obj`'s encoding's index. + */ +int rb_enc_get_index(VALUE obj); + +/** + * @alias{rb_enc_get_index} + * + * @internal + * + * Implementation wise this is not a verbatim alias of rb_enc_get_index(). But + * the API is consistent. Don't bother. + */ +static inline int +RB_ENCODING_GET(VALUE obj) +{ + int encindex = RB_ENCODING_GET_INLINED(obj); + + if (encindex == RUBY_ENCODING_INLINE_MAX) { + return rb_enc_get_index(obj); + } + else { + return encindex; + } +} + +/** + * Destructively assigns an encoding (via its index) to an object. + * + * @param[out] obj Object in question. + * @param[in] encindex An encoding index. + * @exception rb_eFrozenError `obj` is frozen. + * @exception rb_eArgError `obj` is incapable of having an encoding. + * @exception rb_eEncodingError `encindex` is out of bounds. + * @exception rb_eLoadError Failed to load the encoding. + */ +void rb_enc_set_index(VALUE obj, int encindex); + +/** @alias{rb_enc_set_index} */ +static inline void +RB_ENCODING_SET(VALUE obj, int encindex) +{ + rb_enc_set_index(obj, encindex); +} + +/** + * This is #RB_ENCODING_SET + RB_ENC_CODERANGE_SET combo. The object must be + * capable of having inline encoding. Using this macro needs deep + * understanding of bit level object binary layout. + * + * @param[out] obj Target object. + * @param[in] encindex Encoding in encindex format. + * @param[in] cr An enum ::ruby_coderange_type. + * @post `obj`'s encoding is `encindex`. + * @post `obj`'s code range is `cr`. + */ +static inline void +RB_ENCODING_CODERANGE_SET(VALUE obj, int encindex, enum ruby_coderange_type cr) +{ + RB_ENCODING_SET(obj, encindex); + RB_ENC_CODERANGE_SET(obj, cr); +} + +RBIMPL_ATTR_PURE() +/** + * Queries if the passed object can have its encoding. + * + * @param[in] obj Object in question. + * @retval 1 It can. + * @retval 0 It cannot. + */ +int rb_enc_capable(VALUE obj); + +/** + * Queries the index of the encoding. + * + * @param[in] name Name of the encoding to find. + * @exception rb_eArgError No such encoding named `name`. + * @retval -1 `name` exists, but unable to load. + * @retval otherwise Index of encoding named `name`. + */ +int rb_enc_find_index(const char *name); + +/** + * Registers an "alias" name. In the wild, an encoding can be called using + * multiple names. For instance an encoding known as `"CP932"` is also called + * `"SJIS"` on occasions. This API registers such relationships. + * + * @param[in] alias New name. + * @param[in] orig Old name. + * @exception rb_eArgError `alias` is duplicated or malformed. + * @retval -1 Failed to load `orig`. + * @retval otherwise The index of `orig` and `alias`. + * @post `alias` is a synonym of `orig`. They refer to the identical + * encoding. + */ +int rb_enc_alias(const char *alias, const char *orig); + +/** + * Obtains a encoding index from a wider range of objects (than + * rb_enc_find_index()). + * + * @param[in] obj An ::rb_cEncoding, or its name in ::rb_cString. + * @retval -1 `obj` is unexpected type/contents. + * @retval otherwise Index corresponding to `obj`. + */ +int rb_to_encoding_index(VALUE obj); + +/** + * Identical to rb_find_encoding(), except it raises an exception instead of + * returning NULL. + * + * @param[in] obj An ::rb_cEncoding, or its name in ::rb_cString. + * @exception rb_eTypeError `obj` is neither ::rb_cEncoding nor ::rb_cString. + * @exception rb_eArgError `obj` is an unknown encoding name. + * @return Encoding of `obj`. + */ +rb_encoding *rb_to_encoding(VALUE obj); + +/** + * Identical to rb_to_encoding_index(), except the return type. + * + * @param[in] obj An ::rb_cEncoding, or its name in ::rb_cString. + * @exception rb_eTypeError `obj` is neither ::rb_cEncoding nor ::rb_cString. + * @retval NULL No such encoding. + * @return otherwise Encoding of `obj`. + */ +rb_encoding *rb_find_encoding(VALUE obj); + +/** + * Identical to rb_enc_get_index(), except the return type. + * + * @param[in] obj Object in question. + * @retval NULL Obj is incapable of having an encoding. + * @retval otherwise `obj`'s encoding. + */ +rb_encoding *rb_enc_get(VALUE obj); + +/** + * Look for the "common" encoding between the two. One character can or cannot + * be expressed depending on an encoding. This function finds the super-set of + * encodings that satisfy contents of both arguments. If that is impossible + * returns NULL. + * + * @param[in] str1 An object. + * @param[in] str2 Another object. + * @retval NULL No encoding can satisfy both at once. + * @retval otherwise Common encoding between the two. + * @note Arguments can be non-string, e.g. Regexp. + */ +rb_encoding *rb_enc_compatible(VALUE str1, VALUE str2); + +/** + * Identical to rb_enc_compatible(), except it raises an exception instead of + * returning NULL. + * + * @param[in] str1 An object. + * @param[in] str2 Another object. + * @exception rb_eEncCompatError No encoding can satisfy both. + * @return Common encoding between the two. + * @note Arguments can be non-string, e.g. Regexp. + */ +rb_encoding *rb_enc_check(VALUE str1,VALUE str2); + +/** + * Identical to rb_enc_set_index(), except it additionally does contents fix-up + * depending on the passed object. It for instance changes the byte length of + * terminating `U+0000` according to the passed encoding. + * + * @param[out] obj Object in question. + * @param[in] encindex An encoding index. + * @exception rb_eFrozenError `obj` is frozen. + * @exception rb_eArgError `obj` is incapable of having an encoding. + * @exception rb_eEncodingError `encindex` is out of bounds. + * @exception rb_eLoadError Failed to load the encoding. + * @return The passed `obj`. + * @post `obj`'s contents might be fixed according to `encindex`. + */ +VALUE rb_enc_associate_index(VALUE obj, int encindex); + +/** + * Identical to rb_enc_associate_index(), except it takes an encoding itself + * instead of its index. + * + * @param[out] obj Object in question. + * @param[in] enc An encoding. + * @exception rb_eFrozenError `obj` is frozen. + * @exception rb_eArgError `obj` is incapable of having an encoding. + * @return The passed `obj`. + * @post `obj`'s contents might be fixed according to `enc`. + */ +VALUE rb_enc_associate(VALUE obj, rb_encoding *enc); + +/** + * Destructively copies the encoding of the latter object to that of former + * one. It can also be seen as a routine identical to + * rb_enc_associate_index(), except it takes an object's encoding instead of an + * encoding's index. + * + * @param[out] dst Object to modify. + * @param[in] src Object to reference. + * @exception rb_eFrozenError `dst` is frozen. + * @exception rb_eArgError `dst` is incapable of having an encoding. + * @exception rb_eEncodingError `src` is incapable of having an encoding. + * @post `dst`'s encoding is that of `src`'s. + */ +void rb_enc_copy(VALUE dst, VALUE src); + + +/** + * Identical to rb_find_encoding(), except it takes an encoding index instead + * of a Ruby object. + * + * @param[in] idx An encoding index. + * @retval NULL No such encoding. + * @retval otherwise An encoding whose index is `idx`. + */ +rb_encoding *rb_enc_from_index(int idx); + +/** + * Identical to rb_find_encoding(), except it takes a C's string instead of + * Ruby's. + * + * @param[in] name Name of the encoding to query. + * @retval NULL No such encoding. + * @retval otherwise An encoding whose index is `idx`. + */ +rb_encoding *rb_enc_find(const char *name); + +/** + * Queries the (canonical) name of the passed encoding. + * + * @param[in] enc An encoding. + * @return Its name. + */ +static inline const char * +rb_enc_name(rb_encoding *enc) +{ + return enc->name; +} + +/** + * Queries the minimum number of bytes that the passed encoding needs to + * represent a character. For ASCII and compatible encodings this is typically + * 1. There are however encodings whose minimum is not 1; they are + * historically called wide characters. + * + * @param[in] enc An encoding. + * @return Its least possible number of bytes except 0. + */ +static inline int +rb_enc_mbminlen(rb_encoding *enc) +{ + return enc->min_enc_len; +} + +/** + * Queries the maximum number of bytes that the passed encoding needs to + * represent a character. Fixed-width encodings have the same value for this + * one and #rb_enc_mbminlen. However there are variable-width encodings. + * UTF-8, for instance, takes from 1 up to 6 bytes. + * + * @param[in] enc An encoding. + * @return Its maximum possible number of bytes of a character. + */ +static inline int +rb_enc_mbmaxlen(rb_encoding *enc) +{ + return enc->max_enc_len; +} + +/** + * Queries the number of bytes of the character at the passed pointer. + * + * @param[in] p Pointer to a character's first byte. + * @param[in] e End of the string that has `p`. + * @param[in] enc Encoding of the string. + * @return If the character at `p` does not end until `e`, number of bytes + * between `p` and `e`. Otherwise the number of bytes that the + * character at `p` is encoded. + * + * @internal + * + * Strictly speaking there are chances when `p` points to a middle byte of a + * wide character. This function returns "the number of bytes from `p` to + * nearest of either `e` or the next character boundary", if you go strict. + */ +int rb_enc_mbclen(const char *p, const char *e, rb_encoding *enc); + +/** + * Identical to rb_enc_mbclen() unless the character at `p` overruns `e`. That + * can happen for instance when you read from a socket and its partial read + * cuts a wide character in-between. In those situations this function + * "estimates" theoretical length of the character in question. Typically it + * tends to be possible to know how many bytes a character needs before + * actually reaching its end; for instance UTF-8 encodes a character's length + * in the first byte of it. This function returns that info. + * + * @note This implies that the string is not broken. + * + * @param[in] p Pointer to the character's first byte. + * @param[in] e End of the string that has `p`. + * @param[in] enc Encoding of the string. + * @return Number of bytes of character at `p`, measured or estimated. + */ +int rb_enc_fast_mbclen(const char *p, const char *e, rb_encoding *enc); + +/** + * Queries the number of bytes of the character at the passed pointer. This + * function returns 3 different types of information: + * + * ```CXX + * auto n = rb_enc_precise_mbclen(p, q, r); + * + * if (ONIGENC_MBCLEN_CHARFOUND_P(n)) { + * // Character found. Normal return. + * auto found_length = ONIGENC_MBCLEN_CHARFOUND_LEN(n); + * } + * else if (ONIGENC_MBCLEN_NEEDMORE_P(n)) { + * // Character overruns past `q`; needs more. + * auto requested_length = ONIGENC_MBCLEN_NEEDMORE_LEN(n); + * } + * else { + * // `p` is broken. + * assert(ONIGENC_MBCLEN_INVALID_P(n)); + * } + * ``` + * + * @param[in] p Pointer to the character's first byte. + * @param[in] e End of the string that has `p`. + * @param[in] enc Encoding of the string. + * @return Encoded read/needed number of bytes (see above). + */ +int rb_enc_precise_mbclen(const char *p, const char *e, rb_encoding *enc); + +#define MBCLEN_CHARFOUND_P(ret) ONIGENC_MBCLEN_CHARFOUND_P(ret) /**< @old{ONIGENC_MBCLEN_CHARFOUND_P} */ +#define MBCLEN_CHARFOUND_LEN(ret) ONIGENC_MBCLEN_CHARFOUND_LEN(ret) /**< @old{ONIGENC_MBCLEN_CHARFOUND_LEN} */ +#define MBCLEN_INVALID_P(ret) ONIGENC_MBCLEN_INVALID_P(ret) /**< @old{ONIGENC_MBCLEN_INVALID_P} */ +#define MBCLEN_NEEDMORE_P(ret) ONIGENC_MBCLEN_NEEDMORE_P(ret) /**< @old{ONIGENC_MBCLEN_NEEDMORE_P} */ +#define MBCLEN_NEEDMORE_LEN(ret) ONIGENC_MBCLEN_NEEDMORE_LEN(ret) /**< @old{ONIGENC_MBCLEN_NEEDMORE_LEN} */ + +/** + * Queries the code point of character pointed by the passed pointer. If that + * code point is included in ASCII that code point is returned. Otherwise -1. + * This can be different from just looking at the first byte. For instance it + * reads 2 bytes in case of UTF-16BE. + * + * @param[in] p Pointer to the character's first byte. + * @param[in] e End of the string that has `p`. + * @param[in] len Return buffer. + * @param[in] enc Encoding of the string. + * @retval -1 The character at `p` is not i ASCII. + * @retval otherwise A code point of the character at `p`. + * @post `len` (if set) is the number of bytes of `p`. + */ +int rb_enc_ascget(const char *p, const char *e, int *len, rb_encoding *enc); + +/** + * Queries the code point of character pointed by the passed pointer. + * Exceptions happen in case of broken input. + * + * @param[in] p Pointer to the character's first byte. + * @param[in] e End of the string that has `p`. + * @param[in] len Return buffer. + * @param[in] enc Encoding of the string. + * @exception rb_eArgError `p` is broken. + * @return Code point of the character pointed by `p`. + * @post `len` (if set) is the number of bytes of `p`. + */ +unsigned int rb_enc_codepoint_len(const char *p, const char *e, int *len, rb_encoding *enc); + +/** + * Queries the code point of character pointed by the passed pointer. + * Exceptions happen in case of broken input. + * + * @deprecated Use rb_enc_codepoint_len() instead. + * @param[in] p Pointer to the character's first byte. + * @param[in] e End of the string that has `p`. + * @param[in] enc Encoding of the string. + * @exception rb_eArgError `p` is broken. + * @return Code point of the character pointed by `p`. + * + * @internal + * + * @matz says in commit 91e5ba1cb865a2385d3e1cbfacd824496898e098 that the line + * below is a "prototype for obsolete function". However even today there + * still are some use cases of it throughout our repository. It seems it has + * its own niche. + */ +static inline unsigned int +rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc) +{ + return rb_enc_codepoint_len(p, e, 0, enc); + /* ^^^ + * This can be `NULL` in C, `nullptr` in C++, and `0` for both. + * We choose the most portable one here. + */ +} + + +/** + * Identical to rb_enc_codepoint(), except it assumes the passed character is + * not broken. + * + * @param[in] p Pointer to the character's first byte. + * @param[in] e End of the string that has `p`. + * @param[in] enc Encoding of the string. + * @return Code point of the character pointed by `p`. + */ +static inline OnigCodePoint +rb_enc_mbc_to_codepoint(const char *p, const char *e, rb_encoding *enc) +{ + const OnigUChar *up = RBIMPL_CAST((const OnigUChar *)p); + const OnigUChar *ue = RBIMPL_CAST((const OnigUChar *)e); + + return ONIGENC_MBC_TO_CODE(enc, up, ue); +} + +/** + * Queries the number of bytes requested to represent the passed code point + * using the passed encoding. + * + * @param[in] code Code point in question. + * @param[in] enc Encoding to convert the code into a byte sequence. + * @exception rb_eArgError `enc` does not glean `code`. + * @return Number of bytes requested to represent `code` using `enc`. + */ +int rb_enc_codelen(int code, rb_encoding *enc); + +/** + * Identical to rb_enc_codelen(), except it returns 0 for invalid code points. + * + * @param[in] c Code point in question. + * @param[in] enc Encoding to convert `c` into a byte sequence. + * @retval 0 `c` is invalid. + * @return otherwise Number of bytes needed for `enc` to encode `c`. + */ +static inline int +rb_enc_code_to_mbclen(int c, rb_encoding *enc) +{ + OnigCodePoint uc = RBIMPL_CAST((OnigCodePoint)c); + + return ONIGENC_CODE_TO_MBCLEN(enc, uc); +} + +/** + * Identical to rb_enc_uint_chr(), except it writes back to the passed buffer + * instead of allocating one. + * + * @param[in] c Code point. + * @param[out] buf Return buffer. + * @param[in] enc Target encoding scheme. + * @retval <= 0 `c` is invalid in `enc`. + * @return otherwise Number of bytes written to `buf`. + * @post `c` is encoded according to `enc`, then written to `buf`. + * + * @internal + * + * The second argument must be typed. But its current usages prevent us from + * being any stricter than this. :FIXME: + */ +static inline int +rb_enc_mbcput(unsigned int c, void *buf, rb_encoding *enc) +{ + OnigCodePoint uc = RBIMPL_CAST((OnigCodePoint)c); + OnigUChar *ubuf = RBIMPL_CAST((OnigUChar *)buf); + + return ONIGENC_CODE_TO_MBC(enc, uc, ubuf); +} + +/** + * Queries the previous (left) character. + * + * @param[in] s Start of the string. + * @param[in] p Pointer to a character. + * @param[in] e End of the string. + * @param[in] enc Encoding. + * @retval NULL No previous character. + * @retval otherwise Pointer to the head of the previous character. + */ +static inline char * +rb_enc_prev_char(const char *s, const char *p, const char *e, rb_encoding *enc) +{ + const OnigUChar *us = RBIMPL_CAST((const OnigUChar *)s); + const OnigUChar *up = RBIMPL_CAST((const OnigUChar *)p); + const OnigUChar *ue = RBIMPL_CAST((const OnigUChar *)e); + OnigUChar *ur = onigenc_get_prev_char_head(enc, us, up, ue); + + return RBIMPL_CAST((char *)ur); +} + +/** + * Queries the left boundary of a character. This function takes a pointer + * that is not necessarily a head of a character, and searches for its head. + * + * @param[in] s Start of the string. + * @param[in] p Pointer to a possibly-middle of a character. + * @param[in] e End of the string. + * @param[in] enc Encoding. + * @return Pointer to the head of the character that contains `p`. + */ +static inline char * +rb_enc_left_char_head(const char *s, const char *p, const char *e, rb_encoding *enc) +{ + const OnigUChar *us = RBIMPL_CAST((const OnigUChar *)s); + const OnigUChar *up = RBIMPL_CAST((const OnigUChar *)p); + const OnigUChar *ue = RBIMPL_CAST((const OnigUChar *)e); + OnigUChar *ur = onigenc_get_left_adjust_char_head(enc, us, up, ue); + + return RBIMPL_CAST((char *)ur); +} + +/** + * Queries the right boundary of a character. This function takes a pointer + * that is not necessarily a head of a character, and searches for its tail. + * + * @param[in] s Start of the string. + * @param[in] p Pointer to a possibly-middle of a character. + * @param[in] e End of the string. + * @param[in] enc Encoding. + * @return Pointer to the end of the character that contains `p`. + */ +static inline char * +rb_enc_right_char_head(const char *s, const char *p, const char *e, rb_encoding *enc) +{ + const OnigUChar *us = RBIMPL_CAST((const OnigUChar *)s); + const OnigUChar *up = RBIMPL_CAST((const OnigUChar *)p); + const OnigUChar *ue = RBIMPL_CAST((const OnigUChar *)e); + OnigUChar *ur = onigenc_get_right_adjust_char_head(enc, us, up, ue); + + return RBIMPL_CAST((char *)ur); +} + +/** + * Scans the string backwards for n characters. + * + * @param[in] s Start of the string. + * @param[in] p Pointer to a character. + * @param[in] e End of the string. + * @param[in] n Steps. + * @param[in] enc Encoding. + * @retval NULL There are no `n` characters left. + * @retval otherwise Pointer to `n` character before `p`. + */ +static inline char * +rb_enc_step_back(const char *s, const char *p, const char *e, int n, rb_encoding *enc) +{ + const OnigUChar *us = RBIMPL_CAST((const OnigUChar *)s); + const OnigUChar *up = RBIMPL_CAST((const OnigUChar *)p); + const OnigUChar *ue = RBIMPL_CAST((const OnigUChar *)e); + const OnigUChar *ur = onigenc_step_back(enc, us, up, ue, n); + + return RBIMPL_CAST((char *)ur); +} + +/** + * @private + * + * This is an implementation detail of rb_enc_asciicompat(). People don't use + * it directly. Just always use rb_enc_asciicompat(). + * + * @param[in] enc Encoding in question. + * @retval 1 It is ASCII compatible. + * @retval 0 It isn't. + */ +static inline int +rb_enc_asciicompat_inline(rb_encoding *enc) +{ + return rb_enc_mbminlen(enc)==1 && !rb_enc_dummy_p(enc); +} + +/** + * Queries if the passed encoding is _in some sense_ compatible with ASCII. + * The concept of ASCII compatibility is nuanced, and private to our + * implementation. For instance SJIS is ASCII compatible to us, despite their + * having different characters at code point `0x5C`. This is based on some + * practical consideration that Japanese people confuses SJIS to be "upper + * compatible" with ASCII (which is in fact a wrong idea, but we just don't go + * strict here). An example of ASCII incompatible encoding is UTF-16. UTF-16 + * shares code points with ASCII, but employs a completely different encoding + * scheme. + * + * @param[in] enc Encoding in question. + * @retval 0 It is incompatible. + * @retval 1 It is compatible. + */ +static inline bool +rb_enc_asciicompat(rb_encoding *enc) +{ + if (rb_enc_mbminlen(enc) != 1) { + return false; + } + else if (rb_enc_dummy_p(enc)) { + return false; + } + else { + return true; + } +} + +/** + * Queries if the passed string is in an ASCII-compatible encoding. + * + * @param[in] str A Ruby's string to query. + * @retval 0 `str` is not a String, or an ASCII-incompatible string. + * @retval 1 Otherwise. + */ +static inline bool +rb_enc_str_asciicompat_p(VALUE str) +{ + rb_encoding *enc = rb_enc_get(str); + + return rb_enc_asciicompat(enc); +} + +/** + * Queries the Ruby-level counterpart instance of ::rb_cEncoding that + * corresponds to the passed encoding. + * + * @param[in] enc An encoding + * @retval RUBY_Qnil `enc` is a null pointer. + * @retval otherwise An instance of ::rb_cEncoding. + */ +VALUE rb_enc_from_encoding(rb_encoding *enc); + +RBIMPL_ATTR_PURE() +/** + * Queries if the passed encoding is either one of UTF-8/16/32. + * + * @note It does not take UTF-7, which we actually support, into account. + * + * @param[in] enc Encoding in question. + * @retval 0 It is not a Unicode variant. + * @retval otherwise It is. + * + * @internal + * + * In reality it returns 1/0, but the value is abstracted as + * `ONIGENC_FLAG_UNICODE`. + */ +int rb_enc_unicode_p(rb_encoding *enc); + +RBIMPL_ATTR_RETURNS_NONNULL() +/** + * Queries the encoding that represents ASCII-8BIT a.k.a. binary. + * + * @return The encoding that represents ASCII-8BIT. + * + * @internal + * + * This can not return NULL once the process properly boots up. + */ +rb_encoding *rb_ascii8bit_encoding(void); + +RBIMPL_ATTR_RETURNS_NONNULL() +/** + * Queries the encoding that represents UTF-8. + * + * @return The encoding that represents UTF-8. + * + * @internal + * + * This can not return NULL once the process properly boots up. + */ +rb_encoding *rb_utf8_encoding(void); + +RBIMPL_ATTR_RETURNS_NONNULL() +/** + * Queries the encoding that represents US-ASCII. + * + * @return The encoding that represents US-ASCII. + * + * @internal + * + * This can not return NULL once the process properly boots up. + */ +rb_encoding *rb_usascii_encoding(void); + +/** + * Queries the encoding that represents the current locale. + * + * @return The encoding that represents the process' locale. + * + * @internal + * + * This is dynamic. If you change the process' locale by e.g. calling + * `setlocale(3)`, that should also change the return value of this function. + * + * There is no official way for Ruby scripts to manipulate locales, though. + */ +rb_encoding *rb_locale_encoding(void); + +/** + * Queries the "filesystem" encoding. This is the encoding that ruby expects + * info from the OS' file system are in. This affects for instance return + * value of rb_dir_getwd(). Most notably on Windows it can be an alias of OS + * codepage. Most notably on Linux users can set this via default external + * encoding. + * + * @return The "filesystem" encoding. + */ +rb_encoding *rb_filesystem_encoding(void); + +/** + * Queries the "default external" encoding. This is used to interact with + * outer-process things such as File. Though not recommended, you can set this + * using rb_enc_set_default_external(). + * + * @return The "default external" encoding. + */ +rb_encoding *rb_default_external_encoding(void); + +/** + * Queries the "default internal" encoding. This could be a null pointer. + * Otherwise, outer-process info are transcoded from default external encoding + * to this one during reading from an IO. + * + * @return The "default internal" encoding (if any). + */ +rb_encoding *rb_default_internal_encoding(void); + +#ifndef rb_ascii8bit_encindex +RBIMPL_ATTR_CONST() +/** + * Identical to rb_ascii8bit_encoding(), except it returns the encoding's index + * instead of the encoding itself. + * + * @return The index of encoding of ASCII-8BIT. + * + * @internal + * + * This happens to be 0. + */ +int rb_ascii8bit_encindex(void); +#endif + +/** + * Queries if the passed object is in ascii 8bit (== binary) encoding. The + * object must be capable of having inline encoding. Using this macro needs + * deep understanding of bit level object binary layout. + * + * @param[in] obj An object to check. + * @retval 1 It is. + * @retval 0 It isn't. + */ +static inline bool +RB_ENCODING_IS_ASCII8BIT(VALUE obj) +{ + return RB_ENCODING_GET_INLINED(obj) == rb_ascii8bit_encindex(); +} + +#ifndef rb_utf8_encindex +RBIMPL_ATTR_CONST() +/** + * Identical to rb_utf8_encoding(), except it returns the encoding's index + * instead of the encoding itself. + * + * @return The index of encoding of UTF-8. + */ +int rb_utf8_encindex(void); +#endif + +#ifndef rb_usascii_encindex +RBIMPL_ATTR_CONST() +/** + * Identical to rb_usascii_encoding(), except it returns the encoding's index + * instead of the encoding itself. + * + * @return The index of encoding of UTF-8. + */ +int rb_usascii_encindex(void); +#endif + +/** + * Identical to rb_locale_encoding(), except it returns the encoding's index + * instead of the encoding itself. + * + * @return The index of the locale encoding. + */ +int rb_locale_encindex(void); + +/** + * Identical to rb_filesystem_encoding(), except it returns the encoding's + * index instead of the encoding itself. + * + * @return The index of the filesystem encoding. + */ +int rb_filesystem_encindex(void); + +/** + * Identical to rb_default_external_encoding(), except it returns the + * Ruby-level counterpart instance of ::rb_cEncoding that corresponds to the + * default external encoding. + * + * @return An instance of ::rb_cEncoding of default external. + */ +VALUE rb_enc_default_external(void); + +/** + * Identical to rb_default_internal_encoding(), except it returns the + * Ruby-level counterpart instance of ::rb_cEncoding that corresponds to the + * default internal encoding. + * + * @return An instance of ::rb_cEncoding of default internal. + */ +VALUE rb_enc_default_internal(void); + +/** + * Destructively assigns the passed encoding as the default external encoding. + * You should not use this API. It has process-global side effects. Also it + * doesn't change encodings of strings that have already been read. + * + * @param[in] encoding Ruby level encoding. + * @exception rb_eArgError `encoding` is ::RUBY_Qnil. + * @post The default external encoding is `encoding`. + */ +void rb_enc_set_default_external(VALUE encoding); + +/** + * Destructively assigns the passed encoding as the default internal encoding. + * You should not use this API. It has process-global side effects. Also it + * doesn't change encodings of strings that have already been read. + * + * @param[in] encoding Ruby level encoding. + * @post The default internal encoding is `encoding`. + * @note Unlike rb_enc_set_default_external() you can pass ::RUBY_Qnil. + */ +void rb_enc_set_default_internal(VALUE encoding); + +/** + * Returns a platform-depended "charmap" of the current locale. This + * information is called a "Codeset name" in IEEE 1003.1 section 13 + * (`<langinfo.h>`). This is a very low-level API. The return value can have + * no corresponding encoding when passed to rb_find_encoding(). + * + * @param[in] klass Ignored for no reason (why...) + * @return The low-level locale charmap, in Ruby's String. + */ +VALUE rb_locale_charmap(VALUE klass); + +RBIMPL_SYMBOL_EXPORT_END() + +/** @cond INTERNAL_MACRO */ +#define RB_ENCODING_GET RB_ENCODING_GET +#define RB_ENCODING_GET_INLINED RB_ENCODING_GET_INLINED +#define RB_ENCODING_IS_ASCII8BIT RB_ENCODING_IS_ASCII8BIT +#define RB_ENCODING_SET RB_ENCODING_SET +#define RB_ENCODING_SET_INLINED RB_ENCODING_SET_INLINED +#define rb_enc_asciicompat rb_enc_asciicompat +#define rb_enc_code_to_mbclen rb_enc_code_to_mbclen +#define rb_enc_codepoint rb_enc_codepoint +#define rb_enc_left_char_head rb_enc_left_char_head +#define rb_enc_mbc_to_codepoint rb_enc_mbc_to_codepoint +#define rb_enc_mbcput rb_enc_mbcput +#define rb_enc_mbmaxlen rb_enc_mbmaxlen +#define rb_enc_mbminlen rb_enc_mbminlen +#define rb_enc_name rb_enc_name +#define rb_enc_prev_char rb_enc_prev_char +#define rb_enc_right_char_head rb_enc_right_char_head +#define rb_enc_step_back rb_enc_step_back +#define rb_enc_str_asciicompat_p rb_enc_str_asciicompat_p +/** @endcond */ + +#endif /* RUBY_INTERNAL_ENCODING_ENCODING_H */ diff --git a/include/ruby/internal/encoding/pathname.h b/include/ruby/internal/encoding/pathname.h new file mode 100644 index 0000000000..0b5e85a524 --- /dev/null +++ b/include/ruby/internal/encoding/pathname.h @@ -0,0 +1,184 @@ +#ifndef RUBY_INTERNAL_ENCODING_PATHNAME_H /*-*-C++-*-vi:se ft=cpp:*/ +#define RUBY_INTERNAL_ENCODING_PATHNAME_H +/** + * @file + * @author Ruby developers <ruby-core@ruby-lang.org> + * @copyright This file is a part of the programming language Ruby. + * Permission is hereby granted, to either redistribute and/or + * modify this file, provided that the conditions mentioned in the + * file COPYING are met. Consult the file for details. + * @warning Symbols prefixed with either `RBIMPL` or `rbimpl` are + * implementation details. Don't take them as canon. They could + * rapidly appear then vanish. The name (path) of this header file + * is also an implementation detail. Do not expect it to persist + * at the place it is now. Developers are free to move it anywhere + * anytime at will. + * @note To ruby-core: remember that this header can be possibly + * recursively included from extension libraries written in C++. + * Do not expect for instance `__VA_ARGS__` is always available. + * We assume C99 for ruby itself but we don't assume languages of + * extension libraries. They could be written in C++98. + * @brief Routines to manipulate encodings of pathnames. + */ + +#include "ruby/internal/attr/nonnull.h" +#include "ruby/internal/dllexport.h" +#include "ruby/internal/encoding/encoding.h" +#include "ruby/internal/value.h" + +RBIMPL_SYMBOL_EXPORT_BEGIN() +RBIMPL_ATTR_NONNULL(()) +/** + * Returns a path component directly adjacent to the passed pointer. + * + * ``` + * "/multi/byte/encoded/pathname.txt" + * ^ ^ ^ + * | | +--- end + * | +--- @return + * +--- path + * ``` + * + * @param[in] path Where to start scanning. + * @param[in] end End of the path string. + * @param[in] enc Encoding of the string. + * @return A pointer in the passed string where the next path component + * resides, or `end` if there is no next path component. + */ +char *rb_enc_path_next(const char *path, const char *end, rb_encoding *enc); + +RBIMPL_ATTR_NONNULL(()) +/** + * Seeks for non-prefix part of a pathname. This can be a no-op when the OS + * has no such concept like a path prefix. But there are OSes where path + * prefixes do exist. + * + * ``` + * "C:\multi\byte\encoded\pathname.txt" + * ^ ^ ^ + * | | +--- end + * | +--- @return + * +--- path + * ``` + * + * @param[in] path Where to start scanning. + * @param[in] end End of the path string. + * @param[in] enc Encoding of the string. + * @return A pointer in the passed string where non-prefix part starts, or + * `path` if the OS does not have path prefix. + */ +char *rb_enc_path_skip_prefix(const char *path, const char *end, rb_encoding *enc); + +RBIMPL_ATTR_NONNULL(()) +/** + * Returns the last path component. + * + * ``` + * "/multi/byte/encoded/pathname.txt" + * ^ ^ ^ + * | | +--- end + * | +--- @return + * +--- path + * ``` + * + * @param[in] path Where to start scanning. + * @param[in] end End of the path string. + * @param[in] enc Encoding of the string. + * @return A pointer in the passed string where the last path component + * resides, or `end` if there is no more path component. + */ +char *rb_enc_path_last_separator(const char *path, const char *end, rb_encoding *enc); + +RBIMPL_ATTR_NONNULL(()) +/** + * This just returns the passed end basically. It makes difference in case the + * passed string ends with tons of path separators like the following: + * + * ``` + * "/path/that/ends/with/lots/of/slashes//////////////" + * ^ ^ ^ + * | | +--- end + * | +--- @return + * +--- path + * ``` + * + * @param[in] path Where to start scanning. + * @param[in] end End of the path string. + * @param[in] enc Encoding of the string. + * @return A pointer in the passed string where the trailing path + * separators start, or `end` if there is no trailing path + * separators. + * + * @internal + * + * It seems this function was introduced to mimic what POSIX says about + * `basename(3)`. + */ +char *rb_enc_path_end(const char *path, const char *end, rb_encoding *enc); + +RBIMPL_ATTR_NONNULL((1, 4)) +/** + * Our own encoding-aware version of `basename(3)`. Normally, this function + * returns the last path component of the given name. However in case the + * passed name ends with a path separator, it returns the name of the + * directory, not the last (empty) component. Also if the passed name is a + * root directory, it returns that root directory. Note however that Windows + * filesystem have drive letters, which this function does not return. + * + * @param[in] name Target path. + * @param[out] baselen Return buffer. + * @param[in,out] alllen Number of bytes of `name`. + * @param[enc] enc Encoding of `name`. + * @return The rightmost component of `name`. + * @post `baselen`, if passed, is updated to be the number of bytes + * of the returned basename. + * @post `alllen`, if passed, is updated to be the number of bytes of + * strings not considered as the basename. + */ +const char *ruby_enc_find_basename(const char *name, long *baselen, long *alllen, rb_encoding *enc); + +RBIMPL_ATTR_NONNULL((1, 3)) +/** + * Our own encoding-aware version of `extname`. This function first applies + * rb_enc_path_last_separator() to the passed name and only concerns its return + * value (ignores any parent directories). This function returns complicated + * results: + * + * ```CXX + * auto path = "..."; + * auto len = strlen(path); + * auto ret = ruby_enc_find_extname(path, &len, rb_ascii8bit_encoding()); + * + * switch(len) { + * case 0: + * if (ret == 0) { + * // `path` is a file without extensions. + * } + * else { + * // `path` is a dotfile. + * // `ret` is the file's name. + * } + * break; + * + * case 1: + * // `path` _ends_ with a dot. + * // `ret` is that dot. + * break; + * + * default: + * // `path` has an extension. + * // `ret` is that extension. + * } + * ``` + * + * @param[in] name Target path. + * @param[in,out] len Number of bytes of `name`. + * @param[in] enc Encoding of `name`. + * @return See above. + * @post `len`, if passed, is updated (see above). + */ +const char *ruby_enc_find_extname(const char *name, long *len, rb_encoding *enc); + +RBIMPL_SYMBOL_EXPORT_END() + +#endif /* RUBY_INTERNAL_ENCODING_PATHNAME_H */ diff --git a/include/ruby/internal/encoding/re.h b/include/ruby/internal/encoding/re.h new file mode 100644 index 0000000000..d0de23bc83 --- /dev/null +++ b/include/ruby/internal/encoding/re.h @@ -0,0 +1,46 @@ +#ifndef RUBY_INTERNAL_ENCODING_RE_H /*-*-C++-*-vi:se ft=cpp:*/ +#define RUBY_INTERNAL_ENCODING_RE_H +/** + * @file + * @author Ruby developers <ruby-core@ruby-lang.org> + * @copyright This file is a part of the programming language Ruby. + * Permission is hereby granted, to either redistribute and/or + * modify this file, provided that the conditions mentioned in the + * file COPYING are met. Consult the file for details. + * @warning Symbols prefixed with either `RBIMPL` or `rbimpl` are + * implementation details. Don't take them as canon. They could + * rapidly appear then vanish. The name (path) of this header file + * is also an implementation detail. Do not expect it to persist + * at the place it is now. Developers are free to move it anywhere + * anytime at will. + * @note To ruby-core: remember that this header can be possibly + * recursively included from extension libraries written in C++. + * Do not expect for instance `__VA_ARGS__` is always available. + * We assume C99 for ruby itself but we don't assume languages of + * extension libraries. They could be written in C++98. + * @brief Routines to manipulate encodings of symbols. + */ + +#include "ruby/internal/dllexport.h" +#include "ruby/internal/encoding/encoding.h" +#include "ruby/internal/value.h" + +RBIMPL_SYMBOL_EXPORT_BEGIN() + +/** + * Identical to rb_reg_new(), except it additionally takes an encoding. + * + * @param[in] ptr A memory region of `len` bytes length. + * @param[in] len Length of `ptr`, in bytes, not including the + * terminating NUL character. + * @param[in] enc Encoding of `ptr`. + * @param[in] opts Options e.g. ONIG_OPTION_MULTILINE. + * @exception rb_eRegexpError Failed to compile `ptr`. + * @return An allocated new instance of ::rb_cRegexp, of `enc` encoding, + * whose expression is compiled according to `ptr`. + */ +VALUE rb_enc_reg_new(const char *ptr, long len, rb_encoding *enc, int opts); + +RBIMPL_SYMBOL_EXPORT_END() + +#endif /* RUBY_INTERNAL_ENCODING_RE_H */ diff --git a/include/ruby/internal/encoding/sprintf.h b/include/ruby/internal/encoding/sprintf.h new file mode 100644 index 0000000000..cb8737b414 --- /dev/null +++ b/include/ruby/internal/encoding/sprintf.h @@ -0,0 +1,78 @@ +#ifndef RUBY_INTERNAL_ENCODING_SPRINTF_H /*-*-C++-*-vi:se ft=cpp:*/ +#define RUBY_INTERNAL_ENCODING_SPRINTF_H +/** + * @file + * @author Ruby developers <ruby-core@ruby-lang.org> + * @copyright This file is a part of the programming language Ruby. + * Permission is hereby granted, to either redistribute and/or + * modify this file, provided that the conditions mentioned in the + * file COPYING are met. Consult the file for details. + * @warning Symbols prefixed with either `RBIMPL` or `rbimpl` are + * implementation details. Don't take them as canon. They could + * rapidly appear then vanish. The name (path) of this header file + * is also an implementation detail. Do not expect it to persist + * at the place it is now. Developers are free to move it anywhere + * anytime at will. + * @note To ruby-core: remember that this header can be possibly + * recursively included from extension libraries written in C++. + * Do not expect for instance `__VA_ARGS__` is always available. + * We assume C99 for ruby itself but we don't assume languages of + * extension libraries. They could be written in C++98. + * @brief Routines to manipulate encodings of symbols. + */ +#include "ruby/internal/config.h" +#include <stdarg.h> +#include "ruby/internal/attr/format.h" +#include "ruby/internal/attr/nonnull.h" +#include "ruby/internal/attr/noreturn.h" +#include "ruby/internal/dllexport.h" +#include "ruby/internal/encoding/encoding.h" +#include "ruby/internal/value.h" + +RBIMPL_SYMBOL_EXPORT_BEGIN() +RBIMPL_ATTR_NONNULL((2)) +RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 2, 3) +/** + * Identical to rb_sprintf(), except it additionally takes an encoding. The + * passed encoding rules both the incoming format specifier and the resulting + * string. + * + * @param[in] enc Encoding of `fmt`. + * @param[in] fmt A `printf`-like format specifier. + * @param[in] ... Variadic number of contents to format. + * @return A rendered new instance of ::rb_cString, of `enc` encoding. + */ +VALUE rb_enc_sprintf(rb_encoding *enc, const char *fmt, ...); + +RBIMPL_ATTR_NONNULL((2)) +RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 2, 0) +/** + * Identical to rb_enc_sprintf(), except it takes a `va_list` instead of + * variadic arguments. It can also be seen as a routine identical to + * rb_vsprintf(), except it additionally takes an encoding. + * + * @param[in] enc Encoding of `fmt`. + * @param[in] fmt A `printf`-like format specifier. + * @param[in] ap Contents to format. + * @return A rendered new instance of ::rb_cString, of `enc` encoding. + */ +VALUE rb_enc_vsprintf(rb_encoding *enc, const char *fmt, va_list ap); + +RBIMPL_ATTR_NORETURN() +RBIMPL_ATTR_NONNULL((3)) +RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 3, 4) +/** + * Identical to rb_raise(), except it additionally takes an encoding. + * + * @param[in] enc Encoding of the generating exception. + * @param[in] exc A subclass of ::rb_eException. + * @param[in] fmt Format specifier string compatible with rb_sprintf(). + * @param[in] ... Contents of the message. + * @exception exc The specified exception. + * @note It never returns. + */ +void rb_enc_raise(rb_encoding *enc, VALUE exc, const char *fmt, ...); + +RBIMPL_SYMBOL_EXPORT_END() + +#endif /* RUBY_INTERNAL_ENCODING_SPRINTF_H */ diff --git a/include/ruby/internal/encoding/string.h b/include/ruby/internal/encoding/string.h new file mode 100644 index 0000000000..2b9dfe4f31 --- /dev/null +++ b/include/ruby/internal/encoding/string.h @@ -0,0 +1,346 @@ +#ifndef RUBY_INTERNAL_ENCODING_STRING_H /*-*-C++-*-vi:se ft=cpp:*/ +#define RUBY_INTERNAL_ENCODING_STRING_H +/** + * @file + * @author Ruby developers <ruby-core@ruby-lang.org> + * @copyright This file is a part of the programming language Ruby. + * Permission is hereby granted, to either redistribute and/or + * modify this file, provided that the conditions mentioned in the + * file COPYING are met. Consult the file for details. + * @warning Symbols prefixed with either `RBIMPL` or `rbimpl` are + * implementation details. Don't take them as canon. They could + * rapidly appear then vanish. The name (path) of this header file + * is also an implementation detail. Do not expect it to persist + * at the place it is now. Developers are free to move it anywhere + * anytime at will. + * @note To ruby-core: remember that this header can be possibly + * recursively included from extension libraries written in C++. + * Do not expect for instance `__VA_ARGS__` is always available. + * We assume C99 for ruby itself but we don't assume languages of + * extension libraries. They could be written in C++98. + * @brief Routines to manipulate encodings of strings. + */ + +#include "ruby/internal/dllexport.h" +#include "ruby/internal/value.h" +#include "ruby/internal/encoding/encoding.h" +#include "ruby/internal/attr/nonnull.h" +#include "ruby/internal/intern/string.h" /* rbimpl_strlen */ + +RBIMPL_SYMBOL_EXPORT_BEGIN() + +/** + * Identical to rb_str_new(), except it additionally takes an encoding. + * + * @param[in] ptr A memory region of `len` bytes length. + * @param[in] len Length of `ptr`, in bytes, not including the + * terminating NUL character. + * @param[in] enc Encoding of `ptr`. + * @exception rb_eNoMemError Failed to allocate `len+1` bytes. + * @exception rb_eArgError `len` is negative. + * @return An instance of ::rb_cString, of `len` bytes length, of `enc` + * encoding, whose contents are verbatim copy of `ptr`. + * @pre At least `len` bytes of continuous memory region shall be + * accessible via `ptr`. + * @note `enc` can be a null pointer. It can also be seen as a routine + * identical to rb_usascii_str_new() then. + */ +VALUE rb_enc_str_new(const char *ptr, long len, rb_encoding *enc); + +RBIMPL_ATTR_NONNULL((1)) +/** + * Identical to rb_enc_str_new(), except it assumes the passed pointer is a + * pointer to a C string. It can also be seen as a routine identical to + * rb_str_new_cstr(), except it additionally takes an encoding. + * + * @param[in] ptr A C string. + * @param[in] enc Encoding of `ptr`. + * @exception rb_eNoMemError Failed to allocate memory. + * @return An instance of ::rb_cString, of `enc` encoding, whose contents + * are verbatim copy of `ptr`. + * @pre `ptr` must not be a null pointer. + * @pre Because `ptr` is a C string it makes no sense for `enc` to be + * something like UTF-32. + * @note `enc` can be a null pointer. It can also be seen as a routine + * identical to rb_usascii_str_new_cstr() then. + */ +VALUE rb_enc_str_new_cstr(const char *ptr, rb_encoding *enc); + +/** + * Identical to rb_enc_str_new(), except it takes a C string literal. It can + * also be seen as a routine identical to rb_str_new_static(), except it + * additionally takes an encoding. + * + * @param[in] ptr A C string literal. + * @param[in] len `strlen(ptr)`. + * @param[in] enc Encoding of `ptr`. + * @exception rb_eArgError `len` out of range of `size_t`. + * @pre `ptr` must be a C string constant. + * @return An instance of ::rb_cString, of `enc` encoding, whose backend + * storage is the passed C string literal. + * @warning It is a very bad idea to write to a C string literal (often + * immediate SEGV shall occur). Consider return values of this + * function be read-only. + * @note `enc` can be a null pointer. It can also be seen as a routine + * identical to rb_usascii_str_new_static() then. + */ +VALUE rb_enc_str_new_static(const char *ptr, long len, rb_encoding *enc); + +/** + * Identical to rb_enc_str_new(), except it returns a "f"string. It can also + * be seen as a routine identical to rb_interned_str(), except it additionally + * takes an encoding. + * + * @param[in] ptr A memory region of `len` bytes length. + * @param[in] len Length of `ptr`, in bytes, not including the + * terminating NUL character. + * @param[in] enc Encoding of `ptr`. + * @exception rb_eArgError `len` is negative. + * @return A found or created instance of ::rb_cString, of `len` bytes + * length, of `enc` encoding, whose contents are identical to that + * of `ptr`. + * @pre At least `len` bytes of continuous memory region shall be + * accessible via `ptr`. + * @note `enc` can be a null pointer. + */ +VALUE rb_enc_interned_str(const char *ptr, long len, rb_encoding *enc); + +RBIMPL_ATTR_NONNULL((1)) +/** + * Identical to rb_enc_str_new_cstr(), except it returns a "f"string. It can + * also be seen as a routine identical to rb_interned_str_cstr(), except it + * additionally takes an encoding. + * + * @param[in] ptr A memory region of `len` bytes length. + * @param[in] enc Encoding of `ptr`. + * @return A found or created instance of ::rb_cString of `enc` encoding, + * whose contents are identical to that of `ptr`. + * @pre At least `len` bytes of continuous memory region shall be + * accessible via `ptr`. + * @note `enc` can be a null pointer. + */ +VALUE rb_enc_interned_str_cstr(const char *ptr, rb_encoding *enc); + +/** + * Counts the number of characters of the passed string, according to the + * passed encoding. This has to be complicated. The passed string could be + * invalid and/or broken. This routine would scan from the beginning til the + * end, byte by byte, to seek out character boundaries. Could be super slow. + * + * @param[in] head Leftmost pointer to the string. + * @param[in] tail Rightmost pointer to the string. + * @param[in] enc Encoding of the string. + * @return Number of characters exist in `head` .. `tail`. The definition + * of "character" depends on the passed `enc`. + */ +long rb_enc_strlen(const char *head, const char *tail, rb_encoding *enc); + +/** + * Queries the n-th character. Like rb_enc_strlen() this function can be fast + * or slow depending on the contents. Don't expect characters to be uniformly + * distributed across the entire string. + * + * @param[in] head Leftmost pointer to the string. + * @param[in] tail Rightmost pointer to the string. + * @param[in] nth Requested index of characters. + * @param[in] enc Encoding of the string. + * @return Pointer to the first byte of the character that is `nth` + * character ahead of `head`, or `tail` if there is no such + * character (OOB etc). The definition of "character" depends on + * the passed `enc`. + */ +char *rb_enc_nth(const char *head, const char *tail, long nth, rb_encoding *enc); + +/** + * Identical to rb_enc_get_index(), except the return type. + * + * @param[in] obj Object in question. + * @exception rb_eTypeError `obj` is incapable of having an encoding. + * @return `obj`'s encoding. + */ +VALUE rb_obj_encoding(VALUE obj); + +/** + * Identical to rb_str_cat(), except it additionally takes an encoding. + * + * @param[out] str Destination object. + * @param[in] ptr Contents to append. + * @param[in] len Length of `src`, in bytes. + * @param[in] enc Encoding of `ptr`. + * @exception rb_eArgError `len` is negative. + * @exception rb_eEncCompatError `enc` is not compatible with `str`. + * @return The passed `dst`. + * @post The contents of `ptr` is copied, transcoded into `dst`'s + * encoding, then pasted into `dst`'s end. + */ +VALUE rb_enc_str_buf_cat(VALUE str, const char *ptr, long len, rb_encoding *enc); + +/** + * Encodes the passed code point into a series of bytes. + * + * @param[in] code Code point. + * @param[in] enc Target encoding scheme. + * @exception rb_eRangeError `enc` does not glean `code`. + * @return An instance of ::rb_cString, of `enc` encoding, whose sole + * contents is `code` represented in `enc`. + * @note No way to encode code points bigger than UINT_MAX. + * + * @internal + * + * In other languages, APIs like this one could be seen as the primitive + * routines where encodings' "encode" feature are implemented. However in case + * of Ruby this is not the primitive one. We directly manipulate encoded + * strings. Encoding conversion routines transcode an encoded string directly + * to another one; not via a code point array. + */ +VALUE rb_enc_uint_chr(unsigned int code, rb_encoding *enc); + +/** + * Identical to rb_external_str_new(), except it additionally takes an + * encoding. However the whole point of rb_external_str_new() is to encode a + * string into default external encoding. Being able to specify arbitrary + * encoding just ruins the designed purpose the function meseems. + * + * @param[in] ptr A memory region of `len` bytes length. + * @param[in] len Length of `ptr`, in bytes, not including the + * terminating NUL character. + * @param[in] enc Target encoding scheme. + * @exception rb_eArgError `len` is negative. + * @return An instance of ::rb_cString. In case encoding conversion from + * "default internal" to `enc` is fully defined over the given + * contents, then the return value is a string of `enc` encoding, + * whose contents are the converted ones. Otherwise the string is + * a junk. + * @warning It doesn't raise on a conversion failure and silently ends up in + * a corrupted output. You can know the failure by querying + * `valid_encoding?` of the result object. + * + * @internal + * + * @shyouhei has no idea why this one does not follow the naming convention + * that others obey. It seems to him that this should have been called + * `rb_enc_external_str_new`. + */ +VALUE rb_external_str_new_with_enc(const char *ptr, long len, rb_encoding *enc); + +/** + * Identical to rb_str_export(), except it additionally takes an encoding. + * + * @param[in] obj Target object. + * @param[in] enc Target encoding. + * @exception rb_eTypeError No implicit conversion to String. + * @return Converted ruby string of `enc` encoding. + */ +VALUE rb_str_export_to_enc(VALUE obj, rb_encoding *enc); + +/** + * Encoding conversion main routine. + * + * @param[in] str String to convert. + * @param[in] from Source encoding. + * @param[in] to Destination encoding. + * @return A copy of `str`, with conversion from `from` to `to` applied. + * @note `from` can be a null pointer. `str`'s encoding is taken then. + * @note `to` can be a null pointer. No-op then. + */ +VALUE rb_str_conv_enc(VALUE str, rb_encoding *from, rb_encoding *to); + +/** + * Identical to rb_str_conv_enc(), except it additionally takes IO encoder + * options. The extra arguments can be constructed using io_extract_modeenc() + * etc. + * + * @param[in] str String to convert. + * @param[in] from Source encoding. + * @param[in] to Destination encoding. + * @param[in] ecflags A set of enum ::ruby_econv_flag_type. + * @param[in] ecopts Optional hash. + * @return A copy of `str`, with conversion from `from` to `to` applied. + * @note `from` can be a null pointer. `str`'s encoding is taken then. + * @note `to` can be a null pointer. No-op then. + * @note `ecopts` can be ::RUBY_Qnil, which is equivalent to passing an + * empty hash. + */ +VALUE rb_str_conv_enc_opts(VALUE str, rb_encoding *from, rb_encoding *to, int ecflags, VALUE ecopts); + +/** + * Scans the passed string to collect its code range. Because a Ruby's string + * is mutable, its contents change from time to time; so does its code range. + * A long-lived string tends to fall back to ::RUBY_ENC_CODERANGE_UNKNOWN. + * This API scans it and re-assigns a fine-grained code range constant. + * + * @param[out] str A string. + * @return An enum ::ruby_coderange_type. + */ +int rb_enc_str_coderange(VALUE str); + +/** + * Scans the passed string until it finds something odd. Returns the number of + * bytes scanned. As the name implies this is suitable for repeated call. One + * of its application is `IO#readlines`. The method reads from its receiver's + * read buffer, maybe more than once, looking for newlines. But "newline" can + * be different among encodings. This API is used to detect broken contents to + * properly mark them as such. + * + * @param[in] str String to scan. + * @param[in] end End of `str`. + * @param[in] enc `str`'s encoding. + * @param[out] cr Return buffer. + * @return Distance between `str` and first such byte where broken. + * @post `cr` has the code range type. + */ +long rb_str_coderange_scan_restartable(const char *str, const char *end, rb_encoding *enc, int *cr); + +/** + * Queries if the passed string is "ASCII only". An ASCII only string is a + * string who doesn't have any non-ASCII characters at all. This doesn't + * necessarily mean the string is in ASCII encoding. For instance a String of + * CP932 encoding can quite much be ASCII only, depending on its contents. + * + * @param[in] str String in question. + * @retval 1 It doesn't have non-ASCII characters. + * @retval 0 It has characters that are out of ASCII. + */ +int rb_enc_str_asciionly_p(VALUE str); + +RBIMPL_ATTR_NONNULL(()) +/** + * Looks for the passed string in the passed buffer. + * + * @param[in] x Buffer that potentially includes `y`. + * @param[in] m Number of bytes of `x`. + * @param[in] y Query string. + * @param[in] n Number of bytes of `y`. + * @param[in] enc Encoding of both `x` and `y`. + * @retval -1 Not found. + * @retval otherwise Found index in `x`. + * @note This API can match at a non-character-boundary. + */ +long rb_memsearch(const void *x, long m, const void *y, long n, rb_encoding *enc); + +/** @cond INTERNAL_MACRO */ +RBIMPL_ATTR_NONNULL(()) +static inline VALUE +rbimpl_enc_str_new_cstr(const char *str, rb_encoding *enc) +{ + long len = rbimpl_strlen(str); + + return rb_enc_str_new_static(str, len, enc); +} + +#define rb_enc_str_new(str, len, enc) \ + ((RBIMPL_CONSTANT_P(str) && \ + RBIMPL_CONSTANT_P(len) ? \ + rb_enc_str_new_static: \ + rb_enc_str_new) ((str), (len), (enc))) + +#define rb_enc_str_new_cstr(str, enc) \ + ((RBIMPL_CONSTANT_P(str) ? \ + rbimpl_enc_str_new_cstr : \ + rb_enc_str_new_cstr) ((str), (enc))) + +/** @endcond */ + +RBIMPL_SYMBOL_EXPORT_END() + +#endif /* RUBY_INTERNAL_ENCODING_STRING_H */ diff --git a/include/ruby/internal/encoding/symbol.h b/include/ruby/internal/encoding/symbol.h new file mode 100644 index 0000000000..9cd1b0dbf4 --- /dev/null +++ b/include/ruby/internal/encoding/symbol.h @@ -0,0 +1,100 @@ +#ifndef RUBY_INTERNAL_ENCODING_SYMBOL_H /*-*-C++-*-vi:se ft=cpp:*/ +#define RUBY_INTERNAL_ENCODING_SYMBOL_H +/** + * @file + * @author Ruby developers <ruby-core@ruby-lang.org> + * @copyright This file is a part of the programming language Ruby. + * Permission is hereby granted, to either redistribute and/or + * modify this file, provided that the conditions mentioned in the + * file COPYING are met. Consult the file for details. + * @warning Symbols prefixed with either `RBIMPL` or `rbimpl` are + * implementation details. Don't take them as canon. They could + * rapidly appear then vanish. The name (path) of this header file + * is also an implementation detail. Do not expect it to persist + * at the place it is now. Developers are free to move it anywhere + * anytime at will. + * @note To ruby-core: remember that this header can be possibly + * recursively included from extension libraries written in C++. + * Do not expect for instance `__VA_ARGS__` is always available. + * We assume C99 for ruby itself but we don't assume languages of + * extension libraries. They could be written in C++98. + * @brief Routines to manipulate encodings of symbols. + */ + +#include "ruby/internal/attr/nonnull.h" +#include "ruby/internal/dllexport.h" +#include "ruby/internal/encoding/encoding.h" +#include "ruby/internal/value.h" + +RBIMPL_SYMBOL_EXPORT_BEGIN() + +/** + * Identical to rb_intern2(), except it additionally takes an encoding. + * + * @param[in] name The name of the id. + * @param[in] len Length of `name`. + * @param[in] enc `name`'s encoding. + * @exception rb_eRuntimeError Too many symbols. + * @return A (possibly new) id whose value is the given name. + * @note These days Ruby internally has two kinds of symbols + * (static/dynamic). Symbols created using this function would + * become static ones; i.e. would never be garbage collected. It + * is up to you to avoid memory leaks. Think twice before using + * it. + */ +ID rb_intern3(const char *name, long len, rb_encoding *enc); + +RBIMPL_ATTR_NONNULL(()) +/** + * Identical to rb_symname_p(), except it additionally takes an encoding. + * + * @param[in] str A C string to check. + * @param[in] enc `str`'s encoding. + * @retval 1 It is a valid symbol name. + * @retval 0 It is invalid as a symbol name. + */ +int rb_enc_symname_p(const char *str, rb_encoding *enc); + +/** + * Identical to rb_enc_symname_p(), except it additionally takes the passed + * string's length. This is needed for strings containing NUL bytes, like in + * case of UTF-32. + * + * @param[in] name A C string to check. + * @param[in] len Number of bytes of `str`. + * @param[in] enc `str`'s encoding. + * @retval 1 It is a valid symbol name. + * @retval 0 It is invalid as a symbol name. + */ +int rb_enc_symname2_p(const char *name, long len, rb_encoding *enc); + +/** + * Identical to rb_check_id(), except it takes a pointer to a memory region + * instead of Ruby's string. + * + * @param[in] ptr A pointer to a memory region. + * @param[in] len Number of bytes of `ptr`. + * @param[in] enc Encoding of `ptr`. + * @exception rb_eEncodingError `ptr` contains non-ASCII according to `enc`. + * @retval 0 No such id ever existed in the history. + * @retval otherwise The id that represents the given name. + */ +ID rb_check_id_cstr(const char *ptr, long len, rb_encoding *enc); + +/** + * Identical to rb_check_id_cstr(), except for the return type. It can also be + * seen as a routine identical to rb_check_symbol(), except it takes a pointer + * to a memory region instead of Ruby's string. + * + * @param[in] ptr A pointer to a memory region. + * @param[in] len Number of bytes of `ptr`. + * @param[in] enc Encoding of `ptr`. + * @exception rb_eEncodingError `ptr` contains non-ASCII according to `enc`. + * @retval RUBY_Qnil No such id ever existed in the history. + * @retval otherwise The id that represents the given name. + */ +VALUE rb_check_symbol_cstr(const char *ptr, long len, rb_encoding *enc); + +RBIMPL_SYMBOL_EXPORT_END() + +#endif /* RUBY_INTERNAL_ENCODING_SYMBOL_H */ diff --git a/include/ruby/internal/encoding/transcode.h b/include/ruby/internal/encoding/transcode.h new file mode 100644 index 0000000000..7f26d2eae9 --- /dev/null +++ b/include/ruby/internal/encoding/transcode.h @@ -0,0 +1,562 @@ +#ifndef RUBY_INTERNAL_ENCODING_TRANSCODE_H /*-*-C++-*-vi:se ft=cpp:*/ +#define RUBY_INTERNAL_ENCODING_TRANSCODE_H +/** + * @file + * @author Ruby developers <ruby-core@ruby-lang.org> + * @copyright This file is a part of the programming language Ruby. + * Permission is hereby granted, to either redistribute and/or + * modify this file, provided that the conditions mentioned in the + * file COPYING are met. Consult the file for details. + * @warning Symbols prefixed with either `RBIMPL` or `rbimpl` are + * implementation details. Don't take them as canon. They could + * rapidly appear then vanish. The name (path) of this header file + * is also an implementation detail. Do not expect it to persist + * at the place it is now. Developers are free to move it anywhere + * anytime at will. + * @note To ruby-core: remember that this header can be possibly + * recursively included from extension libraries written in C++. + * Do not expect for instance `__VA_ARGS__` is always available. + * We assume C99 for ruby itself but we don't assume languages of + * extension libraries. They could be written in C++98. + * @brief econv stuff + */ + +#include "ruby/internal/dllexport.h" +#include "ruby/internal/value.h" + +RBIMPL_SYMBOL_EXPORT_BEGIN() + +/** return value of rb_econv_convert() */ +typedef enum { + + /** + * The conversion stopped when it found an invalid sequence. + */ + econv_invalid_byte_sequence, + + /** + * The conversion stopped when it found a character in the input which + * cannot be representable in the output. + */ + econv_undefined_conversion, + + /** + * The conversion stopped because there is no destination. + */ + econv_destination_buffer_full, + + /** + * The conversion stopped because there is no input. + */ + econv_source_buffer_empty, + + /** + * The conversion stopped after converting everything. This is arguably + * the expected normal end of conversion. + */ + econv_finished, + + /** + * The conversion stopped after writing something to somewhere, before + * reading everything. + */ + econv_after_output, + + /** + * The conversion stopped in middle of reading a character, possibly due to + * a partial read of a socket etc. + */ + econv_incomplete_input +} rb_econv_result_t; + +/** An opaque struct that represents a lowest level of encoding conversion. */ +typedef struct rb_econv_t rb_econv_t; + +/** + * Converts the contents of the passed string from its encoding to the passed + * one. + * + * @param[in] str Target string. + * @param[in] to Destination encoding. + * @param[in] ecflags A set of enum + * ::ruby_econv_flag_type. + * @param[in] ecopts A keyword hash, like + * ::rb_io_t::rb_io_enc_t::ecopts. + * @exception rb_eArgError Not fully converted. + * @exception rb_eInvalidByteSequenceError `str` is malformed. + * @exception rb_eUndefinedConversionError `str` has a character not + * representable using `to`. + * @exception rb_eConversionNotFoundError There is no known conversion from + * `str`'s encoding to `to`. + * @return A string whose encoding is `to`, and whose contents is converted + * contents of `str`. + * @note Use rb_econv_prepare_options() to generate `ecopts`. + */ +VALUE rb_str_encode(VALUE str, VALUE to, int ecflags, VALUE ecopts); + +/** + * Queries if there is more than one way to convert between the passed two + * encodings. Encoding conversion are has_and_belongs_to_many relationships. + * There could be no direct conversion defined for the passed pair. Ruby tries + * to find an indirect way to do so then. For instance ISO-8859-1 has no + * direct conversion to ISO-2022-JP. But there is ISO-8859-1 to UTF-8 + * conversion; then there is UTF-8 to EUC-JP conversion; finally there also is + * EUC-JP to ISO-2022-JP conversion. So in short ISO-8859-1 can be converted + * to ISO-2022-JP using that path. This function returns true. Obviously not + * everything that can be represented using UTF-8 can also be represented using + * EUC-JP. Conversions in practice can fail depending on the actual input, and + * that renders exceptions in case of rb_str_encode(). + * + * @param[in] from_encoding One encoding. + * @param[in] to_encoding Another encoding. + * @retval 0 No way to convert the two. + * @retval 1 At least one way to convert the two. + * + * @internal + * + * Practically @shyouhei knows no way for this function to return 0. It seems + * everything can eventually be converted to/from UTF-8, which connects + * everything. + */ +int rb_econv_has_convpath_p(const char* from_encoding, const char* to_encoding); + +/** + * Identical to rb_econv_prepare_opts(), except it additionally takes the + * initial value of flags. The extra bits are bitwise-ORed to the return + * value. + * + * @param[in] opthash Keyword arguments. + * @param[out] ecopts Return buffer. + * @param[in] ecflags Default set of enum ::ruby_econv_flag_type. + * @exception rb_eArgError Unknown/Broken values passed. + * @return Calculated set of enum ::ruby_econv_flag_type. + * @post `ecopts` holds a hash object suitable for + * ::rb_io_t::rb_io_enc_t::ecopts. + */ +int rb_econv_prepare_options(VALUE opthash, VALUE *ecopts, int ecflags); + +/** + * Splits a keyword arguments hash (that for instance `String#encode` took) + * into a set of enum ::ruby_econv_flag_type and a hash storing replacement + * characters etc. + * + * @param[in] opthash Keyword arguments. + * @param[out] ecopts Return buffer. + * @exception rb_eArgError Unknown/Broken values passed. + * @return Calculated set of enum ::ruby_econv_flag_type. + * @post `ecopts` holds a hash object suitable for + * ::rb_io_t::rb_io_enc_t::ecopts. + */ +int rb_econv_prepare_opts(VALUE opthash, VALUE *ecopts); + +/** + * Creates a new instance of struct ::rb_econv_t. + * + * @param[in] source_encoding Name of an encoding. + * @param[in] destination_encoding Name of another encoding. + * @param[in] ecflags A set of enum ::ruby_econv_flag_type. + * @exception rb_eArgError No such encoding. + * @retval NULL Failed to create a struct ::rb_econv_t. + * @retval otherwise Allocated struct ::rb_econv_t. + * @warning Return value must be passed to rb_econv_close() exactly once. + */ +rb_econv_t *rb_econv_open(const char *source_encoding, const char *destination_encoding, int ecflags); + +/** + * Identical to rb_econv_open(), except it additionally takes a hash of + * optional strings. + * + * + * @param[in] source_encoding Name of an encoding. + * @param[in] destination_encoding Name of another encoding. + * @param[in] ecflags A set of enum ::ruby_econv_flag_type. + * @param[in] ecopts Optional set of strings. + * @exception rb_eArgError No such encoding. + * @retval NULL Failed to create a struct ::rb_econv_t. + * @retval otherwise Allocated struct ::rb_econv_t. + * @warning Return value must be passed to rb_econv_close() exactly once. + */ +rb_econv_t *rb_econv_open_opts(const char *source_encoding, const char *destination_encoding, int ecflags, VALUE ecopts); + +/** + * Converts a string from an encoding to another. + * + * Possible flags are either ::RUBY_ECONV_PARTIAL_INPUT (means the source + * buffer is a part of much larger one), ::RUBY_ECONV_AFTER_OUTPUT (instructs + * the converter to stop after output before input), or both of them. + * + * @param[in,out] ec Conversion specification/state etc. + * @param[in] source_buffer_ptr Target string. + * @param[in] source_buffer_end End of target string. + * @param[out] destination_buffer_ptr Return buffer. + * @param[out] destination_buffer_end End of return buffer. + * @param[in] flags Flags (see above). + * @return The status of the conversion. + * @post `destination_buffer_ptr` holds conversion results. + */ +rb_econv_result_t rb_econv_convert(rb_econv_t *ec, + const unsigned char **source_buffer_ptr, const unsigned char *source_buffer_end, + unsigned char **destination_buffer_ptr, unsigned char *destination_buffer_end, + int flags); + +/** + * Destructs a converter. Note that a converter can have a buffer, and can be + * non-empty. Calling this would lose your data then. + * + * @param[out] ec The converter to destroy. + * @post `ec` is no longer a valid pointer. + */ +void rb_econv_close(rb_econv_t *ec); + +/** + * Assigns the replacement string. The string passed here would appear in + * converted string when it cannot represent its source counterpart. This can + * happen for instance you convert an emoji to ISO-8859-1. + * + * @param[out] ec Target converter. + * @param[in] str Replacement string. + * @param[in] len Number of bytes of `str`. + * @param[in] encname Name of encoding of `str`. + * @retval 0 Success. + * @retval -1 Failure (ENOMEM etc.). + * @post `ec`'s replacement string is set to `str`. + */ +int rb_econv_set_replacement(rb_econv_t *ec, const unsigned char *str, size_t len, const char *encname); + +/** + * "Decorate"s a converter. There are special kind of converters that + * transforms the contents, like replacing CR into CRLF. You can add such + * decorators to a converter using this API. By using this function a + * decorator is prepended at the beginning of a conversion sequence: in case of + * CRLF conversion, newlines are converted before encodings are converted. + * + * @param[out] ec Target converter to decorate. + * @param[in] decorator_name Name of decorator to prepend. + * @retval 0 Success. + * @retval -1 Failure (no such decorator etc.). + * @post Decorator works before encoding conversion happens. + * + * @internal + * + * What is the possible value of the `decorator_name` is not public. You have + * to read through `transcode.c` carefully. + */ +int rb_econv_decorate_at_first(rb_econv_t *ec, const char *decorator_name); + +/** + * Identical to rb_econv_decorate_at_first(), except it adds to the opposite + * direction. For instance CRLF conversion would run _after_ encodings are + * converted. + * + * @param[out] ec Target converter to decorate. + * @param[in] decorator_name Name of decorator to prepend. + * @retval 0 Success. + * @retval -1 Failure (no such decorator etc.). + * @post Decorator works after encoding conversion happens. + */ +int rb_econv_decorate_at_last(rb_econv_t *ec, const char *decorator_name); + +/** + * Creates a `rb_eConverterNotFoundError` exception object (but does not + * raise). + * + * @param[in] senc Name of source encoding. + * @param[in] denc Name of destination encoding. + * @param[in] ecflags A set of enum ::ruby_econv_flag_type. + * @return An instance of `rb_eConverterNotFoundError`. + */ +VALUE rb_econv_open_exc(const char *senc, const char *denc, int ecflags); + +/** + * Appends the passed string to the passed converter's output buffer. This can + * be handy when an encoding needs bytes out of thin air; for instance + * ISO-2022-JP has "shift function" which does not correspond to any + * characters. + * + * @param[out] ec Target converter. + * @param[in] str String to insert. + * @param[in] len Number of bytes of `str`. + * @param[in] str_encoding Encoding of `str`. + * @retval 0 Success. + * @retval -1 Failure (conversion error etc.). + * @note `str_encoding` can be anything, and `str` itself is converted + * when necessary. + */ +int rb_econv_insert_output(rb_econv_t *ec, + const unsigned char *str, size_t len, const char *str_encoding); + +/** + * Queries an encoding name which best suits for rb_econv_insert_output()'s + * last parameter. Strings in this encoding need no conversion when inserted; + * can be both time/space efficient. + * + * @param[in] ec Target converter. + * @return Its encoding for insertion. + */ +const char *rb_econv_encoding_to_insert_output(rb_econv_t *ec); + +/** + * This is a rb_econv_make_exception() + rb_exc_raise() combo. + * + * @param[in] ec (Possibly failed) conversion. + * @exception rb_eInvalidByteSequenceError Invalid byte sequence. + * @exception rb_eUndefinedConversionError Conversion undefined. + * @note This function can return when no error. + */ +void rb_econv_check_error(rb_econv_t *ec); + +/** + * This function makes sense right after rb_econv_convert() returns. As listed + * in ::rb_econv_result_t, rb_econv_convert() can bail out for various reasons. + * This function checks the passed converter's internal state and convert it to + * an appropriate exception object. + * + * @param[in] ec Target converter. + * @retval RUBY_Qnil The converter has no error. + * @retval otherwise Conversion error turned into an exception. + */ +VALUE rb_econv_make_exception(rb_econv_t *ec); + +/** + * Queries if rb_econv_putback() makes sense, i.e. there are invalid byte + * sequences remain in the buffer. + * + * @param[in] ec Target converter. + * @return Number of bytes that can be pushed back. + */ +int rb_econv_putbackable(rb_econv_t *ec); + +/** + * Puts back the bytes. In case of ::econv_invalid_byte_sequence, some of + * those invalid bytes are discarded and the others are buffered to be + * converted later. The latter bytes can be put back using this API. + * + * @param[out] ec Target converter (invalid byte sequence). + * @param[out] p Return buffer. + * @param[in] n Max number of bytes to put back. + * @post At most `n` bytes of what was put back is written to `p`. + */ +void rb_econv_putback(rb_econv_t *ec, unsigned char *p, int n); + +/** + * Queries the passed encoding's corresponding ASCII compatible encoding. "The + * corresponding ASCII compatible encoding" in this context is an ASCII + * compatible encoding which can represent exactly the same character sets as + * the given ASCII incompatible encoding. For instance that of UTF-16LE is + * UTF-8. + * + * @param[in] encname Name of an ASCII incompatible encoding. + * @retval NULL `encname` is already ASCII compatible. + * @retval otherwise The corresponding ASCII compatible encoding. + */ +const char *rb_econv_asciicompat_encoding(const char *encname); + +/** + * Identical to rb_econv_convert(), except it takes Ruby's string instead of + * C's pointer. + * + * @param[in,out] ec Target converter. + * @param[in] src Source string. + * @param[in] flags Flags (see rb_econv_convert). + * @exception rb_eArgError Converted string is too long. + * @exception rb_eInvalidByteSequenceError Invalid byte sequence. + * @exception rb_eUndefinedConversionError Conversion undefined. + * @return The conversion result. + */ +VALUE rb_econv_str_convert(rb_econv_t *ec, VALUE src, int flags); + +/** + * Identical to rb_econv_str_convert(), except it converts only a part of the + * passed string. Can be handy when you for instance want to do line-buffered + * conversion. + * + * @param[in,out] ec Target converter. + * @param[in] src Source string. + * @param[in] byteoff Number of bytes to seek. + * @param[in] bytesize Number of bytes to read. + * @param[in] flags Flags (see rb_econv_convert). + * @exception rb_eArgError Converted string is too long. + * @exception rb_eInvalidByteSequenceError Invalid byte sequence. + * @exception rb_eUndefinedConversionError Conversion undefined. + * @return The conversion result. + */ +VALUE rb_econv_substr_convert(rb_econv_t *ec, VALUE src, long byteoff, long bytesize, int flags); + +/** + * Identical to rb_econv_str_convert(), except it appends the conversion result + * to the additionally passed string instead of creating a new string. It can + * also be seen as a routine identical to rb_econv_append(), except it takes a + * Ruby's string instead of C's pointer. + * + * @param[in,out] ec Target converter. + * @param[in] src Source string. + * @param[in] dst Return buffer. + * @param[in] flags Flags (see rb_econv_convert). + * @exception rb_eArgError Converted string is too long. + * @exception rb_eInvalidByteSequenceError Invalid byte sequence. + * @exception rb_eUndefinedConversionError Conversion undefined. + * @return The conversion result. + */ +VALUE rb_econv_str_append(rb_econv_t *ec, VALUE src, VALUE dst, int flags); + +/** + * Identical to rb_econv_str_append(), except it appends only a part of the + * passed string with conversion. It can also be seen as a routine identical + * to rb_econv_substr_convert(), except it appends the conversion result to the + * additionally passed string instead of creating a new string. + * + * @param[in,out] ec Target converter. + * @param[in] src Source string. + * @param[in] byteoff Number of bytes to seek. + * @param[in] bytesize Number of bytes to read. + * @param[in] dst Return buffer. + * @param[in] flags Flags (see rb_econv_convert). + * @exception rb_eArgError Converted string is too long. + * @exception rb_eInvalidByteSequenceError Invalid byte sequence. + * @exception rb_eUndefinedConversionError Conversion undefined. + * @return The conversion result. + */ +VALUE rb_econv_substr_append(rb_econv_t *ec, VALUE src, long byteoff, long bytesize, VALUE dst, int flags); + +/** + * Converts the passed C's pointer according to the passed converter, then + * append the conversion result to the passed Ruby's string. This way buffer + * overflow is properly avoided to resize the destination properly. + * + * @param[in,out] ec Target converter. + * @param[in] bytesrc Target string. + * @param[in] bytesize Number of bytes of `bytesrc`. + * @param[in] dst Return buffer. + * @param[in] flags Flags (see rb_econv_convert). + * @exception rb_eArgError Converted string is too long. + * @exception rb_eInvalidByteSequenceError Invalid byte sequence. + * @exception rb_eUndefinedConversionError Conversion undefined. + * @return The conversion result. + */ +VALUE rb_econv_append(rb_econv_t *ec, const char *bytesrc, long bytesize, VALUE dst, int flags); + +/** + * This badly named function does not set the destination encoding to binary, + * but instead just nullifies newline conversion decorators if any. Other + * ordinal character conversions still happen after this; something non-binary + * would still be generated. + * + * @param[out] ec Target converter to modify. + * @post Any newline conversions, if any, would be killed. + */ +void rb_econv_binmode(rb_econv_t *ec); + +/** + * This enum is kind of omnibus. Gathers various constants. + */ +enum ruby_econv_flag_type { + + /** + * @name Flags for rb_econv_open() + * + * @{ + */ + + /** Mask for error handling related bits. */ + RUBY_ECONV_ERROR_HANDLER_MASK = 0x000000ff, + + /** Special handling of invalid sequences are there. */ + RUBY_ECONV_INVALID_MASK = 0x0000000f, + + /** Invalid sequences shall be replaced. */ + RUBY_ECONV_INVALID_REPLACE = 0x00000002, + + /** Special handling of undefined conversion are there. */ + RUBY_ECONV_UNDEF_MASK = 0x000000f0, + + /** Undefined characters shall be replaced. */ + RUBY_ECONV_UNDEF_REPLACE = 0x00000020, + + /** Undefined characters shall be escaped. */ + RUBY_ECONV_UNDEF_HEX_CHARREF = 0x00000030, + + /** Decorators are there. */ + RUBY_ECONV_DECORATOR_MASK = 0x0001ff00, + + /** Newline converters are there. */ + RUBY_ECONV_NEWLINE_DECORATOR_MASK = 0x00007f00, + + /** (Unclear; seems unused). */ + RUBY_ECONV_NEWLINE_DECORATOR_READ_MASK = 0x00000f00, + + /** (Unclear; seems unused). */ + RUBY_ECONV_NEWLINE_DECORATOR_WRITE_MASK = 0x00007000, + + /** Universal newline mode. */ + RUBY_ECONV_UNIVERSAL_NEWLINE_DECORATOR = 0x00000100, + + /** CR to CRLF conversion shall happen. */ + RUBY_ECONV_CRLF_NEWLINE_DECORATOR = 0x00001000, + + /** CRLF to CR conversion shall happen. */ + RUBY_ECONV_CR_NEWLINE_DECORATOR = 0x00002000, + + /** CRLF to LF conversion shall happen. */ + RUBY_ECONV_LF_NEWLINE_DECORATOR = 0x00004000, + + /** Texts shall be XML-escaped. */ + RUBY_ECONV_XML_TEXT_DECORATOR = 0x00008000, + + /** Texts shall be AttrValue escaped */ + RUBY_ECONV_XML_ATTR_CONTENT_DECORATOR = 0x00010000, + + /** (Unclear; seems unused). */ + RUBY_ECONV_STATEFUL_DECORATOR_MASK = 0x00f00000, + + /** Texts shall be AttrValue escaped. */ + RUBY_ECONV_XML_ATTR_QUOTE_DECORATOR = 0x00100000, + + /** Newline decorator's default. */ + RUBY_ECONV_DEFAULT_NEWLINE_DECORATOR = +#if defined(RUBY_TEST_CRLF_ENVIRONMENT) || defined(_WIN32) + RUBY_ECONV_CRLF_NEWLINE_DECORATOR, +#else + 0, +#endif + +#define ECONV_ERROR_HANDLER_MASK RUBY_ECONV_ERROR_HANDLER_MASK /**< @old{RUBY_ECONV_ERROR_HANDLER_MASK} */ +#define ECONV_INVALID_MASK RUBY_ECONV_INVALID_MASK /**< @old{RUBY_ECONV_INVALID_MASK} */ +#define ECONV_INVALID_REPLACE RUBY_ECONV_INVALID_REPLACE /**< @old{RUBY_ECONV_INVALID_REPLACE} */ +#define ECONV_UNDEF_MASK RUBY_ECONV_UNDEF_MASK /**< @old{RUBY_ECONV_UNDEF_MASK} */ +#define ECONV_UNDEF_REPLACE RUBY_ECONV_UNDEF_REPLACE /**< @old{RUBY_ECONV_UNDEF_REPLACE} */ +#define ECONV_UNDEF_HEX_CHARREF RUBY_ECONV_UNDEF_HEX_CHARREF /**< @old{RUBY_ECONV_UNDEF_HEX_CHARREF} */ +#define ECONV_DECORATOR_MASK RUBY_ECONV_DECORATOR_MASK /**< @old{RUBY_ECONV_DECORATOR_MASK} */ +#define ECONV_NEWLINE_DECORATOR_MASK RUBY_ECONV_NEWLINE_DECORATOR_MASK /**< @old{RUBY_ECONV_NEWLINE_DECORATOR_MASK} */ +#define ECONV_NEWLINE_DECORATOR_READ_MASK RUBY_ECONV_NEWLINE_DECORATOR_READ_MASK /**< @old{RUBY_ECONV_NEWLINE_DECORATOR_READ_MASK} */ +#define ECONV_NEWLINE_DECORATOR_WRITE_MASK RUBY_ECONV_NEWLINE_DECORATOR_WRITE_MASK /**< @old{RUBY_ECONV_NEWLINE_DECORATOR_WRITE_MASK} */ +#define ECONV_UNIVERSAL_NEWLINE_DECORATOR RUBY_ECONV_UNIVERSAL_NEWLINE_DECORATOR /**< @old{RUBY_ECONV_UNIVERSAL_NEWLINE_DECORATOR} */ +#define ECONV_CRLF_NEWLINE_DECORATOR RUBY_ECONV_CRLF_NEWLINE_DECORATOR /**< @old{RUBY_ECONV_CRLF_NEWLINE_DECORATOR} */ +#define ECONV_CR_NEWLINE_DECORATOR RUBY_ECONV_CR_NEWLINE_DECORATOR /**< @old{RUBY_ECONV_CR_NEWLINE_DECORATOR} */ +#define ECONV_LF_NEWLINE_DECORATOR RUBY_ECONV_LF_NEWLINE_DECORATOR /**< @old{RUBY_ECONV_LF_NEWLINE_DECORATOR} */ +#define ECONV_XML_TEXT_DECORATOR RUBY_ECONV_XML_TEXT_DECORATOR /**< @old{RUBY_ECONV_XML_TEXT_DECORATOR} */ +#define ECONV_XML_ATTR_CONTENT_DECORATOR RUBY_ECONV_XML_ATTR_CONTENT_DECORATOR /**< @old{RUBY_ECONV_XML_ATTR_CONTENT_DECORATOR} */ +#define ECONV_STATEFUL_DECORATOR_MASK RUBY_ECONV_STATEFUL_DECORATOR_MASK /**< @old{RUBY_ECONV_STATEFUL_DECORATOR_MASK} */ +#define ECONV_XML_ATTR_QUOTE_DECORATOR RUBY_ECONV_XML_ATTR_QUOTE_DECORATOR /**< @old{RUBY_ECONV_XML_ATTR_QUOTE_DECORATOR} */ +#define ECONV_DEFAULT_NEWLINE_DECORATOR RUBY_ECONV_DEFAULT_NEWLINE_DECORATOR /**< @old{RUBY_ECONV_DEFAULT_NEWLINE_DECORATOR} */ + /** @} */ + + /** + * @name Flags for rb_econv_convert() + * + * @{ + */ + + /** Indicates the input is a part of much larger one. */ + RUBY_ECONV_PARTIAL_INPUT = 0x00020000, + + /** Instructs the converter to stop after output. */ + RUBY_ECONV_AFTER_OUTPUT = 0x00040000, +#define ECONV_PARTIAL_INPUT RUBY_ECONV_PARTIAL_INPUT /**< @old{RUBY_ECONV_PARTIAL_INPUT} */ +#define ECONV_AFTER_OUTPUT RUBY_ECONV_AFTER_OUTPUT /**< @old{RUBY_ECONV_AFTER_OUTPUT} */ + + RUBY_ECONV_FLAGS_PLACEHOLDER /**< Placeholder (not used) */ +}; + +RBIMPL_SYMBOL_EXPORT_END() + +#endif /* RUBY_INTERNAL_ENCODING_TRANSCODE_H */ diff --git a/include/ruby/internal/error.h b/include/ruby/internal/error.h index 49e2276cb9..cd37f4461a 100644 --- a/include/ruby/internal/error.h +++ b/include/ruby/internal/error.h @@ -50,7 +50,19 @@ typedef enum { /** Warning is for experimental features. */ RB_WARN_CATEGORY_EXPERIMENTAL, - RB_WARN_CATEGORY_ALL_BITS = 0x6 /* no RB_WARN_CATEGORY_NONE bit */ + /** Warning is for performance issues (not enabled by -w). */ + RB_WARN_CATEGORY_PERFORMANCE, + + RB_WARN_CATEGORY_DEFAULT_BITS = ( + (1U << RB_WARN_CATEGORY_DEPRECATED) | + (1U << RB_WARN_CATEGORY_EXPERIMENTAL) | + 0), + + RB_WARN_CATEGORY_ALL_BITS = ( + (1U << RB_WARN_CATEGORY_DEPRECATED) | + (1U << RB_WARN_CATEGORY_EXPERIMENTAL) | + (1U << RB_WARN_CATEGORY_PERFORMANCE) | + 0) } rb_warning_category_t; /** for rb_readwrite_sys_fail first argument */ @@ -469,7 +481,7 @@ VALUE *rb_ruby_debug_ptr(void); */ #define ruby_debug (*rb_ruby_debug_ptr()) -/* reports if `-W' specified */ +/* reports if $VERBOSE is true */ RBIMPL_ATTR_NONNULL((1)) RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 1, 2) /** @@ -484,7 +496,8 @@ RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 1, 2) * default, the method just emits its passed contents to ::rb_stderr using * rb_io_write(). * - * @note This function is affected by the `-W` flag. + * @note This function is affected by the value of $VERBOSE, it does + * nothing unless $VERBOSE is true. * @param[in] fmt Format specifier string compatible with rb_sprintf(). * * @internal @@ -509,7 +522,7 @@ RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 3, 4) * Issues a compile-time warning that happens at `__file__:__line__`. Purpose * of this function being exposed to CAPI is unclear. * - * @note This function is affected by the `-W` flag. + * @note This function is affected by the value of $VERBOSE. * @param[in] file The path corresponding to Ruby level `__FILE__`. * @param[in] line The number corresponding to Ruby level `__LINE__`. * @param[in] fmt Format specifier string compatible with rb_sprintf(). @@ -522,19 +535,20 @@ RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 1, 2) * Identical to rb_sys_fail(), except it does not raise an exception to render * a warning instead. * - * @note This function is affected by the `-W` flag. + * @note This function is affected by the value of $VERBOSE. * @param[in] fmt Format specifier string compatible with rb_sprintf(). */ void rb_sys_warning(const char *fmt, ...); -/* reports always */ +/* reports if $VERBOSE is not nil (so if it is true or false) */ RBIMPL_ATTR_COLD() RBIMPL_ATTR_NONNULL((1)) RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 1, 2) /** - * Identical to rb_warning(), except it reports always regardless of runtime - * `-W` flag. + * Identical to rb_warning(), except it reports unless $VERBOSE is nil. * + * @note This function is affected by the value of $VERBOSE, it does + * nothing if $VERBOSE is nil. * @param[in] fmt Format specifier string compatible with rb_sprintf(). */ void rb_warn(const char *fmt, ...); @@ -543,8 +557,7 @@ RBIMPL_ATTR_COLD() RBIMPL_ATTR_NONNULL((2)) RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 2, 3) /** - * Identical to rb_category_warning(), except it reports always regardless of - * runtime `-W` flag. + * Identical to rb_category_warning(), except it reports unless $VERBOSE is nil. * * @param[in] cat Category e.g. deprecated. * @param[in] fmt Format specifier string compatible with rb_sprintf(). @@ -554,8 +567,7 @@ void rb_category_warn(rb_warning_category_t cat, const char *fmt, ...); RBIMPL_ATTR_NONNULL((1, 3)) RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 3, 4) /** - * Identical to rb_compile_warning(), except it reports always regardless of - * runtime `-W` flag. + * Identical to rb_compile_warning(), except it reports unless $VERBOSE is nil. * * @param[in] file The path corresponding to Ruby level `__FILE__`. * @param[in] line The number corresponding to Ruby level `__LINE__`. diff --git a/include/ruby/internal/eval.h b/include/ruby/internal/eval.h index 34a53849da..5bcbb97746 100644 --- a/include/ruby/internal/eval.h +++ b/include/ruby/internal/eval.h @@ -28,10 +28,12 @@ RBIMPL_SYMBOL_EXPORT_BEGIN() RBIMPL_ATTR_NONNULL(()) /** - * Evaluates the given string in an isolated binding. + * Evaluates the given string. * - * Here "isolated" means that the binding does not inherit any other - * bindings. This behaves same as the binding for required libraries. + * In case it is called from within a C-backended method, the evaluation is + * done under the current binding. However there can be no method. On such + * situation this function evaluates in an isolated binding, like `require` + * runs in a separate one. * * `__FILE__` will be `"(eval)"`, and `__LINE__` starts from 1 in the * evaluation. @@ -39,6 +41,31 @@ RBIMPL_ATTR_NONNULL(()) * @param[in] str Ruby code to evaluate. * @exception rb_eException Raises an exception on error. * @return The evaluated result. + * + * @internal + * + * @shyouhei's old tale about the birth and growth of this function: + * + * At the beginning, there was no rb_eval_string(). @shyouhei heard that + * @shugo, author of Apache httpd's mod_ruby module, requested @matz for this + * API. He wanted a way so that mod_ruby can evaluate ruby scripts one by one, + * separately, in each different contexts. So this function was made. It was + * designed to be a global interpreter entry point like ruby_run_node(). + * + * The way it is implemented however allows extension libraries (not just + * programs like Apache httpd) to call this function. Because its name says + * nothing about the initial design, people started to think of it as an + * orthodox way to call ruby level `eval` method from their extension + * libraries. Even our `extension.rdoc` has had a description of this function + * basically according to this understanding. + * + * The old (mod_ruby like) usage still works. But over time, usages of this + * function from extension libraries got popular, while mod_ruby faded out; is + * no longer maintained now. Devs decided to actively support both. This + * function now auto-detects how it is called, and switches how it works + * depending on it. + * + * @see https://bugs.ruby-lang.org/issues/18780 */ VALUE rb_eval_string(const char *str); diff --git a/include/ruby/internal/event.h b/include/ruby/internal/event.h index 04b137a193..1d194ed618 100644 --- a/include/ruby/internal/event.h +++ b/include/ruby/internal/event.h @@ -23,6 +23,10 @@ #include "ruby/internal/dllexport.h" #include "ruby/internal/value.h" +#ifdef HAVE_STDINT_H +#include <stdint.h> +#endif + /* These macros are not enums because they are wider than int.*/ /** @@ -54,6 +58,7 @@ #define RUBY_EVENT_THREAD_END 0x0800 /**< Encountered an end of a thread. */ #define RUBY_EVENT_FIBER_SWITCH 0x1000 /**< Encountered a `Fiber#yield`. */ #define RUBY_EVENT_SCRIPT_COMPILED 0x2000 /**< Encountered an `eval`. */ +#define RUBY_EVENT_RESCUE 0x4000 /**< Encountered a `rescue` statement. */ #define RUBY_EVENT_TRACEPOINT_ALL 0xffff /**< Bitmask of extended events. */ /** @} */ diff --git a/include/ruby/internal/fl_type.h b/include/ruby/internal/fl_type.h index 47f054256b..0a05166784 100644 --- a/include/ruby/internal/fl_type.h +++ b/include/ruby/internal/fl_type.h @@ -57,8 +57,7 @@ #define FL_SINGLETON RBIMPL_CAST((VALUE)RUBY_FL_SINGLETON) /**< @old{RUBY_FL_SINGLETON} */ #define FL_WB_PROTECTED RBIMPL_CAST((VALUE)RUBY_FL_WB_PROTECTED) /**< @old{RUBY_FL_WB_PROTECTED} */ -#define FL_PROMOTED0 RBIMPL_CAST((VALUE)RUBY_FL_PROMOTED0) /**< @old{RUBY_FL_PROMOTED0} */ -#define FL_PROMOTED1 RBIMPL_CAST((VALUE)RUBY_FL_PROMOTED1) /**< @old{RUBY_FL_PROMOTED1} */ +#define FL_PROMOTED RBIMPL_CAST((VALUE)RUBY_FL_PROMOTED) /**< @old{RUBY_FL_PROMOTED} */ #define FL_FINALIZE RBIMPL_CAST((VALUE)RUBY_FL_FINALIZE) /**< @old{RUBY_FL_FINALIZE} */ #define FL_TAINT RBIMPL_CAST((VALUE)RUBY_FL_TAINT) /**< @old{RUBY_FL_TAINT} */ #define FL_SHAREABLE RBIMPL_CAST((VALUE)RUBY_FL_SHAREABLE) /**< @old{RUBY_FL_SHAREABLE} */ @@ -111,13 +110,6 @@ #define RB_OBJ_FREEZE_RAW RB_OBJ_FREEZE_RAW #define RB_OBJ_FROZEN RB_OBJ_FROZEN #define RB_OBJ_FROZEN_RAW RB_OBJ_FROZEN_RAW -#define RB_OBJ_INFECT RB_OBJ_INFECT -#define RB_OBJ_INFECT_RAW RB_OBJ_INFECT_RAW -#define RB_OBJ_TAINT RB_OBJ_TAINT -#define RB_OBJ_TAINTABLE RB_OBJ_TAINTABLE -#define RB_OBJ_TAINTED RB_OBJ_TAINTED -#define RB_OBJ_TAINTED_RAW RB_OBJ_TAINTED_RAW -#define RB_OBJ_TAINT_RAW RB_OBJ_TAINT_RAW #define RB_OBJ_UNTRUST RB_OBJ_TAINT #define RB_OBJ_UNTRUSTED RB_OBJ_TAINTED /** @endcond */ @@ -183,7 +175,7 @@ RB_GNUC_EXTENSION * @note About the `FL_USER` terminology: the "user" here does not necessarily * mean only you. For instance struct ::RString instances use these * bits to cache their encodings etc. Devs discussed about this topic, - * reached their concensus that ::RUBY_T_DATA is the only valid data + * reached their consensus that ::RUBY_T_DATA is the only valid data * structure that can use these bits; other data structures including * ::RUBY_T_OBJECT use these bits for their own purpose. See also * https://bugs.ruby-lang.org/issues/18059 @@ -207,12 +199,15 @@ ruby_fl_type { RUBY_FL_WB_PROTECTED = (1<<5), /** - * This flag has something to do with our garbage collector. These days - * ruby objects are "generational". There are those who are young and - * those who are old. Young objects are prone to die; monitored relatively - * extensively by the garbage collector. OTOH old objects tend to live - * longer. They are relatively rarely considered. This flag is set when a - * object experienced promotion i.e. survived a garbage collection. + * Ruby objects are "generational". There are young objects & old objects. + * Young objects are prone to die & monitored relatively extensively by the + * garbage collector. Old objects tend to live longer & are monitored less + * frequently. When an object survives a GC, its age is incremented. When + * age is equal to RVALUE_OLD_AGE, the object becomes Old. This flag is set + * when an object becomes old, and is used by the write barrier to check if + * an old object should be considered for marking more frequently - as old + * objects that have references added between major GCs need to be remarked + * to prevent the referred object being mistakenly swept. * * @internal * @@ -220,41 +215,14 @@ ruby_fl_type { * 3rd parties. It must be an implementation detail that they should never * know. Might better be hidden. */ - RUBY_FL_PROMOTED0 = (1<<5), + RUBY_FL_PROMOTED = (1<<5), /** - * This flag has something to do with our garbage collector. These days - * ruby objects are "generational". There are those who are young and - * those who are old. Young objects are prone to die; monitored relatively - * extensively by the garbage collector. OTOH old objects tend to live - * longer. They are relatively rarely considered. This flag is set when a - * object experienced two promotions i.e. survived garbage collections - * twice. + * This flag is no longer in use * * @internal - * - * But honestly, @shyouhei doesn't think this flag should be visible from - * 3rd parties. It must be an implementation detail that they should never - * know. Might better be hidden. - */ - RUBY_FL_PROMOTED1 = (1<<6), - - /** - * This flag has something to do with our garbage collector. These days - * ruby objects are "generational". There are those who are young and - * those who are old. Young objects are prone to die; monitored relatively - * extensively by the garbage collector. OTOH old objects tend to live - * longer. They are relatively rarely considered. This flag is set when a - * object experienced promotions i.e. survived more than one garbage - * collections. - * - * @internal - * - * But honestly, @shyouhei doesn't think this flag should be visible from - * 3rd parties. It must be an implementation detail that they should never - * know. Might better be hidden. */ - RUBY_FL_PROMOTED = RUBY_FL_PROMOTED0 | RUBY_FL_PROMOTED1, + RUBY_FL_UNUSED6 = (1<<6), /** * This flag has something to do with finalisers. A ruby object can have @@ -283,7 +251,7 @@ ruby_fl_type { # pragma deprecated(RUBY_FL_TAINT) #endif - = (1<<8), + = 0, /** * This flag has something to do with Ractor. Multiple Ractors run without @@ -310,7 +278,7 @@ ruby_fl_type { # pragma deprecated(RUBY_FL_UNTRUSTED) #endif - = (1<<8), + = 0, /** * This flag has something to do with object IDs. Unlike in the old days, @@ -427,7 +395,7 @@ ruby_fl_type { * 3rd parties. It must be an implementation detail that they should never * know. Might better be hidden. */ - RUBY_FL_SINGLETON = RUBY_FL_USER0, + RUBY_FL_SINGLETON = RUBY_FL_USER1, }; enum { @@ -451,12 +419,6 @@ enum { RBIMPL_SYMBOL_EXPORT_BEGIN() /** - * @deprecated Does nothing. This method is deprecated and will be removed in - * Ruby 3.2. - */ -void rb_obj_infect(VALUE victim, VALUE carrier); - -/** * This is an implementation detail of #RB_OBJ_FREEZE(). People don't use it * directly. * @@ -495,7 +457,7 @@ RB_FL_ABLE(VALUE obj) RBIMPL_ATTR_PURE_UNLESS_DEBUG() RBIMPL_ATTR_ARTIFICIAL() /** - * This is an implenentation detail of RB_FL_TEST(). 3rd parties need not use + * This is an implementation detail of RB_FL_TEST(). 3rd parties need not use * this. Just always use RB_FL_TEST(). * * @param[in] obj Object in question. @@ -543,7 +505,7 @@ RB_FL_TEST(VALUE obj, VALUE flags) RBIMPL_ATTR_PURE_UNLESS_DEBUG() RBIMPL_ATTR_ARTIFICIAL() /** - * This is an implenentation detail of RB_FL_ANY(). 3rd parties need not use + * This is an implementation detail of RB_FL_ANY(). 3rd parties need not use * this. Just always use RB_FL_ANY(). * * @param[in] obj Object in question. @@ -577,7 +539,7 @@ RB_FL_ANY(VALUE obj, VALUE flags) RBIMPL_ATTR_PURE_UNLESS_DEBUG() RBIMPL_ATTR_ARTIFICIAL() /** - * This is an implenentation detail of RB_FL_ALL(). 3rd parties need not use + * This is an implementation detail of RB_FL_ALL(). 3rd parties need not use * this. Just always use RB_FL_ALL(). * * @param[in] obj Object in question. @@ -613,7 +575,7 @@ RBIMPL_ATTR_ARTIFICIAL() /** * @private * - * This is an implenentation detail of RB_FL_SET(). 3rd parties need not use + * This is an implementation detail of RB_FL_SET(). 3rd parties need not use * this. Just always use RB_FL_SET(). * * @param[out] obj Object in question. @@ -633,7 +595,7 @@ rbimpl_fl_set_raw_raw(struct RBasic *obj, VALUE flags) RBIMPL_ATTR_ARTIFICIAL() /** - * This is an implenentation detail of RB_FL_SET(). 3rd parties need not use + * This is an implementation detail of RB_FL_SET(). 3rd parties need not use * this. Just always use RB_FL_SET(). * * @param[out] obj Object in question. @@ -673,7 +635,7 @@ RBIMPL_ATTR_ARTIFICIAL() /** * @private * - * This is an implenentation detail of RB_FL_UNSET(). 3rd parties need not use + * This is an implementation detail of RB_FL_UNSET(). 3rd parties need not use * this. Just always use RB_FL_UNSET(). * * @param[out] obj Object in question. @@ -693,7 +655,7 @@ rbimpl_fl_unset_raw_raw(struct RBasic *obj, VALUE flags) RBIMPL_ATTR_ARTIFICIAL() /** - * This is an implenentation detail of RB_FL_UNSET(). 3rd parties need not use + * This is an implementation detail of RB_FL_UNSET(). 3rd parties need not use * this. Just always use RB_FL_UNSET(). * * @param[out] obj Object in question. @@ -728,7 +690,7 @@ RBIMPL_ATTR_ARTIFICIAL() /** * @private * - * This is an implenentation detail of RB_FL_REVERSE(). 3rd parties need not + * This is an implementation detail of RB_FL_REVERSE(). 3rd parties need not * use this. Just always use RB_FL_REVERSE(). * * @param[out] obj Object in question. @@ -748,7 +710,7 @@ rbimpl_fl_reverse_raw_raw(struct RBasic *obj, VALUE flags) RBIMPL_ATTR_ARTIFICIAL() /** - * This is an implenentation detail of RB_FL_REVERSE(). 3rd parties need not + * This is an implementation detail of RB_FL_REVERSE(). 3rd parties need not * use this. Just always use RB_FL_REVERSE(). * * @param[out] obj Object in question. @@ -793,6 +755,7 @@ RBIMPL_ATTR_DEPRECATED(("taintedness turned out to be a wrong idea.")) static inline bool RB_OBJ_TAINTABLE(VALUE obj) { + (void)obj; return false; } @@ -810,6 +773,7 @@ RBIMPL_ATTR_DEPRECATED(("taintedness turned out to be a wrong idea.")) static inline VALUE RB_OBJ_TAINTED_RAW(VALUE obj) { + (void)obj; return false; } @@ -827,6 +791,7 @@ RBIMPL_ATTR_DEPRECATED(("taintedness turned out to be a wrong idea.")) static inline bool RB_OBJ_TAINTED(VALUE obj) { + (void)obj; return false; } @@ -842,6 +807,7 @@ RBIMPL_ATTR_DEPRECATED(("taintedness turned out to be a wrong idea.")) static inline void RB_OBJ_TAINT_RAW(VALUE obj) { + (void)obj; return; } @@ -857,6 +823,7 @@ RBIMPL_ATTR_DEPRECATED(("taintedness turned out to be a wrong idea.")) static inline void RB_OBJ_TAINT(VALUE obj) { + (void)obj; return; } @@ -873,6 +840,8 @@ RBIMPL_ATTR_DEPRECATED(("taintedness turned out to be a wrong idea.")) static inline void RB_OBJ_INFECT_RAW(VALUE dst, VALUE src) { + (void)dst; + (void)src; return; } @@ -889,13 +858,15 @@ RBIMPL_ATTR_DEPRECATED(("taintedness turned out to be a wrong idea.")) static inline void RB_OBJ_INFECT(VALUE dst, VALUE src) { + (void)dst; + (void)src; return; } RBIMPL_ATTR_PURE_UNLESS_DEBUG() RBIMPL_ATTR_ARTIFICIAL() /** - * This is an implenentation detail of RB_OBJ_FROZEN(). 3rd parties need not + * This is an implementation detail of RB_OBJ_FROZEN(). 3rd parties need not * use this. Just always use RB_OBJ_FROZEN(). * * @param[in] obj Object in question. @@ -934,9 +905,13 @@ RB_OBJ_FROZEN(VALUE obj) } } +RUBY_SYMBOL_EXPORT_BEGIN +void rb_obj_freeze_inline(VALUE obj); +RUBY_SYMBOL_EXPORT_END + RBIMPL_ATTR_ARTIFICIAL() /** - * This is an implenentation detail of RB_OBJ_FREEZE(). 3rd parties need not + * This is an implementation detail of RB_OBJ_FREEZE(). 3rd parties need not * use this. Just always use RB_OBJ_FREEZE(). * * @param[out] obj Object in question. @@ -944,24 +919,7 @@ RBIMPL_ATTR_ARTIFICIAL() static inline void RB_OBJ_FREEZE_RAW(VALUE obj) { - RB_FL_SET_RAW(obj, RUBY_FL_FREEZE); -} - -/** - * Prevents further modifications to the given object. ::rb_eFrozenError shall - * be raised if modification is attempted. - * - * @param[out] x Object in question. - */ -static inline void -rb_obj_freeze_inline(VALUE x) -{ - if (RB_FL_ABLE(x)) { - RB_OBJ_FREEZE_RAW(x); - if (RBASIC_CLASS(x) && !(RBASIC(x)->flags & RUBY_FL_SINGLETON)) { - rb_freeze_singleton_class(x); - } - } + rb_obj_freeze_inline(obj); } #endif /* RBIMPL_FL_TYPE_H */ diff --git a/include/ruby/internal/gc.h b/include/ruby/internal/gc.h index 66fc14e511..462f416af2 100644 --- a/include/ruby/internal/gc.h +++ b/include/ruby/internal/gc.h @@ -20,16 +20,379 @@ * extension libraries. They could be written in C++98. * @brief Registering values to the GC. */ +#include "ruby/internal/config.h" + +#ifdef STDC_HEADERS +# include <stddef.h> /* size_t */ +#endif + +#ifdef HAVE_SYS_TYPES_H +# include <sys/types.h> /* ssize_t */ +#endif + +#include "ruby/assert.h" +#include "ruby/internal/attr/cold.h" +#include "ruby/internal/attr/nonnull.h" +#include "ruby/internal/attr/noreturn.h" +#include "ruby/internal/attr/artificial.h" +#include "ruby/internal/attr/maybe_unused.h" +#include "ruby/internal/attr/pure.h" #include "ruby/internal/dllexport.h" +#include "ruby/internal/special_consts.h" +#include "ruby/internal/stdbool.h" #include "ruby/internal/value.h" RBIMPL_SYMBOL_EXPORT_BEGIN() +#define RUBY_REF_EDGE(s, p) offsetof(s, p) +#define RUBY_REFS_LIST_PTR(l) (RUBY_DATA_FUNC)(l) +#define RUBY_REF_END SIZE_MAX +#define RUBY_REFERENCES(t) static const size_t t[] +#define RUBY_REFERENCES_START(t) RUBY_REFERENCES(t) = { +#define RUBY_REFERENCES_END RUBY_REF_END, }; + +/* gc.c */ + +RBIMPL_ATTR_COLD() +RBIMPL_ATTR_NORETURN() +/** + * Triggers out-of-memory error. If possible it raises ::rb_eNoMemError. But + * because we are running out of memory that is not always doable. This + * function tries hard to show something, but ultimately can die silently. + * + * @exception rb_eNoMemError Raises it if possible. + */ +void rb_memerror(void); + +RBIMPL_ATTR_PURE() +/** + * Queries if the GC is busy. + * + * @retval 0 It isn't. + * @retval 1 It is. + */ +int rb_during_gc(void); + +RBIMPL_ATTR_NONNULL((1)) +/** + * Marks objects between the two pointers. This is one of the GC utility + * functions that you can call when you design your own + * ::rb_data_type_struct::dmark. + * + * @pre Continuous memory region from `start` to `end` shall be fully + * addressable. + * @param[out] start Pointer to an array of objects. + * @param[out] end Pointer that terminates the array of objects. + * @post Objects from `start` (included) to `end` (excluded) are marked. + * + * @internal + * + * `end` can be NULL... But that just results in no-op. + */ +void rb_gc_mark_locations(const VALUE *start, const VALUE *end); + +/** + * Identical to rb_mark_hash(), except it marks only values of the table and + * leave their associated keys unmarked. This is one of the GC utility + * functions that you can call when you design your own + * ::rb_data_type_struct::dmark. + * + * @warning Of course it can break GC. Leave it unused if unsure. + * @param[in] tbl A table to mark. + * @post Values stored in `tbl` are marked. + */ +void rb_mark_tbl(struct st_table *tbl); + +/** + * Identical to rb_mark_tbl(), except it marks objects using + * rb_gc_mark_movable(). This is one of the GC utility functions that you can + * call when you design your own ::rb_data_type_struct::dmark. + * + * @warning Of course it can break GC. Leave it unused if unsure. + * @param[in] tbl A table to mark. + * @post Values stored in `tbl` are marked. + */ +void rb_mark_tbl_no_pin(struct st_table *tbl); + +/** + * Identical to rb_mark_hash(), except it marks only keys of the table and + * leave their associated values unmarked. This is one of the GC utility + * functions that you can call when you design your own + * ::rb_data_type_struct::dmark. + * + * @warning Of course it can break GC. Leave it unused if unsure. + * @param[in] tbl A table to mark. + * @post Keys stored in `tbl` are marked. + */ +void rb_mark_set(struct st_table *tbl); + /** - * Inform the garbage collector that `valptr` points to a live Ruby object that - * should not be moved. Note that extensions should use this API on global - * constants instead of assuming constants defined in Ruby are always alive. - * Ruby code can remove global constants. + * Marks keys and values associated inside of the given table. This is one of + * the GC utility functions that you can call when you design your own + * ::rb_data_type_struct::dmark. + * + * @param[in] tbl A table to mark. + * @post Objects stored in `tbl` are marked. + */ +void rb_mark_hash(struct st_table *tbl); + +/** + * Updates references inside of tables. After you marked values using + * rb_mark_tbl_no_pin(), the objects inside of the table could of course be + * moved. This function is to fixup those references. You can call this from + * your ::rb_data_type_struct::dcompact. + * + * @param[out] ptr A table that potentially includes moved references. + * @post Moved references, if any, are corrected. + */ +void rb_gc_update_tbl_refs(st_table *ptr); + +/** + * Identical to rb_gc_mark(), except it allows the passed value be a + * non-object. For instance pointers to different type of memory regions are + * allowed here. Such values are silently ignored. This is one of the GC + * utility functions that you can call when you design your own + * ::rb_data_type_struct::dmark. + * + * @param[out] obj A possible object. + * @post `obj` is marked, if possible. + */ +void rb_gc_mark_maybe(VALUE obj); + +/** + * Marks an object. This is one of the GC utility functions that you can call + * when you design your own ::rb_data_type_struct::dmark. + * + * @param[out] obj Arbitrary Ruby object. + * @post `obj` is marked. + */ +void rb_gc_mark(VALUE obj); + +/** + * Maybe this is the only function provided for C extensions to control the + * pinning of objects, so let us describe it in detail. These days Ruby's GC + * is copying. As far as an object's physical address is guaranteed unused, it + * can move around the object space. Our GC engine rearranges these objects + * after it reclaims unreachable objects from our object space, so that the + * space is compact (improves memory locality). This is called the + * "compaction" phase, and works well most of the time... as far as there are + * no C extensions. C extensions complicate the scenario because Ruby core + * cannot detect any use of the physical address of an object inside of C + * functions. In order to prevent memory corruptions, objects observable from + * C extensions are "pinned"; they stick to where they are born until they die, + * just in case any C extensions touch their raw pointers. This variant of + * scheme is called "Mostly-Copying" garbage collector. Authors of C + * extensions, however, can extremely carefully write them to become + * compaction-aware. To do so avoid referring to a Ruby object from inside of + * your struct in the first place. But if that is not possible, use this + * function from your ::rb_data_type_struct::dmark then. This way objects + * marked using it are considered movable. If you chose this way you have to + * manually fix up locations of such moved pointers using rb_gc_location(). + * + * @see Bartlett, Joel F., "Compacting Garbage Collection with Ambiguous + * Roots", ACM SIGPLAN Lisp Pointers Volume 1 Issue 6 pp. 3-12, + * April-May-June, 1988. https://doi.org/10.1145/1317224.1317225 + * + * @param[in] obj Object that is movable. + * @post Values stored in `tbl` are marked. + */ +void rb_gc_mark_movable(VALUE obj); + +/** + * Finds a new "location" of an object. An object can be moved on compaction. + * This function projects its new abode, or just returns the passed object if + * not moved. This is one of the GC utility functions that you can call when + * you design your own ::rb_data_type_struct::dcompact. + * + * @param[in] obj An object, possibly already moved to somewhere else. + * @return An object, which holds the current contents of former `obj`. + */ +VALUE rb_gc_location(VALUE obj); + +/** + * Triggers a GC process. This was the only GC entry point that we had at the + * beginning. Over time our GC evolved. Now what this function does is just a + * very simplified variation of the entire GC algorithms. A series of + * procedures kicked by this API is called a "full" GC. + * + * - It immediately scans the entire object space to sort the dead. + * - It immediately reclaims any single dead bodies to reuse later. + * + * It is worth noting that the procedures above do not include evaluations of + * finalisers. They run later. + * + * @internal + * + * Finalisers are deferred until we can handle interrupts. See + * `rb_postponed_job_flush` in vm_trace.c. + * + * Of course there are GC that are not "full". For instance this one and the + * GC which runs when we are running out of memory are different. See + * `gc_profile_record_flag` defined in gc.c for the kinds of GC. + * + * In spite of the name this is not what everything that a GC can trigger. As + * of writing it seems this function does not trigger compaction. But this + * might change in future. + */ +void rb_gc(void); + +/** + * Copy&paste an object's finaliser to another. This is one of the GC utility + * functions that you can call when you design your own `initialize_copy`, + * `initialize_dup`, `initialize_clone`. + * + * @param[out] dst Destination object. + * @param[in] src Source object. + * @post `dst` and `src` share the same finaliser. + * + * @internal + * + * But isn't it easier for you to call super, and let `Object#initialize_copy` + * call this function instead? + */ +void rb_gc_copy_finalizer(VALUE dst, VALUE src); + +/** + * (Re-) enables GC. This makes sense only after you called rb_gc_disable(). + * + * @retval RUBY_Qtrue GC was disabled before. + * @retval RUBY_Qfalse GC was enabled before. + * @post GC is enabled. + * + * @internal + * + * This is one of such exceptional functions that does not raise both Ruby + * exceptions and C++ exceptions. + */ +VALUE rb_gc_enable(void); + +/** + * Disables GC. This prevents automatic GC runs when the process is running + * out of memory. Such situations shall result in rb_memerror(). However this + * does not prevent users from manually invoking rb_gc(). That should work. + * People abused this by disabling GC at the beginning of an event loop, + * process events without GC overheads, then manually force reclaiming garbage + * at the bottom of the loop. However because our GC is now much smarter than + * just calling rb_gc(), this technique is proven to be sub-optimal these days. + * It is believed that there is currently practically no needs of this + * function. + * + * @retval RUBY_Qtrue GC was disabled before. + * @retval RUBY_Qfalse GC was enabled before. + * @post GC is disabled. + */ +VALUE rb_gc_disable(void); + +/** + * Identical to rb_gc(), except the return value. + * + * @return Always returns ::RUBY_Qnil. + */ +VALUE rb_gc_start(void); + +/** + * Assigns a finaliser for an object. Each objects can have objects (typically + * blocks) that run immediately after that object dies. They are called + * finalisers of an object. This function associates a finaliser object with a + * target object. + * + * @note Note that finalisers run _after_ the object they finalise dies. You + * cannot for instance call its methods. + * @note If your finaliser references the object it finalises that object + * loses any chance to become a garbage; effectively leaks memory until + * the end of the process. + * + * @param[in] obj Target to finalise. + * @param[in] block Something `call`able. + * @exception rb_eRuntimeError Somehow `obj` cannot have finalisers. + * @exception rb_eFrozenError `obj` is frozen. + * @exception rb_eArgError `block` doesn't respond to `call`. + * @return The passed `block`. + * @post `block` runs after `obj` dies. + */ +VALUE rb_define_finalizer(VALUE obj, VALUE block); + +/** + * Modifies the object so that it has no finalisers at all. This function is + * mainly provided for symmetry. No practical usages can be thought of. + * + * @param[out] obj Object to clear its finalisers. + * @exception rb_eFrozenError `obj` is frozen. + * @return The passed `obj`. + * @post `obj` has no finalisers. + * @note There is no way to undefine a specific part of many finalisers + * that `obj` could have. All you can do is to clear them all. + */ +VALUE rb_undefine_finalizer(VALUE obj); + +/** + * Identical to rb_gc_stat(), with "count" parameter. + * + * @return Lifetime total number of runs of GC. + */ +size_t rb_gc_count(void); + +/** + * Obtains various GC related profiles. The parameter can be either a Symbol + * or a Hash. If a Hash is passed, it is filled with everything currently + * available. If a Symbol is passed just that portion is returned. + * + * Possible variations of keys you can pass here change from version to + * version. You can get the list of known keys by passing an empty hash and + * let it be filled. + * + * @param[in,out] key_or_buf A Symbol, or a Hash. + * @exception rb_eTypeError Neither Symbol nor Hash. + * @exception rb_eFrozenError Frozen hash is passed. + * @return In case a Hash is passed it returns 0. Otherwise the + * profile value associated with the given key is returned. + * @post In case a Hash is passed it is filled with values. + */ +size_t rb_gc_stat(VALUE key_or_buf); + +/** + * Obtains various info regarding the most recent GC run. This includes for + * instance the reason of the GC. The parameter can be either a Symbol or a + * Hash. If a Hash is passed, it is filled with everything currently + * available. If a Symbol is passed just that portion is returned. + * + * Possible variations of keys you can pass here change from version to + * version. You can get the list of known keys by passing an empty hash and + * let it be filled. + * + * @param[in,out] key_or_buf A Symbol, or a Hash. + * @exception rb_eTypeError Neither Symbol nor Hash. + * @exception rb_eFrozenError Frozen hash is passed. + * @return In case a Hash is passed it returns that hash. Otherwise + * the profile value associated with the given key is returned. + * @post In case a Hash is passed it is filled with values. + */ +VALUE rb_gc_latest_gc_info(VALUE key_or_buf); + +/** + * Informs that there are external memory usages. Our GC runs when we are + * running out of memory. The amount of memory, however, can increase/decrease + * behind-the-scene. For instance DLLs can allocate memories using `mmap(2)` + * etc, which are opaque to us. Registering such external allocations using + * this function enables proper detection of how much memories an object used + * as a whole. That will trigger GCs more often than it would otherwise. You + * can also pass negative numbers here, to indicate that such external + * allocations are gone. + * + * @param[in] diff Amount of memory increased(+)/decreased(-). + */ +void rb_gc_adjust_memory_usage(ssize_t diff); + +/** + * Inform the garbage collector that the global or static variable pointed by + * `valptr` stores a live Ruby object that should not be moved. Note that + * extensions should use this API on global constants instead of assuming + * constants defined in Ruby are always alive. Ruby code can remove global + * constants. + * + * Because this registration itself has a possibility to trigger a GC, this + * function must be called before any GC-able objects is assigned to the + * address pointed by `valptr`. */ void rb_gc_register_address(VALUE *valptr); @@ -54,4 +417,413 @@ void rb_gc_register_mark_object(VALUE object); RBIMPL_SYMBOL_EXPORT_END() +/** + * @private + * + * @deprecated This macro once was a thing in the old days, but makes no sense + * any longer today. Exists here for backwards compatibility + * only. You can safely forget about it. + */ +#undef USE_RGENGC +#define USE_RGENGC 1 + +/** + * @deprecated This macro seems broken. Setting this to anything other than + * zero just doesn't compile. We need to KonMari. + */ +#ifndef USE_RGENGC_LOGGING_WB_UNPROTECT +# define USE_RGENGC_LOGGING_WB_UNPROTECT 0 +#endif + +/** + * @private + * + * This is a compile-time flag to enable/disable write barrier for + * struct ::RArray. It has to be set at the time ruby itself compiles. Makes + * no sense for 3rd parties. + */ +#ifndef RGENGC_WB_PROTECTED_ARRAY +# define RGENGC_WB_PROTECTED_ARRAY 1 +#endif + +/** + * @private + * + * This is a compile-time flag to enable/disable write barrier for + * struct ::RHash. It has to be set at the time ruby itself compiles. Makes + * no sense for 3rd parties. + */ +#ifndef RGENGC_WB_PROTECTED_HASH +# define RGENGC_WB_PROTECTED_HASH 1 +#endif + +/** + * @private + * + * This is a compile-time flag to enable/disable write barrier for + * struct ::RStruct. It has to be set at the time ruby itself compiles. Makes + * no sense for 3rd parties. + */ +#ifndef RGENGC_WB_PROTECTED_STRUCT +# define RGENGC_WB_PROTECTED_STRUCT 1 +#endif + +/** + * @private + * + * This is a compile-time flag to enable/disable write barrier for + * struct ::RString. It has to be set at the time ruby itself compiles. Makes + * no sense for 3rd parties. + */ +#ifndef RGENGC_WB_PROTECTED_STRING +# define RGENGC_WB_PROTECTED_STRING 1 +#endif + +/** + * @private + * + * This is a compile-time flag to enable/disable write barrier for + * struct ::RObject. It has to be set at the time ruby itself compiles. Makes + * no sense for 3rd parties. + */ +#ifndef RGENGC_WB_PROTECTED_OBJECT +# define RGENGC_WB_PROTECTED_OBJECT 1 +#endif + +/** + * @private + * + * This is a compile-time flag to enable/disable write barrier for + * struct ::RRegexp. It has to be set at the time ruby itself compiles. Makes + * no sense for 3rd parties. + */ +#ifndef RGENGC_WB_PROTECTED_REGEXP +# define RGENGC_WB_PROTECTED_REGEXP 1 +#endif + +/** + * @private + * + * This is a compile-time flag to enable/disable write barrier for + * struct ::RMatch. It has to be set at the time ruby itself compiles. Makes + * no sense for 3rd parties. + */ +#ifndef RGENGC_WB_PROTECTED_MATCH +# define RGENGC_WB_PROTECTED_MATCH 1 +#endif + +/** + * @private + * + * This is a compile-time flag to enable/disable write barrier for + * struct ::RClass. It has to be set at the time ruby itself compiles. Makes + * no sense for 3rd parties. + */ +#ifndef RGENGC_WB_PROTECTED_CLASS +# define RGENGC_WB_PROTECTED_CLASS 1 +#endif + +/** + * @private + * + * This is a compile-time flag to enable/disable write barrier for + * struct ::RFloat. It has to be set at the time ruby itself compiles. Makes + * no sense for 3rd parties. + */ +#ifndef RGENGC_WB_PROTECTED_FLOAT +# define RGENGC_WB_PROTECTED_FLOAT 1 +#endif + +/** + * @private + * + * This is a compile-time flag to enable/disable write barrier for + * struct ::RComplex. It has to be set at the time ruby itself compiles. + * Makes no sense for 3rd parties. + */ +#ifndef RGENGC_WB_PROTECTED_COMPLEX +# define RGENGC_WB_PROTECTED_COMPLEX 1 +#endif + +/** + * @private + * + * This is a compile-time flag to enable/disable write barrier for + * struct ::RRational. It has to be set at the time ruby itself compiles. + * Makes no sense for 3rd parties. + */ +#ifndef RGENGC_WB_PROTECTED_RATIONAL +# define RGENGC_WB_PROTECTED_RATIONAL 1 +#endif + +/** + * @private + * + * This is a compile-time flag to enable/disable write barrier for + * struct ::RBignum. It has to be set at the time ruby itself compiles. Makes + * no sense for 3rd parties. + */ +#ifndef RGENGC_WB_PROTECTED_BIGNUM +# define RGENGC_WB_PROTECTED_BIGNUM 1 +#endif + +/** + * @private + * + * @deprecated This macro once was a thing in the old days, but makes no sense + * any longer today. Exists here for backwards compatibility + * only. You can safely forget about it. + * + * @internal + * + * @shyouhei doesn't think anybody uses this right now. + */ +#ifndef RGENGC_WB_PROTECTED_NODE_CREF +# define RGENGC_WB_PROTECTED_NODE_CREF 1 +#endif + +/** + * @defgroup rgengc Write barrier (WB) interfaces: + * + * @note The following core interfaces can be changed in the future. Please + * catch up if you want to insert WB into C-extensions correctly. + * + * @{ + */ + +/** + * Declaration of a "back" pointer. This is a write barrier for new reference + * from "old" generation to "young" generation. It writes `young` into + * `*slot`, which is a pointer inside of `old`. + * + * @param[in] old An old object. + * @param[in] slot A pointer inside of `old`. + * @param[out] young A young object. + */ +#define RB_OBJ_WRITE(old, slot, young) \ + RBIMPL_CAST(rb_obj_write((VALUE)(old), (VALUE *)(slot), (VALUE)(young), __FILE__, __LINE__)) + +/** + * Identical to #RB_OBJ_WRITE(), except it doesn't write any values, but only a + * WB declaration. `oldv` is replaced value with `b` (not used in current + * Ruby). + * + * @param[in] old An old object. + * @param[in] oldv An object previously stored inside of `old`. + * @param[out] young A young object. + */ +#define RB_OBJ_WRITTEN(old, oldv, young) \ + RBIMPL_CAST(rb_obj_written((VALUE)(old), (VALUE)(oldv), (VALUE)(young), __FILE__, __LINE__)) +/** @} */ + +#define OBJ_PROMOTED_RAW RB_OBJ_PROMOTED_RAW /**< @old{RB_OBJ_PROMOTED_RAW} */ +#define OBJ_PROMOTED RB_OBJ_PROMOTED /**< @old{RB_OBJ_PROMOTED} */ +#define OBJ_WB_UNPROTECT RB_OBJ_WB_UNPROTECT /**< @old{RB_OBJ_WB_UNPROTECT} */ + +/** + * Asserts that the passed object is not fenced by write barriers. Objects of + * such property do not contribute to generational GCs. They are scanned + * always. + * + * @param[out] x An object that would not be protected by the barrier. + */ +#define RB_OBJ_WB_UNPROTECT(x) rb_obj_wb_unprotect(x, __FILE__, __LINE__) + +/** + * Identical to #RB_OBJ_WB_UNPROTECT(), except it can also assert that the + * given object is of given type. + * + * @param[in] type One of `ARRAY`, `STRING`, etc. + * @param[out] obj An object of `type` that would not be protected. + * + * @internal + * + * @shyouhei doesn't understand why this has to be visible from extensions. + */ +#define RB_OBJ_WB_UNPROTECT_FOR(type, obj) \ + (RGENGC_WB_PROTECTED_##type ? OBJ_WB_UNPROTECT(obj) : obj) + +/** + * @private + * + * This is an implementation detail of rb_obj_wb_unprotect(). People don't use + * it directly. + */ +#define RGENGC_LOGGING_WB_UNPROTECT rb_gc_unprotect_logging + +/** @cond INTERNAL_MACRO */ +#define RB_OBJ_PROMOTED_RAW RB_OBJ_PROMOTED_RAW +#define RB_OBJ_PROMOTED RB_OBJ_PROMOTED +/** @endcond */ + +RBIMPL_SYMBOL_EXPORT_BEGIN() +/** + * This is the implementation of #RB_OBJ_WRITE(). People don't use it + * directly. + * + * @param[in] old An object that points to `young`. + * @param[out] young An object that is referenced from `old`. + */ +void rb_gc_writebarrier(VALUE old, VALUE young); + +/** + * This is the implementation of #RB_OBJ_WB_UNPROTECT(). People don't use it + * directly. + * + * @param[out] obj An object that does not participate in WB. + */ +void rb_gc_writebarrier_unprotect(VALUE obj); + +#if USE_RGENGC_LOGGING_WB_UNPROTECT +/** + * @private + * + * This is the implementation of #RGENGC_LOGGING_WB_UNPROTECT(). People + * don't use it directly. + * + * @param[in] objptr Don't know why this is a pointer to void but in + * reality this is a pointer to an object that is about + * to be un-protected. + * @param[in] filename Pass C's `__FILE__` here. + * @param[in] line Pass C's `__LINE__` here. + */ +void rb_gc_unprotect_logging(void *objptr, const char *filename, int line); +#endif + +RBIMPL_SYMBOL_EXPORT_END() + +RBIMPL_ATTR_PURE_UNLESS_DEBUG() +RBIMPL_ATTR_ARTIFICIAL() +/** + * This is the implementation of #RB_OBJ_PROMOTED(). People don't use it + * directly. + * + * @param[in] obj An object to query. + * @retval true The object is "promoted". + * @retval false The object is young. Have not experienced GC at all. + */ +static inline bool +RB_OBJ_PROMOTED_RAW(VALUE obj) +{ + RBIMPL_ASSERT_OR_ASSUME(RB_FL_ABLE(obj)); + return RB_FL_ANY_RAW(obj, RUBY_FL_PROMOTED); +} + +RBIMPL_ATTR_PURE_UNLESS_DEBUG() +RBIMPL_ATTR_ARTIFICIAL() +/** + * Tests if the object is "promoted" -- that is, whether the object experienced + * one or more GC marks. + * + * @param[in] obj An object to query. + * @retval true The object is "promoted". + * @retval false The object is young. Have not experienced GC at all. + * @note Hello, is anyone actively calling this function? @shyouhei have + * never seen any actual usages outside of the GC implementation + * itself. + */ +static inline bool +RB_OBJ_PROMOTED(VALUE obj) +{ + if (! RB_FL_ABLE(obj)) { + return false; + } + else { + return RB_OBJ_PROMOTED_RAW(obj); + } +} + +/** + * This is the implementation of #RB_OBJ_WB_UNPROTECT(). People don't use it + * directly. + * + * @param[out] x An object that does not participate in WB. + * @param[in] filename C's `__FILE__` of the caller function. + * @param[in] line C's `__LINE__` of the caller function. + * @return x + */ +static inline VALUE +rb_obj_wb_unprotect( + VALUE x, + RBIMPL_ATTR_MAYBE_UNUSED() + const char *filename, + RBIMPL_ATTR_MAYBE_UNUSED() + int line) +{ +#if USE_RGENGC_LOGGING_WB_UNPROTECT + RGENGC_LOGGING_WB_UNPROTECT(RBIMPL_CAST((void *)x), filename, line); +#endif + rb_gc_writebarrier_unprotect(x); + return x; +} + +/** + * @private + * + * This is the implementation of #RB_OBJ_WRITTEN(). People don't use it + * directly. + * + * @param[in] a An old object. + * @param[in] oldv An object previously stored inside of `old`. + * @param[out] b A young object. + * @param[in] filename C's `__FILE__` of the caller function. + * @param[in] line C's `__LINE__` of the caller function. + * @return a + */ +static inline VALUE +rb_obj_written( + VALUE a, + RBIMPL_ATTR_MAYBE_UNUSED() + VALUE oldv, + VALUE b, + RBIMPL_ATTR_MAYBE_UNUSED() + const char *filename, + RBIMPL_ATTR_MAYBE_UNUSED() + int line) +{ +#if USE_RGENGC_LOGGING_WB_UNPROTECT + RGENGC_LOGGING_OBJ_WRITTEN(a, oldv, b, filename, line); +#endif + + if (!RB_SPECIAL_CONST_P(b)) { + rb_gc_writebarrier(a, b); + } + + return a; +} + +/** + * @private + * + * This is the implementation of #RB_OBJ_WRITE(). People don't use it + * directly. + * + * @param[in] a An old object. + * @param[in] slot A pointer inside of `old`. + * @param[out] b A young object. + * @param[in] filename C's `__FILE__` of the caller function. + * @param[in] line C's `__LINE__` of the caller function. + * @return a + */ +static inline VALUE +rb_obj_write( + VALUE a, VALUE *slot, VALUE b, + RBIMPL_ATTR_MAYBE_UNUSED() + const char *filename, + RBIMPL_ATTR_MAYBE_UNUSED() + int line) +{ +#ifdef RGENGC_LOGGING_WRITE + RGENGC_LOGGING_WRITE(a, slot, b, filename, line); +#endif + + *slot = b; + + rb_obj_written(a, RUBY_Qundef /* ignore `oldv' now */, b, filename, line); + return a; +} + +RBIMPL_ATTR_DEPRECATED(("Will be removed soon")) +static inline void rb_gc_force_recycle(VALUE obj){} + #endif /* RBIMPL_GC_H */ diff --git a/include/ruby/internal/globals.h b/include/ruby/internal/globals.h index b478e30b04..60d8e5309a 100644 --- a/include/ruby/internal/globals.h +++ b/include/ruby/internal/globals.h @@ -82,6 +82,7 @@ RUBY_EXTERN VALUE rb_cInteger; /**< `Module` class. */ RUBY_EXTERN VALUE rb_cMatch; /**< `MatchData` class. */ RUBY_EXTERN VALUE rb_cMethod; /**< `Method` class. */ RUBY_EXTERN VALUE rb_cModule; /**< `Module` class. */ +RUBY_EXTERN VALUE rb_cRefinement; /**< `Refinement` class. */ RUBY_EXTERN VALUE rb_cNameErrorMesg; /**< `NameError::Message` class. */ RUBY_EXTERN VALUE rb_cNilClass; /**< `NilClass` class. */ RUBY_EXTERN VALUE rb_cNumeric; /**< `Numeric` class. */ @@ -93,7 +94,7 @@ RUBY_EXTERN VALUE rb_cRegexp; /**< `Regexp` class. */ RUBY_EXTERN VALUE rb_cStat; /**< `File::Stat` class. */ RUBY_EXTERN VALUE rb_cString; /**< `String` class. */ RUBY_EXTERN VALUE rb_cStruct; /**< `Struct` class. */ -RUBY_EXTERN VALUE rb_cSymbol; /**< `Sumbol` class. */ +RUBY_EXTERN VALUE rb_cSymbol; /**< `Symbol` class. */ RUBY_EXTERN VALUE rb_cThread; /**< `Thread` class. */ RUBY_EXTERN VALUE rb_cTime; /**< `Time` class. */ RUBY_EXTERN VALUE rb_cTrueClass; /**< `TrueClass` class. */ diff --git a/include/ruby/internal/has/builtin.h b/include/ruby/internal/has/builtin.h index 957aff8375..243ba2a34c 100644 --- a/include/ruby/internal/has/builtin.h +++ b/include/ruby/internal/has/builtin.h @@ -53,8 +53,10 @@ # define RBIMPL_HAS_BUILTIN___builtin_assume 0 # /* See http://gcc.gnu.org/bugzilla/show_bug.cgi?id=52624 for bswap16. */ # define RBIMPL_HAS_BUILTIN___builtin_bswap16 RBIMPL_COMPILER_SINCE(GCC, 4, 8, 0) +#ifndef __OpenBSD__ # define RBIMPL_HAS_BUILTIN___builtin_bswap32 RBIMPL_COMPILER_SINCE(GCC, 3, 6, 0) # define RBIMPL_HAS_BUILTIN___builtin_bswap64 RBIMPL_COMPILER_SINCE(GCC, 3, 6, 0) +#endif # define RBIMPL_HAS_BUILTIN___builtin_clz RBIMPL_COMPILER_SINCE(GCC, 3, 6, 0) # define RBIMPL_HAS_BUILTIN___builtin_clzl RBIMPL_COMPILER_SINCE(GCC, 3, 6, 0) # define RBIMPL_HAS_BUILTIN___builtin_clzll RBIMPL_COMPILER_SINCE(GCC, 3, 6, 0) @@ -76,10 +78,6 @@ # define RBIMPL_HAS_BUILTIN___builtin_unreachable RBIMPL_COMPILER_SINCE(GCC, 4, 5, 0) # /* Note that "0, 0, 0" might be inaccurate. */ -#elif RBIMPL_COMPILER_IS(MSVC) -# /* MSVC has UNREACHABLE, but that is not __builtin_unreachable. */ -# define RBIMPL_HAS_BUILTIN(_) 0 - #else # /* Take config.h definition when available */ # define RBIMPL_HAS_BUILTIN(_) ((RBIMPL_HAS_BUILTIN_ ## _)+0) @@ -109,7 +107,7 @@ # define RBIMPL_HAS_BUILTIN___builtin_rotateright64 0 # define RBIMPL_HAS_BUILTIN___builtin_popcountll HAVE_BUILTIN___BUILTIN_POPCOUNTLL # define RBIMPL_HAS_BUILTIN___builtin_sub_overflow HAVE_BUILTIN___BUILTIN_SUB_OVERFLOW -# if defined(UNREACHABLE) +# if defined(HAVE___BUILTIN_UNREACHABLE) # define RBIMPL_HAS_BUILTIN___builtin_unreachable 1 # else # define RBIMPL_HAS_BUILTIN___builtin_unreachable 0 diff --git a/include/ruby/internal/has/c_attribute.h b/include/ruby/internal/has/c_attribute.h index c5c48867bf..69b0f402cd 100644 --- a/include/ruby/internal/has/c_attribute.h +++ b/include/ruby/internal/has/c_attribute.h @@ -21,11 +21,23 @@ * @brief Defines #RBIMPL_HAS_C_ATTRIBUTE. */ +#include "ruby/internal/has/extension.h" +#include "ruby/internal/has/warning.h" + /** Wraps (or simulates) `__has_c_attribute`. */ #if defined(__cplusplus) # /* Makes no sense. */ # define RBIMPL_HAS_C_ATTRIBUTE(_) 0 +#elif RBIMPL_HAS_EXTENSION(c_attributes) +# /* Hmm. It seems Clang 17 has this macro defined even when -std=c99 mode, +# * _and_ fails to compile complaining that attributes are C2X feature. We +# * need to work around this nonsense. */ +# define RBIMPL_HAS_C_ATTRIBUTE(_) __has_c_attribute(_) + +#elif RBIMPL_HAS_WARNING("-Wc2x-extensions") +# define RBIMPL_HAS_C_ATTRIBUTE(_) 0 + #elif defined(__has_c_attribute) # define RBIMPL_HAS_C_ATTRIBUTE(_) __has_c_attribute(_) diff --git a/include/ruby/internal/intern/array.h b/include/ruby/internal/intern/array.h index 17964bf810..1909fdf17b 100644 --- a/include/ruby/internal/intern/array.h +++ b/include/ruby/internal/intern/array.h @@ -107,14 +107,14 @@ VALUE rb_ary_new_from_args(long n, ...); VALUE rb_ary_new_from_values(long n, const VALUE *elts); /** - * Allocates a "temporary" array. This is a hidden empty array. Handy on - * occasions. + * Allocates a hidden (no class) empty array. * * @param[in] capa Designed capacity of the array. * @return A hidden, empty array. * @see rb_obj_hide() */ -VALUE rb_ary_tmp_new(long capa); +VALUE rb_ary_hidden_new(long capa); +#define rb_ary_tmp_new rb_ary_hidden_new /** * Destroys the given array for no reason. @@ -187,7 +187,7 @@ VALUE rb_ary_shared_with_p(VALUE lhs, VALUE rhs); * : (int i) -> T? * | (int beg, int len) -> ::Array[T]? * | (Range[int] r) -> ::Array[T]? - * | (ArithmeticSequence as) -> ::Array[T]? # This also raises RagneError. + * | (ArithmeticSequence as) -> ::Array[T]? # This also raises RangeError. * end * ``` */ diff --git a/include/ruby/internal/intern/bignum.h b/include/ruby/internal/intern/bignum.h index 43d68018de..c27f77a1fb 100644 --- a/include/ruby/internal/intern/bignum.h +++ b/include/ruby/internal/intern/bignum.h @@ -51,7 +51,7 @@ RBIMPL_SYMBOL_EXPORT_BEGIN() VALUE rb_big_new(size_t len, int sign); /** - * Queries if the passed bignum instance is a "bigzro". What is a bigzero? + * Queries if the passed bignum instance is a "bigzero". What is a bigzero? * Well, bignums are for very big integers, but can also represent tiny ones * like -1, 0, 1. Bigzero are instances of bignums whose values are zero. * Knowing if a bignum is bigzero can be handy on occasions, like for instance @@ -793,7 +793,7 @@ size_t rb_absint_size(VALUE val, int *nlz_bits_ret); * @exception rb_eTypeError `val` doesn't respond to `#to_int`. * @retval (size_t)-1 Overflowed. * @retval otherwise - `((val_numbits * CHAR_BIT + word_numbits - 1) / word_numbits)`, + * `((val_numbits * CHAR_BIT + word_numbits - 1) / word_numbits)`, * where val_numbits is the number of bits of `abs(val)`. * @post If `nlz_bits_ret` is not `NULL` and there is no overflow, * `(return_value * word_numbits - val_numbits)` is stored in diff --git a/include/ruby/internal/intern/class.h b/include/ruby/internal/intern/class.h index af0c0768b8..357af5d176 100644 --- a/include/ruby/internal/intern/class.h +++ b/include/ruby/internal/intern/class.h @@ -88,8 +88,8 @@ VALUE rb_define_class_id(ID id, VALUE super); * @post `outer::id` refers the returned class. * @note If a class named `id` is already defined and its superclass is * `super`, the function just returns the defined class. - * @note The compaction GC does not move classes returned by this - * function. + * @note The GC does not collect nor move classes returned by this + * function. They are immortal. */ VALUE rb_define_class_id_under(VALUE outer, ID id, VALUE super); @@ -100,6 +100,14 @@ VALUE rb_define_class_id_under(VALUE outer, ID id, VALUE super); */ VALUE rb_module_new(void); + +/** + * Creates a new, anonymous refinement. + * + * @return An anonymous refinement. + */ +VALUE rb_refinement_new(void); + /** * This is a very badly designed API that creates an anonymous module. * @@ -119,8 +127,8 @@ VALUE rb_define_module_id(ID id); * constant is not a module. * @return The created module. * @post `outer::id` refers the returned module. - * @note The compaction GC does not move classes returned by this - * function. + * @note The GC does not collect nor move classes returned by this + * function. They are immortal. */ VALUE rb_define_module_id_under(VALUE outer, ID id); @@ -150,7 +158,7 @@ VALUE rb_mod_included_modules(VALUE mod); VALUE rb_mod_include_p(VALUE child, VALUE parent); /** - * Queries the module's ancestors. This routine gathers classes and modules + * Queries the module's ancestors. This routine gathers classes and modules * that the passed module either inherits, includes, or prepends, then * recursively applies that routine again and again to the collected entries * until the list doesn't grow up. @@ -167,6 +175,44 @@ VALUE rb_mod_include_p(VALUE child, VALUE parent); VALUE rb_mod_ancestors(VALUE mod); /** + * Queries the class's descendants. This routine gathers classes that are + * subclasses of the given class (or subclasses of those subclasses, etc.), + * returning an array of classes that have the given class as an ancestor. + * The returned array does not include the given class or singleton classes. + * + * @param[in] klass A class. + * @return An array of classes where `klass` is an ancestor. + * + * @internal + */ +VALUE rb_class_descendants(VALUE klass); + +/** + * Queries the class's direct descendants. This routine gathers classes that are + * direct subclasses of the given class, + * returning an array of classes that have the given class as a superclass. + * The returned array does not include singleton classes. + * + * @param[in] klass A class. + * @return An array of classes where `klass` is the `superclass`. + * + * @internal + */ +VALUE rb_class_subclasses(VALUE klass); + + +/** + * Returns the attached object for a singleton class. + * If the given class is not a singleton class, raises a TypeError. + * + * @param[in] klass A class. + * @return The object which has the singleton class `klass`. + * + * @internal + */ +VALUE rb_class_attached_object(VALUE klass); + +/** * Generates an array of symbols, which are the list of method names defined in * the passed class. * diff --git a/include/ruby/internal/intern/cont.h b/include/ruby/internal/intern/cont.h index b0d9137dd9..32647f48aa 100644 --- a/include/ruby/internal/intern/cont.h +++ b/include/ruby/internal/intern/cont.h @@ -39,6 +39,28 @@ RBIMPL_SYMBOL_EXPORT_BEGIN() VALUE rb_fiber_new(rb_block_call_func_t func, VALUE callback_obj); /** + * Creates a Fiber instance from a C-backended block with the specified + * storage. + * + * If the given storage is Qundef or Qtrue, this function is equivalent to + * rb_fiber_new() which inherits storage from the current fiber. + * + * Specifying Qtrue is experimental and may be changed in the future. + * + * If the given storage is Qnil, this function will lazy initialize the + * internal storage which starts of empty (without any inheritance). + * + * Otherwise, the given storage is used as the internal storage. + * + * @param[in] func A function, to become the fiber's body. + * @param[in] callback_obj Passed as-is to `func`. + * @param[in] storage The way to set up the storage for the fiber. + * @return An allocated new instance of rb_cFiber, which is ready to be + * "resume"d. + */ +VALUE rb_fiber_new_storage(rb_block_call_func_t func, VALUE callback_obj, VALUE storage); + +/** * Queries the fiber which is calling this function. Any ruby execution * context has its fiber, either explicitly or implicitly. * @@ -139,8 +161,7 @@ VALUE rb_fiber_resume_kw(VALUE fiber, int argc, const VALUE *argv, int kw_splat) * fiber then suspends its execution until next time it is resumed. * * This function can also raise arbitrary exceptions injected from outside of - * the fiber, using `Fiber#raise` Ruby level API. There is no way to do that - * from C though. + * the fiber using rb_fiber_raise(). * * ```ruby * exc = Class.new Exception @@ -159,12 +180,6 @@ VALUE rb_fiber_resume_kw(VALUE fiber, int argc, const VALUE *argv, int kw_splat) * @param[in] argv Passed to rb_fiber_resume(). * @exception rb_eException (See above) * @return (See rb_fiber_resume() for details) - * - * @internal - * - * "There is no way to do that from C" is a lie. But @shyouhei doesn't think - * this very intentionally obfuscated way to raise arbitrary exceptions from C - * is an official C API. Extension libraries must not know this fact. */ VALUE rb_fiber_yield(int argc, const VALUE *argv); @@ -239,7 +254,28 @@ VALUE rb_fiber_transfer(VALUE fiber, int argc, const VALUE *argv); */ VALUE rb_fiber_transfer_kw(VALUE fiber, int argc, const VALUE *argv, int kw_splat); -VALUE rb_fiber_raise(VALUE fiber, int argc, VALUE *argv); +/** + * Identical to rb_fiber_resume() but instead of resuming normal execution of + * the passed fiber, it raises the given exception in it. From inside of the + * fiber this would be seen as if rb_fiber_yield() raised. + * + * This function does return in case the passed fiber gracefully handled the + * passed exception. But if it does not, the raised exception propagates out + * of the passed fiber; this function then does not return. + * + * Parameters are passed to rb_make_exception() to create an exception object. + * See its document for what are allowed here. + * + * It is a failure to call this function against a fiber which is resuming, + * have never run yet, or has already finished running. + * + * @param[out] fiber Where exception is raised. + * @param[in] argc Passed as-is to rb_make_exception(). + * @param[in] argv Passed as-is to rb_make_exception(). + * @exception rb_eFiberError `fiber` is terminated etc. + * @return (See rb_fiber_resume() for details) + */ +VALUE rb_fiber_raise(VALUE fiber, int argc, const VALUE *argv); RBIMPL_SYMBOL_EXPORT_END() diff --git a/include/ruby/internal/intern/error.h b/include/ruby/internal/intern/error.h index 37d3b8592b..11e147a121 100644 --- a/include/ruby/internal/intern/error.h +++ b/include/ruby/internal/intern/error.h @@ -38,8 +38,6 @@ #define rb_exc_new3 rb_exc_new_str /**< @old{rb_exc_new_str} */ /** @cond INTERNAL_MACRO */ -#define rb_check_trusted rb_check_trusted -#define rb_check_trusted_inline rb_check_trusted #define rb_check_arity rb_check_arity /** @endcond */ @@ -192,7 +190,6 @@ RBIMPL_ATTR_NONNULL(()) */ void rb_error_frozen(const char *what); -RBIMPL_ATTR_NORETURN() /** * Identical to rb_error_frozen(), except it takes arbitrary Ruby object * instead of C's string. @@ -204,12 +201,6 @@ RBIMPL_ATTR_NORETURN() void rb_error_frozen_object(VALUE what); /** - * @deprecated Does nothing. This method is deprecated and will be removed in - * Ruby 3.2. - */ -void rb_error_untrusted(VALUE); - -/** * Queries if the passed object is frozen. * * @param[in] obj Target object to test frozen-ness. @@ -219,12 +210,6 @@ void rb_error_untrusted(VALUE); void rb_check_frozen(VALUE obj); /** - * @deprecated Does nothing. This method is deprecated and will be removed in - * Ruby 3.2. - */ -void rb_check_trusted(VALUE); - -/** * Ensures that the passed object can be `initialize_copy` relationship. When * you implement your own one you would better call this at the right beginning * of your implementation. @@ -249,7 +234,7 @@ RBIMPL_ATTR_NORETURN() * @param[in] max Maximum allowed `argc`. * @exception rb_eArgError Always. */ -MJIT_STATIC void rb_error_arity(int argc, int min, int max); +void rb_error_arity(int argc, int min, int max); RBIMPL_SYMBOL_EXPORT_END() diff --git a/include/ruby/internal/intern/file.h b/include/ruby/internal/intern/file.h index 8e98ba08f8..79820fdc61 100644 --- a/include/ruby/internal/intern/file.h +++ b/include/ruby/internal/intern/file.h @@ -187,6 +187,27 @@ RBIMPL_ATTR_PURE() */ int rb_is_absolute_path(const char *path); +/** + * Queries the file size of the given file. Because this function calls + * `fstat(2)` internally, it is a failure to pass a closed file to this + * function. + * + * This function flushes the passed file's buffer if any. Can take time. + * + * @param[in] file A file object. + * @exception rb_eFrozenError `file` is frozen. + * @exception rb_eIOError `file` is closed. + * @exception rb_eSystemCallError Permission denied etc. + * @exception rb_eNoMethodError The given non-file object doesn't respond + * to `#size`. + * @return The size of the passed file. + * @note Passing a non-regular file such as a UNIX domain socket to this + * function is not a failure. But the return value is + * unpredictable. POSIX's `<sys/stat.h>` states that "the use of + * this field is unspecified" then. + */ +rb_off_t rb_file_size(VALUE file); + RBIMPL_SYMBOL_EXPORT_END() #endif /* RBIMPL_INTERN_FILE_H */ diff --git a/include/ruby/internal/intern/gc.h b/include/ruby/internal/intern/gc.h deleted file mode 100644 index 1617a7cef6..0000000000 --- a/include/ruby/internal/intern/gc.h +++ /dev/null @@ -1,390 +0,0 @@ -#ifndef RBIMPL_INTERN_GC_H /*-*-C++-*-vi:se ft=cpp:*/ -#define RBIMPL_INTERN_GC_H -/** - * @file - * @author Ruby developers <ruby-core@ruby-lang.org> - * @copyright This file is a part of the programming language Ruby. - * Permission is hereby granted, to either redistribute and/or - * modify this file, provided that the conditions mentioned in the - * file COPYING are met. Consult the file for details. - * @warning Symbols prefixed with either `RBIMPL` or `rbimpl` are - * implementation details. Don't take them as canon. They could - * rapidly appear then vanish. The name (path) of this header file - * is also an implementation detail. Do not expect it to persist - * at the place it is now. Developers are free to move it anywhere - * anytime at will. - * @note To ruby-core: remember that this header can be possibly - * recursively included from extension libraries written in C++. - * Do not expect for instance `__VA_ARGS__` is always available. - * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. - * @brief Public APIs related to ::rb_mGC. - */ -#include "ruby/internal/config.h" - -#ifdef STDC_HEADERS -# include <stddef.h> /* size_t */ -#endif - -#if HAVE_SYS_TYPES_H -# include <sys/types.h> /* ssize_t */ -#endif - -#include "ruby/internal/attr/cold.h" -#include "ruby/internal/attr/noreturn.h" -#include "ruby/internal/attr/nonnull.h" -#include "ruby/internal/attr/pure.h" -#include "ruby/internal/dllexport.h" -#include "ruby/internal/value.h" - -RBIMPL_SYMBOL_EXPORT_BEGIN() - -/* gc.c */ - -RBIMPL_ATTR_COLD() -RBIMPL_ATTR_NORETURN() -/** - * Triggers out-of-memory error. If possible it raises ::rb_eNoMemError. But - * because we are running out of memory that is not always doable. This - * function tries hard to show something, but ultimately can die silently. - * - * @exception rb_eNoMemError Raises it if possible. - */ -void rb_memerror(void); - -RBIMPL_ATTR_PURE() -/** - * Queries if the GC is busy. - * - * @retval 0 It isn't. - * @retval 1 It is. - */ -int rb_during_gc(void); - -RBIMPL_ATTR_NONNULL((1)) -/** - * Marks objects between the two pointers. This is one of the GC utility - * functions that you can call when you design your own - * ::rb_data_type_struct::dmark. - * - * @pre Continuous memory region from `start` to `end` shall be fully - * addressable. - * @param[out] start Pointer to an array of objects. - * @param[out] end Pointer that terminates the array of objects. - * @post Objects from `start` to `end`, both inclusive, are marked. - * - * @internal - * - * `end` can be NULL... But that just results in no-op. - */ -void rb_gc_mark_locations(const VALUE *start, const VALUE *end); - -/** - * Identical to rb_mark_hash(), except it marks only values of the table and - * leave their associated keys unmarked. This is one of the GC utility - * functions that you can call when you design your own - * ::rb_data_type_struct::dmark. - * - * @warning Of course it can break GC. Leave it unused if unsure. - * @param[in] tbl A table to mark. - * @post Values stored in `tbl` are marked. - */ -void rb_mark_tbl(struct st_table *tbl); - -/** - * Identical to rb_mark_tbl(), except it marks objects using - * rb_gc_mark_movable(). This is one of the GC utility functions that you can - * call when you design your own ::rb_data_type_struct::dmark. - * - * @warning Of course it can break GC. Leave it unused if unsure. - * @param[in] tbl A table to mark. - * @post Values stored in `tbl` are marked. - */ -void rb_mark_tbl_no_pin(struct st_table *tbl); - -/** - * Identical to rb_mark_hash(), except it marks only keys of the table and - * leave their associated values unmarked. This is one of the GC utility - * functions that you can call when you design your own - * ::rb_data_type_struct::dmark. - * - * @warning Of course it can break GC. Leave it unused if unsure. - * @param[in] tbl A table to mark. - * @post Keys stored in `tbl` are marked. - */ -void rb_mark_set(struct st_table *tbl); - -/** - * Marks keys and values associated inside of the given table. This is one of - * the GC utility functions that you can call when you design your own - * ::rb_data_type_struct::dmark. - * - * @param[in] tbl A table to mark. - * @post Objects stored in `tbl` are marked. - */ -void rb_mark_hash(struct st_table *tbl); - -/** - * Updates references inside of tables. After you marked values using - * rb_mark_tbl_no_pin(), the objects inside of the table could of course be - * moved. This function is to fixup those references. You can call this from - * your ::rb_data_type_struct::dcompact. - * - * @param[out] ptr A table that potentially includes moved references. - * @post Moved references, if any, are corrected. - */ -void rb_gc_update_tbl_refs(st_table *ptr); - -/** - * Identical to rb_gc_mark(), except it allows the passed value be a - * non-object. For instance pointers to different type of memory regions are - * allowed here. Such values are silently ignored. This is one of the GC - * utility functions that you can call when you design your own - * ::rb_data_type_struct::dmark. - * - * @param[out] obj A possible object. - * @post `obj` is marked, if possible. - */ -void rb_gc_mark_maybe(VALUE obj); - -/** - * Marks an object. This is one of the GC utility functions that you can call - * when you design your own ::rb_data_type_struct::dmark. - * - * @param[out] obj Arbitrary Ruby object. - * @post `obj` is marked. - */ -void rb_gc_mark(VALUE obj); - -/** - * Maybe this is the only function provided for C extensions to control the - * pinning of objects, so let us describe it in detail. These days Ruby's GC - * is copying. As far as an object's physical address is guaranteed unused, it - * can move around the object space. Our GC engine rearranges these objects - * after it reclaims unreachable objects from our object space, so that the - * space is compact (improves memory locality). This is called the - * "compaction" phase, and works well most of the time... as far as there are - * no C extensions. C extensions complicate the scenario because Ruby core - * cannot detect any use of the physical address of an object inside of C - * functions. In order to prevent memory corruptions, objects observable from - * C extensions are "pinned"; they stick to where they are born until they die, - * just in case any C extensions touch their raw pointers. This variant of - * scheme is called "Mostly-Copying" garbage collector. Authors of C - * extensions, however, can extremely carefully write them to become - * compaction-aware. To do so avoid referring to a Ruby object from inside of - * your struct in the first place. But if that is not possible, use this - * function from your ::rb_data_type_struct::dmark then. This way objects - * marked using it are considered movable. If you chose this way you have to - * manually fix up locations of such moved pointers using rb_gc_location(). - * - * @see Bartlett, Joel F., "Compacting Garbage Collection with Ambiguous - * Roots", ACM SIGPLAN Lisp Pointers Volume 1 Issue 6 pp. 3-12, - * April-May-June, 1988. https://doi.org/10.1145/1317224.1317225 - * - * @param[in] obj Object that is movable. - * @post Values stored in `tbl` are marked. - */ -void rb_gc_mark_movable(VALUE obj); - -/** - * Finds a new "location" of an object. An object can be moved on compaction. - * This function projects its new abode, or just returns the passed object if - * not moved. This is one of the GC utility functions that you can call when - * you design your own ::rb_data_type_struct::dcompact. - * - * @param[in] obj An object, possibly already moved to somewhere else. - * @return An object, which holds the current contents of former `obj`. - */ -VALUE rb_gc_location(VALUE obj); - -/** - * Asserts that the passed object is no longer needed. Such objects are - * reclaimed sooner or later so this function is not mandatory. But sometimes - * you can know from your application knowledge that an object is surely dead - * at some point. Calling this as a hint can be a polite way. - * - * @param[out] obj Object, dead. - * @pre `obj` have never been passed to this function before. - * @post `obj` could be invalidated. - * @warning It is a failure to pass an object multiple times to this - * function. - */ -void rb_gc_force_recycle(VALUE obj); - -/** - * Triggers a GC process. This was the only GC entry point that we had at the - * beginning. Over time our GC evolved. Now what this function does is just a - * very simplified variation of the entire GC algorithms. A series of - * procedures kicked by this API is called a "full" GC. - * - * - It immediately scans the entire object space to sort the dead. - * - It immediately reclaims any single dead bodies to reuse later. - * - * It is worth noting that the procedures above do not include evaluations of - * finalisers. They run later. - * - * @internal - * - * Finalisers are deferred until we can handle interrupts. See - * `rb_postponed_job_flush` in vm_trace.c. - * - * Of course there are GC that are not "full". For instance this one and the - * GC which runs when we are running out of memory are different. See - * `gc_profile_record_flag` defined in gc.c for the kinds of GC. - * - * In spite of the name this is not what everything that a GC can trigger. As - * of writing it seems this function does not trigger compaction. But this - * might change in future. - */ -void rb_gc(void); - -/** - * Copy&paste an object's finaliser to another. This is one of the GC utility - * functions that you can call when you design your own `initialize_copy`, - * `initialize_dup`, `initialize_clone`. - * - * @param[out] dst Destination object. - * @param[in] src Source object. - * @post `dst` and `src` share the same finaliser. - * - * @internal - * - * But isn't it easier for you to call super, and let `Object#intialize_copy` - * call this function instead? - */ -void rb_gc_copy_finalizer(VALUE dst, VALUE src); - -/** - * (Re-) enables GC. This makes sense only after you called rb_gc_disable(). - * - * @retval RUBY_Qtrue GC was disabled before. - * @retval RUBY_Qfalse GC was enabled before. - * @post GC is enabled. - * - * @internal - * - * This is one of such exceptional functions that does not raise both Ruby - * exceptions and C++ exceptions. - */ -VALUE rb_gc_enable(void); - -/** - * Disables GC. This prevents automatic GC runs when the process is running - * out of memory. Such situations shall result in rb_memerror(). However this - * does not prevent users from manually invoking rb_gc(). That should work. - * People abused this by disabling GC at the beginning of an event loop, - * process events without GC overheads, then manually force reclaiming garbage - * at the bottom of the loop. However because our GC is now much smarter than - * just calling rb_gc(), this technique is proven to be sub-optimal these days. - * It is believed that there is currently practically no needs of this - * function. - * - * @retval RUBY_Qtrue GC was disabled before. - * @retval RUBY_Qfalse GC was enabled before. - * @post GC is disabled. - */ -VALUE rb_gc_disable(void); - -/** - * Identical to rb_gc(), except the return value. - * - * @return Always returns ::RUBY_Qnil. - */ -VALUE rb_gc_start(void); - -/** - * Assigns a finaliser for an object. Each objects can have objects (typically - * blocks) that run immediately after that object dies. They are called - * finalisers of an object. This function associates a finaliser object with a - * target object. - * - * @note Note that finalisers run _after_ the object they finalise dies. You - * cannot for instance call its methods. - * @note If your finaliser references the object it finalises that object - * loses any chance to become a garbage; effectively leaks memory until - * the end of the process. - * - * @param[in] obj Target to finalise. - * @param[in] block Something `call`able. - * @exception rb_eRuntimeError Somehow `obj` cannot have finalisers. - * @exception rb_eFrozenError `obj` is frozen. - * @exception rb_eArgError `block` doesn't respond to `call`. - * @return The passed `block`. - * @post `block` runs after `obj` dies. - */ -VALUE rb_define_finalizer(VALUE obj, VALUE block); - -/** - * Modifies the object so that it has no finalisers at all. This function is - * mainly provided for symmetry. No practical usages can be thought of. - * - * @param[out] obj Object to clear its finalisers. - * @exception rb_eFrozenError `obj` is frozen. - * @return The passed `obj`. - * @post `obj` has no finalisers. - * @note There is no way to undefine a specific part of many finalisers - * that `obj` could have. All you can do is to clear them all. - */ -VALUE rb_undefine_finalizer(VALUE obj); - -/** - * Identical to rb_gc_stat(), with "count" parameter. - * - * @return Lifetime total number of runs of GC. - */ -size_t rb_gc_count(void); - -/** - * Obtains various GC related profiles. The parameter can be either a Symbol - * or a Hash. If a Hash is passed, it is filled with everything currently - * available. If a Symbol is passed just that portion is returned. - * - * Possible variations of keys you can pass here change from version to - * version. You can get the list of known keys by passing an empty hash and - * let it be filled. - * - * @param[in,out] key_or_buf A Symbol, or a Hash. - * @exception rb_eTypeError Neither Symbol nor Hash. - * @exception rb_eFrozenError Frozen hash is passed. - * @return In case a Hash is passed it returns 0. Otherwise the - * profile value associated with the given key is returned. - * @post In case a Hash is passed it is filled with values. - */ -size_t rb_gc_stat(VALUE key_or_buf); - -/** - * Obtains various info regarding the most recent GC run. This includes for - * instance the reason of the GC. The parameter can be either a Symbol or a - * Hash. If a Hash is passed, it is filled with everything currently - * available. If a Symbol is passed just that portion is returned. - * - * Possible variations of keys you can pass here change from version to - * version. You can get the list of known keys by passing an empty hash and - * let it be filled. - * - * @param[in,out] key_or_buf A Symbol, or a Hash. - * @exception rb_eTypeError Neither Symbol nor Hash. - * @exception rb_eFrozenError Frozen hash is passed. - * @return In case a Hash is passed it returns that hash. Otherwise - * the profile value associated with the given key is returned. - * @post In case a Hash is passed it is filled with values. - */ -VALUE rb_gc_latest_gc_info(VALUE key_or_buf); - -/** - * Informs that there are external memory usages. Our GC runs when we are - * running out of memory. The amount of memory, however, can increase/decrease - * behind-the-scene. For instance DLLs can allocate memories using `mmap(2)` - * etc, which are opaque to us. Registering such external allocations using - * this function enables proper detection of how much memories an object used - * as a whole. That will trigger GCs more often than it would otherwise. You - * can also pass negative numbers here, to indicate that such external - * allocations are gone. - * - * @param[in] diff Amount of memory increased(+)/decreased(-). - */ -void rb_gc_adjust_memory_usage(ssize_t diff); - -RBIMPL_SYMBOL_EXPORT_END() - -#endif /* RBIMPL_INTERN_GC_H */ diff --git a/include/ruby/internal/intern/hash.h b/include/ruby/internal/intern/hash.h index 9d2ce8279a..af8dfd5d8f 100644 --- a/include/ruby/internal/intern/hash.h +++ b/include/ruby/internal/intern/hash.h @@ -107,6 +107,17 @@ VALUE rb_hash(VALUE obj); VALUE rb_hash_new(void); /** + * Identical to rb_hash_new(), except it additionally specifies how many keys + * it is expected to contain. This way you can create a hash that is large enough + * for your need. For large hashes it means it won't need to be reallocated and + * rehashed as much, improving performance. + * + * @param[in] capa Designed capacity of the hash. + * @return An empty Hash, whose capacity is `capa`. + */ +VALUE rb_hash_new_capa(long capa); + +/** * Duplicates a hash. * * @param[in] hash An instance of ::rb_cHash. @@ -288,15 +299,6 @@ int rb_path_check(const char *path); /* hash.c */ /** - * @deprecated This function once was a thing in the old days, but makes no - * sense any longer today. Exists here for backwards - * compatibility only. You can safely forget about it. - * - * @return 0 always. - */ -int rb_env_path_tainted(void); - -/** * Destructively removes every environment variables of the running process. * * @return The `ENV` object. diff --git a/include/ruby/internal/intern/load.h b/include/ruby/internal/intern/load.h index 288a16c2ec..9ceb98c2e4 100644 --- a/include/ruby/internal/intern/load.h +++ b/include/ruby/internal/intern/load.h @@ -177,6 +177,43 @@ VALUE rb_f_require(VALUE self, VALUE feature); VALUE rb_require_string(VALUE feature); /** + * Resolves and returns a symbol of a function in the native extension + * specified by the feature and symbol names. Extensions will use this function + * to access the symbols provided by other native extensions. + * + * @param[in] feature Name of a feature, e.g. `"json"`. + * @param[in] symbol Name of a symbol defined by the feature. + * @return The resolved symbol of a function, defined and externed by the + * specified feature. It may be NULL if the feature is not loaded, + * the feature is not extension, or the symbol is not found. + */ +void *rb_ext_resolve_symbol(const char *feature, const char *symbol); + +/** + * This macro is to provide backwards compatibility. It provides a way to + * define function prototypes and resolving function symbols in a safe way. + * + * ```CXX + * // prototypes + * #ifdef HAVE_RB_EXT_RESOLVE_SYMBOL + * VALUE *(*other_extension_func)(VALUE,VALUE); + * #else + * VALUE other_extension_func(VALUE); + * #endif + * + * // in Init_xxx() + * #ifdef HAVE_RB_EXT_RESOLVE_SYMBOL + * other_extension_func = \ + * (VALUE(*)(VALUE,VALUE))rb_ext_resolve_symbol(fname, sym_name); + * if (other_extension_func == NULL) { + * // raise your own error + * } + * #endif + * ``` + */ +#define HAVE_RB_EXT_RESOLVE_SYMBOL 1 + +/** * @name extension configuration * @{ */ diff --git a/include/ruby/internal/intern/object.h b/include/ruby/internal/intern/object.h index 6bb4ccb2fe..9daad7d046 100644 --- a/include/ruby/internal/intern/object.h +++ b/include/ruby/internal/intern/object.h @@ -92,8 +92,8 @@ VALUE rb_class_new_instance_kw(int argc, const VALUE *argv, VALUE klass, int kw_ * * @param[in] lhs Comparison left hand side. * @param[in] rhs Comparison right hand side. - * @retval RUBY_Qtrue They are equal. - * @retval RUBY_Qfalse Otherwise. + * @retval non-zero They are equal. + * @retval 0 Otherwise. * @note This function actually calls `lhs.eql?(rhs)` so you cannot * implement your class' `#eql?` method using it. */ @@ -151,13 +151,12 @@ VALUE rb_obj_is_kind_of(VALUE obj, VALUE klass); * @return An allocated, not yet initialised instance of `klass`. * @note It calls the allocator defined by rb_define_alloc_func(). You * cannot use this function to define an allocator. Use - * rb_newobj_of(), #TypedData_Make_Struct or others, instead. + * TypedData_Make_Struct or others, instead. * @note Usually prefer rb_class_new_instance() to rb_obj_alloc() and * rb_obj_call_init(). * @see rb_class_new_instance() * @see rb_obj_call_init() * @see rb_define_alloc_func() - * @see rb_newobj_of() * @see #TypedData_Make_Struct */ VALUE rb_obj_alloc(VALUE klass); @@ -202,74 +201,6 @@ VALUE rb_obj_dup(VALUE obj); */ VALUE rb_obj_init_copy(VALUE src, VALUE dst); -RBIMPL_ATTR_DEPRECATED_EXT(("taintedness turned out to be a wrong idea.")) -/** - * @deprecated This function once was a thing in the old days, but makes no - * sense any longer today. Exists here for backwards - * compatibility only. You can safely forget about it. - * - * @param[in] obj Object in question. - * @return Verbatim `obj`. - */ -VALUE rb_obj_taint(VALUE obj); - -RBIMPL_ATTR_PURE() -RBIMPL_ATTR_DEPRECATED_EXT(("taintedness turned out to be a wrong idea.")) -/** - * @deprecated This function once was a thing in the old days, but makes no - * sense any longer today. Exists here for backwards - * compatibility only. You can safely forget about it. - * - * @param[in] obj Object in question. - * @return Always returns ::RUBY_Qfalse. - */ -VALUE rb_obj_tainted(VALUE obj); - -RBIMPL_ATTR_DEPRECATED_EXT(("taintedness turned out to be a wrong idea.")) -/** - * @deprecated This function once was a thing in the old days, but makes no - * sense any longer today. Exists here for backwards - * compatibility only. You can safely forget about it. - * - * @param[in] obj Object in question. - * @return Verbatim `obj`. - */ -VALUE rb_obj_untaint(VALUE obj); - -RBIMPL_ATTR_DEPRECATED_EXT(("trustedness turned out to be a wrong idea.")) -/** - * @deprecated This function once was a thing in the old days, but makes no - * sense any longer today. Exists here for backwards - * compatibility only. You can safely forget about it. - * - * @param[in] obj Object in question. - * @return Verbatim `obj`. - */ -VALUE rb_obj_untrust(VALUE obj); - -RBIMPL_ATTR_PURE() -RBIMPL_ATTR_DEPRECATED_EXT(("trustedness turned out to be a wrong idea.")) -/** - * @deprecated This function once was a thing in the old days, but makes no - * sense any longer today. Exists here for backwards - * compatibility only. You can safely forget about it. - * - * @param[in] obj Object in question. - * @return Always returns ::RUBY_Qfalse. - */ -VALUE rb_obj_untrusted(VALUE obj); - -RBIMPL_ATTR_DEPRECATED_EXT(("trustedness turned out to be a wrong idea.")) -/** - * @deprecated This function once was a thing in the old days, but makes no - * sense any longer today. Exists here for backwards - * compatibility only. You can safely forget about it. - * - * @param[in] obj Object in question. - * @return Verbatim `obj`. - */ -VALUE rb_obj_trust(VALUE obj); - /** * Just calls rb_obj_freeze_inline() inside. Does this make any sens to * extension libraries? diff --git a/include/ruby/internal/intern/process.h b/include/ruby/internal/intern/process.h index 7a7b24ed4b..cfa5e13162 100644 --- a/include/ruby/internal/intern/process.h +++ b/include/ruby/internal/intern/process.h @@ -31,6 +31,15 @@ RBIMPL_SYMBOL_EXPORT_BEGIN() /* process.c */ /** + * Wait for the specified process to terminate, reap it, and return its status. + * + * @param[in] pid The process ID to wait for. + * @param[in] flags The flags to pass to waitpid(2). + * @return VALUE An instance of Process::Status. + */ +VALUE rb_process_status_wait(rb_pid_t pid, int flags); + +/** * Sets the "last status", or the `$?`. * * @param[in] status The termination status, as defined in `waitpid(3posix)`. @@ -247,7 +256,7 @@ rb_pid_t rb_spawn_err(int argc, const VALUE *argv, char *errbuf, size_t buflen); * * @internal * - * This function might or might not exist depending on `./confiugre` result. + * This function might or might not exist depending on `./configure` result. * It must be a portability hell. Better not use. */ VALUE rb_proc_times(VALUE _); diff --git a/include/ruby/internal/intern/re.h b/include/ruby/internal/intern/re.h index 31f5593275..4dd58b469b 100644 --- a/include/ruby/internal/intern/re.h +++ b/include/ruby/internal/intern/re.h @@ -87,11 +87,6 @@ void rb_match_busy(VALUE md); * @retval RUBY_Qfalse There is a `n`-th capture and is empty. * @retval RUBY_Qtrue There is a `n`-th capture that has something. * - * @internal - * - * @shyouhei wonders: why there are both rb_reg_match_defined() and - * rb_match_nth_defined, which are largely the same things, but do not share - * their implementations at all? */ VALUE rb_reg_nth_defined(int n, VALUE md); diff --git a/include/ruby/internal/intern/select.h b/include/ruby/internal/intern/select.h index fabc287cd1..6ba84c6e63 100644 --- a/include/ruby/internal/intern/select.h +++ b/include/ruby/internal/intern/select.h @@ -76,7 +76,7 @@ struct timeval; * * Although any file descriptors are possible here, it makes completely no * sense to pass a descriptor that is not `O_NONBLOCK`. If you want to know - * the reason for this limitatuon in detail, you might find this thread super + * the reason for this limitation in detail, you might find this thread super * interesting: https://lkml.org/lkml/2004/10/6/117 */ int rb_thread_fd_select(int nfds, rb_fdset_t *rfds, rb_fdset_t *wfds, rb_fdset_t *efds, struct timeval *timeout); diff --git a/include/ruby/internal/intern/select/largesize.h b/include/ruby/internal/intern/select/largesize.h index d156f62034..d65f088c06 100644 --- a/include/ruby/internal/intern/select/largesize.h +++ b/include/ruby/internal/intern/select/largesize.h @@ -35,9 +35,6 @@ * `select(2)` documents how to allocate fd_set dynamically. * http://www.openbsd.org/cgi-bin/man.cgi?query=select&manpath=OpenBSD+4.4 * - * - HP-UX documents how to allocate fd_set dynamically. - * http://docs.hp.com/en/B2355-60105/select.2.html - * * - Solaris 8 has `select_large_fdset` * * - Mac OS X 10.7 (Lion) diff --git a/include/ruby/internal/intern/select/posix.h b/include/ruby/internal/intern/select/posix.h index bfde159890..0a9b0b2e51 100644 --- a/include/ruby/internal/intern/select/posix.h +++ b/include/ruby/internal/intern/select/posix.h @@ -95,11 +95,10 @@ RBIMPL_ATTR_NOALIAS() * * @param[out] dst Target fdset. * @param[in] src Source fdset. - * @param[in] n Unused parameter. * @post `dst` is a copy of `src`. */ static inline void -rb_fd_dup(rb_fdset_t *dst, const fd_set *src, int n) +rb_fd_dup(rb_fdset_t *dst, const fd_set *src) { *dst = *src; } @@ -137,7 +136,7 @@ rb_fd_max(const rb_fdset_t *f) } /** @cond INTERNAL_MACRO */ -/* :FIXME: What are these? They don't exist for shibling implementations. */ +/* :FIXME: What are these? They don't exist for sibling implementations. */ #define rb_fd_init_copy(d, s) (*(d) = *(s)) #define rb_fd_term(f) ((void)(f)) /** @endcond */ diff --git a/include/ruby/internal/intern/signal.h b/include/ruby/internal/intern/signal.h index 84f7558404..4773788651 100644 --- a/include/ruby/internal/intern/signal.h +++ b/include/ruby/internal/intern/signal.h @@ -97,7 +97,7 @@ RBIMPL_ATTR_NONNULL(()) * - Case #11: When signo and PID are both negative, the behaviour of this * function depends on how `killpg(3)` works. On Linux, it seems such * attempt is strictly prohibited and `Errno::EINVAL` is raised. But on - * macOS, it seems it tries to to send the signal actually to the process + * macOS, it seems it tries to send the signal actually to the process * group. * * @note Above description is in fact different from how `kill(2)` works. @@ -113,12 +113,6 @@ RBIMPL_ATTR_NONNULL(()) */ VALUE rb_f_kill(int argc, const VALUE *argv); -/* This must be private, @shyouhei guesses. */ -#ifdef POSIX_SIGNAL -#define posix_signal ruby_posix_signal -void (*posix_signal(int, void (*)(int)))(int); -#endif - RBIMPL_ATTR_PURE() /** * Queries the name of the signal. It returns for instance `"KILL"` for diff --git a/include/ruby/internal/intern/string.h b/include/ruby/internal/intern/string.h index 0e2e6d6af7..6827563e8d 100644 --- a/include/ruby/internal/intern/string.h +++ b/include/ruby/internal/intern/string.h @@ -62,13 +62,13 @@ RBIMPL_SYMBOL_EXPORT_BEGIN() */ VALUE rb_str_new(const char *ptr, long len); -RBIMPL_ATTR_NONNULL(()) /** * Identical to rb_str_new(), except it assumes the passed pointer is a pointer * to a C string. * * @param[in] ptr A C string. * @exception rb_eNoMemError Failed to allocate memory. + * @exception rb_eArgError `ptr` is a null pointer. * @return An instance of ::rb_cString, of "binary" encoding, whose * contents are verbatim copy of `ptr`. * @pre `ptr` must not be a null pointer. @@ -122,37 +122,6 @@ VALUE rb_str_new_frozen(VALUE str); */ VALUE rb_str_new_with_class(VALUE obj, const char *ptr, long len); -RBIMPL_ATTR_NONNULL(()) -/** - * @deprecated This function once was a thing in the old days, but makes no - * sense any longer today. Exists here for backwards - * compatibility only. You can safely forget about it. - * - * @param[in] ptr A C string. - * @exception rb_eNoMemError Failed to allocate memory. - * @return An instance of ::rb_cString, of "binary" encoding, whose - * contents are verbatim copy of `ptr`. - * @pre `ptr` must not be a null pointer. - */ -VALUE rb_tainted_str_new_cstr(const char *ptr); - -/** - * @deprecated This function once was a thing in the old days, but makes no - * sense any longer today. Exists here for backwards - * compatibility only. You can safely forget about it. - * - * @param[in] ptr A memory region of `len` bytes length. - * @param[in] len Length of `ptr`, in bytes, not including the - * terminating NUL character. - * @exception rb_eNoMemError Failed to allocate `len+1` bytes. - * @exception rb_eArgError `len` is negative. - * @return An instance of ::rb_cString, of `len` bytes length, of - * "binary" encoding, whose contents are verbatim copy of `ptr`. - * @pre At least `len` bytes of continuous memory region shall be - * accessible via `ptr`. - */ -VALUE rb_tainted_str_new(const char *ptr, long len); - /** * Identical to rb_str_new(), except it generates a string of "default * external" encoding. @@ -333,7 +302,6 @@ VALUE rb_str_tmp_new(long len); */ VALUE rb_usascii_str_new(const char *ptr, long len); -RBIMPL_ATTR_NONNULL(()) /** * Identical to rb_str_new_cstr(), except it generates a string of "US ASCII" * encoding. It can also be seen as a routine Identical to @@ -342,6 +310,7 @@ RBIMPL_ATTR_NONNULL(()) * * @param[in] ptr A C string. * @exception rb_eNoMemError Failed to allocate memory. + * @exception rb_eArgError `ptr` is a null pointer. * @return An instance of ::rb_cString, of "US ASCII" encoding, whose * contents are verbatim copy of `ptr`. * @pre `ptr` must not be a null pointer. @@ -361,7 +330,6 @@ VALUE rb_usascii_str_new_cstr(const char *ptr); */ VALUE rb_utf8_str_new(const char *ptr, long len); -RBIMPL_ATTR_NONNULL(()) /** * Identical to rb_str_new_cstr(), except it generates a string of "UTF-8" * encoding. It can also be seen as a routine Identical to @@ -370,6 +338,7 @@ RBIMPL_ATTR_NONNULL(()) * * @param[in] ptr A C string. * @exception rb_eNoMemError Failed to allocate memory. + * @exception rb_eArgError `ptr` is a null pointer. * @return An instance of ::rb_cString, of "UTF-8" encoding, whose contents * are verbatim copy of `ptr`. * @pre `ptr` must not be a null pointer. @@ -443,7 +412,7 @@ VALUE rb_utf8_str_new_static(const char *ptr, long len); /** * Identical to rb_interned_str(), except it takes a Ruby's string instead of - * C's. It can also be seen as a routine identical to to rb_str_new_shared(), + * C's. It can also be seen as a routine identical to rb_str_new_shared(), * except it returns an infamous "f"string. * * @param[in] str An object of ::RString. @@ -485,7 +454,7 @@ VALUE rb_interned_str(const char *ptr, long len); RBIMPL_ATTR_NONNULL(()) /** * Identical to rb_interned_str(), except it assumes the passed pointer is a - * pointer to a C's string. It can also be seen as a routine identical to to + * pointer to a C's string. It can also be seen as a routine identical to * rb_str_to_interned_str(), except it takes a C's string instead of Ruby's. * Or it can also be seen as a routine identical to rb_str_new_cstr(), except * it returns an infamous "f"string. @@ -553,7 +522,6 @@ VALUE rb_str_buf_append(VALUE dst, VALUE src); /** @alias{rb_str_cat} */ VALUE rb_str_buf_cat(VALUE, const char*, long); -RBIMPL_ATTR_NONNULL(()) /** @alias{rb_str_cat_cstr} */ VALUE rb_str_buf_cat2(VALUE, const char*); @@ -634,6 +602,21 @@ VALUE rb_str_dup(VALUE str); VALUE rb_str_resurrect(VALUE str); /** + * Returns whether a string is chilled or not. + * + * This function is temporary and users must check for its presence using + * #ifdef HAVE_RB_STR_CHILLED_P. If HAVE_RB_STR_CHILLED_P is not defined, then + * strings can't be chilled. + * + * @param[in] str A string. + * @retval 1 The string is chilled. + * @retval 0 Otherwise. + */ +bool rb_str_chilled_p(VALUE str); + +#define HAVE_RB_STR_CHILLED_P 1 + +/** * Obtains a "temporary lock" of the string. This advisory locking mechanism * prevents other cooperating threads from tampering the receiver. The same * thing could be done via freeze mechanism, but this one can also be unlocked @@ -874,7 +857,6 @@ VALUE rb_str_resize(VALUE str, long len); */ VALUE rb_str_cat(VALUE dst, const char *src, long srclen); -RBIMPL_ATTR_NONNULL(()) /** * Identical to rb_str_cat(), except it assumes the passed pointer is a pointer * to a C string. @@ -882,6 +864,7 @@ RBIMPL_ATTR_NONNULL(()) * @param[out] dst Destination object. * @param[in] src Contents to append. * @exception rb_eArgError Result string too big. + * @exception rb_eArgError `src` is a null pointer. * @return The passed `dst`. * @pre `dst` must not be any arbitrary objects except ::RString. * @pre `src` must not be a null pointer. @@ -889,7 +872,6 @@ RBIMPL_ATTR_NONNULL(()) */ VALUE rb_str_cat_cstr(VALUE dst, const char *src); -RBIMPL_ATTR_NONNULL(()) /** @alias{rb_str_cat_cstr} */ VALUE rb_str_cat2(VALUE, const char*); @@ -1153,7 +1135,6 @@ VALUE rb_str_inspect(VALUE str); */ VALUE rb_str_dump(VALUE str); -RBIMPL_ATTR_NONNULL(()) /** * Divides the given string based on the given delimiter. This is the * 1-argument 0-block version of `String#split`. @@ -1161,6 +1142,7 @@ RBIMPL_ATTR_NONNULL(()) * @param[in] str Object in question to split. * @param[in] delim Delimiter, in C string. * @exception rb_eTypeError `str` has no implicit conversion to String. + * @exception rb_eArgError `delim` is a null pointer. * @return An array of strings, which are substrings of the passed `str`. * If `delim` is an empty C string (i.e. `""`), `str` is split into * each characters. If `delim` is a C string whose sole content is @@ -1400,22 +1382,6 @@ rbimpl_str_new_cstr(const char *str) return rb_str_new_static(str, len); } -RBIMPL_ATTR_DEPRECATED(("taintedness turned out to be a wrong idea.")) -/** - * @private - * - * This is an implementation detail. Don't bother. - * - * @param[in] str A C string literal. - * @return Corresponding Ruby string. - */ -static inline VALUE -rbimpl_tainted_str_new_cstr(const char *str) -{ - long len = rbimpl_strlen(str); - return rb_tainted_str_new(str, len); -} - RBIMPL_ATTR_NONNULL(()) /** * @private @@ -1602,22 +1568,6 @@ rbimpl_exc_new_cstr(VALUE exc, const char *str) rb_utf8_str_new) ((str), (len))) /** - * @deprecated This macro once was a thing in the old days, but makes no sense - * any longer today. Exists here for backwards compatibility - * only. You can safely forget about it. - * - * @param[in] str A C string. - * @exception rb_eNoMemError Failed to allocate memory. - * @return An instance of ::rb_cString, of "binary" encoding, whose - * contents are verbatim copy of `str`. - * @pre `str` must not be a null pointer. - */ -#define rb_tainted_str_new_cstr(str) \ - ((RBIMPL_CONSTANT_P(str) ? \ - rbimpl_tainted_str_new_cstr : \ - rb_tainted_str_new_cstr) (str)) - -/** * Identical to #rb_str_new_cstr, except it generates a string of "US ASCII" * encoding. It can also be seen as a routine Identical to * #rb_usascii_str_new, except it assumes the passed pointer is a pointer to a @@ -1741,7 +1691,6 @@ rbimpl_exc_new_cstr(VALUE exc, const char *str) #define rb_str_new3 rb_str_new_shared /**< @old{rb_str_new_shared} */ #define rb_str_new4 rb_str_new_frozen /**< @old{rb_str_new_frozen} */ #define rb_str_new5 rb_str_new_with_class /**< @old{rb_str_new_with_class} */ -#define rb_tainted_str_new2 rb_tainted_str_new_cstr /**< @old{rb_tainted_str_new_cstr} */ #define rb_str_buf_new2 rb_str_buf_new_cstr /**< @old{rb_str_buf_new_cstr} */ #define rb_usascii_str_new2 rb_usascii_str_new_cstr /**< @old{rb_usascii_str_new_cstr} */ #define rb_str_buf_cat rb_str_cat /**< @alias{rb_str_cat} */ diff --git a/include/ruby/internal/intern/struct.h b/include/ruby/internal/intern/struct.h index 312cf444e2..16b3fad4e0 100644 --- a/include/ruby/internal/intern/struct.h +++ b/include/ruby/internal/intern/struct.h @@ -46,14 +46,16 @@ VALUE rb_struct_new(VALUE klass, ...); * * @param[in] name Name of the class. * @param[in] ... Arbitrary number of `const char*`, terminated by - * zero. Each of which are the name of fields. + * NULL. Each of which are the name of fields. * @exception rb_eNameError `name` is not a constant name. * @exception rb_eTypeError `name` is already taken. - * @exception rb_eArgError Duplicated field name. + * @exception rb_eArgError Duplicated field name. * @return The defined class. * @post Global toplevel constant `name` is defined. * @note `name` is allowed to be a null pointer. This function creates * an anonymous struct class then. + * @note The GC does not collect nor move classes returned by this + * function. They are immortal. * * @internal * @@ -70,14 +72,16 @@ RBIMPL_ATTR_NONNULL((2)) * @param[out] space Namespace that the defining class shall reside. * @param[in] name Name of the class. * @param[in] ... Arbitrary number of `const char*`, terminated by - * zero. Each of which are the name of fields. + * NULL. Each of which are the name of fields. * @exception rb_eNameError `name` is not a constant name. * @exception rb_eTypeError `name` is already taken. - * @exception rb_eArgError Duplicated field name. + * @exception rb_eArgError Duplicated field name. * @return The defined class. * @post `name` is a constant under `space`. * @note In contrast to rb_struct_define(), it doesn't make any sense to * pass a null pointer to this function. + * @note The GC does not collect nor move classes returned by this + * function. They are immortal. */ VALUE rb_struct_define_under(VALUE space, const char *name, ...); @@ -164,10 +168,10 @@ VALUE rb_struct_alloc_noinit(VALUE klass); * @param[in] super Superclass of the defining class. * @param[in] func Must be 0 for extension libraries. * @param[in] ... Arbitrary number of `const char*`, terminated by - * zero. Each of which are the name of fields. + * NULL. Each of which are the name of fields. * @exception rb_eNameError `name` is not a constant name. * @exception rb_eTypeError `name` is already taken. - * @exception rb_eArgError Duplicated field name. + * @exception rb_eArgError Duplicated field name. * @return The defined class. * @post Global toplevel constant `name` is defined. * @note `name` is allowed to be a null pointer. This function creates @@ -187,17 +191,35 @@ RBIMPL_ATTR_NONNULL((2)) * @param[in] super Superclass of the defining class. * @param[in] alloc Must be 0 for extension libraries. * @param[in] ... Arbitrary number of `const char*`, terminated by - * zero. Each of which are the name of fields. + * NULL. Each of which are the name of fields. * @exception rb_eNameError `class_name` is not a constant name. * @exception rb_eTypeError `class_name` is already taken. - * @exception rb_eArgError Duplicated field name. + * @exception rb_eArgError Duplicated field name. * @return The defined class. * @post `class_name` is a constant under `outer`. * @note In contrast to rb_struct_define_without_accessor(), it doesn't * make any sense to pass a null name. + * @note The GC does not collect nor move classes returned by this + * function. They are immortal. */ VALUE rb_struct_define_without_accessor_under(VALUE outer, const char *class_name, VALUE super, rb_alloc_func_t alloc, ...); +/** + * Defines an anonymous data class. + * + * @endinternal + * + * @param[in] super Superclass of the defining class. Must be a + * descendant of ::rb_cData, or 0 as ::rb_cData. + * @param[in] ... Arbitrary number of `const char*`, terminated by + * NULL. Each of which are the name of fields. + * @exception rb_eArgError Duplicated field name. + * @return The defined class. + * @note The GC does not collect nor move classes returned by this + * function. They are immortal. + */ +VALUE rb_data_define(VALUE super, ...); + RBIMPL_SYMBOL_EXPORT_END() #endif /* RBIMPL_INTERN_STRUCT_H */ diff --git a/include/ruby/internal/intern/thread.h b/include/ruby/internal/intern/thread.h index 294e552fe9..716375acd7 100644 --- a/include/ruby/internal/intern/thread.h +++ b/include/ruby/internal/intern/thread.h @@ -46,7 +46,7 @@ void rb_thread_schedule(void); * * @param[in] fd A file descriptor. * @exception rb_eIOError Closed stream. - * @exception rb_eSystemCalleError Situations like EBADF. + * @exception rb_eSystemCallError Situations like EBADF. */ int rb_thread_wait_fd(int fd); @@ -56,7 +56,7 @@ int rb_thread_wait_fd(int fd); * * @param[in] fd A file descriptor. * @exception rb_eIOError Closed stream. - * @exception rb_eSystemCalleError Situations like EBADF. + * @exception rb_eSystemCallError Situations like EBADF. */ int rb_thread_fd_writable(int fd); diff --git a/include/ruby/internal/intern/vm.h b/include/ruby/internal/intern/vm.h index 562d30a6fe..29e0c7f534 100644 --- a/include/ruby/internal/intern/vm.h +++ b/include/ruby/internal/intern/vm.h @@ -229,8 +229,7 @@ void rb_define_alloc_func(VALUE klass, rb_alloc_func_t func); * restrict creation of an instance of a class. For example it rarely makes * sense for a DB adaptor class to allow programmers creating DB row objects * without querying the DB itself. You can kill sporadic creation of such - * objects then, by nullifying the allocator function using this API. Your - * object shall be allocated using #RB_NEWOBJ_OF() directly. + * objects then, by nullifying the allocator function using this API. * * @param[out] klass The class to modify. * @pre `klass` must be an instance of Class. @@ -247,21 +246,17 @@ void rb_undef_alloc_func(VALUE klass); * * @internal * - * Who cares? @shyouhei fins no practical usage of the return value. Maybe we + * Who cares? @shyouhei finds no practical usage of the return value. Maybe we * need KonMari. */ rb_alloc_func_t rb_get_alloc_func(VALUE klass); /** - * Clears the constant cache. Extension libraries should not bother such - * things. Just forget about this API (or even, the presence of constant - * cache). - * - * @internal - * - * Completely no idea why this function is defined in vm_method.c. + * Clears the inline constant caches associated with a particular ID. Extension + * libraries should not bother with such things. Just forget about this API (or + * even, the presence of constant caches). */ -void rb_clear_constant_cache(void); +void rb_clear_constant_cache_for_id(ID id); /** * Resembles `alias`. diff --git a/include/ruby/internal/interpreter.h b/include/ruby/internal/interpreter.h index 662d39c0ec..a10e7ad2d8 100644 --- a/include/ruby/internal/interpreter.h +++ b/include/ruby/internal/interpreter.h @@ -141,7 +141,7 @@ void ruby_show_copyright(void); * * @param[in] addr A pointer somewhere on the stack, near its bottom. */ -void ruby_init_stack(volatile VALUE *addr); +void ruby_init_stack(void *addr); /** * Initializes the VM and builtin libraries. diff --git a/include/ruby/internal/memory.h b/include/ruby/internal/memory.h index aa3464465d..270cc1ac8b 100644 --- a/include/ruby/internal/memory.h +++ b/include/ruby/internal/memory.h @@ -38,7 +38,7 @@ # include <alloca.h> #endif -#if defined(_MSC_VER) && defined(_WIN64) +#if defined(_MSC_VER) && defined(_M_AMD64) # include <intrin.h> # pragma intrinsic(_umul128) #endif @@ -56,13 +56,14 @@ #include "ruby/internal/has/builtin.h" #include "ruby/internal/stdalign.h" #include "ruby/internal/stdbool.h" +#include "ruby/internal/stdckdint.h" #include "ruby/internal/xmalloc.h" #include "ruby/backward/2/limits.h" #include "ruby/backward/2/long_long.h" #include "ruby/backward/2/assume.h" #include "ruby/defines.h" -/** @cond INTENAL_MACRO */ +/** @cond INTERNAL_MACRO */ /* Make alloca work the best possible way. */ #if defined(alloca) @@ -287,12 +288,12 @@ typedef uint128_t DSIZE_T; RBIMPL_CAST((type *)alloca(rbimpl_size_mul_or_raise(sizeof(type), (n)))) /** - * Identical to #RB_ALLOCV_N(), except it implicitly assumes the type of array - * is ::VALUE. + * Identical to #RB_ALLOCV_N(), except that it allocates a number of bytes and + * returns a void* . * * @param v A variable to hold the just-in-case opaque Ruby object. * @param n Size of allocation, in bytes. - * @return An array of `n` bytes of ::VALUE. + * @return A void pointer to `n` bytes storage. * @note `n` may be evaluated twice. */ #define RB_ALLOCV(v, n) \ @@ -363,7 +364,7 @@ typedef uint128_t DSIZE_T; * @return `p1`. * @post First `n` elements of `p2` are copied into `p1`. */ -#define MEMCPY(p1,p2,type,n) memcpy((p1), (p2), rbimpl_size_mul_or_raise(sizeof(type), (n))) +#define MEMCPY(p1,p2,type,n) ruby_nonempty_memcpy((p1), (p2), rbimpl_size_mul_or_raise(sizeof(type), (n))) /** * Handy macro to call memmove. @@ -567,7 +568,10 @@ rbimpl_size_mul_overflow(size_t x, size_t y) { struct rbimpl_size_mul_overflow_tag ret = { false, 0, }; -#if RBIMPL_HAS_BUILTIN(__builtin_mul_overflow) +#if defined(ckd_mul) + ret.left = ckd_mul(&ret.right, x, y); + +#elif RBIMPL_HAS_BUILTIN(__builtin_mul_overflow) ret.left = __builtin_mul_overflow(x, y, &ret.right); #elif defined(DSIZE_T) @@ -644,7 +648,6 @@ rb_alloc_tmp_buffer2(volatile VALUE *store, long count, size_t elsize) return rb_alloc_tmp_buffer_with_count(store, total_size, cnt); } -#if ! defined(__MINGW32__) && ! defined(__DOXYGEN__) RBIMPL_SYMBOL_EXPORT_BEGIN() RBIMPL_ATTR_NOALIAS() RBIMPL_ATTR_NONNULL((1)) @@ -663,8 +666,5 @@ ruby_nonempty_memcpy(void *dest, const void *src, size_t n) } } RBIMPL_SYMBOL_EXPORT_END() -#undef memcpy -#define memcpy ruby_nonempty_memcpy -#endif #endif /* RBIMPL_MEMORY_H */ diff --git a/include/ruby/internal/module.h b/include/ruby/internal/module.h index d678dd2102..97b0b2b8b0 100644 --- a/include/ruby/internal/module.h +++ b/include/ruby/internal/module.h @@ -56,8 +56,8 @@ RBIMPL_ATTR_NONNULL(()) * @post Top-level constant named `name` refers the returned class. * @note If a class named `name` is already defined and its superclass is * `super`, the function just returns the defined class. - * @note The compaction GC does not move classes returned by this - * function. + * @note The GC does not collect nor move classes returned by this + * function. They are immortal. * * @internal * @@ -75,8 +75,8 @@ RBIMPL_ATTR_NONNULL(()) * constant is not a module. * @return The created module. * @post Top-level constant named `name` refers the returned module. - * @note The compaction GC does not move classes returned by this - * function. + * @note The GC does not collect nor move modules returned by this + * function. They are immortal. * * @internal * @@ -103,8 +103,8 @@ RBIMPL_ATTR_NONNULL(()) * @post `outer::name` refers the returned class. * @note If a class named `name` is already defined and its superclass * is `super`, the function just returns the defined class. - * @note The compaction GC does not move classes returned by this - * function. + * @note The GC does not collect nor move classes returned by this + * function. They are immortal. */ VALUE rb_define_class_under(VALUE outer, const char *name, VALUE super); @@ -118,8 +118,8 @@ RBIMPL_ATTR_NONNULL(()) * the constant is not a class. * @return The created module. * @post `outer::name` refers the returned module. - * @note The compaction GC does not move classes returned by this - * function. + * @note The GC does not collect nor move modules returned by this + * function. They are immortal. */ VALUE rb_define_module_under(VALUE outer, const char *name); diff --git a/include/ruby/internal/newobj.h b/include/ruby/internal/newobj.h index a8a5557a25..6eee2fa5fa 100644 --- a/include/ruby/internal/newobj.h +++ b/include/ruby/internal/newobj.h @@ -29,63 +29,14 @@ #include "ruby/internal/value.h" #include "ruby/assert.h" -/** - * Declares, allocates, then assigns a new object to the given variable. - * - * @param obj Variable name. - * @param type Variable type. - * @exception rb_eNoMemError No space left. - * @return An allocated object, not initialised. - * @note Modern programs tend to use #NEWOBJ_OF instead. - * - * @internal - * - * :FIXME: Should we deprecate it? - */ -#define RB_NEWOBJ(obj,type) type *(obj) = RBIMPL_CAST((type *)rb_newobj()) - -/** - * Identical to #RB_NEWOBJ, except it also accepts the allocating object's - * class and flags. - * - * @param obj Variable name. - * @param type Variable type. - * @param klass Object's class. - * @param flags Object's flags. - * @exception rb_eNoMemError No space left. - * @return An allocated object, filled with the arguments. - */ -#define RB_NEWOBJ_OF(obj,type,klass,flags) type *(obj) = RBIMPL_CAST((type *)rb_newobj_of(klass, flags)) - -#define NEWOBJ RB_NEWOBJ /**< @old{RB_NEWOBJ} */ -#define NEWOBJ_OF RB_NEWOBJ_OF /**< @old{RB_NEWOBJ_OF} */ #define OBJSETUP rb_obj_setup /**< @old{rb_obj_setup} */ #define CLONESETUP rb_clone_setup /**< @old{rb_clone_setup} */ #define DUPSETUP rb_dup_setup /**< @old{rb_dup_setup} */ RBIMPL_SYMBOL_EXPORT_BEGIN() /** - * This is the implementation detail of #RB_NEWOBJ. - * - * @exception rb_eNoMemError No space left. - * @return An allocated object, not initialised. - */ -VALUE rb_newobj(void); - -/** - * This is the implementation detail of #RB_NEWOBJ_OF. - * - * @param klass Object's class. - * @param flags Object's flags. - * @exception rb_eNoMemError No space left. - * @return An allocated object, filled with the arguments. - */ -VALUE rb_newobj_of(VALUE klass, VALUE flags); - -/** * Fills common fields in the object. * - * @note Prefer rb_newobj_of() to this function. * @param[in,out] obj A Ruby object to be set up. * @param[in] klass `obj` will belong to this class. * @param[in] type One of ::ruby_value_type. @@ -172,6 +123,8 @@ RBIMPL_ATTR_DEPRECATED(("This is no longer how Object#clone works.")) static inline void rb_clone_setup(VALUE clone, VALUE obj) { + (void)clone; + (void)obj; return; } @@ -189,6 +142,8 @@ RBIMPL_ATTR_DEPRECATED(("This is no longer how Object#dup works.")) static inline void rb_dup_setup(VALUE dup, VALUE obj) { + (void)dup; + (void)obj; return; } diff --git a/include/ruby/internal/rgengc.h b/include/ruby/internal/rgengc.h deleted file mode 100644 index 7ea04442f6..0000000000 --- a/include/ruby/internal/rgengc.h +++ /dev/null @@ -1,443 +0,0 @@ -#ifndef RBIMPL_RGENGC_H /*-*-C++-*-vi:se ft=cpp:*/ -#define RBIMPL_RGENGC_H -/** - * @file - * @author Ruby developers <ruby-core@ruby-lang.org> - * @copyright This file is a part of the programming language Ruby. - * Permission is hereby granted, to either redistribute and/or - * modify this file, provided that the conditions mentioned in the - * file COPYING are met. Consult the file for details. - * @warning Symbols prefixed with either `RBIMPL` or `rbimpl` are - * implementation details. Don't take them as canon. They could - * rapidly appear then vanish. The name (path) of this header file - * is also an implementation detail. Do not expect it to persist - * at the place it is now. Developers are free to move it anywhere - * anytime at will. - * @note To ruby-core: remember that this header can be possibly - * recursively included from extension libraries written in C++. - * Do not expect for instance `__VA_ARGS__` is always available. - * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. - * @brief RGENGC write-barrier APIs. - * @see Sasada, K., "Gradual write-barrier insertion into a Ruby - * interpreter", in proceedings of the 2019 ACM SIGPLAN - * International Symposium on Memory Management (ISMM 2019), pp - * 115-121, 2019. https://doi.org/10.1145/3315573.3329986 - */ -#include "ruby/internal/attr/artificial.h" -#include "ruby/internal/attr/maybe_unused.h" -#include "ruby/internal/attr/pure.h" -#include "ruby/internal/dllexport.h" -#include "ruby/internal/special_consts.h" -#include "ruby/internal/stdbool.h" -#include "ruby/internal/value.h" -#include "ruby/assert.h" - -/** - * @private - * - * @deprecated This macro once was a thing in the old days, but makes no sense - * any longer today. Exists here for backwards compatibility - * only. You can safely forget about it. - */ -#undef USE_RGENGC -#define USE_RGENGC 1 - -/** - * @private - * - * This is a compile-time flag to enable/disable incremental GC feature. It - * has to be set at the time ruby itself compiles. Makes no sense for 3rd - * parties. It is safe for them to set this though; that just doesn't change - * anything. - */ -#ifndef USE_RINCGC -# define USE_RINCGC 1 -#endif - -/** - * @deprecated This macro seems broken. Setting this to anything other than - * zero just doesn't compile. We need to KonMari. - */ -#ifndef USE_RGENGC_LOGGING_WB_UNPROTECT -# define USE_RGENGC_LOGGING_WB_UNPROTECT 0 -#endif - -/** - * @private - * - * This is a compile-time flag to enable/disable write barrier for - * struct ::RArray. It has to be set at the time ruby itself compiles. Makes - * no sense for 3rd parties. - */ -#ifndef RGENGC_WB_PROTECTED_ARRAY -# define RGENGC_WB_PROTECTED_ARRAY 1 -#endif - -/** - * @private - * - * This is a compile-time flag to enable/disable write barrier for - * struct ::RHash. It has to be set at the time ruby itself compiles. Makes - * no sense for 3rd parties. - */ -#ifndef RGENGC_WB_PROTECTED_HASH -# define RGENGC_WB_PROTECTED_HASH 1 -#endif - -/** - * @private - * - * This is a compile-time flag to enable/disable write barrier for - * struct ::RStruct. It has to be set at the time ruby itself compiles. Makes - * no sense for 3rd parties. - */ -#ifndef RGENGC_WB_PROTECTED_STRUCT -# define RGENGC_WB_PROTECTED_STRUCT 1 -#endif - -/** - * @private - * - * This is a compile-time flag to enable/disable write barrier for - * struct ::RString. It has to be set at the time ruby itself compiles. Makes - * no sense for 3rd parties. - */ -#ifndef RGENGC_WB_PROTECTED_STRING -# define RGENGC_WB_PROTECTED_STRING 1 -#endif - -/** - * @private - * - * This is a compile-time flag to enable/disable write barrier for - * struct ::RObject. It has to be set at the time ruby itself compiles. Makes - * no sense for 3rd parties. - */ -#ifndef RGENGC_WB_PROTECTED_OBJECT -# define RGENGC_WB_PROTECTED_OBJECT 1 -#endif - -/** - * @private - * - * This is a compile-time flag to enable/disable write barrier for - * struct ::RRegexp. It has to be set at the time ruby itself compiles. Makes - * no sense for 3rd parties. - */ -#ifndef RGENGC_WB_PROTECTED_REGEXP -# define RGENGC_WB_PROTECTED_REGEXP 1 -#endif - -/** - * @private - * - * This is a compile-time flag to enable/disable write barrier for - * struct ::RClass. It has to be set at the time ruby itself compiles. Makes - * no sense for 3rd parties. - */ -#ifndef RGENGC_WB_PROTECTED_CLASS -# define RGENGC_WB_PROTECTED_CLASS 1 -#endif - -/** - * @private - * - * This is a compile-time flag to enable/disable write barrier for - * struct ::RFloat. It has to be set at the time ruby itself compiles. Makes - * no sense for 3rd parties. - */ -#ifndef RGENGC_WB_PROTECTED_FLOAT -# define RGENGC_WB_PROTECTED_FLOAT 1 -#endif - -/** - * @private - * - * This is a compile-time flag to enable/disable write barrier for - * struct ::RComplex. It has to be set at the time ruby itself compiles. - * Makes no sense for 3rd parties. - */ -#ifndef RGENGC_WB_PROTECTED_COMPLEX -# define RGENGC_WB_PROTECTED_COMPLEX 1 -#endif - -/** - * @private - * - * This is a compile-time flag to enable/disable write barrier for - * struct ::RRational. It has to be set at the time ruby itself compiles. - * Makes no sense for 3rd parties. - */ -#ifndef RGENGC_WB_PROTECTED_RATIONAL -# define RGENGC_WB_PROTECTED_RATIONAL 1 -#endif - -/** - * @private - * - * This is a compile-time flag to enable/disable write barrier for - * struct ::RBignum. It has to be set at the time ruby itself compiles. Makes - * no sense for 3rd parties. - */ -#ifndef RGENGC_WB_PROTECTED_BIGNUM -# define RGENGC_WB_PROTECTED_BIGNUM 1 -#endif - -/** - * @private - * - * @deprecated This macro once was a thing in the old days, but makes no sense - * any longer today. Exists here for backwards compatibility - * only. You can safely forget about it. - * - * @internal - * - * @shyouhei doesn't think anybody uses this right now. - */ -#ifndef RGENGC_WB_PROTECTED_NODE_CREF -# define RGENGC_WB_PROTECTED_NODE_CREF 1 -#endif - -/** - * @defgroup rgengc Write barrier (WB) interfaces: - * - * @note The following core interfaces can be changed in the future. Please - * catch up if you want to insert WB into C-extensions correctly. - * - * @{ - */ - -/** - * Declaration of a "back" pointer. This is a write barrier for new reference - * from "old" generation to "young" generation. It writes `young` into - * `*slot`, which is a pointer inside of `old`. - * - * @param[in] old An old object. - * @param[in] slot A pointer inside of `old`. - * @param[out] young A young object. - */ -#define RB_OBJ_WRITE(old, slot, young) \ - RBIMPL_CAST(rb_obj_write((VALUE)(old), (VALUE *)(slot), (VALUE)(young), __FILE__, __LINE__)) - -/** - * Identical to #RB_OBJ_WRITE(), except it doesn't write any values, but only a - * WB declaration. `oldv` is replaced value with `b` (not used in current - * Ruby). - * - * @param[in] old An old object. - * @param[in] oldv An object previously stored inside of `old`. - * @param[out] young A young object. - */ -#define RB_OBJ_WRITTEN(old, oldv, young) \ - RBIMPL_CAST(rb_obj_written((VALUE)(old), (VALUE)(oldv), (VALUE)(young), __FILE__, __LINE__)) -/** @} */ - -#define OBJ_PROMOTED_RAW RB_OBJ_PROMOTED_RAW /**< @old{RB_OBJ_PROMOTED_RAW} */ -#define OBJ_PROMOTED RB_OBJ_PROMOTED /**< @old{RB_OBJ_PROMOTED} */ -#define OBJ_WB_UNPROTECT RB_OBJ_WB_UNPROTECT /**< @old{RB_OBJ_WB_UNPROTECT} */ - -/** - * Asserts that the passed object is not fenced by write barriers. Objects of - * such property do not contribute to generational GCs. They are scanned - * always. - * - * @param[out] x An object that would not be protected by the barrier. - */ -#define RB_OBJ_WB_UNPROTECT(x) rb_obj_wb_unprotect(x, __FILE__, __LINE__) - -/** - * Identical to #RB_OBJ_WB_UNPROTECT(), except it can also assert that the - * given object is of given type. - * - * @param[in] type One of `ARRAY`, `STRING`, etc. - * @param[out] obj An object of `type` that would not be protected. - * - * @internal - * - * @shyouhei doesn't understand why this has to be visible from extensions. - */ -#define RB_OBJ_WB_UNPROTECT_FOR(type, obj) \ - (RGENGC_WB_PROTECTED_##type ? OBJ_WB_UNPROTECT(obj) : obj) - -/** - * @private - * - * This is an implementation detail of rb_obj_wb_unprotect(). People don't use - * it directly. - */ -#define RGENGC_LOGGING_WB_UNPROTECT rb_gc_unprotect_logging - -/** @cond INTERNAL_MACRO */ -#define RB_OBJ_PROMOTED_RAW RB_OBJ_PROMOTED_RAW -#define RB_OBJ_PROMOTED RB_OBJ_PROMOTED -/** @endcond */ - -RBIMPL_SYMBOL_EXPORT_BEGIN() -/** - * This is the implementation of #RB_OBJ_WRITE(). People don't use it - * directly. - * - * @param[in] old An object that points to `young`. - * @param[out] young An object that is referenced from `old`. - */ -void rb_gc_writebarrier(VALUE old, VALUE young); - -/** - * This is the implementation of #RB_OBJ_WB_UNPROTECT(). People don't use it - * directly. - * - * @param[out] obj An object that does not participate in WB. - */ -void rb_gc_writebarrier_unprotect(VALUE obj); - -#if USE_RGENGC_LOGGING_WB_UNPROTECT -/** - * @private - * - * This is the implementation of #RGENGC_LOGGING_WB_UNPROTECT(). People - * don't use it directly. - * - * @param[in] objptr Don't know why this is a pointer to void but in - * reality this is a pointer to an object that is about - * to be un-protected. - * @param[in] filename Pass C's `__FILE__` here. - * @param[in] line Pass C's `__LINE__` here. - */ -void rb_gc_unprotect_logging(void *objptr, const char *filename, int line); -#endif - -RBIMPL_SYMBOL_EXPORT_END() - -RBIMPL_ATTR_PURE_UNLESS_DEBUG() -RBIMPL_ATTR_ARTIFICIAL() -/** - * This is the implementation of #RB_OBJ_PROMOTED(). People don't use it - * directly. - * - * @param[in] obj An object to query. - * @retval true The object is "promoted". - * @retval false The object is young. Have not experienced GC at all. - */ -static inline bool -RB_OBJ_PROMOTED_RAW(VALUE obj) -{ - RBIMPL_ASSERT_OR_ASSUME(RB_FL_ABLE(obj)); - return RB_FL_ANY_RAW(obj, RUBY_FL_PROMOTED); -} - -RBIMPL_ATTR_PURE_UNLESS_DEBUG() -RBIMPL_ATTR_ARTIFICIAL() -/** - * Tests if the object is "promoted" -- that is, whether the object experienced - * one or more GC marks. - * - * @param[in] obj An object to query. - * @retval true The object is "promoted". - * @retval false The object is young. Have not experienced GC at all. - * @note Hello, is anyone actively calling this function? @shyouhei have - * never seen any actual usages outside of the GC implementation - * itself. - */ -static inline bool -RB_OBJ_PROMOTED(VALUE obj) -{ - if (! RB_FL_ABLE(obj)) { - return false; - } - else { - return RB_OBJ_PROMOTED_RAW(obj); - } -} - -/** - * This is the implementation of #RB_OBJ_WB_UNPROTECT(). People don't use it - * directly. - * - * @param[out] x An object that does not participate in WB. - * @param[in] filename C's `__FILE__` of the caller function. - * @param[in] line C's `__LINE__` of the caller function. - * @return x - */ -static inline VALUE -rb_obj_wb_unprotect( - VALUE x, - RBIMPL_ATTR_MAYBE_UNUSED() - const char *filename, - RBIMPL_ATTR_MAYBE_UNUSED() - int line) -{ -#if USE_RGENGC_LOGGING_WB_UNPROTECT - RGENGC_LOGGING_WB_UNPROTECT(RBIMPL_CAST((void *)x), filename, line); -#endif - rb_gc_writebarrier_unprotect(x); - return x; -} - -/** - * @private - * - * This is the implementation of #RB_OBJ_WRITTEN(). People don't use it - * directly. - * - * @param[in] a An old object. - * @param[in] oldv An object previously stored inside of `old`. - * @param[out] b A young object. - * @param[in] filename C's `__FILE__` of the caller function. - * @param[in] line C's `__LINE__` of the caller function. - * @return a - */ -static inline VALUE -rb_obj_written( - VALUE a, - RBIMPL_ATTR_MAYBE_UNUSED() - VALUE oldv, - VALUE b, - RBIMPL_ATTR_MAYBE_UNUSED() - const char *filename, - RBIMPL_ATTR_MAYBE_UNUSED() - int line) -{ -#if USE_RGENGC_LOGGING_WB_UNPROTECT - RGENGC_LOGGING_OBJ_WRITTEN(a, oldv, b, filename, line); -#endif - - if (!RB_SPECIAL_CONST_P(b)) { - rb_gc_writebarrier(a, b); - } - - return a; -} - -/** - * @private - * - * This is the implementation of #RB_OBJ_WRITE(). People don't use it - * directly. - * - * @param[in] a An old object. - * @param[in] slot A pointer inside of `old`. - * @param[out] b A young object. - * @param[in] filename C's `__FILE__` of the caller function. - * @param[in] line C's `__LINE__` of the caller function. - * @return a - */ -static inline VALUE -rb_obj_write( - VALUE a, VALUE *slot, VALUE b, - RBIMPL_ATTR_MAYBE_UNUSED() - const char *filename, - RBIMPL_ATTR_MAYBE_UNUSED() - int line) -{ -#ifdef RGENGC_LOGGING_WRITE - RGENGC_LOGGING_WRITE(a, slot, b, filename, line); -#endif - - *slot = b; - - rb_obj_written(a, RUBY_Qundef /* ignore `oldv' now */, b, filename, line); - return a; -} - -#endif /* RBIMPL_RGENGC_H */ diff --git a/include/ruby/internal/scan_args.h b/include/ruby/internal/scan_args.h index cf5b18f77d..1ed2bf6368 100644 --- a/include/ruby/internal/scan_args.h +++ b/include/ruby/internal/scan_args.h @@ -100,7 +100,7 @@ RBIMPL_ATTR_NONNULL((2, 3)) * param-arg-spec := pre-arg-spec [post-arg-spec] / post-arg-spec / * pre-opt-post-arg-spec * pre-arg-spec := num-of-leading-mandatory-args - [num-of-optional-args] + * [num-of-optional-args] * post-arg-spec := sym-for-variable-length-args * [num-of-trailing-mandatory-args] * pre-opt-post-arg-spec := num-of-leading-mandatory-args num-of-optional-args diff --git a/include/ruby/internal/special_consts.h b/include/ruby/internal/special_consts.h index 38934e4da3..dc0a6b41d6 100644 --- a/include/ruby/internal/special_consts.h +++ b/include/ruby/internal/special_consts.h @@ -76,6 +76,8 @@ #define RB_SPECIAL_CONST_P RB_SPECIAL_CONST_P #define RB_STATIC_SYM_P RB_STATIC_SYM_P #define RB_TEST RB_TEST +#define RB_UNDEF_P RB_UNDEF_P +#define RB_NIL_OR_UNDEF_P RB_NIL_OR_UNDEF_P /** @endcond */ /** special constants - i.e. non-zero and non-fixnum constants */ @@ -94,9 +96,9 @@ ruby_special_consts { RUBY_SYMBOL_FLAG, /**< Flag to denote a static symbol. */ #elif USE_FLONUM RUBY_Qfalse = 0x00, /* ...0000 0000 */ + RUBY_Qnil = 0x04, /* ...0000 0100 */ RUBY_Qtrue = 0x14, /* ...0001 0100 */ - RUBY_Qnil = 0x08, /* ...0000 1000 */ - RUBY_Qundef = 0x34, /* ...0011 0100 */ + RUBY_Qundef = 0x24, /* ...0010 0100 */ RUBY_IMMEDIATE_MASK = 0x07, /* ...0000 0111 */ RUBY_FIXNUM_FLAG = 0x01, /* ...xxxx xxx1 */ RUBY_FLONUM_MASK = 0x03, /* ...0000 0011 */ @@ -104,14 +106,14 @@ ruby_special_consts { RUBY_SYMBOL_FLAG = 0x0c, /* ...xxxx 1100 */ #else RUBY_Qfalse = 0x00, /* ...0000 0000 */ - RUBY_Qtrue = 0x02, /* ...0000 0010 */ - RUBY_Qnil = 0x04, /* ...0000 0100 */ - RUBY_Qundef = 0x06, /* ...0000 0110 */ + RUBY_Qnil = 0x02, /* ...0000 0010 */ + RUBY_Qtrue = 0x06, /* ...0000 0110 */ + RUBY_Qundef = 0x0a, /* ...0000 1010 */ RUBY_IMMEDIATE_MASK = 0x03, /* ...0000 0011 */ RUBY_FIXNUM_FLAG = 0x01, /* ...xxxx xxx1 */ RUBY_FLONUM_MASK = 0x00, /* any values ANDed with FLONUM_MASK cannot be FLONUM_FLAG */ RUBY_FLONUM_FLAG = 0x02, /* ...0000 0010 */ - RUBY_SYMBOL_FLAG = 0x0e, /* ...0000 1110 */ + RUBY_SYMBOL_FLAG = 0x0e, /* ...xxxx 1110 */ #endif RUBY_SPECIAL_SHIFT = 8 /**< Least significant 8 bits are reserved. */ @@ -136,12 +138,21 @@ static inline bool RB_TEST(VALUE obj) { /* + * if USE_FLONUM * Qfalse: ....0000 0000 - * Qnil: ....0000 1000 - * ~Qnil: ....1111 0111 + * Qnil: ....0000 0100 + * ~Qnil: ....1111 1011 * v ....xxxx xxxx * ---------------------------- - * RTEST(v) ....xxxx 0xxx + * RTEST(v) ....xxxx x0xx + * + * if ! USE_FLONUM + * Qfalse: ....0000 0000 + * Qnil: ....0000 0010 + * ~Qnil: ....1111 1101 + * v ....xxxx xxxx + * ---------------------------- + * RTEST(v) ....xxxx xx0x * * RTEST(v) can be 0 if and only if (v == Qfalse || v == Qnil). */ @@ -168,6 +179,62 @@ RBIMPL_ATTR_CONST() RBIMPL_ATTR_CONSTEXPR(CXX11) RBIMPL_ATTR_ARTIFICIAL() /** + * Checks if the given object is undef. + * + * @param[in] obj An arbitrary ruby object. + * @retval true `obj` is ::RUBY_Qundef. + * @retval false Anything else. + */ +static inline bool +RB_UNDEF_P(VALUE obj) +{ + return obj == RUBY_Qundef; +} + +RBIMPL_ATTR_CONST() +RBIMPL_ATTR_CONSTEXPR(CXX14) +RBIMPL_ATTR_ARTIFICIAL() +/** + * Checks if the given object is nil or undef. Can be used to see if + * a keyword argument is not given or given `nil`. + * + * @param[in] obj An arbitrary ruby object. + * @retval true `obj` is ::RUBY_Qnil or ::RUBY_Qundef. + * @retval false Anything else. + */ +static inline bool +RB_NIL_OR_UNDEF_P(VALUE obj) +{ + /* + * if USE_FLONUM + * Qundef: ....0010 0100 + * Qnil: ....0000 0100 + * mask: ....1101 1111 + * common_bits: ....0000 0100 + * --------------------------------- + * Qnil & mask ....0000 0100 + * Qundef & mask ....0000 0100 + * + * if ! USE_FLONUM + * Qundef: ....0000 1010 + * Qnil: ....0000 0010 + * mask: ....1111 0111 + * common_bits: ....0000 0010 + * ---------------------------- + * Qnil & mask ....0000 0010 + * Qundef & mask ....0000 0010 + * + * NIL_OR_UNDEF_P(v) can be true only when v is Qundef or Qnil. + */ + const VALUE mask = ~(RUBY_Qundef ^ RUBY_Qnil); + const VALUE common_bits = RUBY_Qundef & RUBY_Qnil; + return (obj & mask) == common_bits; +} + +RBIMPL_ATTR_CONST() +RBIMPL_ATTR_CONSTEXPR(CXX11) +RBIMPL_ATTR_ARTIFICIAL() +/** * Checks if the given object is a so-called Fixnum. * * @param[in] obj An arbitrary ruby object. @@ -259,7 +326,7 @@ RBIMPL_ATTR_ARTIFICIAL() static inline bool RB_SPECIAL_CONST_P(VALUE obj) { - return RB_IMMEDIATE_P(obj) || ! RB_TEST(obj); + return RB_IMMEDIATE_P(obj) || obj == RUBY_Qfalse; } RBIMPL_ATTR_CONST() diff --git a/include/ruby/internal/static_assert.h b/include/ruby/internal/static_assert.h index 594c2b2917..b9ff6646e7 100644 --- a/include/ruby/internal/static_assert.h +++ b/include/ruby/internal/static_assert.h @@ -71,7 +71,7 @@ #else # define RBIMPL_STATIC_ASSERT(name, expr) \ - typedef int static_assert_ ## name ## _check[1 - 2 * !(expr)] + MAYBE_UNUSED(typedef int static_assert_ ## name ## _check[1 - 2 * !(expr)]) #endif #endif /* RBIMPL_STATIC_ASSERT_H */ diff --git a/include/ruby/internal/stdbool.h b/include/ruby/internal/stdbool.h index b15321cb00..1ca61136ba 100644 --- a/include/ruby/internal/stdbool.h +++ b/include/ruby/internal/stdbool.h @@ -39,7 +39,7 @@ # /* Take stdbool.h definition. */ # include <stdbool.h> -#else +#elif !defined(HAVE__BOOL) typedef unsigned char _Bool; # /* See also http://www.open-std.org/jtc1/sc22/wg14/www/docs/n2229.htm */ # define bool _Bool diff --git a/include/ruby/internal/stdckdint.h b/include/ruby/internal/stdckdint.h new file mode 100644 index 0000000000..d02530136e --- /dev/null +++ b/include/ruby/internal/stdckdint.h @@ -0,0 +1,60 @@ +#ifndef RBIMPL_STDCKDINT_H /*-*-C++-*-vi:se ft=cpp:*/ +#define RBIMPL_STDCKDINT_H +/** + * @author Ruby developers <ruby-core@ruby-lang.org> + * @copyright This file is a part of the programming language Ruby. + * Permission is hereby granted, to either redistribute and/or + * modify this file, provided that the conditions mentioned in the + * file COPYING are met. Consult the file for details. + * @warning Symbols prefixed with either `RBIMPL` or `rbimpl` are + * implementation details. Don't take them as canon. They could + * rapidly appear then vanish. The name (path) of this header file + * is also an implementation detail. Do not expect it to persist + * at the place it is now. Developers are free to move it anywhere + * anytime at will. + * @note To ruby-core: remember that this header can be possibly + * recursively included from extension libraries written in C++. + * Do not expect for instance `__VA_ARGS__` is always available. + * We assume C99 for ruby itself but we don't assume languages of + * extension libraries. They could be written in C++98. + * @brief C23 shim for <stdckdint.h> + */ +#include "ruby/internal/config.h" +#include "ruby/internal/has/builtin.h" +#include "ruby/internal/stdbool.h" + +#ifdef __has_include +# if __has_include(<stdckdint.h>) +# /* Conforming C23 situation; e.g. recent clang */ +# define RBIMPL_HAVE_STDCKDINT_H +# endif +#endif + +#ifdef HAVE_STDCKDINT_H +# /* Some OSes (most notably FreeBSD) have this file. */ +# define RBIMPL_HAVE_STDCKDINT_H +#endif + +#ifdef RBIMPL_HAVE_STDCKDINT_H +# /* Take that. */ +# include <stdckdint.h> + +#elif RBIMPL_HAS_BUILTIN(__builtin_add_overflow) +# define ckd_add(x, y, z) ((bool)__builtin_add_overflow((y), (z), (x))) +# define ckd_sub(x, y, z) ((bool)__builtin_sub_overflow((y), (z), (x))) +# define ckd_mul(x, y, z) ((bool)__builtin_mul_overflow((y), (z), (x))) +# define __STDC_VERSION_STDCKDINT_H__ 202311L + +#/* elif defined(__cplusplus) */ +#/* :TODO: if we assume C++11 we can use `<type_traits>` to implement them. */ + +#else +# /* intentionally leave them undefined */ +# /* to make `#ifdef ckd_add` etc. work as intended. */ +# undef ckd_add +# undef ckd_sub +# undef ckd_mul +# undef __STDC_VERSION_STDCKDINT_H__ +#endif + +#endif /* RBIMPL_STDCKDINT_H */ diff --git a/include/ruby/internal/variable.h b/include/ruby/internal/variable.h index 1f84b92db0..c017ffe3f7 100644 --- a/include/ruby/internal/variable.h +++ b/include/ruby/internal/variable.h @@ -147,7 +147,7 @@ RBIMPL_ATTR_NONNULL(()) * init_Foo(void) * { * foo = rb_eval_string("..."); - * rb_define_global_variable("$foo", &foo); + * rb_define_variable("$foo", &foo); * } * ``` * |