diff options
Diffstat (limited to 'include')
192 files changed, 26418 insertions, 2846 deletions
diff --git a/include/ruby.h b/include/ruby.h index 2775c11a40..789804b7b4 100644 --- a/include/ruby.h +++ b/include/ruby.h @@ -1,7 +1,6 @@ #ifndef RUBY_H /*-*-C++-*-vi:se ft=cpp:*/ #define RUBY_H 1 /** - * @file * @author $Author$ * @date Sun 10 12:06:15 Jun JST 2007 * @copyright 2007-2008 Yukihiro Matsumoto diff --git a/include/ruby/assert.h b/include/ruby/assert.h index b0bbdd2c14..e9edd9e640 100644 --- a/include/ruby/assert.h +++ b/include/ruby/assert.h @@ -18,10 +18,11 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. */ #include "ruby/internal/assume.h" #include "ruby/internal/attr/cold.h" +#include "ruby/internal/attr/format.h" #include "ruby/internal/attr/noreturn.h" #include "ruby/internal/cast.h" #include "ruby/internal/dllexport.h" @@ -103,7 +104,7 @@ # /* keep NDEBUG undefined */ #elif (RBIMPL_NDEBUG == 0) && (RBIMPL_RUBY_DEBUG == 0) -# /* The (*1) situation in avobe diagram. */ +# /* The (*1) situation in above diagram. */ # define RUBY_DEBUG 0 # define RUBY_NDEBUG 1 # define NDEBUG @@ -132,6 +133,11 @@ RBIMPL_SYMBOL_EXPORT_BEGIN() RBIMPL_ATTR_NORETURN() RBIMPL_ATTR_COLD() void rb_assert_failure(const char *file, int line, const char *name, const char *expr); + +RBIMPL_ATTR_NORETURN() +RBIMPL_ATTR_COLD() +RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 5, 6) +void rb_assert_failure_detail(const char *file, int line, const char *name, const char *expr, const char *fmt, ...); RBIMPL_SYMBOL_EXPORT_END() #ifdef RUBY_FUNCTION_NAME_STRING @@ -147,8 +153,28 @@ RBIMPL_SYMBOL_EXPORT_END() * * @param mesg The message to display. 
*/ -#define RUBY_ASSERT_FAIL(mesg) \ +#if defined(HAVE___VA_OPT__) +# if RBIMPL_HAS_WARNING("-Wgnu-zero-variadic-macro-arguments") +/* __VA_OPT__ is to be used for the zero variadic macro arguments + * cases. */ +RBIMPL_WARNING_IGNORED(-Wgnu-zero-variadic-macro-arguments) +# endif +# define RBIMPL_VA_OPT_ARGS(...) __VA_OPT__(,) __VA_ARGS__ + +# define RUBY_ASSERT_FAIL(mesg, ...) \ + rb_assert_failure##__VA_OPT__(_detail)( \ + __FILE__, __LINE__, RBIMPL_ASSERT_FUNC, mesg RBIMPL_VA_OPT_ARGS(__VA_ARGS__)) +#elif !defined(__cplusplus) +# define RBIMPL_VA_OPT_ARGS(...) + +# define RUBY_ASSERT_FAIL(mesg, ...) \ + rb_assert_failure(__FILE__, __LINE__, RBIMPL_ASSERT_FUNC, mesg) +#else +# undef RBIMPL_VA_OPT_ARGS + +# define RUBY_ASSERT_FAIL(mesg) \ rb_assert_failure(__FILE__, __LINE__, RBIMPL_ASSERT_FUNC, mesg) +#endif /** * Asserts that the expression is truthy. If not aborts with the message. @@ -156,15 +182,25 @@ RBIMPL_SYMBOL_EXPORT_END() * @param expr What supposedly evaluates to true. * @param mesg The message to display on failure. */ -#define RUBY_ASSERT_MESG(expr, mesg) \ +#if defined(RBIMPL_VA_OPT_ARGS) +# define RUBY_ASSERT_MESG(expr, ...) \ + (RB_LIKELY(expr) ? RBIMPL_ASSERT_NOTHING : RUBY_ASSERT_FAIL(__VA_ARGS__)) +#else +# define RUBY_ASSERT_MESG(expr, mesg) \ (RB_LIKELY(expr) ? RBIMPL_ASSERT_NOTHING : RUBY_ASSERT_FAIL(mesg)) +#endif /** * A variant of #RUBY_ASSERT that does not interface with #RUBY_DEBUG. * * @copydetails #RUBY_ASSERT */ -#define RUBY_ASSERT_ALWAYS(expr) RUBY_ASSERT_MESG((expr), #expr) +#if defined(RBIMPL_VA_OPT_ARGS) +# define RUBY_ASSERT_ALWAYS(expr, ...) \ + RUBY_ASSERT_MESG(expr, #expr RBIMPL_VA_OPT_ARGS(__VA_ARGS__)) +#else +# define RUBY_ASSERT_ALWAYS(expr) RUBY_ASSERT_MESG((expr), #expr) +#endif /** * Asserts that the given expression is truthy if and only if #RUBY_DEBUG is truthy. @@ -172,9 +208,18 @@ RBIMPL_SYMBOL_EXPORT_END() * @param expr What supposedly evaluates to true. 
*/ #if RUBY_DEBUG -# define RUBY_ASSERT(expr) RUBY_ASSERT_MESG((expr), #expr) +# if defined(RBIMPL_VA_OPT_ARGS) +# define RUBY_ASSERT(expr, ...) \ + RUBY_ASSERT_MESG((expr), #expr RBIMPL_VA_OPT_ARGS(__VA_ARGS__)) +# else +# define RUBY_ASSERT(expr) RUBY_ASSERT_MESG((expr), #expr) +# endif #else -# define RUBY_ASSERT(expr) RBIMPL_ASSERT_NOTHING +# if defined(RBIMPL_VA_OPT_ARGS) +# define RUBY_ASSERT(/* expr, */...) RBIMPL_ASSERT_NOTHING +# else +# define RUBY_ASSERT(expr) RBIMPL_ASSERT_NOTHING +# endif #endif /** @@ -187,9 +232,18 @@ RBIMPL_SYMBOL_EXPORT_END() /* Currently `RUBY_DEBUG == ! defined(NDEBUG)` is always true. There is no * difference any longer between this one and `RUBY_ASSERT`. */ #if defined(NDEBUG) -# define RUBY_ASSERT_NDEBUG(expr) RBIMPL_ASSERT_NOTHING +# if defined(RBIMPL_VA_OPT_ARGS) +# define RUBY_ASSERT_NDEBUG(/* expr, */...) RBIMPL_ASSERT_NOTHING +# else +# define RUBY_ASSERT_NDEBUG(expr) RBIMPL_ASSERT_NOTHING +# endif #else -# define RUBY_ASSERT_NDEBUG(expr) RUBY_ASSERT_MESG((expr), #expr) +# if defined(RBIMPL_VA_OPT_ARGS) +# define RUBY_ASSERT_NDEBUG(expr, ...) \ + RUBY_ASSERT_MESG((expr), #expr RBIMPL_VA_OPT_ARGS(__VA_ARGS__)) +# else +# define RUBY_ASSERT_NDEBUG(expr) RUBY_ASSERT_MESG((expr), #expr) +# endif #endif /** @@ -197,10 +251,20 @@ RBIMPL_SYMBOL_EXPORT_END() * @param mesg The message to display on failure. */ #if RUBY_DEBUG -# define RUBY_ASSERT_MESG_WHEN(cond, expr, mesg) RUBY_ASSERT_MESG((expr), (mesg)) +# if defined(RBIMPL_VA_OPT_ARGS) +# define RUBY_ASSERT_MESG_WHEN(cond, /* expr, */...) \ + RUBY_ASSERT_MESG(__VA_ARGS__) +# else +# define RUBY_ASSERT_MESG_WHEN(cond, expr, mesg) RUBY_ASSERT_MESG((expr), (mesg)) +# endif #else -# define RUBY_ASSERT_MESG_WHEN(cond, expr, mesg) \ +# if defined(RBIMPL_VA_OPT_ARGS) +# define RUBY_ASSERT_MESG_WHEN(cond, expr, ...) \ + ((cond) ? RUBY_ASSERT_MESG((expr), __VA_ARGS__) : RBIMPL_ASSERT_NOTHING) +# else +# define RUBY_ASSERT_MESG_WHEN(cond, expr, mesg) \ ((cond) ? 
RUBY_ASSERT_MESG((expr), (mesg)) : RBIMPL_ASSERT_NOTHING) +# endif #endif /** @@ -210,7 +274,23 @@ RBIMPL_SYMBOL_EXPORT_END() * @param cond Extra condition that shall hold for assertion to take effect. * @param expr What supposedly evaluates to true. */ -#define RUBY_ASSERT_WHEN(cond, expr) RUBY_ASSERT_MESG_WHEN((cond), (expr), #expr) +#if defined(RBIMPL_VA_OPT_ARGS) +# define RUBY_ASSERT_WHEN(cond, expr, ...) \ + RUBY_ASSERT_MESG_WHEN(cond, expr, #expr RBIMPL_VA_OPT_ARGS(__VA_ARGS__)) +#else +# define RUBY_ASSERT_WHEN(cond, expr) RUBY_ASSERT_MESG_WHEN((cond), (expr), #expr) +#endif + +/** + * A variant of #RUBY_ASSERT that asserts when either #RUBY_DEBUG or built-in + * type of `obj` is `type`. + * + * @param obj Object to check its built-in type. + * @param type Built-in type constant, T_ARRAY, T_STRING, etc. + */ +#define RUBY_ASSERT_BUILTIN_TYPE(obj, type) \ + RUBY_ASSERT(RB_TYPE_P(obj, type), \ + "Actual type is %s", rb_builtin_type_name(BUILTIN_TYPE(obj))) /** * This is either #RUBY_ASSERT or #RBIMPL_ASSUME, depending on #RUBY_DEBUG. diff --git a/include/ruby/atomic.h b/include/ruby/atomic.h index 083f1f6aa0..043a6a9945 100644 --- a/include/ruby/atomic.h +++ b/include/ruby/atomic.h @@ -1,236 +1,945 @@ -#ifndef RUBY_ATOMIC_H +#ifndef RUBY_ATOMIC_H /*-*-C++-*-vi:se ft=cpp:*/ #define RUBY_ATOMIC_H +/** + * @file + * @author Ruby developers <ruby-core@ruby-lang.org> + * @copyright This file is a part of the programming language Ruby. + * Permission is hereby granted, to either redistribute and/or + * modify this file, provided that the conditions mentioned in the + * file COPYING are met. Consult the file for details. + * @warning Symbols prefixed with either `RBIMPL` or `rbimpl` are + * implementation details. Don't take them as canon. They could + * rapidly appear then vanish. The name (path) of this header file + * is also an implementation detail. Do not expect it to persist + * at the place it is now.
Developers are free to move it anywhere + * anytime at will. + * @note To ruby-core: remember that this header can be possibly + * recursively included from extension libraries written in C++. + * Do not expect for instance `__VA_ARGS__` is always available. + * We assume C99 for ruby itself but we don't assume languages of + * extension libraries. They could be written in C++98. + * @brief Atomic operations + * + * Basically, if we could assume either C11 or C++11, these macros are just + * redundant. Sadly we cannot. We have to do them ourselves. + */ + +#include "ruby/internal/config.h" + +#ifdef STDC_HEADERS +# include <stddef.h> /* size_t */ +#endif + +#ifdef HAVE_SYS_TYPES_H +# include <sys/types.h> /* ssize_t */ +#endif + +#if RBIMPL_COMPILER_SINCE(MSVC, 13, 0, 0) +# pragma intrinsic(_InterlockedOr) +#elif defined(__sun) && defined(HAVE_ATOMIC_H) +# include <atomic.h> +#endif + +#include "ruby/assert.h" +#include "ruby/backward/2/limits.h" +#include "ruby/internal/attr/artificial.h" +#include "ruby/internal/attr/noalias.h" +#include "ruby/internal/attr/nonnull.h" +#include "ruby/internal/compiler_since.h" +#include "ruby/internal/cast.h" +#include "ruby/internal/value.h" +#include "ruby/internal/static_assert.h" +#include "ruby/internal/stdbool.h" /* - * - RUBY_ATOMIC_CAS, RUBY_ATOMIC_EXCHANGE, RUBY_ATOMIC_FETCH_*: - * return the old value. - * - RUBY_ATOMIC_ADD, RUBY_ATOMIC_SUB, RUBY_ATOMIC_INC, RUBY_ATOMIC_DEC, RUBY_ATOMIC_OR, RUBY_ATOMIC_SET: - * may be void. + * Asserts that your environment supports more than one atomic types. These + * days systems tend to have such property (C11 was a standard of decades ago, + * right?) but we still support older ones. */ -#if 0 -#elif defined HAVE_GCC_ATOMIC_BUILTINS +#if defined(__DOXYGEN__) || defined(HAVE_GCC_ATOMIC_BUILTINS) || defined(HAVE_GCC_SYNC_BUILTINS) +# define RUBY_ATOMIC_GENERIC_MACRO 1 +#endif + +/** + * Type that is eligible for atomic operations. 
Depending on your host + * platform you might have more than one such type, but we choose one of them + * anyways. + */ +#if defined(__DOXYGEN__) +using rb_atomic_t = std::atomic<unsigned>; +#elif defined(HAVE_GCC_ATOMIC_BUILTINS) +typedef unsigned int rb_atomic_t; +#elif defined(HAVE_GCC_SYNC_BUILTINS) typedef unsigned int rb_atomic_t; -# define RUBY_ATOMIC_FETCH_ADD(var, val) __atomic_fetch_add(&(var), (val), __ATOMIC_SEQ_CST) -# define RUBY_ATOMIC_FETCH_SUB(var, val) __atomic_fetch_sub(&(var), (val), __ATOMIC_SEQ_CST) -# define RUBY_ATOMIC_OR(var, val) __atomic_fetch_or(&(var), (val), __ATOMIC_SEQ_CST) -# define RUBY_ATOMIC_EXCHANGE(var, val) __atomic_exchange_n(&(var), (val), __ATOMIC_SEQ_CST) -# define RUBY_ATOMIC_CAS(var, oldval, newval) RB_GNUC_EXTENSION_BLOCK( \ - __typeof__(var) oldvaldup = (oldval); /* oldval should not be modified */ \ - __atomic_compare_exchange_n(&(var), &oldvaldup, (newval), 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); \ - oldvaldup ) +#elif defined(_WIN32) +typedef LONG rb_atomic_t; +#elif defined(__sun) && defined(HAVE_ATOMIC_H) +typedef unsigned int rb_atomic_t; +#else +# error No atomic operation found +#endif -# define RUBY_ATOMIC_GENERIC_MACRO 1 +/** + * Atomically replaces the value pointed by `var` with the result of addition + * of `val` to the old value of `var`. + * + * @param var A variable of ::rb_atomic_t. + * @param val Value to add. + * @return What was stored in `var` before the addition. + * @post `var` holds `var + val`. + */ +#define RUBY_ATOMIC_FETCH_ADD(var, val) rbimpl_atomic_fetch_add(&(var), (val)) + +/** + * Atomically replaces the value pointed by `var` with the result of + * subtraction of `val` to the old value of `var`. + * + * @param var A variable of ::rb_atomic_t. + * @param val Value to subtract. + * @return What was stored in `var` before the subtraction. + * @post `var` holds `var - val`. 
+ */ +#define RUBY_ATOMIC_FETCH_SUB(var, val) rbimpl_atomic_fetch_sub(&(var), (val)) + +/** + * Atomically replaces the value pointed by `var` with the result of + * bitwise OR between `val` and the old value of `var`. + * + * @param var A variable of ::rb_atomic_t. + * @param val Value to mix. + * @return void + * @post `var` holds `var | val`. + * @note For portability, this macro can return void. + */ +#define RUBY_ATOMIC_OR(var, val) rbimpl_atomic_or(&(var), (val)) + +/** + * Atomically replaces the value pointed by `var` with `val`. This is just an + * assignment, but you can additionally know the previous value. + * + * @param var A variable of ::rb_atomic_t. + * @param val Value to set. + * @return What was stored in `var` before the assignment. + * @post `var` holds `val`. + */ +#define RUBY_ATOMIC_EXCHANGE(var, val) rbimpl_atomic_exchange(&(var), (val)) + +/** + * Atomic compare-and-swap. This stores `newval` to `var` if and only if the + * assignment changes the value of `var` from `oldval` to `newval`. You can + * detect whether the assignment happened or not using the return value. + * + * @param var A variable of ::rb_atomic_t. + * @param oldval Expected value of `var` before the assignment. + * @param newval What you want to store at `var`. + * @retval oldval Successful assignment (`var` is now `newval`). + * @retval otherwise Something else is at `var`; not updated. + */ +#define RUBY_ATOMIC_CAS(var, oldval, newval) \ + rbimpl_atomic_cas(&(var), (oldval), (newval)) + +/** + * Atomic load. This loads `var` with an atomic intrinsic and returns + * its value. + * + * @param var A variable of ::rb_atomic_t. + * @return What was stored in `var`. + */ +#define RUBY_ATOMIC_LOAD(var) rbimpl_atomic_load(&(var)) + +/** + * Identical to #RUBY_ATOMIC_EXCHANGE, except for the return type. + * + * @param var A variable of ::rb_atomic_t. + * @param val Value to set. + * @return void + * @post `var` holds `val`.
+ */ +#define RUBY_ATOMIC_SET(var, val) rbimpl_atomic_set(&(var), (val)) + +/** + * Identical to #RUBY_ATOMIC_FETCH_ADD, except for the return type. + * + * @param var A variable of ::rb_atomic_t. + * @param val Value to add. + * @return void + * @post `var` holds `var + val`. + */ +#define RUBY_ATOMIC_ADD(var, val) rbimpl_atomic_add(&(var), (val)) + +/** + * Identical to #RUBY_ATOMIC_FETCH_SUB, except for the return type. + * + * @param var A variable of ::rb_atomic_t. + * @param val Value to subtract. + * @return void + * @post `var` holds `var - val`. + */ +#define RUBY_ATOMIC_SUB(var, val) rbimpl_atomic_sub(&(var), (val)) + +/** + * Atomically increments the value pointed by `var`. + * + * @param var A variable of ::rb_atomic_t. + * @return void + * @post `var` holds `var + 1`. + */ +#define RUBY_ATOMIC_INC(var) rbimpl_atomic_inc(&(var)) + +/** + * Atomically decrements the value pointed by `var`. + * + * @param var A variable of ::rb_atomic_t. + * @return void + * @post `var` holds `var - 1`. + */ +#define RUBY_ATOMIC_DEC(var) rbimpl_atomic_dec(&(var)) + +/** + * Identical to #RUBY_ATOMIC_INC, except it expects its argument is `size_t`. + * There are cases where ::rb_atomic_t is 32bit while `size_t` is 64bit. This + * should be used for size related operations to support such platforms. + * + * @param var A variable of `size_t`. + * @return void + * @post `var` holds `var + 1`. + */ +#define RUBY_ATOMIC_SIZE_INC(var) rbimpl_atomic_size_inc(&(var)) + +/** + * Identical to #RUBY_ATOMIC_DEC, except it expects its argument is `size_t`. + * There are cases where ::rb_atomic_t is 32bit while `size_t` is 64bit. This + * should be used for size related operations to support such platforms. + * + * @param var A variable of `size_t`. + * @return void + * @post `var` holds `var - 1`. + */ +#define RUBY_ATOMIC_SIZE_DEC(var) rbimpl_atomic_size_dec(&(var)) + +/** + * Identical to #RUBY_ATOMIC_EXCHANGE, except it expects its arguments are + * `size_t`. 
There are cases where ::rb_atomic_t is 32bit while `size_t` is + * 64bit. This should be used for size related operations to support such + * platforms. + * + * @param var A variable of `size_t`. + * @param val Value to set. + * @return What was stored in `var` before the assignment. + * @post `var` holds `val`. + */ +#define RUBY_ATOMIC_SIZE_EXCHANGE(var, val) \ + rbimpl_atomic_size_exchange(&(var), (val)) + +/** + * Identical to #RUBY_ATOMIC_CAS, except it expects its arguments are `size_t`. + * There are cases where ::rb_atomic_t is 32bit while `size_t` is 64bit. This + * should be used for size related operations to support such platforms. + * + * @param var A variable of `size_t`. + * @param oldval Expected value of `var` before the assignment. + * @param newval What you want to store at `var`. + * @retval oldval Successful assignment (`var` is now `newval`). + * @retval otherwise Something else is at `var`; not updated. + */ +#define RUBY_ATOMIC_SIZE_CAS(var, oldval, newval) \ + rbimpl_atomic_size_cas(&(var), (oldval), (newval)) + +/** + * Identical to #RUBY_ATOMIC_ADD, except it expects its arguments are `size_t`. + * There are cases where ::rb_atomic_t is 32bit while `size_t` is 64bit. This + * should be used for size related operations to support such platforms. + * + * @param var A variable of `size_t`. + * @param val Value to add. + * @return void + * @post `var` holds `var + val`. + */ +#define RUBY_ATOMIC_SIZE_ADD(var, val) rbimpl_atomic_size_add(&(var), (val)) + +/** + * Identical to #RUBY_ATOMIC_SUB, except it expects its arguments are `size_t`. + * There are cases where ::rb_atomic_t is 32bit while `size_t` is 64bit. This + * should be used for size related operations to support such platforms. + * + * @param var A variable of `size_t`. + * @param val Value to subtract. + * @return void + * @post `var` holds `var - val`. 
+ */ +#define RUBY_ATOMIC_SIZE_SUB(var, val) rbimpl_atomic_size_sub(&(var), (val)) + +/** + * Identical to #RUBY_ATOMIC_EXCHANGE, except it expects its arguments are + * `void*`. There are cases where ::rb_atomic_t is 32bit while `void*` is + * 64bit. This should be used for pointer related operations to support such + * platforms. + * + * @param var A variable of `void *`. + * @param val Value to set. + * @return What was stored in `var` before the assignment. + * @post `var` holds `val`. + * + * @internal + * + * :FIXME: this `(void*)` cast is evil! However `void*` is incompatible with + * some pointers, most notably function pointers. + */ +#define RUBY_ATOMIC_PTR_EXCHANGE(var, val) \ + RBIMPL_CAST(rbimpl_atomic_ptr_exchange((void **)&(var), (void *)val)) + +/** + * Identical to #RUBY_ATOMIC_LOAD, except it expects its arguments are `void*`. + * There are cases where ::rb_atomic_t is 32bit while `void*` is 64bit. This + * should be used for pointer related operations to support such platforms. + * + * @param var A variable of `void*` + * @return The value of `var` (without tearing) + */ +#define RUBY_ATOMIC_PTR_LOAD(var) \ + RBIMPL_CAST(rbimpl_atomic_ptr_load((void **)&var)) + +/** + * Identical to #RUBY_ATOMIC_CAS, except it expects its arguments are `void*`. + * There are cases where ::rb_atomic_t is 32bit while `void*` is 64bit. This + * should be used for pointer related operations to support such platforms. + * + * @param var A variable of `void*`. + * @param oldval Expected value of `var` before the assignment. + * @param newval What you want to store at `var`. + * @retval oldval Successful assignment (`var` is now `newval`). + * @retval otherwise Something else is at `var`; not updated. + */ +#define RUBY_ATOMIC_PTR_CAS(var, oldval, newval) \ + RBIMPL_CAST(rbimpl_atomic_ptr_cas((void **)&(var), (oldval), (newval))) + +/** + * Identical to #RUBY_ATOMIC_EXCHANGE, except it expects its arguments are + * ::VALUE.
There are cases where ::rb_atomic_t is 32bit while ::VALUE is + * 64bit. This should be used for pointer related operations to support such + * platforms. + * + * @param var A variable of ::VALUE. + * @param val Value to set. + * @return What was stored in `var` before the assignment. + * @post `var` holds `val`. + */ +#define RUBY_ATOMIC_VALUE_EXCHANGE(var, val) \ + rbimpl_atomic_value_exchange(&(var), (val)) + +/** + * Identical to #RUBY_ATOMIC_CAS, except it expects its arguments are ::VALUE. + * There are cases where ::rb_atomic_t is 32bit while ::VALUE is 64bit. This + * should be used for pointer related operations to support such platforms. + * + * @param var A variable of ::VALUE. + * @param oldval Expected value of `var` before the assignment. + * @param newval What you want to store at `var`. + * @retval oldval Successful assignment (`var` is now `newval`). + * @retval otherwise Something else is at `var`; not updated. + */ +#define RUBY_ATOMIC_VALUE_CAS(var, oldval, newval) \ + rbimpl_atomic_value_cas(&(var), (oldval), (newval)) -#elif defined HAVE_GCC_SYNC_BUILTINS -/* @shyouhei hack to support atomic operations in case of gcc. Gcc - * has its own pseudo-insns to support them.
See info, or - * http://gcc.gnu.org/onlinedocs/gcc/Atomic-Builtins.html */ +/** @cond INTERNAL_MACRO */ +RBIMPL_ATTR_ARTIFICIAL() +RBIMPL_ATTR_NOALIAS() +RBIMPL_ATTR_NONNULL((1)) +static inline rb_atomic_t +rbimpl_atomic_fetch_add(volatile rb_atomic_t *ptr, rb_atomic_t val) +{ +#if 0 -typedef unsigned int rb_atomic_t; /* Anything OK */ -# define RUBY_ATOMIC_FETCH_ADD(var, val) __sync_fetch_and_add(&(var), (val)) -# define RUBY_ATOMIC_FETCH_SUB(var, val) __sync_fetch_and_sub(&(var), (val)) -# define RUBY_ATOMIC_OR(var, val) __sync_fetch_and_or(&(var), (val)) -# define RUBY_ATOMIC_EXCHANGE(var, val) __sync_lock_test_and_set(&(var), (val)) -# define RUBY_ATOMIC_CAS(var, oldval, newval) __sync_val_compare_and_swap(&(var), (oldval), (newval)) +#elif defined(HAVE_GCC_ATOMIC_BUILTINS) + return __atomic_fetch_add(ptr, val, __ATOMIC_SEQ_CST); -# define RUBY_ATOMIC_GENERIC_MACRO 1 +#elif defined(HAVE_GCC_SYNC_BUILTINS) + return __sync_fetch_and_add(ptr, val); -#elif defined _WIN32 -#if RBIMPL_COMPILER_SINCE(MSVC, 13, 0, 0) -#pragma intrinsic(_InterlockedOr) +#elif defined(_WIN32) + return InterlockedExchangeAdd(ptr, val); + +#elif defined(__sun) && defined(HAVE_ATOMIC_H) + /* + * `atomic_add_int_nv` takes its second argument as `int`! Meanwhile our + * `rb_atomic_t` is unsigned. We cannot pass `val` as-is. We have to + * manually check integer overflow. + */ + RBIMPL_ASSERT_OR_ASSUME(val <= INT_MAX); + return atomic_add_int_nv(ptr, val) - val; + +#else +# error Unsupported platform. 
#endif -typedef LONG rb_atomic_t; +} -# define RUBY_ATOMIC_SET(var, val) InterlockedExchange(&(var), (val)) -# define RUBY_ATOMIC_INC(var) InterlockedIncrement(&(var)) -# define RUBY_ATOMIC_DEC(var) InterlockedDecrement(&(var)) -# define RUBY_ATOMIC_FETCH_ADD(var, val) InterlockedExchangeAdd(&(var), (val)) -# define RUBY_ATOMIC_FETCH_SUB(var, val) InterlockedExchangeAdd(&(var), -(LONG)(val)) -#if defined __GNUC__ -# define RUBY_ATOMIC_OR(var, val) __asm__("lock\n\t" "orl\t%1, %0" : "=m"(var) : "Ir"(val)) -#elif RBIMPL_COMPILER_BEFORE(MSVC, 13, 0, 0) -# define RUBY_ATOMIC_OR(var, val) rb_w32_atomic_or(&(var), (val)) +RBIMPL_ATTR_ARTIFICIAL() +RBIMPL_ATTR_NOALIAS() +RBIMPL_ATTR_NONNULL((1)) static inline void -rb_w32_atomic_or(volatile rb_atomic_t *var, rb_atomic_t val) +rbimpl_atomic_add(volatile rb_atomic_t *ptr, rb_atomic_t val) { -#ifdef _M_IX86 - __asm mov eax, var; - __asm mov ecx, val; - __asm lock or [eax], ecx; +#if 0 + +#elif defined(HAVE_GCC_ATOMIC_BUILTINS) + /* + * GCC on amd64 is smart enough to detect this `__atomic_add_fetch`'s + * return value is not used, then compiles it into single `LOCK ADD` + * instruction. + */ + __atomic_add_fetch(ptr, val, __ATOMIC_SEQ_CST); + +#elif defined(HAVE_GCC_SYNC_BUILTINS) + __sync_add_and_fetch(ptr, val); + +#elif defined(_WIN32) + /* + * `InterlockedExchangeAdd` is `LOCK XADD`. It seems there also is + * `_InterlockedAdd` intrinsic in ARM Windows but not for x86? Sticking to + * `InterlockedExchangeAdd` for better portability. + */ + InterlockedExchangeAdd(ptr, val); + +#elif defined(__sun) && defined(HAVE_ATOMIC_H) + /* Ditto for `atomic_add_int_nv`. */ + RBIMPL_ASSERT_OR_ASSUME(val <= INT_MAX); + atomic_add_int(ptr, val); + #else -#error unsupported architecture +# error Unsupported platform. 
#endif } + +RBIMPL_ATTR_ARTIFICIAL() +RBIMPL_ATTR_NOALIAS() +RBIMPL_ATTR_NONNULL((1)) +static inline void +rbimpl_atomic_size_add(volatile size_t *ptr, size_t val) +{ +#if 0 + +#elif defined(HAVE_GCC_ATOMIC_BUILTINS) + __atomic_add_fetch(ptr, val, __ATOMIC_SEQ_CST); + +#elif defined(HAVE_GCC_SYNC_BUILTINS) + __sync_add_and_fetch(ptr, val); + +#elif defined(_WIN64) + /* Ditto for `InterlockedExchangeAdd`. */ + InterlockedExchangeAdd64(ptr, val); + +#elif defined(__sun) && defined(HAVE_ATOMIC_H) && (defined(_LP64) || defined(_I32LPx)) + /* Ditto for `atomic_add_int_nv`. */ + RBIMPL_ASSERT_OR_ASSUME(val <= LONG_MAX); + atomic_add_long(ptr, val); + #else -# define RUBY_ATOMIC_OR(var, val) _InterlockedOr(&(var), (val)) + RBIMPL_STATIC_ASSERT(size_of_rb_atomic_t, sizeof *ptr == sizeof(rb_atomic_t)); + + volatile rb_atomic_t *const tmp = RBIMPL_CAST((volatile rb_atomic_t *)ptr); + rbimpl_atomic_add(tmp, val); + #endif -# define RUBY_ATOMIC_EXCHANGE(var, val) InterlockedExchange(&(var), (val)) -# define RUBY_ATOMIC_CAS(var, oldval, newval) InterlockedCompareExchange(&(var), (newval), (oldval)) -# if RBIMPL_COMPILER_BEFORE(MSVC, 13, 0, 0) -static inline rb_atomic_t -rb_w32_atomic_cas(volatile rb_atomic_t *var, rb_atomic_t oldval, rb_atomic_t newval) +} + +RBIMPL_ATTR_ARTIFICIAL() +RBIMPL_ATTR_NOALIAS() +RBIMPL_ATTR_NONNULL((1)) +static inline void +rbimpl_atomic_inc(volatile rb_atomic_t *ptr) { - return (rb_atomic_t)InterlockedCompareExchange((PVOID *)var, (PVOID)newval, (PVOID)oldval); -} -# undef RUBY_ATOMIC_CAS -# define RUBY_ATOMIC_CAS(var, oldval, newval) rb_w32_atomic_cas(&(var), (oldval), (newval)) -# endif -# ifdef _M_AMD64 -# define RUBY_ATOMIC_SIZE_ADD(var, val) InterlockedExchangeAdd64((LONG_LONG *)&(var), (val)) -# define RUBY_ATOMIC_SIZE_SUB(var, val) InterlockedExchangeAdd64((LONG_LONG *)&(var), -(LONG)(val)) -# define RUBY_ATOMIC_SIZE_INC(var) InterlockedIncrement64(&(var)) -# define RUBY_ATOMIC_SIZE_DEC(var) InterlockedDecrement64(&(var)) -# define
RUBY_ATOMIC_SIZE_EXCHANGE(var, val) InterlockedExchange64(&(var), (val)) -# define RUBY_ATOMIC_SIZE_CAS(var, oldval, newval) InterlockedCompareExchange64(&(var), (newval), (oldval)) -# else -# define RUBY_ATOMIC_SIZE_ADD(var, val) InterlockedExchangeAdd((LONG *)&(var), (val)) -# define RUBY_ATOMIC_SIZE_SUB(var, val) InterlockedExchangeAdd((LONG *)&(var), -(LONG)(val)) -# define RUBY_ATOMIC_SIZE_INC(var) InterlockedIncrement((LONG *)&(var)) -# define RUBY_ATOMIC_SIZE_DEC(var) InterlockedDecrement((LONG *)&(var)) -# define RUBY_ATOMIC_SIZE_EXCHANGE(var, val) InterlockedExchange((LONG *)&(var), (val)) -# endif - -# ifdef InterlockedExchangePointer -# define RUBY_ATOMIC_PTR_EXCHANGE(var, val) InterlockedExchangePointer((PVOID volatile *)&(var), (PVOID)(val)) -# endif /* See below for definitions of other situations */ +#if 0 + +#elif defined(HAVE_GCC_ATOMIC_BUILTINS) || defined(HAVE_GCC_SYNC_BUILTINS) + rbimpl_atomic_add(ptr, 1); + +#elif defined(_WIN32) + InterlockedIncrement(ptr); #elif defined(__sun) && defined(HAVE_ATOMIC_H) -#include <atomic.h> -typedef unsigned int rb_atomic_t; + atomic_inc_uint(ptr); -# define RUBY_ATOMIC_INC(var) atomic_inc_uint(&(var)) -# define RUBY_ATOMIC_DEC(var) atomic_dec_uint(&(var)) -# define RUBY_ATOMIC_FETCH_ADD(var, val) rb_atomic_fetch_add(&(var), (val)) -# define RUBY_ATOMIC_FETCH_SUB(var, val) rb_atomic_fetch_sub(&(var), (val)) -# define RUBY_ATOMIC_ADD(var, val) atomic_add_uint(&(var), (val)) -# define RUBY_ATOMIC_SUB(var, val) atomic_sub_uint(&(var), (val)) -# define RUBY_ATOMIC_OR(var, val) atomic_or_uint(&(var), (val)) -# define RUBY_ATOMIC_EXCHANGE(var, val) atomic_swap_uint(&(var), (val)) -# define RUBY_ATOMIC_CAS(var, oldval, newval) atomic_cas_uint(&(var), (oldval), (newval)) +#else + rbimpl_atomic_add(ptr, 1); -static inline rb_atomic_t -rb_atomic_fetch_add(volatile rb_atomic_t *var, rb_atomic_t val) +#endif +} + +RBIMPL_ATTR_ARTIFICIAL() +RBIMPL_ATTR_NOALIAS() +RBIMPL_ATTR_NONNULL((1)) +static inline void 
+rbimpl_atomic_size_inc(volatile size_t *ptr) { - return atomic_add_int_nv(var, val) - val; +#if 0 + +#elif defined(HAVE_GCC_ATOMIC_BUILTINS) || defined(HAVE_GCC_SYNC_BUILTINS) + rbimpl_atomic_size_add(ptr, 1); + +#elif defined(_WIN64) + InterlockedIncrement64(ptr); + +#elif defined(__sun) && defined(HAVE_ATOMIC_H) && (defined(_LP64) || defined(_I32LPx)) + atomic_inc_ulong(ptr); + +#else + RBIMPL_STATIC_ASSERT(size_of_size_t, sizeof *ptr == sizeof(rb_atomic_t)); + + rbimpl_atomic_size_add(ptr, 1); + +#endif } +RBIMPL_ATTR_ARTIFICIAL() +RBIMPL_ATTR_NOALIAS() +RBIMPL_ATTR_NONNULL((1)) static inline rb_atomic_t -rb_atomic_fetch_sub(volatile rb_atomic_t *var, rb_atomic_t val) +rbimpl_atomic_fetch_sub(volatile rb_atomic_t *ptr, rb_atomic_t val) { - return atomic_add_int_nv(var, (rb_atomic_t)(-(int)val)) + val; -} +#if 0 + +#elif defined(HAVE_GCC_ATOMIC_BUILTINS) + return __atomic_fetch_sub(ptr, val, __ATOMIC_SEQ_CST); -# if defined(_LP64) || defined(_I32LPx) -# define RUBY_ATOMIC_SIZE_ADD(var, val) atomic_add_long(&(var), (val)) -# define RUBY_ATOMIC_SIZE_SUB(var, val) atomic_add_long(&(var), -(val)) -# define RUBY_ATOMIC_SIZE_INC(var) atomic_inc_ulong(&(var)) -# define RUBY_ATOMIC_SIZE_DEC(var) atomic_dec_ulong(&(var)) -# define RUBY_ATOMIC_SIZE_EXCHANGE(var, val) atomic_swap_ulong(&(var), (val)) -# define RUBY_ATOMIC_SIZE_CAS(var, oldval, val) atomic_cas_ulong(&(var), (oldval), (val)) -# else -# define RUBY_ATOMIC_SIZE_ADD(var, val) atomic_add_int(&(var), (val)) -# define RUBY_ATOMIC_SIZE_SUB(var, val) atomic_add_int(&(var), -(val)) -# define RUBY_ATOMIC_SIZE_INC(var) atomic_inc_uint(&(var)) -# define RUBY_ATOMIC_SIZE_DEC(var) atomic_dec_uint(&(var)) -# define RUBY_ATOMIC_SIZE_EXCHANGE(var, val) atomic_swap_uint(&(var), (val)) -# endif +#elif defined(HAVE_GCC_SYNC_BUILTINS) + return __sync_fetch_and_sub(ptr, val); + +#elif defined(_WIN32) + /* rb_atomic_t is signed here! Safe to do `-val`. 
*/ + return InterlockedExchangeAdd(ptr, -val); + +#elif defined(__sun) && defined(HAVE_ATOMIC_H) + /* Ditto for `rbimpl_atomic_fetch_add`. */ + const signed neg = -1; + RBIMPL_ASSERT_OR_ASSUME(val <= INT_MAX); + return atomic_add_int_nv(ptr, neg * val) + val; #else -# error No atomic operation found +# error Unsupported platform. #endif +} + +RBIMPL_ATTR_ARTIFICIAL() +RBIMPL_ATTR_NOALIAS() +RBIMPL_ATTR_NONNULL((1)) +static inline void +rbimpl_atomic_sub(volatile rb_atomic_t *ptr, rb_atomic_t val) +{ +#if 0 + +#elif defined(HAVE_GCC_ATOMIC_BUILTINS) + __atomic_sub_fetch(ptr, val, __ATOMIC_SEQ_CST); + +#elif defined(HAVE_GCC_SYNC_BUILTINS) + __sync_sub_and_fetch(ptr, val); + +#elif defined(_WIN32) + InterlockedExchangeAdd(ptr, -val); + +#elif defined(__sun) && defined(HAVE_ATOMIC_H) + const signed neg = -1; + RBIMPL_ASSERT_OR_ASSUME(val <= INT_MAX); + atomic_add_int(ptr, neg * val); -#ifndef RUBY_ATOMIC_SET -# define RUBY_ATOMIC_SET(var, val) (void)RUBY_ATOMIC_EXCHANGE(var, val) +#else +# error Unsupported platform. 
#endif +} + +RBIMPL_ATTR_ARTIFICIAL() +RBIMPL_ATTR_NOALIAS() +RBIMPL_ATTR_NONNULL((1)) +static inline void +rbimpl_atomic_size_sub(volatile size_t *ptr, size_t val) +{ +#if 0 + +#elif defined(HAVE_GCC_ATOMIC_BUILTINS) + __atomic_sub_fetch(ptr, val, __ATOMIC_SEQ_CST); + +#elif defined(HAVE_GCC_SYNC_BUILTINS) + __sync_sub_and_fetch(ptr, val); + +#elif defined(_WIN64) + const ssize_t neg = -1; + InterlockedExchangeAdd64(ptr, neg * val); + +#elif defined(__sun) && defined(HAVE_ATOMIC_H) && (defined(_LP64) || defined(_I32LPx)) + const signed neg = -1; + RBIMPL_ASSERT_OR_ASSUME(val <= LONG_MAX); + atomic_add_long(ptr, neg * val); + +#else + RBIMPL_STATIC_ASSERT(size_of_rb_atomic_t, sizeof *ptr == sizeof(rb_atomic_t)); + + volatile rb_atomic_t *const tmp = RBIMPL_CAST((volatile rb_atomic_t *)ptr); + rbimpl_atomic_sub(tmp, val); -#ifndef RUBY_ATOMIC_ADD -# define RUBY_ATOMIC_ADD(var, val) (void)RUBY_ATOMIC_FETCH_ADD(var, val) #endif +} + +RBIMPL_ATTR_ARTIFICIAL() +RBIMPL_ATTR_NOALIAS() +RBIMPL_ATTR_NONNULL((1)) +static inline void +rbimpl_atomic_dec(volatile rb_atomic_t *ptr) +{ +#if 0 + +#elif defined(HAVE_GCC_ATOMIC_BUILTINS) || defined(HAVE_GCC_SYNC_BUILTINS) + rbimpl_atomic_sub(ptr, 1); + +#elif defined(_WIN32) + InterlockedDecrement(ptr); + +#elif defined(__sun) && defined(HAVE_ATOMIC_H) + atomic_dec_uint(ptr); + +#else + rbimpl_atomic_sub(ptr, 1); -#ifndef RUBY_ATOMIC_SUB -# define RUBY_ATOMIC_SUB(var, val) (void)RUBY_ATOMIC_FETCH_SUB(var, val) #endif +} + +RBIMPL_ATTR_ARTIFICIAL() +RBIMPL_ATTR_NOALIAS() +RBIMPL_ATTR_NONNULL((1)) +static inline void +rbimpl_atomic_size_dec(volatile size_t *ptr) +{ +#if 0 + +#elif defined(HAVE_GCC_ATOMIC_BUILTINS) || defined(HAVE_GCC_SYNC_BUILTINS) + rbimpl_atomic_size_sub(ptr, 1); + +#elif defined(_WIN64) + InterlockedDecrement64(ptr); + +#elif defined(__sun) && defined(HAVE_ATOMIC_H) && (defined(_LP64) || defined(_I32LPx)) + atomic_dec_ulong(ptr); + +#else + RBIMPL_STATIC_ASSERT(size_of_size_t, sizeof *ptr == sizeof(rb_atomic_t)); + 
+ rbimpl_atomic_size_sub(ptr, 1); -#ifndef RUBY_ATOMIC_INC -# define RUBY_ATOMIC_INC(var) RUBY_ATOMIC_ADD(var, 1) #endif +} + +RBIMPL_ATTR_ARTIFICIAL() +RBIMPL_ATTR_NOALIAS() +RBIMPL_ATTR_NONNULL((1)) +static inline void +rbimpl_atomic_or(volatile rb_atomic_t *ptr, rb_atomic_t val) +{ +#if 0 + +#elif defined(HAVE_GCC_ATOMIC_BUILTINS) + __atomic_or_fetch(ptr, val, __ATOMIC_SEQ_CST); + +#elif defined(HAVE_GCC_SYNC_BUILTINS) + __sync_or_and_fetch(ptr, val); + +#elif RBIMPL_COMPILER_SINCE(MSVC, 13, 0, 0) + _InterlockedOr(ptr, val); + +#elif defined(_WIN32) && defined(__GNUC__) + /* This was for old MinGW. Maybe not needed any longer? */ + __asm__( + "lock\n\t" + "orl\t%1, %0" + : "=m"(ptr) + : "Ir"(val)); + +#elif defined(_WIN32) && defined(_M_IX86) + __asm mov eax, ptr; + __asm mov ecx, val; + __asm lock or [eax], ecx; + +#elif defined(__sun) && defined(HAVE_ATOMIC_H) + atomic_or_uint(ptr, val); -#ifndef RUBY_ATOMIC_DEC -# define RUBY_ATOMIC_DEC(var) RUBY_ATOMIC_SUB(var, 1) +#else +# error Unsupported platform. #endif +} -#ifndef RUBY_ATOMIC_SIZE_INC -# define RUBY_ATOMIC_SIZE_INC(var) RUBY_ATOMIC_INC(var) +/* Nobody uses this but for theoretical backwards compatibility... 
*/ +#if RBIMPL_COMPILER_BEFORE(MSVC, 13, 0, 0) +static inline rb_atomic_t +rb_w32_atomic_or(volatile rb_atomic_t *var, rb_atomic_t val) +{ + return rbimpl_atomic_or(var, val); +} #endif -#ifndef RUBY_ATOMIC_SIZE_DEC -# define RUBY_ATOMIC_SIZE_DEC(var) RUBY_ATOMIC_DEC(var) +RBIMPL_ATTR_ARTIFICIAL() +RBIMPL_ATTR_NOALIAS() +RBIMPL_ATTR_NONNULL((1)) +static inline rb_atomic_t +rbimpl_atomic_exchange(volatile rb_atomic_t *ptr, rb_atomic_t val) +{ +#if 0 + +#elif defined(HAVE_GCC_ATOMIC_BUILTINS) + return __atomic_exchange_n(ptr, val, __ATOMIC_SEQ_CST); + +#elif defined(HAVE_GCC_SYNC_BUILTINS) + return __sync_lock_test_and_set(ptr, val); + +#elif defined(_WIN32) + return InterlockedExchange(ptr, val); + +#elif defined(__sun) && defined(HAVE_ATOMIC_H) + return atomic_swap_uint(ptr, val); + +#else +# error Unsupported platform. #endif +} + +RBIMPL_ATTR_ARTIFICIAL() +RBIMPL_ATTR_NOALIAS() +RBIMPL_ATTR_NONNULL((1)) +static inline size_t +rbimpl_atomic_size_exchange(volatile size_t *ptr, size_t val) +{ +#if 0 + +#elif defined(HAVE_GCC_ATOMIC_BUILTINS) + return __atomic_exchange_n(ptr, val, __ATOMIC_SEQ_CST); + +#elif defined(HAVE_GCC_SYNC_BUILTINS) + return __sync_lock_test_and_set(ptr, val); + +#elif defined(_WIN64) + return InterlockedExchange64(ptr, val); + +#elif defined(__sun) && defined(HAVE_ATOMIC_H) && (defined(_LP64) || defined(_I32LPx)) + return atomic_swap_ulong(ptr, val); + +#else + RBIMPL_STATIC_ASSERT(size_of_size_t, sizeof *ptr == sizeof(rb_atomic_t)); + + volatile rb_atomic_t *const tmp = RBIMPL_CAST((volatile rb_atomic_t *)ptr); + const rb_atomic_t ret = rbimpl_atomic_exchange(tmp, val); + return RBIMPL_CAST((size_t)ret); -#ifndef RUBY_ATOMIC_SIZE_EXCHANGE -# define RUBY_ATOMIC_SIZE_EXCHANGE(var, val) RUBY_ATOMIC_EXCHANGE(var, val) #endif +} + +RBIMPL_ATTR_ARTIFICIAL() +RBIMPL_ATTR_NOALIAS() +RBIMPL_ATTR_NONNULL((1)) +static inline void * +rbimpl_atomic_ptr_exchange(void *volatile *ptr, const void *val) +{ +#if 0 + +#elif defined(InterlockedExchangePointer) + 
/* const_cast */ + PVOID *pptr = RBIMPL_CAST((PVOID *)ptr); + PVOID pval = RBIMPL_CAST((PVOID)val); + return InterlockedExchangePointer(pptr, pval); + +#elif defined(__sun) && defined(HAVE_ATOMIC_H) + return atomic_swap_ptr(ptr, RBIMPL_CAST((void *)val)); + +#else + RBIMPL_STATIC_ASSERT(sizeof_voidp, sizeof *ptr == sizeof(size_t)); + + const size_t sval = RBIMPL_CAST((size_t)val); + volatile size_t *const sptr = RBIMPL_CAST((volatile size_t *)ptr); + const size_t sret = rbimpl_atomic_size_exchange(sptr, sval); + return RBIMPL_CAST((void *)sret); -#ifndef RUBY_ATOMIC_SIZE_CAS -# define RUBY_ATOMIC_SIZE_CAS(var, oldval, val) RUBY_ATOMIC_CAS(var, oldval, val) #endif +} + +RBIMPL_ATTR_ARTIFICIAL() +RBIMPL_ATTR_NOALIAS() +RBIMPL_ATTR_NONNULL((1)) +static inline VALUE +rbimpl_atomic_value_exchange(volatile VALUE *ptr, VALUE val) +{ + RBIMPL_STATIC_ASSERT(sizeof_value, sizeof *ptr == sizeof(size_t)); + + const size_t sval = RBIMPL_CAST((size_t)val); + volatile size_t *const sptr = RBIMPL_CAST((volatile size_t *)ptr); + const size_t sret = rbimpl_atomic_size_exchange(sptr, sval); + return RBIMPL_CAST((VALUE)sret); +} -#ifndef RUBY_ATOMIC_SIZE_ADD -# define RUBY_ATOMIC_SIZE_ADD(var, val) RUBY_ATOMIC_ADD(var, val) +RBIMPL_ATTR_ARTIFICIAL() +RBIMPL_ATTR_NOALIAS() +RBIMPL_ATTR_NONNULL((1)) +static inline rb_atomic_t +rbimpl_atomic_load(volatile rb_atomic_t *ptr) +{ +#if 0 + +#elif defined(HAVE_GCC_ATOMIC_BUILTINS) + return __atomic_load_n(ptr, __ATOMIC_SEQ_CST); +#else + return rbimpl_atomic_fetch_add(ptr, 0); #endif +} + +RBIMPL_ATTR_ARTIFICIAL() +RBIMPL_ATTR_NOALIAS() +RBIMPL_ATTR_NONNULL((1)) +static inline void +rbimpl_atomic_set(volatile rb_atomic_t *ptr, rb_atomic_t val) +{ +#if 0 + +#elif defined(HAVE_GCC_ATOMIC_BUILTINS) + __atomic_store_n(ptr, val, __ATOMIC_SEQ_CST); + +#else + /* Maybe std::atomic<rb_atomic_t>::store can be faster? 
*/ + rbimpl_atomic_exchange(ptr, val); -#ifndef RUBY_ATOMIC_SIZE_SUB -# define RUBY_ATOMIC_SIZE_SUB(var, val) RUBY_ATOMIC_SUB(var, val) #endif +} + +RBIMPL_ATTR_ARTIFICIAL() +RBIMPL_ATTR_NOALIAS() +RBIMPL_ATTR_NONNULL((1)) +static inline rb_atomic_t +rbimpl_atomic_cas(volatile rb_atomic_t *ptr, rb_atomic_t oldval, rb_atomic_t newval) +{ +#if 0 + +#elif defined(HAVE_GCC_ATOMIC_BUILTINS) + __atomic_compare_exchange_n( + ptr, &oldval, newval, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); + return oldval; + +#elif defined(HAVE_GCC_SYNC_BUILTINS) + return __sync_val_compare_and_swap(ptr, oldval, newval); -#if RUBY_ATOMIC_GENERIC_MACRO -# ifndef RUBY_ATOMIC_PTR_EXCHANGE -# define RUBY_ATOMIC_PTR_EXCHANGE(var, val) RUBY_ATOMIC_EXCHANGE(var, val) -# endif +#elif RBIMPL_COMPILER_SINCE(MSVC, 13, 0, 0) + return InterlockedCompareExchange(ptr, newval, oldval); -# ifndef RUBY_ATOMIC_PTR_CAS -# define RUBY_ATOMIC_PTR_CAS(var, oldval, newval) RUBY_ATOMIC_CAS(var, oldval, newval) -# endif +#elif defined(_WIN32) + PVOID *pptr = RBIMPL_CAST((PVOID *)ptr); + PVOID pold = RBIMPL_CAST((PVOID)oldval); + PVOID pnew = RBIMPL_CAST((PVOID)newval); + PVOID pret = InterlockedCompareExchange(pptr, pnew, pold); + return RBIMPL_CAST((rb_atomic_t)pret); -# ifndef RUBY_ATOMIC_VALUE_EXCHANGE -# define RUBY_ATOMIC_VALUE_EXCHANGE(var, val) RUBY_ATOMIC_EXCHANGE(var, val) -# endif +#elif defined(__sun) && defined(HAVE_ATOMIC_H) + return atomic_cas_uint(ptr, oldval, newval); -# ifndef RUBY_ATOMIC_VALUE_CAS -# define RUBY_ATOMIC_VALUE_CAS(var, oldval, val) RUBY_ATOMIC_CAS(var, oldval, val) -# endif +#else +# error Unsupported platform. #endif +} -#ifndef RUBY_ATOMIC_PTR_EXCHANGE -# if SIZEOF_VOIDP == SIZEOF_SIZE_T -# define RUBY_ATOMIC_PTR_EXCHANGE(var, val) (void *)RUBY_ATOMIC_SIZE_EXCHANGE(*(size_t *)&(var), (size_t)(val)) -# else -# error No atomic exchange for void* -# endif +/* Nobody uses this but for theoretical backwards compatibility... 
*/ +#if RBIMPL_COMPILER_BEFORE(MSVC, 13, 0, 0) +static inline rb_atomic_t +rb_w32_atomic_cas(volatile rb_atomic_t *var, rb_atomic_t oldval, rb_atomic_t newval) +{ + return rbimpl_atomic_cas(var, oldval, newval); +} #endif -#ifndef RUBY_ATOMIC_PTR_CAS -# if SIZEOF_VOIDP == SIZEOF_SIZE_T -# define RUBY_ATOMIC_PTR_CAS(var, oldval, val) (void *)RUBY_ATOMIC_SIZE_CAS(*(size_t *)&(var), (size_t)(oldval), (size_t)(val)) -# else -# error No atomic compare-and-set for void* -# endif +RBIMPL_ATTR_ARTIFICIAL() +RBIMPL_ATTR_NOALIAS() +RBIMPL_ATTR_NONNULL((1)) +static inline size_t +rbimpl_atomic_size_cas(volatile size_t *ptr, size_t oldval, size_t newval) +{ +#if 0 + +#elif defined(HAVE_GCC_ATOMIC_BUILTINS) + __atomic_compare_exchange_n( + ptr, &oldval, newval, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); + return oldval; + +#elif defined(HAVE_GCC_SYNC_BUILTINS) + return __sync_val_compare_and_swap(ptr, oldval, newval); + +#elif defined(_WIN64) + return InterlockedCompareExchange64(ptr, newval, oldval); + +#elif defined(__sun) && defined(HAVE_ATOMIC_H) && (defined(_LP64) || defined(_I32LPx)) + return atomic_cas_ulong(ptr, oldval, newval); + +#else + RBIMPL_STATIC_ASSERT(size_of_size_t, sizeof *ptr == sizeof(rb_atomic_t)); + + volatile rb_atomic_t *tmp = RBIMPL_CAST((volatile rb_atomic_t *)ptr); + return rbimpl_atomic_cas(tmp, oldval, newval); + #endif +} + +RBIMPL_ATTR_ARTIFICIAL() +RBIMPL_ATTR_NOALIAS() +RBIMPL_ATTR_NONNULL((1)) +static inline void * +rbimpl_atomic_ptr_cas(void **ptr, const void *oldval, const void *newval) +{ +#if 0 + +#elif defined(InterlockedExchangePointer) + /* ... Can we say that InterlockedCompareExchangePtr surly exists when + * InterlockedExchangePointer is defined? 
Seems so but...?*/ + PVOID *pptr = RBIMPL_CAST((PVOID *)ptr); + PVOID pold = RBIMPL_CAST((PVOID)oldval); + PVOID pnew = RBIMPL_CAST((PVOID)newval); + return InterlockedCompareExchangePointer(pptr, pnew, pold); + +#elif defined(__sun) && defined(HAVE_ATOMIC_H) + void *pold = RBIMPL_CAST((void *)oldval); + void *pnew = RBIMPL_CAST((void *)newval); + return atomic_cas_ptr(ptr, pold, pnew); + + +#else + RBIMPL_STATIC_ASSERT(sizeof_voidp, sizeof *ptr == sizeof(size_t)); + + const size_t snew = RBIMPL_CAST((size_t)newval); + const size_t sold = RBIMPL_CAST((size_t)oldval); + volatile size_t *const sptr = RBIMPL_CAST((volatile size_t *)ptr); + const size_t sret = rbimpl_atomic_size_cas(sptr, sold, snew); + return RBIMPL_CAST((void *)sret); -#ifndef RUBY_ATOMIC_VALUE_EXCHANGE -# if SIZEOF_VALUE == SIZEOF_SIZE_T -# define RUBY_ATOMIC_VALUE_EXCHANGE(var, val) RUBY_ATOMIC_SIZE_EXCHANGE(*(size_t *)&(var), (size_t)(val)) -# else -# error No atomic exchange for VALUE -# endif #endif +} -#ifndef RUBY_ATOMIC_VALUE_CAS -# if SIZEOF_VALUE == SIZEOF_SIZE_T -# define RUBY_ATOMIC_VALUE_CAS(var, oldval, val) RUBY_ATOMIC_SIZE_CAS(*(size_t *)&(var), (size_t)(oldval), (size_t)(val)) -# else -# error No atomic compare-and-set for VALUE -# endif +RBIMPL_ATTR_ARTIFICIAL() +RBIMPL_ATTR_NOALIAS() +RBIMPL_ATTR_NONNULL((1)) +static inline void * +rbimpl_atomic_ptr_load(void **ptr) +{ +#if 0 + +#elif defined(HAVE_GCC_ATOMIC_BUILTINS) + return __atomic_load_n(ptr, __ATOMIC_SEQ_CST); +#else + void *val = *ptr; + return rbimpl_atomic_ptr_cas(ptr, val, val); #endif +} + +RBIMPL_ATTR_ARTIFICIAL() +RBIMPL_ATTR_NOALIAS() +RBIMPL_ATTR_NONNULL((1)) +static inline VALUE +rbimpl_atomic_value_cas(volatile VALUE *ptr, VALUE oldval, VALUE newval) +{ + RBIMPL_STATIC_ASSERT(sizeof_value, sizeof *ptr == sizeof(size_t)); + const size_t snew = RBIMPL_CAST((size_t)newval); + const size_t sold = RBIMPL_CAST((size_t)oldval); + volatile size_t *const sptr = RBIMPL_CAST((volatile size_t *)ptr); + const size_t sret = 
rbimpl_atomic_size_cas(sptr, sold, snew); + return RBIMPL_CAST((VALUE)sret); +} +/** @endcond */ #endif /* RUBY_ATOMIC_H */ diff --git a/include/ruby/backward.h b/include/ruby/backward.h index 445f4cc69c..f804c2c36e 100644 --- a/include/ruby/backward.h +++ b/include/ruby/backward.h @@ -1,7 +1,6 @@ #ifndef RUBY_RUBY_BACKWARD_H /*-*-C++-*-vi:se ft=cpp:*/ #define RUBY_RUBY_BACKWARD_H 1 /** - * @file * @author Ruby developers <ruby-core@ruby-lang.org> * @copyright This file is a part of the programming language Ruby. * Permission is hereby granted, to either redistribute and/or @@ -14,59 +13,13 @@ #define RBIMPL_ATTR_DEPRECATED_SINCE(ver) RBIMPL_ATTR_DEPRECATED(("since " #ver)) #define RBIMPL_ATTR_DEPRECATED_INTERNAL(ver) RBIMPL_ATTR_DEPRECATED(("since "#ver", also internal")) +#define RBIMPL_ATTR_DEPRECATED_INTERNAL_ONLY() RBIMPL_ATTR_DEPRECATED(("only for internal use")) -/* eval.c */ -RBIMPL_ATTR_DEPRECATED_SINCE(2.2) void rb_disable_super(); -RBIMPL_ATTR_DEPRECATED_SINCE(2.2) void rb_enable_super(); - -/* hash.c */ -RBIMPL_ATTR_DEPRECATED_SINCE(2.2) void rb_hash_iter_lev(); -RBIMPL_ATTR_DEPRECATED_SINCE(2.2) void rb_hash_ifnone(); - -/* string.c */ -RBIMPL_ATTR_DEPRECATED_SINCE(2.2) void rb_str_associate(); -RBIMPL_ATTR_DEPRECATED_SINCE(2.2) void rb_str_associated(); - -/* variable.c */ -RBIMPL_ATTR_DEPRECATED_SINCE(2.5) void rb_autoload(); - -/* eval.c */ -RBIMPL_ATTR_DEPRECATED_INTERNAL(2.6) void rb_frozen_class_p(); -RBIMPL_ATTR_DEPRECATED_INTERNAL(2.7) void rb_exec_end_proc(); - -/* error.c */ -RBIMPL_ATTR_DEPRECATED_INTERNAL(2.3) void rb_compile_error(); -RBIMPL_ATTR_DEPRECATED_INTERNAL(2.3) void rb_compile_error_with_enc(); -RBIMPL_ATTR_DEPRECATED_INTERNAL(2.3) void rb_compile_error_append(); - -/* gc.c */ -RBIMPL_ATTR_DEPRECATED_INTERNAL(2.7) void rb_gc_call_finalizer_at_exit(); - -/* signal.c */ -RBIMPL_ATTR_DEPRECATED_INTERNAL(2.7) void rb_trap_exit(); - -/* struct.c */ -RBIMPL_ATTR_DEPRECATED_INTERNAL(2.4) void rb_struct_ptr(); - -/* thread.c */ 
-RBIMPL_ATTR_DEPRECATED_INTERNAL(2.7) void rb_clear_trace_func(); - -/* variable.c */ -RBIMPL_ATTR_DEPRECATED_INTERNAL(2.7) void rb_generic_ivar_table(); -RBIMPL_ATTR_DEPRECATED_INTERNAL(2.6) NORETURN(VALUE rb_mod_const_missing(VALUE, VALUE)); +RBIMPL_ATTR_DEPRECATED_INTERNAL_ONLY() void rb_clear_constant_cache(void); /* from version.c */ #if defined(RUBY_SHOW_COPYRIGHT_TO_DIE) && !!(RUBY_SHOW_COPYRIGHT_TO_DIE+0) -/* for source code backward compatibility */ -RBIMPL_ATTR_DEPRECATED_SINCE(2.4) -static inline int -ruby_show_copyright_to_die(int exitcode) -{ - ruby_show_copyright(); - return exitcode; -} -#define ruby_show_copyright() /* defer EXIT_SUCCESS */ \ - (exit(ruby_show_copyright_to_die(EXIT_SUCCESS))) +# error RUBY_SHOW_COPYRIGHT_TO_DIE is deprecated #endif #endif /* RUBY_RUBY_BACKWARD_H */ diff --git a/include/ruby/backward/2/assume.h b/include/ruby/backward/2/assume.h index 3fbb81439a..d148710127 100644 --- a/include/ruby/backward/2/assume.h +++ b/include/ruby/backward/2/assume.h @@ -17,24 +17,37 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. 
* @brief Defines #ASSUME / #RB_LIKELY / #UNREACHABLE */ #include "ruby/internal/config.h" #include "ruby/internal/assume.h" #include "ruby/internal/has/builtin.h" -#undef ASSUME /* Kill config.h definition */ -#undef UNREACHABLE /* Kill config.h definition */ -#define ASSUME RBIMPL_ASSUME -#define UNREACHABLE RBIMPL_UNREACHABLE() -#define UNREACHABLE_RETURN RBIMPL_UNREACHABLE_RETURN +#define ASSUME RBIMPL_ASSUME /**< @old{RBIMPL_ASSUME} */ +#define UNREACHABLE RBIMPL_UNREACHABLE() /**< @old{RBIMPL_UNREACHABLE} */ +#define UNREACHABLE_RETURN RBIMPL_UNREACHABLE_RETURN /**< @old{RBIMPL_UNREACHABLE_RETURN} */ /* likely */ #if RBIMPL_HAS_BUILTIN(__builtin_expect) +/** + * Asserts that the given Boolean expression likely holds. + * + * @param x An expression that likely holds. + * + * @note Consider this macro carefully. It has been here since when CPUs were + * like babies, but contemporary processors are beasts. They are + * smarter than mare mortals like us today. Their branch predictions + * highly expectedly outperform your use of this macro. + */ # define RB_LIKELY(x) (__builtin_expect(!!(x), 1)) -# define RB_UNLIKELY(x) (__builtin_expect(!!(x), 0)) +/** + * Asserts that the given Boolean expression likely doesn't hold. + * + * @param x An expression that likely doesn't hold. + */ +# define RB_UNLIKELY(x) (__builtin_expect(!!(x), 0)) #else # define RB_LIKELY(x) (x) # define RB_UNLIKELY(x) (x) diff --git a/include/ruby/backward/2/attributes.h b/include/ruby/backward/2/attributes.h index 0389f82190..916d9e9d5b 100644 --- a/include/ruby/backward/2/attributes.h +++ b/include/ruby/backward/2/attributes.h @@ -1,7 +1,6 @@ #ifndef RUBY_BACKWARD2_ATTRIBUTES_H /*-*-C++-*-vi:se ft=cpp:*/ #define RUBY_BACKWARD2_ATTRIBUTES_H /** - * @file * @author Ruby developers <ruby-core@ruby-lang.org> * @copyright This file is a part of the programming language Ruby. 
* Permission is hereby granted, to either redistribute and/or @@ -17,7 +16,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Various attribute-related macros. * * ### Q&A ### @@ -40,6 +39,7 @@ #include "ruby/internal/attr/noinline.h" #include "ruby/internal/attr/nonnull.h" #include "ruby/internal/attr/noreturn.h" +#include "ruby/internal/attr/packed_struct.h" #include "ruby/internal/attr/pure.h" #include "ruby/internal/attr/restrict.h" #include "ruby/internal/attr/returns_nonnull.h" @@ -81,10 +81,8 @@ #undef NOINLINE #define NOINLINE(x) RBIMPL_ATTR_NOINLINE() x -#ifndef MJIT_HEADER -# undef ALWAYS_INLINE -# define ALWAYS_INLINE(x) RBIMPL_ATTR_FORCEINLINE() x -#endif +#undef ALWAYS_INLINE +#define ALWAYS_INLINE(x) RBIMPL_ATTR_FORCEINLINE() x #undef ERRORFUNC #define ERRORFUNC(mesg, x) RBIMPL_ATTR_ERROR(mesg) x @@ -148,17 +146,14 @@ #define NORETURN(x) RBIMPL_ATTR_NORETURN() x #define NORETURN_STYLE_NEW -#ifndef PACKED_STRUCT -# define PACKED_STRUCT(x) x -#endif +#undef PACKED_STRUCT +#define PACKED_STRUCT(x) \ + RBIMPL_ATTR_PACKED_STRUCT_BEGIN() x RBIMPL_ATTR_PACKED_STRUCT_END() -#ifndef PACKED_STRUCT_UNALIGNED -# if UNALIGNED_WORD_ACCESS -# define PACKED_STRUCT_UNALIGNED(x) PACKED_STRUCT(x) -# else -# define PACKED_STRUCT_UNALIGNED(x) x -# endif -#endif +#undef PACKED_STRUCT_UNALIGNED +#define PACKED_STRUCT_UNALIGNED(x) \ + RBIMPL_ATTR_PACKED_STRUCT_UNALIGNED_BEGIN() x \ + RBIMPL_ATTR_PACKED_STRUCT_UNALIGNED_END() #undef RB_UNUSED_VAR #define RB_UNUSED_VAR(x) x RBIMPL_ATTR_MAYBE_UNUSED() diff --git a/include/ruby/backward/2/bool.h b/include/ruby/backward/2/bool.h index 53164eb3b8..f2fa390c80 100644 --- a/include/ruby/backward/2/bool.h +++ b/include/ruby/backward/2/bool.h @@ -1,7 +1,6 @@ #ifndef 
RUBY_BACKWARD2_BOOL_H /*-*-C++-*-vi:se ft=cpp:*/ #define RUBY_BACKWARD2_BOOL_H /** - * @file * @author Ruby developers <ruby-core@ruby-lang.org> * @copyright This file is a part of the programming language Ruby. * Permission is hereby granted, to either redistribute and/or @@ -17,8 +16,8 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. - * @brief Defines old #TRUE / #FALSE + * extension libraries. They could be written in C++98. + * @brief Defines old TRUE / FALSE */ #include "ruby/internal/stdbool.h" diff --git a/include/ruby/backward/2/gcc_version_since.h b/include/ruby/backward/2/gcc_version_since.h index 2a5b76c102..00cc40ca56 100644 --- a/include/ruby/backward/2/gcc_version_since.h +++ b/include/ruby/backward/2/gcc_version_since.h @@ -1,7 +1,6 @@ #ifndef RUBY_BACKWARD2_GCC_VERSION_SINCE_H /*-*-C++-*-vi:se ft=cpp:*/ #define RUBY_BACKWARD2_GCC_VERSION_SINCE_H /** - * @file * @author Ruby developers <ruby-core@ruby-lang.org> * @copyright This file is a part of the programming language Ruby. * Permission is hereby granted, to either redistribute and/or @@ -17,8 +16,8 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. - * @brief Defines old #GCC_VERSION_SINCE + * extension libraries. They could be written in C++98. 
+ * @brief Defines old GCC_VERSION_SINCE */ #include "ruby/internal/compiler_since.h" diff --git a/include/ruby/backward/2/inttypes.h b/include/ruby/backward/2/inttypes.h index c1e376a107..45460878bc 100644 --- a/include/ruby/backward/2/inttypes.h +++ b/include/ruby/backward/2/inttypes.h @@ -1,7 +1,6 @@ #ifndef RUBY_BACKWARD2_INTTYPES_H /*-*-C++-*-vi:se ft=cpp:*/ #define RUBY_BACKWARD2_INTTYPES_H /** - * @file * @author Ruby developers <ruby-core@ruby-lang.org> * @copyright This file is a part of the programming language Ruby. * Permission is hereby granted, to either redistribute and/or @@ -17,7 +16,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief C99 shim for `<inttypes.h>` */ #include "ruby/internal/config.h" /* PRI_LL_PREFIX etc. are here */ diff --git a/include/ruby/backward/2/limits.h b/include/ruby/backward/2/limits.h index e38009b01a..6f7021e5f4 100644 --- a/include/ruby/backward/2/limits.h +++ b/include/ruby/backward/2/limits.h @@ -1,7 +1,6 @@ #ifndef RUBY_BACKWARD2_LIMITS_H /*-*-C++-*-vi:se ft=cpp:*/ #define RUBY_BACKWARD2_LIMITS_H /** - * @file * @author Ruby developers <ruby-core@ruby-lang.org> * @copyright This file is a part of the programming language Ruby. * Permission is hereby granted, to either redistribute and/or @@ -17,11 +16,11 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Historical shim for `<limits.h>`. * * The macros in this header file are obsolescent. 
Does anyone really need our - * own definition of #CHAR_BIT today? + * own definition of `CHAR_BIT` today? */ #include "ruby/internal/config.h" diff --git a/include/ruby/backward/2/long_long.h b/include/ruby/backward/2/long_long.h index 83eabb459c..8e6b2743fc 100644 --- a/include/ruby/backward/2/long_long.h +++ b/include/ruby/backward/2/long_long.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Defines old #LONG_LONG * * No known compiler that can compile today's ruby lacks long long. @@ -29,7 +29,15 @@ #include "ruby/internal/has/warning.h" #include "ruby/internal/warning_push.h" -#if RBIMPL_HAS_WARNING("-Wc++11-long-long") +#if defined(__DOXYGEN__) +# /** @cond INTERNAL_MACRO */ +# define HAVE_LONG_LONG 1 +# define HAVE_TRUE_LONG_LONG 1 +# /** @endcond */ +# /** @deprecated Just use `long long` directly. */ +# define LONG_LONG long long. + +#elif RBIMPL_HAS_WARNING("-Wc++11-long-long") # define HAVE_TRUE_LONG_LONG 1 # define LONG_LONG \ RBIMPL_WARNING_PUSH() \ diff --git a/include/ruby/backward/2/r_cast.h b/include/ruby/backward/2/r_cast.h index 4600699a9e..3d0f40fd1e 100644 --- a/include/ruby/backward/2/r_cast.h +++ b/include/ruby/backward/2/r_cast.h @@ -1,7 +1,6 @@ #ifndef RUBY_BACKWARD2_R_CAST_H /*-*-C++-*-vi:se ft=cpp:*/ #define RUBY_BACKWARD2_R_CAST_H /** - * @file * @author Ruby developers <ruby-core@ruby-lang.org> * @copyright This file is a part of the programming language Ruby. * Permission is hereby granted, to either redistribute and/or @@ -17,8 +16,8 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. 
They could be written in C++98. - * @brief Defines old #R_CAST + * extension libraries. They could be written in C++98. + * @brief Defines old R_CAST * * Nobody is actively using this macro. */ diff --git a/include/ruby/backward/2/rmodule.h b/include/ruby/backward/2/rmodule.h index a3e2d39f35..53b37831c0 100644 --- a/include/ruby/backward/2/rmodule.h +++ b/include/ruby/backward/2/rmodule.h @@ -1,7 +1,6 @@ #ifndef RUBY_BACKWARD2_RMODULE_H /*-*-C++-*-vi:se ft=cpp:*/ #define RUBY_BACKWARD2_RMODULE_H /** - * @file * @author Ruby developers <ruby-core@ruby-lang.org> * @copyright This file is a part of the programming language Ruby. * Permission is hereby granted, to either redistribute and/or @@ -17,7 +16,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Orphan macros. * * These macros seems broken since at least 2011. Nobody (except ruby itself diff --git a/include/ruby/backward/2/stdalign.h b/include/ruby/backward/2/stdalign.h index 2d3c333bde..8b491bf564 100644 --- a/include/ruby/backward/2/stdalign.h +++ b/include/ruby/backward/2/stdalign.h @@ -17,14 +17,14 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. 
* @brief Defines #RUBY_ALIGNAS / #RUBY_ALIGNOF */ #include "ruby/internal/stdalign.h" #undef RUBY_ALIGNAS #undef RUBY_ALIGNOF -#define RUBY_ALIGNAS RBIMPL_ALIGNAS -#define RUBY_ALIGNOF RBIMPL_ALIGNOF +#define RUBY_ALIGNAS RBIMPL_ALIGNAS /**< @copydoc RBIMPL_ALIGNAS */ +#define RUBY_ALIGNOF RBIMPL_ALIGNOF /**< @copydoc RBIMPL_ALIGNOF */ #endif /* RUBY_BACKWARD2_STDALIGN_H */ diff --git a/include/ruby/backward/2/stdarg.h b/include/ruby/backward/2/stdarg.h index 5c5e1b31ce..08659fee47 100644 --- a/include/ruby/backward/2/stdarg.h +++ b/include/ruby/backward/2/stdarg.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Defines old #_ * * Nobody should ever use these macros any longer. No known compilers lack @@ -25,6 +25,10 @@ */ #undef _ +/** + * @deprecated Nobody practically needs this macro any longer. + * @brief This was a transition path from K&R to ANSI. + */ #ifdef HAVE_PROTOTYPES # define _(args) args #else @@ -32,12 +36,30 @@ #endif #undef __ +/** + * @deprecated Nobody practically needs this macro any longer. + * @brief This was a transition path from K&R to ANSI. + */ #ifdef HAVE_STDARG_PROTOTYPES # define __(args) args #else # define __(args) () #endif +/** + * Functions declared using this macro take arbitrary arguments, including + * void. + * + * ```CXX + * void func(ANYARGS); + * ``` + * + * This was a necessary evil when there was no such thing like function + * overloading. But it is the 21st century today. People generally need not + * use this. Just use a granular typed function. + * + * @see ruby::backward::cxxanyargs + */ #ifdef __cplusplus #define ANYARGS ... 
#else diff --git a/include/ruby/backward/cxxanyargs.hpp b/include/ruby/backward/cxxanyargs.hpp index 0af55977b2..2414b7ae6d 100644 --- a/include/ruby/backward/cxxanyargs.hpp +++ b/include/ruby/backward/cxxanyargs.hpp @@ -1,12 +1,12 @@ #ifndef RUBY_BACKWARD_CXXANYARGS_HPP //-*-C++-*-vi:ft=cpp #define RUBY_BACKWARD_CXXANYARGS_HPP /// @file -/// @author \@shyouhei +/// @author @shyouhei /// @copyright This file is a part of the programming language Ruby. /// Permission is hereby granted, to either redistribute and/or /// modify this file, provided that the conditions mentioned in the /// file COPYING are met. Consult the file for details. -/// @note DO NOT MODERNIZE THIS FILE! As the file name implies it is +/// @note DO NOT MODERNISE THIS FILE! As the file name implies it is /// meant to be a backwards compatibility shim. Please stick to /// C++ 98 and never use newer features, like `constexpr`. /// @brief Provides old prototypes for C++ programs. @@ -39,7 +39,7 @@ namespace ruby { /// Backwards compatibility layer. namespace backward { -/// Provides ANYARGS deprecation warnings. In C, ANYARGS means there is no +/// Provides ANYARGS deprecation warnings. In C, ANYARGS means there is no /// function prototype. Literally anything, even including nothing, can be a /// valid ANYARGS. So passing a correctly prototyped function pointer to an /// ANYARGS-ed function parameter is valid, at the same time passing an @@ -68,7 +68,7 @@ RUBY_CXX_DEPRECATED("Use of ANYARGS in this function is deprecated") /// @param[in] e Setter function. /// @note Both functions can be nullptr. /// @see rb_define_hooked_variable() -/// @deprecated Use glanular typed overload instead. +/// @deprecated Use granular typed overload instead. inline void rb_define_virtual_variable(const char *q, type *w, void_type *e) { @@ -131,7 +131,7 @@ RUBY_CXX_DEPRECATED("Use of ANYARGS in this function is deprecated") /// @param[in] r Setter function. /// @note Both functions can be nullptr. 
/// @see rb_define_virtual_variable() -/// @deprecated Use glanular typed overload instead. +/// @deprecated Use granular typed overload instead. inline void rb_define_hooked_variable(const char *q, VALUE *w, type *e, void_type *r) { @@ -190,6 +190,7 @@ rb_define_hooked_variable(const char *q, VALUE *w, std::nullptr_t e, void_type * /// @name Exceptions and tag jumps /// @{ +// RUBY_CXX_DEPRECATED("by rb_block_call since 1.9") RUBY_CXX_DEPRECATED("Use of ANYARGS in this function is deprecated") /// @brief Old way to implement iterators. /// @param[in] q A function that can yield. @@ -198,20 +199,21 @@ RUBY_CXX_DEPRECATED("Use of ANYARGS in this function is deprecated") /// @param[in] r Passed to `e`. /// @return The return value of `q`. /// @note `e` can be nullptr. -/// @deprecated This function is obsolated since long before 2.x era. Do not +/// @deprecated This function is obsoleted since long before 2.x era. Do not /// use it any longer. rb_block_call() is provided instead. inline VALUE rb_iterate(onearg_type *q, VALUE w, type *e, VALUE r) { rb_block_call_func_t t = reinterpret_cast<rb_block_call_func_t>(e); - return ::rb_iterate(q, w, t, r); + return backward::rb_iterate_deprecated(q, w, t, r); } #ifdef HAVE_NULLPTR +RUBY_CXX_DEPRECATED("by rb_block_call since 1.9") inline VALUE rb_iterate(onearg_type *q, VALUE w, std::nullptr_t e, VALUE r) { - return ::rb_iterate(q, w, e, r); + return backward::rb_iterate_deprecated(q, w, e, r); } #endif @@ -225,7 +227,7 @@ RUBY_CXX_DEPRECATED("Use of ANYARGS in this function is deprecated") /// @param[in] y Passed to `t` /// @return Return value of `q#w(*r,&t)` /// @note 't' can be nullptr. -/// @deprecated Use glanular typed overload instead. +/// @deprecated Use granular typed overload instead. 
inline VALUE rb_block_call(VALUE q, ID w, int e, const VALUE *r, type *t, VALUE y) { @@ -253,7 +255,7 @@ RUBY_CXX_DEPRECATED("Use of ANYARGS in this function is deprecated") /// @see rb_ensure() /// @see rb_rescue2() /// @see rb_protect() -/// @deprecated Use glanular typed overload instead. +/// @deprecated Use granular typed overload instead. inline VALUE rb_rescue(type *q, VALUE w, type *e, VALUE r) { @@ -277,7 +279,7 @@ RUBY_CXX_DEPRECATED("Use of ANYARGS in this function is deprecated") /// @see rb_ensure() /// @see rb_rescue() /// @see rb_protect() -/// @deprecated Use glanular typed overload instead. +/// @deprecated Use granular typed overload instead. inline VALUE rb_rescue2(type *q, VALUE w, type *e, VALUE r, ...) { @@ -303,7 +305,7 @@ RUBY_CXX_DEPRECATED("Use of ANYARGS in this function is deprecated") /// @see rb_rescue() /// @see rb_rescue2() /// @see rb_protect() -/// @deprecated Use glanular typed overload instead. +/// @deprecated Use granular typed overload instead. inline VALUE rb_ensure(type *q, VALUE w, type *e, VALUE r) { @@ -324,7 +326,7 @@ RUBY_CXX_DEPRECATED("Use of ANYARGS in this function is deprecated") /// @see rb_protect() /// @see rb_rb_catch_obj() /// @see rb_rescue() -/// @deprecated Use glanular typed overload instead. +/// @deprecated Use granular typed overload instead. inline VALUE rb_catch(const char *q, type *w, VALUE e) { @@ -351,7 +353,7 @@ RUBY_CXX_DEPRECATED("Use of ANYARGS in this function is deprecated") /// @see rb_protect() /// @see rb_rb_catch_obj() /// @see rb_rescue() -/// @deprecated Use glanular typed overload instead. +/// @deprecated Use granular typed overload instead. inline VALUE rb_catch_obj(VALUE q, type *w, VALUE e) { @@ -364,14 +366,14 @@ rb_catch_obj(VALUE q, type *w, VALUE e) /// @{ RUBY_CXX_DEPRECATED("Use of ANYARGS in this function is deprecated") -/// @brief Creates a @ref rb_cFiber instance. +/// @brief Creates a rb_cFiber instance. /// @param[in] q The fiber body. /// @param[in] w Passed to `q`. 
/// @return What was allocated. /// @note It makes no sense to pass nullptr to`q`. /// @see rb_proc_new() -/// @see rb_thread_creatr() -/// @deprecated Use glanular typed overload instead. +/// @see rb_thread_create() +/// @deprecated Use granular typed overload instead. inline VALUE rb_fiber_new(type *q, VALUE w) { @@ -386,8 +388,8 @@ RUBY_CXX_DEPRECATED("Use of ANYARGS in this function is deprecated") /// @return What was allocated. /// @note It makes no sense to pass nullptr to`q`. /// @see rb_fiber_new() -/// @see rb_thread_creatr() -/// @deprecated Use glanular typed overload instead. +/// @see rb_thread_create() +/// @deprecated Use granular typed overload instead. inline VALUE rb_proc_new(type *q, VALUE w) { @@ -403,7 +405,7 @@ RUBY_CXX_DEPRECATED("Use of ANYARGS in this function is deprecated") /// @note It makes no sense to pass nullptr to`q`. /// @see rb_proc_new() /// @see rb_fiber_new() -/// @deprecated Use glanular typed overload instead. +/// @deprecated Use granular typed overload instead. inline VALUE rb_thread_create(type *q, void *w) { @@ -425,7 +427,7 @@ RUBY_CXX_DEPRECATED("Use of ANYARGS in this function is deprecated") /// @note It makes no sense to pass nullptr to`w`. /// @see st_foreach_check() /// @see rb_hash_foreach() -/// @deprecated Use glanular typed overload instead. +/// @deprecated Use granular typed overload instead. inline int st_foreach(st_table *q, int_type *w, st_data_t e) { @@ -443,7 +445,7 @@ RUBY_CXX_DEPRECATED("Use of ANYARGS in this function is deprecated") /// @retval 1 Element removed during traversing. /// @note It makes no sense to pass nullptr to`w`. /// @see st_foreach() -/// @deprecated Use glanular typed overload instead. +/// @deprecated Use granular typed overload instead. inline int st_foreach_check(st_table *q, int_type *w, st_data_t e, st_data_t) { @@ -459,7 +461,7 @@ RUBY_CXX_DEPRECATED("Use of ANYARGS in this function is deprecated") /// @param[in] e Passed to `w`. 
/// @note It makes no sense to pass nullptr to`w`. /// @see st_foreach_check() -/// @deprecated Use glanular typed overload instead. +/// @deprecated Use granular typed overload instead. inline void st_foreach_safe(st_table *q, int_type *w, st_data_t e) { @@ -475,7 +477,7 @@ RUBY_CXX_DEPRECATED("Use of ANYARGS in this function is deprecated") /// @param[in] e Passed to `w`. /// @note It makes no sense to pass nullptr to`w`. /// @see st_foreach() -/// @deprecated Use glanular typed overload instead. +/// @deprecated Use granular typed overload instead. inline void rb_hash_foreach(VALUE q, int_type *w, VALUE e) { @@ -491,7 +493,7 @@ RUBY_CXX_DEPRECATED("Use of ANYARGS in this function is deprecated") /// @param[in] e Passed to `w`. /// @note It makes no sense to pass nullptr to`w`. /// @see st_foreach() -/// @deprecated Use glanular typed overload instead. +/// @deprecated Use granular typed overload instead. inline void rb_ivar_foreach(VALUE q, int_type *w, VALUE e) { @@ -539,7 +541,7 @@ struct driver { RUBY_CXX_DEPRECATED("use of ANYARGS is deprecated") #endif /// @copydoc define(VALUE klass, T mid, U func) - /// @deprecated Pass corrctly typed function instead. + /// @deprecated Pass correctly typed function instead. static inline void define(VALUE klass, T mid, type func) { @@ -592,27 +594,42 @@ struct driver { /* We could perhaps merge this struct into the one above using variadic * template parameters if we could assume C++11, but sadly we cannot. */ +/// @copydoc ruby::backward::cxxanyargs::define_method::driver template<typename T, void (*F)(T mid, type func, int arity)> struct driver0 { + + /// @brief Defines a method + /// @tparam N Arity of the function. + /// @tparam U The function in question template<int N, typename U> struct engine { RUBY_CXX_DEPRECATED("use of ANYARGS is deprecated") + /// @copydoc define(T mid, U func) + /// @deprecated Pass correctly typed function instead. 
static inline void define(T mid, type func) { F(mid, func, N); } + + /// @brief Defines Kernel#mid as func, whose arity is N. + /// @param[in] mid Name of the method to define. + /// @param[in] func Function that implements klass#mid. static inline void define(T mid, U func) { F(mid, reinterpret_cast<type *>(func), N); } + + /// @copydoc define(T mid, U func) + /// @deprecated Pass correctly typed function instead. static inline void define(T mid, notimpl_type func) { F(mid, reinterpret_cast<type *>(func), N); } }; + /// @cond INTERNAL_MACRO template<int N, bool = false> struct specific : public engine<N, type *> {}; template<bool b> struct specific<15, b> : public engine<15, VALUE(*)(VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE)> {}; diff --git a/include/ruby/debug.h b/include/ruby/debug.h index 16891e8458..f7c8e6ca8d 100644 --- a/include/ruby/debug.h +++ b/include/ruby/debug.h @@ -10,6 +10,9 @@ * modify this file, provided that the conditions mentioned in the * file COPYING are met. Consult the file for details. */ +#include "ruby/internal/attr/deprecated.h" +#include "ruby/internal/attr/nonnull.h" +#include "ruby/internal/attr/returns_nonnull.h" #include "ruby/internal/dllexport.h" #include "ruby/internal/event.h" #include "ruby/internal/value.h" @@ -19,74 +22,762 @@ RBIMPL_SYMBOL_EXPORT_BEGIN() /* Note: This file contains experimental APIs. */ /* APIs can be replaced at Ruby 2.0.1 or later */ +/** + * @name Frame-profiling APIs + * + * @{ + */ -/* profile frames APIs */ +RBIMPL_ATTR_NONNULL((3)) +/** + * Queries mysterious "frame"s of the given range. + * + * The returned values are opaque backtrace pointers, which you are allowed to + * issue a very limited set of operations listed below. Don't call arbitrary + * ruby methods. + * + * @param[in] start Start position (0 means the topmost). + * @param[in] limit Number objects of `buff`. + * @param[out] buff Return buffer. 
+ * @param[out] lines Return buffer. + * @return Number of objects filled into `buff`. + * @post `buff` is filled with backtrace pointers. + * @post `lines` is filled with `__LINE__` of each backtraces. + * + * @internal + * + * @shyouhei doesn't like this abuse of ::VALUE. It should have been + * `const struct rb_callable_method_entry_struct *`. + */ int rb_profile_frames(int start, int limit, VALUE *buff, int *lines); + +/** + * Queries mysterious "frame"s of the given range. + * + * A per-thread version of rb_profile_frames(). + * Arguments and return values are the same with rb_profile_frames() with the + * exception of the first argument _thread_, which accepts the Thread to be + * profiled/queried. + * + * @param[in] thread The Ruby Thread to be profiled. + * @param[in] start Start position (0 means the topmost). + * @param[in] limit Number objects of `buff`. + * @param[out] buff Return buffer. + * @param[out] lines Return buffer. + * @return Number of objects filled into `buff`. + * @post `buff` is filled with backtrace pointers. + * @post `lines` is filled with `__LINE__` of each backtraces. + */ +int rb_profile_thread_frames(VALUE thread, int start, int limit, VALUE *buff, int *lines); + +/** + * Queries the path of the passed backtrace. + * + * @param[in] frame What rb_profile_frames() returned. + * @retval RUBY_Qnil The frame is implemented in C etc. + * @retval otherwise Where `frame` is running. + */ VALUE rb_profile_frame_path(VALUE frame); + +/** + * Identical to rb_profile_frame_path(), except it tries to expand the + * returning path. In case the path is `require`-d from something else + * rb_profile_frame_path() can return relative paths. This one tries to avoid + * that. + * + * @param[in] frame What rb_profile_frames() returned. + * @retval "<cfunc>" The frame is in C. + * @retval RUBY_Qnil Can't infer real path (inside of `eval` etc.). + * @retval otherwise Where `frame` is running. 
+ */ VALUE rb_profile_frame_absolute_path(VALUE frame); + +/** + * Queries human-readable "label" string. This is `"<main>"` for the toplevel, + * `"<compiled>"` for evaluated ones, method name for methods, class name for + * classes. + * + * @param[in] frame What rb_profile_frames() returned. + * @retval RUBY_Qnil Can't infer the label (C etc.). + * @retval "<main>" The frame is global toplevel. + * @retval "<compiled>" The frame is dynamic. + * @retval otherwise Label of the frame. + */ VALUE rb_profile_frame_label(VALUE frame); + +/** + * Identical to rb_profile_frame_label(), except it does not "qualify" the + * result. Consider the following backtrace: + * + * ```ruby + * def bar + * caller_locations + * end + * + * def foo + * [1].map { bar }.first + * end + * + * obj = foo.first + * obj.label # => "block in foo" + * obj.base_label # => "foo" + * ``` + * + * @param[in] frame What rb_profile_frames() returned. + * @retval RUBY_Qnil Can't infer the label (C etc.). + * @retval "<main>" The frame is global toplevel. + * @retval "<compiled>" The frame is dynamic. + * @retval otherwise Base label of the frame. + */ VALUE rb_profile_frame_base_label(VALUE frame); + +/** + * Identical to rb_profile_frame_label(), except it returns a qualified result. + * + * @param[in] frame What rb_profile_frames() returned. + * @retval RUBY_Qnil Can't infer the label (C etc.). + * @retval "<main>" The frame is global toplevel. + * @retval "<compiled>" The frame is dynamic. + * @retval otherwise Qualified label of the frame. + * + * @internal + * + * As of writing there is no way to obtain this return value from a Ruby + * script. This may change in future (it took 8 years and still no progress, + * though). + */ VALUE rb_profile_frame_full_label(VALUE frame); + +/** + * Queries the first line of the method of the passed frame pointer. Can be + * handy when for instance a debugger want to display the frame in question. + * + * @param[in] frame What rb_profile_frames() returned. 
+ * @retval RUBY_Qnil Can't infer the line (C etc.). + * @retval otherwise Line number of the method in question. + */ VALUE rb_profile_frame_first_lineno(VALUE frame); + +/** + * Queries the class path of the method that the passed frame represents. + * + * @param[in] frame What rb_profile_frames() returned. + * @retval RUBY_Qnil Can't infer the class (global toplevel etc.). + * @retval otherwise Class path as in rb_class_path(). + */ VALUE rb_profile_frame_classpath(VALUE frame); + +/** + * Queries if the method of the passed frame is a singleton method. + * + * @param[in] frame What rb_profile_frames() returned. + * @retval RUBY_Qtrue It is a singleton method. + * @retval RUBY_Qfalse Otherwise (normal method/non-method). + */ VALUE rb_profile_frame_singleton_method_p(VALUE frame); + +/** + * Queries the name of the method of the passed frame. + * + * @param[in] frame What rb_profile_frames() returned. + * @retval RUBY_Qnil The frame in question is not a method. + * @retval otherwise Name of the method of the frame. + */ VALUE rb_profile_frame_method_name(VALUE frame); + +/** + * Identical to rb_profile_frame_method_name(), except it "qualifies" the + * return value with its defining class. + * + * @param[in] frame What rb_profile_frames() returned. + * @retval RUBY_Qnil The frame in question is not a method. + * @retval otherwise Qualified name of the method of the frame. + */ VALUE rb_profile_frame_qualified_method_name(VALUE frame); -/* debug inspector APIs */ +/** @} */ + +/** + * @name Debug inspector APIs + * + * @{ + */ + +/** Opaque struct representing a debug inspector. */ typedef struct rb_debug_inspector_struct rb_debug_inspector_t; -typedef VALUE (*rb_debug_inspector_func_t)(const rb_debug_inspector_t *, void *); +/** + * Type of the callback function passed to rb_debug_inspector_open(). + * Inspection shall happen only inside of them. The passed pointers get + * invalidated once the callback returns. + * + * @param[in] dc A debug context. 
+ * @param[in,out] data What was passed to rb_debug_inspector_open(). + * @return What would be the return value of rb_debug_inspector_open(). + */ +typedef VALUE (*rb_debug_inspector_func_t)(const rb_debug_inspector_t *dc, void *data); + +/** + * Prepares, executes, then cleans up a debug session. + * + * @param[in] func A callback to run inside of a debug session. + * @param[in,out] data Passed as-is to `func`. + * @return What was returned from `func`. + */ VALUE rb_debug_inspector_open(rb_debug_inspector_func_t func, void *data); + +/** + * Queries the backtrace object of the context. This is as if you call + * `caller_locations` at the point of debugger. + * + * @param[in] dc A debug context. + * @return An array of `Thread::Backtrace::Location` which represents the + * current point of execution at `dc`. + + */ +VALUE rb_debug_inspector_backtrace_locations(const rb_debug_inspector_t *dc); + +/** + * Queries the current receiver of the passed context's upper frame. + * + * @param[in] dc A debug context. + * @param[in] index Index of the frame from top to bottom. + * @exception rb_eArgError `index` out of range. + * @return The current receiver at `index`-th frame. + */ VALUE rb_debug_inspector_frame_self_get(const rb_debug_inspector_t *dc, long index); + +/** + * Queries the current class of the passed context's upper frame. + * + * @param[in] dc A debug context. + * @param[in] index Index of the frame from top to bottom. + * @exception rb_eArgError `index` out of range. + * @return The current class at `index`-th frame. + */ VALUE rb_debug_inspector_frame_class_get(const rb_debug_inspector_t *dc, long index); + +/** + * Queries the binding of the passed context's upper frame. + * + * @param[in] dc A debug context. + * @param[in] index Index of the frame from top to bottom. + * @exception rb_eArgError `index` out of range. + * @return The binding at `index`-th frame. 
+ */ VALUE rb_debug_inspector_frame_binding_get(const rb_debug_inspector_t *dc, long index); + +/** + * Queries the instruction sequence of the passed context's upper frame. + * + * @param[in] dc A debug context. + * @param[in] index Index of the frame from top to bottom. + * @exception rb_eArgError `index` out of range. + * @retval RUBY_Qnil `index`-th frame is not in Ruby (C etc.). + * @retval otherwise An instance of `RubyVM::InstructionSequence` which + * represents the instruction sequence at `index`-th + * frame. + */ VALUE rb_debug_inspector_frame_iseq_get(const rb_debug_inspector_t *dc, long index); -VALUE rb_debug_inspector_backtrace_locations(const rb_debug_inspector_t *dc); -/* Old style set_trace_func APIs */ +/** + * Queries the depth of the passed context's upper frame. + * + * Note that the depth is not the same as the frame index because debug_inspector + * skips some special frames but the depth counts all frames. + * + * @param[in] dc A debug context. + * @param[in] index Index of the frame from top to bottom. + * @exception rb_eArgError `index` out of range. + * @return The depth at `index`-th frame in Integer. + */ +VALUE rb_debug_inspector_frame_depth(const rb_debug_inspector_t *dc, long index); + +// A macro to recognize `rb_debug_inspector_frame_depth()` is available or not +#define RB_DEBUG_INSPECTOR_FRAME_DEPTH(dc, index) rb_debug_inspector_frame_depth(dc, index) + +/** + * Return current frame depth. + * + * @return The depth of the current frame in Integer. + */ +VALUE rb_debug_inspector_current_depth(void); + +/** @} */ + +/** + * @name Old style set_trace_func APIs + * + * @{ + */ /* duplicated def of include/ruby/ruby.h */ -void rb_add_event_hook(rb_event_hook_func_t func, rb_event_flag_t events, VALUE data); -int rb_remove_event_hook(rb_event_hook_func_t func); +#include "ruby/internal/event.h" +/** + * Identical to rb_remove_event_hook(), except it additionally takes the data + * argument. 
This extra argument is the same as that of rb_add_event_hook(), + * and this function removes the hook which matches both arguments at once. + * + * @param[in] func A callback. + * @param[in] data What to be passed to `func`. + * @return Number of deleted event hooks. + * @note As multiple events can share the same `func` it is quite + * possible for the return value to become more than one. + */ int rb_remove_event_hook_with_data(rb_event_hook_func_t func, VALUE data); + +/** + * Identical to rb_add_event_hook(), except its effect is limited to the passed + * thread. Other threads are not affected by this. + * + * @param[in] thval An instance of ::rb_cThread. + * @param[in] func A callback. + * @param[in] events A set of events that `func` should run. + * @param[in] data Passed as-is to `func`. + * @exception rb_eTypeError `thval` is not a thread. + */ void rb_thread_add_event_hook(VALUE thval, rb_event_hook_func_t func, rb_event_flag_t events, VALUE data); + +/** + * Identical to rb_remove_event_hook(), except it additionally takes a thread + * argument. This extra argument is the same as that of + * rb_thread_add_event_hook(), and this function removes the hook which matches + * both arguments at once. + * + * @param[in] thval An instance of ::rb_cThread. + * @param[in] func A callback. + * @exception rb_eTypeError `thval` is not a thread. + * @return Number of deleted event hooks. + * @note As multiple events can share the same `func` it is quite + * possible for the return value to become more than one. + */ int rb_thread_remove_event_hook(VALUE thval, rb_event_hook_func_t func); + +/** + * Identical to rb_thread_remove_event_hook(), except it additionally takes the + * data argument. It can also be seen as a routine identical to + * rb_remove_event_hook_with_data(), except it additionally takes the thread. + * This function deletes hooks that satisfy all three criteria. + * + * @param[in] thval An instance of ::rb_cThread. + * @param[in] func A callback. 
+ * @param[in] data What to be passed to `func`. + * @exception rb_eTypeError `thval` is not a thread. + * @return Number of deleted event hooks. + * @note As multiple events can share the same `func` it is quite + * possible for the return value to become more than one. + */ int rb_thread_remove_event_hook_with_data(VALUE thval, rb_event_hook_func_t func, VALUE data); -/* TracePoint APIs */ +/** @} */ + +/** + * @name TracePoint APIs + * + * @{ + */ +/** + * Creates a tracepoint by registering a callback function for one or more + * tracepoint events. Once the tracepoint is created, you can use + * rb_tracepoint_enable to enable the tracepoint. + * + * @param[in] target_thread_not_supported_yet Meant for picking the + * thread in which the tracepoint is to be created. + * However, current implementation ignore this + * parameter, tracepoint is created for all threads. + * Simply specify Qnil. + * @param[in] events Event(s) to listen to. + * @param[in] func A callback function. + * @param[in,out] data Void pointer that will be passed to the callback + * function. + * + * When the callback function is called, it will be passed 2 parameters: + * 1. `VALUE tpval` - the TracePoint object from which trace args can be + * extracted. + * 1. `void *data` - A void pointer which helps to share scope with the + * callback function. + * + * It is important to note that you cannot register callbacks for normal events + * and internal events simultaneously because they are different purpose. You + * can use any Ruby APIs (calling methods and so on) on normal event hooks. + * However, in internal events, you can not use any Ruby APIs (even object + * creations). This is why we can't specify internal events by TracePoint + * directly. Limitations are MRI version specific. 
+ * + * Example: + * + * ```CXX + * rb_tracepoint_new( + * Qnil, + * RUBY_INTERNAL_EVENT_NEWOBJ | RUBY_INTERNAL_EVENT_FREEOBJ, + * obj_event_i, + * data); + * ``` + * + * In this example, a callback function `obj_event_i` will be registered for + * internal events #RUBY_INTERNAL_EVENT_NEWOBJ and + * #RUBY_INTERNAL_EVENT_FREEOBJ. + */ VALUE rb_tracepoint_new(VALUE target_thread_not_supported_yet, rb_event_flag_t events, void (*func)(VALUE, void *), void *data); + +/** + * Starts (enables) trace(s) defined by the passed object. A TracePoint object + * does not immediately take effect on creation. You have to explicitly call + * this API. + * + * @param[in] tpval An instance of TracePoint. + * @exception rb_eArgError A trace is already running. + * @return Undefined value. Forget this. It should have returned `void`. + * @post Trace(s) defined by `tpval` start. + */ VALUE rb_tracepoint_enable(VALUE tpval); + +/** + * Stops (disables) an already running instance of TracePoint. + * + * @param[in] tpval An instance of TracePoint. + * @return Undefined value. Forget this. It should have returned `void`. + * @post Trace(s) defined by `tpval` stop. + */ VALUE rb_tracepoint_disable(VALUE tpval); + +/** + * Queries if the passed TracePoint is up and running. + * + * @param[in] tpval An instance of TracePoint. + * @retval RUBY_Qtrue It is. + * @retval RUBY_Qfalse It isn't. + */ VALUE rb_tracepoint_enabled_p(VALUE tpval); +/** + * Type that represents a specific trace event. Roughly resembles the + * tracepoint object that is passed to the block of `TracePoint.new`: + * + * ```ruby + * TracePoint.new(*events) do |obj| + * ... # ^^^^^ Resembles this object. + * end + * ``` + */ typedef struct rb_trace_arg_struct rb_trace_arg_t; + +RBIMPL_ATTR_RETURNS_NONNULL() +/** + * Queries the current event of the passed tracepoint. + * + * @param[in] tpval An instance of TracePoint. + * @exception rb_eRuntimeError `tpval` is disabled. + * @return The current event. 
+ * + * @internal + * + * `tpval` is a fake. There is only one instance of ::rb_trace_arg_t at one + * time. This function just returns that global variable. + */ rb_trace_arg_t *rb_tracearg_from_tracepoint(VALUE tpval); +RBIMPL_ATTR_NONNULL(()) +/** + * Queries the event of the passed trace. + * + * @param[in] trace_arg A trace instance. + * @return Its event. + */ rb_event_flag_t rb_tracearg_event_flag(rb_trace_arg_t *trace_arg); + +RBIMPL_ATTR_NONNULL(()) +/** + * Identical to rb_tracearg_event_flag(), except it returns the name of the + * event in Ruby's symbol. + * + * @param[in] trace_arg A trace instance. + * @return Its event, in Ruby level Symbol object. + */ VALUE rb_tracearg_event(rb_trace_arg_t *trace_arg); + +RBIMPL_ATTR_NONNULL(()) +/** + * Queries the line of the point where the trace is at. + * + * @param[in] trace_arg A trace instance. + * @retval 0 The trace is not at a Ruby frame. + * @retval otherwise Its line number. + */ VALUE rb_tracearg_lineno(rb_trace_arg_t *trace_arg); + +RBIMPL_ATTR_NONNULL(()) +/** + * Queries the file name of the point where the trace is at. + * + * @param[in] trace_arg A trace instance. + * @retval RUBY_Qnil The trace is not at a Ruby frame. + * @retval otherwise Its path. + */ VALUE rb_tracearg_path(rb_trace_arg_t *trace_arg); + +RBIMPL_ATTR_NONNULL(()) +/** + * Queries the method name of the point where the trace is at. + * + * @param[in] trace_arg A trace instance. + * @retval RUBY_Qnil There is no method. + * @retval otherwise Its method name, in Ruby level Symbol. + */ VALUE rb_tracearg_method_id(rb_trace_arg_t *trace_arg); + +RBIMPL_ATTR_NONNULL(()) +/** + * Identical to rb_tracearg_method_id(), except it returns callee id like + * rb_frame_callee(). + * + * @param[in] trace_arg A trace instance. + * @retval RUBY_Qnil There is no method. + * @retval otherwise Its method name, in Ruby level Symbol. 
+ */ VALUE rb_tracearg_callee_id(rb_trace_arg_t *trace_arg); + +RBIMPL_ATTR_NONNULL(()) +/** + * Queries the class that defines the method that the passed trace is at. This + * can be different from the class of rb_tracearg_self()'s return value because + * of inheritance(s). + * + * @param[in] trace_arg A trace instance. + * @retval RUBY_Qnil There is no method. + * @retval otherwise Its method's class. + */ VALUE rb_tracearg_defined_class(rb_trace_arg_t *trace_arg); + +RBIMPL_ATTR_NONNULL(()) +/** + * Creates a binding object of the point where the trace is at. + * + * @param[in] trace_arg A trace instance. + * @retval RUBY_Qnil The point has no binding. + * @retval otherwise Its binding. + * + * @internal + * + * @shyouhei has no idea on which situation shall this function return + * ::RUBY_Qnil. + */ VALUE rb_tracearg_binding(rb_trace_arg_t *trace_arg); + +RBIMPL_ATTR_NONNULL(()) +/** + * Queries the receiver of the point trace is at. + * + * @param[in] trace_arg A trace instance. + * @return Its receiver. + */ VALUE rb_tracearg_self(rb_trace_arg_t *trace_arg); + +RBIMPL_ATTR_NONNULL(()) +/** + * Queries the return value that the trace represents. + * + * @param[in] trace_arg A trace instance. + * @exception rb_eRuntimeError The tracing event is not return-related. + * @return The return value. + */ VALUE rb_tracearg_return_value(rb_trace_arg_t *trace_arg); + +RBIMPL_ATTR_NONNULL(()) +/** + * Queries the raised exception that the trace represents. + * + * @param[in] trace_arg A trace instance. + * @exception rb_eRuntimeError The tracing event is not exception-related. + * @return The raised exception. + */ VALUE rb_tracearg_raised_exception(rb_trace_arg_t *trace_arg); + +RBIMPL_ATTR_NONNULL(()) +/** + * Queries the allocated/deallocated object that the trace represents. + * + * @param[in] trace_arg A trace instance. + * @exception rb_eRuntimeError The tracing event is not GC-related. + * @return The allocated/deallocated object. 
+ */ VALUE rb_tracearg_object(rb_trace_arg_t *trace_arg); + +/** @} */ + +/** + * @name Postponed Job API + * + * @{ + */ + /* * Postponed Job API - * rb_postponed_job_register and rb_postponed_job_register_one are - * async-signal-safe and used via SIGPROF by the "stackprof" RubyGem + * + * This API is designed to be called from contexts where it is not safe to run Ruby + * code (e.g. because they do not hold the GVL or because GC is in progress), and + * defer a callback to run in a context where it _is_ safe. The primary intended + * users of this API is for sampling profilers like the "stackprof" gem; these work + * by scheduling the periodic delivery of a SIGPROF signal, and inside the C-level + * signal handler, deferring a job to collect a Ruby backtrace when it is next safe + * to do so. + * + * Ruby maintains a small, fixed-size postponed job table. An extension using this + * API should first call `rb_postponed_job_preregister` to register a callback + * function in this table and obtain a handle of type `rb_postponed_job_handle_t` + * to it. Subsequently, the callback can be triggered by calling + * `rb_postponed_job_trigger` with that handle, or the `data` associated with the + * callback function can be changed by calling `rb_postponed_job_preregister` again. + * + * Because the postponed job table is quite small (it only has 32 entries on most + * common systems), extensions should generally only preregister one or two `func` + * values. + * + * Historically, this API provided two functions `rb_postponed_job_register` and + * `rb_postponed_job_register_one`, which claimed to be fully async-signal-safe and + * would call back the provided `func` and `data` at an appropriate time. However, + * these functions were subject to race conditions which could cause crashes when + * racing with Ruby's internal use of them. 
These two functions are still present, + * but are marked as deprecated and have slightly changed semantics: + * + * * rb_postponed_job_register now works like rb_postponed_job_register_one i.e. + * `func` will only be executed at most one time each time Ruby checks for + * interrupts, no matter how many times it is registered + * * They are also called with the last `data` to be registered, not the first + * (which is how rb_postponed_job_register_one previously worked) + */ + + +/** + * Type of postponed jobs. + * + * @param[in,out] arg What was passed to `rb_postponed_job_preregister` */ typedef void (*rb_postponed_job_func_t)(void *arg); + +/** + * The type of a handle returned from `rb_postponed_job_preregister` and + * passed to `rb_postponed_job_trigger` + */ +typedef unsigned int rb_postponed_job_handle_t; +#define POSTPONED_JOB_HANDLE_INVALID ((rb_postponed_job_handle_t)UINT_MAX) + +/** + * Pre-registers a func in Ruby's postponed job preregistration table, + * returning an opaque handle which can be used to trigger the job later. Generally, + * this function will be called during the initialization routine of an extension. + * + * The returned handle can be used later to call `rb_postponed_job_trigger`. This will + * cause Ruby to call back into the registered `func` with `data` at a later time, in + * a context where the GVL is held and it is safe to perform Ruby allocations. + * + * If the given `func` was already pre-registered, this function will overwrite the + * stored data with the newly passed data, and return the same handle instance as + * was previously returned. + * + * If this function is called concurrently with the same `func`, then the stored data + * could be the value from either call (but will definitely be one of them). + * + * If this function is called to update the data concurrently with a call to + * `rb_postponed_job_trigger` on the same handle, it's undefined whether `func` will + * be called with the old data or the new data. 
+ * + * Although the current implementation of this function is in fact async-signal-safe and + * has defined semantics when called concurrently on the same `func`, a future Ruby + * version might require that this method be called under the GVL; thus, programs which + * aim to be forward-compatible should call this method whilst holding the GVL. + * + * @param[in] flags Unused and ignored + * @param[in] func The function to be pre-registered + * @param[in] data The data to be pre-registered + * @retval POSTPONED_JOB_HANDLE_INVALID The job table is full; this registration + * did not succeed and no further registration will do so for + * the lifetime of the program. + * @retval otherwise A handle which can be passed to `rb_postponed_job_trigger` + */ +rb_postponed_job_handle_t rb_postponed_job_preregister(unsigned int flags, rb_postponed_job_func_t func, void *data); + +/** + * Triggers a pre-registered job registered with rb_postponed_job_preregister, + * scheduling it for execution the next time the Ruby VM checks for interrupts. + * The context in which the job is called holds the GVL and is safe to perform + * Ruby allocations within (i.e. it is not during GC). + * + * This method is async-signal-safe and can be called from any thread, at any + * time, including in signal handlers. + * + * If this method is called multiple times, Ruby will coalesce this into only + * one call to the job the next time it checks for interrupts. + * + * @param[in] h A handle returned from rb_postponed_job_preregister + */ +void rb_postponed_job_trigger(rb_postponed_job_handle_t h); + +/** + * Schedules the given `func` to be called with `data` when Ruby next checks for + * interrupts. If this function is called multiple times in between Ruby checking + * for interrupts, then `func` will be called only once with the `data` value from + * the most recent call to this function. 
+ * + * Like `rb_postponed_job_trigger`, the context in which the job is called + * holds the GVL and can allocate Ruby objects. + * + * This method essentially has the same semantics as: + * + * ``` + * rb_postponed_job_trigger(rb_postponed_job_preregister(0, func, data)); + * ``` + * + * @note Previous versions of Ruby promised that the (`func`, `data`) pairs would + * be executed as many times as they were registered with this function; in + * reality this was always subject to race conditions and this function no + * longer provides this guarantee. Instead, multiple calls to this function + * can be coalesced into a single execution of the passed `func`, with the + * most recent `data` registered at that time passed in. + * + * @deprecated This interface implies that arbitrarily many `func`'s can be enqueued + * over the lifetime of the program, whilst in reality the registration + * slots for postponed jobs are a finite resource. This is made clearer + * by the `rb_postponed_job_preregister` and `rb_postponed_job_trigger` + * functions, and a future version of Ruby might delete this function. + * + * @param[in] flags Unused and ignored. + * @param[in] func Job body. + * @param[in,out] data Passed as-is to `func`. + * @retval 0 Postponed job registration table is full. Failed. + * @retval 1 Registration succeeded. + * @post The passed job will run on the next interrupt check. + */ + RBIMPL_ATTR_DEPRECATED(("use rb_postponed_job_preregister and rb_postponed_job_trigger")) int rb_postponed_job_register(unsigned int flags, rb_postponed_job_func_t func, void *data); + +/** + * Identical to `rb_postponed_job_register` + * + * @deprecated This is deprecated for the same reason as `rb_postponed_job_register` + * + * @param[in] flags Unused and ignored. + * @param[in] func Job body. + * @param[in,out] data Passed as-is to `func`. + * @retval 0 Postponed job registration table is full. Failed. + * @retval 1 Registration succeeded. 
+ * @post The passed job will run on the next interrupt check. + */ + RBIMPL_ATTR_DEPRECATED(("use rb_postponed_job_preregister and rb_postponed_job_trigger")) int rb_postponed_job_register_one(unsigned int flags, rb_postponed_job_func_t func, void *data); +/** @} */ + +/** + * @cond INTERNAL_MACRO + * + * Anything after this are intentionally left undocumented, to honour the + * comment below. + */ + /* undocumented advanced tracing APIs */ typedef enum { @@ -98,6 +789,8 @@ typedef enum { void rb_add_event_hook2(rb_event_hook_func_t func, rb_event_flag_t events, VALUE data, rb_event_hook_flag_t hook_flag); void rb_thread_add_event_hook2(VALUE thval, rb_event_hook_func_t func, rb_event_flag_t events, VALUE data, rb_event_hook_flag_t hook_flag); +/** @endcond */ + RBIMPL_SYMBOL_EXPORT_END() #endif /* RUBY_DEBUG_H */ diff --git a/include/ruby/defines.h b/include/ruby/defines.h index d632a69fc1..48184f8a18 100644 --- a/include/ruby/defines.h +++ b/include/ruby/defines.h @@ -72,23 +72,30 @@ #include "ruby/backward/2/assume.h" #include "ruby/backward/2/attributes.h" #include "ruby/backward/2/bool.h" -#include "ruby/backward/2/gcc_version_since.h" #include "ruby/backward/2/long_long.h" #include "ruby/backward/2/stdalign.h" #include "ruby/backward/2/stdarg.h" #include "ruby/internal/dosish.h" #include "ruby/missing.h" +/** + * Asserts that the compilation unit includes Ruby's CAPI. This has been here + * since the very beginning (at least since version 0.49). + */ #define RUBY #ifdef __GNUC__ +# /** This is expanded to nothing for non-GCC compilers. */ # define RB_GNUC_EXTENSION __extension__ +# /** This is expanded to the passed token for non-GCC compilers. */ # define RB_GNUC_EXTENSION_BLOCK(x) __extension__ ({ x; }) #else # define RB_GNUC_EXTENSION # define RB_GNUC_EXTENSION_BLOCK(x) (x) #endif +/** @cond INTERNAL_MACRO */ + /* :FIXME: Can someone tell us why is this macro defined here? 
@shyouhei * thinks this is a truly internal macro but cannot move around because he * doesn't understand the reason of this arrangement. */ @@ -105,5 +112,5 @@ RBIMPL_SYMBOL_EXPORT_END() #else # define FLUSH_REGISTER_WINDOWS ((void)0) #endif - +/** @endcond */ #endif /* RUBY_DEFINES_H */ diff --git a/include/ruby/encoding.h b/include/ruby/encoding.h index 4e46d0d996..1256393701 100644 --- a/include/ruby/encoding.h +++ b/include/ruby/encoding.h @@ -9,403 +9,23 @@ * Permission is hereby granted, to either redistribute and/or * modify this file, provided that the conditions mentioned in the * file COPYING are met. Consult the file for details. + * @brief Encoding relates APIs. + * + * These APIs are mainly for implementing encodings themselves. Encodings are + * built on top of Ruby's core CAPIs. Though not prohibited, there can be + * relatively less rooms for things in this header file be useful when writing + * an extension library. */ -#include "ruby/internal/config.h" -#include <stdarg.h> #include "ruby/ruby.h" -#include "ruby/oniguruma.h" -#include "ruby/internal/dllexport.h" -RBIMPL_SYMBOL_EXPORT_BEGIN() - -enum ruby_encoding_consts { - RUBY_ENCODING_INLINE_MAX = 127, - RUBY_ENCODING_SHIFT = (RUBY_FL_USHIFT+10), - RUBY_ENCODING_MASK = (RUBY_ENCODING_INLINE_MAX<<RUBY_ENCODING_SHIFT - /* RUBY_FL_USER10..RUBY_FL_USER16 */), - RUBY_ENCODING_MAXNAMELEN = 42 -}; - -#define ENCODING_INLINE_MAX RUBY_ENCODING_INLINE_MAX -#define ENCODING_SHIFT RUBY_ENCODING_SHIFT -#define ENCODING_MASK RUBY_ENCODING_MASK - -#define RB_ENCODING_SET_INLINED(obj,i) do {\ - RBASIC(obj)->flags &= ~RUBY_ENCODING_MASK;\ - RBASIC(obj)->flags |= (VALUE)(i) << RUBY_ENCODING_SHIFT;\ -} while (0) -#define RB_ENCODING_SET(obj,i) rb_enc_set_index((obj), (i)) - -#define RB_ENCODING_GET_INLINED(obj) \ - (int)((RBASIC(obj)->flags & RUBY_ENCODING_MASK)>>RUBY_ENCODING_SHIFT) -#define RB_ENCODING_GET(obj) \ - (RB_ENCODING_GET_INLINED(obj) != RUBY_ENCODING_INLINE_MAX ? 
\ - RB_ENCODING_GET_INLINED(obj) : \ - rb_enc_get_index(obj)) - -#define RB_ENCODING_IS_ASCII8BIT(obj) (RB_ENCODING_GET_INLINED(obj) == 0) - -#define ENCODING_SET_INLINED(obj,i) RB_ENCODING_SET_INLINED(obj,i) -#define ENCODING_SET(obj,i) RB_ENCODING_SET(obj,i) -#define ENCODING_GET_INLINED(obj) RB_ENCODING_GET_INLINED(obj) -#define ENCODING_GET(obj) RB_ENCODING_GET(obj) -#define ENCODING_IS_ASCII8BIT(obj) RB_ENCODING_IS_ASCII8BIT(obj) -#define ENCODING_MAXNAMELEN RUBY_ENCODING_MAXNAMELEN - -enum ruby_coderange_type { - RUBY_ENC_CODERANGE_UNKNOWN = 0, - RUBY_ENC_CODERANGE_7BIT = ((int)RUBY_FL_USER8), - RUBY_ENC_CODERANGE_VALID = ((int)RUBY_FL_USER9), - RUBY_ENC_CODERANGE_BROKEN = ((int)(RUBY_FL_USER8|RUBY_FL_USER9)), - RUBY_ENC_CODERANGE_MASK = (RUBY_ENC_CODERANGE_7BIT| - RUBY_ENC_CODERANGE_VALID| - RUBY_ENC_CODERANGE_BROKEN) -}; - -static inline int -rb_enc_coderange_clean_p(int cr) -{ - return (cr ^ (cr >> 1)) & RUBY_ENC_CODERANGE_7BIT; -} -#define RB_ENC_CODERANGE_CLEAN_P(cr) rb_enc_coderange_clean_p(cr) -#define RB_ENC_CODERANGE(obj) ((int)RBASIC(obj)->flags & RUBY_ENC_CODERANGE_MASK) -#define RB_ENC_CODERANGE_ASCIIONLY(obj) (RB_ENC_CODERANGE(obj) == RUBY_ENC_CODERANGE_7BIT) -#define RB_ENC_CODERANGE_SET(obj,cr) (\ - RBASIC(obj)->flags = \ - (RBASIC(obj)->flags & ~RUBY_ENC_CODERANGE_MASK) | (cr)) -#define RB_ENC_CODERANGE_CLEAR(obj) RB_ENC_CODERANGE_SET((obj),0) - -/* assumed ASCII compatibility */ -#define RB_ENC_CODERANGE_AND(a, b) \ - ((a) == RUBY_ENC_CODERANGE_7BIT ? (b) : \ - (a) != RUBY_ENC_CODERANGE_VALID ? RUBY_ENC_CODERANGE_UNKNOWN : \ - (b) == RUBY_ENC_CODERANGE_7BIT ? 
RUBY_ENC_CODERANGE_VALID : (b)) - -#define RB_ENCODING_CODERANGE_SET(obj, encindex, cr) \ - do { \ - VALUE rb_encoding_coderange_obj = (obj); \ - RB_ENCODING_SET(rb_encoding_coderange_obj, (encindex)); \ - RB_ENC_CODERANGE_SET(rb_encoding_coderange_obj, (cr)); \ - } while (0) - -#define ENC_CODERANGE_MASK RUBY_ENC_CODERANGE_MASK -#define ENC_CODERANGE_UNKNOWN RUBY_ENC_CODERANGE_UNKNOWN -#define ENC_CODERANGE_7BIT RUBY_ENC_CODERANGE_7BIT -#define ENC_CODERANGE_VALID RUBY_ENC_CODERANGE_VALID -#define ENC_CODERANGE_BROKEN RUBY_ENC_CODERANGE_BROKEN -#define ENC_CODERANGE_CLEAN_P(cr) RB_ENC_CODERANGE_CLEAN_P(cr) -#define ENC_CODERANGE(obj) RB_ENC_CODERANGE(obj) -#define ENC_CODERANGE_ASCIIONLY(obj) RB_ENC_CODERANGE_ASCIIONLY(obj) -#define ENC_CODERANGE_SET(obj,cr) RB_ENC_CODERANGE_SET(obj,cr) -#define ENC_CODERANGE_CLEAR(obj) RB_ENC_CODERANGE_CLEAR(obj) -#define ENC_CODERANGE_AND(a, b) RB_ENC_CODERANGE_AND(a, b) -#define ENCODING_CODERANGE_SET(obj, encindex, cr) RB_ENCODING_CODERANGE_SET(obj, encindex, cr) - -typedef const OnigEncodingType rb_encoding; - -int rb_char_to_option_kcode(int c, int *option, int *kcode); - -int rb_enc_replicate(const char *, rb_encoding *); -int rb_define_dummy_encoding(const char *); -PUREFUNC(int rb_enc_dummy_p(rb_encoding *enc)); -PUREFUNC(int rb_enc_to_index(rb_encoding *enc)); -int rb_enc_get_index(VALUE obj); -void rb_enc_set_index(VALUE obj, int encindex); -int rb_enc_capable(VALUE obj); -int rb_enc_find_index(const char *name); -int rb_enc_alias(const char *alias, const char *orig); -int rb_to_encoding_index(VALUE); -rb_encoding *rb_to_encoding(VALUE); -rb_encoding *rb_find_encoding(VALUE); -rb_encoding *rb_enc_get(VALUE); -rb_encoding *rb_enc_compatible(VALUE,VALUE); -rb_encoding *rb_enc_check(VALUE,VALUE); -VALUE rb_enc_associate_index(VALUE, int); -VALUE rb_enc_associate(VALUE, rb_encoding*); -void rb_enc_copy(VALUE dst, VALUE src); - -VALUE rb_enc_str_new(const char*, long, rb_encoding*); -VALUE rb_enc_str_new_cstr(const char*, 
rb_encoding*); -VALUE rb_enc_str_new_static(const char*, long, rb_encoding*); -VALUE rb_enc_interned_str(const char *, long, rb_encoding *); -VALUE rb_enc_interned_str_cstr(const char *, rb_encoding *); -VALUE rb_enc_reg_new(const char*, long, rb_encoding*, int); -PRINTF_ARGS(VALUE rb_enc_sprintf(rb_encoding *, const char*, ...), 2, 3); -VALUE rb_enc_vsprintf(rb_encoding *, const char*, va_list); -long rb_enc_strlen(const char*, const char*, rb_encoding*); -char* rb_enc_nth(const char*, const char*, long, rb_encoding*); -VALUE rb_obj_encoding(VALUE); -VALUE rb_enc_str_buf_cat(VALUE str, const char *ptr, long len, rb_encoding *enc); -VALUE rb_enc_uint_chr(unsigned int code, rb_encoding *enc); - -VALUE rb_external_str_new_with_enc(const char *ptr, long len, rb_encoding *); -VALUE rb_str_export_to_enc(VALUE, rb_encoding *); -VALUE rb_str_conv_enc(VALUE str, rb_encoding *from, rb_encoding *to); -VALUE rb_str_conv_enc_opts(VALUE str, rb_encoding *from, rb_encoding *to, int ecflags, VALUE ecopts); - -#ifdef HAVE_BUILTIN___BUILTIN_CONSTANT_P -#define rb_enc_str_new(str, len, enc) RB_GNUC_EXTENSION_BLOCK( \ - (__builtin_constant_p(str) && __builtin_constant_p(len)) ? \ - rb_enc_str_new_static((str), (len), (enc)) : \ - rb_enc_str_new((str), (len), (enc)) \ -) -#define rb_enc_str_new_cstr(str, enc) RB_GNUC_EXTENSION_BLOCK( \ - (__builtin_constant_p(str)) ? 
\ - rb_enc_str_new_static((str), (long)strlen(str), (enc)) : \ - rb_enc_str_new_cstr((str), (enc)) \ -) -#endif - -PRINTF_ARGS(NORETURN(void rb_enc_raise(rb_encoding *, VALUE, const char*, ...)), 3, 4); - -/* index -> rb_encoding */ -rb_encoding *rb_enc_from_index(int idx); - -/* name -> rb_encoding */ -rb_encoding *rb_enc_find(const char *name); - -/* rb_encoding * -> name */ -#define rb_enc_name(enc) (enc)->name - -/* rb_encoding * -> minlen/maxlen */ -#define rb_enc_mbminlen(enc) (enc)->min_enc_len -#define rb_enc_mbmaxlen(enc) (enc)->max_enc_len - -/* -> mbclen (no error notification: 0 < ret <= e-p, no exception) */ -int rb_enc_mbclen(const char *p, const char *e, rb_encoding *enc); - -/* -> mbclen (only for valid encoding) */ -int rb_enc_fast_mbclen(const char *p, const char *e, rb_encoding *enc); - -/* -> chlen, invalid or needmore */ -int rb_enc_precise_mbclen(const char *p, const char *e, rb_encoding *enc); -#define MBCLEN_CHARFOUND_P(ret) ONIGENC_MBCLEN_CHARFOUND_P(ret) -#define MBCLEN_CHARFOUND_LEN(ret) ONIGENC_MBCLEN_CHARFOUND_LEN(ret) -#define MBCLEN_INVALID_P(ret) ONIGENC_MBCLEN_INVALID_P(ret) -#define MBCLEN_NEEDMORE_P(ret) ONIGENC_MBCLEN_NEEDMORE_P(ret) -#define MBCLEN_NEEDMORE_LEN(ret) ONIGENC_MBCLEN_NEEDMORE_LEN(ret) - -/* -> 0x00..0x7f, -1 */ -int rb_enc_ascget(const char *p, const char *e, int *len, rb_encoding *enc); - - -/* -> code (and len) or raise exception */ -unsigned int rb_enc_codepoint_len(const char *p, const char *e, int *len, rb_encoding *enc); - -/* prototype for obsolete function */ -unsigned int rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc); -/* overriding macro */ -#define rb_enc_codepoint(p,e,enc) rb_enc_codepoint_len((p),(e),0,(enc)) -#define rb_enc_mbc_to_codepoint(p, e, enc) ONIGENC_MBC_TO_CODE((enc),(UChar*)(p),(UChar*)(e)) - -/* -> codelen>0 or raise exception */ -int rb_enc_codelen(int code, rb_encoding *enc); -/* -> 0 for invalid codepoint */ -int rb_enc_code_to_mbclen(int code, rb_encoding *enc); 
-#define rb_enc_code_to_mbclen(c, enc) ONIGENC_CODE_TO_MBCLEN((enc), (c)); - -/* code,ptr,encoding -> write buf */ -#define rb_enc_mbcput(c,buf,enc) ONIGENC_CODE_TO_MBC((enc),(c),(UChar*)(buf)) - -/* start, ptr, end, encoding -> prev_char */ -#define rb_enc_prev_char(s,p,e,enc) ((char *)onigenc_get_prev_char_head((enc),(UChar*)(s),(UChar*)(p),(UChar*)(e))) -/* start, ptr, end, encoding -> next_char */ -#define rb_enc_left_char_head(s,p,e,enc) ((char *)onigenc_get_left_adjust_char_head((enc),(UChar*)(s),(UChar*)(p),(UChar*)(e))) -#define rb_enc_right_char_head(s,p,e,enc) ((char *)onigenc_get_right_adjust_char_head((enc),(UChar*)(s),(UChar*)(p),(UChar*)(e))) -#define rb_enc_step_back(s,p,e,n,enc) ((char *)onigenc_step_back((enc),(UChar*)(s),(UChar*)(p),(UChar*)(e),(int)(n))) - -/* ptr, ptr, encoding -> newline_or_not */ -#define rb_enc_is_newline(p,end,enc) ONIGENC_IS_MBC_NEWLINE((enc),(UChar*)(p),(UChar*)(end)) - -#define rb_enc_isctype(c,t,enc) ONIGENC_IS_CODE_CTYPE((enc),(c),(t)) -#define rb_enc_isascii(c,enc) ONIGENC_IS_CODE_ASCII(c) -#define rb_enc_isalpha(c,enc) ONIGENC_IS_CODE_ALPHA((enc),(c)) -#define rb_enc_islower(c,enc) ONIGENC_IS_CODE_LOWER((enc),(c)) -#define rb_enc_isupper(c,enc) ONIGENC_IS_CODE_UPPER((enc),(c)) -#define rb_enc_ispunct(c,enc) ONIGENC_IS_CODE_PUNCT((enc),(c)) -#define rb_enc_isalnum(c,enc) ONIGENC_IS_CODE_ALNUM((enc),(c)) -#define rb_enc_isprint(c,enc) ONIGENC_IS_CODE_PRINT((enc),(c)) -#define rb_enc_isspace(c,enc) ONIGENC_IS_CODE_SPACE((enc),(c)) -#define rb_enc_isdigit(c,enc) ONIGENC_IS_CODE_DIGIT((enc),(c)) - -static inline int -rb_enc_asciicompat_inline(rb_encoding *enc) -{ - return rb_enc_mbminlen(enc)==1 && !rb_enc_dummy_p(enc); -} -#define rb_enc_asciicompat(enc) rb_enc_asciicompat_inline(enc) - -CONSTFUNC(int rb_enc_toupper(int c, rb_encoding *enc)); -CONSTFUNC(int rb_enc_tolower(int c, rb_encoding *enc)); -ID rb_intern3(const char*, long, rb_encoding*); -int rb_enc_symname_p(const char*, rb_encoding*); -int 
rb_enc_symname2_p(const char*, long, rb_encoding*); -int rb_enc_str_coderange(VALUE); -long rb_str_coderange_scan_restartable(const char*, const char*, rb_encoding*, int*); -int rb_enc_str_asciionly_p(VALUE); -#define rb_enc_str_asciicompat_p(str) rb_enc_asciicompat(rb_enc_get(str)) -VALUE rb_enc_from_encoding(rb_encoding *enc); -PUREFUNC(int rb_enc_unicode_p(rb_encoding *enc)); -rb_encoding *rb_ascii8bit_encoding(void); -rb_encoding *rb_utf8_encoding(void); -rb_encoding *rb_usascii_encoding(void); -rb_encoding *rb_locale_encoding(void); -rb_encoding *rb_filesystem_encoding(void); -rb_encoding *rb_default_external_encoding(void); -rb_encoding *rb_default_internal_encoding(void); -#ifndef rb_ascii8bit_encindex -CONSTFUNC(int rb_ascii8bit_encindex(void)); -#endif -#ifndef rb_utf8_encindex -CONSTFUNC(int rb_utf8_encindex(void)); -#endif -#ifndef rb_usascii_encindex -CONSTFUNC(int rb_usascii_encindex(void)); -#endif -int rb_locale_encindex(void); -int rb_filesystem_encindex(void); -VALUE rb_enc_default_external(void); -VALUE rb_enc_default_internal(void); -void rb_enc_set_default_external(VALUE encoding); -void rb_enc_set_default_internal(VALUE encoding); -VALUE rb_locale_charmap(VALUE klass); -long rb_memsearch(const void*,long,const void*,long,rb_encoding*); -char *rb_enc_path_next(const char *,const char *,rb_encoding*); -char *rb_enc_path_skip_prefix(const char *,const char *,rb_encoding*); -char *rb_enc_path_last_separator(const char *,const char *,rb_encoding*); -char *rb_enc_path_end(const char *,const char *,rb_encoding*); -const char *ruby_enc_find_basename(const char *name, long *baselen, long *alllen, rb_encoding *enc); -const char *ruby_enc_find_extname(const char *name, long *len, rb_encoding *enc); -ID rb_check_id_cstr(const char *ptr, long len, rb_encoding *enc); -VALUE rb_check_symbol_cstr(const char *ptr, long len, rb_encoding *enc); - -RUBY_EXTERN VALUE rb_cEncoding; - -/* econv stuff */ - -typedef enum { - econv_invalid_byte_sequence, - 
econv_undefined_conversion, - econv_destination_buffer_full, - econv_source_buffer_empty, - econv_finished, - econv_after_output, - econv_incomplete_input -} rb_econv_result_t; - -typedef struct rb_econv_t rb_econv_t; - -VALUE rb_str_encode(VALUE str, VALUE to, int ecflags, VALUE ecopts); -int rb_econv_has_convpath_p(const char* from_encoding, const char* to_encoding); - -int rb_econv_prepare_options(VALUE opthash, VALUE *ecopts, int ecflags); -int rb_econv_prepare_opts(VALUE opthash, VALUE *ecopts); - -rb_econv_t *rb_econv_open(const char *source_encoding, const char *destination_encoding, int ecflags); -rb_econv_t *rb_econv_open_opts(const char *source_encoding, const char *destination_encoding, int ecflags, VALUE ecopts); - -rb_econv_result_t rb_econv_convert(rb_econv_t *ec, - const unsigned char **source_buffer_ptr, const unsigned char *source_buffer_end, - unsigned char **destination_buffer_ptr, unsigned char *destination_buffer_end, - int flags); -void rb_econv_close(rb_econv_t *ec); - -/* result: 0:success -1:failure */ -int rb_econv_set_replacement(rb_econv_t *ec, const unsigned char *str, size_t len, const char *encname); - -/* result: 0:success -1:failure */ -int rb_econv_decorate_at_first(rb_econv_t *ec, const char *decorator_name); -int rb_econv_decorate_at_last(rb_econv_t *ec, const char *decorator_name); - -VALUE rb_econv_open_exc(const char *senc, const char *denc, int ecflags); - -/* result: 0:success -1:failure */ -int rb_econv_insert_output(rb_econv_t *ec, - const unsigned char *str, size_t len, const char *str_encoding); - -/* encoding that rb_econv_insert_output doesn't need conversion */ -const char *rb_econv_encoding_to_insert_output(rb_econv_t *ec); - -/* raise an error if the last rb_econv_convert is error */ -void rb_econv_check_error(rb_econv_t *ec); - -/* returns an exception object or nil */ -VALUE rb_econv_make_exception(rb_econv_t *ec); - -int rb_econv_putbackable(rb_econv_t *ec); -void rb_econv_putback(rb_econv_t *ec, unsigned char 
*p, int n); - -/* returns the corresponding ASCII compatible encoding for encname, - * or NULL if encname is not ASCII incompatible encoding. */ -const char *rb_econv_asciicompat_encoding(const char *encname); - -VALUE rb_econv_str_convert(rb_econv_t *ec, VALUE src, int flags); -VALUE rb_econv_substr_convert(rb_econv_t *ec, VALUE src, long byteoff, long bytesize, int flags); -VALUE rb_econv_str_append(rb_econv_t *ec, VALUE src, VALUE dst, int flags); -VALUE rb_econv_substr_append(rb_econv_t *ec, VALUE src, long byteoff, long bytesize, VALUE dst, int flags); -VALUE rb_econv_append(rb_econv_t *ec, const char *bytesrc, long bytesize, VALUE dst, int flags); - -void rb_econv_binmode(rb_econv_t *ec); - -enum ruby_econv_flag_type { -/* flags for rb_econv_open */ - RUBY_ECONV_ERROR_HANDLER_MASK = 0x000000ff, - - RUBY_ECONV_INVALID_MASK = 0x0000000f, - RUBY_ECONV_INVALID_REPLACE = 0x00000002, - - RUBY_ECONV_UNDEF_MASK = 0x000000f0, - RUBY_ECONV_UNDEF_REPLACE = 0x00000020, - RUBY_ECONV_UNDEF_HEX_CHARREF = 0x00000030, - - RUBY_ECONV_DECORATOR_MASK = 0x0000ff00, - RUBY_ECONV_NEWLINE_DECORATOR_MASK = 0x00003f00, - RUBY_ECONV_NEWLINE_DECORATOR_READ_MASK = 0x00000f00, - RUBY_ECONV_NEWLINE_DECORATOR_WRITE_MASK = 0x00003000, - - RUBY_ECONV_UNIVERSAL_NEWLINE_DECORATOR = 0x00000100, - RUBY_ECONV_CRLF_NEWLINE_DECORATOR = 0x00001000, - RUBY_ECONV_CR_NEWLINE_DECORATOR = 0x00002000, - RUBY_ECONV_XML_TEXT_DECORATOR = 0x00004000, - RUBY_ECONV_XML_ATTR_CONTENT_DECORATOR = 0x00008000, - - RUBY_ECONV_STATEFUL_DECORATOR_MASK = 0x00f00000, - RUBY_ECONV_XML_ATTR_QUOTE_DECORATOR = 0x00100000, - - RUBY_ECONV_DEFAULT_NEWLINE_DECORATOR = -#if defined(RUBY_TEST_CRLF_ENVIRONMENT) || defined(_WIN32) - RUBY_ECONV_CRLF_NEWLINE_DECORATOR, -#else - 0, -#endif -#define ECONV_ERROR_HANDLER_MASK RUBY_ECONV_ERROR_HANDLER_MASK -#define ECONV_INVALID_MASK RUBY_ECONV_INVALID_MASK -#define ECONV_INVALID_REPLACE RUBY_ECONV_INVALID_REPLACE -#define ECONV_UNDEF_MASK RUBY_ECONV_UNDEF_MASK -#define ECONV_UNDEF_REPLACE 
RUBY_ECONV_UNDEF_REPLACE -#define ECONV_UNDEF_HEX_CHARREF RUBY_ECONV_UNDEF_HEX_CHARREF -#define ECONV_DECORATOR_MASK RUBY_ECONV_DECORATOR_MASK -#define ECONV_NEWLINE_DECORATOR_MASK RUBY_ECONV_NEWLINE_DECORATOR_MASK -#define ECONV_NEWLINE_DECORATOR_READ_MASK RUBY_ECONV_NEWLINE_DECORATOR_READ_MASK -#define ECONV_NEWLINE_DECORATOR_WRITE_MASK RUBY_ECONV_NEWLINE_DECORATOR_WRITE_MASK -#define ECONV_UNIVERSAL_NEWLINE_DECORATOR RUBY_ECONV_UNIVERSAL_NEWLINE_DECORATOR -#define ECONV_CRLF_NEWLINE_DECORATOR RUBY_ECONV_CRLF_NEWLINE_DECORATOR -#define ECONV_CR_NEWLINE_DECORATOR RUBY_ECONV_CR_NEWLINE_DECORATOR -#define ECONV_XML_TEXT_DECORATOR RUBY_ECONV_XML_TEXT_DECORATOR -#define ECONV_XML_ATTR_CONTENT_DECORATOR RUBY_ECONV_XML_ATTR_CONTENT_DECORATOR -#define ECONV_STATEFUL_DECORATOR_MASK RUBY_ECONV_STATEFUL_DECORATOR_MASK -#define ECONV_XML_ATTR_QUOTE_DECORATOR RUBY_ECONV_XML_ATTR_QUOTE_DECORATOR -#define ECONV_DEFAULT_NEWLINE_DECORATOR RUBY_ECONV_DEFAULT_NEWLINE_DECORATOR -/* end of flags for rb_econv_open */ - -/* flags for rb_econv_convert */ - RUBY_ECONV_PARTIAL_INPUT = 0x00010000, - RUBY_ECONV_AFTER_OUTPUT = 0x00020000, -#define ECONV_PARTIAL_INPUT RUBY_ECONV_PARTIAL_INPUT -#define ECONV_AFTER_OUTPUT RUBY_ECONV_AFTER_OUTPUT -/* end of flags for rb_econv_convert */ -RUBY_ECONV_FLAGS_PLACEHOLDER}; - -RBIMPL_SYMBOL_EXPORT_END() +#include "ruby/internal/encoding/coderange.h" +#include "ruby/internal/encoding/ctype.h" +#include "ruby/internal/encoding/encoding.h" +#include "ruby/internal/encoding/pathname.h" +#include "ruby/internal/encoding/re.h" +#include "ruby/internal/encoding/sprintf.h" +#include "ruby/internal/encoding/string.h" +#include "ruby/internal/encoding/symbol.h" +#include "ruby/internal/encoding/transcode.h" #endif /* RUBY_ENCODING_H */ diff --git a/include/ruby/fiber/scheduler.h b/include/ruby/fiber/scheduler.h index 246e690587..8f3d383330 100644 --- a/include/ruby/fiber/scheduler.h +++ b/include/ruby/fiber/scheduler.h @@ -1,4 +1,4 @@ -#ifndef 
RUBY_FIBER_SCHEDULER_H /*-*-C-*-vi:se ft=c:*/ +#ifndef RUBY_FIBER_SCHEDULER_H /*-*-C++-*-vi:se ft=cpp:*/ #define RUBY_FIBER_SCHEDULER_H /** * @file @@ -7,45 +7,396 @@ * Permission is hereby granted, to either redistribute and/or * modify this file, provided that the conditions mentioned in the * file COPYING are met. Consult the file for details. - * @brief Internal header for Scheduler. + * @brief Scheduler APIs. */ +#include "ruby/internal/config.h" + +#include <errno.h> + +#ifdef STDC_HEADERS +#include <stddef.h> /* size_t */ +#endif + #include "ruby/ruby.h" -#include "ruby/intern.h" +#include "ruby/internal/dllexport.h" +#include "ruby/internal/arithmetic.h" RBIMPL_SYMBOL_EXPORT_BEGIN() +#define RUBY_FIBER_SCHEDULER_VERSION 2 + +struct timeval; + +/** + * Wrap a `ssize_t` and `int errno` into a single `VALUE`. This interface should + * be used to safely capture results from system calls like `read` and `write`. + * + * You should use `rb_fiber_scheduler_io_result_apply` to unpack the result of + * this value and update `int errno`. + * + * You should not directly try to interpret the result value as it is considered + * an opaque representation. However, the general representation is an integer + * in the range of `[-int errno, size_t size]`. Linux generally restricts the + * result of system calls like `read` and `write` to `<= 2^31` which means this + * will typically fit within a single FIXNUM. + * + * @param[in] result The result of the system call. + * @param[in] error The value of `errno`. + * @return A `VALUE` which contains the result and/or errno. + */ +static inline VALUE +rb_fiber_scheduler_io_result(ssize_t result, int error) +{ + if (result == -1) { + return RB_INT2NUM(-error); + } + else { + return RB_SIZE2NUM(result); + } +} + +/** + * Apply an io result to the local thread, returning the value of the original + * system call that created it and updating `int errno`. 
+ * + * You should not directly try to interpret the result value as it is considered + * an opaque representation. + * + * @param[in] result The `VALUE` which contains an errno and/or result size. + * @post Updates `int errno` with the value if negative. + * @return The original result of the system call. + */ +static inline ssize_t +rb_fiber_scheduler_io_result_apply(VALUE result) +{ + if (RB_FIXNUM_P(result) && RB_NUM2INT(result) < 0) { + errno = -RB_NUM2INT(result); + return -1; + } + else { + return RB_NUM2SIZE(result); + } +} + +/** + * Queries the current scheduler of the current thread that is calling this + * function. + * + * @retval RUBY_Qnil No scheduler has been set so far to this thread (which + * is the default). + * @retval otherwise The scheduler that was last set for the current thread + * with rb_fiber_scheduler_set(). + */ VALUE rb_fiber_scheduler_get(void); + +/** + * Destructively assigns the passed scheduler to that of the current thread + * that is calling this function. If the scheduler is set, non-blocking fibers + * (created by `Fiber.new` with `blocking: false`, or by `Fiber.schedule`) call + * that scheduler's hook methods on potentially blocking operations, and the + * current thread will call scheduler's `#close` method on finalisation + * (allowing the scheduler to properly manage all non-finished fibers). + * `scheduler` can be an object of any class corresponding to + * `Fiber::Scheduler` interface. Its implementation is up to the user. + * + * @param[in] scheduler The scheduler to set. + * @exception rb_eArgError `scheduler` does not conform the interface. + * @post Current thread's scheduler is `scheduler`. + */ VALUE rb_fiber_scheduler_set(VALUE scheduler); +/** + * Identical to rb_fiber_scheduler_get(), except it also returns ::RUBY_Qnil in + * case of a blocking fiber. As blocking fibers do not participate schedulers' + * scheduling this function can be handy. + * + * @retval RUBY_Qnil No scheduler is in effect. 
+ * @retval otherwise The scheduler that is in effect, if any. + */ VALUE rb_fiber_scheduler_current(void); + +/** + * Identical to rb_fiber_scheduler_current(), except it queries for that of the + * passed thread instead of the implicit current one. + * + * @param[in] thread Target thread. + * @exception rb_eTypeError `thread` is not a thread. + * @retval RUBY_Qnil No scheduler is in effect in `thread`. + * @retval otherwise The scheduler that is in effect in `thread`. + */ VALUE rb_fiber_scheduler_current_for_thread(VALUE thread); +/** + * Converts the passed timeout to an expression that rb_fiber_scheduler_block() + * etc. expects. + * + * @param[in] timeout A duration (can be `NULL`). + * @retval RUBY_Qnil No timeout (blocks indefinitely). + * @retval otherwise A timeout object. + */ VALUE rb_fiber_scheduler_make_timeout(struct timeval *timeout); +/** + * Closes the passed scheduler object. This expects the scheduler to wait for + * all fibers. Thus the scheduler's main loop tends to start here. + * + * @param[in] scheduler Target scheduler. + * @return What `scheduler.close` returns. + */ VALUE rb_fiber_scheduler_close(VALUE scheduler); +/** + * Non-blocking `sleep`. Depending on scheduler implementation, this for + * instance switches to another fiber etc. + * + * @param[in] scheduler Target scheduler. + * @param[in] duration Passed as-is to `scheduler.kernel_sleep`. + * @return What `scheduler.kernel_sleep` returns. + */ VALUE rb_fiber_scheduler_kernel_sleep(VALUE scheduler, VALUE duration); + +/** + * Identical to rb_fiber_scheduler_kernel_sleep(), except it can pass multiple + * arguments. + * + * @param[in] scheduler Target scheduler. + * @param[in] argc Number of objects of `argv`. + * @param[in] argv Passed as-is to `scheduler.kernel_sleep` + * @return What `scheduler.kernel_sleep` returns. 
+ */ VALUE rb_fiber_scheduler_kernel_sleepv(VALUE scheduler, int argc, VALUE * argv); +/* Description TBW */ #if 0 VALUE rb_fiber_scheduler_timeout_after(VALUE scheduler, VALUE timeout, VALUE exception, VALUE message); VALUE rb_fiber_scheduler_timeout_afterv(VALUE scheduler, int argc, VALUE * argv); +int rb_fiber_scheduler_supports_process_wait(VALUE scheduler); #endif -int rb_fiber_scheduler_supports_process_wait(VALUE scheduler); +/** + * Non-blocking `waitpid`. Depending on scheduler implementation, this for + * instance switches to another fiber etc. + * + * @param[in] scheduler Target scheduler. + * @param[in] pid Process ID to wait. + * @param[in] flags Wait flags, e.g. `WUNTRACED`. + * @return What `scheduler.process_wait` returns. + */ VALUE rb_fiber_scheduler_process_wait(VALUE scheduler, rb_pid_t pid, int flags); +/** + * Non-blocking wait for the passed "blocker", which is for instance + * `Thread.join` or `Mutex.lock`. Depending on scheduler implementation, this + * for instance switches to another fiber etc. + * + * @param[in] scheduler Target scheduler. + * @param[in] blocker What blocks the current fiber. + * @param[in] timeout Numeric timeout. + * @return What `scheduler.block` returns. + */ VALUE rb_fiber_scheduler_block(VALUE scheduler, VALUE blocker, VALUE timeout); + +/** + * Wakes up a fiber previously blocked using rb_fiber_scheduler_block(). + * + * @param[in] scheduler Target scheduler. + * @param[in] blocker What was awaited for. + * @param[in] fiber What to unblock. + * @return What `scheduler.unblock` returns. + */ VALUE rb_fiber_scheduler_unblock(VALUE scheduler, VALUE blocker, VALUE fiber); +/** + * Non-blocking version of rb_io_wait(). Depending on scheduler + * implementation, this for instance switches to another fiber etc. + * + * The "events" here is a Ruby level integer, which is an OR-ed value of + * `IO::READABLE`, `IO::WRITABLE`, and `IO::PRIORITY`. + * + * @param[in] scheduler Target scheduler. 
+ * @param[in] io An io object to wait. + * @param[in] events An integer set of interests. + * @param[in] timeout Numeric timeout. + * @return What `scheduler.io_wait` returns. + */ VALUE rb_fiber_scheduler_io_wait(VALUE scheduler, VALUE io, VALUE events, VALUE timeout); + +/** + * Non-blocking wait until the passed IO is ready for reading. This is a + * special case of rb_fiber_scheduler_io_wait(), where the interest is + * `IO::READABLE` and timeout is never. + * + * @param[in] scheduler Target scheduler. + * @param[in] io An io object to wait. + * @return What `scheduler.io_wait` returns. + */ VALUE rb_fiber_scheduler_io_wait_readable(VALUE scheduler, VALUE io); + +/** + * Non-blocking wait until the passed IO is ready for writing. This is a + * special case of rb_fiber_scheduler_io_wait(), where the interest is + * `IO::WRITABLE` and timeout is never. + * + * @param[in] scheduler Target scheduler. + * @param[in] io An io object to wait. + * @return What `scheduler.io_wait` returns. + */ VALUE rb_fiber_scheduler_io_wait_writable(VALUE scheduler, VALUE io); -VALUE rb_fiber_scheduler_io_read(VALUE scheduler, VALUE io, VALUE buffer, size_t offset, size_t length); -VALUE rb_fiber_scheduler_io_write(VALUE scheduler, VALUE io, VALUE buffer, size_t offset, size_t length); +/** + * Non-blocking version of `IO.select`. + * + * It's possible that this will be emulated using a thread, so you should not + * rely on it for high performance. + * + * @param[in] scheduler Target scheduler. + * @param[in] readables An array of readable objects. + * @param[in] writables An array of writable objects. + * @param[in] exceptables An array of objects that might encounter exceptional conditions. + * @param[in] timeout Numeric timeout or nil. + * @return What `scheduler.io_select` returns, normally a 3-tuple of arrays of ready objects. 
+ */ +VALUE rb_fiber_scheduler_io_select(VALUE scheduler, VALUE readables, VALUE writables, VALUE exceptables, VALUE timeout); + +/** + * Non-blocking version of `IO.select`, `argv` variant. + */ +VALUE rb_fiber_scheduler_io_selectv(VALUE scheduler, int argc, VALUE *argv); + +/** + * Non-blocking read from the passed IO. + * + * @param[in] scheduler Target scheduler. + * @param[in] io An io object to read from. + * @param[in] buffer The buffer to read to. + * @param[in] length The minimum number of bytes to read. + * @param[in] offset The offset in the buffer to read to. + * @retval RUBY_Qundef `scheduler` doesn't have `#io_read`. + * @retval otherwise What `scheduler.io_read` returns `[-errno, size]`. + */ +VALUE rb_fiber_scheduler_io_read(VALUE scheduler, VALUE io, VALUE buffer, size_t length, size_t offset); + +/** + * Non-blocking write to the passed IO. + * + * @param[in] scheduler Target scheduler. + * @param[in] io An io object to write to. + * @param[in] buffer The buffer to write from. + * @param[in] length The minimum number of bytes to write. + * @param[in] offset The offset in the buffer to write from. + * @retval RUBY_Qundef `scheduler` doesn't have `#io_write`. + * @retval otherwise What `scheduler.io_write` returns `[-errno, size]`. + */ +VALUE rb_fiber_scheduler_io_write(VALUE scheduler, VALUE io, VALUE buffer, size_t length, size_t offset); + +/** + * Non-blocking read from the passed IO at the specified offset. + * + * @param[in] scheduler Target scheduler. + * @param[in] io An io object to read from. + * @param[in] from The offset to read from. + * @param[in] buffer The buffer to read to. + * @param[in] length The minimum number of bytes to read. + * @param[in] offset The offset in the buffer to read to. + * @retval RUBY_Qundef `scheduler` doesn't have `#io_read`. + * @retval otherwise What `scheduler.io_read` returns. 
+ */ +VALUE rb_fiber_scheduler_io_pread(VALUE scheduler, VALUE io, rb_off_t from, VALUE buffer, size_t length, size_t offset); + +/** + * Non-blocking write to the passed IO at the specified offset. + * + * @param[in] scheduler Target scheduler. + * @param[in] io An io object to write to. + * @param[in] from The offset to write to. + * @param[in] buffer The buffer to write from. + * @param[in] length The minimum number of bytes to write. + * @param[in] offset The offset in the buffer to write from. + * @retval RUBY_Qundef `scheduler` doesn't have `#io_write`. + * @retval otherwise What `scheduler.io_write` returns. + */ +VALUE rb_fiber_scheduler_io_pwrite(VALUE scheduler, VALUE io, rb_off_t from, VALUE buffer, size_t length, size_t offset); + +/** + * Non-blocking read from the passed IO using a native buffer. + * + * @param[in] scheduler Target scheduler. + * @param[in] io An io object to read from. + * @param[in] base The memory to read to. + * @param[in] size Size of the memory. + * @param[in] length The minimum number of bytes to read. + * @retval RUBY_Qundef `scheduler` doesn't have `#io_read`. + * @retval otherwise What `scheduler.io_read` returns. + */ +VALUE rb_fiber_scheduler_io_read_memory(VALUE scheduler, VALUE io, void *base, size_t size, size_t length); + +/** + * Non-blocking write to the passed IO using a native buffer. + * + * @param[in] scheduler Target scheduler. + * @param[in] io An io object to write to. + * @param[in] base The memory to write from. + * @param[in] size Size of the memory. + * @param[in] length The minimum number of bytes to write. + * @retval RUBY_Qundef `scheduler` doesn't have `#io_write`. + * @retval otherwise What `scheduler.io_write` returns. + */ +VALUE rb_fiber_scheduler_io_write_memory(VALUE scheduler, VALUE io, const void *base, size_t size, size_t length); + +/** + * Non-blocking pread from the passed IO using a native buffer. + * + * @param[in] scheduler Target scheduler. + * @param[in] io An io object to read from. 
+ * @param[in] from The offset to read from. + * @param[in] base The memory to read to. + * @param[in] size Size of the memory. + * @param[in] length The minimum number of bytes to read. + * @retval RUBY_Qundef `scheduler` doesn't have `#io_read`. + * @retval otherwise What `scheduler.io_read` returns. + */ +VALUE rb_fiber_scheduler_io_pread_memory(VALUE scheduler, VALUE io, rb_off_t from, void *base, size_t size, size_t length); + +/** + * Non-blocking pwrite to the passed IO using a native buffer. + * + * @param[in] scheduler Target scheduler. + * @param[in] io An io object to write to. + * @param[in] from The offset to write to. + * @param[in] base The memory to write from. + * @param[in] size Size of the memory. + * @param[in] length The minimum number of bytes to write. + * @retval RUBY_Qundef `scheduler` doesn't have `#io_write`. + * @retval otherwise What `scheduler.io_write` returns. + */ +VALUE rb_fiber_scheduler_io_pwrite_memory(VALUE scheduler, VALUE io, rb_off_t from, const void *base, size_t size, size_t length); + +/** + * Non-blocking close of the given IO. + * + * @param[in] scheduler Target scheduler. + * @param[in] io An io object to close. + * @retval RUBY_Qundef `scheduler` doesn't have `#io_close`. + * @retval otherwise What `scheduler.io_close` returns. + */ +VALUE rb_fiber_scheduler_io_close(VALUE scheduler, VALUE io); + +/** + * Non-blocking DNS lookup. + * + * @param[in] scheduler Target scheduler. + * @param[in] hostname A host name to query. + * @retval RUBY_Qundef `scheduler` doesn't have `#address_resolve`. + * @retval otherwise What `scheduler.address_resolve` returns. + */ VALUE rb_fiber_scheduler_address_resolve(VALUE scheduler, VALUE hostname); +/** + * Create and schedule a non-blocking fiber. 
+ * + */ +VALUE rb_fiber_scheduler_fiber(VALUE scheduler, int argc, VALUE *argv, int kw_splat); + RBIMPL_SYMBOL_EXPORT_END() #endif /* RUBY_FIBER_SCHEDULER_H */ diff --git a/include/ruby/intern.h b/include/ruby/intern.h index 2480e2e703..48e4cd546e 100644 --- a/include/ruby/intern.h +++ b/include/ruby/intern.h @@ -36,7 +36,6 @@ #include "ruby/internal/intern/error.h" #include "ruby/internal/intern/eval.h" #include "ruby/internal/intern/file.h" -#include "ruby/internal/intern/gc.h" #include "ruby/internal/intern/hash.h" #include "ruby/internal/intern/io.h" #include "ruby/internal/intern/load.h" diff --git a/include/ruby/internal/abi.h b/include/ruby/internal/abi.h new file mode 100644 index 0000000000..e735a67564 --- /dev/null +++ b/include/ruby/internal/abi.h @@ -0,0 +1,58 @@ +#ifndef RUBY_ABI_H +#define RUBY_ABI_H + +#ifdef RUBY_ABI_VERSION /* should match the definition in config.h */ + +/* This number represents Ruby's ABI version. + * + * In development Ruby, it should be bumped every time an ABI incompatible + * change is introduced. This will force other developers to rebuild extension + * gems. + * + * The following cases are considered as ABI incompatible changes: + * - Changing any data structures. + * - Changing macros or inline functions causing a change in behavior. + * - Deprecating or removing function declarations. + * + * The following cases are NOT considered as ABI incompatible changes: + * - Any change that does not involve the header files in the `include` + * directory. + * - Adding macros, inline functions, or function declarations. + * - Backwards compatible refactors. + * - Editing comments. + * + * In released versions of Ruby, this number is not defined since teeny + * versions of Ruby should guarantee ABI compatibility. + */ +#define RUBY_ABI_VERSION 0 + +/* Windows does not support weak symbols so ruby_abi_version will not exist + * in the shared library. 
*/ +#if defined(HAVE_FUNC_WEAK) && !defined(_WIN32) && !defined(__MINGW32__) && !defined(__CYGWIN__) +# define RUBY_DLN_CHECK_ABI +#endif +#endif /* RUBY_ABI_VERSION */ + +#if defined(RUBY_DLN_CHECK_ABI) && !defined(RUBY_EXPORT) + +# ifdef __cplusplus +extern "C" { +# endif + +RUBY_FUNC_EXPORTED unsigned long long __attribute__((weak)) +ruby_abi_version(void) +{ +# ifdef RUBY_ABI_VERSION + return RUBY_ABI_VERSION; +# else + return 0; +# endif +} + +# ifdef __cplusplus +} +# endif + +#endif + +#endif diff --git a/include/ruby/internal/anyargs.h b/include/ruby/internal/anyargs.h index f09a4e72e9..e3e1b6166d 100644 --- a/include/ruby/internal/anyargs.h +++ b/include/ruby/internal/anyargs.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Function overloads to issue warnings around #ANYARGS. 
* * For instance ::rb_define_method takes a pointer to #ANYARGS -ed functions, @@ -239,15 +239,16 @@ # define RBIMPL_ANYARGS_DISPATCH_rb_define_method_13(n) RBIMPL_ANYARGS_DISPATCH((n) == 13, rb_define_method_13, RBIMPL_ANYARGS_DISPATCH_rb_define_method_12(n)) # define RBIMPL_ANYARGS_DISPATCH_rb_define_method_14(n) RBIMPL_ANYARGS_DISPATCH((n) == 14, rb_define_method_14, RBIMPL_ANYARGS_DISPATCH_rb_define_method_13(n)) # define RBIMPL_ANYARGS_DISPATCH_rb_define_method_15(n) RBIMPL_ANYARGS_DISPATCH((n) == 15, rb_define_method_15, RBIMPL_ANYARGS_DISPATCH_rb_define_method_14(n)) -# define RBIMPL_ANYARGS_DISPATCH_rb_define_singleton_method(n, f) RBIMPL_ANYARGS_DISPATCH(RBIMPL_CFUNC_IS_rb_f_notimplement(f), rb_define_singleton_method_m3, RBIMPL_ANYARGS_DISPATCH_rb_define_singleton_method_15(n)) -# define RBIMPL_ANYARGS_DISPATCH_rb_define_protected_method(n, f) RBIMPL_ANYARGS_DISPATCH(RBIMPL_CFUNC_IS_rb_f_notimplement(f), rb_define_protected_method_m3, RBIMPL_ANYARGS_DISPATCH_rb_define_protected_method_15(n)) -# define RBIMPL_ANYARGS_DISPATCH_rb_define_private_method(n, f) RBIMPL_ANYARGS_DISPATCH(RBIMPL_CFUNC_IS_rb_f_notimplement(f), rb_define_private_method_m3, RBIMPL_ANYARGS_DISPATCH_rb_define_private_method_15(n)) -# define RBIMPL_ANYARGS_DISPATCH_rb_define_module_function(n, f) RBIMPL_ANYARGS_DISPATCH(RBIMPL_CFUNC_IS_rb_f_notimplement(f), rb_define_module_function_m3, RBIMPL_ANYARGS_DISPATCH_rb_define_module_function_15(n)) -# define RBIMPL_ANYARGS_DISPATCH_rb_define_global_function(n, f) RBIMPL_ANYARGS_DISPATCH(RBIMPL_CFUNC_IS_rb_f_notimplement(f), rb_define_global_function_m3, RBIMPL_ANYARGS_DISPATCH_rb_define_global_function_15(n)) -# define RBIMPL_ANYARGS_DISPATCH_rb_define_method_id(n, f) RBIMPL_ANYARGS_DISPATCH(RBIMPL_CFUNC_IS_rb_f_notimplement(f), rb_define_method_id_m3, RBIMPL_ANYARGS_DISPATCH_rb_define_method_id_15(n)) -# define RBIMPL_ANYARGS_DISPATCH_rb_define_method(n, f) RBIMPL_ANYARGS_DISPATCH(RBIMPL_CFUNC_IS_rb_f_notimplement(f), rb_define_method_m3, 
RBIMPL_ANYARGS_DISPATCH_rb_define_method_15(n)) -# define RBIMPL_ANYARGS_ATTRSET(sym) RBIMPL_ATTR_MAYBE_UNUSED() RBIMPL_ATTR_NONNULL() RBIMPL_ATTR_WEAKREF(sym) +# define RBIMPL_ANYARGS_DISPATCH_rb_define_singleton_method(n, f) RBIMPL_ANYARGS_DISPATCH(RBIMPL_CFUNC_IS_rb_f_notimplement(f), rb_define_singleton_method_notimpl, RBIMPL_ANYARGS_DISPATCH_rb_define_singleton_method_15(n)) +# define RBIMPL_ANYARGS_DISPATCH_rb_define_protected_method(n, f) RBIMPL_ANYARGS_DISPATCH(RBIMPL_CFUNC_IS_rb_f_notimplement(f), rb_define_protected_method_notimpl, RBIMPL_ANYARGS_DISPATCH_rb_define_protected_method_15(n)) +# define RBIMPL_ANYARGS_DISPATCH_rb_define_private_method(n, f) RBIMPL_ANYARGS_DISPATCH(RBIMPL_CFUNC_IS_rb_f_notimplement(f), rb_define_private_method_notimpl, RBIMPL_ANYARGS_DISPATCH_rb_define_private_method_15(n)) +# define RBIMPL_ANYARGS_DISPATCH_rb_define_module_function(n, f) RBIMPL_ANYARGS_DISPATCH(RBIMPL_CFUNC_IS_rb_f_notimplement(f), rb_define_module_function_notimpl, RBIMPL_ANYARGS_DISPATCH_rb_define_module_function_15(n)) +# define RBIMPL_ANYARGS_DISPATCH_rb_define_global_function(n, f) RBIMPL_ANYARGS_DISPATCH(RBIMPL_CFUNC_IS_rb_f_notimplement(f), rb_define_global_function_notimpl, RBIMPL_ANYARGS_DISPATCH_rb_define_global_function_15(n)) +# define RBIMPL_ANYARGS_DISPATCH_rb_define_method_id(n, f) RBIMPL_ANYARGS_DISPATCH(RBIMPL_CFUNC_IS_rb_f_notimplement(f), rb_define_method_id_notimpl, RBIMPL_ANYARGS_DISPATCH_rb_define_method_id_15(n)) +# define RBIMPL_ANYARGS_DISPATCH_rb_define_method(n, f) RBIMPL_ANYARGS_DISPATCH(RBIMPL_CFUNC_IS_rb_f_notimplement(f), rb_define_method_notimpl, RBIMPL_ANYARGS_DISPATCH_rb_define_method_15(n)) +# define RBIMPL_ANYARGS_ATTRSET(sym) RBIMPL_ATTR_MAYBE_UNUSED() RBIMPL_ATTR_NONNULL(()) RBIMPL_ATTR_WEAKREF(sym) # define RBIMPL_ANYARGS_DECL(sym, ...) 
\ +RBIMPL_ANYARGS_ATTRSET(sym) static void sym ## _notimpl(__VA_ARGS__, VALUE(*)(int, const VALUE *, VALUE, VALUE), int); \ RBIMPL_ANYARGS_ATTRSET(sym) static void sym ## _m3(__VA_ARGS__, VALUE(*)(ANYARGS), int); \ RBIMPL_ANYARGS_ATTRSET(sym) static void sym ## _m2(__VA_ARGS__, VALUE(*)(VALUE, VALUE), int); \ RBIMPL_ANYARGS_ATTRSET(sym) static void sym ## _m1(__VA_ARGS__, VALUE(*)(int, union { VALUE *x; const VALUE *y; } __attribute__((__transparent_union__)), VALUE), int); \ @@ -338,7 +339,7 @@ RBIMPL_ANYARGS_DECL(rb_define_method, VALUE, const char *) /** * @brief Defines ::rb_mKerbel \#mid. - * @see ::rb_define_gobal_function + * @see ::rb_define_global_function * @param mid Name of the defining method. * @param func Implementation of ::rb_mKernel \#mid. * @param arity Arity of ::rb_mKernel \#mid. @@ -349,7 +350,7 @@ RBIMPL_ANYARGS_DECL(rb_define_method, VALUE, const char *) /** * This macro is to properly cast a function parameter of *_define_method - * family. It has been around since 1.x era so you can maximize backwards + * family. It has been around since 1.x era so you can maximise backwards * compatibility by using it. * * ```CXX diff --git a/include/ruby/internal/arithmetic.h b/include/ruby/internal/arithmetic.h index c3806db444..7ebb4a86f1 100644 --- a/include/ruby/internal/arithmetic.h +++ b/include/ruby/internal/arithmetic.h @@ -17,8 +17,9 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. - * @brief Conversion between C's arithmtic types and Ruby's numeric types. + * extension libraries. They could be written in C++98. + * @brief Conversion between C's arithmetic types and Ruby's numeric + * types. 
*/ #include "ruby/internal/arithmetic/char.h" #include "ruby/internal/arithmetic/double.h" diff --git a/include/ruby/internal/arithmetic/char.h b/include/ruby/internal/arithmetic/char.h index 3033639a43..920fdc0c9d 100644 --- a/include/ruby/internal/arithmetic/char.h +++ b/include/ruby/internal/arithmetic/char.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Arithmetic conversion between C's `char` and Ruby's. */ #include "ruby/internal/arithmetic/int.h" /* NUM2INT is here, but */ @@ -29,9 +29,9 @@ #include "ruby/internal/core/rstring.h" #include "ruby/internal/value_type.h" -#define RB_NUM2CHR rb_num2char_inline -#define NUM2CHR RB_NUM2CHR -#define CHR2FIX RB_CHR2FIX +#define RB_NUM2CHR rb_num2char_inline /**< @alias{rb_num2char_inline} */ +#define NUM2CHR RB_NUM2CHR /**< @old{RB_NUM2CHR} */ +#define CHR2FIX RB_CHR2FIX /**< @old{RB_CHR2FIX} */ /** @cond INTERNAL_MACRO */ #define RB_CHR2FIX RB_CHR2FIX @@ -40,12 +40,35 @@ RBIMPL_ATTR_CONST_UNLESS_DEBUG() RBIMPL_ATTR_CONSTEXPR_UNLESS_DEBUG(CXX14) RBIMPL_ATTR_ARTIFICIAL() +/** + * Converts a C's `unsigned char` into an instance of ::rb_cInteger. + * + * @param[in] c Arbitrary `unsigned char` value. + * @return An instance of ::rb_cInteger. + * + * @internal + * + * Nobody explicitly states this but in Ruby, a char means an unsigned integer + * value of range 0..255. This is a general principle. AFAIK there is no + * single line of code where char is signed. + */ static inline VALUE RB_CHR2FIX(unsigned char c) { return RB_INT2FIX(c); } +/** + * Converts an instance of ::rb_cNumeric into C's `char`. At the same time it + * accepts a String of more than one character, and returns its first byte. 
In + * the early days there was a Ruby level "character" literal `?c`, which + * roughly worked this way. + * + * @param[in] x Either a string or a numeric. + * @exception rb_eTypeError `x` is not a numeric. + * @exception rb_eRangeError `x` is out of range of `unsigned int`. + * @return The passed value converted into C's `char`. + */ static inline char rb_num2char_inline(VALUE x) { diff --git a/include/ruby/internal/arithmetic/double.h b/include/ruby/internal/arithmetic/double.h index 69d8630dbb..229de47aef 100644 --- a/include/ruby/internal/arithmetic/double.h +++ b/include/ruby/internal/arithmetic/double.h @@ -17,23 +17,56 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Arithmetic conversion between C's `double` and Ruby's. */ #include "ruby/internal/attr/pure.h" #include "ruby/internal/dllexport.h" #include "ruby/internal/value.h" -#define NUM2DBL rb_num2dbl -#define RFLOAT_VALUE rb_float_value -#define DBL2NUM rb_float_new +#define NUM2DBL rb_num2dbl /**< @old{rb_num2dbl} */ +#define RFLOAT_VALUE rb_float_value /**< @old{rb_float_value} */ +#define DBL2NUM rb_float_new /**< @old{rb_float_new} */ RBIMPL_SYMBOL_EXPORT_BEGIN() -double rb_num2dbl(VALUE); +/** + * Converts an instance of ::rb_cNumeric into C's `double`. + * + * @param[in] num Something numeric. + * @exception rb_eTypeError `num` is not a numeric. + * @return The passed value converted into C's `double`. + */ +double rb_num2dbl(VALUE num); + RBIMPL_ATTR_PURE() -double rb_float_value(VALUE); -VALUE rb_float_new(double); -VALUE rb_float_new_in_heap(double); +/** + * Extracts its double value from an instance of ::rb_cFloat. + * + * @param[in] num An instance of ::rb_cFloat. + * @pre Must not pass anything other than a Float. 
+ * @return The passed value converted into C's `double`. + */ +double rb_float_value(VALUE num); + +/** + * Converts a C's `double` into an instance of ::rb_cFloat. + * + * @param[in] d Arbitrary `double` value. + * @return An instance of ::rb_cFloat. + */ +VALUE rb_float_new(double d); + +/** + * Identical to rb_float_new(), except it does not generate Flonums. + * + * @param[in] d Arbitrary `double` value. + * @return An instance of ::rb_cFloat. + * + * @internal + * + * @shyouhei has no idea why it is here. + */ +VALUE rb_float_new_in_heap(double d); RBIMPL_SYMBOL_EXPORT_END() #endif /* RBIMPL_ARITHMETIC_DOUBLE_H */ diff --git a/include/ruby/internal/arithmetic/fixnum.h b/include/ruby/internal/arithmetic/fixnum.h index 68544b760b..c8927ac824 100644 --- a/include/ruby/internal/arithmetic/fixnum.h +++ b/include/ruby/internal/arithmetic/fixnum.h @@ -17,28 +17,44 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Handling of integers formerly known as Fixnums. */ #include "ruby/backward/2/limits.h" -#define FIXABLE RB_FIXABLE -#define FIXNUM_MAX RUBY_FIXNUM_MAX -#define FIXNUM_MIN RUBY_FIXNUM_MIN -#define NEGFIXABLE RB_NEGFIXABLE -#define POSFIXABLE RB_POSFIXABLE +#define FIXABLE RB_FIXABLE /**< @old{RB_FIXABLE} */ +#define FIXNUM_MAX RUBY_FIXNUM_MAX /**< @old{RUBY_FIXNUM_MAX} */ +#define FIXNUM_MIN RUBY_FIXNUM_MIN /**< @old{RUBY_FIXNUM_MIN} */ +#define NEGFIXABLE RB_NEGFIXABLE /**< @old{RB_NEGFIXABLE} */ +#define POSFIXABLE RB_POSFIXABLE /**< @old{RB_POSFIXABLE} */ -/* +/** + * Checks if the passed value is in range of fixnum, assuming it is a positive + * number. Can sometimes be useful for C's unsigned integer types. 
+ * + * @internal + * * FIXABLE can be applied to anything, from double to intmax_t. The problem is * double. On a 64bit system RUBY_FIXNUM_MAX is 4,611,686,018,427,387,903, * which is not representable by a double. The nearest value that a double can * represent is 4,611,686,018,427,387,904, which is not fixable. The - * seemingly-stragne "< FIXNUM_MAX + 1" expression below is due to this. + * seemingly-strange "< FIXNUM_MAX + 1" expression below is due to this. */ #define RB_POSFIXABLE(_) ((_) < RUBY_FIXNUM_MAX + 1) + +/** + * Checks if the passed value is in range of fixnum, assuming it is a negative + * number. This is an implementation of #RB_FIXABLE. Rarely used stand alone. + */ #define RB_NEGFIXABLE(_) ((_) >= RUBY_FIXNUM_MIN) + +/** Checks if the passed value is in range of fixnum */ #define RB_FIXABLE(_) (RB_POSFIXABLE(_) && RB_NEGFIXABLE(_)) + +/** Maximum possible value that a fixnum can represent. */ #define RUBY_FIXNUM_MAX (LONG_MAX / 2) + +/** Minimum possible value that a fixnum can represent. */ #define RUBY_FIXNUM_MIN (LONG_MIN / 2) #endif /* RBIMPL_ARITHMETIC_FIXNUM_H */ diff --git a/include/ruby/internal/arithmetic/gid_t.h b/include/ruby/internal/arithmetic/gid_t.h index 094fc63092..361220bfab 100644 --- a/include/ruby/internal/arithmetic/gid_t.h +++ b/include/ruby/internal/arithmetic/gid_t.h @@ -17,20 +17,23 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Arithmetic conversion between C's `gid_t` and Ruby's. */ #include "ruby/internal/config.h" #include "ruby/internal/arithmetic/long.h" +/** Converts a C's `gid_t` into an instance of ::rb_cInteger. */ #ifndef GIDT2NUM # define GIDT2NUM RB_LONG2NUM #endif +/** Converts an instance of ::rb_cNumeric into C's `gid_t`. 
*/ #ifndef NUM2GIDT # define NUM2GIDT RB_NUM2LONG #endif +/** A rb_sprintf() format prefix to be used for a `gid_t` parameter. */ #ifndef PRI_GIDT_PREFIX # define PRI_GIDT_PREFIX PRI_LONG_PREFIX #endif diff --git a/include/ruby/internal/arithmetic/int.h b/include/ruby/internal/arithmetic/int.h index 346fa9258b..6bd8ec2184 100644 --- a/include/ruby/internal/arithmetic/int.h +++ b/include/ruby/internal/arithmetic/int.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Arithmetic conversion between C's `int` and Ruby's. */ #include "ruby/internal/config.h" @@ -34,16 +34,16 @@ #include "ruby/internal/warning_push.h" #include "ruby/assert.h" -#define RB_INT2NUM rb_int2num_inline -#define RB_NUM2INT rb_num2int_inline -#define RB_UINT2NUM rb_uint2num_inline +#define RB_INT2NUM rb_int2num_inline /**< @alias{rb_int2num_inline} */ +#define RB_NUM2INT rb_num2int_inline /**< @alias{rb_num2int_inline} */ +#define RB_UINT2NUM rb_uint2num_inline /**< @alias{rb_uint2num_inline} */ -#define FIX2INT RB_FIX2INT -#define FIX2UINT RB_FIX2UINT -#define INT2NUM RB_INT2NUM -#define NUM2INT RB_NUM2INT -#define NUM2UINT RB_NUM2UINT -#define UINT2NUM RB_UINT2NUM +#define FIX2INT RB_FIX2INT /**< @old{RB_FIX2INT} */ +#define FIX2UINT RB_FIX2UINT /**< @old{RB_FIX2UINT} */ +#define INT2NUM RB_INT2NUM /**< @old{RB_INT2NUM} */ +#define NUM2INT RB_NUM2INT /**< @old{RB_NUM2INT} */ +#define NUM2UINT RB_NUM2UINT /**< @old{RB_NUM2UINT} */ +#define UINT2NUM RB_UINT2NUM /**< @old{RB_UINT2NUM} */ /** @cond INTERNAL_MACRO */ #define RB_FIX2INT RB_FIX2INT @@ -52,13 +52,79 @@ /** @endcond */ RBIMPL_SYMBOL_EXPORT_BEGIN() -long rb_num2int(VALUE); -long rb_fix2int(VALUE); -unsigned long rb_num2uint(VALUE); -unsigned long 
rb_fix2uint(VALUE); + +/** + * Converts an instance of ::rb_cNumeric into C's `long`. + * + * @param[in] num Something numeric. + * @exception rb_eTypeError `num` is not a numeric. + * @exception rb_eRangeError `num` is out of range of `int`. + * @return The passed value converted into C's `long`. + * + * @internal + * + * Yes, the API is really strange. It returns `long`, but raises when the + * value is out of `int`. This seems to be due to the fact that Matz favoured + * K&R before, and his machine at that moment was an ILP32 architecture. + */ +long rb_num2int(VALUE num); + +/** + * Identical to rb_num2int(). + * + * @param[in] num Something numeric. + * @exception rb_eTypeError `num` is not a numeric. + * @exception rb_eRangeError `num` is out of range of `int`. + * @return The passed value converted into C's `long`. + * + * @internal + * + * This function seems to be a complete waste of disk space. @shyouhei has no + * idea why this is a different thing from rb_num2short(). + */ +long rb_fix2int(VALUE num); + +/** + * Converts an instance of ::rb_cNumeric into C's `unsigned long`. + * + * @param[in] num Something numeric. + * @exception rb_eTypeError `num` is not a numeric. + * @exception rb_eRangeError `num` is out of range of `unsigned int`. + * @return The passed value converted into C's `unsigned long`. + * + * @internal + * + * Yes, the API is really strange. It returns `unsigned long`, but raises when + * the value is out of `unsigned int`. This seems to be due to the fact that + * Matz favoured K&R before, and his machine at that moment was an ILP32 + * architecture. + */ +unsigned long rb_num2uint(VALUE num); + +/** + * Identical to rb_num2uint(). + * + * @param[in] num Something numeric. + * @exception rb_eTypeError `num` is not a numeric. + * @exception rb_eRangeError `num` is out of range of `unsigned int`. + * @return The passed value converted into C's `unsigned long`. 
+ * + * @internal + * + * This function seems to be a complete waste of disk space. @shyouhei has no + * idea why this is a different thing from rb_num2short(). + */ +unsigned long rb_fix2uint(VALUE num); RBIMPL_SYMBOL_EXPORT_END() RBIMPL_ATTR_ARTIFICIAL() +/** + * Converts a Fixnum into C's `int`. + * + * @param[in] x Some Fixnum. + * @pre Must not pass anything other than a Fixnum. + * @return The passed value converted into C's `int`. + */ static inline int RB_FIX2INT(VALUE x) { @@ -80,6 +146,14 @@ RB_FIX2INT(VALUE x) return RBIMPL_CAST((int)ret); } +/** + * Converts an instance of ::rb_cNumeric into C's `int`. + * + * @param[in] x Something numeric. + * @exception rb_eTypeError `x` is not a numeric. + * @exception rb_eRangeError `x` is out of range of `int`. + * @return The passed value converted into C's `int`. + */ static inline int rb_num2int_inline(VALUE x) { @@ -98,6 +172,14 @@ rb_num2int_inline(VALUE x) return RBIMPL_CAST((int)ret); } +/** + * Converts an instance of ::rb_cNumeric into C's `unsigned int`. + * + * @param[in] x Something numeric. + * @exception rb_eTypeError `x` is not a numeric. + * @exception rb_eRangeError `x` is out of range of `unsigned int`. + * @return The passed value converted into C's `unsigned int`. + */ RBIMPL_ATTR_ARTIFICIAL() static inline unsigned int RB_NUM2UINT(VALUE x) @@ -115,6 +197,13 @@ RB_NUM2UINT(VALUE x) } RBIMPL_ATTR_ARTIFICIAL() +/** + * Converts a Fixnum into C's `unsigned int`. + * + * @param[in] x Some Fixnum. + * @pre Must not pass anything other than a Fixnum. + * @return The passed value converted into C's `unsigned int`. + */ static inline unsigned int RB_FIX2UINT(VALUE x) { @@ -140,6 +229,12 @@ RBIMPL_WARNING_IGNORED(-Wtype-limits) /* We can ignore them here. */ RBIMPL_WARNING_IGNORED(-Wtautological-constant-out-of-range-compare) #endif +/** + * Converts a C's `int` into an instance of ::rb_cInteger. + * + * @param[in] v Arbitrary `int` value. + * @return An instance of ::rb_cInteger. 
+ */ static inline VALUE rb_int2num_inline(int v) { @@ -149,6 +244,12 @@ rb_int2num_inline(int v) return rb_int2big(v); } +/** + * Converts a C's `unsigned int` into an instance of ::rb_cInteger. + * + * @param[in] v Arbitrary `unsigned int` value. + * @return An instance of ::rb_cInteger. + */ static inline VALUE rb_uint2num_inline(unsigned int v) { diff --git a/include/ruby/internal/arithmetic/intptr_t.h b/include/ruby/internal/arithmetic/intptr_t.h index 442c87144c..a354f4469c 100644 --- a/include/ruby/internal/arithmetic/intptr_t.h +++ b/include/ruby/internal/arithmetic/intptr_t.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Arithmetic conversion between C's `intptr_t` and Ruby's. */ #include "ruby/internal/config.h" @@ -29,13 +29,45 @@ #include "ruby/internal/value.h" #include "ruby/internal/dllexport.h" -#define rb_int_new rb_int2inum -#define rb_uint_new rb_uint2inum +#define rb_int_new rb_int2inum /**< @alias{rb_int2inum} */ +#define rb_uint_new rb_uint2inum /**< @alias{rb_uint2inum} */ RBIMPL_SYMBOL_EXPORT_BEGIN() + +/** + * Converts a C's `intptr_t` into an instance of ::rb_cInteger. + * + * @param[in] i Arbitrary `intptr_t` value. + * @return An instance of ::rb_cInteger. + * @note This function always allocates Bignums, even if the given number + * is small enough to fit into a Fixnum. + */ VALUE rb_int2big(intptr_t i); + +/** + * Converts a C's `intptr_t` into an instance of ::rb_cInteger. + * + * @param[in] i Arbitrary `intptr_t` value. + * @return An instance of ::rb_cInteger. + */ VALUE rb_int2inum(intptr_t i); + +/** + * Converts a C's `uintptr_t` into an instance of ::rb_cInteger. + * + * @param[in] i Arbitrary `uintptr_t` value. 
+ * @return An instance of ::rb_cInteger. + * @note This function always allocates Bignums, even if the given number + * is small enough to fit into a Fixnum. + */ VALUE rb_uint2big(uintptr_t i); + +/** + * Converts a C's `uintptr_t` into an instance of ::rb_cInteger. + * + * @param[in] i Arbitrary `uintptr_t` value. + * @return An instance of ::rb_cInteger. + */ VALUE rb_uint2inum(uintptr_t i); RBIMPL_SYMBOL_EXPORT_END() diff --git a/include/ruby/internal/arithmetic/long.h b/include/ruby/internal/arithmetic/long.h index aff7d68478..6b8fd8ffc3 100644 --- a/include/ruby/internal/arithmetic/long.h +++ b/include/ruby/internal/arithmetic/long.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Arithmetic conversion between C's `long` and Ruby's. 
* * ### Q&A ### @@ -43,23 +43,23 @@ #include "ruby/internal/value.h" #include "ruby/assert.h" -#define FIX2LONG RB_FIX2LONG -#define FIX2ULONG RB_FIX2ULONG -#define INT2FIX RB_INT2FIX -#define LONG2FIX RB_INT2FIX -#define LONG2NUM RB_LONG2NUM -#define NUM2LONG RB_NUM2LONG -#define NUM2ULONG RB_NUM2ULONG -#define RB_FIX2LONG rb_fix2long -#define RB_FIX2ULONG rb_fix2ulong -#define RB_LONG2FIX RB_INT2FIX -#define RB_LONG2NUM rb_long2num_inline -#define RB_NUM2LONG rb_num2long_inline -#define RB_NUM2ULONG rb_num2ulong_inline -#define RB_ULONG2NUM rb_ulong2num_inline -#define ULONG2NUM RB_ULONG2NUM -#define rb_fix_new RB_INT2FIX -#define rb_long2int rb_long2int_inline +#define FIX2LONG RB_FIX2LONG /**< @old{RB_FIX2LONG} */ +#define FIX2ULONG RB_FIX2ULONG /**< @old{RB_FIX2ULONG} */ +#define INT2FIX RB_INT2FIX /**< @old{RB_INT2FIX} */ +#define LONG2FIX RB_INT2FIX /**< @old{RB_INT2FIX} */ +#define LONG2NUM RB_LONG2NUM /**< @old{RB_LONG2NUM} */ +#define NUM2LONG RB_NUM2LONG /**< @old{RB_NUM2LONG} */ +#define NUM2ULONG RB_NUM2ULONG /**< @old{RB_NUM2ULONG} */ +#define RB_FIX2LONG rb_fix2long /**< @alias{rb_fix2long} */ +#define RB_FIX2ULONG rb_fix2ulong /**< @alias{rb_fix2ulong} */ +#define RB_LONG2FIX RB_INT2FIX /**< @alias{RB_INT2FIX} */ +#define RB_LONG2NUM rb_long2num_inline /**< @alias{rb_long2num_inline} */ +#define RB_NUM2LONG rb_num2long_inline /**< @alias{rb_num2long_inline} */ +#define RB_NUM2ULONG rb_num2ulong_inline /**< @alias{rb_num2ulong_inline} */ +#define RB_ULONG2NUM rb_ulong2num_inline /**< @alias{rb_ulong2num_inline} */ +#define ULONG2NUM RB_ULONG2NUM /**< @old{RB_ULONG2NUM} */ +#define rb_fix_new RB_INT2FIX /**< @alias{RB_INT2FIX} */ +#define rb_long2int rb_long2int_inline /**< @alias{rb_long2int_inline} */ /** @cond INTERNAL_MACRO */ #define RB_INT2FIX RB_INT2FIX @@ -69,15 +69,44 @@ RBIMPL_SYMBOL_EXPORT_BEGIN() RBIMPL_ATTR_NORETURN() RBIMPL_ATTR_COLD() +/** + * This is an utility function to raise an ::rb_eRangeError. 
+ * + * @param[in] num A signed value about to overflow. + * @exception rb_eRangeError `num` is out of range of `int`. + */ void rb_out_of_int(SIGNED_VALUE num); +/** + * Converts an instance of ::rb_cNumeric into C's `long`. + * + * @param[in] num Something numeric. + * @exception rb_eTypeError `num` is not a numeric. + * @exception rb_eRangeError `num` is out of range of `long`. + * @return The passed value converted into C's `long`. + */ long rb_num2long(VALUE num); + +/** + * Converts an instance of ::rb_cNumeric into C's `unsigned long`. + * + * @param[in] num Something numeric. + * @exception rb_eTypeError `num` is not a numeric. + * @exception rb_eRangeError `num` is out of range of `unsigned long`. + * @return The passed value converted into C's `unsigned long`. + */ unsigned long rb_num2ulong(VALUE num); RBIMPL_SYMBOL_EXPORT_END() RBIMPL_ATTR_CONST_UNLESS_DEBUG() RBIMPL_ATTR_CONSTEXPR_UNLESS_DEBUG(CXX14) RBIMPL_ATTR_ARTIFICIAL() +/** + * Converts a C's `long` into an instance of ::rb_cInteger. + * + * @param[in] i Arbitrary `long` value. + * @return An instance of ::rb_cInteger. + */ static inline VALUE RB_INT2FIX(long i) { @@ -86,7 +115,7 @@ RB_INT2FIX(long i) /* :NOTE: VALUE can be wider than long. As j being unsigned, 2j+1 is fully * defined. Also it can be compiled into a single LEA instruction. */ const unsigned long j = i; - const unsigned long k = 2 * j + RUBY_FIXNUM_FLAG; + const unsigned long k = (j << 1) + RUBY_FIXNUM_FLAG; const long l = k; const SIGNED_VALUE m = l; /* Sign extend */ const VALUE n = m; @@ -95,6 +124,13 @@ RB_INT2FIX(long i) return n; } +/** + * Checks if `int` can hold the given integer. + * + * @param[in] n Arbitrary `long` value. + * @exception rb_eRangeError `n` is out of range of `int`. 
+ * @return Identical value of type `int` + */ static inline int rb_long2int_inline(long n) { @@ -112,6 +148,16 @@ rb_long2int_inline(long n) RBIMPL_ATTR_CONST_UNLESS_DEBUG() RBIMPL_ATTR_CONSTEXPR_UNLESS_DEBUG(CXX14) +/** + * @private + * + * This is an implementation detail of rb_fix2long(). People don't use it + * directly. + * + * @param[in] x A Fixnum. + * @return Identical value of type `long` + * @pre Must not pass anything other than a Fixnum. + */ static inline long rbimpl_fix2long_by_idiv(VALUE x) { @@ -130,6 +176,16 @@ rbimpl_fix2long_by_idiv(VALUE x) RBIMPL_ATTR_CONST_UNLESS_DEBUG() RBIMPL_ATTR_CONSTEXPR_UNLESS_DEBUG(CXX14) +/** + * @private + * + * This is an implementation detail of rb_fix2long(). People don't use it + * directly. + * + * @param[in] x A Fixnum. + * @return Identical value of type `long` + * @pre Must not pass anything other than a Fixnum. + */ static inline long rbimpl_fix2long_by_shift(VALUE x) { @@ -147,6 +203,15 @@ rbimpl_fix2long_by_shift(VALUE x) RBIMPL_ATTR_CONST() RBIMPL_ATTR_CONSTEXPR(CXX11) +/** + * @private + * + * This is an implementation detail of rb_fix2long(). People don't use it + * directly. + * + * @retval true This C compiler's right shift operator is arithmetic. + * @retval false This C compiler's right shift operator is logical. + */ static inline bool rbimpl_right_shift_is_arithmetic_p(void) { @@ -155,6 +220,13 @@ rbimpl_right_shift_is_arithmetic_p(void) RBIMPL_ATTR_CONST_UNLESS_DEBUG() RBIMPL_ATTR_CONSTEXPR_UNLESS_DEBUG(CXX14) +/** + * Converts a Fixnum into C's `long`. + * + * @param[in] x Some Fixnum. + * @pre Must not pass anything other than a Fixnum. + * @return The passed value converted into C's `long`. + */ static inline long rb_fix2long(VALUE x) { @@ -168,6 +240,14 @@ rb_fix2long(VALUE x) RBIMPL_ATTR_CONST_UNLESS_DEBUG() RBIMPL_ATTR_CONSTEXPR_UNLESS_DEBUG(CXX14) +/** + * Converts a Fixnum into C's `unsigned long`. + * + * @param[in] x Some Fixnum. + * @pre Must not pass anything other than a Fixnum. 
+ * @return The passed value converted into C's `unsigned long`. + * @note Negative fixnums will be converted into large unsigned longs. + */ static inline unsigned long rb_fix2ulong(VALUE x) { @@ -175,6 +255,14 @@ rb_fix2ulong(VALUE x) return rb_fix2long(x); } +/** + * Converts an instance of ::rb_cNumeric into C's `long`. + * + * @param[in] x Something numeric. + * @exception rb_eTypeError `x` is not a numeric. + * @exception rb_eRangeError `x` is out of range of `long`. + * @return The passed value converted into C's `long`. + */ static inline long rb_num2long_inline(VALUE x) { @@ -184,20 +272,38 @@ rb_num2long_inline(VALUE x) return rb_num2long(x); } +/** + * Converts an instance of ::rb_cNumeric into C's `unsigned long`. + * + * @param[in] x Something numeric. + * @exception rb_eTypeError `x` is not a numeric. + * @exception rb_eRangeError `x` is out of range of `unsigned long`. + * @return The passed value converted into C's `unsigned long`. + * + * @internal + * + * This (negative fixnum would become a large unsigned long while negative + * bignum is an exception) has been THE behaviour of NUM2ULONG since the + * beginning. It is strange, but we can no longer change how it works at this + * moment. We have to get by with it. + * + * @see https://bugs.ruby-lang.org/issues/9089 + */ static inline unsigned long rb_num2ulong_inline(VALUE x) { - /* This (negative fixnum would become a large unsigned long while negative - * bignum is an exception) has been THE behaviour of NUM2ULONG since the - * beginning. It is strange, but we can no longer change how it works at - * this moment. We have to get by with it. See also: - * https://bugs.ruby-lang.org/issues/9089 */ if (RB_FIXNUM_P(x)) return RB_FIX2ULONG(x); else return rb_num2ulong(x); } +/** + * Converts a C's `long` into an instance of ::rb_cInteger. + * + * @param[in] v Arbitrary `long` value. + * @return An instance of ::rb_cInteger. 
+ */ static inline VALUE rb_long2num_inline(long v) { @@ -207,6 +313,12 @@ rb_long2num_inline(long v) return rb_int2big(v); } +/** + * Converts a C's `unsigned long` into an instance of ::rb_cInteger. + * + * @param[in] v Arbitrary `unsigned long` value. + * @return An instance of ::rb_cInteger. + */ static inline VALUE rb_ulong2num_inline(unsigned long v) { diff --git a/include/ruby/internal/arithmetic/long_long.h b/include/ruby/internal/arithmetic/long_long.h index a4a5d0aa09..65dec8729d 100644 --- a/include/ruby/internal/arithmetic/long_long.h +++ b/include/ruby/internal/arithmetic/long_long.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Arithmetic conversion between C's `long long` and Ruby's. */ #include "ruby/internal/value.h" @@ -25,22 +25,59 @@ #include "ruby/internal/special_consts.h" #include "ruby/backward/2/long_long.h" -#define RB_LL2NUM rb_ll2num_inline -#define RB_ULL2NUM rb_ull2num_inline -#define LL2NUM RB_LL2NUM -#define ULL2NUM RB_ULL2NUM -#define RB_NUM2LL rb_num2ll_inline -#define RB_NUM2ULL rb_num2ull_inline -#define NUM2LL RB_NUM2LL -#define NUM2ULL RB_NUM2ULL +#define RB_LL2NUM rb_ll2num_inline /**< @alias{rb_ll2num_inline} */ +#define RB_ULL2NUM rb_ull2num_inline /**< @alias{rb_ull2num_inline} */ +#define LL2NUM RB_LL2NUM /**< @old{RB_LL2NUM} */ +#define ULL2NUM RB_ULL2NUM /**< @old{RB_ULL2NUM} */ +#define RB_NUM2LL rb_num2ll_inline /**< @alias{rb_num2ll_inline} */ +#define RB_NUM2ULL rb_num2ull_inline /**< @alias{rb_num2ull_inline} */ +#define NUM2LL RB_NUM2LL /**< @old{RB_NUM2LL} */ +#define NUM2ULL RB_NUM2ULL /**< @old{RB_NUM2ULL} */ RBIMPL_SYMBOL_EXPORT_BEGIN() -VALUE rb_ll2inum(LONG_LONG); -VALUE rb_ull2inum(unsigned LONG_LONG); -LONG_LONG 
rb_num2ll(VALUE); -unsigned LONG_LONG rb_num2ull(VALUE); +/** + * Converts a C's `long long` into an instance of ::rb_cInteger. + * + * @param[in] num Arbitrary `long long` value. + * @return An instance of ::rb_cInteger. + */ +VALUE rb_ll2inum(LONG_LONG num); + +/** + * Converts a C's `unsigned long long` into an instance of ::rb_cInteger. + * + * @param[in] num Arbitrary `unsigned long long` value. + * @return An instance of ::rb_cInteger. + */ +VALUE rb_ull2inum(unsigned LONG_LONG num); + +/** + * Converts an instance of ::rb_cNumeric into C's `long long`. + * + * @param[in] num Something numeric. + * @exception rb_eTypeError `num` is not a numeric. + * @exception rb_eRangeError `num` is out of range of `long long`. + * @return The passed value converted into C's `long long`. + */ +LONG_LONG rb_num2ll(VALUE num); + +/** + * Converts an instance of ::rb_cNumeric into C's `unsigned long long`. + * + * @param[in] num Something numeric. + * @exception rb_eTypeError `num` is not a numeric. + * @exception rb_eRangeError `num` is out of range of `unsigned long long`. + * @return The passed value converted into C's `unsigned long long`. + */ +unsigned LONG_LONG rb_num2ull(VALUE num); RBIMPL_SYMBOL_EXPORT_END() +/** + * Converts a C's `long long` into an instance of ::rb_cInteger. + * + * @param[in] n Arbitrary `long long` value. + * @return An instance of ::rb_cInteger + */ static inline VALUE rb_ll2num_inline(LONG_LONG n) { @@ -48,6 +85,12 @@ rb_ll2num_inline(LONG_LONG n) return rb_ll2inum(n); } +/** + * Converts a C's `unsigned long long` into an instance of ::rb_cInteger. + * + * @param[in] n Arbitrary `unsigned long long` value. + * @return An instance of ::rb_cInteger + */ static inline VALUE rb_ull2num_inline(unsigned LONG_LONG n) { @@ -55,6 +98,14 @@ rb_ull2num_inline(unsigned LONG_LONG n) return rb_ull2inum(n); } +/** + * Converts an instance of ::rb_cNumeric into C's `long long`. + * + * @param[in] x Something numeric. 
+ * @exception rb_eTypeError `x` is not a numeric. + * @exception rb_eRangeError `x` is out of range of `long long`. + * @return The passed value converted into C's `long long`. + */ static inline LONG_LONG rb_num2ll_inline(VALUE x) { @@ -64,6 +115,14 @@ rb_num2ll_inline(VALUE x) return rb_num2ll(x); } +/** + * Converts an instance of ::rb_cNumeric into C's `unsigned long long`. + * + * @param[in] x Something numeric. + * @exception rb_eTypeError `x` is not a numeric. + * @exception rb_eRangeError `x` is out of range of `unsigned long long`. + * @return The passed value converted into C's `unsigned long long`. + */ static inline unsigned LONG_LONG rb_num2ull_inline(VALUE x) { diff --git a/include/ruby/internal/arithmetic/mode_t.h b/include/ruby/internal/arithmetic/mode_t.h index 46e41f7ef9..5b7ad35fbc 100644 --- a/include/ruby/internal/arithmetic/mode_t.h +++ b/include/ruby/internal/arithmetic/mode_t.h @@ -17,20 +17,23 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Arithmetic conversion between C's `mode_t` and Ruby's. */ #include "ruby/internal/config.h" #include "ruby/internal/arithmetic/int.h" +/** Converts an instance of ::rb_cNumeric into C's `mode_t`. */ #ifndef NUM2MODET # define NUM2MODET RB_NUM2INT #endif +/** Converts a C's `mode_t` into an instance of ::rb_cInteger. */ #ifndef MODET2NUM # define MODET2NUM RB_INT2NUM #endif +/** A rb_sprintf() format prefix to be used for a `mode_t` parameter.
*/ #ifndef PRI_MODET_PREFIX # define PRI_MODET_PREFIX PRI_INT_PREFIX #endif diff --git a/include/ruby/internal/arithmetic/off_t.h b/include/ruby/internal/arithmetic/off_t.h index c1959c61a1..0ec9362cc9 100644 --- a/include/ruby/internal/arithmetic/off_t.h +++ b/include/ruby/internal/arithmetic/off_t.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Arithmetic conversion between C's `off_t` and Ruby's. */ #include "ruby/internal/config.h" @@ -26,6 +26,7 @@ #include "ruby/internal/arithmetic/long_long.h" #include "ruby/backward/2/long_long.h" +/** Converts a C's `off_t` into an instance of ::rb_cInteger. */ #ifdef OFFT2NUM # /* take that. */ #elif SIZEOF_OFF_T == SIZEOF_LONG_LONG @@ -36,6 +37,7 @@ # define OFFT2NUM RB_INT2NUM #endif +/** Converts an instance of ::rb_cNumeric into C's `off_t`. */ #ifdef NUM2OFFT # /* take that. */ #elif SIZEOF_OFF_T == SIZEOF_LONG_LONG @@ -46,6 +48,7 @@ # define NUM2OFFT RB_NUM2INT #endif +/** A rb_sprintf() format prefix to be used for an `off_t` parameter. */ #ifdef PRI_OFFT_PREFIX # /* take that. */ #elif SIZEOF_OFF_T == SIZEOF_LONG_LONG diff --git a/include/ruby/internal/arithmetic/pid_t.h b/include/ruby/internal/arithmetic/pid_t.h index 86373ebfde..df9704e8f5 100644 --- a/include/ruby/internal/arithmetic/pid_t.h +++ b/include/ruby/internal/arithmetic/pid_t.h @@ -17,20 +17,23 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Arithmetic conversion between C's `pid_t` and Ruby's. 
*/ #include "ruby/internal/config.h" #include "ruby/internal/arithmetic/long.h" +/** Converts a C's `pid_t` into an instance of ::rb_cInteger. */ #ifndef PIDT2NUM # define PIDT2NUM RB_LONG2NUM #endif +/** Converts an instance of ::rb_cNumeric into C's `pid_t`. */ #ifndef NUM2PIDT # define NUM2PIDT RB_NUM2LONG #endif +/** A rb_sprintf() format prefix to be used for a `pid_t` parameter. */ #ifndef PRI_PIDT_PREFIX # define PRI_PIDT_PREFIX PRI_LONG_PREFIX #endif diff --git a/include/ruby/internal/arithmetic/short.h b/include/ruby/internal/arithmetic/short.h index ef213a8d3e..7a324d945b 100644 --- a/include/ruby/internal/arithmetic/short.h +++ b/include/ruby/internal/arithmetic/short.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Arithmetic conversion between C's `short` and Ruby's. 
* * Shyouhei wonders: why there is no SHORT2NUM, given there are both @@ -27,21 +27,80 @@ #include "ruby/internal/dllexport.h" #include "ruby/internal/special_consts.h" -#define RB_NUM2SHORT rb_num2short_inline -#define RB_NUM2USHORT rb_num2ushort -#define NUM2SHORT RB_NUM2SHORT -#define NUM2USHORT RB_NUM2USHORT -#define USHORT2NUM RB_INT2FIX -#define RB_FIX2SHORT rb_fix2short -#define FIX2SHORT RB_FIX2SHORT +#define RB_NUM2SHORT rb_num2short_inline /**< @alias{rb_num2short_inline} */ +#define RB_NUM2USHORT rb_num2ushort /**< @alias{rb_num2ushort} */ +#define NUM2SHORT RB_NUM2SHORT /**< @old{RB_NUM2SHORT} */ +#define NUM2USHORT RB_NUM2USHORT /**< @old{RB_NUM2USHORT} */ +#define USHORT2NUM RB_INT2FIX /**< @old{RB_INT2FIX} */ +#define RB_FIX2SHORT rb_fix2short /**< @alias{rb_fix2short} */ +#define FIX2SHORT RB_FIX2SHORT /**< @old{RB_FIX2SHORT} */ RBIMPL_SYMBOL_EXPORT_BEGIN() -short rb_num2short(VALUE); -unsigned short rb_num2ushort(VALUE); -short rb_fix2short(VALUE); -unsigned short rb_fix2ushort(VALUE); + +/** + * Converts an instance of ::rb_cNumeric into C's `short`. + * + * @param[in] num Something numeric. + * @exception rb_eTypeError `num` is not a numeric. + * @exception rb_eRangeError `num` is out of range of `short`. + * @return The passed value converted into C's `short`. + */ +short rb_num2short(VALUE num); + +/** + * Converts an instance of ::rb_cNumeric into C's `unsigned short`. + * + * @param[in] num Something numeric. + * @exception rb_eTypeError `num` is not a numeric. + * @exception rb_eRangeError `num` is out of range of `unsigned short`. + * @return The passed value converted into C's `unsigned short`. + */ +unsigned short rb_num2ushort(VALUE num); + +/** + * Identical to rb_num2short(). + * + * @param[in] num Something numeric. + * @exception rb_eTypeError `num` is not a numeric. + * @exception rb_eRangeError `num` is out of range of `short`. + * @return The passed value converted into C's `short`.
+ * + * @internal + * + * This function seems to be a complete waste of disk space. @shyouhei has no + * idea why this is a different thing from rb_num2short(). + */ +short rb_fix2short(VALUE num); + +/** + * Identical to rb_num2ushort(). + * + * @param[in] num Something numeric. + * @exception rb_eTypeError `num` is not a numeric. + * @exception rb_eRangeError `num` is out of range of `unsigned short`. + * @return The passed value converted into C's `unsigned short`. + * + * @internal + * + * This function seems to be a complete waste of disk space. @shyouhei has no + * idea why this is a different thing from rb_num2ushort(). + */ +unsigned short rb_fix2ushort(VALUE num); RBIMPL_SYMBOL_EXPORT_END() +/** + * Identical to rb_num2short(). + * + * @param[in] x Something numeric. + * @exception rb_eTypeError `x` is not a numeric. + * @exception rb_eRangeError `x` is out of range of `short`. + * @return The passed value converted into C's `short`. + * + * @internal + * + * This function seems to be a complete waste of time. @shyouhei has no idea + * why this is a different thing from rb_num2short(). + */ static inline short rb_num2short_inline(VALUE x) { @@ -51,4 +110,4 @@ rb_num2short_inline(VALUE x) return rb_num2short(x); } -#endif /* RBIMPL_ARITHMETIC_SOHRT_H */ +#endif /* RBIMPL_ARITHMETIC_SHORT_H */ diff --git a/include/ruby/internal/arithmetic/size_t.h b/include/ruby/internal/arithmetic/size_t.h index 0458f1f5f3..1082160b8e 100644 --- a/include/ruby/internal/arithmetic/size_t.h +++ b/include/ruby/internal/arithmetic/size_t.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Arithmetic conversion between C's `size_t` and Ruby's. 
*/ #include "ruby/internal/config.h" @@ -26,7 +26,12 @@ #include "ruby/internal/arithmetic/long_long.h" #include "ruby/backward/2/long_long.h" -#if SIZEOF_SIZE_T == SIZEOF_LONG_LONG +#if defined(__DOXYGEN__) +# /** Converts a C's `size_t` into an instance of ::rb_cInteger. */ +# define RB_SIZE2NUM RB_ULONG2NUM +# /** Converts a C's `ssize_t` into an instance of ::rb_cInteger. */ +# define RB_SSIZE2NUM RB_LONG2NUM +#elif SIZEOF_SIZE_T == SIZEOF_LONG_LONG # define RB_SIZE2NUM RB_ULL2NUM # define RB_SSIZE2NUM RB_LL2NUM #elif SIZEOF_SIZE_T == SIZEOF_LONG @@ -37,7 +42,12 @@ # define RB_SSIZE2NUM RB_INT2NUM #endif -#if SIZEOF_SIZE_T == SIZEOF_LONG_LONG +#if defined(__DOXYGEN__) +# /** Converts an instance of ::rb_cInteger into C's `size_t`. */ +# define RB_NUM2SIZE RB_NUM2ULONG +# /** Converts an instance of ::rb_cInteger into C's `ssize_t`. */ +# define RB_NUM2SSIZE RB_NUM2LONG +#elif SIZEOF_SIZE_T == SIZEOF_LONG_LONG # define RB_NUM2SIZE RB_NUM2ULL # define RB_NUM2SSIZE RB_NUM2LL #elif SIZEOF_SIZE_T == SIZEOF_LONG @@ -48,9 +58,9 @@ # define RB_NUM2SSIZE RB_NUM2INT #endif -#define NUM2SIZET RB_NUM2SIZE -#define SIZET2NUM RB_SIZE2NUM -#define NUM2SSIZET RB_NUM2SSIZE -#define SSIZET2NUM RB_SSIZE2NUM +#define NUM2SIZET RB_NUM2SIZE /**< @old{RB_NUM2SIZE} */ +#define SIZET2NUM RB_SIZE2NUM /**< @old{RB_SIZE2NUM} */ +#define NUM2SSIZET RB_NUM2SSIZE /**< @old{RB_NUM2SSIZE} */ +#define SSIZET2NUM RB_SSIZE2NUM /**< @old{RB_SSIZE2NUM} */ #endif /* RBIMPL_ARITHMETIC_SIZE_T_H */ diff --git a/include/ruby/internal/arithmetic/st_data_t.h b/include/ruby/internal/arithmetic/st_data_t.h index 93a5ccb7a1..3bff4ffc0b 100644 --- a/include/ruby/internal/arithmetic/st_data_t.h +++ b/include/ruby/internal/arithmetic/st_data_t.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. 
+ * extension libraries. They could be written in C++98. * @brief Arithmetic conversion between C's `st_data_t` and Ruby's. */ #include "ruby/internal/arithmetic/fixnum.h" @@ -30,7 +30,7 @@ #include "ruby/assert.h" #include "ruby/st.h" -#define ST2FIX RB_ST2FIX +#define ST2FIX RB_ST2FIX /**< @old{RB_ST2FIX} */ /** @cond INTERNAL_MACRO */ #define RB_ST2FIX RB_ST2FIX /** @endcond */ @@ -38,7 +38,23 @@ RBIMPL_ATTR_CONST_UNLESS_DEBUG() RBIMPL_ATTR_CONSTEXPR_UNLESS_DEBUG(CXX14) RBIMPL_ATTR_ARTIFICIAL() -/* See also [ruby-core:84395] [Bug #14218] [ruby-core:82687] [Bug #13877] */ +/** + * Converts a C's `st_data_t` into an instance of ::rb_cInteger. + * + * @param[in] i The data in question. + * @return A converted result + * @warning THIS CONVERSION LOSES DATA! Be warned. + * @see https://bugs.ruby-lang.org/issues/13877 + * @see https://bugs.ruby-lang.org/issues/14218 + * + * @internal + * + * This is needed because of hash functions. Hash functions return + * `st_data_t`, which could theoretically be bigger than Fixnums. However + * allocating Bignums for them every time we calculate hash values is just too + * heavy. To avoid penalty we need to ignore some upper bit(s) and stick to + * Fixnums. This function is used for that purpose. + */ static inline VALUE RB_ST2FIX(st_data_t i) { @@ -56,4 +72,4 @@ RB_ST2FIX(st_data_t i) return RB_LONG2FIX(y); } -#endif /* RBIMPL_ARITHMERIC_ST_DATA_T_H */ +#endif /* RBIMPL_ARITHMETIC_ST_DATA_T_H */ diff --git a/include/ruby/internal/arithmetic/uid_t.h b/include/ruby/internal/arithmetic/uid_t.h index a990b2f480..12cde2a9c8 100644 --- a/include/ruby/internal/arithmetic/uid_t.h +++ b/include/ruby/internal/arithmetic/uid_t.h @@ -17,20 +17,23 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. 
They could be written in C++98. * @brief Arithmetic conversion between C's `uid_t` and Ruby's. */ #include "ruby/internal/config.h" #include "ruby/internal/arithmetic/long.h" +/** Converts a C's `uid_t` into an instance of ::rb_cInteger. */ #ifndef UIDT2NUM # define UIDT2NUM RB_LONG2NUM #endif +/** Converts an instance of ::rb_cNumeric into C's `uid_t`. */ #ifndef NUM2UIDT # define NUM2UIDT RB_NUM2LONG #endif +/** A rb_sprintf() format prefix to be used for a `uid_t` parameter. */ #ifndef PRI_UIDT_PREFIX # define PRI_UIDT_PREFIX PRI_LONG_PREFIX #endif diff --git a/include/ruby/internal/assume.h b/include/ruby/internal/assume.h index e95b2fb12a..4c183e8af9 100644 --- a/include/ruby/internal/assume.h +++ b/include/ruby/internal/assume.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Defines #RBIMPL_ASSUME / #RBIMPL_UNREACHABLE. * * These macros must be defined at once because: @@ -32,10 +32,7 @@ #include "ruby/internal/warning_push.h" /** @cond INTERNAL_MACRO */ -#if RBIMPL_COMPILER_SINCE(MSVC, 13, 10, 0) -# define RBIMPL_HAVE___ASSUME - -#elif RBIMPL_COMPILER_SINCE(Intel, 13, 0, 0) +#if defined(HAVE___ASSUME) # define RBIMPL_HAVE___ASSUME #endif /** @endcond */ diff --git a/include/ruby/internal/attr/alloc_size.h b/include/ruby/internal/attr/alloc_size.h index ea96feec99..954a2010f6 100644 --- a/include/ruby/internal/attr/alloc_size.h +++ b/include/ruby/internal/attr/alloc_size.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. 
They could be written in C++98. * @brief Defines #RBIMPL_ATTR_ALLOC_SIZE. */ #include "ruby/internal/has/attribute.h" diff --git a/include/ruby/internal/attr/artificial.h b/include/ruby/internal/attr/artificial.h index fa9a3814cc..ef5f36abff 100644 --- a/include/ruby/internal/attr/artificial.h +++ b/include/ruby/internal/attr/artificial.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Defines #RBIMPL_ATTR_ARTIFICIAL. * * ### Q&A ### diff --git a/include/ruby/internal/attr/cold.h b/include/ruby/internal/attr/cold.h index 6db57fc9c2..c68b3ae784 100644 --- a/include/ruby/internal/attr/cold.h +++ b/include/ruby/internal/attr/cold.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Defines #RBIMPL_ATTR_COLD. */ #include "ruby/internal/compiler_is.h" diff --git a/include/ruby/internal/attr/const.h b/include/ruby/internal/attr/const.h index c9e8577693..e66aa17c70 100644 --- a/include/ruby/internal/attr/const.h +++ b/include/ruby/internal/attr/const.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Defines #RBIMPL_ATTR_CONST. 
*/ #include "ruby/internal/compiler_since.h" diff --git a/include/ruby/internal/attr/constexpr.h b/include/ruby/internal/attr/constexpr.h index d887fa41c1..abc4f238b5 100644 --- a/include/ruby/internal/attr/constexpr.h +++ b/include/ruby/internal/attr/constexpr.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief #RBIMPL_ATTR_CONSTEXPR. */ #include "ruby/internal/has/feature.h" diff --git a/include/ruby/internal/attr/deprecated.h b/include/ruby/internal/attr/deprecated.h index 8bc1db5185..e1bbdbd15a 100644 --- a/include/ruby/internal/attr/deprecated.h +++ b/include/ruby/internal/attr/deprecated.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Defines #RBIMPL_ATTR_DEPRECATED. 
*/ #include "ruby/internal/compiler_since.h" @@ -35,7 +35,7 @@ #elif RBIMPL_HAS_EXTENSION(attribute_deprecated_with_message) # define RBIMPL_ATTR_DEPRECATED(msg) __attribute__((__deprecated__ msg)) -#elif defined(__cplusplus) && RBIMPL_COMPILER_SINCE(GCC, 10, 1, 0) /* && RBIMPL_COMPILER_BEFORE(GCC, 10, X, Y) */ +#elif defined(__cplusplus) && RBIMPL_COMPILER_SINCE(GCC, 10, 1, 0) && RBIMPL_COMPILER_BEFORE(GCC, 10, 3, 0) # /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95302 */ # define RBIMPL_ATTR_DEPRECATED(msg) /* disable until they fix this bug */ @@ -64,4 +64,12 @@ # define RBIMPL_ATTR_DEPRECATED(msg) /* void */ #endif +/** This is when a function is used internally (for backwards compatibility + * etc.), but extension libraries must consider it deprecated. */ +#if defined(RUBY_EXPORT) +# define RBIMPL_ATTR_DEPRECATED_EXT(msg) /* void */ +#else +# define RBIMPL_ATTR_DEPRECATED_EXT(msg) RBIMPL_ATTR_DEPRECATED(msg) +#endif + #endif /* RBIMPL_ATTR_DEPRECATED_H */ diff --git a/include/ruby/internal/attr/diagnose_if.h b/include/ruby/internal/attr/diagnose_if.h index 6a79e904b7..cadc6ce258 100644 --- a/include/ruby/internal/attr/diagnose_if.h +++ b/include/ruby/internal/attr/diagnose_if.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Defines #RBIMPL_ATTR_DIAGNOSE_IF. */ #include "ruby/internal/has/attribute.h" diff --git a/include/ruby/internal/attr/enum_extensibility.h b/include/ruby/internal/attr/enum_extensibility.h index 6faa58185a..eb0d5b6e9b 100644 --- a/include/ruby/internal/attr/enum_extensibility.h +++ b/include/ruby/internal/attr/enum_extensibility.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. 
* Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief #RBIMPL_ATTR_ENUM_EXTENSIBILITY. */ #include "ruby/internal/has/attribute.h" diff --git a/include/ruby/internal/attr/error.h b/include/ruby/internal/attr/error.h index da19b73c2b..2ed388a770 100644 --- a/include/ruby/internal/attr/error.h +++ b/include/ruby/internal/attr/error.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Defines #RBIMPL_ATTR_ERROR. */ #include "ruby/internal/has/attribute.h" diff --git a/include/ruby/internal/attr/flag_enum.h b/include/ruby/internal/attr/flag_enum.h index 148384d842..3053d75074 100644 --- a/include/ruby/internal/attr/flag_enum.h +++ b/include/ruby/internal/attr/flag_enum.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Defines #RBIMPL_ATTR_FLAG_ENUM. * @see https://clang.llvm.org/docs/AttributeReference.html#flag_enum */ diff --git a/include/ruby/internal/attr/forceinline.h b/include/ruby/internal/attr/forceinline.h index 6b31f1016f..b7daafede7 100644 --- a/include/ruby/internal/attr/forceinline.h +++ b/include/ruby/internal/attr/forceinline.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. 
* We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Defines #RBIMPL_ATTR_FORCEINLINE. */ #include "ruby/internal/compiler_since.h" diff --git a/include/ruby/internal/attr/format.h b/include/ruby/internal/attr/format.h index fcbf7b6cfe..b3488ee00a 100644 --- a/include/ruby/internal/attr/format.h +++ b/include/ruby/internal/attr/format.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Defines #RBIMPL_ATTR_FORMAT. */ #include "ruby/internal/has/attribute.h" diff --git a/include/ruby/internal/attr/maybe_unused.h b/include/ruby/internal/attr/maybe_unused.h index f46d1bc670..3ee8be4540 100644 --- a/include/ruby/internal/attr/maybe_unused.h +++ b/include/ruby/internal/attr/maybe_unused.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Defines #RBIMPL_ATTR_MAYBE_UNUSED. */ #include "ruby/internal/has/attribute.h" diff --git a/include/ruby/internal/attr/noalias.h b/include/ruby/internal/attr/noalias.h index 63324b7184..0790ef60e5 100644 --- a/include/ruby/internal/attr/noalias.h +++ b/include/ruby/internal/attr/noalias.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. 
They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Defines #RBIMPL_ATTR_NOALIAS. * * ### Q&A ### @@ -46,10 +46,21 @@ * that has to be passed to the function as a pointer. ::VALUE -taking * functions thus cannot be attributed as such. */ +#include "ruby/internal/compiler_since.h" #include "ruby/internal/has/declspec_attribute.h" /** Wraps (or simulates) `__declspec((noalias))` */ -#if RBIMPL_HAS_DECLSPEC_ATTRIBUTE(noalias) +#if RBIMPL_COMPILER_BEFORE(Clang, 12, 0, 0) +# /* +# * `::llvm::Attribute::ArgMemOnly` was buggy before. Maybe because nobody +# * actually seriously used it. It seems they somehow mitigated the situation +# * in LLVM 12. Still not found the exact changeset which fixed the +# * attribute, though. +# * +# * :FIXME: others (armclang, xlclang, ...) can also be affected? +# */ +# define RBIMPL_ATTR_NOALIAS() /* void */ +#elif RBIMPL_HAS_DECLSPEC_ATTRIBUTE(noalias) # define RBIMPL_ATTR_NOALIAS() __declspec(noalias) #else # define RBIMPL_ATTR_NOALIAS() /* void */ diff --git a/include/ruby/internal/attr/nodiscard.h b/include/ruby/internal/attr/nodiscard.h index 5fd71b1c23..c3ae118942 100644 --- a/include/ruby/internal/attr/nodiscard.h +++ b/include/ruby/internal/attr/nodiscard.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Defines #RBIMPL_ATTR_NODISCARD. */ #include "ruby/internal/has/attribute.h" @@ -26,7 +26,7 @@ /** * Wraps (or simulates) `[[nodiscard]]`. In C++ (at least since C++20) a - * nodiscard attribute can have a message why the result shall not be ignoed. + * nodiscard attribute can have a message why the result shall not be ignored. * However GCC attribute and SAL annotation cannot take them.
*/ #if RBIMPL_HAS_CPP_ATTRIBUTE(nodiscard) diff --git a/include/ruby/internal/attr/noexcept.h b/include/ruby/internal/attr/noexcept.h index 968a7742b9..7c3f92f1e7 100644 --- a/include/ruby/internal/attr/noexcept.h +++ b/include/ruby/internal/attr/noexcept.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Defines #RBIMPL_ATTR_NOEXCEPT. * * This isn't actually an attribute in C++ but who cares... @@ -54,7 +54,7 @@ * get smarter and smarter. Today they can infer if it actually throws * or not without any annotations by humans (correct me if I'm wrong). * - * - When an inline function attributed `noexcepr` actually _does_ throw an + * - When an inline function attributed `noexcept` actually _does_ throw an * exception: they have to call `std::terminate` then (C++ standard * mandates so). This means exception handling routines are actually * enforced, not omitted. This doesn't impact runtime performance (The diff --git a/include/ruby/internal/attr/noinline.h b/include/ruby/internal/attr/noinline.h index 619f99a171..b7605a0c91 100644 --- a/include/ruby/internal/attr/noinline.h +++ b/include/ruby/internal/attr/noinline.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Defines #RBIMPL_ATTR_NOINLINE. 
*/ #include "ruby/internal/has/attribute.h" diff --git a/include/ruby/internal/attr/nonnull.h b/include/ruby/internal/attr/nonnull.h index cfced0bf74..778d5be208 100644 --- a/include/ruby/internal/attr/nonnull.h +++ b/include/ruby/internal/attr/nonnull.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Defines #RBIMPL_ATTR_NONNULL. */ #include "ruby/internal/has/attribute.h" @@ -25,8 +25,10 @@ /** Wraps (or simulates) `__attribute__((nonnull))` */ #if RBIMPL_HAS_ATTRIBUTE(nonnull) # define RBIMPL_ATTR_NONNULL(list) __attribute__((__nonnull__ list)) +# define RBIMPL_NONNULL_ARG(arg) RBIMPL_ASSERT_NOTHING #else # define RBIMPL_ATTR_NONNULL(list) /* void */ +# define RBIMPL_NONNULL_ARG(arg) RUBY_ASSERT(arg) #endif #endif /* RBIMPL_ATTR_NONNULL_H */ diff --git a/include/ruby/internal/attr/noreturn.h b/include/ruby/internal/attr/noreturn.h index f741167c12..5839212037 100644 --- a/include/ruby/internal/attr/noreturn.h +++ b/include/ruby/internal/attr/noreturn.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Defines #RBIMPL_ATTR_NORETURN. 
*/ #include "ruby/internal/has/attribute.h" diff --git a/include/ruby/internal/attr/packed_struct.h b/include/ruby/internal/attr/packed_struct.h new file mode 100644 index 0000000000..0678b9acc8 --- /dev/null +++ b/include/ruby/internal/attr/packed_struct.h @@ -0,0 +1,43 @@ +#ifndef RBIMPL_ATTR_PACKED_STRUCT_H /*-*-C++-*-vi:se ft=cpp:*/ +#define RBIMPL_ATTR_PACKED_STRUCT_H +/** + * @file + * @author Ruby developers <ruby-core@ruby-lang.org> + * @copyright This file is a part of the programming language Ruby. + * Permission is hereby granted, to either redistribute and/or + * modify this file, provided that the conditions mentioned in the + * file COPYING are met. Consult the file for details. + * @warning Symbols prefixed with either `RBIMPL` or `rbimpl` are + * implementation details. Don't take them as canon. They could + * rapidly appear then vanish. The name (path) of this header file + * is also an implementation detail. Do not expect it to persist + * at the place it is now. Developers are free to move it anywhere + * anytime at will. + * @note To ruby-core: remember that this header can be possibly + * recursively included from extension libraries written in C++. + * Do not expect for instance `__VA_ARGS__` is always available. + * We assume C99 for ruby itself but we don't assume languages of + * extension libraries. They could be written in C++98. + * @brief Defines #RBIMPL_ATTR_PACKED_STRUCT_BEGIN, + * #RBIMPL_ATTR_PACKED_STRUCT_END, + * #RBIMPL_ATTR_PACKED_STRUCT_UNALIGNED_BEGIN, and + * #RBIMPL_ATTR_PACKED_STRUCT_UNALIGNED_END. 
+ */ +#include "ruby/internal/config.h" + +#ifndef RBIMPL_ATTR_PACKED_STRUCT_BEGIN +# define RBIMPL_ATTR_PACKED_STRUCT_BEGIN() /* void */ +#endif +#ifndef RBIMPL_ATTR_PACKED_STRUCT_END +# define RBIMPL_ATTR_PACKED_STRUCT_END() /* void */ +#endif + +#if UNALIGNED_WORD_ACCESS +# define RBIMPL_ATTR_PACKED_STRUCT_UNALIGNED_BEGIN() RBIMPL_ATTR_PACKED_STRUCT_BEGIN() +# define RBIMPL_ATTR_PACKED_STRUCT_UNALIGNED_END() RBIMPL_ATTR_PACKED_STRUCT_END() +#else +# define RBIMPL_ATTR_PACKED_STRUCT_UNALIGNED_BEGIN() /* void */ +# define RBIMPL_ATTR_PACKED_STRUCT_UNALIGNED_END() /* void */ +#endif + +#endif diff --git a/include/ruby/internal/attr/pure.h b/include/ruby/internal/attr/pure.h index 4416c25aa0..015711bdab 100644 --- a/include/ruby/internal/attr/pure.h +++ b/include/ruby/internal/attr/pure.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Defines #RBIMPL_ATTR_PURE. */ #include "ruby/internal/compiler_since.h" diff --git a/include/ruby/internal/attr/restrict.h b/include/ruby/internal/attr/restrict.h index aca122af45..e39104138c 100644 --- a/include/ruby/internal/attr/restrict.h +++ b/include/ruby/internal/attr/restrict.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Defines #RBIMPL_ATTR_RESTRICT. 
*/ #include "ruby/internal/compiler_since.h" diff --git a/include/ruby/internal/attr/returns_nonnull.h b/include/ruby/internal/attr/returns_nonnull.h index 91c7be15cd..5d6f1d1459 100644 --- a/include/ruby/internal/attr/returns_nonnull.h +++ b/include/ruby/internal/attr/returns_nonnull.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Defines #RBIMPL_ATTR_RETURNS_NONNULL. */ #include "ruby/internal/has/attribute.h" diff --git a/include/ruby/internal/attr/warning.h b/include/ruby/internal/attr/warning.h index fb6b214828..e5ced269b8 100644 --- a/include/ruby/internal/attr/warning.h +++ b/include/ruby/internal/attr/warning.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Defines #RBIMPL_ATTR_WARNING. */ #include "ruby/internal/has/attribute.h" diff --git a/include/ruby/internal/attr/weakref.h b/include/ruby/internal/attr/weakref.h index 59dba678fa..f118bb62b8 100644 --- a/include/ruby/internal/attr/weakref.h +++ b/include/ruby/internal/attr/weakref.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Defines #RBIMPL_ATTR_WEAKREF. 
*/ #include "ruby/internal/has/attribute.h" diff --git a/include/ruby/internal/cast.h b/include/ruby/internal/cast.h index 065a5d01c2..a31fddbe4c 100644 --- a/include/ruby/internal/cast.h +++ b/include/ruby/internal/cast.h @@ -1,7 +1,6 @@ #ifndef RBIMPL_CAST_H /*-*-C++-*-vi:se ft=cpp:*/ #define RBIMPL_CAST_H /** - * @file * @author Ruby developers <ruby-core@ruby-lang.org> * @copyright This file is a part of the programming language Ruby. * Permission is hereby granted, to either redistribute and/or @@ -17,9 +16,8 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Defines RBIMPL_CAST. - * @cond INTERNAL_MACRO * * This casting macro makes sense only inside of other macros that are part of * public headers. They could be used from C++, and C-style casts could issue @@ -48,6 +46,5 @@ #else # define RBIMPL_CAST(expr) (expr) #endif -/** @endcond */ #endif /* RBIMPL_CAST_H */ diff --git a/include/ruby/internal/compiler_is.h b/include/ruby/internal/compiler_is.h index 776d7add4f..7070b033a0 100644 --- a/include/ruby/internal/compiler_is.h +++ b/include/ruby/internal/compiler_is.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Defines #RBIMPL_COMPILER_IS. 
*/ diff --git a/include/ruby/internal/compiler_is/apple.h b/include/ruby/internal/compiler_is/apple.h index 5a5f558254..a81f1f2c8f 100644 --- a/include/ruby/internal/compiler_is/apple.h +++ b/include/ruby/internal/compiler_is/apple.h @@ -1,7 +1,6 @@ #ifndef RBIMPL_COMPILER_IS_APPLE_H /*-*-C++-*-vi:se ft=cpp:*/ #define RBIMPL_COMPILER_IS_APPLE_H /** - * @file * @author Ruby developers <ruby-core@ruby-lang.org> * @copyright This file is a part of the programming language Ruby. * Permission is hereby granted, to either redistribute and/or @@ -17,8 +16,8 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. - * @brief Defines #RBIMPL_COMPILER_IS_Apple. + * extension libraries. They could be written in C++98. + * @brief Defines RBIMPL_COMPILER_IS_Apple. * * Apple ships clang. Problem is, its `__clang_major__` etc. are not the * upstream LLVM version, but XCode's. We have to think Apple's is distinct diff --git a/include/ruby/internal/compiler_is/clang.h b/include/ruby/internal/compiler_is/clang.h index 30655f2f25..169ff789f6 100644 --- a/include/ruby/internal/compiler_is/clang.h +++ b/include/ruby/internal/compiler_is/clang.h @@ -1,7 +1,6 @@ #ifndef RBIMPL_COMPILER_IS_CLANG_H /*-*-C++-*-vi:se ft=cpp:*/ #define RBIMPL_COMPILER_IS_CLANG_H /** - * @file * @author Ruby developers <ruby-core@ruby-lang.org> * @copyright This file is a part of the programming language Ruby. * Permission is hereby granted, to either redistribute and/or @@ -17,8 +16,8 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. - * @brief Defines #RBIMPL_COMPILER_IS_Clang. + * extension libraries. 
They could be written in C++98. + * @brief Defines RBIMPL_COMPILER_IS_Clang. */ #include "ruby/internal/compiler_is/apple.h" diff --git a/include/ruby/internal/compiler_is/gcc.h b/include/ruby/internal/compiler_is/gcc.h index 6eabfb61a2..accc80e9aa 100644 --- a/include/ruby/internal/compiler_is/gcc.h +++ b/include/ruby/internal/compiler_is/gcc.h @@ -1,7 +1,6 @@ #ifndef RBIMPL_COMPILER_IS_GCC_H /*-*-C++-*-vi:se ft=cpp:*/ #define RBIMPL_COMPILER_IS_GCC_H /** - * @file * @author Ruby developers <ruby-core@ruby-lang.org> * @copyright This file is a part of the programming language Ruby. * Permission is hereby granted, to either redistribute and/or @@ -17,8 +16,8 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. - * @brief Defines #RBIMPL_COMPILER_IS_GCC. + * extension libraries. They could be written in C++98. + * @brief Defines RBIMPL_COMPILER_IS_GCC. */ #include "ruby/internal/compiler_is/apple.h" #include "ruby/internal/compiler_is/clang.h" diff --git a/include/ruby/internal/compiler_is/intel.h b/include/ruby/internal/compiler_is/intel.h index ada120485e..377946ace0 100644 --- a/include/ruby/internal/compiler_is/intel.h +++ b/include/ruby/internal/compiler_is/intel.h @@ -1,7 +1,6 @@ #ifndef RBIMPL_COMPILER_IS_INTEL_H /*-*-C++-*-vi:se ft=cpp:*/ #define RBIMPL_COMPILER_IS_INTEL_H /** - * @file * @author Ruby developers <ruby-core@ruby-lang.org> * @copyright This file is a part of the programming language Ruby. * Permission is hereby granted, to either redistribute and/or @@ -17,8 +16,8 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. 
- * @brief Defines #RBIMPL_COMPILER_IS_Intel. + * extension libraries. They could be written in C++98. + * @brief Defines RBIMPL_COMPILER_IS_Intel. */ #if ! defined(__INTEL_COMPILER) # define RBIMPL_COMPILER_IS_Intel 0 diff --git a/include/ruby/internal/compiler_is/msvc.h b/include/ruby/internal/compiler_is/msvc.h index 60189da1a3..8a864ea558 100644 --- a/include/ruby/internal/compiler_is/msvc.h +++ b/include/ruby/internal/compiler_is/msvc.h @@ -1,7 +1,6 @@ #ifndef RBIMPL_COMPILER_IS_MSVC_H /*-*-C++-*-vi:se ft=cpp:*/ #define RBIMPL_COMPILER_IS_MSVC_H /** - * @file * @author Ruby developers <ruby-core@ruby-lang.org> * @copyright This file is a part of the programming language Ruby. * Permission is hereby granted, to either redistribute and/or @@ -17,8 +16,8 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. - * @brief Defines #RBIMPL_COMPILER_IS_MSVC. + * extension libraries. They could be written in C++98. + * @brief Defines RBIMPL_COMPILER_IS_MSVC. */ #include "ruby/internal/compiler_is/clang.h" #include "ruby/internal/compiler_is/intel.h" diff --git a/include/ruby/internal/compiler_is/sunpro.h b/include/ruby/internal/compiler_is/sunpro.h index 2eb7b892f2..c11c8452e7 100644 --- a/include/ruby/internal/compiler_is/sunpro.h +++ b/include/ruby/internal/compiler_is/sunpro.h @@ -1,7 +1,6 @@ #ifndef RBIMPL_COMPILER_IS_SUNPRO_H /*-*-C++-*-vi:se ft=cpp:*/ #define RBIMPL_COMPILER_IS_SUNPRO_H /** - * @file * @author Ruby developers <ruby-core@ruby-lang.org> * @copyright This file is a part of the programming language Ruby. * Permission is hereby granted, to either redistribute and/or @@ -17,8 +16,8 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. 
* We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. - * @brief Defines #RBIMPL_COMPILER_IS_SunPro. + * extension libraries. They could be written in C++98. + * @brief Defines RBIMPL_COMPILER_IS_SunPro. */ #if ! (defined(__SUNPRO_C) || defined(__SUNPRO_CC)) # define RBIMPL_COMPILER_IS_SunPro 0 diff --git a/include/ruby/internal/compiler_since.h b/include/ruby/internal/compiler_since.h index b213cfd8b9..1929032884 100644 --- a/include/ruby/internal/compiler_since.h +++ b/include/ruby/internal/compiler_since.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Defines #RBIMPL_COMPILER_SINCE. */ #include "ruby/internal/compiler_is.h" diff --git a/include/ruby/internal/config.h b/include/ruby/internal/config.h index 67d7e0156f..da070f0979 100644 --- a/include/ruby/internal/config.h +++ b/include/ruby/internal/config.h @@ -1,7 +1,6 @@ #ifndef RBIMPL_CONFIG_H /*-*-C++-*-vi:se ft=cpp:*/ #define RBIMPL_CONFIG_H /** - * @file * @author Ruby developers <ruby-core@ruby-lang.org> * @copyright This file is a part of the programming language Ruby. * Permission is hereby granted, to either redistribute and/or @@ -17,7 +16,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. 
* @brief Thin wrapper to ruby/config.h */ #include "ruby/config.h" @@ -114,6 +113,8 @@ # define UNALIGNED_WORD_ACCESS 1 #elif defined(__powerpc64__) # define UNALIGNED_WORD_ACCESS 1 +#elif defined(__POWERPC__) // __POWERPC__ is defined for ppc and ppc64 on Darwin +# define UNALIGNED_WORD_ACCESS 1 #elif defined(__aarch64__) # define UNALIGNED_WORD_ACCESS 1 #elif defined(__mc68020__) @@ -126,6 +127,12 @@ #if ! defined(HAVE_VA_ARGS_MACRO) # undef HAVE___VA_OPT__ +#elif defined(__cplusplus) +# if __cplusplus > 201703L +# define HAVE___VA_OPT__ +# else +# undef HAVE___VA_OPT__ +# endif #else # /* Idea taken from: https://stackoverflow.com/a/48045656 */ # define RBIMPL_TEST3(q, w, e, ...) e diff --git a/include/ruby/internal/constant_p.h b/include/ruby/internal/constant_p.h index e54a8d85b9..92d69cb972 100644 --- a/include/ruby/internal/constant_p.h +++ b/include/ruby/internal/constant_p.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Defines #RBIMPL_CONSTANT_P. * * Note that __builtin_constant_p can be applicable inside of inline functions, @@ -28,6 +28,7 @@ */ #include "ruby/internal/has/builtin.h" +/** Wraps (or simulates) `__builtin_constant_p` */ #if RBIMPL_HAS_BUILTIN(__builtin_constant_p) # define RBIMPL_CONSTANT_P(expr) __builtin_constant_p(expr) #else diff --git a/include/ruby/internal/core.h b/include/ruby/internal/core.h index 279a697ea1..3f4561c6a6 100644 --- a/include/ruby/internal/core.h +++ b/include/ruby/internal/core.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. 
They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Core data structures, definitions and manipulations. */ #include "ruby/internal/core/rarray.h" diff --git a/include/ruby/internal/core/rarray.h b/include/ruby/internal/core/rarray.h index 938e2dc897..90690fe794 100644 --- a/include/ruby/internal/core/rarray.h +++ b/include/ruby/internal/core/rarray.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Defines struct ::RArray. */ #include "ruby/internal/arithmetic/long.h" @@ -29,29 +29,27 @@ #include "ruby/internal/core/rbasic.h" #include "ruby/internal/dllexport.h" #include "ruby/internal/fl_type.h" -#include "ruby/internal/rgengc.h" +#include "ruby/internal/gc.h" #include "ruby/internal/stdbool.h" #include "ruby/internal/value.h" #include "ruby/internal/value_type.h" #include "ruby/assert.h" -#ifndef USE_TRANSIENT_HEAP -# define USE_TRANSIENT_HEAP 1 -#endif - +/** + * Convenient casting macro. + * + * @param obj An object, which is in fact an ::RArray. + * @return The passed object casted to ::RArray. 
+ */ #define RARRAY(obj) RBIMPL_CAST((struct RArray *)(obj)) +/** @cond INTERNAL_MACRO */ #define RARRAY_EMBED_FLAG RARRAY_EMBED_FLAG #define RARRAY_EMBED_LEN_MASK RARRAY_EMBED_LEN_MASK #define RARRAY_EMBED_LEN_MAX RARRAY_EMBED_LEN_MAX #define RARRAY_EMBED_LEN_SHIFT RARRAY_EMBED_LEN_SHIFT -#if USE_TRANSIENT_HEAP -# define RARRAY_TRANSIENT_FLAG RARRAY_TRANSIENT_FLAG -#else -# define RARRAY_TRANSIENT_FLAG 0 -#endif -#define RARRAY_LEN rb_array_len -#define RARRAY_CONST_PTR rb_array_const_ptr -#define RARRAY_CONST_PTR_TRANSIENT rb_array_const_ptr_transient +/** @endcond */ +#define RARRAY_LEN rb_array_len /**< @alias{rb_array_len} */ +#define RARRAY_CONST_PTR rb_array_const_ptr /**< @alias{rb_array_const_ptr} */ /** @cond INTERNAL_MACRO */ #if defined(__fcc__) || defined(__fcc_version) || \ @@ -64,33 +62,102 @@ #define RARRAY_EMBED_LEN RARRAY_EMBED_LEN #define RARRAY_LENINT RARRAY_LENINT -#define RARRAY_TRANSIENT_P RARRAY_TRANSIENT_P #define RARRAY_ASET RARRAY_ASET #define RARRAY_PTR RARRAY_PTR /** @endcond */ +/** + * @private + * + * Bits that you can set to ::RBasic::flags. + * + * @warning These enums are not the only bits we use for arrays. + * + * @internal + * + * Unlike strings, flag usages for arrays are scattered across the entire + * source codes. @shyouhei doesn't know the complete list. But what is listed + * here is at least incomplete. + */ enum ruby_rarray_flags { + /** + * This flag has something to do with memory footprint. If the array is + * "small" enough, ruby tries to be creative to abuse padding bits of + * struct ::RArray for storing its contents. This flag denotes that + * situation. + * + * @warning This bit has to be considered read-only. Setting/clearing + * this bit without corresponding fix up must cause immediate + * SEGV. Also, internal structures of an array change + * dynamically and transparently throughout of its lifetime. + * Don't assume it being persistent. 
+ * + * @internal + * + * 3rd parties must not be aware that there even is more than one way to + * store array elements. It was a bad idea to expose this to them. + */ RARRAY_EMBED_FLAG = RUBY_FL_USER1, + /* RUBY_FL_USER2 is for ELTS_SHARED */ - RARRAY_EMBED_LEN_MASK = RUBY_FL_USER4 | RUBY_FL_USER3 -#if USE_TRANSIENT_HEAP - , - RARRAY_TRANSIENT_FLAG = RUBY_FL_USER13 -#endif + + /** + * When an array employs embedded strategy (see ::RARRAY_EMBED_FLAG), these + * bits are used to store the number of elements actually filled into + * ::RArray::ary. + * + * @internal + * + * 3rd parties must not be aware that there even is more than one way to + * store array elements. It was a bad idea to expose this to them. + */ + RARRAY_EMBED_LEN_MASK = RUBY_FL_USER9 | RUBY_FL_USER8 | RUBY_FL_USER7 | RUBY_FL_USER6 | + RUBY_FL_USER5 | RUBY_FL_USER4 | RUBY_FL_USER3 }; +/** + * This is an enum because GDB wants it (rather than a macro). People need not + * bother. + */ enum ruby_rarray_consts { - RARRAY_EMBED_LEN_SHIFT = RUBY_FL_USHIFT + 3, - RARRAY_EMBED_LEN_MAX = RBIMPL_EMBED_LEN_MAX_OF(VALUE) + /** Where ::RARRAY_EMBED_LEN_MASK resides. */ + RARRAY_EMBED_LEN_SHIFT = RUBY_FL_USHIFT + 3 }; +/** Ruby's array. */ struct RArray { + + /** Basic part, including flags and class. */ struct RBasic basic; + + /** Array's specific fields. */ union { + + /** + * Arrays that use separated memory region for elements use this + * pattern. + */ struct { + + /** Number of elements of the array. */ long len; + + /** Auxiliary info. */ union { + + /** + * Capacity of `*ptr`. A continuous memory region of at least + * `capa` elements is expected to exist at `*ptr`. This can be + * bigger than `len`. + */ long capa; + + /** + * Parent of the array. Nowadays arrays can share their + * backend memory regions each other, constructing gigantic + * nest of objects. This situation is called "shared", and + * this is the field to control such properties. 
+ */ #if defined(__clang__) /* <- clang++ is sane */ || \ !defined(__cplusplus) /* <- C99 is sane */ || \ (__cplusplus > 199711L) /* <- C++11 is sane */ @@ -98,22 +165,72 @@ struct RArray { #endif VALUE shared_root; } aux; + + /** + * Pointer to the C array that holds the elements of the array. In + * the old days each array had dedicated memory regions. That is + * no longer true today, but there still are arrays of such + * properties. This field could be used to point to such things. + */ const VALUE *ptr; } heap; - const VALUE ary[RARRAY_EMBED_LEN_MAX]; + + /** + * Embedded elements. When an array is short enough, it uses this area + * to store its elements. In this case the length is encoded into the + * flags. + */ + /* This is a length 1 array because: + * 1. GCC has a bug that does not optimize C flexible array members + * (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102452) + * 2. Zero length arrays are not supported by all compilers + */ + const VALUE ary[1]; } as; }; RBIMPL_SYMBOL_EXPORT_BEGIN() +/** + * @private + * + * Declares a section of code where raw pointers are used. This is an + * implementation detail of #RARRAY_PTR_USE. People don't use it directly. + * + * @param[in] ary An object of ::RArray. + * @return `ary`'s backend C array. + */ VALUE *rb_ary_ptr_use_start(VALUE ary); + +/** + * @private + * + * Declares an end of a section formerly started by rb_ary_ptr_use_start(). + * This is an implementation detail of #RARRAY_PTR_USE. People don't use it + * directly. + * + * @param[in] a An object of ::RArray. + */ void rb_ary_ptr_use_end(VALUE a); -#if USE_TRANSIENT_HEAP -void rb_ary_detransient(VALUE a); -#endif + RBIMPL_SYMBOL_EXPORT_END() RBIMPL_ATTR_PURE_UNLESS_DEBUG() RBIMPL_ATTR_ARTIFICIAL() +/** + * Queries the length of the array. + * + * @param[in] ary Array in question. + * @return Its number of elements. + * @pre `ary` must be an instance of ::RArray, and must have its + * ::RARRAY_EMBED_FLAG flag set.
+ * + * @internal + * + * This was a macro before. It was inevitable to be public, since macros are + * global constructs. But should it be forever? Now that it is a function, + * @shyouhei thinks it could just be eliminated, hidden into implementation + * details. + */ static inline long RARRAY_EMBED_LEN(VALUE ary) { @@ -127,6 +244,13 @@ RARRAY_EMBED_LEN(VALUE ary) } RBIMPL_ATTR_PURE_UNLESS_DEBUG() +/** + * Queries the length of the array. + * + * @param[in] a Array in question. + * @return Its number of elements. + * @pre `a` must be an instance of ::RArray. + */ static inline long rb_array_len(VALUE a) { @@ -141,6 +265,18 @@ rb_array_len(VALUE a) } RBIMPL_ATTR_ARTIFICIAL() +/** + * Identical to rb_array_len(), except it differs for the return type. + * + * @param[in] ary Array in question. + * @exception rb_eRangeError Too long. + * @return Its number of elements. + * @pre `ary` must be an instance of ::RArray. + * + * @internal + * + * This API seems redundant but has actual usages. + */ static inline int RARRAY_LENINT(VALUE ary) { @@ -148,23 +284,17 @@ RARRAY_LENINT(VALUE ary) } RBIMPL_ATTR_PURE_UNLESS_DEBUG() -RBIMPL_ATTR_ARTIFICIAL() -static inline bool -RARRAY_TRANSIENT_P(VALUE ary) -{ - RBIMPL_ASSERT_TYPE(ary, RUBY_T_ARRAY); - -#if USE_TRANSIENT_HEAP - return RB_FL_ANY_RAW(ary, RARRAY_TRANSIENT_FLAG); -#else - return false; -#endif -} - -RBIMPL_ATTR_PURE_UNLESS_DEBUG() -/* internal function. do not use this function */ +/** + * @private + * + * This is an implementation detail of RARRAY_PTR(). People do not use it + * directly. + * + * @param[in] a An object of ::RArray. + * @return Its backend storage. + */ static inline const VALUE * -rb_array_const_ptr_transient(VALUE a) +rb_array_const_ptr(VALUE a) { RBIMPL_ASSERT_TYPE(a, RUBY_T_ARRAY); @@ -176,70 +306,62 @@ rb_array_const_ptr_transient(VALUE a) } } -#if ! USE_TRANSIENT_HEAP -RBIMPL_ATTR_PURE_UNLESS_DEBUG() -#endif -/* internal function. 
do not use this function */ -static inline const VALUE * -rb_array_const_ptr(VALUE a) -{ - RBIMPL_ASSERT_TYPE(a, RUBY_T_ARRAY); - -#if USE_TRANSIENT_HEAP - if (RARRAY_TRANSIENT_P(a)) { - rb_ary_detransient(a); - } -#endif - return rb_array_const_ptr_transient(a); -} - -/* internal function. do not use this function */ -static inline VALUE * -rb_array_ptr_use_start(VALUE a, - RBIMPL_ATTR_MAYBE_UNUSED() - int allow_transient) -{ - RBIMPL_ASSERT_TYPE(a, RUBY_T_ARRAY); - -#if USE_TRANSIENT_HEAP - if (!allow_transient) { - if (RARRAY_TRANSIENT_P(a)) { - rb_ary_detransient(a); - } - } -#endif - - return rb_ary_ptr_use_start(a); -} - -/* internal function. do not use this function */ -static inline void -rb_array_ptr_use_end(VALUE a, - RBIMPL_ATTR_MAYBE_UNUSED() - int allow_transient) -{ - RBIMPL_ASSERT_TYPE(a, RUBY_T_ARRAY); - rb_ary_ptr_use_end(a); -} - -#define RBIMPL_RARRAY_STMT(flag, ary, var, expr) do { \ +/** + * @private + * + * This is an implementation detail of #RARRAY_PTR_USE. People do not use it + * directly. + */ +#define RBIMPL_RARRAY_STMT(ary, var, expr) do { \ RBIMPL_ASSERT_TYPE((ary), RUBY_T_ARRAY); \ const VALUE rbimpl_ary = (ary); \ - VALUE *var = rb_array_ptr_use_start(rbimpl_ary, (flag)); \ + VALUE *var = rb_ary_ptr_use_start(rbimpl_ary); \ expr; \ - rb_array_ptr_use_end(rbimpl_ary, (flag)); \ + rb_ary_ptr_use_end(rbimpl_ary); \ } while (0) -#define RARRAY_PTR_USE_START(a) rb_array_ptr_use_start(a, 0) -#define RARRAY_PTR_USE_END(a) rb_array_ptr_use_end(a, 0) -#define RARRAY_PTR_USE(ary, ptr_name, expr) \ - RBIMPL_RARRAY_STMT(0, ary, ptr_name, expr) - -#define RARRAY_PTR_USE_START_TRANSIENT(a) rb_array_ptr_use_start(a, 1) -#define RARRAY_PTR_USE_END_TRANSIENT(a) rb_array_ptr_use_end(a, 1) -#define RARRAY_PTR_USE_TRANSIENT(ary, ptr_name, expr) \ - RBIMPL_RARRAY_STMT(1, ary, ptr_name, expr) +/** + * Declares a section of code where raw pointers are used. 
In case you need to + * touch the raw C array instead of polite CAPIs, then that operation shall be + * wrapped using this macro. + * + * ```CXX + * const auto ary = rb_eval_string("[...]"); + * const auto len = RARRAY_LENINT(ary); + * const auto symwrite = rb_intern("write"); + * + * RARRAY_PTR_USE(ary, ptr, { + * rb_funcallv(rb_stdout, symwrite, len, ptr); + * }); + * ``` + * + * @param ary An object of ::RArray. + * @param ptr_name A variable name which points the C array in `expr`. + * @param expr The expression that touches `ptr_name`. + * + * @internal + * + * For historical reasons use of this macro is not enforced. There are + * extension libraries in the wild which call RARRAY_PTR() without it. We want + * them use it... Maybe some transition path can be implemented later. + */ +#define RARRAY_PTR_USE(ary, ptr_name, expr) \ + RBIMPL_RARRAY_STMT(ary, ptr_name, expr) +/** + * Wild use of a C pointer. This function accesses the backend storage + * directly. This is slower than #RARRAY_PTR_USE. It exercises + * extra manoeuvres to protect our generational GC. Use of this function is + * considered archaic. Use a modern way instead. + * + * @param[in] ary An object of ::RArray. + * @return The backend C array. + * + * @internal + * + * That said... there are extension libraries in the wild who uses it. We + * cannot but continue supporting. + */ static inline VALUE * RARRAY_PTR(VALUE ary) { @@ -249,14 +371,27 @@ RARRAY_PTR(VALUE ary) return RBIMPL_CAST((VALUE *)RARRAY_CONST_PTR(tmp)); } +/** + * Assigns an object in an array. + * + * @param[out] ary Destination array object. + * @param[in] i Index of `ary`. + * @param[in] v Arbitrary ruby object. + * @pre `ary` must be an instance of ::RArray. + * @pre `ary`'s length must be longer than or equal to `i`. + * @pre `i` must be greater than or equal to zero. + * @post `ary`'s `i`th element is set to `v`. 
+ */ static inline void RARRAY_ASET(VALUE ary, long i, VALUE v) { - RARRAY_PTR_USE_TRANSIENT(ary, ptr, + RARRAY_PTR_USE(ary, ptr, RB_OBJ_WRITE(ary, &ptr[i], v)); } -/* +/** + * @deprecated + * * :FIXME: we want to convert RARRAY_AREF into an inline function (to add rooms * for more sanity checks). However there were situations where the address of * this macro is taken i.e. &RARRAY_AREF(...). They cannot be possible if this @@ -265,6 +400,6 @@ RARRAY_ASET(VALUE ary, long i, VALUE v) * remains as it is due to that. If we could warn such usages we can set a * transition path, but currently no way is found to do so. */ -#define RARRAY_AREF(a, i) RARRAY_CONST_PTR_TRANSIENT(a)[i] +#define RARRAY_AREF(a, i) RARRAY_CONST_PTR(a)[i] #endif /* RBIMPL_RARRAY_H */ diff --git a/include/ruby/internal/core/rbasic.h b/include/ruby/internal/core/rbasic.h index aafec2422f..a1477e2600 100644 --- a/include/ruby/internal/core/rbasic.h +++ b/include/ruby/internal/core/rbasic.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Defines struct ::RBasic. */ #include "ruby/internal/attr/artificial.h" @@ -31,22 +31,58 @@ #include "ruby/internal/value.h" #include "ruby/assert.h" -#define RBASIC(obj) RBIMPL_CAST((struct RBasic *)(obj)) -#define RBASIC_CLASS RBASIC_CLASS -#define RVALUE_EMBED_LEN_MAX RVALUE_EMBED_LEN_MAX - +/** + * Convenient casting macro. + * + * @param obj Arbitrary Ruby object. + * @return The passed object casted to ::RBasic. 
+ */ +#define RBASIC(obj) RBIMPL_CAST((struct RBasic *)(obj)) /** @cond INTERNAL_MACRO */ +#define RBASIC_CLASS RBASIC_CLASS +#define RBIMPL_RVALUE_EMBED_LEN_MAX 3 +#define RVALUE_EMBED_LEN_MAX RVALUE_EMBED_LEN_MAX #define RBIMPL_EMBED_LEN_MAX_OF(T) \ RBIMPL_CAST((int)(sizeof(VALUE[RBIMPL_RVALUE_EMBED_LEN_MAX]) / (sizeof(T)))) /** @endcond */ -#define RBIMPL_RVALUE_EMBED_LEN_MAX 3 -enum ruby_rvalue_flags { RVALUE_EMBED_LEN_MAX = RBIMPL_RVALUE_EMBED_LEN_MAX }; +/** + * This is an enum because GDB wants it (rather than a macro). People need not + * bother. + */ +enum ruby_rvalue_flags { + /** Max possible number of objects that can be embedded. */ + RVALUE_EMBED_LEN_MAX = RBIMPL_RVALUE_EMBED_LEN_MAX +}; +/** + * Ruby object's base components. All Ruby objects have them in common. + */ struct RUBY_ALIGNAS(SIZEOF_VALUE) RBasic { - VALUE flags; /**< @see enum ::ruby_fl_type. */ + + /** + * Per-object flags. Each Ruby object has its own characteristics apart + * from its class. For instance, whether an object is frozen or not is not + * controlled by its class. This is where such properties are stored. + * + * @see enum ::ruby_fl_type + * + * @note This is ::VALUE rather than an enum for alignment purposes. Back + * in the 1990s there were no such thing like `_Alignas` in C. + */ + VALUE flags; + + /** + * Class of an object. Every object has its class. Also, everything is an + * object in Ruby. This means classes are also objects. Classes have + * their own classes, classes of classes have their classes too, and it + * recursively continues forever. + * + * Also note the `const` qualifier. In Ruby, an object cannot "change" its + * class. + */ const VALUE klass; #ifdef __cplusplus @@ -70,12 +106,46 @@ RBasic { }; RBIMPL_SYMBOL_EXPORT_BEGIN() +/** + * Make the object invisible from Ruby code. + * + * It is useful to let Ruby's GC manage your internal data structure -- The + * object keeps being managed by GC, but `ObjectSpace.each_object` never yields + * the object. 
+ * + * Note that the object also lose a way to call a method on it. + * + * @param[out] obj A Ruby object. + * @return The passed object. + * @post The object is destructively modified to be invisible. + * @see rb_obj_reveal + */ VALUE rb_obj_hide(VALUE obj); + +/** + * Make a hidden object visible again. + * + * It is the caller's responsibility to pass the right `klass` which `obj` + * originally used to belong to. + * + * @param[out] obj A Ruby object. + * @param[in] klass Class of `obj`. + * @return Passed `obj`. + * @pre `obj` was previously hidden. + * @post `obj`'s class is `klass`. + * @see rb_obj_hide + */ VALUE rb_obj_reveal(VALUE obj, VALUE klass); /* do not use this API to change klass information */ RBIMPL_SYMBOL_EXPORT_END() RBIMPL_ATTR_PURE_UNLESS_DEBUG() RBIMPL_ATTR_ARTIFICIAL() +/** + * Queries the class of an object. + * + * @param[in] obj An object. + * @return Its class. + */ static inline VALUE RBASIC_CLASS(VALUE obj) { diff --git a/include/ruby/internal/core/rbignum.h b/include/ruby/internal/core/rbignum.h index 3cd7d19850..1d31743235 100644 --- a/include/ruby/internal/core/rbignum.h +++ b/include/ruby/internal/core/rbignum.h @@ -17,15 +17,16 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. - * @brief Routines to manipulate struct ::RBignum. + * extension libraries. They could be written in C++98. + * @brief Routines to manipulate struct RBignum. + * @note The struct RBignum itself is opaque. 
*/ #include "ruby/internal/dllexport.h" #include "ruby/internal/value.h" #include "ruby/internal/value_type.h" #include "ruby/internal/stdbool.h" -#define RBIGNUM_SIGN rb_big_sign +#define RBIGNUM_SIGN rb_big_sign /**< @alias{rb_big_sign} */ /** @cond INTERNAL_MACRO */ #define RBIGNUM_POSITIVE_P RBIGNUM_POSITIVE_P @@ -33,9 +34,29 @@ /** @endcond */ RBIMPL_SYMBOL_EXPORT_BEGIN() +/** + * The "sign" of a bignum. + * + * @param[in] num An object of RBignum. + * @retval 1 It is greater than or equal to zero. + * @retval 0 It is less than zero. + * + * @internal + * + * Implementation wise, unlike fixnums (which are 2's complement), bignums are + * signed magnitude system. Theoretically it could be possible to have + * negative zero instances. But in reality there is no way to create such + * thing. Nobody ever needed that kind of insanity. + */ int rb_big_sign(VALUE num); RBIMPL_SYMBOL_EXPORT_END() +/** + * Checks if the bignum is positive. + * @param[in] b An object of RBignum. + * @retval false `b` is less than zero. + * @retval true Otherwise. + */ static inline bool RBIGNUM_POSITIVE_P(VALUE b) { @@ -43,6 +64,12 @@ RBIGNUM_POSITIVE_P(VALUE b) return RBIGNUM_SIGN(b); } +/** + * Checks if the bignum is negative. + * @param[in] b An object of RBignum. + * @retval true `b` is less than zero. + * @retval false Otherwise. + */ static inline bool RBIGNUM_NEGATIVE_P(VALUE b) { diff --git a/include/ruby/internal/core/rclass.h b/include/ruby/internal/core/rclass.h index 0aa6b1290e..b0b6bfc80c 100644 --- a/include/ruby/internal/core/rclass.h +++ b/include/ruby/internal/core/rclass.h @@ -17,31 +17,77 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. - * @brief Routines to manipulate struct ::RClass. + * extension libraries. They could be written in C++98. 
+ * @brief Routines to manipulate struct RClass. + * @note The struct RClass itself is opaque. */ #include "ruby/internal/dllexport.h" #include "ruby/internal/value.h" #include "ruby/internal/cast.h" -#define RMODULE_IS_OVERLAID RMODULE_IS_OVERLAID +/** @cond INTERNAL_MACRO */ #define RMODULE_IS_REFINEMENT RMODULE_IS_REFINEMENT -#define RMODULE_INCLUDED_INTO_REFINEMENT RMODULE_INCLUDED_INTO_REFINEMENT +/** @endcond */ +/** + * Convenient casting macro. + * + * @param obj An object, which is in fact an RClass. + * @return The passed object casted to RClass. + */ #define RCLASS(obj) RBIMPL_CAST((struct RClass *)(obj)) + +/** @alias{RCLASS} */ #define RMODULE RCLASS + +/** @alias{rb_class_get_superclass} */ #define RCLASS_SUPER rb_class_get_superclass +/** + * @private + * + * Bits that you can set to ::RBasic::flags. + * + * @internal + * + * Why is it here, given RClass itself is not? + */ enum ruby_rmodule_flags { - RMODULE_IS_OVERLAID = RUBY_FL_USER2, - RMODULE_IS_REFINEMENT = RUBY_FL_USER3, - RMODULE_INCLUDED_INTO_REFINEMENT = RUBY_FL_USER4 + /** + * This flag has something to do with refinements. A module created using + * rb_mod_refine() has this flag set. This is the bit which controls + * difference between normal inclusion versus refinements. + */ + RMODULE_IS_REFINEMENT = RUBY_FL_USER3 }; struct RClass; /* Opaque, declared here for RCLASS() macro. */ RBIMPL_SYMBOL_EXPORT_BEGIN() -VALUE rb_class_get_superclass(VALUE); +/** + * Returns the superclass of a class. + * @param[in] klass An object of RClass. + * @retval RUBY_Qfalse `klass` has no super class. + * @retval otherwise Raw superclass of `klass` + * @see rb_class_superclass + * + * ### Q&A ### + * + * - Q: How can a class have no super class? + * + * - A: `klass` could be a module. Or it could be ::rb_cBasicObject. + * + * - Q: What do you mean by "raw" superclass? + * + * - A: This is a really good question. 
The answer is that this function + * returns something different from what you would normally expect. On + * occasions ruby inserts hidden classes in a hierarchy of class + * inheritance behind-the-scene. Such classes are called "iclass"es and + * distinguished using ::RUBY_T_ICLASS in C level. They are truly + * transparent from Ruby level but can be accessed from C, by using this + * API. + */ +VALUE rb_class_get_superclass(VALUE klass); RBIMPL_SYMBOL_EXPORT_END() #endif /* RBIMPL_RCLASS_H */ diff --git a/include/ruby/internal/core/rdata.h b/include/ruby/internal/core/rdata.h index 9432b2ed7a..43ab3c01e7 100644 --- a/include/ruby/internal/core/rdata.h +++ b/include/ruby/internal/core/rdata.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Defines struct ::RData. */ #include "ruby/internal/config.h" @@ -36,6 +36,7 @@ #include "ruby/internal/value_type.h" #include "ruby/defines.h" +/** @cond INTERNAL_MACRO */ #ifdef RUBY_UNTYPED_DATA_WARNING # /* Take that. */ #elif defined(RUBY_EXPORT) @@ -44,39 +45,160 @@ # define RUBY_UNTYPED_DATA_WARNING 0 #endif -/** @cond INTERNAL_MACRO */ #define RBIMPL_DATA_FUNC(f) RBIMPL_CAST((void (*)(void *))(f)) #define RBIMPL_ATTRSET_UNTYPED_DATA_FUNC() \ RBIMPL_ATTR_WARNING(("untyped Data is unsafe; use TypedData instead")) \ RBIMPL_ATTR_DEPRECATED(("by TypedData")) + +#define RBIMPL_MACRO_SELECT(x, y) x ## y +#define RUBY_MACRO_SELECT(x, y) RBIMPL_MACRO_SELECT(x, y) /** @endcond */ +/** + * Convenient casting macro. + * + * @param obj An object, which is in fact an ::RData. + * @return The passed object casted to ::RData. + */ #define RDATA(obj) RBIMPL_CAST((struct RData *)(obj)) + +/** + * Convenient getter macro. 
+ * + * @param obj An object, which is in fact an ::RData. + * @return The passed object's ::RData::data field. + */ #define DATA_PTR(obj) RDATA(obj)->data -#define RBIMPL_MACRO_SELECT(x, y) x ## y -#define RUBY_MACRO_SELECT(x, y) RBIMPL_MACRO_SELECT(x, y) + +/** + * This is a value you can set to ::RData::dfree. Setting this means the data + * was allocated using ::ruby_xmalloc() (or variants), and shall be freed using + * ::ruby_xfree(). + * + * @warning Do not use this if you want to use system malloc, because the + * system and Ruby might or might not share the same malloc + * implementation. + */ #define RUBY_DEFAULT_FREE RBIMPL_DATA_FUNC(-1) + +/** + * This is a value you can set to ::RData::dfree. Setting this means the data + * is managed by someone else, like, statically allocated. Of course you are + * on your own then. + */ #define RUBY_NEVER_FREE RBIMPL_DATA_FUNC(0) + +/** + * @private + * + * @deprecated This macro once was a thing in the old days, but makes no sense + * any longer today. Exists here for backwards compatibility + * only. You can safely forget about it. + */ #define RUBY_UNTYPED_DATA_FUNC(f) f RBIMPL_ATTRSET_UNTYPED_DATA_FUNC() /* #define RUBY_DATA_FUNC(func) ((void (*)(void*))(func)) */ + +/** + * This is the type of callbacks registered to ::RData. The argument is the + * `data` field. + */ typedef void (*RUBY_DATA_FUNC)(void*); +/** + * @deprecated + * + * Old "untyped" user data. It has roughly the same usage as struct + * ::RTypedData, but lacked several features such as support for compaction GC. + * Use of this struct is not recommended any longer. If it is dead necessary, + * please inform the core devs about your usage. + * + * @internal + * + * @shyouhei tried to add RBIMPL_ATTR_DEPRECATED for this type but that yielded + * too many warnings in the core. Maybe we want to retry later... Just add + * deprecated document for now. + */ struct RData { + + /** Basic part, including flags and class. 
*/ struct RBasic basic; + + /** + * This function is called when the object is experiencing GC marks. If it + * contains references to other Ruby objects, you need to mark them also. + * Otherwise GC will smash your data. + * + * @see rb_gc_mark() + * @warning This is called during GC runs. Object allocations are + * impossible at that moment (that is why GC runs). + */ RUBY_DATA_FUNC dmark; + + /** + * This function is called when the object is no longer used. You need to + * do whatever necessary to avoid memory leaks. + * + * @warning This is called during GC runs. Object allocations are + * impossible at that moment (that is why GC runs). + */ RUBY_DATA_FUNC dfree; + + /** Pointer to the actual C level struct that you want to wrap. */ void *data; }; RBIMPL_SYMBOL_EXPORT_BEGIN() + +/** + * This is the primitive way to wrap an existing C struct into ::RData. + * + * @param[in] klass Ruby level class of the returning object. + * @param[in] datap Pointer to the target C struct. + * @param[in] dmark Mark function. + * @param[in] dfree Free function. + * @exception rb_eTypeError `klass` is not a class. + * @exception rb_eNoMemError Out of memory. + * @return An allocated object that wraps `datap`. + */ VALUE rb_data_object_wrap(VALUE klass, void *datap, RUBY_DATA_FUNC dmark, RUBY_DATA_FUNC dfree); + +/** + * Identical to rb_data_object_wrap(), except it allocates a new data region + * internally instead of taking an existing one. The allocation is done using + * ruby_calloc(). Hence it makes no sense to pass anything other than + * ::RUBY_DEFAULT_FREE to the last argument. + * + * @param[in] klass Ruby level class of the returning object. + * @param[in] size Requested size of memory to allocate. + * @param[in] dmark Mark function. + * @param[in] dfree Free function. + * @exception rb_eTypeError `klass` is not a class. + * @exception rb_eNoMemError Out of memory. + * @return An allocated object that wraps a new `size` byte region. 
+ */ VALUE rb_data_object_zalloc(VALUE klass, size_t size, RUBY_DATA_FUNC dmark, RUBY_DATA_FUNC dfree); + +/** + * @private + * Documented in include/ruby/internal/globals.h + */ RUBY_EXTERN VALUE rb_cObject; RBIMPL_SYMBOL_EXPORT_END() +/** + * Converts sval, a pointer to your struct, into a Ruby object. + * + * @param klass A ruby level class. + * @param mark Mark function. + * @param free Free function. + * @param sval A pointer to your struct. + * @exception rb_eTypeError `klass` is not a class. + * @exception rb_eNoMemError Out of memory. + * @return A created Ruby object. + */ #define Data_Wrap_Struct(klass, mark, free, sval) \ rb_data_object_wrap( \ (klass), \ @@ -84,6 +206,20 @@ RBIMPL_SYMBOL_EXPORT_END() RBIMPL_DATA_FUNC(mark), \ RBIMPL_DATA_FUNC(free)) +/** + * @private + * + * This is an implementation detail of #Data_Make_Struct. People don't use it + * directly. + * + * @param result Variable name of created Ruby object. + * @param klass Ruby level class of the object. + * @param type Type name of the C struct. + * @param size Size of the C struct. + * @param mark Mark function. + * @param free Free function. + * @param sval Variable name of created C struct. + */ #define Data_Make_Struct0(result, klass, type, size, mark, free, sval) \ VALUE result = rb_data_object_zalloc( \ (klass), \ @@ -93,6 +229,21 @@ RBIMPL_SYMBOL_EXPORT_END() (sval) = RBIMPL_CAST((type *)DATA_PTR(result)); \ RBIMPL_CAST(/*suppress unused variable warnings*/(void)(sval)) +/** + * Identical to #Data_Wrap_Struct, except it allocates a new data region + * internally instead of taking an existing one. The allocation is done using + * ruby_calloc(). Hence it makes no sense to pass anything other than + * ::RUBY_DEFAULT_FREE to the `free` argument. + * + * @param klass Ruby level class of the returning object. + * @param type Type name of the C struct. + * @param mark Mark function. + * @param free Free function. + * @param sval Variable name of created C struct. 
+ * @exception rb_eTypeError `klass` is not a class. + * @exception rb_eNoMemError Out of memory. + * @return A created Ruby object. + */ #ifdef HAVE_STMT_AND_DECL_IN_EXPR #define Data_Make_Struct(klass, type, mark, free, sval) \ RB_GNUC_EXTENSION({ \ @@ -116,16 +267,47 @@ RBIMPL_SYMBOL_EXPORT_END() sizeof(type)) #endif +/** + * Obtains a C struct from inside of a wrapper Ruby object. + * + * @param obj An instance of ::RData. + * @param type Type name of the C struct. + * @param sval Variable name of obtained C struct. + * @return Unwrapped C struct that `obj` holds. + */ #define Data_Get_Struct(obj, type, sval) \ ((sval) = RBIMPL_CAST((type*)rb_data_object_get(obj))) RBIMPL_ATTRSET_UNTYPED_DATA_FUNC() +/** + * @private + * + * This is an implementation detail of rb_data_object_wrap(). People don't use + * it directly. + * + * @param[in] klass Ruby level class of the returning object. + * @param[in] ptr Pointer to the target C struct. + * @param[in] mark Mark function. + * @param[in] free Free function. + * @exception rb_eTypeError `klass` is not a class. + * @exception rb_eNoMemError Out of memory. + * @return An allocated object that wraps `datap`. + */ static inline VALUE rb_data_object_wrap_warning(VALUE klass, void *ptr, RUBY_DATA_FUNC mark, RUBY_DATA_FUNC free) { return rb_data_object_wrap(klass, ptr, mark, free); } +/** + * @private + * + * This is an implementation detail of #Data_Get_Struct. People don't use it + * directly. + * + * @param[in] obj An instance of ::RData. + * @return Unwrapped C struct that `obj` holds. + */ static inline void * rb_data_object_get(VALUE obj) { @@ -134,6 +316,15 @@ rb_data_object_get(VALUE obj) } RBIMPL_ATTRSET_UNTYPED_DATA_FUNC() +/** + * @private + * + * This is an implementation detail of #Data_Get_Struct. People don't use it + * directly. + * + * @param[in] obj An instance of ::RData. + * @return Unwrapped C struct that `obj` holds. 
+ */ static inline void * rb_data_object_get_warning(VALUE obj) { @@ -149,6 +340,20 @@ rb_data_object_get_warning(VALUE obj) (rb_data_object_wrap_warning)(klass, ptr, mark, free))) #endif +/** + * This is an implementation detail of #Data_Make_Struct. People don't use it + * directly. + * + * @param[in] klass Ruby level class of the returning object. + * @param[in] mark_func Mark function. + * @param[in] free_func Free function. + * @param[in] datap Variable of created C struct. + * @param[in] size Requested size of allocation. + * @exception rb_eTypeError `klass` is not a class. + * @exception rb_eNoMemError Out of memory. + * @return A created Ruby object. + * @post `*datap` holds the created C struct. + */ static inline VALUE rb_data_object_make(VALUE klass, RUBY_DATA_FUNC mark_func, RUBY_DATA_FUNC free_func, void **datap, size_t size) { @@ -157,21 +362,14 @@ rb_data_object_make(VALUE klass, RUBY_DATA_FUNC mark_func, RUBY_DATA_FUNC free_f } RBIMPL_ATTR_DEPRECATED(("by: rb_data_object_wrap")) +/** @deprecated This function was renamed to rb_data_object_wrap(). */ static inline VALUE rb_data_object_alloc(VALUE klass, void *data, RUBY_DATA_FUNC dmark, RUBY_DATA_FUNC dfree) { return rb_data_object_wrap(klass, data, dmark, dfree); } -RBIMPL_ATTR_DEPRECATED(("by: rb_cObject. 
Will be removed in 3.1.")) -RBIMPL_ATTR_PURE() -static inline VALUE -rb_cData(void) -{ - return rb_cObject; -} -#define rb_cData rb_cData() - +/** @cond INTERNAL_MACRO */ #define rb_data_object_wrap_0 rb_data_object_wrap #define rb_data_object_wrap_1 rb_data_object_wrap_warning #define rb_data_object_wrap_2 rb_data_object_wrap_ /* Used here vvvv */ @@ -184,4 +382,5 @@ rb_cData(void) #define rb_data_object_make_1 rb_data_object_make_warning #define rb_data_object_make_2 rb_data_object_make_ /* Used here vvvv */ #define rb_data_object_make RUBY_MACRO_SELECT(rb_data_object_make_2, RUBY_UNTYPED_DATA_WARNING) +/** @endcond */ #endif /* RBIMPL_RDATA_H */ diff --git a/include/ruby/internal/core/rfile.h b/include/ruby/internal/core/rfile.h index 464625b2bd..a0eb8cb833 100644 --- a/include/ruby/internal/core/rfile.h +++ b/include/ruby/internal/core/rfile.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Defines struct ::RFile. */ #include "ruby/internal/core/rbasic.h" @@ -25,12 +25,27 @@ /* rb_io_t is in ruby/io.h. The header file has historically not been included * into ruby/ruby.h. We follow that tradition. */ -struct rb_io_t; +struct rb_io; +/** + * Ruby's File and IO. Ruby's IO are not just file descriptors. They have + * buffers. They also have encodings. Various information are controlled + * using this struct. + */ struct RFile { + + /** Basic part, including flags and class. */ struct RBasic basic; - struct rb_io_t *fptr; + + /** IO's specific fields. */ + struct rb_io *fptr; }; +/** + * Convenient casting macro. + * + * @param obj An object, which is in fact an ::RFile. + * @return The passed object casted to ::RFile. 
+ */ #define RFILE(obj) RBIMPL_CAST((struct RFile *)(obj)) #endif /* RBIMPL_RFILE_H */ diff --git a/include/ruby/internal/core/rhash.h b/include/ruby/internal/core/rhash.h index cffd0b28ce..897c570794 100644 --- a/include/ruby/internal/core/rhash.h +++ b/include/ruby/internal/core/rhash.h @@ -17,20 +17,9 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. - * @brief Routines to manipulate struct ::RHash. - * - * Shyouhei really suffered agnish over placement of macros in this file. They - * are half-broken. The situation (as of writing) is: - * - * - #RHASH_TBL: works. - * - #RHASH_ITER_LEV: compile-time error. - * - #RHASH_IFNONE: compile-time error. - * - #RHASH_SIZE: works. - * - #RHASH_EMPTY_P: works. - * - #RHASH_SET_IFNONE: works (why... given you cannot query). - * - * Shyouhei stopped thinking. Let them be as is. + * extension libraries. They could be written in C++98. + * @brief Routines to manipulate struct RHash. + * @note The struct RHash itself is opaque. */ #include "ruby/internal/config.h" @@ -44,18 +33,98 @@ # include "ruby/backward.h" #endif +/** + * Retrieves the internal table. + * + * @param[in] h An instance of RHash. + * @pre `h` must be of ::RUBY_T_HASH. + * @return A struct st_table which has the contents of this hash. + * @note Nowadays as Ruby evolved over ages, RHash has multiple backend + * storage engines. `h`'s backend is not guaranteed to be a + * st_table. This function creates one when necessary. + */ #define RHASH_TBL(h) rb_hash_tbl(h, __FILE__, __LINE__) -#define RHASH_ITER_LEV(h) rb_hash_iter_lev(h) + +/** + * @private + * + * @deprecated This macro once was a thing in the old days, but makes no sense + * any longer today. Exists here for backwards compatibility + * only. You can safely forget about it. 
+ * + * @internal + * + * Declaration of rb_hash_ifnone() is at include/ruby/backward.h. + */ #define RHASH_IFNONE(h) rb_hash_ifnone(h) + +/** + * Queries the size of the hash. Size here means the number of keys that the + * hash stores. + * + * @param[in] h An instance of RHash. + * @pre `h` must be of ::RUBY_T_HASH. + * @return The size of the hash. + */ #define RHASH_SIZE(h) rb_hash_size_num(h) + +/** + * Checks if the hash is empty. + * + * @param[in] h An instance of RHash. + * @pre `h` must be of ::RUBY_T_HASH. + * @retval true It is. + * @retval false It isn't. + */ #define RHASH_EMPTY_P(h) (RHASH_SIZE(h) == 0) + +/** + * Destructively updates the default value of the hash. + * + * @param[out] h An instance of RHash. + * @param[in] ifnone Arbitrary default value. + * @pre `h` must be of ::RUBY_T_HASH. + * + * @internal + * + * But why you can set this, given rb_hash_ifnone() doesn't exist? + */ #define RHASH_SET_IFNONE(h, ifnone) rb_hash_set_ifnone((VALUE)h, ifnone) struct st_table; /* in ruby/st.h */ RBIMPL_SYMBOL_EXPORT_BEGIN() + +/** + * This is the implementation detail of #RHASH_SIZE. People don't call this + * directly. + * + * @param[in] hash An instance of RHash. + * @pre `hash` must be of ::RUBY_T_HASH. + * @return The size of the hash. + */ size_t rb_hash_size_num(VALUE hash); -struct st_table *rb_hash_tbl(VALUE, const char *file, int line); + +/** + * This is the implementation detail of #RHASH_TBL. People don't call this + * directly. + * + * @param[in] hash An instance of RHash. + * @param[in] file The `__FILE__`. + * @param[in] line The `__LINE__`. + * @pre `hash` must be of ::RUBY_T_HASH. + * @return Table that has the contents of the hash. + */ +struct st_table *rb_hash_tbl(VALUE hash, const char *file, int line); + +/** + * This is the implementation detail of #RHASH_SET_IFNONE. People don't call + * this directly. + * + * @param[out] hash An instance of RHash. + * @param[in] ifnone Arbitrary default value. 
+ * @pre `hash` must be of ::RUBY_T_HASH. + */ VALUE rb_hash_set_ifnone(VALUE hash, VALUE ifnone); RBIMPL_SYMBOL_EXPORT_END() diff --git a/include/ruby/internal/core/rmatch.h b/include/ruby/internal/core/rmatch.h index 03ab5e5d82..a528c2999e 100644 --- a/include/ruby/internal/core/rmatch.h +++ b/include/ruby/internal/core/rmatch.h @@ -17,18 +17,23 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Defines struct ::RMatch. */ #include "ruby/internal/attr/artificial.h" #include "ruby/internal/attr/pure.h" -#include "ruby/internal/attr/returns_nonnull.h" #include "ruby/internal/cast.h" #include "ruby/internal/core/rbasic.h" #include "ruby/internal/value.h" #include "ruby/internal/value_type.h" #include "ruby/assert.h" +/** + * Convenient casting macro. + * + * @param obj An object, which is in fact an ::RMatch. + * @return The passed object casted to ::RMatch. + */ #define RMATCH(obj) RBIMPL_CAST((struct RMatch *)(obj)) /** @cond INTERNAL_MACRO */ #define RMATCH_REGS RMATCH_REGS @@ -37,37 +42,103 @@ struct re_patter_buffer; /* a.k.a. OnigRegexType, defined in onigmo.h */ struct re_registers; /* Also in onigmo.h */ -/* @shyouhei wonders: is anyone actively using this typedef ...? */ +/** + * @old{re_pattern_buffer} + * + * @internal + * + * @shyouhei wonders: is anyone actively using this typedef ...? + */ typedef struct re_pattern_buffer Regexp; +/** + * Represents the region of a capture group. This is basically for caching + * purpose. re_registers have similar concepts (`beg` and `end`) but they are + * in `ptrdiff_t*`. In order for us to implement `MatchData#offset` that info + * has to be converted to offset integers. This is the struct to hold such + * things. 
+ * + * @internal + * + * But why on earth it has to be visible from extension libraries? + */ struct rmatch_offset { - long beg; - long end; + long beg; /**< Beginning of a group. */ + long end; /**< End of a group. */ }; -struct rmatch { +/** Represents a match. */ +struct rb_matchext_struct { + /** + * "Registers" of a match. This is a quasi-opaque struct that holds + * execution result of a match. Roughly resembles `&~`. + */ struct re_registers regs; + /** Capture group offsets, in C array. */ struct rmatch_offset *char_offset; + + /** Number of ::rmatch_offset that ::rmatch::char_offset holds. */ int char_offset_num_allocated; }; +typedef struct rb_matchext_struct rb_matchext_t; + +/** + * Regular expression execution context. When a regular expression "matches" + * to a string, it generates capture groups etc. This struct holds that info. + * Visible from Ruby as an instance of `MatchData`. + * + * @note There is no way for extension libraries to manually generate this + * struct except by actually exercising the match operation of a regular + * expression. + */ struct RMatch { + + /** Basic part, including flags and class. */ struct RBasic basic; + + /** + * The target string that the match was made against. + */ VALUE str; - struct rmatch *rmatch; + + /** + * The expression of this match. + */ VALUE regexp; /* RRegexp */ }; +#define RMATCH_EXT(m) ((rb_matchext_t *)((char *)(m) + sizeof(struct RMatch))) + RBIMPL_ATTR_PURE_UNLESS_DEBUG() -RBIMPL_ATTR_RETURNS_NONNULL() RBIMPL_ATTR_ARTIFICIAL() +/** + * Queries the raw ::re_registers. + * + * @param[in] match A match object + * @pre `match` must be of ::RMatch. + * @return Its execution result. + * @note Good. So you are aware of the fact that it could return NULL. + * Yes. It actually does. This is a really bizarre thing. The + * situation is about `String#gsub` and its family. They take + * strings as arguments, like `"foo".sub("bar", "baz")`. 
On such + * situations, in order to optimise memory allocations, these + * methods do not involve regular expressions at all. They just + * sequentially scan the receiver. Okay. The story begins here. + * Even when they do not kick our regexp engine, there must be + * backref objects e.g. `$&`. But how? You know what? Ruby fakes + * them. It allocates an empty ::RMatch and behaves as if there + * were execution contexts. In reality there weren't. No + * ::re_registers are allocated then. There is no way for this + * function but to return NULL for those fake ::RMatch. This is + * the reason for the nullability of this function. + */ static inline struct re_registers * RMATCH_REGS(VALUE match) { RBIMPL_ASSERT_TYPE(match, RUBY_T_MATCH); - RBIMPL_ASSERT_OR_ASSUME(RMATCH(match)->rmatch != NULL); - return &RMATCH(match)->rmatch->regs; + return &RMATCH_EXT(match)->regs; } #endif /* RBIMPL_RMATCH_H */ diff --git a/include/ruby/internal/core/robject.h b/include/ruby/internal/core/robject.h index c352c87a40..c2bcae6306 100644 --- a/include/ruby/internal/core/robject.h +++ b/include/ruby/internal/core/robject.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Defines struct ::RObject. */ #include "ruby/internal/config.h" @@ -34,50 +34,104 @@ #include "ruby/internal/value.h" #include "ruby/internal/value_type.h" +/** + * Convenient casting macro. + * + * @param obj An object, which is in fact an ::RObject. + * @return The passed object casted to ::RObject. 
+ */ #define ROBJECT(obj) RBIMPL_CAST((struct RObject *)(obj)) +/** @cond INTERNAL_MACRO */ #define ROBJECT_EMBED_LEN_MAX ROBJECT_EMBED_LEN_MAX #define ROBJECT_EMBED ROBJECT_EMBED -/** @cond INTERNAL_MACRO */ -#define ROBJECT_NUMIV ROBJECT_NUMIV +#define ROBJECT_IV_CAPACITY ROBJECT_IV_CAPACITY #define ROBJECT_IVPTR ROBJECT_IVPTR -#define ROBJECT_IV_INDEX_TBL ROBJECT_IV_INDEX_TBL /** @endcond */ -enum ruby_robject_flags { ROBJECT_EMBED = RUBY_FL_USER1 }; - -enum ruby_robject_consts { ROBJECT_EMBED_LEN_MAX = RBIMPL_EMBED_LEN_MAX_OF(VALUE) }; +/** + * @private + * + * Bits that you can set to ::RBasic::flags. + */ +enum ruby_robject_flags { + /** + * This flag has something to do with memory footprint. If the object is + * "small" enough, ruby tries to be creative to abuse padding bits of + * struct ::RObject for storing instance variables. This flag denotes that + * situation. + * + * @warning This bit has to be considered read-only. Setting/clearing + * this bit without corresponding fix up must cause immediate + * SEGV. Also, internal structures of an object change + * dynamically and transparently throughout of its lifetime. + * Don't assume it being persistent. + * + * @internal + * + * 3rd parties must not be aware that there even is more than one way to + * store instance variables. Might better be hidden. + */ + ROBJECT_EMBED = RUBY_FL_USER1 +}; struct st_table; +/** + * Ruby's ordinal objects. Unless otherwise special cased, all predefined and + * user-defined classes share this struct to hold their instances. + */ struct RObject { + + /** Basic part, including flags and class. */ struct RBasic basic; + + /** Object's specific fields. */ union { + + /** + * Object that use separated memory region for instance variables use + * this pattern. + */ struct { - uint32_t numiv; + /** Pointer to a C array that holds instance variables. 
*/ VALUE *ivptr; - struct st_table *iv_index_tbl; /* shortcut for RCLASS_IV_INDEX_TBL(rb_obj_class(obj)) */ + + /** + * This is a table that holds instance variable name to index + * mapping. Used when accessing instance variables using names. + * + * @internal + * + * This is a shortcut for `RCLASS_IV_INDEX_TBL(rb_obj_class(obj))`. + */ + struct rb_id_table *iv_index_tbl; } heap; - VALUE ary[ROBJECT_EMBED_LEN_MAX]; + + /* Embedded instance variables. When an object is small enough, it + * uses this area to store the instance variables. + * + * This is a length 1 array because: + * 1. GCC has a bug that does not optimize C flexible array members + * (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102452) + * 2. Zero length arrays are not supported by all compilers + */ + VALUE ary[1]; } as; }; RBIMPL_ATTR_PURE_UNLESS_DEBUG() RBIMPL_ATTR_ARTIFICIAL() -static inline uint32_t -ROBJECT_NUMIV(VALUE obj) -{ - RBIMPL_ASSERT_TYPE(obj, RUBY_T_OBJECT); - - if (RB_FL_ANY_RAW(obj, ROBJECT_EMBED)) { - return ROBJECT_EMBED_LEN_MAX; - } - else { - return ROBJECT(obj)->as.heap.numiv; - } -} - -RBIMPL_ATTR_PURE_UNLESS_DEBUG() -RBIMPL_ATTR_ARTIFICIAL() +/** + * Queries the instance variables. + * + * @param[in] obj Object in question. + * @return Its instance variables, in C array. + * @pre `obj` must be an instance of ::RObject. + * + * @internal + * + * @shyouhei finds no reason for this to be visible from extension libraries. + */ static inline VALUE * ROBJECT_IVPTR(VALUE obj) { diff --git a/include/ruby/internal/core/rregexp.h b/include/ruby/internal/core/rregexp.h index f289ee1dda..cf54a399f1 100644 --- a/include/ruby/internal/core/rregexp.h +++ b/include/ruby/internal/core/rregexp.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. 
They could be written in C++98. * @brief Defines struct ::RRegexp. */ #include "ruby/internal/attr/artificial.h" @@ -28,7 +28,20 @@ #include "ruby/internal/value.h" #include "ruby/internal/value_type.h" +/** + * Convenient casting macro. + * + * @param obj An object, which is in fact an ::RRegexp. + * @return The passed object casted to ::RRegexp. + */ #define RREGEXP(obj) RBIMPL_CAST((struct RRegexp *)(obj)) + +/** + * Convenient accessor macro. + * + * @param obj An object, which is in fact an ::RRegexp. + * @return The passed object's pattern buffer. + */ #define RREGEXP_PTR(obj) (RREGEXP(obj)->ptr) /** @cond INTERNAL_MACRO */ #define RREGEXP_SRC RREGEXP_SRC @@ -37,17 +50,55 @@ #define RREGEXP_SRC_END RREGEXP_SRC_END /** @endcond */ -struct re_patter_buffer; /* a.k.a. OnigRegexType, defined in onigmo.h */ +struct re_patter_buffer; /* a.k.a. OnigRegexType, defined in onigmo.h */ +/** + * Ruby's regular expression. A regexp is compiled into its own intermediate + * representation. This one holds that info. Regexp "match" operation then + * executes that IR. + */ struct RRegexp { + + /** Basic part, including flags and class. */ struct RBasic basic; + + /** + * The pattern buffer. This is a quasi-opaque struct that holds compiled + * intermediate representation of the regular expression. + * + * @note Compilation of a regexp could be delayed until actual match. + */ struct re_pattern_buffer *ptr; + + /** Source code of this expression. */ const VALUE src; + + /** + * Reference count. A regexp match can take extraordinarily long time to + * run. Ruby's regular expression is heavily extended and not a regular + * language any longer; runs in NP-time in practice. Now, Ruby also has + * threads and GVL. In order to prevent long GVL lockup, our regexp engine + * can release it on occasions. This means that multiple threads can touch + * a regular expressions at once. That itself is okay. 
But their cleanup + * phase shall wait for all the concurrent runs, to prevent use-after-free + * situation. This field is used to count such threads that are executing + * this particular pattern buffer. + * + * @warning Of course, touching this field from extension libraries causes + * catastrophic effects. Just leave it. + */ unsigned long usecnt; }; RBIMPL_ATTR_PURE_UNLESS_DEBUG() RBIMPL_ATTR_ARTIFICIAL() +/** + * Convenient getter function. + * + * @param[in] rexp The regular expression in question. + * @return The source code of the regular expression. + * @pre `rexp` must be of ::RRegexp. + */ static inline VALUE RREGEXP_SRC(VALUE rexp) { @@ -59,6 +110,17 @@ RREGEXP_SRC(VALUE rexp) RBIMPL_ATTR_PURE_UNLESS_DEBUG() RBIMPL_ATTR_ARTIFICIAL() +/** + * Convenient getter function. + * + * @param[in] rexp The regular expression in question. + * @return The source code of the regular expression, in C's string. + * @pre `rexp` must be of ::RRegexp. + * + * @internal + * + * It seems nobody uses this function in the wild. Subject to hide? + */ static inline char * RREGEXP_SRC_PTR(VALUE rexp) { @@ -67,6 +129,17 @@ RREGEXP_SRC_PTR(VALUE rexp) RBIMPL_ATTR_PURE_UNLESS_DEBUG() RBIMPL_ATTR_ARTIFICIAL() +/** + * Convenient getter function. + * + * @param[in] rexp The regular expression in question. + * @return The length of the source code of the regular expression. + * @pre `rexp` must be of ::RRegexp. + * + * @internal + * + * It seems nobody uses this function in the wild. Subject to hide? + */ static inline long RREGEXP_SRC_LEN(VALUE rexp) { @@ -75,6 +148,17 @@ RREGEXP_SRC_LEN(VALUE rexp) RBIMPL_ATTR_PURE_UNLESS_DEBUG() RBIMPL_ATTR_ARTIFICIAL() +/** + * Convenient getter function. + * + * @param[in] rexp The regular expression in question. + * @return The end of the source code of the regular expression. + * @pre `rexp` must be of ::RRegexp. + * + * @internal + * + * It seems nobody uses this function in the wild. Subject to hide? 
+ */ static inline char * RREGEXP_SRC_END(VALUE rexp) { diff --git a/include/ruby/internal/core/rstring.h b/include/ruby/internal/core/rstring.h index d073da1d2c..0bca74e688 100644 --- a/include/ruby/internal/core/rstring.h +++ b/include/ruby/internal/core/rstring.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Defines struct ::RString. */ #include "ruby/internal/config.h" @@ -32,84 +32,341 @@ #include "ruby/internal/warning_push.h" #include "ruby/assert.h" +/** + * Convenient casting macro. + * + * @param obj An object, which is in fact an ::RString. + * @return The passed object casted to ::RString. + */ #define RSTRING(obj) RBIMPL_CAST((struct RString *)(obj)) -#define RSTRING_NOEMBED RSTRING_NOEMBED -#define RSTRING_EMBED_LEN_MASK RSTRING_EMBED_LEN_MASK -#define RSTRING_EMBED_LEN_SHIFT RSTRING_EMBED_LEN_SHIFT -#define RSTRING_EMBED_LEN_MAX RSTRING_EMBED_LEN_MAX -#define RSTRING_FSTR RSTRING_FSTR /** @cond INTERNAL_MACRO */ -#define RSTRING_EMBED_LEN RSTRING_EMBED_LEN +#define RSTRING_NOEMBED RSTRING_NOEMBED +#define RSTRING_FSTR RSTRING_FSTR #define RSTRING_LEN RSTRING_LEN #define RSTRING_LENINT RSTRING_LENINT #define RSTRING_PTR RSTRING_PTR #define RSTRING_END RSTRING_END /** @endcond */ +/** + * @name Conversion of Ruby strings into C's + * + * @{ + */ + +/** + * Ensures that the parameter object is a String. This is done by calling its + * `to_str` method. + * + * @param[in,out] v Arbitrary Ruby object. + * @exception rb_eTypeError No implicit conversion defined. + * @post `v` is a String. + */ #define StringValue(v) rb_string_value(&(v)) + +/** + * Identical to #StringValue, except it returns a `char*`. + * + * @param[in,out] v Arbitrary Ruby object. 
+ * @exception rb_eTypeError No implicit conversion defined. + * @return Converted Ruby string's backend C string. + * @post `v` is a String. + */ #define StringValuePtr(v) rb_string_value_ptr(&(v)) + +/** + * Identical to #StringValuePtr, except it additionally checks for the contents + * for viability as a C string. Ruby can accept wider range of contents as + * strings, compared to C. This function is to check that. + * + * @param[in,out] v Arbitrary Ruby object. + * @exception rb_eTypeError No implicit conversion defined. + * @exception rb_eArgError String is not C-compatible. + * @return Converted Ruby string's backend C string. + * @post `v` is a String. + */ #define StringValueCStr(v) rb_string_value_cstr(&(v)) + +/** + * @private + * + * @deprecated This macro once was a thing in the old days, but makes no sense + * any longer today. Exists here for backwards compatibility + * only. You can safely forget about it. + */ #define SafeStringValue(v) StringValue(v) + +/** + * Identical to #StringValue, except it additionally converts the string's + * encoding to default external encoding. Ruby has a concept called encodings. + * A string can have different encoding than the environment expects. Someone + * has to make sure its contents be converted to something suitable. This is + * that routine. Call it when necessary. + * + * @param[in,out] v Arbitrary Ruby object. + * @exception rb_eTypeError No implicit conversion defined. + * @return Converted Ruby string's backend C string. + * @post `v` is a String. + * + * @internal + * + * Not sure but it seems this macro does not raise on encoding + * incompatibilities? Doesn't sound right to @shyouhei. + */ #define ExportStringValue(v) do { \ StringValue(v); \ (v) = rb_str_export(v); \ } while (0) +/** @} */ + +/** + * @private + * + * Bits that you can set to ::RBasic::flags. + * + * @warning These enums are not the only bits we use for strings. 
+ * + * @internal + * + * Actually all bits through FL_USER1 to FL_USER19 are used for strings. Why + * only this tiny part of them are made public here? @shyouhei can find no + * reason. + */ enum ruby_rstring_flags { + + /** + * This flag has something to do with memory footprint. If the string is + * short enough, ruby tries to be creative to abuse padding bits of struct + * ::RString for storing contents. If this flag is set that string does + * _not_ do that, to resort to good old fashioned external allocation + * strategy instead. + * + * @warning This bit has to be considered read-only. Setting/clearing + * this bit without corresponding fix up must cause immediate + * SEGV. Also, internal structures of a string change + * dynamically and transparently throughout of its lifetime. + * Don't assume it being persistent. + * + * @internal + * + * 3rd parties must not be aware that there even is more than one way to + * store a string. Might better be hidden. + */ RSTRING_NOEMBED = RUBY_FL_USER1, - RSTRING_EMBED_LEN_MASK = RUBY_FL_USER2 | RUBY_FL_USER3 | RUBY_FL_USER4 | - RUBY_FL_USER5 | RUBY_FL_USER6, + /* Actually, string encodings are also encoded into the flags, using * remaining bits.*/ - RSTRING_FSTR = RUBY_FL_USER17 -}; -enum ruby_rstring_consts { - RSTRING_EMBED_LEN_SHIFT = RUBY_FL_USHIFT + 2, - RSTRING_EMBED_LEN_MAX = RBIMPL_EMBED_LEN_MAX_OF(char) - 1 + /** + * This flag has something to do with infamous "f"string. What is a + * fstring? Well it is a special subkind of strings that is immutable, + * deduped globally, and managed by our GC. It is much like a Symbol (in + * fact Symbols are dynamic these days and are backended using fstrings). + * This concept has been silently introduced at some point in 2.x era. + * Since then it gained wider acceptance in the core. But extension + * libraries could not know that until very recently. Strings of this flag + * live in a special Limbo deep inside of the interpreter. Never try to + * manipulate it by hand. 
+ * + * @internal + * + * Fstrings are not the only variant strings that we implement today. + * Other things are behind-the-scene. This is the only one that is visible + * from extension library. There is no clear reason why it has to be. + * Given there are more "polite" ways to create fstrings, it seems this bit + * need not be exposed to extension libraries. Might better be hidden. + */ + RSTRING_FSTR = RUBY_FL_USER17 }; +/** + * Ruby's String. A string in ruby conceptually has these information: + * + * - Encoding of the string. + * - Length of the string. + * - Contents of the string. + * + * It is worth noting that a string is _not_ an array of characters in ruby. + * It has never been. In 1.x a string was an array of integers. Since 2.x a + * string is no longer an array of anything. A string is a string -- just like + * a Time is not an integer. + */ struct RString { + + /** Basic part, including flags and class. */ struct RBasic basic; + + /** + * Length of the string, not including terminating NUL character. + * + * @note This is in bytes. + */ + long len; + + /** String's specific fields. */ union { + + /** + * Strings that use separated memory region for contents use this + * pattern. + */ struct { - long len; + /** + * Pointer to the contents of the string. In the old days each + * string had dedicated memory regions. That is no longer true + * today, but there still are strings of such properties. This + * field could be used to point such things. + */ char *ptr; + + /** Auxiliary info. */ union { + + /** + * Capacity of `*ptr`. A continuous memory region of at least + * `capa` bytes is expected to exist at `*ptr`. This can be + * bigger than `len`. + */ long capa; + + /** + * Parent of the string. Nowadays strings can share their + * contents each other, constructing gigantic nest of objects. + * This situation is called "shared", and this is the field to + * control such properties. 
+ */ VALUE shared; } aux; } heap; - char ary[RSTRING_EMBED_LEN_MAX + 1]; + + /** Embedded contents. */ + struct { + /* This is a length 1 array because: + * 1. GCC has a bug that does not optimize C flexible array members + * (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102452) + * 2. Zero length arrays are not supported by all compilers + */ + char ary[1]; + } embed; } as; }; RBIMPL_SYMBOL_EXPORT_BEGIN() -VALUE rb_str_to_str(VALUE); -VALUE rb_string_value(volatile VALUE*); -char *rb_string_value_ptr(volatile VALUE*); -char *rb_string_value_cstr(volatile VALUE*); -VALUE rb_str_export(VALUE); -VALUE rb_str_export_locale(VALUE); +/** + * Identical to rb_check_string_type(), except it raises exceptions in case of + * conversion failures. + * + * @param[in] obj Target object. + * @exception rb_eTypeError No implicit conversion to String. + * @return Return value of `obj.to_str`. + * @see rb_io_get_io + * @see rb_ary_to_ary + */ +VALUE rb_str_to_str(VALUE obj); + +/** + * Identical to rb_str_to_str(), except it fills the passed pointer with the + * converted object. + * + * @param[in,out] ptr Pointer to a variable of target object. + * @exception rb_eTypeError No implicit conversion to String. + * @return Return value of `obj.to_str`. + * @post `*ptr` is the return value. + */ +VALUE rb_string_value(volatile VALUE *ptr); + +/** + * Identical to rb_str_to_str(), except it returns the converted string's + * backend memory region. + * + * @param[in,out] ptr Pointer to a variable of target object. + * @exception rb_eTypeError No implicit conversion to String. + * @post `*ptr` is the return value of `obj.to_str`. + * @return Pointer to the contents of the return value. + */ +char *rb_string_value_ptr(volatile VALUE *ptr); + +/** + * Identical to rb_string_value_ptr(), except it additionally checks for the + * contents for viability as a C string. Ruby can accept wider range of + * contents as strings, compared to C. This function is to check that. 
+ * + * @param[in,out] ptr Pointer to a variable of target object. + * @exception rb_eTypeError No implicit conversion to String. + * @exception rb_eArgError String is not C-compatible. + * @post `*ptr` is the return value of `obj.to_str`. + * @return Pointer to the contents of the return value. + */ +char *rb_string_value_cstr(volatile VALUE *ptr); + +/** + * Identical to rb_str_to_str(), except it additionally converts the string + * into default external encoding. Ruby has a concept called encodings. A + * string can have different encoding than the environment expects. Someone + * has to make sure its contents be converted to something suitable. This is + * that routine. Call it when necessary. + * + * @param[in] obj Target object. + * @exception rb_eTypeError No implicit conversion to String. + * @return Converted ruby string of default external encoding. + */ +VALUE rb_str_export(VALUE obj); + +/** + * Identical to rb_str_export(), except it converts into the locale encoding + * instead. + * + * @param[in] obj Target object. + * @exception rb_eTypeError No implicit conversion to String. + * @return Converted ruby string of locale encoding. + */ +VALUE rb_str_export_locale(VALUE obj); RBIMPL_ATTR_ERROR(("rb_check_safe_str() and Check_SafeStr() are obsolete; use StringValue() instead")) +/** + * @private + * + * @deprecated This function once was a thing in the old days, but makes no + * sense any longer today. Exists here for backwards + * compatibility only. You can safely forget about it. + */ void rb_check_safe_str(VALUE); + +/** + * @private + * + * @deprecated This macro once was a thing in the old days, but makes no sense + * any longer today. Exists here for backwards compatibility + * only. You can safely forget about it. + */ #define Check_SafeStr(v) rb_check_safe_str(RBIMPL_CAST((VALUE)(v))) + +/** + * @private + * + * Prints diagnostic message to stderr when RSTRING_PTR or RSTRING_END + * is NULL. 
+ * + * @param[in] func The function name where encountered NULL pointer. + */ +void rb_debug_rstring_null_ptr(const char *func); RBIMPL_SYMBOL_EXPORT_END() RBIMPL_ATTR_PURE_UNLESS_DEBUG() RBIMPL_ATTR_ARTIFICIAL() +/** + * Queries the length of the string. + * + * @param[in] str String in question. + * @return Its length, in bytes. + * @pre `str` must be an instance of ::RString. + */ static inline long -RSTRING_EMBED_LEN(VALUE str) +RSTRING_LEN(VALUE str) { - RBIMPL_ASSERT_TYPE(str, RUBY_T_STRING); - RBIMPL_ASSERT_OR_ASSUME(! RB_FL_ANY_RAW(str, RSTRING_NOEMBED)); - - VALUE f = RBASIC(str)->flags; - f &= RSTRING_EMBED_LEN_MASK; - f >>= RSTRING_EMBED_LEN_SHIFT; - return RBIMPL_CAST((long)f); + return RSTRING(str)->len; } RBIMPL_WARNING_PUSH() @@ -119,6 +376,15 @@ RBIMPL_WARNING_IGNORED(413) RBIMPL_ATTR_PURE_UNLESS_DEBUG() RBIMPL_ATTR_ARTIFICIAL() +/** + * @private + * + * "Expands" an embedded string into an ordinal one. This is a function that + * returns aggregated type. The returned struct always has its `as.heap.len` + * an `as.heap.ptr` fields set appropriately. + * + * This is an implementation detail that 3rd parties should never bother. + */ static inline struct RString rbimpl_rstring_getmem(VALUE str) { @@ -130,82 +396,93 @@ rbimpl_rstring_getmem(VALUE str) else { /* Expecting compilers to optimize this on-stack struct away. */ struct RString retval; - retval.as.heap.len = RSTRING_EMBED_LEN(str); - retval.as.heap.ptr = RSTRING(str)->as.ary; + retval.len = RSTRING_LEN(str); + retval.as.heap.ptr = RSTRING(str)->as.embed.ary; return retval; } } RBIMPL_WARNING_POP() -RBIMPL_ATTR_PURE_UNLESS_DEBUG() -RBIMPL_ATTR_ARTIFICIAL() -static inline long -RSTRING_LEN(VALUE str) -{ - return rbimpl_rstring_getmem(str).as.heap.len; -} - RBIMPL_ATTR_ARTIFICIAL() +/** + * Queries the contents pointer of the string. + * + * @param[in] str String in question. + * @return Pointer to its contents. + * @pre `str` must be an instance of ::RString. 
+ */ static inline char * RSTRING_PTR(VALUE str) { char *ptr = rbimpl_rstring_getmem(str).as.heap.ptr; - if (RB_UNLIKELY(! ptr)) { + if (RUBY_DEBUG && RB_UNLIKELY(! ptr)) { /* :BEWARE: @shyouhei thinks that currently, there are rooms for this - * function to return NULL. In the 20th century that was a pointless - * concern. However struct RString can hold fake strings nowadays. It - * seems no check against NULL are exercised around handling of them - * (one of such usages is located in marshal.c, which scares - * @shyouhei). Better check here for maximum safety. + * function to return NULL. Better check here for maximum safety. * * Also, this is not rb_warn() because RSTRING_PTR() can be called * during GC (see what obj_info() does). rb_warn() needs to allocate * Ruby objects. That is not possible at this moment. */ - fprintf(stderr, "%s\n", - "RSTRING_PTR is returning NULL!! " - "SIGSEGV is highly expected to follow immediately. " - "If you could reproduce, attach your debugger here, " - "and look at the passed string." - ); + rb_debug_rstring_null_ptr("RSTRING_PTR"); } return ptr; } RBIMPL_ATTR_ARTIFICIAL() +/** + * Queries the end of the contents pointer of the string. + * + * @param[in] str String in question. + * @return Pointer to its end of contents. + * @pre `str` must be an instance of ::RString. + */ static inline char * RSTRING_END(VALUE str) { struct RString buf = rbimpl_rstring_getmem(str); - if (RB_UNLIKELY(! buf.as.heap.ptr)) { + if (RUBY_DEBUG && RB_UNLIKELY(! buf.as.heap.ptr)) { /* Ditto. */ - fprintf(stderr, "%s\n", - "RSTRING_END is returning NULL!! " - "SIGSEGV is highly expected to follow immediately. " - "If you could reproduce, attach your debugger here, " - "and look at the passed string." - ); + rb_debug_rstring_null_ptr("RSTRING_END"); } - return &buf.as.heap.ptr[buf.as.heap.len]; + return &buf.as.heap.ptr[buf.len]; } RBIMPL_ATTR_ARTIFICIAL() +/** + * Identical to RSTRING_LEN(), except it differs for the return type. 
+ * + * @param[in] str String in question. + * @exception rb_eRangeError Too long. + * @return Its length, in bytes. + * @pre `str` must be an instance of ::RString. + * + * @internal + * + * This API seems redundant but has actual usages. + */ static inline int RSTRING_LENINT(VALUE str) { return rb_long2int(RSTRING_LEN(str)); } +/** + * Convenient macro to obtain the contents and length at once. + * + * @param str String in question. + * @param ptrvar Variable where its contents is stored. + * @param lenvar Variable where its length is stored. + */ #ifdef HAVE_STMT_AND_DECL_IN_EXPR # define RSTRING_GETMEM(str, ptrvar, lenvar) \ __extension__ ({ \ struct RString rbimpl_str = rbimpl_rstring_getmem(str); \ (ptrvar) = rbimpl_str.as.heap.ptr; \ - (lenvar) = rbimpl_str.as.heap.len; \ + (lenvar) = rbimpl_str.len; \ }) #else # define RSTRING_GETMEM(str, ptrvar, lenvar) \ diff --git a/include/ruby/internal/core/rstruct.h b/include/ruby/internal/core/rstruct.h index 17454f7cbe..69be487b59 100644 --- a/include/ruby/internal/core/rstruct.h +++ b/include/ruby/internal/core/rstruct.h @@ -17,8 +17,9 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. - * @brief Routines to manipulate struct ::RStruct. + * extension libraries. They could be written in C++98. + * @brief Routines to manipulate struct RStruct. + * @note The struct RStruct itself is opaque. */ #include "ruby/internal/attr/artificial.h" #include "ruby/internal/dllexport.h" @@ -30,6 +31,17 @@ # include "ruby/backward.h" #endif +/** + * @private + * + * @deprecated This macro once was a thing in the old days, but makes no sense + * any longer today. Exists here for backwards compatibility + * only. You can safely forget about it. + * + * @internal + * + * Declaration of rb_struct_ptr() is at include/ruby/backward.h. 
+ */ #define RSTRUCT_PTR(st) rb_struct_ptr(st) /** @cond INTERNAL_MACRO */ #define RSTRUCT_LEN RSTRUCT_LEN @@ -38,12 +50,46 @@ /** @endcond */ RBIMPL_SYMBOL_EXPORT_BEGIN() -VALUE rb_struct_size(VALUE s); -VALUE rb_struct_aref(VALUE, VALUE); -VALUE rb_struct_aset(VALUE, VALUE, VALUE); +/** + * Returns the number of struct members. + * + * @param[in] st An instance of RStruct. + * @return The number of members of `st`. + * @pre `st` must be of ::RUBY_T_STRUCT. + */ +VALUE rb_struct_size(VALUE st); + +/** + * Resembles `Struct#[]`. + * + * @param[in] st An instance of RStruct. + * @param[in] k Index a.k.a. key of the struct. + * @exception rb_eTypeError `k` is neither Numeric, Symbol, nor String. + * @exception rb_eIndexError Numerical index out of range. + * @exception rb_eNameError No such key. + * @return The member stored at `k` in `st`. + * @pre `st` must be of ::RUBY_T_STRUCT. + */ +VALUE rb_struct_aref(VALUE st, VALUE k); + +/** + * Resembles `Struct#[]=`. + * + * @param[out] st An instance of RStruct. + * @param[in] k Index a.k.a. key of the struct. + * @param[in] v Value to store. + * @exception rb_eTypeError `k` is neither Numeric, Symbol, nor String. + * @exception rb_eIndexError Numerical index out of range. + * @exception rb_eNameError No such key. + * @return Passed `v`. + * @pre `st` must be of ::RUBY_T_STRUCT. + * @post `v` is stored at `k` in `st`. 
+ */ +VALUE rb_struct_aset(VALUE st, VALUE k, VALUE v); RBIMPL_SYMBOL_EXPORT_END() RBIMPL_ATTR_ARTIFICIAL() +/** @copydoc rb_struct_size() */ static inline long RSTRUCT_LEN(VALUE st) { @@ -53,6 +99,7 @@ RSTRUCT_LEN(VALUE st) } RBIMPL_ATTR_ARTIFICIAL() +/** @copydoc rb_struct_aset() */ static inline VALUE RSTRUCT_SET(VALUE st, int k, VALUE v) { @@ -62,6 +109,7 @@ RSTRUCT_SET(VALUE st, int k, VALUE v) } RBIMPL_ATTR_ARTIFICIAL() +/** @copydoc rb_struct_aref() */ static inline VALUE RSTRUCT_GET(VALUE st, int k) { diff --git a/include/ruby/internal/core/rtypeddata.h b/include/ruby/internal/core/rtypeddata.h index c038e6f2b8..6c19576c20 100644 --- a/include/ruby/internal/core/rtypeddata.h +++ b/include/ruby/internal/core/rtypeddata.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Defines struct ::RTypedData. */ #include "ruby/internal/config.h" @@ -28,6 +28,8 @@ #include "ruby/internal/assume.h" #include "ruby/internal/attr/artificial.h" +#include "ruby/internal/attr/flag_enum.h" +#include "ruby/internal/attr/nonnull.h" #include "ruby/internal/attr/pure.h" #include "ruby/internal/cast.h" #include "ruby/internal/core/rbasic.h" @@ -38,13 +40,68 @@ #include "ruby/internal/stdbool.h" #include "ruby/internal/value_type.h" +/** + * @private + * + * @deprecated This macro once was a thing in the old days, but makes no sense + * any longer today. Exists here for backwards compatibility + * only. You can safely forget about it. + */ #define HAVE_TYPE_RB_DATA_TYPE_T 1 + +/** + * @private + * + * @deprecated This macro once was a thing in the old days, but makes no sense + * any longer today. Exists here for backwards compatibility + * only. You can safely forget about it. 
+ */ #define HAVE_RB_DATA_TYPE_T_FUNCTION 1 + +/** + * @private + * + * @deprecated This macro once was a thing in the old days, but makes no sense + * any longer today. Exists here for backwards compatibility + * only. You can safely forget about it. + */ #define HAVE_RB_DATA_TYPE_T_PARENT 1 + +/** + * This is a value you can set to ::rb_data_type_struct::dfree. Setting this + * means the data was allocated using ::ruby_xmalloc() (or variants), and shall + * be freed using ::ruby_xfree(). + * + * @warning Do not use this if you want to use system malloc, because the + * system and Ruby might or might not share the same malloc + * implementation. + */ #define RUBY_TYPED_DEFAULT_FREE RUBY_DEFAULT_FREE + +/** + * This is a value you can set to ::rb_data_type_struct::dfree. Setting this + * means the data is managed by someone else, like, statically allocated. Of + * course you are on your own then. + */ #define RUBY_TYPED_NEVER_FREE RUBY_NEVER_FREE + +/** + * Convenient casting macro. + * + * @param obj An object, which is in fact an ::RTypedData. + * @return The passed object casted to ::RTypedData. + */ #define RTYPEDDATA(obj) RBIMPL_CAST((struct RTypedData *)(obj)) + +/** + * Convenient getter macro. + * + * @param v An object, which is in fact an ::RTypedData. + * @return The passed object's ::RTypedData::data field. + */ #define RTYPEDDATA_DATA(v) (RTYPEDDATA(v)->data) + +/** @old{rb_check_typeddata} */ #define Check_TypedStruct(v, t) \ rb_check_typeddata(RBIMPL_CAST((VALUE)(v)), (t)) @@ -57,55 +114,373 @@ #define RUBY_TYPED_PROMOTED1 RUBY_TYPED_PROMOTED1 /** @endcond */ -/* bits for rb_data_type_struct::flags */ -enum rbimpl_typeddata_flags { +#define TYPED_DATA_EMBEDDED 2 + +/** + * @private + * + * Bits for rb_data_type_struct::flags. + */ +enum +RBIMPL_ATTR_FLAG_ENUM() +rbimpl_typeddata_flags { + /** + * This flag has something to do with Ruby's global interpreter lock. For + * maximum safety, Ruby locks the entire VM during GC. 
However your + * callback functions could unintentionally unlock it, for instance when + * they try to flush an IO buffer. Such operations are dangerous (threads + * then run alongside of GC). By default, to prevent such scenarios, + * callbacks are deferred until the GC engine is 100% sure threads can run. + * This flag skips that; structs with it are deallocated during the sweep + * phase. + * + * Using this flag needs deep understanding of both GC and threads. You + * would better leave it unspecified. + */ RUBY_TYPED_FREE_IMMEDIATELY = 1, + + RUBY_TYPED_EMBEDDABLE = 2, + + /** + * This flag has something to do with Ractor. Multiple Ractors run without + * protecting each other. Sharing an object among Ractors is basically + * dangerous, disabled by default. This flag is used to bypass that + * restriction, but setting it is not enough. In addition to doing so, an + * object also has to be frozen, and be passed to + * rb_ractor_make_shareable() before being actually shareable. Of course, + * you have to manually prevent race conditions then. + * + * Using this flag needs deep understanding of multithreaded programming. + * You would better leave it unspecified. + */ RUBY_TYPED_FROZEN_SHAREABLE = RUBY_FL_SHAREABLE, + + /** + * This flag has something to do with our garbage collector. These days + * ruby objects are "generational". There are those who are young and + * those who are old. Young objects are prone to die; monitored relatively + * extensively by the garbage collector. OTOH old objects tend to live + * longer. They are relatively rarely considered. This basically works. + * But there is one tweak that has to be exercised. When an elder object + * has reference(s) to younger one(s), those referenced objects must not + * die. In order to detect additions of such references, old generations + * are protected by write barriers. It is a very difficult hack to + * appropriately insert write barriers everywhere.
This mechanism is + * disabled by default for 3rd party extensions (they never get aged). By + * specifying this flag you can enable the generational feature to your + * data structure. Of course, you have to manually insert write barriers + * then. + * + * Using this flag needs deep understanding of GC internals, often at the + * level of source code. You would better leave it unspecified. + */ RUBY_TYPED_WB_PROTECTED = RUBY_FL_WB_PROTECTED, /* THIS FLAG DEPENDS ON Ruby version */ - RUBY_TYPED_PROMOTED1 = RUBY_FL_PROMOTED1 /* THIS FLAG DEPENDS ON Ruby version */ + + /** + * This flag is no longer in use. + */ + RUBY_TYPED_UNUSED = RUBY_FL_UNUSED6, + + /** + * This flag determines whether marking and compaction should be carried out + * using the dmark/dcompact callback functions or whether we should mark + * declaratively using a list of references defined inside the data struct we're wrapping. + */ + RUBY_TYPED_DECL_MARKING = RUBY_FL_USER2 }; +/** + * This is the struct that holds necessary info for a struct. It roughly + * resembles a Ruby level class; multiple objects can share a ::rb_data_type_t + * instance. + */ typedef struct rb_data_type_struct rb_data_type_t; +/** @copydoc rb_data_type_t */ struct rb_data_type_struct { + + /** + * Name of structs of this kind. This is used for diagnostic purposes. + * This has to be unique in the process, but doesn't have to be a valid + * C/Ruby identifier. + */ const char *wrap_struct_name; + + /** Function pointers. Resembles C++ `vtbl`.*/ struct { + + /** + * This function is called when the object is experiencing GC marks. + * If it contains references to other Ruby objects, you need to mark + * them also. Otherwise GC will smash your data. + * + * @see rb_gc_mark() + * @warning This is called during GC runs. Object allocations are + * impossible at that moment (that is why GC runs). + */ RUBY_DATA_FUNC dmark; + + /** + * This function is called when the object is no longer used.
You need + * to do whatever is necessary to avoid memory leaks. + * + * @warning This is called during GC runs. Object allocations are + * impossible at that moment (that is why GC runs). + */ RUBY_DATA_FUNC dfree; + + /** + * This function is to query the size of the underlying memory regions. + * + * @internal + * + * This function has only one usage, which is from inside of + * `ext/objspace`. + */ size_t (*dsize)(const void *); + + /** + * This function is called when the object is relocated. Like + * ::rb_data_type_struct::dmark, you need to update references to Ruby + * objects inside of your structs. + * + * @see rb_gc_location() + * @warning This is called during GC runs. Object allocations are + * impossible at that moment (that is why GC runs). + */ RUBY_DATA_FUNC dcompact; + + /** + * This field is reserved for future extension. For now, it must be + * filled with zeros. + */ void *reserved[1]; /* For future extension. This array *must* be filled with ZERO. */ } function; + + /** + * Parent of this class. Sometimes C structs have inheritance-like + * relationships. An example is `struct sockaddr` and its family. If you + * design such things, make ::rb_data_type_t for each of them and connect + * using this field. Ruby can then transparently cast your data back and + * forth when you call #TypedData_Get_Struct(). + * + * ```CXX + * struct parent { }; + * static inline const rb_data_type_t parent_type = { + * .wrap_struct_name = "parent", + * }; + * + * struct child: public parent { }; + * static inline const rb_data_type_t child_type = { + * .wrap_struct_name = "child", + * .parent = &parent_type, + * }; + * + * // This function can take both parent_class and child_class. + * static inline struct parent * + * get_parent(VALUE v) + * { + * struct parent *p; + * TypedData_Get_Struct(v, struct parent, &parent_type, p); + * return p; + * } + * ``` + */ const rb_data_type_t *parent; + + /** + * Type-specific static data.
This area can be used for any purpose by a + * programmer who defines the type. Ruby does not manage this at all. + */ void *data; /* This area can be used for any purpose by a programmer who defines the type. */ + + /** + * Type-specific behavioural characteristics. This is a bitfield. It is + * an EXTREMELY WISE IDEA to leave this field blank. It is designed so + * that setting zero is the safest thing to do. If you risk to set any + * bits on, you have to know exactly what you are doing. + * + * @internal + * + * Why does it have to be a ::VALUE? @shyouhei doesn't understand the design. + */ VALUE flags; /* RUBY_FL_WB_PROTECTED */ }; +/** + * "Typed" user data. By using this, extension libraries can wrap a C struct + * to make it visible from Ruby. For instance if you have a `struct timeval`, + * and you want users to use it, + * + * ```CXX + * static inline const rb_data_type_t timeval_type = { + * // Note that unspecified fields are 0-filled by default. + * .wrap_struct_name = "timeval", + * .function = { + * .dmark = nullptr, // no need to mark + * .dfree = RUBY_TYPED_DEFAULT_FREE, // use ruby_xfree() + * .dsize = [](auto) { + * return sizeof(struct timeval); + * }, + * }, + * }; + * + * extern "C" void + * Init_timeval(void) + * { + * auto klass = rb_define_class("YourName", rb_cObject); + * + * rb_define_alloc_func(klass, [](auto klass) { + * struct timeval *t; + * auto ret = TypedData_Make_Struct( + * klass, struct timeval, &timeval_type, t); + * + * if (auto i = gettimeofday(t, nullptr); i == -1) { + * rb_sys_fail("gettimeofday(3)"); + * } + * else { + * return ret; + * } + * }); + * } + * ``` + */ struct RTypedData { + + /** The part that all ruby objects have in common. */ struct RBasic basic; - const rb_data_type_t *type; - VALUE typed_flag; /* 1 or not */ + + /** + * This field stores various information about how Ruby should handle a + * data. This roughly resembles a Ruby level class (apart from method + * definition etc.)
+ */ + const rb_data_type_t *const type; + + /** + * This is non-zero for typed data. Besides the traditional value 1, the + * #TYPED_DATA_EMBEDDED bit may be set, so valid values range from 1 to 3 + * (see rbimpl_rtypeddata_p()). + * + * @internal + */ + const VALUE typed_flag; + + /** Pointer to the actual C level struct that you want to wrap. */ void *data; }; RBIMPL_SYMBOL_EXPORT_BEGIN() -VALUE rb_data_typed_object_wrap(VALUE klass, void *datap, const rb_data_type_t *); +RBIMPL_ATTR_NONNULL((3)) +/** + * This is the primitive way to wrap an existing C struct into ::RTypedData. + * + * @param[in] klass Ruby level class of the returning object. + * @param[in] datap Pointer to the target C struct. + * @param[in] type The characteristics of the passed data. + * @exception rb_eTypeError `klass` is not a class. + * @exception rb_eNoMemError Out of memory. + * @return An allocated object that wraps `datap`. + */ +VALUE rb_data_typed_object_wrap(VALUE klass, void *datap, const rb_data_type_t *type); + +/** + * Identical to rb_data_typed_object_wrap(), except it allocates a new data + * region internally instead of taking an existing one. The allocation is done + * using ruby_calloc(). Hence it makes no sense for `type->function.dfree` to + * be anything other than ::RUBY_TYPED_DEFAULT_FREE. + * + * @param[in] klass Ruby level class of the returning object. + * @param[in] size Requested size of memory to allocate. + * @param[in] type The characteristics of the passed data. + * @exception rb_eTypeError `klass` is not a class. + * @exception rb_eNoMemError Out of memory. + * @return An allocated object that wraps a new `size` byte region. + */ VALUE rb_data_typed_object_zalloc(VALUE klass, size_t size, const rb_data_type_t *type); + +/** + * Checks for the domestic relationship between the two. + * + * @param[in] child A data type supposed to be a child of `parent`. + * @param[in] parent A data type supposed to be a parent of `child`. + * @retval true `child` is a descendent of `parent`. + * @retval false Otherwise. + * + * @internal + * + * You can pass NULL to both arguments, don't know what that means though.
+ */ int rb_typeddata_inherited_p(const rb_data_type_t *child, const rb_data_type_t *parent); + +/** + * Checks if the given object is of given kind. + * + * @param[in] obj An instance of ::RTypedData. + * @param[in] data_type Expected data type of `obj`. + * @retval true `obj` is of `data_type`. + * @retval false Otherwise. + */ int rb_typeddata_is_kind_of(VALUE obj, const rb_data_type_t *data_type); + +/** + * Identical to rb_typeddata_is_kind_of(), except it raises exceptions instead + * of returning false. + * + * @param[in] obj An instance of ::RTypedData. + * @param[in] data_type Expected data type of `obj`. + * @exception rb_eTypeError obj is not of `data_type`. + * @return Unwrapped C struct that `obj` holds. + * @post Upon successful return `obj`'s type is guaranteed `data_type`. + */ void *rb_check_typeddata(VALUE obj, const rb_data_type_t *data_type); RBIMPL_SYMBOL_EXPORT_END() +/** + * Converts sval, a pointer to your struct, into a Ruby object. + * + * @param klass A ruby level class. + * @param data_type The type of `sval`. + * @param sval A pointer to your struct. + * @exception rb_eTypeError `klass` is not a class. + * @exception rb_eNoMemError Out of memory. + * @return A created Ruby object. + */ #define TypedData_Wrap_Struct(klass,data_type,sval)\ rb_data_typed_object_wrap((klass),(sval),(data_type)) +/** + * @private + * + * This is an implementation detail of #TypedData_Make_Struct. People don't + * use it directly. + * + * @param result Variable name of created Ruby object. + * @param klass Ruby level class of the object. + * @param type Type name of the C struct. + * @param size Size of the C struct. + * @param data_type The data type describing `type`. + * @param sval Variable name of created C struct. 
+ */ #define TypedData_Make_Struct0(result, klass, type, size, data_type, sval) \ VALUE result = rb_data_typed_object_zalloc(klass, size, data_type); \ - (sval) = RBIMPL_CAST((type *)RTYPEDDATA_DATA(result)); \ + (sval) = (type *)RTYPEDDATA_GET_DATA(result); \ RBIMPL_CAST(/*suppress unused variable warnings*/(void)(sval)) +/** + * Identical to #TypedData_Wrap_Struct, except it allocates a new data region + * internally instead of taking an existing one. The allocation is done using + * ruby_calloc(). Hence it makes no sense for `data_type->function.dfree` to + * be anything other than ::RUBY_TYPED_DEFAULT_FREE. + * + * @param klass Ruby level class of the object. + * @param type Type name of the C struct. + * @param data_type The data type describing `type`. + * @param sval Variable name of created C struct. + * @exception rb_eTypeError `klass` is not a class. + * @exception rb_eNoMemError Out of memory. + * @return A created Ruby object. + */ #ifdef HAVE_STMT_AND_DECL_IN_EXPR #define TypedData_Make_Struct(klass, type, data_type, sval) \ RB_GNUC_EXTENSION({ \ @@ -127,19 +502,79 @@ RBIMPL_SYMBOL_EXPORT_END() sizeof(type)) #endif +/** + * Obtains a C struct from inside of a wrapper Ruby object. + * + * @param obj An instance of ::RTypedData. + * @param type Type name of the C struct. + * @param data_type The data type describing `type`. + * @param sval Variable name of obtained C struct. + * @exception rb_eTypeError `obj` is not a kind of `data_type`. + * @return Unwrapped C struct that `obj` holds. 
+ */ #define TypedData_Get_Struct(obj,type,data_type,sval) \ ((sval) = RBIMPL_CAST((type *)rb_check_typeddata((obj), (data_type)))) +static inline bool +RTYPEDDATA_EMBEDDED_P(VALUE obj) +{ +#if RUBY_DEBUG + if (RB_UNLIKELY(!RB_TYPE_P(obj, RUBY_T_DATA))) { + Check_Type(obj, RUBY_T_DATA); + RBIMPL_UNREACHABLE_RETURN(false); + } +#endif + + return RTYPEDDATA(obj)->typed_flag & TYPED_DATA_EMBEDDED; +} + +static inline void * +RTYPEDDATA_GET_DATA(VALUE obj) +{ +#if RUBY_DEBUG + if (RB_UNLIKELY(!RB_TYPE_P(obj, RUBY_T_DATA))) { + Check_Type(obj, RUBY_T_DATA); + RBIMPL_UNREACHABLE_RETURN(false); + } +#endif + + /* We reuse the data pointer in embedded TypedData. We can't use offsetof + * since RTypedData is a non-POD type in C++. */ + const size_t embedded_typed_data_size = sizeof(struct RTypedData) - sizeof(void *); + + return RTYPEDDATA_EMBEDDED_P(obj) ? (char *)obj + embedded_typed_data_size : RTYPEDDATA(obj)->data; +} + RBIMPL_ATTR_PURE() RBIMPL_ATTR_ARTIFICIAL() +/** + * @private + * + * This is an implementation detail of Check_Type(). People don't use it + * directly. + * + * @param[in] obj Object in question + * @retval true `obj` is an instance of ::RTypedData. + * @retval false `obj` is an instance of ::RData. + * @pre `obj` must be a Ruby object of ::RUBY_T_DATA. + */ static inline bool rbimpl_rtypeddata_p(VALUE obj) { - return RTYPEDDATA(obj)->typed_flag == 1; + VALUE typed_flag = RTYPEDDATA(obj)->typed_flag; + return typed_flag != 0 && typed_flag <= 3; } RBIMPL_ATTR_PURE_UNLESS_DEBUG() RBIMPL_ATTR_ARTIFICIAL() +/** + * Checks whether the passed object is ::RTypedData or ::RData. + * + * @param[in] obj Object in question + * @retval true `obj` is an instance of ::RTypedData. + * @retval false `obj` is an instance of ::RData. + * @pre `obj` must be a Ruby object of ::RUBY_T_DATA.
+ */ static inline bool RTYPEDDATA_P(VALUE obj) { @@ -156,6 +591,13 @@ RTYPEDDATA_P(VALUE obj) RBIMPL_ATTR_PURE_UNLESS_DEBUG() RBIMPL_ATTR_ARTIFICIAL() /* :TODO: can this function be __attribute__((returns_nonnull)) or not? */ +/** + * Queries for the type of given object. + * + * @param[in] obj Object in question + * @return Data type struct that corresponds to `obj`. + * @pre `obj` must be an instance of ::RTypedData. + */ static inline const struct rb_data_type_struct * RTYPEDDATA_TYPE(VALUE obj) { @@ -169,6 +611,20 @@ RTYPEDDATA_TYPE(VALUE obj) return RTYPEDDATA(obj)->type; } +/** + * While we don't stop you from using this function, it seems to be an + * implementation detail of #TypedData_Make_Struct, which is preferred over + * this one. + * + * @param[in] klass Ruby level class of the returning object. + * @param[in] type The data type + * @param[out] datap Return pointer. + * @param[in] size Size of the C struct. + * @exception rb_eTypeError `klass` is not a class. + * @exception rb_eNoMemError Out of memory. + * @return A created Ruby object. + * @post `*datap` points to the C struct wrapped by the returned object. + */ static inline VALUE rb_data_typed_object_make(VALUE klass, const rb_data_type_t *type, void **datap, size_t size) { @@ -177,6 +633,7 @@ rb_data_typed_object_make(VALUE klass, const rb_data_type_t *type, void **datap, } RBIMPL_ATTR_DEPRECATED(("by: rb_data_typed_object_wrap")) +/** @deprecated This function was renamed to rb_data_typed_object_wrap(). */ static inline VALUE rb_data_typed_object_alloc(VALUE klass, void *datap, const rb_data_type_t *type) { diff --git a/include/ruby/internal/ctype.h b/include/ruby/internal/ctype.h index aea3e0ca3d..0f7ca6c516 100644 --- a/include/ruby/internal/ctype.h +++ b/include/ruby/internal/ctype.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. 
* We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Our own, locale independent, character handling routines. */ #include "ruby/internal/config.h" @@ -29,34 +29,161 @@ #include "ruby/internal/attr/artificial.h" #include "ruby/internal/attr/const.h" #include "ruby/internal/attr/constexpr.h" +#include "ruby/internal/attr/nonnull.h" #include "ruby/internal/dllexport.h" +/** + * @name Old character classification macros + * + * What is this #ISPRINT business? Well, according to our VCS and some + * internet surfing, it appears that the initial intent of these macros were to + * mimic codes appear in common in several GNU projects. As far as @shyouhei + * detects they seem to originate GNU regex (that standalone one rather than + * Gnulib or Glibc), and at least date back to 1995. + * + * Let me lawfully quote from a GNU coreutils commit + * https://git.savannah.gnu.org/cgit/coreutils.git/commit/?id=49803907f5dbd7646184a8912c9db9b09dcd0f22 + * + * > Jim Meyering writes: + * > + * > "... Some ctype macros are valid only for character codes that + * > isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when + * > using /bin/cc or gcc but without giving an ansi option). So, all + * > ctype uses should be through macros like ISPRINT... If + * > STDC_HEADERS is defined, then autoconf has verified that the ctype + * > macros don't need to be guarded with references to isascii. ... + * > Defining isascii to 1 should let any compiler worth its salt + * > eliminate the && through constant folding." + * > + * > Bruno Haible adds: + * > + * > "... Furthermore, isupper(c) etc. have an undefined result if c is + * > outside the range -1 <= c <= 255. One is tempted to write isupper(c) + * > with c being of type `char', but this is wrong if c is an 8-bit + * > character >= 128 which gets sign-extended to a negative value. 
+ * > The macro ISUPPER protects against this as well." + * + * So the intent was to reroute old problematic systems that no longer exist. + * At the same time the problems described above no longer hurt us, because we + * decided to completely avoid using system-provided isupper etc. to reinvent + * the wheel. These macros are entirely legacy; please ignore them. + * + * But let me also put stress that GNU people are wise; they use those macros + * only inside of their own implementations and never let them be public. On + * the other hand ruby has thoughtlessly publicised them to 3rd party libraries + * since its beginning, which is a very bad idea. These macros are too easy to + * get conflicted with definitions elsewhere. + * + * New programs should stick to the `rb_` prefixed names. + * + * @note It seems we just mimic the API. We do not share their implementation + * with GPL-ed programs. + * + * @{ + */ #ifndef ISPRINT -# define ISASCII rb_isascii -# define ISPRINT rb_isprint -# define ISGRAPH rb_isgraph -# define ISSPACE rb_isspace -# define ISUPPER rb_isupper -# define ISLOWER rb_islower -# define ISALNUM rb_isalnum -# define ISALPHA rb_isalpha -# define ISDIGIT rb_isdigit -# define ISXDIGIT rb_isxdigit -# define ISBLANK rb_isblank -# define ISCNTRL rb_iscntrl -# define ISPUNCT rb_ispunct +# define ISASCII rb_isascii /**< @old{rb_isascii}*/ +# define ISPRINT rb_isprint /**< @old{rb_isprint}*/ +# define ISGRAPH rb_isgraph /**< @old{rb_isgraph}*/ +# define ISSPACE rb_isspace /**< @old{rb_isspace}*/ +# define ISUPPER rb_isupper /**< @old{rb_isupper}*/ +# define ISLOWER rb_islower /**< @old{rb_islower}*/ +# define ISALNUM rb_isalnum /**< @old{rb_isalnum}*/ +# define ISALPHA rb_isalpha /**< @old{rb_isalpha}*/ +# define ISDIGIT rb_isdigit /**< @old{rb_isdigit}*/ +# define ISXDIGIT rb_isxdigit /**< @old{rb_isxdigit}*/ +# define ISBLANK rb_isblank /**< @old{rb_isblank}*/ +# define ISCNTRL rb_iscntrl /**< @old{rb_iscntrl}*/ +# define ISPUNCT rb_ispunct /**< 
@old{rb_ispunct}*/ #endif -#define TOUPPER rb_toupper -#define TOLOWER rb_tolower -#define STRCASECMP st_locale_insensitive_strcasecmp -#define STRNCASECMP st_locale_insensitive_strncasecmp -#define STRTOUL ruby_strtoul +#define TOUPPER rb_toupper /**< @old{rb_toupper}*/ +#define TOLOWER rb_tolower /**< @old{rb_tolower}*/ +#define STRCASECMP st_locale_insensitive_strcasecmp /**< @old{st_locale_insensitive_strcasecmp}*/ +#define STRNCASECMP st_locale_insensitive_strncasecmp /**< @old{st_locale_insensitive_strncasecmp}*/ +#define STRTOUL ruby_strtoul /**< @old{ruby_strtoul}*/ + +/** @} */ RBIMPL_SYMBOL_EXPORT_BEGIN() -/* locale insensitive functions */ +/** @name locale insensitive functions + * @{ + */ + +/* In descriptions below, `the POSIX Locale` and `the "C" locale` are tactfully + * used as to whether the described function mimics POSIX or C99. */ + +RBIMPL_ATTR_NONNULL(()) +/** + * Our own locale-insensitive version of `strcasecmp(3)`. The "case" here + * always means that of the POSIX Locale. It doesn't depend on runtime locale + * settings. + * + * @param[in] s1 Comparison LHS. + * @param[in] s2 Comparison RHS. + * @retval -1 `s1` is "less" than `s2`. + * @retval 0 Both strings converted into lowercase would be identical. + * @retval 1 `s1` is "greater" than `s2`. + * @note Not only does this function work under the POSIX Locale, but + * also assumes its execution character set be what ruby calls an + * ASCII-compatible character set; which does not include for + * instance EBCDIC or UTF-16LE. + */ int st_locale_insensitive_strcasecmp(const char *s1, const char *s2); + +RBIMPL_ATTR_NONNULL(()) +/** + * Our own locale-insensitive version of `strncasecmp(3)`. The "case" here + * always means that of the POSIX Locale. It doesn't depend on runtime locale + * settings. + * + * @param[in] s1 Comparison LHS. + * @param[in] s2 Comparison RHS. + * @param[in] n Comparison shall stop after first `n` bytes are scanned. + * @retval -1 `s1` is "less" than `s2`.
+ * @retval 0 Both strings converted into lowercase would be identical. + * @retval 1 `s1` is "greater" than `s2`. + * @note Not only does this function works under the POSIX Locale, but + * also assumes its execution character set be what ruby calls an + * ASCII-compatible character set; which does not include for + * instance EBCDIC or UTF-16LE. + * @warning This function is _not_ timing safe. + */ int st_locale_insensitive_strncasecmp(const char *s1, const char *s2, size_t n); + +RBIMPL_ATTR_NONNULL((1)) +/** + * Our own locale-insensitive version of `strtoul(3)`. The conversion is done + * as if the current locale is set to the "C" locale, no matter actual runtime + * locale settings. + * + * @note This is needed because `strtoul("i", 0, 36)` would return zero + * if it is locale sensitive and the current locale is `tr_TR`. + * @param[in] str String of digits, optionally preceded with whitespaces + * (ignored) and optionally `+` or `-` sign. + * @param[out] endptr NULL, or an arbitrary pointer (overwritten on return). + * @param[in] base `2` to `36` inclusive for each base, or special case + * `0` to detect the base from the contents of the string. + * @return Converted integer, casted to unsigned long. + * @post If `endptr` is not NULL, it is updated to point the first such + * byte where conversion failed. + * @note This function sets `errno` on failure. + * - `EINVAL`: Passed `base` is out of range. + * - `ERANGE`: Converted integer is out of range of `long`. + * @warning As far as @shyouhei reads ISO/IEC 9899:2018 section 7.22.1.4, a + * conforming `strtoul` implementation shall render `ERANGE` + * whenever it finds the input string represents a negative + * integer. Such thing can never be representable using `unsigned + * long`. However this implementation does not honour that + * language. It just casts such negative value to the return + * type, resulting a very big return value. This behaviour is at + * least questionable. 
But we can no longer change that at this + * point. + * @note Not only does this function works under the "C" locale, but + * also assumes its execution character set be what ruby calls an + * ASCII-compatible character set; which does not include for + * instance EBCDIC or UTF-16LE. + */ unsigned long ruby_strtoul(const char *str, char **endptr, int base); RBIMPL_SYMBOL_EXPORT_END() @@ -68,6 +195,16 @@ RBIMPL_SYMBOL_EXPORT_END() RBIMPL_ATTR_CONST() RBIMPL_ATTR_CONSTEXPR(CXX11) RBIMPL_ATTR_ARTIFICIAL() +/** + * Our own locale-insensitive version of `isascii(3)`. + * + * @param[in] c Byte in question to query. + * @retval false `c` is out of range of ASCII character set. + * @retval true Yes it is. + * @warning `c` is an int. This means that when you pass a `char` value + * here, it experiences "integer promotion" as defined in ISO/IEC + * 9899:2018 section 6.3.1.1 paragraph 1. + */ static inline int rb_isascii(int c) { @@ -77,6 +214,20 @@ rb_isascii(int c) RBIMPL_ATTR_CONST() RBIMPL_ATTR_CONSTEXPR(CXX11) RBIMPL_ATTR_ARTIFICIAL() +/** + * Our own locale-insensitive version of `isupper(3)`. + * + * @param[in] c Byte in question to query. + * @retval true `c` is listed in IEEE 1003.1 section 7.3.1.1 "upper". + * @retval false Anything else. + * @note Not only does this function works under the POSIX Locale, but + * also assumes its execution character set be what ruby calls an + * ASCII-compatible character set; which does not include for + * instance EBCDIC or UTF-16LE. + * @warning `c` is an int. This means that when you pass a `char` value + * here, it experiences "integer promotion" as defined in ISO/IEC + * 9899:2018 section 6.3.1.1 paragraph 1. + */ static inline int rb_isupper(int c) { @@ -86,6 +237,20 @@ rb_isupper(int c) RBIMPL_ATTR_CONST() RBIMPL_ATTR_CONSTEXPR(CXX11) RBIMPL_ATTR_ARTIFICIAL() +/** + * Our own locale-insensitive version of `islower(3)`. + * + * @param[in] c Byte in question to query. 
+ * @retval true `c` is listed in IEEE 1003.1 section 7.3.1.1 "lower". + * @retval false Anything else. + * @note Not only does this function works under the POSIX Locale, but + * also assumes its execution character set be what ruby calls an + * ASCII-compatible character set; which does not include for + * instance EBCDIC or UTF-16LE. + * @warning `c` is an int. This means that when you pass a `char` value + * here, it experiences "integer promotion" as defined in ISO/IEC + * 9899:2018 section 6.3.1.1 paragraph 1. + */ static inline int rb_islower(int c) { @@ -95,6 +260,21 @@ rb_islower(int c) RBIMPL_ATTR_CONST() RBIMPL_ATTR_CONSTEXPR(CXX11) RBIMPL_ATTR_ARTIFICIAL() +/** + * Our own locale-insensitive version of `isalpha(3)`. + * + * @param[in] c Byte in question to query. + * @retval true `c` is listed in either IEEE 1003.1 section 7.3.1.1 + * "upper" or "lower". + * @retval false Anything else. + * @note Not only does this function works under the POSIX Locale, but + * also assumes its execution character set be what ruby calls an + * ASCII-compatible character set; which does not include for + * instance EBCDIC or UTF-16LE. + * @warning `c` is an int. This means that when you pass a `char` value + * here, it experiences "integer promotion" as defined in ISO/IEC + * 9899:2018 section 6.3.1.1 paragraph 1. + */ static inline int rb_isalpha(int c) { @@ -104,6 +284,20 @@ rb_isalpha(int c) RBIMPL_ATTR_CONST() RBIMPL_ATTR_CONSTEXPR(CXX11) RBIMPL_ATTR_ARTIFICIAL() +/** + * Our own locale-insensitive version of `isdigit(3)`. + * + * @param[in] c Byte in question to query. + * @retval true `c` is listed in IEEE 1003.1 section 7.3.1.1 "digit". + * @retval false Anything else. + * @note Not only does this function works under the POSIX Locale, but + * also assumes its execution character set be what ruby calls an + * ASCII-compatible character set; which does not include for + * instance EBCDIC or UTF-16LE. + * @warning `c` is an int. 
This means that when you pass a `char` value + * here, it experiences "integer promotion" as defined in ISO/IEC + * 9899:2018 section 6.3.1.1 paragraph 1. + */ static inline int rb_isdigit(int c) { @@ -113,6 +307,21 @@ rb_isdigit(int c) RBIMPL_ATTR_CONST() RBIMPL_ATTR_CONSTEXPR(CXX11) RBIMPL_ATTR_ARTIFICIAL() +/** + * Our own locale-insensitive version of `isalnum(3)`. + * + * @param[in] c Byte in question to query. + * @retval true `c` is listed in either IEEE 1003.1 section 7.3.1.1 + * "upper", "lower", or "digit". + * @retval false Anything else. + * @note Not only does this function works under the POSIX Locale, but + * also assumes its execution character set be what ruby calls an + * ASCII-compatible character set; which does not include for + * instance EBCDIC or UTF-16LE. + * @warning `c` is an int. This means that when you pass a `char` value + * here, it experiences "integer promotion" as defined in ISO/IEC + * 9899:2018 section 6.3.1.1 paragraph 1. + */ static inline int rb_isalnum(int c) { @@ -122,6 +331,20 @@ rb_isalnum(int c) RBIMPL_ATTR_CONST() RBIMPL_ATTR_CONSTEXPR(CXX11) RBIMPL_ATTR_ARTIFICIAL() +/** + * Our own locale-insensitive version of `isxdigit(3)`. + * + * @param[in] c Byte in question to query. + * @retval true `c` is listed in IEEE 1003.1 section 7.3.1.1 "xdigit". + * @retval false Anything else. + * @note Not only does this function works under the POSIX Locale, but + * also assumes its execution character set be what ruby calls an + * ASCII-compatible character set; which does not include for + * instance EBCDIC or UTF-16LE. + * @warning `c` is an int. This means that when you pass a `char` value + * here, it experiences "integer promotion" as defined in ISO/IEC + * 9899:2018 section 6.3.1.1 paragraph 1. + */ static inline int rb_isxdigit(int c) { @@ -131,6 +354,20 @@ rb_isxdigit(int c) RBIMPL_ATTR_CONST() RBIMPL_ATTR_CONSTEXPR(CXX11) RBIMPL_ATTR_ARTIFICIAL() +/** + * Our own locale-insensitive version of `isblank(3)`. 
+ * + * @param[in] c Byte in question to query. + * @retval true `c` is listed in IEEE 1003.1 section 7.3.1.1 "blank". + * @retval false Anything else. + * @note Not only does this function works under the POSIX Locale, but + * also assumes its execution character set be what ruby calls an + * ASCII-compatible character set; which does not include for + * instance EBCDIC or UTF-16LE. + * @warning `c` is an int. This means that when you pass a `char` value + * here, it experiences "integer promotion" as defined in ISO/IEC + * 9899:2018 section 6.3.1.1 paragraph 1. + */ static inline int rb_isblank(int c) { @@ -140,6 +377,20 @@ rb_isblank(int c) RBIMPL_ATTR_CONST() RBIMPL_ATTR_CONSTEXPR(CXX11) RBIMPL_ATTR_ARTIFICIAL() +/** + * Our own locale-insensitive version of `isspace(3)`. + * + * @param[in] c Byte in question to query. + * @retval true `c` is listed in IEEE 1003.1 section 7.3.1.1 "space". + * @retval false Anything else. + * @note Not only does this function works under the POSIX Locale, but + * also assumes its execution character set be what ruby calls an + * ASCII-compatible character set; which does not include for + * instance EBCDIC or UTF-16LE. + * @warning `c` is an int. This means that when you pass a `char` value + * here, it experiences "integer promotion" as defined in ISO/IEC + * 9899:2018 section 6.3.1.1 paragraph 1. + */ static inline int rb_isspace(int c) { @@ -149,6 +400,20 @@ rb_isspace(int c) RBIMPL_ATTR_CONST() RBIMPL_ATTR_CONSTEXPR(CXX11) RBIMPL_ATTR_ARTIFICIAL() +/** + * Our own locale-insensitive version of `iscntrl(3)`. + * + * @param[in] c Byte in question to query. + * @retval true `c` is listed in IEEE 1003.1 section 7.3.1.1 "cntrl". + * @retval false Anything else. + * @note Not only does this function works under the POSIX Locale, but + * also assumes its execution character set be what ruby calls an + * ASCII-compatible character set; which does not include for + * instance EBCDIC or UTF-16LE. + * @warning `c` is an int. 
This means that when you pass a `char` value + * here, it experiences "integer promotion" as defined in ISO/IEC + * 9899:2018 section 6.3.1.1 paragraph 1. + */ static inline int rb_iscntrl(int c) { @@ -158,6 +423,21 @@ rb_iscntrl(int c) RBIMPL_ATTR_CONST() RBIMPL_ATTR_CONSTEXPR(CXX11) RBIMPL_ATTR_ARTIFICIAL() +/** + * Identical to rb_isgraph(), except it also returns true for `' '`. + * + * @param[in] c Byte in question to query. + * @retval true `c` is listed in either IEEE 1003.1 section 7.3.1.1 + * "upper", "lower", "digit", "punct", or a `' '`. + * @retval false Anything else. + * @note Not only does this function works under the POSIX Locale, but + * also assumes its execution character set be what ruby calls an + * ASCII-compatible character set; which does not include for + * instance EBCDIC or UTF-16LE. + * @warning `c` is an int. This means that when you pass a `char` value + * here, it experiences "integer promotion" as defined in ISO/IEC + * 9899:2018 section 6.3.1.1 paragraph 1. + */ static inline int rb_isprint(int c) { @@ -167,6 +447,20 @@ rb_isprint(int c) RBIMPL_ATTR_CONST() RBIMPL_ATTR_CONSTEXPR(CXX11) RBIMPL_ATTR_ARTIFICIAL() +/** + * Our own locale-insensitive version of `ispunct(3)`. + * + * @param[in] c Byte in question to query. + * @retval true `c` is listed in IEEE 1003.1 section 7.3.1.1 "punct". + * @retval false Anything else. + * @note Not only does this function works under the POSIX Locale, but + * also assumes its execution character set be what ruby calls an + * ASCII-compatible character set; which does not include for + * instance EBCDIC or UTF-16LE. + * @warning `c` is an int. This means that when you pass a `char` value + * here, it experiences "integer promotion" as defined in ISO/IEC + * 9899:2018 section 6.3.1.1 paragraph 1. 
+ */ static inline int rb_ispunct(int c) { @@ -176,6 +470,21 @@ rb_ispunct(int c) RBIMPL_ATTR_CONST() RBIMPL_ATTR_CONSTEXPR(CXX11) RBIMPL_ATTR_ARTIFICIAL() +/** + * Our own locale-insensitive version of `isgraph(3)`. + * + * @param[in] c Byte in question to query. + * @retval true `c` is listed in either IEEE 1003.1 section 7.3.1.1 + * "upper", "lower", "digit", or "punct". + * @retval false Anything else. + * @note Not only does this function work under the POSIX Locale, but + * also assumes its execution character set be what ruby calls an + * ASCII-compatible character set; which does not include for + * instance EBCDIC or UTF-16LE. + * @warning `c` is an int. This means that when you pass a `char` value + * here, it experiences "integer promotion" as defined in ISO/IEC + * 9899:2018 section 6.3.1.1 paragraph 1. + */ static inline int rb_isgraph(int c) { @@ -185,6 +494,22 @@ rb_isgraph(int c) RBIMPL_ATTR_CONST() RBIMPL_ATTR_CONSTEXPR(CXX11) RBIMPL_ATTR_ARTIFICIAL() +/** + * Our own locale-insensitive version of `tolower(3)`. + * + * @param[in] c Byte in question to convert. + * @retval c The byte is not listed in IEEE 1003.1 section + * 7.3.1.1 "upper". + * @retval otherwise Byte converted using the map defined in IEEE 1003.1 + * section 7.3.1 "tolower". + * @note Not only does this function work under the POSIX Locale, but + * also assumes its execution character set be what ruby calls an + * ASCII-compatible character set; which does not include for + * instance EBCDIC or UTF-16LE. + * @warning `c` is an int. This means that when you pass a `char` value + * here, it experiences "integer promotion" as defined in ISO/IEC + * 9899:2018 section 6.3.1.1 paragraph 1. + */ static inline int rb_tolower(int c) { @@ -194,10 +519,27 @@ rb_tolower(int c) RBIMPL_ATTR_CONST() RBIMPL_ATTR_CONSTEXPR(CXX11) RBIMPL_ATTR_ARTIFICIAL() +/** + * Our own locale-insensitive version of `toupper(3)`. + * + * @param[in] c Byte in question to convert.
+ * @retval c The byte is not listed in IEEE 1003.1 section + * 7.3.1.1 "lower". + * @retval otherwise Byte converted using the map defined in IEEE 1003.1 + * section 7.3.1 "toupper". + * @note Not only does this function work under the POSIX Locale, but + * also assumes its execution character set be what ruby calls an + * ASCII-compatible character set; which does not include for + * instance EBCDIC or UTF-16LE. + * @warning `c` is an int. This means that when you pass a `char` value + * here, it experiences "integer promotion" as defined in ISO/IEC + * 9899:2018 section 6.3.1.1 paragraph 1. + */ static inline int rb_toupper(int c) { return rb_islower(c) ? (c&0x5f) : c; } +/** @} */ #endif /* RBIMPL_CTYPE_H */ diff --git a/include/ruby/internal/dllexport.h b/include/ruby/internal/dllexport.h index 1488140854..71026e7100 100644 --- a/include/ruby/internal/dllexport.h +++ b/include/ruby/internal/dllexport.h @@ -17,18 +17,27 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. - * @brief Tewaking visibility of C variables/functions. + * extension libraries. They could be written in C++98. + * @brief Tweaking visibility of C variables/functions. */ #include "ruby/internal/config.h" #include "ruby/internal/compiler_is.h" -/* For MinGW, we need __declspec(dllimport) for RUBY_EXTERN on MJIT. - mswin's RUBY_EXTERN already has that. See also: win32/Makefile.sub */ +/** + * Declaration of externally visible global variables. Here "externally" means + * they should be visible from extension libraries. Depending on operating + * systems (dynamic linkers, to be precise), global variables inside of a DLL + * may or may not be visible from outside of that DLL by default.
This + * declaration manually tweaks that default and ensures the declared variable + * be truly globally visible. + * + * ```CXX + * extern VALUE foo; // hidden on some OS + * RUBY_EXTERN VALUE foo; // ensure visible + * ``` + */ #undef RUBY_EXTERN -#if defined(MJIT_HEADER) && defined(_WIN32) -# define RUBY_EXTERN extern __declspec(dllimport) -#elif defined(RUBY_EXPORT) +#if defined(RUBY_EXPORT) # define RUBY_EXTERN extern #elif defined(_WIN32) # define RUBY_EXTERN extern __declspec(dllimport) @@ -48,28 +57,7 @@ # define RUBY_FUNC_EXPORTED /* void */ #endif -/* These macros are used for functions which are exported only for MJIT - and NOT ensured to be exported in future versions. */ - -#if ! defined(MJIT_HEADER) -# define MJIT_FUNC_EXPORTED RUBY_FUNC_EXPORTED -#elif ! RBIMPL_COMPILER_IS(MSVC) -# define MJIT_FUNC_EXPORTED RUBY_FUNC_EXPORTED -#else -# define MJIT_FUNC_EXPORTED static -#endif - -#define MJIT_SYMBOL_EXPORT_BEGIN RUBY_SYMBOL_EXPORT_BEGIN -#define MJIT_SYMBOL_EXPORT_END RUBY_SYMBOL_EXPORT_END - -/* On mswin, MJIT header transformation can't be used since cl.exe can't output - preprocessed output preserving macros. So this `MJIT_STATIC` is needed - to force non-static function to static on MJIT header to avoid symbol conflict. */ -#ifdef MJIT_HEADER -# define MJIT_STATIC static -#else -# define MJIT_STATIC -#endif +/** @endcond */ /** Shortcut macro equivalent to `RUBY_SYMBOL_EXPORT_BEGIN extern "C" {`. * \@shyouhei finds it handy. */ diff --git a/include/ruby/internal/dosish.h b/include/ruby/internal/dosish.h index eb71e36505..7d354ddd1a 100644 --- a/include/ruby/internal/dosish.h +++ b/include/ruby/internal/dosish.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. 
* @brief Support for so-called dosish systems. */ #ifdef __CYGWIN__ @@ -38,20 +38,46 @@ #include "ruby/win32.h" #endif +/** The delimiter of `PATH` environment variable. */ #if defined(DOSISH) #define PATH_SEP ";" #else #define PATH_SEP ":" #endif +/** Identical to #PATH_SEP, except it is of type `char`. */ #define PATH_SEP_CHAR PATH_SEP[0] +/** + * @private + * + * @deprecated This macro once was a thing in the old days, but makes no sense + * any longer today. Exists here for backwards compatibility + * only. You can safely forget about it. + * + * @internal + * + * For historical interests: there was an operating system called Human68k + * which used an environment variable called `"path"` for this purpose. + */ #define PATH_ENV "PATH" #if defined(DOSISH) #define ENV_IGNORECASE #endif +/** + * Stone age assumption was that an operating system supports only one file + * system at a moment. This macro was to detect if such (one and only) file + * system has case sensitivity. This assumption is largely not true any + * longer; most operating systems can mount many kinds of file systems side by + * side. Also there are file systems that do or do not ignore cases depending + * on configuration (e.g. EXT4's `casefold` feature). + * + * This macro is still used internally (for instance Ruby level constant + * `File::FNM_SYSCASE` depends on it), but it is basically a wrong idea for you + * to use it today. Please just find another way. 
+ */ #ifndef CASEFOLD_FILESYSTEM # if defined DOSISH # define CASEFOLD_FILESYSTEM 1 diff --git a/include/ruby/internal/encoding/coderange.h b/include/ruby/internal/encoding/coderange.h new file mode 100644 index 0000000000..7a81208c9e --- /dev/null +++ b/include/ruby/internal/encoding/coderange.h @@ -0,0 +1,202 @@ +#ifndef RUBY_INTERNAL_ENCODING_CODERANGE_H /*-*-C++-*-vi:se ft=cpp:*/ +#define RUBY_INTERNAL_ENCODING_CODERANGE_H +/** + * @file + * @author Ruby developers <ruby-core@ruby-lang.org> + * @copyright This file is a part of the programming language Ruby. + * Permission is hereby granted, to either redistribute and/or + * modify this file, provided that the conditions mentioned in the + * file COPYING are met. Consult the file for details. + * @warning Symbols prefixed with either `RBIMPL` or `rbimpl` are + * implementation details. Don't take them as canon. They could + * rapidly appear then vanish. The name (path) of this header file + * is also an implementation detail. Do not expect it to persist + * at the place it is now. Developers are free to move it anywhere + * anytime at will. + * @note To ruby-core: remember that this header can be possibly + * recursively included from extension libraries written in C++. + * Do not expect for instance `__VA_ARGS__` is always available. + * We assume C99 for ruby itself but we don't assume languages of + * extension libraries. They could be written in C++98. + * @brief Routines for code ranges. + */ + +#include "ruby/internal/attr/const.h" +#include "ruby/internal/attr/pure.h" +#include "ruby/internal/dllexport.h" +#include "ruby/internal/fl_type.h" +#include "ruby/internal/value.h" + +RBIMPL_SYMBOL_EXPORT_BEGIN() + +/** What rb_enc_str_coderange() returns. */ +enum ruby_coderange_type { + + /** The object's coderange is unclear yet. */ + RUBY_ENC_CODERANGE_UNKNOWN = 0, + + /** The object holds 0 to 127 inclusive and nothing else. 
*/ + RUBY_ENC_CODERANGE_7BIT = ((int)RUBY_FL_USER8), + + /** The object's encoding and contents are consistent with each other */ + RUBY_ENC_CODERANGE_VALID = ((int)RUBY_FL_USER9), + + /** The object holds invalid/malformed/broken character(s). */ + RUBY_ENC_CODERANGE_BROKEN = ((int)(RUBY_FL_USER8|RUBY_FL_USER9)), + + /** Where the coderange resides. */ + RUBY_ENC_CODERANGE_MASK = (RUBY_ENC_CODERANGE_7BIT| + RUBY_ENC_CODERANGE_VALID| + RUBY_ENC_CODERANGE_BROKEN) +}; + +RBIMPL_ATTR_CONST() +/** + * @private + * + * This is an implementation detail of #RB_ENC_CODERANGE_CLEAN_P. People don't + * use it directly. + * + * @param[in] cr An enum ::ruby_coderange_type. + * @retval nonzero It is. + * @retval 0 It isn't. + */ +static inline int +rb_enc_coderange_clean_p(int cr) +{ + return (cr ^ (cr >> 1)) & RUBY_ENC_CODERANGE_7BIT; +} + +RBIMPL_ATTR_CONST() +/** + * Queries if a code range is "clean". "Clean" in this context means it is + * known and valid. + * + * @param[in] cr An enum ::ruby_coderange_type. + * @retval 1 It is. + * @retval 0 It isn't. + */ +static inline bool +RB_ENC_CODERANGE_CLEAN_P(enum ruby_coderange_type cr) +{ + return rb_enc_coderange_clean_p(cr); +} + +RBIMPL_ATTR_PURE_UNLESS_DEBUG() +/** + * Queries the (inline) code range of the passed object. The object must be + * capable of having inline encoding. Using this macro needs deep + * understanding of bit level object binary layout. + * + * @param[in] obj Target object. + * @return An enum ::ruby_coderange_type. + */ +static inline enum ruby_coderange_type +RB_ENC_CODERANGE(VALUE obj) +{ + VALUE ret = RB_FL_TEST_RAW(obj, RUBY_ENC_CODERANGE_MASK); + + return RBIMPL_CAST((enum ruby_coderange_type)ret); +} + +RBIMPL_ATTR_PURE_UNLESS_DEBUG() +/** + * Queries if the (inline) code range of the passed object is + * ::RUBY_ENC_CODERANGE_7BIT. The object must be capable of having inline + * encoding. Using this macro needs deep understanding of bit level object + * binary layout. + * + * @param[in] obj Target object.
+ * @retval 1 It is ascii only. + * @retval 0 Otherwise (including cases when the range is not known). + */ +static inline bool +RB_ENC_CODERANGE_ASCIIONLY(VALUE obj) +{ + return RB_ENC_CODERANGE(obj) == RUBY_ENC_CODERANGE_7BIT; +} + +/** + * Destructively modifies the passed object so that its (inline) code range is + * the passed one. The object must be capable of having inline encoding. + * Using this macro needs deep understanding of bit level object binary layout. + * + * @param[out] obj Target object. + * @param[in] cr An enum ::ruby_coderange_type. + * @post `obj`'s code range is `cr`. + */ +static inline void +RB_ENC_CODERANGE_SET(VALUE obj, enum ruby_coderange_type cr) +{ + RB_FL_UNSET_RAW(obj, RUBY_ENC_CODERANGE_MASK); + RB_FL_SET_RAW(obj, cr); +} + +/** + * Destructively clears the passed object's (inline) code range. The object + * must be capable of having inline encoding. Using this macro needs deep + * understanding of bit level object binary layout. + * + * @param[out] obj Target object. + * @post `obj`'s code range is ::RUBY_ENC_CODERANGE_UNKNOWN. + */ +static inline void +RB_ENC_CODERANGE_CLEAR(VALUE obj) +{ + RB_FL_UNSET_RAW(obj, RUBY_ENC_CODERANGE_MASK); +} + +RBIMPL_ATTR_CONST() +/* assumed ASCII compatibility */ +/** + * "Mix" two code ranges into one. This is handy for instance when you + * concatenate two strings into one. Consider one of them is valid but the + * other isn't. The result must be invalid. This macro computes that kind of + * mixture. + * + * @param[in] a An enum ::ruby_coderange_type. + * @param[in] b Another enum ::ruby_coderange_type. + * @return The `a` "and" `b`.
+ */ +static inline enum ruby_coderange_type +RB_ENC_CODERANGE_AND(enum ruby_coderange_type a, enum ruby_coderange_type b) +{ + if (a == RUBY_ENC_CODERANGE_7BIT) { + return b; + } + else if (a != RUBY_ENC_CODERANGE_VALID) { + return RUBY_ENC_CODERANGE_UNKNOWN; + } + else if (b == RUBY_ENC_CODERANGE_7BIT) { + return RUBY_ENC_CODERANGE_VALID; + } + else { + return b; + } +} + +#define ENC_CODERANGE_MASK RUBY_ENC_CODERANGE_MASK /**< @old{RUBY_ENC_CODERANGE_MASK} */ +#define ENC_CODERANGE_UNKNOWN RUBY_ENC_CODERANGE_UNKNOWN /**< @old{RUBY_ENC_CODERANGE_UNKNOWN} */ +#define ENC_CODERANGE_7BIT RUBY_ENC_CODERANGE_7BIT /**< @old{RUBY_ENC_CODERANGE_7BIT} */ +#define ENC_CODERANGE_VALID RUBY_ENC_CODERANGE_VALID /**< @old{RUBY_ENC_CODERANGE_VALID} */ +#define ENC_CODERANGE_BROKEN RUBY_ENC_CODERANGE_BROKEN /**< @old{RUBY_ENC_CODERANGE_BROKEN} */ +#define ENC_CODERANGE_CLEAN_P(cr) RB_ENC_CODERANGE_CLEAN_P(cr) /**< @old{RB_ENC_CODERANGE_CLEAN_P} */ +#define ENC_CODERANGE(obj) RB_ENC_CODERANGE(obj) /**< @old{RB_ENC_CODERANGE} */ +#define ENC_CODERANGE_ASCIIONLY(obj) RB_ENC_CODERANGE_ASCIIONLY(obj) /**< @old{RB_ENC_CODERANGE_ASCIIONLY} */ +#define ENC_CODERANGE_SET(obj,cr) RB_ENC_CODERANGE_SET(obj,cr) /**< @old{RB_ENC_CODERANGE_SET} */ +#define ENC_CODERANGE_CLEAR(obj) RB_ENC_CODERANGE_CLEAR(obj) /**< @old{RB_ENC_CODERANGE_CLEAR} */ +#define ENC_CODERANGE_AND(a, b) RB_ENC_CODERANGE_AND(a, b) /**< @old{RB_ENC_CODERANGE_AND} */ +#define ENCODING_CODERANGE_SET(obj, encindex, cr) RB_ENCODING_CODERANGE_SET(obj, encindex, cr) /**< @old{RB_ENCODING_CODERANGE_SET} */ + +/** @cond INTERNAL_MACRO */ +#define RB_ENC_CODERANGE RB_ENC_CODERANGE +#define RB_ENC_CODERANGE_AND RB_ENC_CODERANGE_AND +#define RB_ENC_CODERANGE_ASCIIONLY RB_ENC_CODERANGE_ASCIIONLY +#define RB_ENC_CODERANGE_CLEAN_P RB_ENC_CODERANGE_CLEAN_P +#define RB_ENC_CODERANGE_CLEAR RB_ENC_CODERANGE_CLEAR +#define RB_ENC_CODERANGE_SET RB_ENC_CODERANGE_SET +/** @endcond */ + +RBIMPL_SYMBOL_EXPORT_END() + +#endif /* 
RUBY_INTERNAL_ENCODING_CODERANGE_H */ diff --git a/include/ruby/internal/encoding/ctype.h b/include/ruby/internal/encoding/ctype.h new file mode 100644 index 0000000000..05c314aeb3 --- /dev/null +++ b/include/ruby/internal/encoding/ctype.h @@ -0,0 +1,258 @@ +#ifndef RUBY_INTERNAL_ENCODING_CTYPE_H /*-*-C++-*-vi:se ft=cpp:*/ +#define RUBY_INTERNAL_ENCODING_CTYPE_H +/** + * @file + * @author Ruby developers <ruby-core@ruby-lang.org> + * @copyright This file is a part of the programming language Ruby. + * Permission is hereby granted, to either redistribute and/or + * modify this file, provided that the conditions mentioned in the + * file COPYING are met. Consult the file for details. + * @warning Symbols prefixed with either `RBIMPL` or `rbimpl` are + * implementation details. Don't take them as canon. They could + * rapidly appear then vanish. The name (path) of this header file + * is also an implementation detail. Do not expect it to persist + * at the place it is now. Developers are free to move it anywhere + * anytime at will. + * @note To ruby-core: remember that this header can be possibly + * recursively included from extension libraries written in C++. + * Do not expect for instance `__VA_ARGS__` is always available. + * We assume C99 for ruby itself but we don't assume languages of + * extension libraries. They could be written in C++98. + * @brief Routines to query character types. + */ + +#include "ruby/onigmo.h" +#include "ruby/internal/attr/const.h" +#include "ruby/internal/dllexport.h" +#include "ruby/internal/encoding/encoding.h" +#include "ruby/internal/value.h" + +RBIMPL_SYMBOL_EXPORT_BEGIN() + +/** + * Queries if the passed pointer points to a newline character. What is a + * newline and what is not depends on the passed encoding. + * + * @param[in] p Pointer to a possibly-middle of a character. + * @param[in] e End of the string. + * @param[in] enc Encoding. + * @retval false It isn't. + * @retval true It is.
+ */ +static inline bool +rb_enc_is_newline(const char *p, const char *e, rb_encoding *enc) +{ + OnigUChar *up = RBIMPL_CAST((OnigUChar *)p); + OnigUChar *ue = RBIMPL_CAST((OnigUChar *)e); + + return ONIGENC_IS_MBC_NEWLINE(enc, up, ue); +} + +/** + * Queries if the passed code point is of passed character type in the passed + * encoding. The "character type" here is a set of macros defined in onigmo.h, + * like `ONIGENC_CTYPE_PUNCT`. + * + * @param[in] c An `OnigCodePoint` value. + * @param[in] t An `OnigCtype` value. + * @param[in] enc A `rb_encoding*` value. + * @retval true `c` is of `t` in `enc`. + * @retval false Otherwise. + */ +static inline bool +rb_enc_isctype(OnigCodePoint c, OnigCtype t, rb_encoding *enc) +{ + return ONIGENC_IS_CODE_CTYPE(enc, c, t); +} + +/** + * Identical to rb_isascii(), except it additionally takes an encoding. + * + * @param[in] c A code point. + * @param[in] enc An encoding. + * @retval false `c` is out of range of ASCII character set in `enc`. + * @retval true Otherwise. + * + * @internal + * + * `enc` is ignored. This is at least an intentional implementation detail + * (not a bug). But there could be rooms for future extensions. + */ +static inline bool +rb_enc_isascii(OnigCodePoint c, rb_encoding *enc) +{ + return ONIGENC_IS_CODE_ASCII(c); +} + +/** + * Identical to rb_isalpha(), except it additionally takes an encoding. + * + * @param[in] c A code point. + * @param[in] enc An encoding. + * @retval true `enc` classifies `c` as "ALPHA". + * @retval false Otherwise. + */ +static inline bool +rb_enc_isalpha(OnigCodePoint c, rb_encoding *enc) +{ + return ONIGENC_IS_CODE_ALPHA(enc, c); +} + +/** + * Identical to rb_islower(), except it additionally takes an encoding. + * + * @param[in] c A code point. + * @param[in] enc An encoding. + * @retval true `enc` classifies `c` as "LOWER". + * @retval false Otherwise. 
+ */ +static inline bool +rb_enc_islower(OnigCodePoint c, rb_encoding *enc) +{ + return ONIGENC_IS_CODE_LOWER(enc, c); +} + +/** + * Identical to rb_isupper(), except it additionally takes an encoding. + * + * @param[in] c A code point. + * @param[in] enc An encoding. + * @retval true `enc` classifies `c` as "UPPER". + * @retval false Otherwise. + */ +static inline bool +rb_enc_isupper(OnigCodePoint c, rb_encoding *enc) +{ + return ONIGENC_IS_CODE_UPPER(enc, c); +} + +/** + * Identical to rb_iscntrl(), except it additionally takes an encoding. + * + * @param[in] c A code point. + * @param[in] enc An encoding. + * @retval true `enc` classifies `c` as "CNTRL". + * @retval false Otherwise. + */ +static inline bool +rb_enc_iscntrl(OnigCodePoint c, rb_encoding *enc) +{ + return ONIGENC_IS_CODE_CNTRL(enc, c); +} + +/** + * Identical to rb_ispunct(), except it additionally takes an encoding. + * + * @param[in] c A code point. + * @param[in] enc An encoding. + * @retval true `enc` classifies `c` as "PUNCT". + * @retval false Otherwise. + */ +static inline bool +rb_enc_ispunct(OnigCodePoint c, rb_encoding *enc) +{ + return ONIGENC_IS_CODE_PUNCT(enc, c); +} + +/** + * Identical to rb_isalnum(), except it additionally takes an encoding. + * + * @param[in] c A code point. + * @param[in] enc An encoding. + * @retval true `enc` classifies `c` as "ANUM". + * @retval false Otherwise. + */ +static inline bool +rb_enc_isalnum(OnigCodePoint c, rb_encoding *enc) +{ + return ONIGENC_IS_CODE_ALNUM(enc, c); +} + +/** + * Identical to rb_isprint(), except it additionally takes an encoding. + * + * @param[in] c A code point. + * @param[in] enc An encoding. + * @retval true `enc` classifies `c` as "PRINT". + * @retval false Otherwise. + */ +static inline bool +rb_enc_isprint(OnigCodePoint c, rb_encoding *enc) +{ + return ONIGENC_IS_CODE_PRINT(enc, c); +} + +/** + * Identical to rb_isspace(), except it additionally takes an encoding. + * + * @param[in] c A code point. 
+ * @param[in] enc An encoding. + * @retval true `enc` classifies `c` as "SPACE". + * @retval false Otherwise. + */ +static inline bool +rb_enc_isspace(OnigCodePoint c, rb_encoding *enc) +{ + return ONIGENC_IS_CODE_SPACE(enc, c); +} + +/** + * Identical to rb_isdigit(), except it additionally takes an encoding. + * + * @param[in] c A code point. + * @param[in] enc An encoding. + * @retval true `enc` classifies `c` as "DIGIT". + * @retval false Otherwise. + */ +static inline bool +rb_enc_isdigit(OnigCodePoint c, rb_encoding *enc) +{ + return ONIGENC_IS_CODE_DIGIT(enc, c); +} + +RBIMPL_ATTR_CONST() +/** + * Identical to rb_toupper(), except it additionally takes an encoding. + * + * @param[in] c A code point. + * @param[in] enc An encoding. + * @return `c`'s (Ruby's definition of) upper case counterpart. + * + * @internal + * + * As `RBIMPL_ATTR_CONST` implies this function ignores `enc`. + */ +int rb_enc_toupper(int c, rb_encoding *enc); + +RBIMPL_ATTR_CONST() +/** + * Identical to rb_tolower(), except it additionally takes an encoding. + * + * @param[in] c A code point. + * @param[in] enc An encoding. + * @return `c`'s (Ruby's definition of) lower case counterpart. + * + * @internal + * + * As `RBIMPL_ATTR_CONST` implies this function ignores `enc`.
+ */ +int rb_enc_tolower(int c, rb_encoding *enc); + +RBIMPL_SYMBOL_EXPORT_END() + +/** @cond INTERNAL_MACRO */ +#define rb_enc_is_newline rb_enc_is_newline +#define rb_enc_isalnum rb_enc_isalnum +#define rb_enc_isalpha rb_enc_isalpha +#define rb_enc_isascii rb_enc_isascii +#define rb_enc_isctype rb_enc_isctype +#define rb_enc_isdigit rb_enc_isdigit +#define rb_enc_islower rb_enc_islower +#define rb_enc_isprint rb_enc_isprint +#define rb_enc_iscntrl rb_enc_iscntrl +#define rb_enc_ispunct rb_enc_ispunct +#define rb_enc_isspace rb_enc_isspace +#define rb_enc_isupper rb_enc_isupper +/** @endcond */ + +#endif /* RUBY_INTERNAL_ENCODING_CTYPE_H */ diff --git a/include/ruby/internal/encoding/encoding.h b/include/ruby/internal/encoding/encoding.h new file mode 100644 index 0000000000..a680651a81 --- /dev/null +++ b/include/ruby/internal/encoding/encoding.h @@ -0,0 +1,1044 @@ +#ifndef RUBY_INTERNAL_ENCODING_ENCODING_H /*-*-C++-*-vi:se ft=cpp:*/ +#define RUBY_INTERNAL_ENCODING_ENCODING_H +/** + * @file + * @author Ruby developers <ruby-core@ruby-lang.org> + * @copyright This file is a part of the programming language Ruby. + * Permission is hereby granted, to either redistribute and/or + * modify this file, provided that the conditions mentioned in the + * file COPYING are met. Consult the file for details. + * @warning Symbols prefixed with either `RBIMPL` or `rbimpl` are + * implementation details. Don't take them as canon. They could + * rapidly appear then vanish. The name (path) of this header file + * is also an implementation detail. Do not expect it to persist + * at the place it is now. Developers are free to move it anywhere + * anytime at will. + * @note To ruby-core: remember that this header can be possibly + * recursively included from extension libraries written in C++. + * Do not expect for instance `__VA_ARGS__` is always available. + * We assume C99 for ruby itself but we don't assume languages of + * extension libraries. They could be written in C++98. 
+ * @brief Defines ::rb_encoding + */ + +#include "ruby/oniguruma.h" +#include "ruby/internal/attr/const.h" +#include "ruby/internal/attr/deprecated.h" +#include "ruby/internal/attr/noalias.h" +#include "ruby/internal/attr/pure.h" +#include "ruby/internal/attr/returns_nonnull.h" +#include "ruby/internal/dllexport.h" +#include "ruby/internal/encoding/coderange.h" +#include "ruby/internal/value.h" +#include "ruby/internal/core/rbasic.h" +#include "ruby/internal/fl_type.h" + +RBIMPL_SYMBOL_EXPORT_BEGIN() + +/** + * `Encoding` class. + * + * @ingroup object + */ +RUBY_EXTERN VALUE rb_cEncoding; + +/** + * @private + * + * Bit constants used when embedding encodings into ::RBasic::flags. Extension + * libraries must not bother with such things. + */ +enum ruby_encoding_consts { + + /** Max possible number of embeddable encodings. */ + RUBY_ENCODING_INLINE_MAX = 127, + + /** Where inline encodings reside. */ + RUBY_ENCODING_SHIFT = (RUBY_FL_USHIFT+10), + + /** Bits we use to store inline encodings. */ + RUBY_ENCODING_MASK = (RUBY_ENCODING_INLINE_MAX<<RUBY_ENCODING_SHIFT + /* RUBY_FL_USER10..RUBY_FL_USER16 */), + + /** Max possible length of an encoding name. */ + RUBY_ENCODING_MAXNAMELEN = 42 +}; + +#define ENCODING_INLINE_MAX RUBY_ENCODING_INLINE_MAX /**< @old{RUBY_ENCODING_INLINE_MAX} */ +#define ENCODING_SHIFT RUBY_ENCODING_SHIFT /**< @old{RUBY_ENCODING_SHIFT} */ +#define ENCODING_MASK RUBY_ENCODING_MASK /**< @old{RUBY_ENCODING_MASK} */ + +/** + * Destructively assigns the passed encoding to the passed object. The object + * must be capable of having inline encoding. Using this macro needs deep + * understanding of bit level object binary layout. + * + * @param[out] obj Target object to modify. + * @param[in] encindex Encoding in encindex format. + * @post `obj`'s encoding is `encindex`.
+ */ +static inline void +RB_ENCODING_SET_INLINED(VALUE obj, int encindex) +{ + VALUE f = /* upcast */ encindex; + + f <<= RUBY_ENCODING_SHIFT; + RB_FL_UNSET_RAW(obj, RUBY_ENCODING_MASK); + RB_FL_SET_RAW(obj, f); +} + +/** + * Queries the encoding of the passed object. The encoding must be smaller + * than ::RUBY_ENCODING_INLINE_MAX, which means you have some assumption on the + * return value. This means the API is for internal use only. + * + * @param[in] obj Target object. + * @return `obj`'s encoding index. + */ +static inline int +RB_ENCODING_GET_INLINED(VALUE obj) +{ + VALUE ret = RB_FL_TEST_RAW(obj, RUBY_ENCODING_MASK) >> RUBY_ENCODING_SHIFT; + + return RBIMPL_CAST((int)ret); +} + +#define ENCODING_SET_INLINED(obj,i) RB_ENCODING_SET_INLINED(obj,i) /**< @old{RB_ENCODING_SET_INLINED} */ +#define ENCODING_SET(obj,i) RB_ENCODING_SET(obj,i) /**< @old{RB_ENCODING_SET} */ +#define ENCODING_GET_INLINED(obj) RB_ENCODING_GET_INLINED(obj) /**< @old{RB_ENCODING_GET_INLINED} */ +#define ENCODING_GET(obj) RB_ENCODING_GET(obj) /**< @old{RB_ENCODING_GET} */ +#define ENCODING_IS_ASCII8BIT(obj) RB_ENCODING_IS_ASCII8BIT(obj) /**< @old{RB_ENCODING_IS_ASCII8BIT} */ +#define ENCODING_MAXNAMELEN RUBY_ENCODING_MAXNAMELEN /**< @old{RUBY_ENCODING_MAXNAMELEN} */ + +/** + * The type of encoding. Our design here is we take Oniguruma/Onigmo's + * multilingualisation schema as our base data structure. + */ +typedef const OnigEncodingType rb_encoding; + +RBIMPL_ATTR_NOALIAS() +/** + * Converts a character option to its encoding. It only supports a very + * limited set of Japanese encodings due to its Japanese origin. Ruby still + * has this in-core for backwards compatibility. But new codes must not bother + * such concept like one-character encoding option. Consider deprecated in + * practice. + * + * @param[in] c One of `['n', 'e', 's', 'u', 'i', 'x', 'm']`. + * @param[out] option Return buffer. + * @param[out] kcode Return buffer. + * @retval 1 `c` understood properly. 
+ * @retval 0 `c` is not understood. + * @post `option` is a ::OnigOptionType. + * @post `kcode` is an enum `ruby_preserved_encindex`. + * + * @internal + * + * `kcode` is opaque because `ruby_preserved_encindex` is not visible from + * extension libraries. But who cares? + */ +int rb_char_to_option_kcode(int c, int *option, int *kcode); + +/** + * Creates a new "dummy" encoding. Roughly speaking, an encoding is dummy when + * it is stateful. Notable example of dummy encoding are those defined in + * ISO/IEC 2022 + * + * @param[in] name Name of the creating encoding. + * @exception rb_eArgError Duplicated or malformed `name`. + * @return New dummy encoding's index. + * @post Encoding named `name` is created, whose index is the return + * value. + */ +int rb_define_dummy_encoding(const char *name); + +RBIMPL_ATTR_PURE() +/** + * Queries if the passed encoding is dummy. + * + * @param[in] enc Encoding in question. + * @retval 1 It is. + * @retval 0 It isn't. + */ +int rb_enc_dummy_p(rb_encoding *enc); + +RBIMPL_ATTR_PURE() +/** + * Queries the index of the encoding. An encoding's index is a Ruby-local + * concept. It is a (sequential) number assigned to each encoding. + * + * @param[in] enc Encoding in question. + * @return Its index. + * @note You can pass null pointers to this function. It is equivalent + * to rb_usascii_encindex() then. + */ +int rb_enc_to_index(rb_encoding *enc); + +/** + * Queries the index of the encoding of the passed object, if any. + * + * @param[in] obj Object in question. + * @retval -1 `obj` is incapable of having an encoding. + * @retval otherwise `obj`'s encoding's index. + */ +int rb_enc_get_index(VALUE obj); + +/** + * @alias{rb_enc_get_index} + * + * @internal + * + * Implementation wise this is not a verbatim alias of rb_enc_get_index(). But + * the API is consistent. Don't bother. 
+ */ +static inline int +RB_ENCODING_GET(VALUE obj) +{ + int encindex = RB_ENCODING_GET_INLINED(obj); + + if (encindex == RUBY_ENCODING_INLINE_MAX) { + return rb_enc_get_index(obj); + } + else { + return encindex; + } +} + +/** + * Destructively assigns an encoding (via its index) to an object. + * + * @param[out] obj Object in question. + * @param[in] encindex An encoding index. + * @exception rb_eFrozenError `obj` is frozen. + * @exception rb_eArgError `obj` is incapable of having an encoding. + * @exception rb_eEncodingError `encindex` is out of bounds. + * @exception rb_eLoadError Failed to load the encoding. + */ +void rb_enc_set_index(VALUE obj, int encindex); + +/** @alias{rb_enc_set_index} */ +static inline void +RB_ENCODING_SET(VALUE obj, int encindex) +{ + rb_enc_set_index(obj, encindex); +} + +/** + * This is #RB_ENCODING_SET + RB_ENC_CODERANGE_SET combo. The object must be + * capable of having inline encoding. Using this macro needs deep + * understanding of bit level object binary layout. + * + * @param[out] obj Target object. + * @param[in] encindex Encoding in encindex format. + * @param[in] cr An enum ::ruby_coderange_type. + * @post `obj`'s encoding is `encindex`. + * @post `obj`'s code range is `cr`. + */ +static inline void +RB_ENCODING_CODERANGE_SET(VALUE obj, int encindex, enum ruby_coderange_type cr) +{ + RB_ENCODING_SET(obj, encindex); + RB_ENC_CODERANGE_SET(obj, cr); +} + +RBIMPL_ATTR_PURE() +/** + * Queries if the passed object can have its encoding. + * + * @param[in] obj Object in question. + * @retval 1 It can. + * @retval 0 It cannot. + */ +int rb_enc_capable(VALUE obj); + +/** + * Queries the index of the encoding. + * + * @param[in] name Name of the encoding to find. + * @exception rb_eArgError No such encoding named `name`. + * @retval -1 `name` exists, but unable to load. + * @retval otherwise Index of encoding named `name`. + */ +int rb_enc_find_index(const char *name); + +/** + * Registers an "alias" name. 
In the wild, an encoding can be called using + * multiple names. For instance an encoding known as `"CP932"` is also called + * `"SJIS"` on occasions. This API registers such relationships. + * + * @param[in] alias New name. + * @param[in] orig Old name. + * @exception rb_eArgError `alias` is duplicated or malformed. + * @retval -1 Failed to load `orig`. + * @retval otherwise The index of `orig` and `alias`. + * @post `alias` is a synonym of `orig`. They refer to the identical + * encoding. + */ +int rb_enc_alias(const char *alias, const char *orig); + +/** + * Obtains an encoding index from a wider range of objects (than + * rb_enc_find_index()). + * + * @param[in] obj An ::rb_cEncoding, or its name in ::rb_cString. + * @retval -1 `obj` is unexpected type/contents. + * @retval otherwise Index corresponding to `obj`. + */ +int rb_to_encoding_index(VALUE obj); + +/** + * Identical to rb_find_encoding(), except it raises an exception instead of + * returning NULL. + * + * @param[in] obj An ::rb_cEncoding, or its name in ::rb_cString. + * @exception rb_eTypeError `obj` is neither ::rb_cEncoding nor ::rb_cString. + * @exception rb_eArgError `obj` is an unknown encoding name. + * @return Encoding of `obj`. + */ +rb_encoding *rb_to_encoding(VALUE obj); + +/** + * Identical to rb_to_encoding_index(), except the return type. + * + * @param[in] obj An ::rb_cEncoding, or its name in ::rb_cString. + * @exception rb_eTypeError `obj` is neither ::rb_cEncoding nor ::rb_cString. + * @retval NULL No such encoding. + * @retval otherwise Encoding of `obj`. + */ +rb_encoding *rb_find_encoding(VALUE obj); + +/** + * Identical to rb_enc_get_index(), except the return type. + * + * @param[in] obj Object in question. + * @retval NULL Obj is incapable of having an encoding. + * @retval otherwise `obj`'s encoding. + */ +rb_encoding *rb_enc_get(VALUE obj); + +/** + * Look for the "common" encoding between the two. One character can or cannot + * be expressed depending on an encoding.
This function finds the super-set of + * encodings that satisfy contents of both arguments. If that is impossible + * returns NULL. + * + * @param[in] str1 An object. + * @param[in] str2 Another object. + * @retval NULL No encoding can satisfy both at once. + * @retval otherwise Common encoding between the two. + * @note Arguments can be non-string, e.g. Regexp. + */ +rb_encoding *rb_enc_compatible(VALUE str1, VALUE str2); + +/** + * Identical to rb_enc_compatible(), except it raises an exception instead of + * returning NULL. + * + * @param[in] str1 An object. + * @param[in] str2 Another object. + * @exception rb_eEncCompatError No encoding can satisfy both. + * @return Common encoding between the two. + * @note Arguments can be non-string, e.g. Regexp. + */ +rb_encoding *rb_enc_check(VALUE str1,VALUE str2); + +/** + * Identical to rb_enc_set_index(), except it additionally does contents fix-up + * depending on the passed object. It for instance changes the byte length of + * terminating `U+0000` according to the passed encoding. + * + * @param[out] obj Object in question. + * @param[in] encindex An encoding index. + * @exception rb_eFrozenError `obj` is frozen. + * @exception rb_eArgError `obj` is incapable of having an encoding. + * @exception rb_eEncodingError `encindex` is out of bounds. + * @exception rb_eLoadError Failed to load the encoding. + * @return The passed `obj`. + * @post `obj`'s contents might be fixed according to `encindex`. + */ +VALUE rb_enc_associate_index(VALUE obj, int encindex); + +/** + * Identical to rb_enc_associate_index(), except it takes an encoding itself + * instead of its index. + * + * @param[out] obj Object in question. + * @param[in] enc An encoding. + * @exception rb_eFrozenError `obj` is frozen. + * @exception rb_eArgError `obj` is incapable of having an encoding. + * @return The passed `obj`. + * @post `obj`'s contents might be fixed according to `enc`. 
+ */ +VALUE rb_enc_associate(VALUE obj, rb_encoding *enc); + +/** + * Destructively copies the encoding of the latter object to that of former + * one. It can also be seen as a routine identical to + * rb_enc_associate_index(), except it takes an object's encoding instead of an + * encoding's index. + * + * @param[out] dst Object to modify. + * @param[in] src Object to reference. + * @exception rb_eFrozenError `dst` is frozen. + * @exception rb_eArgError `dst` is incapable of having an encoding. + * @exception rb_eEncodingError `src` is incapable of having an encoding. + * @post `dst`'s encoding is that of `src`'s. + */ +void rb_enc_copy(VALUE dst, VALUE src); + + +/** + * Identical to rb_find_encoding(), except it takes an encoding index instead + * of a Ruby object. + * + * @param[in] idx An encoding index. + * @retval NULL No such encoding. + * @retval otherwise An encoding whose index is `idx`. + */ +rb_encoding *rb_enc_from_index(int idx); + +/** + * Identical to rb_find_encoding(), except it takes a C's string instead of + * Ruby's. + * + * @param[in] name Name of the encoding to query. + * @retval NULL No such encoding. + * @retval otherwise An encoding whose name is `name`. + */ +rb_encoding *rb_enc_find(const char *name); + +/** + * Queries the (canonical) name of the passed encoding. + * + * @param[in] enc An encoding. + * @return Its name. + */ +static inline const char * +rb_enc_name(rb_encoding *enc) +{ + return enc->name; +} + +/** + * Queries the minimum number of bytes that the passed encoding needs to + * represent a character. For ASCII and compatible encodings this is typically + * 1. There are however encodings whose minimum is not 1; they are + * historically called wide characters. + * + * @param[in] enc An encoding. + * @return Its least possible number of bytes except 0.
+ */ +static inline int +rb_enc_mbminlen(rb_encoding *enc) +{ + return enc->min_enc_len; +} + +/** + * Queries the maximum number of bytes that the passed encoding needs to + * represent a character. Fixed-width encodings have the same value for this + * one and #rb_enc_mbminlen. However there are variable-width encodings. + * UTF-8, for instance, takes from 1 up to 6 bytes. + * + * @param[in] enc An encoding. + * @return Its maximum possible number of bytes of a character. + */ +static inline int +rb_enc_mbmaxlen(rb_encoding *enc) +{ + return enc->max_enc_len; +} + +/** + * Queries the number of bytes of the character at the passed pointer. + * + * @param[in] p Pointer to a character's first byte. + * @param[in] e End of the string that has `p`. + * @param[in] enc Encoding of the string. + * @return If the character at `p` does not end until `e`, number of bytes + * between `p` and `e`. Otherwise the number of bytes that the + * character at `p` is encoded. + * + * @internal + * + * Strictly speaking there are chances when `p` points to a middle byte of a + * wide character. This function returns "the number of bytes from `p` to + * nearest of either `e` or the next character boundary", if you go strict. + */ +int rb_enc_mbclen(const char *p, const char *e, rb_encoding *enc); + +/** + * Identical to rb_enc_mbclen() unless the character at `p` overruns `e`. That + * can happen for instance when you read from a socket and its partial read + * cuts a wide character in-between. In those situations this function + * "estimates" theoretical length of the character in question. Typically it + * tends to be possible to know how many bytes a character needs before + * actually reaching its end; for instance UTF-8 encodes a character's length + * in the first byte of it. This function returns that info. + * + * @note This implies that the string is not broken. + * + * @param[in] p Pointer to the character's first byte. + * @param[in] e End of the string that has `p`. 
+ * @param[in] enc Encoding of the string. + * @return Number of bytes of character at `p`, measured or estimated. + */ +int rb_enc_fast_mbclen(const char *p, const char *e, rb_encoding *enc); + +/** + * Queries the number of bytes of the character at the passed pointer. This + * function returns 3 different types of information: + * + * ```CXX + * auto n = rb_enc_precise_mbclen(p, q, r); + * + * if (ONIGENC_MBCLEN_CHARFOUND_P(n)) { + * // Character found. Normal return. + * auto found_length = ONIGENC_MBCLEN_CHARFOUND_LEN(n); + * } + * else if (ONIGENC_MBCLEN_NEEDMORE_P(n)) { + * // Character overruns past `q`; needs more. + * auto requested_length = ONIGENC_MBCLEN_NEEDMORE_LEN(n); + * } + * else { + * // `p` is broken. + * assert(ONIGENC_MBCLEN_INVALID_P(n)); + * } + * ``` + * + * @param[in] p Pointer to the character's first byte. + * @param[in] e End of the string that has `p`. + * @param[in] enc Encoding of the string. + * @return Encoded read/needed number of bytes (see above). + */ +int rb_enc_precise_mbclen(const char *p, const char *e, rb_encoding *enc); + +#define MBCLEN_CHARFOUND_P(ret) ONIGENC_MBCLEN_CHARFOUND_P(ret) /**< @old{ONIGENC_MBCLEN_CHARFOUND_P} */ +#define MBCLEN_CHARFOUND_LEN(ret) ONIGENC_MBCLEN_CHARFOUND_LEN(ret) /**< @old{ONIGENC_MBCLEN_CHARFOUND_LEN} */ +#define MBCLEN_INVALID_P(ret) ONIGENC_MBCLEN_INVALID_P(ret) /**< @old{ONIGENC_MBCLEN_INVALID_P} */ +#define MBCLEN_NEEDMORE_P(ret) ONIGENC_MBCLEN_NEEDMORE_P(ret) /**< @old{ONIGENC_MBCLEN_NEEDMORE_P} */ +#define MBCLEN_NEEDMORE_LEN(ret) ONIGENC_MBCLEN_NEEDMORE_LEN(ret) /**< @old{ONIGENC_MBCLEN_NEEDMORE_LEN} */ + +/** + * Queries the code point of character pointed by the passed pointer. If that + * code point is included in ASCII that code point is returned. Otherwise -1. + * This can be different from just looking at the first byte. For instance it + * reads 2 bytes in case of UTF-16BE. + * + * @param[in] p Pointer to the character's first byte. 
+ * @param[in] e End of the string that has `p`. + * @param[out] len Return buffer. + * @param[in] enc Encoding of the string. + * @retval -1 The character at `p` is not in ASCII. + * @retval otherwise A code point of the character at `p`. + * @post `len` (if set) is the number of bytes of `p`. + */ +int rb_enc_ascget(const char *p, const char *e, int *len, rb_encoding *enc); + +/** + * Queries the code point of character pointed by the passed pointer. + * Exceptions happen in case of broken input. + * + * @param[in] p Pointer to the character's first byte. + * @param[in] e End of the string that has `p`. + * @param[out] len Return buffer. + * @param[in] enc Encoding of the string. + * @exception rb_eArgError `p` is broken. + * @return Code point of the character pointed by `p`. + * @post `len` (if set) is the number of bytes of `p`. + */ +unsigned int rb_enc_codepoint_len(const char *p, const char *e, int *len, rb_encoding *enc); + +/** + * Queries the code point of character pointed by the passed pointer. + * Exceptions happen in case of broken input. + * + * @deprecated Use rb_enc_codepoint_len() instead. + * @param[in] p Pointer to the character's first byte. + * @param[in] e End of the string that has `p`. + * @param[in] enc Encoding of the string. + * @exception rb_eArgError `p` is broken. + * @return Code point of the character pointed by `p`. + * + * @internal + * + * @matz says in commit 91e5ba1cb865a2385d3e1cbfacd824496898e098 that the line + * below is a "prototype for obsolete function". However even today there + * still are some use cases of it throughout our repository. It seems it has + * its own niche. + */ +static inline unsigned int +rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc) +{ + return rb_enc_codepoint_len(p, e, 0, enc); + /* ^^^ + * This can be `NULL` in C, `nullptr` in C++, and `0` for both. + * We choose the most portable one here.
+ */ +} + + +/** + * Identical to rb_enc_codepoint(), except it assumes the passed character is + * not broken. + * + * @param[in] p Pointer to the character's first byte. + * @param[in] e End of the string that has `p`. + * @param[in] enc Encoding of the string. + * @return Code point of the character pointed by `p`. + */ +static inline OnigCodePoint +rb_enc_mbc_to_codepoint(const char *p, const char *e, rb_encoding *enc) +{ + const OnigUChar *up = RBIMPL_CAST((const OnigUChar *)p); + const OnigUChar *ue = RBIMPL_CAST((const OnigUChar *)e); + + return ONIGENC_MBC_TO_CODE(enc, up, ue); +} + +/** + * Queries the number of bytes requested to represent the passed code point + * using the passed encoding. + * + * @param[in] code Code point in question. + * @param[in] enc Encoding to convert the code into a byte sequence. + * @exception rb_eArgError `enc` does not glean `code`. + * @return Number of bytes requested to represent `code` using `enc`. + */ +int rb_enc_codelen(int code, rb_encoding *enc); + +/** + * Identical to rb_enc_codelen(), except it returns 0 for invalid code points. + * + * @param[in] c Code point in question. + * @param[in] enc Encoding to convert `c` into a byte sequence. + * @retval 0 `c` is invalid. + * @retval otherwise Number of bytes needed for `enc` to encode `c`. + */ +static inline int +rb_enc_code_to_mbclen(int c, rb_encoding *enc) +{ + OnigCodePoint uc = RBIMPL_CAST((OnigCodePoint)c); + + return ONIGENC_CODE_TO_MBCLEN(enc, uc); +} + +/** + * Identical to rb_enc_uint_chr(), except it writes back to the passed buffer + * instead of allocating one. + * + * @param[in] c Code point. + * @param[out] buf Return buffer. + * @param[in] enc Target encoding scheme. + * @retval <= 0 `c` is invalid in `enc`. + * @retval otherwise Number of bytes written to `buf`. + * @post `c` is encoded according to `enc`, then written to `buf`. + * + * @internal + * + * The second argument must be typed.
But its current usages prevent us from + * being any stricter than this. :FIXME: + */ +static inline int +rb_enc_mbcput(unsigned int c, void *buf, rb_encoding *enc) +{ + OnigCodePoint uc = RBIMPL_CAST((OnigCodePoint)c); + OnigUChar *ubuf = RBIMPL_CAST((OnigUChar *)buf); + + return ONIGENC_CODE_TO_MBC(enc, uc, ubuf); +} + +/** + * Queries the previous (left) character. + * + * @param[in] s Start of the string. + * @param[in] p Pointer to a character. + * @param[in] e End of the string. + * @param[in] enc Encoding. + * @retval NULL No previous character. + * @retval otherwise Pointer to the head of the previous character. + */ +static inline char * +rb_enc_prev_char(const char *s, const char *p, const char *e, rb_encoding *enc) +{ + const OnigUChar *us = RBIMPL_CAST((const OnigUChar *)s); + const OnigUChar *up = RBIMPL_CAST((const OnigUChar *)p); + const OnigUChar *ue = RBIMPL_CAST((const OnigUChar *)e); + OnigUChar *ur = onigenc_get_prev_char_head(enc, us, up, ue); + + return RBIMPL_CAST((char *)ur); +} + +/** + * Queries the left boundary of a character. This function takes a pointer + * that is not necessarily a head of a character, and searches for its head. + * + * @param[in] s Start of the string. + * @param[in] p Pointer to a possibly-middle of a character. + * @param[in] e End of the string. + * @param[in] enc Encoding. + * @return Pointer to the head of the character that contains `p`. + */ +static inline char * +rb_enc_left_char_head(const char *s, const char *p, const char *e, rb_encoding *enc) +{ + const OnigUChar *us = RBIMPL_CAST((const OnigUChar *)s); + const OnigUChar *up = RBIMPL_CAST((const OnigUChar *)p); + const OnigUChar *ue = RBIMPL_CAST((const OnigUChar *)e); + OnigUChar *ur = onigenc_get_left_adjust_char_head(enc, us, up, ue); + + return RBIMPL_CAST((char *)ur); +} + +/** + * Queries the right boundary of a character. This function takes a pointer + * that is not necessarily a head of a character, and searches for its tail. 
+ * + * @param[in] s Start of the string. + * @param[in] p Pointer to a possibly-middle of a character. + * @param[in] e End of the string. + * @param[in] enc Encoding. + * @return Pointer to the end of the character that contains `p`. + */ +static inline char * +rb_enc_right_char_head(const char *s, const char *p, const char *e, rb_encoding *enc) +{ + const OnigUChar *us = RBIMPL_CAST((const OnigUChar *)s); + const OnigUChar *up = RBIMPL_CAST((const OnigUChar *)p); + const OnigUChar *ue = RBIMPL_CAST((const OnigUChar *)e); + OnigUChar *ur = onigenc_get_right_adjust_char_head(enc, us, up, ue); + + return RBIMPL_CAST((char *)ur); +} + +/** + * Scans the string backwards for n characters. + * + * @param[in] s Start of the string. + * @param[in] p Pointer to a character. + * @param[in] e End of the string. + * @param[in] n Steps. + * @param[in] enc Encoding. + * @retval NULL There are no `n` characters left. + * @retval otherwise Pointer to `n` character before `p`. + */ +static inline char * +rb_enc_step_back(const char *s, const char *p, const char *e, int n, rb_encoding *enc) +{ + const OnigUChar *us = RBIMPL_CAST((const OnigUChar *)s); + const OnigUChar *up = RBIMPL_CAST((const OnigUChar *)p); + const OnigUChar *ue = RBIMPL_CAST((const OnigUChar *)e); + const OnigUChar *ur = onigenc_step_back(enc, us, up, ue, n); + + return RBIMPL_CAST((char *)ur); +} + +/** + * @private + * + * This is an implementation detail of rb_enc_asciicompat(). People don't use + * it directly. Just always use rb_enc_asciicompat(). + * + * @param[in] enc Encoding in question. + * @retval 1 It is ASCII compatible. + * @retval 0 It isn't. + */ +static inline int +rb_enc_asciicompat_inline(rb_encoding *enc) +{ + return rb_enc_mbminlen(enc)==1 && !rb_enc_dummy_p(enc); +} + +/** + * Queries if the passed encoding is _in some sense_ compatible with ASCII. + * The concept of ASCII compatibility is nuanced, and private to our + * implementation. 
For instance SJIS is ASCII compatible to us, despite their + * having different characters at code point `0x5C`. This is based on some + * practical consideration that Japanese people confuse SJIS to be "upper + * compatible" with ASCII (which is in fact a wrong idea, but we just don't go + * strict here). An example of ASCII incompatible encoding is UTF-16. UTF-16 + * shares code points with ASCII, but employs a completely different encoding + * scheme. + * + * @param[in] enc Encoding in question. + * @retval 0 It is incompatible. + * @retval 1 It is compatible. + */ +static inline bool +rb_enc_asciicompat(rb_encoding *enc) +{ + if (rb_enc_mbminlen(enc) != 1) { + return false; + } + else if (rb_enc_dummy_p(enc)) { + return false; + } + else { + return true; + } +} + +/** + * Queries if the passed string is in an ASCII-compatible encoding. + * + * @param[in] str A Ruby's string to query. + * @retval 0 `str` is not a String, or an ASCII-incompatible string. + * @retval 1 Otherwise. + */ +static inline bool +rb_enc_str_asciicompat_p(VALUE str) +{ + rb_encoding *enc = rb_enc_get(str); + + return rb_enc_asciicompat(enc); +} + +/** + * Queries the Ruby-level counterpart instance of ::rb_cEncoding that + * corresponds to the passed encoding. + * + * @param[in] enc An encoding. + * @retval RUBY_Qnil `enc` is a null pointer. + * @retval otherwise An instance of ::rb_cEncoding. + */ +VALUE rb_enc_from_encoding(rb_encoding *enc); + +RBIMPL_ATTR_PURE() +/** + * Queries if the passed encoding is either one of UTF-8/16/32. + * + * @note It does not take UTF-7, which we actually support, into account. + * + * @param[in] enc Encoding in question. + * @retval 0 It is not a Unicode variant. + * @retval otherwise It is. + * + * @internal + * + * In reality it returns 1/0, but the value is abstracted as + * `ONIGENC_FLAG_UNICODE`. + */ +int rb_enc_unicode_p(rb_encoding *enc); + +RBIMPL_ATTR_RETURNS_NONNULL() +/** + * Queries the encoding that represents ASCII-8BIT a.k.a. binary.
+ * + * @return The encoding that represents ASCII-8BIT. + * + * @internal + * + * This can not return NULL once the process properly boots up. + */ +rb_encoding *rb_ascii8bit_encoding(void); + +RBIMPL_ATTR_RETURNS_NONNULL() +/** + * Queries the encoding that represents UTF-8. + * + * @return The encoding that represents UTF-8. + * + * @internal + * + * This can not return NULL once the process properly boots up. + */ +rb_encoding *rb_utf8_encoding(void); + +RBIMPL_ATTR_RETURNS_NONNULL() +/** + * Queries the encoding that represents US-ASCII. + * + * @return The encoding that represents US-ASCII. + * + * @internal + * + * This can not return NULL once the process properly boots up. + */ +rb_encoding *rb_usascii_encoding(void); + +/** + * Queries the encoding that represents the current locale. + * + * @return The encoding that represents the process' locale. + * + * @internal + * + * This is dynamic. If you change the process' locale by e.g. calling + * `setlocale(3)`, that should also change the return value of this function. + * + * There is no official way for Ruby scripts to manipulate locales, though. + */ +rb_encoding *rb_locale_encoding(void); + +/** + * Queries the "filesystem" encoding. This is the encoding that ruby expects + * info from the OS' file system are in. This affects for instance return + * value of rb_dir_getwd(). Most notably on Windows it can be an alias of OS + * codepage. Most notably on Linux users can set this via default external + * encoding. + * + * @return The "filesystem" encoding. + */ +rb_encoding *rb_filesystem_encoding(void); + +/** + * Queries the "default external" encoding. This is used to interact with + * outer-process things such as File. Though not recommended, you can set this + * using rb_enc_set_default_external(). + * + * @return The "default external" encoding. + */ +rb_encoding *rb_default_external_encoding(void); + +/** + * Queries the "default internal" encoding. This could be a null pointer. 
+ * Otherwise, outer-process info are transcoded from default external encoding + * to this one during reading from an IO. + * + * @return The "default internal" encoding (if any). + */ +rb_encoding *rb_default_internal_encoding(void); + +#ifndef rb_ascii8bit_encindex +RBIMPL_ATTR_CONST() +/** + * Identical to rb_ascii8bit_encoding(), except it returns the encoding's index + * instead of the encoding itself. + * + * @return The index of encoding of ASCII-8BIT. + * + * @internal + * + * This happens to be 0. + */ +int rb_ascii8bit_encindex(void); +#endif + +/** + * Queries if the passed object is in ascii 8bit (== binary) encoding. The + * object must be capable of having inline encoding. Using this macro needs + * deep understanding of bit level object binary layout. + * + * @param[in] obj An object to check. + * @retval 1 It is. + * @retval 0 It isn't. + */ +static inline bool +RB_ENCODING_IS_ASCII8BIT(VALUE obj) +{ + return RB_ENCODING_GET_INLINED(obj) == rb_ascii8bit_encindex(); +} + +#ifndef rb_utf8_encindex +RBIMPL_ATTR_CONST() +/** + * Identical to rb_utf8_encoding(), except it returns the encoding's index + * instead of the encoding itself. + * + * @return The index of encoding of UTF-8. + */ +int rb_utf8_encindex(void); +#endif + +#ifndef rb_usascii_encindex +RBIMPL_ATTR_CONST() +/** + * Identical to rb_usascii_encoding(), except it returns the encoding's index + * instead of the encoding itself. + * + * @return The index of encoding of US-ASCII. + */ +int rb_usascii_encindex(void); +#endif + +/** + * Identical to rb_locale_encoding(), except it returns the encoding's index + * instead of the encoding itself. + * + * @return The index of the locale encoding. + */ +int rb_locale_encindex(void); + +/** + * Identical to rb_filesystem_encoding(), except it returns the encoding's + * index instead of the encoding itself. + * + * @return The index of the filesystem encoding.
+ */ +int rb_filesystem_encindex(void); + +/** + * Identical to rb_default_external_encoding(), except it returns the + * Ruby-level counterpart instance of ::rb_cEncoding that corresponds to the + * default external encoding. + * + * @return An instance of ::rb_cEncoding of default external. + */ +VALUE rb_enc_default_external(void); + +/** + * Identical to rb_default_internal_encoding(), except it returns the + * Ruby-level counterpart instance of ::rb_cEncoding that corresponds to the + * default internal encoding. + * + * @return An instance of ::rb_cEncoding of default internal. + */ +VALUE rb_enc_default_internal(void); + +/** + * Destructively assigns the passed encoding as the default external encoding. + * You should not use this API. It has process-global side effects. Also it + * doesn't change encodings of strings that have already been read. + * + * @param[in] encoding Ruby level encoding. + * @exception rb_eArgError `encoding` is ::RUBY_Qnil. + * @post The default external encoding is `encoding`. + */ +void rb_enc_set_default_external(VALUE encoding); + +/** + * Destructively assigns the passed encoding as the default internal encoding. + * You should not use this API. It has process-global side effects. Also it + * doesn't change encodings of strings that have already been read. + * + * @param[in] encoding Ruby level encoding. + * @post The default internal encoding is `encoding`. + * @note Unlike rb_enc_set_default_external() you can pass ::RUBY_Qnil. + */ +void rb_enc_set_default_internal(VALUE encoding); + +/** + * Returns a platform-dependent "charmap" of the current locale. This + * information is called a "Codeset name" in IEEE 1003.1 section 13 + * (`<langinfo.h>`). This is a very low-level API. The return value can have + * no corresponding encoding when passed to rb_find_encoding(). + * + * @param[in] klass Ignored for no reason (why...) + * @return The low-level locale charmap, in Ruby's String.
+ */ +VALUE rb_locale_charmap(VALUE klass); + +RBIMPL_SYMBOL_EXPORT_END() + +/** @cond INTERNAL_MACRO */ +#define RB_ENCODING_GET RB_ENCODING_GET +#define RB_ENCODING_GET_INLINED RB_ENCODING_GET_INLINED +#define RB_ENCODING_IS_ASCII8BIT RB_ENCODING_IS_ASCII8BIT +#define RB_ENCODING_SET RB_ENCODING_SET +#define RB_ENCODING_SET_INLINED RB_ENCODING_SET_INLINED +#define rb_enc_asciicompat rb_enc_asciicompat +#define rb_enc_code_to_mbclen rb_enc_code_to_mbclen +#define rb_enc_codepoint rb_enc_codepoint +#define rb_enc_left_char_head rb_enc_left_char_head +#define rb_enc_mbc_to_codepoint rb_enc_mbc_to_codepoint +#define rb_enc_mbcput rb_enc_mbcput +#define rb_enc_mbmaxlen rb_enc_mbmaxlen +#define rb_enc_mbminlen rb_enc_mbminlen +#define rb_enc_name rb_enc_name +#define rb_enc_prev_char rb_enc_prev_char +#define rb_enc_right_char_head rb_enc_right_char_head +#define rb_enc_step_back rb_enc_step_back +#define rb_enc_str_asciicompat_p rb_enc_str_asciicompat_p +/** @endcond */ + +#endif /* RUBY_INTERNAL_ENCODING_ENCODING_H */ diff --git a/include/ruby/internal/encoding/pathname.h b/include/ruby/internal/encoding/pathname.h new file mode 100644 index 0000000000..0b5e85a524 --- /dev/null +++ b/include/ruby/internal/encoding/pathname.h @@ -0,0 +1,184 @@ +#ifndef RUBY_INTERNAL_ENCODING_PATHNAME_H /*-*-C++-*-vi:se ft=cpp:*/ +#define RUBY_INTERNAL_ENCODING_PATHNAME_H +/** + * @file + * @author Ruby developers <ruby-core@ruby-lang.org> + * @copyright This file is a part of the programming language Ruby. + * Permission is hereby granted, to either redistribute and/or + * modify this file, provided that the conditions mentioned in the + * file COPYING are met. Consult the file for details. + * @warning Symbols prefixed with either `RBIMPL` or `rbimpl` are + * implementation details. Don't take them as canon. They could + * rapidly appear then vanish. The name (path) of this header file + * is also an implementation detail. Do not expect it to persist + * at the place it is now. 
Developers are free to move it anywhere + * anytime at will. + * @note To ruby-core: remember that this header can be possibly + * recursively included from extension libraries written in C++. + * Do not expect for instance `__VA_ARGS__` is always available. + * We assume C99 for ruby itself but we don't assume languages of + * extension libraries. They could be written in C++98. + * @brief Routines to manipulate encodings of pathnames. + */ + +#include "ruby/internal/attr/nonnull.h" +#include "ruby/internal/dllexport.h" +#include "ruby/internal/encoding/encoding.h" +#include "ruby/internal/value.h" + +RBIMPL_SYMBOL_EXPORT_BEGIN() +RBIMPL_ATTR_NONNULL(()) +/** + * Returns a path component directly adjacent to the passed pointer. + * + * ``` + * "/multi/byte/encoded/pathname.txt" + * ^ ^ ^ + * | | +--- end + * | +--- @return + * +--- path + * ``` + * + * @param[in] path Where to start scanning. + * @param[in] end End of the path string. + * @param[in] enc Encoding of the string. + * @return A pointer in the passed string where the next path component + * resides, or `end` if there is no next path component. + */ +char *rb_enc_path_next(const char *path, const char *end, rb_encoding *enc); + +RBIMPL_ATTR_NONNULL(()) +/** + * Seeks for non-prefix part of a pathname. This can be a no-op when the OS + * has no such concept like a path prefix. But there are OSes where path + * prefixes do exist. + * + * ``` + * "C:\multi\byte\encoded\pathname.txt" + * ^ ^ ^ + * | | +--- end + * | +--- @return + * +--- path + * ``` + * + * @param[in] path Where to start scanning. + * @param[in] end End of the path string. + * @param[in] enc Encoding of the string. + * @return A pointer in the passed string where non-prefix part starts, or + * `path` if the OS does not have path prefix. + */ +char *rb_enc_path_skip_prefix(const char *path, const char *end, rb_encoding *enc); + +RBIMPL_ATTR_NONNULL(()) +/** + * Returns the last path component. 
+ * + * ``` + * "/multi/byte/encoded/pathname.txt" + * ^ ^ ^ + * | | +--- end + * | +--- @return + * +--- path + * ``` + * + * @param[in] path Where to start scanning. + * @param[in] end End of the path string. + * @param[in] enc Encoding of the string. + * @return A pointer in the passed string where the last path component + * resides, or `end` if there is no more path component. + */ +char *rb_enc_path_last_separator(const char *path, const char *end, rb_encoding *enc); + +RBIMPL_ATTR_NONNULL(()) +/** + * This just returns the passed end basically. It makes a difference in case the + * passed string ends with tons of path separators like the following: + * + * ``` + * "/path/that/ends/with/lots/of/slashes//////////////" + * ^ ^ ^ + * | | +--- end + * | +--- @return + * +--- path + * ``` + * + * @param[in] path Where to start scanning. + * @param[in] end End of the path string. + * @param[in] enc Encoding of the string. + * @return A pointer in the passed string where the trailing path + * separators start, or `end` if there is no trailing path + * separators. + * + * @internal + * + * It seems this function was introduced to mimic what POSIX says about + * `basename(3)`. + */ +char *rb_enc_path_end(const char *path, const char *end, rb_encoding *enc); + +RBIMPL_ATTR_NONNULL((1, 4)) +/** + * Our own encoding-aware version of `basename(3)`. Normally, this function + * returns the last path component of the given name. However in case the + * passed name ends with a path separator, it returns the name of the + * directory, not the last (empty) component. Also if the passed name is a + * root directory, it returns that root directory. Note however that Windows + * filesystems have drive letters, which this function does not return. + * + * @param[in] name Target path. + * @param[out] baselen Return buffer. + * @param[in,out] alllen Number of bytes of `name`. + * @param[in] enc Encoding of `name`. + * @return The rightmost component of `name`.
+ * @post `baselen`, if passed, is updated to be the number of bytes + * of the returned basename. + * @post `alllen`, if passed, is updated to be the number of bytes of + * strings not considered as the basename. + */ +const char *ruby_enc_find_basename(const char *name, long *baselen, long *alllen, rb_encoding *enc); + +RBIMPL_ATTR_NONNULL((1, 3)) +/** + * Our own encoding-aware version of `extname`. This function first applies + * rb_enc_path_last_separator() to the passed name and only concerns its return + * value (ignores any parent directories). This function returns complicated + * results: + * + * ```CXX + * auto path = "..."; + * auto len = strlen(path); + * auto ret = ruby_enc_find_extname(path, &len, rb_ascii8bit_encoding()); + * + * switch(len) { + * case 0: + * if (ret == 0) { + * // `path` is a file without extensions. + * } + * else { + * // `path` is a dotfile. + * // `ret` is the file's name. + * } + * break; + * + * case 1: + * // `path` _ends_ with a dot. + * // `ret` is that dot. + * break; + * + * default: + * // `path` has an extension. + * // `ret` is that extension. + * } + * ``` + * + * @param[in] name Target path. + * @param[in,out] len Number of bytes of `name`. + * @param[in] enc Encoding of `name`. + * @return See above. + * @post `len`, if passed, is updated (see above). 
+ */ +const char *ruby_enc_find_extname(const char *name, long *len, rb_encoding *enc); + +RBIMPL_SYMBOL_EXPORT_END() + +#endif /* RUBY_INTERNAL_ENCODING_PATHNAME_H */ diff --git a/include/ruby/internal/intern/gc.h b/include/ruby/internal/encoding/re.h index 30759e0ded..d0de23bc83 100644 --- a/include/ruby/internal/intern/gc.h +++ b/include/ruby/internal/encoding/re.h @@ -1,5 +1,5 @@ -#ifndef RBIMPL_INTERN_GC_H /*-*-C++-*-vi:se ft=cpp:*/ -#define RBIMPL_INTERN_GC_H +#ifndef RUBY_INTERNAL_ENCODING_RE_H /*-*-C++-*-vi:se ft=cpp:*/ +#define RUBY_INTERNAL_ENCODING_RE_H /** * @file * @author Ruby developers <ruby-core@ruby-lang.org> @@ -17,41 +17,30 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. - * @brief Public APIs related to ::rb_mGC. + * extension libraries. They could be written in C++98. + * @brief Routines to manipulate encodings of regular expressions.
*/ + #include "ruby/internal/dllexport.h" +#include "ruby/internal/encoding/encoding.h" #include "ruby/internal/value.h" -#include "ruby/backward/2/attributes.h" RBIMPL_SYMBOL_EXPORT_BEGIN() -/* gc.c */ -COLDFUNC NORETURN(void rb_memerror(void)); -PUREFUNC(int rb_during_gc(void)); -void rb_gc_mark_locations(const VALUE*, const VALUE*); -void rb_mark_tbl(struct st_table*); -void rb_mark_tbl_no_pin(struct st_table*); -void rb_mark_set(struct st_table*); -void rb_mark_hash(struct st_table*); -void rb_gc_update_tbl_refs(st_table *ptr); -void rb_gc_mark_maybe(VALUE); -void rb_gc_mark(VALUE); -void rb_gc_mark_movable(VALUE); -VALUE rb_gc_location(VALUE); -void rb_gc_force_recycle(VALUE); -void rb_gc(void); -void rb_gc_copy_finalizer(VALUE,VALUE); -VALUE rb_gc_enable(void); -VALUE rb_gc_disable(void); -VALUE rb_gc_start(void); -VALUE rb_define_finalizer(VALUE, VALUE); -VALUE rb_undefine_finalizer(VALUE); -size_t rb_gc_count(void); -size_t rb_gc_stat(VALUE); -VALUE rb_gc_latest_gc_info(VALUE); -void rb_gc_adjust_memory_usage(ssize_t); +/** + * Identical to rb_reg_new(), except it additionally takes an encoding. + * + * @param[in] ptr A memory region of `len` bytes length. + * @param[in] len Length of `ptr`, in bytes, not including the + * terminating NUL character. + * @param[in] enc Encoding of `ptr`. + * @param[in] opts Options e.g. ONIG_OPTION_MULTILINE. + * @exception rb_eRegexpError Failed to compile `ptr`. + * @return An allocated new instance of ::rb_cRegexp, of `enc` encoding, + * whose expression is compiled according to `ptr`. 
+ */ +VALUE rb_enc_reg_new(const char *ptr, long len, rb_encoding *enc, int opts); RBIMPL_SYMBOL_EXPORT_END() -#endif /* RBIMPL_INTERN_GC_H */ +#endif /* RUBY_INTERNAL_ENCODING_RE_H */ diff --git a/include/ruby/internal/encoding/sprintf.h b/include/ruby/internal/encoding/sprintf.h new file mode 100644 index 0000000000..cb8737b414 --- /dev/null +++ b/include/ruby/internal/encoding/sprintf.h @@ -0,0 +1,78 @@ +#ifndef RUBY_INTERNAL_ENCODING_SPRINTF_H /*-*-C++-*-vi:se ft=cpp:*/ +#define RUBY_INTERNAL_ENCODING_SPRINTF_H +/** + * @file + * @author Ruby developers <ruby-core@ruby-lang.org> + * @copyright This file is a part of the programming language Ruby. + * Permission is hereby granted, to either redistribute and/or + * modify this file, provided that the conditions mentioned in the + * file COPYING are met. Consult the file for details. + * @warning Symbols prefixed with either `RBIMPL` or `rbimpl` are + * implementation details. Don't take them as canon. They could + * rapidly appear then vanish. The name (path) of this header file + * is also an implementation detail. Do not expect it to persist + * at the place it is now. Developers are free to move it anywhere + * anytime at will. + * @note To ruby-core: remember that this header can be possibly + * recursively included from extension libraries written in C++. + * Do not expect for instance `__VA_ARGS__` is always available. + * We assume C99 for ruby itself but we don't assume languages of + * extension libraries. They could be written in C++98. + * @brief Encoding-aware `sprintf`-like routines.
+ */ +#include "ruby/internal/config.h" +#include <stdarg.h> +#include "ruby/internal/attr/format.h" +#include "ruby/internal/attr/nonnull.h" +#include "ruby/internal/attr/noreturn.h" +#include "ruby/internal/dllexport.h" +#include "ruby/internal/encoding/encoding.h" +#include "ruby/internal/value.h" + +RBIMPL_SYMBOL_EXPORT_BEGIN() +RBIMPL_ATTR_NONNULL((2)) +RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 2, 3) +/** + * Identical to rb_sprintf(), except it additionally takes an encoding. The + * passed encoding rules both the incoming format specifier and the resulting + * string. + * + * @param[in] enc Encoding of `fmt`. + * @param[in] fmt A `printf`-like format specifier. + * @param[in] ... Variadic number of contents to format. + * @return A rendered new instance of ::rb_cString, of `enc` encoding. + */ +VALUE rb_enc_sprintf(rb_encoding *enc, const char *fmt, ...); + +RBIMPL_ATTR_NONNULL((2)) +RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 2, 0) +/** + * Identical to rb_enc_sprintf(), except it takes a `va_list` instead of + * variadic arguments. It can also be seen as a routine identical to + * rb_vsprintf(), except it additionally takes an encoding. + * + * @param[in] enc Encoding of `fmt`. + * @param[in] fmt A `printf`-like format specifier. + * @param[in] ap Contents to format. + * @return A rendered new instance of ::rb_cString, of `enc` encoding. + */ +VALUE rb_enc_vsprintf(rb_encoding *enc, const char *fmt, va_list ap); + +RBIMPL_ATTR_NORETURN() +RBIMPL_ATTR_NONNULL((3)) +RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 3, 4) +/** + * Identical to rb_raise(), except it additionally takes an encoding. + * + * @param[in] enc Encoding of the generating exception. + * @param[in] exc A subclass of ::rb_eException. + * @param[in] fmt Format specifier string compatible with rb_sprintf(). + * @param[in] ... Contents of the message. + * @exception exc The specified exception. + * @note It never returns. 
+ */ +void rb_enc_raise(rb_encoding *enc, VALUE exc, const char *fmt, ...); + +RBIMPL_SYMBOL_EXPORT_END() + +#endif /* RUBY_INTERNAL_ENCODING_SPRINTF_H */ diff --git a/include/ruby/internal/encoding/string.h b/include/ruby/internal/encoding/string.h new file mode 100644 index 0000000000..2b9dfe4f31 --- /dev/null +++ b/include/ruby/internal/encoding/string.h @@ -0,0 +1,346 @@ +#ifndef RUBY_INTERNAL_ENCODING_STRING_H /*-*-C++-*-vi:se ft=cpp:*/ +#define RUBY_INTERNAL_ENCODING_STRING_H +/** + * @file + * @author Ruby developers <ruby-core@ruby-lang.org> + * @copyright This file is a part of the programming language Ruby. + * Permission is hereby granted, to either redistribute and/or + * modify this file, provided that the conditions mentioned in the + * file COPYING are met. Consult the file for details. + * @warning Symbols prefixed with either `RBIMPL` or `rbimpl` are + * implementation details. Don't take them as canon. They could + * rapidly appear then vanish. The name (path) of this header file + * is also an implementation detail. Do not expect it to persist + * at the place it is now. Developers are free to move it anywhere + * anytime at will. + * @note To ruby-core: remember that this header can be possibly + * recursively included from extension libraries written in C++. + * Do not expect for instance `__VA_ARGS__` is always available. + * We assume C99 for ruby itself but we don't assume languages of + * extension libraries. They could be written in C++98. + * @brief Routines to manipulate encodings of strings. + */ + +#include "ruby/internal/dllexport.h" +#include "ruby/internal/value.h" +#include "ruby/internal/encoding/encoding.h" +#include "ruby/internal/attr/nonnull.h" +#include "ruby/internal/intern/string.h" /* rbimpl_strlen */ + +RBIMPL_SYMBOL_EXPORT_BEGIN() + +/** + * Identical to rb_str_new(), except it additionally takes an encoding. + * + * @param[in] ptr A memory region of `len` bytes length. 
+ * @param[in] len Length of `ptr`, in bytes, not including the + * terminating NUL character. + * @param[in] enc Encoding of `ptr`. + * @exception rb_eNoMemError Failed to allocate `len+1` bytes. + * @exception rb_eArgError `len` is negative. + * @return An instance of ::rb_cString, of `len` bytes length, of `enc` + * encoding, whose contents are verbatim copy of `ptr`. + * @pre At least `len` bytes of continuous memory region shall be + * accessible via `ptr`. + * @note `enc` can be a null pointer. It can also be seen as a routine + * identical to rb_usascii_str_new() then. + */ +VALUE rb_enc_str_new(const char *ptr, long len, rb_encoding *enc); + +RBIMPL_ATTR_NONNULL((1)) +/** + * Identical to rb_enc_str_new(), except it assumes the passed pointer is a + * pointer to a C string. It can also be seen as a routine identical to + * rb_str_new_cstr(), except it additionally takes an encoding. + * + * @param[in] ptr A C string. + * @param[in] enc Encoding of `ptr`. + * @exception rb_eNoMemError Failed to allocate memory. + * @return An instance of ::rb_cString, of `enc` encoding, whose contents + * are verbatim copy of `ptr`. + * @pre `ptr` must not be a null pointer. + * @pre Because `ptr` is a C string it makes no sense for `enc` to be + * something like UTF-32. + * @note `enc` can be a null pointer. It can also be seen as a routine + * identical to rb_usascii_str_new_cstr() then. + */ +VALUE rb_enc_str_new_cstr(const char *ptr, rb_encoding *enc); + +/** + * Identical to rb_enc_str_new(), except it takes a C string literal. It can + * also be seen as a routine identical to rb_str_new_static(), except it + * additionally takes an encoding. + * + * @param[in] ptr A C string literal. + * @param[in] len `strlen(ptr)`. + * @param[in] enc Encoding of `ptr`. + * @exception rb_eArgError `len` out of range of `size_t`. + * @pre `ptr` must be a C string constant. 
+ * @return An instance of ::rb_cString, of `enc` encoding, whose backend + * storage is the passed C string literal. + * @warning It is a very bad idea to write to a C string literal (often + * immediate SEGV shall occur). Consider return values of this + * function be read-only. + * @note `enc` can be a null pointer. It can also be seen as a routine + * identical to rb_usascii_str_new_static() then. + */ +VALUE rb_enc_str_new_static(const char *ptr, long len, rb_encoding *enc); + +/** + * Identical to rb_enc_str_new(), except it returns a "f"string. It can also + * be seen as a routine identical to rb_interned_str(), except it additionally + * takes an encoding. + * + * @param[in] ptr A memory region of `len` bytes length. + * @param[in] len Length of `ptr`, in bytes, not including the + * terminating NUL character. + * @param[in] enc Encoding of `ptr`. + * @exception rb_eArgError `len` is negative. + * @return A found or created instance of ::rb_cString, of `len` bytes + * length, of `enc` encoding, whose contents are identical to that + * of `ptr`. + * @pre At least `len` bytes of continuous memory region shall be + * accessible via `ptr`. + * @note `enc` can be a null pointer. + */ +VALUE rb_enc_interned_str(const char *ptr, long len, rb_encoding *enc); + +RBIMPL_ATTR_NONNULL((1)) +/** + * Identical to rb_enc_str_new_cstr(), except it returns a "f"string. It can + * also be seen as a routine identical to rb_interned_str_cstr(), except it + * additionally takes an encoding. + * + * @param[in] ptr A C string. + * @param[in] enc Encoding of `ptr`. + * @return A found or created instance of ::rb_cString of `enc` encoding, + * whose contents are identical to that of `ptr`. + * @pre `ptr` must be a NUL-terminated C string; it must not be a + * null pointer. + * @note `enc` can be a null pointer.
+ */ +VALUE rb_enc_interned_str_cstr(const char *ptr, rb_encoding *enc); + +/** + * Counts the number of characters of the passed string, according to the + * passed encoding. This has to be complicated. The passed string could be + * invalid and/or broken. This routine would scan from the beginning until the + * end, byte by byte, to seek out character boundaries. Could be super slow. + * + * @param[in] head Leftmost pointer to the string. + * @param[in] tail Rightmost pointer to the string. + * @param[in] enc Encoding of the string. + * @return Number of characters that exist in `head` .. `tail`. The definition + * of "character" depends on the passed `enc`. + */ +long rb_enc_strlen(const char *head, const char *tail, rb_encoding *enc); + +/** + * Queries the n-th character. Like rb_enc_strlen() this function can be fast + * or slow depending on the contents. Don't expect characters to be uniformly + * distributed across the entire string. + * + * @param[in] head Leftmost pointer to the string. + * @param[in] tail Rightmost pointer to the string. + * @param[in] nth Requested index of characters. + * @param[in] enc Encoding of the string. + * @return Pointer to the first byte of the character that is `nth` + * character ahead of `head`, or `tail` if there is no such + * character (OOB etc). The definition of "character" depends on + * the passed `enc`. + */ +char *rb_enc_nth(const char *head, const char *tail, long nth, rb_encoding *enc); + +/** + * Identical to rb_enc_get_index(), except the return type. + * + * @param[in] obj Object in question. + * @exception rb_eTypeError `obj` is incapable of having an encoding. + * @return `obj`'s encoding. + */ +VALUE rb_obj_encoding(VALUE obj); + +/** + * Identical to rb_str_cat(), except it additionally takes an encoding. + * + * @param[out] str Destination object. + * @param[in] ptr Contents to append. + * @param[in] len Length of `ptr`, in bytes. + * @param[in] enc Encoding of `ptr`.
+ * @exception rb_eArgError `len` is negative. + * @exception rb_eEncCompatError `enc` is not compatible with `str`. + * @return The passed `str`. + * @post The contents of `ptr` are copied, transcoded into `str`'s + * encoding, then pasted into `str`'s end. + */ +VALUE rb_enc_str_buf_cat(VALUE str, const char *ptr, long len, rb_encoding *enc); + +/** + * Encodes the passed code point into a series of bytes. + * + * @param[in] code Code point. + * @param[in] enc Target encoding scheme. + * @exception rb_eRangeError `enc` does not glean `code`. + * @return An instance of ::rb_cString, of `enc` encoding, whose sole + * contents is `code` represented in `enc`. + * @note No way to encode code points bigger than UINT_MAX. + * + * @internal + * + * In other languages, APIs like this one could be seen as the primitive + * routines where encodings' "encode" feature are implemented. However in case + * of Ruby this is not the primitive one. We directly manipulate encoded + * strings. Encoding conversion routines transcode an encoded string directly + * to another one; not via a code point array. + */ +VALUE rb_enc_uint_chr(unsigned int code, rb_encoding *enc); + +/** + * Identical to rb_external_str_new(), except it additionally takes an + * encoding. However the whole point of rb_external_str_new() is to encode a + * string into default external encoding. Being able to specify arbitrary + * encoding just ruins the designed purpose of the function, it seems. + * + * @param[in] ptr A memory region of `len` bytes length. + * @param[in] len Length of `ptr`, in bytes, not including the + * terminating NUL character. + * @param[in] enc Target encoding scheme. + * @exception rb_eArgError `len` is negative. + * @return An instance of ::rb_cString. In case encoding conversion from + * "default internal" to `enc` is fully defined over the given + * contents, then the return value is a string of `enc` encoding, + * whose contents are the converted ones. Otherwise the string is + * junk.
+ * @warning It doesn't raise on a conversion failure and silently ends up in + * a corrupted output. You can know the failure by querying + * `valid_encoding?` of the result object. + * + * @internal + * + * @shyouhei has no idea why this one does not follow the naming convention + * that others obey. It seems to him that this should have been called + * `rb_enc_external_str_new`. + */ +VALUE rb_external_str_new_with_enc(const char *ptr, long len, rb_encoding *enc); + +/** + * Identical to rb_str_export(), except it additionally takes an encoding. + * + * @param[in] obj Target object. + * @param[in] enc Target encoding. + * @exception rb_eTypeError No implicit conversion to String. + * @return Converted ruby string of `enc` encoding. + */ +VALUE rb_str_export_to_enc(VALUE obj, rb_encoding *enc); + +/** + * Encoding conversion main routine. + * + * @param[in] str String to convert. + * @param[in] from Source encoding. + * @param[in] to Destination encoding. + * @return A copy of `str`, with conversion from `from` to `to` applied. + * @note `from` can be a null pointer. `str`'s encoding is taken then. + * @note `to` can be a null pointer. No-op then. + */ +VALUE rb_str_conv_enc(VALUE str, rb_encoding *from, rb_encoding *to); + +/** + * Identical to rb_str_conv_enc(), except it additionally takes IO encoder + * options. The extra arguments can be constructed using io_extract_modeenc() + * etc. + * + * @param[in] str String to convert. + * @param[in] from Source encoding. + * @param[in] to Destination encoding. + * @param[in] ecflags A set of enum ::ruby_econv_flag_type. + * @param[in] ecopts Optional hash. + * @return A copy of `str`, with conversion from `from` to `to` applied. + * @note `from` can be a null pointer. `str`'s encoding is taken then. + * @note `to` can be a null pointer. No-op then. + * @note `ecopts` can be ::RUBY_Qnil, which is equivalent to passing an + * empty hash. 
+ */ +VALUE rb_str_conv_enc_opts(VALUE str, rb_encoding *from, rb_encoding *to, int ecflags, VALUE ecopts); + +/** + * Scans the passed string to collect its code range. Because a Ruby's string + * is mutable, its contents change from time to time; so does its code range. + * A long-lived string tends to fall back to ::RUBY_ENC_CODERANGE_UNKNOWN. + * This API scans it and re-assigns a fine-grained code range constant. + * + * @param[out] str A string. + * @return An enum ::ruby_coderange_type. + */ +int rb_enc_str_coderange(VALUE str); + +/** + * Scans the passed string until it finds something odd. Returns the number of + * bytes scanned. As the name implies this is suitable for repeated call. One + * of its application is `IO#readlines`. The method reads from its receiver's + * read buffer, maybe more than once, looking for newlines. But "newline" can + * be different among encodings. This API is used to detect broken contents to + * properly mark them as such. + * + * @param[in] str String to scan. + * @param[in] end End of `str`. + * @param[in] enc `str`'s encoding. + * @param[out] cr Return buffer. + * @return Distance between `str` and first such byte where broken. + * @post `cr` has the code range type. + */ +long rb_str_coderange_scan_restartable(const char *str, const char *end, rb_encoding *enc, int *cr); + +/** + * Queries if the passed string is "ASCII only". An ASCII only string is a + * string who doesn't have any non-ASCII characters at all. This doesn't + * necessarily mean the string is in ASCII encoding. For instance a String of + * CP932 encoding can quite much be ASCII only, depending on its contents. + * + * @param[in] str String in question. + * @retval 1 It doesn't have non-ASCII characters. + * @retval 0 It has characters that are out of ASCII. + */ +int rb_enc_str_asciionly_p(VALUE str); + +RBIMPL_ATTR_NONNULL(()) +/** + * Looks for the passed string in the passed buffer. + * + * @param[in] x Buffer that potentially includes `y`. 
+ * @param[in] m Number of bytes of `x`. + * @param[in] y Query string. + * @param[in] n Number of bytes of `y`. + * @param[in] enc Encoding of both `x` and `y`. + * @retval -1 Not found. + * @retval otherwise Found index in `x`. + * @note This API can match at a non-character-boundary. + */ +long rb_memsearch(const void *x, long m, const void *y, long n, rb_encoding *enc); + +/** @cond INTERNAL_MACRO */ +RBIMPL_ATTR_NONNULL(()) +static inline VALUE +rbimpl_enc_str_new_cstr(const char *str, rb_encoding *enc) +{ + long len = rbimpl_strlen(str); + + return rb_enc_str_new_static(str, len, enc); +} + +#define rb_enc_str_new(str, len, enc) \ + ((RBIMPL_CONSTANT_P(str) && \ + RBIMPL_CONSTANT_P(len) ? \ + rb_enc_str_new_static: \ + rb_enc_str_new) ((str), (len), (enc))) + +#define rb_enc_str_new_cstr(str, enc) \ + ((RBIMPL_CONSTANT_P(str) ? \ + rbimpl_enc_str_new_cstr : \ + rb_enc_str_new_cstr) ((str), (enc))) + +/** @endcond */ + +RBIMPL_SYMBOL_EXPORT_END() + +#endif /* RUBY_INTERNAL_ENCODING_STRING_H */ diff --git a/include/ruby/internal/encoding/symbol.h b/include/ruby/internal/encoding/symbol.h new file mode 100644 index 0000000000..9cd1b0dbf4 --- /dev/null +++ b/include/ruby/internal/encoding/symbol.h @@ -0,0 +1,100 @@ +#ifndef RUBY_INTERNAL_ENCODING_SYMBOL_H /*-*-C++-*-vi:se ft=cpp:*/ +#define RUBY_INTERNAL_ENCODING_SYMBOL_H +/** + * @file + * @author Ruby developers <ruby-core@ruby-lang.org> + * @copyright This file is a part of the programming language Ruby. + * Permission is hereby granted, to either redistribute and/or + * modify this file, provided that the conditions mentioned in the + * file COPYING are met. Consult the file for details. + * @warning Symbols prefixed with either `RBIMPL` or `rbimpl` are + * implementation details. Don't take them as canon. They could + * rapidly appear then vanish. The name (path) of this header file + * is also an implementation detail. Do not expect it to persist + * at the place it is now. 
Developers are free to move it anywhere + * anytime at will. + * @note To ruby-core: remember that this header can be possibly + * recursively included from extension libraries written in C++. + * Do not expect for instance `__VA_ARGS__` is always available. + * We assume C99 for ruby itself but we don't assume languages of + * extension libraries. They could be written in C++98. + * @brief Routines to manipulate encodings of symbols. + */ + +#include "ruby/internal/attr/nonnull.h" +#include "ruby/internal/dllexport.h" +#include "ruby/internal/encoding/encoding.h" +#include "ruby/internal/value.h" + +RBIMPL_SYMBOL_EXPORT_BEGIN() + +/** + * Identical to rb_intern2(), except it additionally takes an encoding. + * + * @param[in] name The name of the id. + * @param[in] len Length of `name`. + * @param[in] enc `name`'s encoding. + * @exception rb_eRuntimeError Too many symbols. + * @return A (possibly new) id whose value is the given name. + * @note These days Ruby internally has two kinds of symbols + * (static/dynamic). Symbols created using this function would + * become static ones; i.e. would never be garbage collected. It + * is up to you to avoid memory leaks. Think twice before using + * it. + */ +ID rb_intern3(const char *name, long len, rb_encoding *enc); + +RBIMPL_ATTR_NONNULL(()) +/** + * Identical to rb_symname_p(), except it additionally takes an encoding. + * + * @param[in] str A C string to check. + * @param[in] enc `str`'s encoding. + * @retval 1 It is a valid symbol name. + * @retval 0 It is invalid as a symbol name. + */ +int rb_enc_symname_p(const char *str, rb_encoding *enc); + +/** + * Identical to rb_enc_symname_p(), except it additionally takes the passed + * string's length. This is needed for strings containing NUL bytes, like in + * case of UTF-32. + * + * @param[in] name A C string to check. + * @param[in] len Number of bytes of `name`. + * @param[in] enc `name`'s encoding. + * @retval 1 It is a valid symbol name.
+ * @retval 0 It is invalid as a symbol name. + */ +int rb_enc_symname2_p(const char *name, long len, rb_encoding *enc); + +/** + * Identical to rb_check_id(), except it takes a pointer to a memory region + * instead of Ruby's string. + * + * @param[in] ptr A pointer to a memory region. + * @param[in] len Number of bytes of `ptr`. + * @param[in] enc Encoding of `ptr`. + * @exception rb_eEncodingError `ptr` contains non-ASCII according to `enc`. + * @retval 0 No such id ever existed in the history. + * @retval otherwise The id that represents the given name. + */ +ID rb_check_id_cstr(const char *ptr, long len, rb_encoding *enc); + +/** + * Identical to rb_check_id_cstr(), except for the return type. It can also be + * seen as a routine identical to rb_check_symbol(), except it takes a pointer + * to a memory region instead of Ruby's string. + * + * @param[in] ptr A pointer to a memory region. + * @param[in] len Number of bytes of `ptr`. + * @param[in] enc Encoding of `ptr`. + * @exception rb_eEncodingError `ptr` contains non-ASCII according to `enc`. + * @retval RUBY_Qnil No such id ever existed in the history. + * @retval otherwise The symbol that represents the given name. + */ +VALUE rb_check_symbol_cstr(const char *ptr, long len, rb_encoding *enc); + +RBIMPL_SYMBOL_EXPORT_END() + +#endif /* RUBY_INTERNAL_ENCODING_SYMBOL_H */ diff --git a/include/ruby/internal/encoding/transcode.h b/include/ruby/internal/encoding/transcode.h new file mode 100644 index 0000000000..7f26d2eae9 --- /dev/null +++ b/include/ruby/internal/encoding/transcode.h @@ -0,0 +1,562 @@ +#ifndef RUBY_INTERNAL_ENCODING_TRANSCODE_H /*-*-C++-*-vi:se ft=cpp:*/ +#define RUBY_INTERNAL_ENCODING_TRANSCODE_H +/** + * @file + * @author Ruby developers <ruby-core@ruby-lang.org> + * @copyright This file is a part of the programming language Ruby. + * Permission is hereby granted, to either redistribute and/or + * modify this file, provided that the conditions mentioned in the + * file COPYING are met.
Consult the file for details. + * @warning Symbols prefixed with either `RBIMPL` or `rbimpl` are + * implementation details. Don't take them as canon. They could + * rapidly appear then vanish. The name (path) of this header file + * is also an implementation detail. Do not expect it to persist + * at the place it is now. Developers are free to move it anywhere + * anytime at will. + * @note To ruby-core: remember that this header can be possibly + * recursively included from extension libraries written in C++. + * Do not expect for instance `__VA_ARGS__` is always available. + * We assume C99 for ruby itself but we don't assume languages of + * extension libraries. They could be written in C++98. + * @brief econv stuff + */ + +#include "ruby/internal/dllexport.h" +#include "ruby/internal/value.h" + +RBIMPL_SYMBOL_EXPORT_BEGIN() + +/** return value of rb_econv_convert() */ +typedef enum { + + /** + * The conversion stopped when it found an invalid sequence. + */ + econv_invalid_byte_sequence, + + /** + * The conversion stopped when it found a character in the input which + * cannot be representable in the output. + */ + econv_undefined_conversion, + + /** + * The conversion stopped because there is no destination. + */ + econv_destination_buffer_full, + + /** + * The conversion stopped because there is no input. + */ + econv_source_buffer_empty, + + /** + * The conversion stopped after converting everything. This is arguably + * the expected normal end of conversion. + */ + econv_finished, + + /** + * The conversion stopped after writing something to somewhere, before + * reading everything. + */ + econv_after_output, + + /** + * The conversion stopped in middle of reading a character, possibly due to + * a partial read of a socket etc. + */ + econv_incomplete_input +} rb_econv_result_t; + +/** An opaque struct that represents a lowest level of encoding conversion. 
*/ +typedef struct rb_econv_t rb_econv_t; + +/** + * Converts the contents of the passed string from its encoding to the passed + * one. + * + * @param[in] str Target string. + * @param[in] to Destination encoding. + * @param[in] ecflags A set of enum + * ::ruby_econv_flag_type. + * @param[in] ecopts A keyword hash, like + * ::rb_io_t::rb_io_enc_t::ecopts. + * @exception rb_eArgError Not fully converted. + * @exception rb_eInvalidByteSequenceError `str` is malformed. + * @exception rb_eUndefinedConversionError `str` has a character not + * representable using `to`. + * @exception rb_eConverterNotFoundError There is no known conversion from + * `str`'s encoding to `to`. + * @return A string whose encoding is `to`, and whose contents is converted + * contents of `str`. + * @note Use rb_econv_prepare_options() to generate `ecopts`. + */ +VALUE rb_str_encode(VALUE str, VALUE to, int ecflags, VALUE ecopts); + +/** + * Queries if there is at least one way to convert between the passed two + * encodings. Encoding conversions are has_and_belongs_to_many relationships. + * There could be no direct conversion defined for the passed pair. Ruby tries + * to find an indirect way to do so then. For instance ISO-8859-1 has no + * direct conversion to ISO-2022-JP. But there is ISO-8859-1 to UTF-8 + * conversion; then there is UTF-8 to EUC-JP conversion; finally there also is + * EUC-JP to ISO-2022-JP conversion. So in short ISO-8859-1 can be converted + * to ISO-2022-JP using that path. This function returns true. Obviously not + * everything that can be represented using UTF-8 can also be represented using + * EUC-JP. Conversions in practice can fail depending on the actual input, and + * that renders exceptions in case of rb_str_encode(). + * + * @param[in] from_encoding One encoding. + * @param[in] to_encoding Another encoding. + * @retval 0 No way to convert the two. + * @retval 1 At least one way to convert the two.
+ * + * @internal + * + * Practically @shyouhei knows no way for this function to return 0. It seems + * everything can eventually be converted to/from UTF-8, which connects + * everything. + */ +int rb_econv_has_convpath_p(const char* from_encoding, const char* to_encoding); + +/** + * Identical to rb_econv_prepare_opts(), except it additionally takes the + * initial value of flags. The extra bits are bitwise-ORed to the return + * value. + * + * @param[in] opthash Keyword arguments. + * @param[out] ecopts Return buffer. + * @param[in] ecflags Default set of enum ::ruby_econv_flag_type. + * @exception rb_eArgError Unknown/Broken values passed. + * @return Calculated set of enum ::ruby_econv_flag_type. + * @post `ecopts` holds a hash object suitable for + * ::rb_io_t::rb_io_enc_t::ecopts. + */ +int rb_econv_prepare_options(VALUE opthash, VALUE *ecopts, int ecflags); + +/** + * Splits a keyword arguments hash (that for instance `String#encode` took) + * into a set of enum ::ruby_econv_flag_type and a hash storing replacement + * characters etc. + * + * @param[in] opthash Keyword arguments. + * @param[out] ecopts Return buffer. + * @exception rb_eArgError Unknown/Broken values passed. + * @return Calculated set of enum ::ruby_econv_flag_type. + * @post `ecopts` holds a hash object suitable for + * ::rb_io_t::rb_io_enc_t::ecopts. + */ +int rb_econv_prepare_opts(VALUE opthash, VALUE *ecopts); + +/** + * Creates a new instance of struct ::rb_econv_t. + * + * @param[in] source_encoding Name of an encoding. + * @param[in] destination_encoding Name of another encoding. + * @param[in] ecflags A set of enum ::ruby_econv_flag_type. + * @exception rb_eArgError No such encoding. + * @retval NULL Failed to create a struct ::rb_econv_t. + * @retval otherwise Allocated struct ::rb_econv_t. + * @warning Return value must be passed to rb_econv_close() exactly once. 
+ */ +rb_econv_t *rb_econv_open(const char *source_encoding, const char *destination_encoding, int ecflags); + +/** + * Identical to rb_econv_open(), except it additionally takes a hash of + * optional strings. + * + * + * @param[in] source_encoding Name of an encoding. + * @param[in] destination_encoding Name of another encoding. + * @param[in] ecflags A set of enum ::ruby_econv_flag_type. + * @param[in] ecopts Optional set of strings. + * @exception rb_eArgError No such encoding. + * @retval NULL Failed to create a struct ::rb_econv_t. + * @retval otherwise Allocated struct ::rb_econv_t. + * @warning Return value must be passed to rb_econv_close() exactly once. + */ +rb_econv_t *rb_econv_open_opts(const char *source_encoding, const char *destination_encoding, int ecflags, VALUE ecopts); + +/** + * Converts a string from an encoding to another. + * + * Possible flags are either ::RUBY_ECONV_PARTIAL_INPUT (means the source + * buffer is a part of much larger one), ::RUBY_ECONV_AFTER_OUTPUT (instructs + * the converter to stop after output before input), or both of them. + * + * @param[in,out] ec Conversion specification/state etc. + * @param[in] source_buffer_ptr Target string. + * @param[in] source_buffer_end End of target string. + * @param[out] destination_buffer_ptr Return buffer. + * @param[out] destination_buffer_end End of return buffer. + * @param[in] flags Flags (see above). + * @return The status of the conversion. + * @post `destination_buffer_ptr` holds conversion results. + */ +rb_econv_result_t rb_econv_convert(rb_econv_t *ec, + const unsigned char **source_buffer_ptr, const unsigned char *source_buffer_end, + unsigned char **destination_buffer_ptr, unsigned char *destination_buffer_end, + int flags); + +/** + * Destructs a converter. Note that a converter can have a buffer, and can be + * non-empty. Calling this would lose your data then. + * + * @param[out] ec The converter to destroy. + * @post `ec` is no longer a valid pointer. 
+ */ +void rb_econv_close(rb_econv_t *ec); + +/** + * Assigns the replacement string. The string passed here would appear in + * converted string when it cannot represent its source counterpart. This can + * happen for instance you convert an emoji to ISO-8859-1. + * + * @param[out] ec Target converter. + * @param[in] str Replacement string. + * @param[in] len Number of bytes of `str`. + * @param[in] encname Name of encoding of `str`. + * @retval 0 Success. + * @retval -1 Failure (ENOMEM etc.). + * @post `ec`'s replacement string is set to `str`. + */ +int rb_econv_set_replacement(rb_econv_t *ec, const unsigned char *str, size_t len, const char *encname); + +/** + * "Decorate"s a converter. There are special kind of converters that + * transforms the contents, like replacing CR into CRLF. You can add such + * decorators to a converter using this API. By using this function a + * decorator is prepended at the beginning of a conversion sequence: in case of + * CRLF conversion, newlines are converted before encodings are converted. + * + * @param[out] ec Target converter to decorate. + * @param[in] decorator_name Name of decorator to prepend. + * @retval 0 Success. + * @retval -1 Failure (no such decorator etc.). + * @post Decorator works before encoding conversion happens. + * + * @internal + * + * What is the possible value of the `decorator_name` is not public. You have + * to read through `transcode.c` carefully. + */ +int rb_econv_decorate_at_first(rb_econv_t *ec, const char *decorator_name); + +/** + * Identical to rb_econv_decorate_at_first(), except it adds to the opposite + * direction. For instance CRLF conversion would run _after_ encodings are + * converted. + * + * @param[out] ec Target converter to decorate. + * @param[in] decorator_name Name of decorator to prepend. + * @retval 0 Success. + * @retval -1 Failure (no such decorator etc.). + * @post Decorator works after encoding conversion happens. 
+ */ +int rb_econv_decorate_at_last(rb_econv_t *ec, const char *decorator_name); + +/** + * Creates a `rb_eConverterNotFoundError` exception object (but does not + * raise). + * + * @param[in] senc Name of source encoding. + * @param[in] denc Name of destination encoding. + * @param[in] ecflags A set of enum ::ruby_econv_flag_type. + * @return An instance of `rb_eConverterNotFoundError`. + */ +VALUE rb_econv_open_exc(const char *senc, const char *denc, int ecflags); + +/** + * Appends the passed string to the passed converter's output buffer. This can + * be handy when an encoding needs bytes out of thin air; for instance + * ISO-2022-JP has "shift function" which does not correspond to any + * characters. + * + * @param[out] ec Target converter. + * @param[in] str String to insert. + * @param[in] len Number of bytes of `str`. + * @param[in] str_encoding Encoding of `str`. + * @retval 0 Success. + * @retval -1 Failure (conversion error etc.). + * @note `str_encoding` can be anything, and `str` itself is converted + * when necessary. + */ +int rb_econv_insert_output(rb_econv_t *ec, + const unsigned char *str, size_t len, const char *str_encoding); + +/** + * Queries an encoding name which best suits for rb_econv_insert_output()'s + * last parameter. Strings in this encoding need no conversion when inserted; + * can be both time/space efficient. + * + * @param[in] ec Target converter. + * @return Its encoding for insertion. + */ +const char *rb_econv_encoding_to_insert_output(rb_econv_t *ec); + +/** + * This is a rb_econv_make_exception() + rb_exc_raise() combo. + * + * @param[in] ec (Possibly failed) conversion. + * @exception rb_eInvalidByteSequenceError Invalid byte sequence. + * @exception rb_eUndefinedConversionError Conversion undefined. + * @note This function can return when no error. + */ +void rb_econv_check_error(rb_econv_t *ec); + +/** + * This function makes sense right after rb_econv_convert() returns. 
As listed + * in ::rb_econv_result_t, rb_econv_convert() can bail out for various reasons. + * This function checks the passed converter's internal state and convert it to + * an appropriate exception object. + * + * @param[in] ec Target converter. + * @retval RUBY_Qnil The converter has no error. + * @retval otherwise Conversion error turned into an exception. + */ +VALUE rb_econv_make_exception(rb_econv_t *ec); + +/** + * Queries if rb_econv_putback() makes sense, i.e. there are invalid byte + * sequences remain in the buffer. + * + * @param[in] ec Target converter. + * @return Number of bytes that can be pushed back. + */ +int rb_econv_putbackable(rb_econv_t *ec); + +/** + * Puts back the bytes. In case of ::econv_invalid_byte_sequence, some of + * those invalid bytes are discarded and the others are buffered to be + * converted later. The latter bytes can be put back using this API. + * + * @param[out] ec Target converter (invalid byte sequence). + * @param[out] p Return buffer. + * @param[in] n Max number of bytes to put back. + * @post At most `n` bytes of what was put back is written to `p`. + */ +void rb_econv_putback(rb_econv_t *ec, unsigned char *p, int n); + +/** + * Queries the passed encoding's corresponding ASCII compatible encoding. "The + * corresponding ASCII compatible encoding" in this context is an ASCII + * compatible encoding which can represent exactly the same character sets as + * the given ASCII incompatible encoding. For instance that of UTF-16LE is + * UTF-8. + * + * @param[in] encname Name of an ASCII incompatible encoding. + * @retval NULL `encname` is already ASCII compatible. + * @retval otherwise The corresponding ASCII compatible encoding. + */ +const char *rb_econv_asciicompat_encoding(const char *encname); + +/** + * Identical to rb_econv_convert(), except it takes Ruby's string instead of + * C's pointer. + * + * @param[in,out] ec Target converter. + * @param[in] src Source string. 
+ * @param[in] flags Flags (see rb_econv_convert). + * @exception rb_eArgError Converted string is too long. + * @exception rb_eInvalidByteSequenceError Invalid byte sequence. + * @exception rb_eUndefinedConversionError Conversion undefined. + * @return The conversion result. + */ +VALUE rb_econv_str_convert(rb_econv_t *ec, VALUE src, int flags); + +/** + * Identical to rb_econv_str_convert(), except it converts only a part of the + * passed string. Can be handy when you for instance want to do line-buffered + * conversion. + * + * @param[in,out] ec Target converter. + * @param[in] src Source string. + * @param[in] byteoff Number of bytes to seek. + * @param[in] bytesize Number of bytes to read. + * @param[in] flags Flags (see rb_econv_convert). + * @exception rb_eArgError Converted string is too long. + * @exception rb_eInvalidByteSequenceError Invalid byte sequence. + * @exception rb_eUndefinedConversionError Conversion undefined. + * @return The conversion result. + */ +VALUE rb_econv_substr_convert(rb_econv_t *ec, VALUE src, long byteoff, long bytesize, int flags); + +/** + * Identical to rb_econv_str_convert(), except it appends the conversion result + * to the additionally passed string instead of creating a new string. It can + * also be seen as a routine identical to rb_econv_append(), except it takes a + * Ruby's string instead of C's pointer. + * + * @param[in,out] ec Target converter. + * @param[in] src Source string. + * @param[in] dst Return buffer. + * @param[in] flags Flags (see rb_econv_convert). + * @exception rb_eArgError Converted string is too long. + * @exception rb_eInvalidByteSequenceError Invalid byte sequence. + * @exception rb_eUndefinedConversionError Conversion undefined. + * @return The conversion result. + */ +VALUE rb_econv_str_append(rb_econv_t *ec, VALUE src, VALUE dst, int flags); + +/** + * Identical to rb_econv_str_append(), except it appends only a part of the + * passed string with conversion. 
It can also be seen as a routine identical + * to rb_econv_substr_convert(), except it appends the conversion result to the + * additionally passed string instead of creating a new string. + * + * @param[in,out] ec Target converter. + * @param[in] src Source string. + * @param[in] byteoff Number of bytes to seek. + * @param[in] bytesize Number of bytes to read. + * @param[in] dst Return buffer. + * @param[in] flags Flags (see rb_econv_convert). + * @exception rb_eArgError Converted string is too long. + * @exception rb_eInvalidByteSequenceError Invalid byte sequence. + * @exception rb_eUndefinedConversionError Conversion undefined. + * @return The conversion result. + */ +VALUE rb_econv_substr_append(rb_econv_t *ec, VALUE src, long byteoff, long bytesize, VALUE dst, int flags); + +/** + * Converts the passed C's pointer according to the passed converter, then + * append the conversion result to the passed Ruby's string. This way buffer + * overflow is properly avoided to resize the destination properly. + * + * @param[in,out] ec Target converter. + * @param[in] bytesrc Target string. + * @param[in] bytesize Number of bytes of `bytesrc`. + * @param[in] dst Return buffer. + * @param[in] flags Flags (see rb_econv_convert). + * @exception rb_eArgError Converted string is too long. + * @exception rb_eInvalidByteSequenceError Invalid byte sequence. + * @exception rb_eUndefinedConversionError Conversion undefined. + * @return The conversion result. + */ +VALUE rb_econv_append(rb_econv_t *ec, const char *bytesrc, long bytesize, VALUE dst, int flags); + +/** + * This badly named function does not set the destination encoding to binary, + * but instead just nullifies newline conversion decorators if any. Other + * ordinal character conversions still happen after this; something non-binary + * would still be generated. + * + * @param[out] ec Target converter to modify. + * @post Any newline conversions, if any, would be killed. 
+ */ +void rb_econv_binmode(rb_econv_t *ec); + +/** + * This enum is kind of omnibus. Gathers various constants. + */ +enum ruby_econv_flag_type { + + /** + * @name Flags for rb_econv_open() + * + * @{ + */ + + /** Mask for error handling related bits. */ + RUBY_ECONV_ERROR_HANDLER_MASK = 0x000000ff, + + /** Special handling of invalid sequences are there. */ + RUBY_ECONV_INVALID_MASK = 0x0000000f, + + /** Invalid sequences shall be replaced. */ + RUBY_ECONV_INVALID_REPLACE = 0x00000002, + + /** Special handling of undefined conversion are there. */ + RUBY_ECONV_UNDEF_MASK = 0x000000f0, + + /** Undefined characters shall be replaced. */ + RUBY_ECONV_UNDEF_REPLACE = 0x00000020, + + /** Undefined characters shall be escaped. */ + RUBY_ECONV_UNDEF_HEX_CHARREF = 0x00000030, + + /** Decorators are there. */ + RUBY_ECONV_DECORATOR_MASK = 0x0001ff00, + + /** Newline converters are there. */ + RUBY_ECONV_NEWLINE_DECORATOR_MASK = 0x00007f00, + + /** (Unclear; seems unused). */ + RUBY_ECONV_NEWLINE_DECORATOR_READ_MASK = 0x00000f00, + + /** (Unclear; seems unused). */ + RUBY_ECONV_NEWLINE_DECORATOR_WRITE_MASK = 0x00007000, + + /** Universal newline mode. */ + RUBY_ECONV_UNIVERSAL_NEWLINE_DECORATOR = 0x00000100, + + /** CR to CRLF conversion shall happen. */ + RUBY_ECONV_CRLF_NEWLINE_DECORATOR = 0x00001000, + + /** CRLF to CR conversion shall happen. */ + RUBY_ECONV_CR_NEWLINE_DECORATOR = 0x00002000, + + /** CRLF to LF conversion shall happen. */ + RUBY_ECONV_LF_NEWLINE_DECORATOR = 0x00004000, + + /** Texts shall be XML-escaped. */ + RUBY_ECONV_XML_TEXT_DECORATOR = 0x00008000, + + /** Texts shall be AttrValue escaped */ + RUBY_ECONV_XML_ATTR_CONTENT_DECORATOR = 0x00010000, + + /** (Unclear; seems unused). */ + RUBY_ECONV_STATEFUL_DECORATOR_MASK = 0x00f00000, + + /** Texts shall be AttrValue escaped. */ + RUBY_ECONV_XML_ATTR_QUOTE_DECORATOR = 0x00100000, + + /** Newline decorator's default. 
*/ + RUBY_ECONV_DEFAULT_NEWLINE_DECORATOR = +#if defined(RUBY_TEST_CRLF_ENVIRONMENT) || defined(_WIN32) + RUBY_ECONV_CRLF_NEWLINE_DECORATOR, +#else + 0, +#endif + +#define ECONV_ERROR_HANDLER_MASK RUBY_ECONV_ERROR_HANDLER_MASK /**< @old{RUBY_ECONV_ERROR_HANDLER_MASK} */ +#define ECONV_INVALID_MASK RUBY_ECONV_INVALID_MASK /**< @old{RUBY_ECONV_INVALID_MASK} */ +#define ECONV_INVALID_REPLACE RUBY_ECONV_INVALID_REPLACE /**< @old{RUBY_ECONV_INVALID_REPLACE} */ +#define ECONV_UNDEF_MASK RUBY_ECONV_UNDEF_MASK /**< @old{RUBY_ECONV_UNDEF_MASK} */ +#define ECONV_UNDEF_REPLACE RUBY_ECONV_UNDEF_REPLACE /**< @old{RUBY_ECONV_UNDEF_REPLACE} */ +#define ECONV_UNDEF_HEX_CHARREF RUBY_ECONV_UNDEF_HEX_CHARREF /**< @old{RUBY_ECONV_UNDEF_HEX_CHARREF} */ +#define ECONV_DECORATOR_MASK RUBY_ECONV_DECORATOR_MASK /**< @old{RUBY_ECONV_DECORATOR_MASK} */ +#define ECONV_NEWLINE_DECORATOR_MASK RUBY_ECONV_NEWLINE_DECORATOR_MASK /**< @old{RUBY_ECONV_NEWLINE_DECORATOR_MASK} */ +#define ECONV_NEWLINE_DECORATOR_READ_MASK RUBY_ECONV_NEWLINE_DECORATOR_READ_MASK /**< @old{RUBY_ECONV_NEWLINE_DECORATOR_READ_MASK} */ +#define ECONV_NEWLINE_DECORATOR_WRITE_MASK RUBY_ECONV_NEWLINE_DECORATOR_WRITE_MASK /**< @old{RUBY_ECONV_NEWLINE_DECORATOR_WRITE_MASK} */ +#define ECONV_UNIVERSAL_NEWLINE_DECORATOR RUBY_ECONV_UNIVERSAL_NEWLINE_DECORATOR /**< @old{RUBY_ECONV_UNIVERSAL_NEWLINE_DECORATOR} */ +#define ECONV_CRLF_NEWLINE_DECORATOR RUBY_ECONV_CRLF_NEWLINE_DECORATOR /**< @old{RUBY_ECONV_CRLF_NEWLINE_DECORATOR} */ +#define ECONV_CR_NEWLINE_DECORATOR RUBY_ECONV_CR_NEWLINE_DECORATOR /**< @old{RUBY_ECONV_CR_NEWLINE_DECORATOR} */ +#define ECONV_LF_NEWLINE_DECORATOR RUBY_ECONV_LF_NEWLINE_DECORATOR /**< @old{RUBY_ECONV_LF_NEWLINE_DECORATOR} */ +#define ECONV_XML_TEXT_DECORATOR RUBY_ECONV_XML_TEXT_DECORATOR /**< @old{RUBY_ECONV_XML_TEXT_DECORATOR} */ +#define ECONV_XML_ATTR_CONTENT_DECORATOR RUBY_ECONV_XML_ATTR_CONTENT_DECORATOR /**< @old{RUBY_ECONV_XML_ATTR_CONTENT_DECORATOR} */ +#define ECONV_STATEFUL_DECORATOR_MASK 
RUBY_ECONV_STATEFUL_DECORATOR_MASK /**< @old{RUBY_ECONV_STATEFUL_DECORATOR_MASK} */ +#define ECONV_XML_ATTR_QUOTE_DECORATOR RUBY_ECONV_XML_ATTR_QUOTE_DECORATOR /**< @old{RUBY_ECONV_XML_ATTR_QUOTE_DECORATOR} */ +#define ECONV_DEFAULT_NEWLINE_DECORATOR RUBY_ECONV_DEFAULT_NEWLINE_DECORATOR /**< @old{RUBY_ECONV_DEFAULT_NEWLINE_DECORATOR} */ + /** @} */ + + /** + * @name Flags for rb_econv_convert() + * + * @{ + */ + + /** Indicates the input is a part of much larger one. */ + RUBY_ECONV_PARTIAL_INPUT = 0x00020000, + + /** Instructs the converter to stop after output. */ + RUBY_ECONV_AFTER_OUTPUT = 0x00040000, +#define ECONV_PARTIAL_INPUT RUBY_ECONV_PARTIAL_INPUT /**< @old{RUBY_ECONV_PARTIAL_INPUT} */ +#define ECONV_AFTER_OUTPUT RUBY_ECONV_AFTER_OUTPUT /**< @old{RUBY_ECONV_AFTER_OUTPUT} */ + + RUBY_ECONV_FLAGS_PLACEHOLDER /**< Placeholder (not used) */ +}; + +RBIMPL_SYMBOL_EXPORT_END() + +#endif /* RUBY_INTERNAL_ENCODING_TRANSCODE_H */ diff --git a/include/ruby/internal/error.h b/include/ruby/internal/error.h index 7e9d5c4167..cd37f4461a 100644 --- a/include/ruby/internal/error.h +++ b/include/ruby/internal/error.h @@ -17,67 +17,577 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Declares ::rb_raise(). */ +#include "ruby/internal/attr/cold.h" +#include "ruby/internal/attr/format.h" +#include "ruby/internal/attr/noreturn.h" +#include "ruby/internal/attr/nonnull.h" #include "ruby/internal/dllexport.h" #include "ruby/internal/value.h" -#include "ruby/backward/2/attributes.h" -RBIMPL_SYMBOL_EXPORT_BEGIN() - -VALUE rb_errinfo(void); -void rb_set_errinfo(VALUE); +/** + * @defgroup exception Exception handlings + * @{ + */ +/** + * Warning categories. 
A warning issued using this API can be selectively + * requested / suppressed by the end-users. For instance passing + * `-W:no-deprecated` to the ruby process would suppress those warnings in + * deprecated category. + * + * @warning There is no way to declare a new category (for now). + */ typedef enum { + /** Category unspecified. */ RB_WARN_CATEGORY_NONE, + + /** Warning is for deprecated features. */ RB_WARN_CATEGORY_DEPRECATED, + + /** Warning is for experimental features. */ RB_WARN_CATEGORY_EXPERIMENTAL, - RB_WARN_CATEGORY_ALL_BITS = 0x6 /* no RB_WARN_CATEGORY_NONE bit */ + + /** Warning is for performance issues (not enabled by -w). */ + RB_WARN_CATEGORY_PERFORMANCE, + + RB_WARN_CATEGORY_DEFAULT_BITS = ( + (1U << RB_WARN_CATEGORY_DEPRECATED) | + (1U << RB_WARN_CATEGORY_EXPERIMENTAL) | + 0), + + RB_WARN_CATEGORY_ALL_BITS = ( + (1U << RB_WARN_CATEGORY_DEPRECATED) | + (1U << RB_WARN_CATEGORY_EXPERIMENTAL) | + (1U << RB_WARN_CATEGORY_PERFORMANCE) | + 0) } rb_warning_category_t; -/* for rb_readwrite_sys_fail first argument */ +/** for rb_readwrite_sys_fail first argument */ enum rb_io_wait_readwrite {RB_IO_WAIT_READABLE, RB_IO_WAIT_WRITABLE}; +/** @cond INTERNAL_MACRO */ #define RB_IO_WAIT_READABLE RB_IO_WAIT_READABLE #define RB_IO_WAIT_WRITABLE RB_IO_WAIT_WRITABLE +/** @endcond */ + +RBIMPL_SYMBOL_EXPORT_BEGIN() + +/** + * This is the same as `$!` in Ruby. + * + * @retval RUBY_Qnil Not handling exceptions at the moment. + * @retval otherwise The current exception in the current thread. + * @ingroup exception + */ +VALUE rb_errinfo(void); + +/** + * Sets the current exception (`$!`) to the given value. + * + * @param[in] err An instance of ::rb_eException, or ::RUBY_Qnil. + * @exception rb_eTypeError What is given was neither ::rb_eException nor + * ::RUBY_Qnil. + * @note Use rb_raise() instead to raise `err`. This function just + * assigns the given object to the global variable. 
+ * @ingroup exception + */ +void rb_set_errinfo(VALUE err); + +RBIMPL_ATTR_NORETURN() +RBIMPL_ATTR_NONNULL((2)) +RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 2, 3) +/** + * Exception entry point. By calling this function the execution of your + * program gets interrupted to "raise" an exception up to the callee entities. + * Programs could "rescue" that exception, or could "ensure" some part of them. + * If nobody cares about such things, the raised exception reaches at the top + * of execution. This yields abnormal end of the process. + * + * @param[in] exc A subclass of ::rb_eException. + * @param[in] fmt Format specifier string compatible with rb_sprintf(). + * @exception exc The specified exception. + * @note It never returns. + */ +void rb_raise(VALUE exc, const char *fmt, ...); + +RBIMPL_ATTR_NORETURN() +RBIMPL_ATTR_NONNULL((1)) +RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 1, 2) +/** + * Raises the unsung "fatal" exception. This is considered severe. Nobody can + * rescue the exception. Once raised, process termination is inevitable. + * However ensure clauses still run, so that resources are properly cleaned up. + * + * @param[in] fmt Format specifier string compatible with rb_sprintf(). + * @exception rb_eFatal An exception that you cannot rescue. + * @note It never returns. + */ +void rb_fatal(const char *fmt, ...); + +RBIMPL_ATTR_COLD() +RBIMPL_ATTR_NORETURN() +RBIMPL_ATTR_NONNULL((1)) +RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 1, 2) +/** + * Interpreter panic switch. Immediate process termination without any + * synchronisations shall occur. LOTS of internal states, stack traces, and + * even machine registers are displayed if possible for debugging purposes + * then. + * + * @warning Do not use this API. + * @warning You are not expected to use this API. + * @warning Why not just fix your code instead of calling this API? + * @warning It was a bad idea to expose this API to extension libraries at + * the first place. 
We just cannot delete it at this point for + * backwards compatibility. That doesn't mean everyone are + * welcomed to call this function at will. + * @param[in] fmt Format specifier string compatible with rb_sprintf(). + * @note It never returns. + */ +void rb_bug(const char *fmt, ...); + +RBIMPL_ATTR_NORETURN() +RBIMPL_ATTR_NONNULL(()) +/** + * This is a wrapper of rb_bug() which automatically constructs appropriate + * message from the passed errno. + * + * @param[in] msg Additional message to display. + * @exception err C level errno. + * @note It never returns. + */ +void rb_bug_errno(const char *msg, int err); + +RBIMPL_ATTR_NORETURN() +/** + * Converts a C errno into a Ruby exception, then raises it. For instance: + * + * ```CXX + * static VALUE + * foo(VALUE argv) + * { + * const auto cmd = StringValueCStr(argv); + * const auto waitr = system(cmd); + * if (waitr == -1) { + * rb_sys_fail("system(3posix)"); // <-------------- this + * } + * else { + * return INT2FIX(fd); + * } + * } + * ``` + * + * @param[in] msg Additional message to raise. + * @exception rb_eSystemCallError An exception representing errno. + * @note It never returns. + */ +void rb_sys_fail(const char *msg); + +RBIMPL_ATTR_NORETURN() +/** + * Identical to rb_sys_fail(), except it takes the message in Ruby's String + * instead of C's. + * + * @param[in] msg Additional message to raise. + * @exception rb_eSystemCallError An exception representing errno. + * @note It never returns. + */ +void rb_sys_fail_str(VALUE msg); + +RBIMPL_ATTR_NORETURN() +RBIMPL_ATTR_NONNULL((2)) +/** + * Identical to rb_sys_fail(), except it takes additional module to extend the + * exception object before raising. + * + * @param[in] mod A ::rb_cModule instance. + * @param[in] msg Additional message to raise. + * @exception rb_eSystemCallError An exception representing errno. + * @note It never returns. + * + * @internal + * + * Does anybody use it? 
+ */ +void rb_mod_sys_fail(VALUE mod, const char *msg); + +RBIMPL_ATTR_NORETURN() +/** + * Identical to rb_mod_sys_fail(), except it takes the message in Ruby's String + * instead of C's. + * + * @param[in] mod A ::rb_cModule instance. + * @param[in] msg Additional message to raise. + * @exception rb_eSystemCallError An exception representing errno. + * @note It never returns. + */ +void rb_mod_sys_fail_str(VALUE mod, VALUE msg); + +RBIMPL_ATTR_NORETURN() +/** + * Raises appropriate exception using the parameters. + * + * In Ruby level there are rb_eEAGAINWaitReadable etc. This function maps the + * given parameter to an appropriate exception class, then raises it. + * + * @param[in] waiting Reason for the IO to wait. + * @param[in] msg Additional message to raise. + * @exception rb_eEAGAINWaitWritable + * @exception rb_eEWOULDBLOCKWaitWritable + * @exception rb_eEINPROGRESSWaitWritable + * @exception rb_eEAGAINWaitReadable + * @exception rb_eEWOULDBLOCKWaitReadable + * @exception rb_eEINPROGRESSWaitReadable + * @exception rb_eSystemCallError + * @note It never returns. + */ +void rb_readwrite_sys_fail(enum rb_io_wait_readwrite waiting, const char *msg); + +RBIMPL_ATTR_NORETURN() +/** + * Breaks from a block. Because you are using a CAPI this is not as intuitive + * as it sounds. In order for this function to properly work, make a + * ::rb_block_call_func_t function that calls it internally, and pass that + * function to rb_block_call(). + * + * @exception rb_eLocalJumpError Called from outside of a block. + * @note It never returns. + */ +void rb_iter_break(void); + +RBIMPL_ATTR_NORETURN() +/** + * Identical to rb_iter_break(), except it additionally takes the "value" of + * this breakage. It will be the evaluation result of the iterator. This is + * kind of complicated; you cannot see this as a "return from a block" + * behaviour. 
Take a look at this example: + * + * ```ruby + * def foo(q) + * puts(w = yield(q)) + * puts(e = yield(w)) + * puts(r = yield(e)) + * puts(t = yield(r)) + * puts(y = yield(t)) + * return "howdy!" + * end + * + * x = foo(0) {|i| + * if i > 2 + * break "hello!" + * else + * next i + 1 + * end + * } + * + * puts x + * ``` + * + * This script outputs 1, 2, 3, and hello. Note that the value passed to break + * becomes the return value of foo method, not the value of yield. This is + * confusing, but can be handy on occasions e.g. when you want to bring a + * local variable out of a block. + * + * @param[in] val The value of the iterator. + * @exception rb_eLocalJumpError Called from outside of a block. + * @note It never returns. + */ +void rb_iter_break_value(VALUE val); + +RBIMPL_ATTR_NORETURN() +/** + * Terminates the current execution context. This API is the entry point of a + * "well-mannered" termination sequence. When called from an extension + * library, it raises ::rb_eSystemExit exception. Programs could rescue that + * exception. Can cancel process exit then. Otherwise, that exception results + * in a process termination with the status passed to this function. + * + * @param[in] status Exit status, see also exit(3). + * @exception rb_eSystemExit Exception representing the exit status. + * @note It never returns. + * + * @internal + * + * "When called from an extension library"? You might wonder. In fact there + * are chances for this function to be called from outside of it, for instance + * when dlopen(3) failed. In case it is not possible for this function to + * raise an exception, it does not (silently enters to process cleanup). But + * that is a kind of implementation detail which extension library authors + * should not bother. + */ +void rb_exit(int status); + +RBIMPL_ATTR_NORETURN() +/** + * @exception rb_eNotImpError + * @note It never returns. 
+ */ +void rb_notimplement(void); + +/** + * Creates an exception object that represents the given C errno. + * + * @param[in] err C level errno. + * @param[in] msg Additional message. + * @retval rb_eSystemCallError An exception for the errno. + */ +VALUE rb_syserr_new(int err, const char * msg); -PRINTF_ARGS(NORETURN(void rb_raise(VALUE, const char*, ...)), 2, 3); -PRINTF_ARGS(NORETURN(void rb_fatal(const char*, ...)), 1, 2); -COLDFUNC PRINTF_ARGS(NORETURN(void rb_bug(const char*, ...)), 1, 2); -NORETURN(void rb_bug_errno(const char*, int)); -NORETURN(void rb_sys_fail(const char*)); -NORETURN(void rb_sys_fail_str(VALUE)); -NORETURN(void rb_mod_sys_fail(VALUE, const char*)); -NORETURN(void rb_mod_sys_fail_str(VALUE, VALUE)); -NORETURN(void rb_readwrite_sys_fail(enum rb_io_wait_readwrite, const char*)); -NORETURN(void rb_iter_break(void)); -NORETURN(void rb_iter_break_value(VALUE)); -NORETURN(void rb_exit(int)); -NORETURN(void rb_notimplement(void)); -VALUE rb_syserr_new(int, const char *); +/** + * Identical to rb_syserr_new(), except it takes the message in Ruby's String + * instead of C's. + * + * @param[in] n C level errno. + * @param[in] arg Additional message. + * @retval rb_eSystemCallError An exception for the errno. + */ VALUE rb_syserr_new_str(int n, VALUE arg); -NORETURN(void rb_syserr_fail(int, const char*)); -NORETURN(void rb_syserr_fail_str(int, VALUE)); -NORETURN(void rb_mod_syserr_fail(VALUE, int, const char*)); -NORETURN(void rb_mod_syserr_fail_str(VALUE, int, VALUE)); -NORETURN(void rb_readwrite_syserr_fail(enum rb_io_wait_readwrite, int, const char*)); -NORETURN(void rb_unexpected_type(VALUE,int)); +RBIMPL_ATTR_NORETURN() +/** + * Raises appropriate exception that represents a C errno. + * + * @param[in] err C level errno. + * @param[in] msg Additional message to raise. + * @exception rb_eSystemCallError An exception representing `err`. + * @note It never returns. 
+ */ +void rb_syserr_fail(int err, const char *msg); + +RBIMPL_ATTR_NORETURN() +/** + * Identical to rb_syserr_fail(), except it takes the message in Ruby's String + * instead of C's. + * + * @param[in] err C level errno. + * @param[in] msg Additional message to raise. + * @exception rb_eSystemCallError An exception representing `err`. + * @note It never returns. + */ +void rb_syserr_fail_str(int err, VALUE msg); + +RBIMPL_ATTR_NORETURN() +RBIMPL_ATTR_NONNULL(()) +/** + * Identical to rb_mod_sys_fail(), except it does not depend on C global + * variable errno. Pass it explicitly. + * + * @param[in] mod A ::rb_cModule instance. + * @param[in] err C level errno. + * @param[in] msg Additional message to raise. + * @exception rb_eSystemCallError An exception representing `err`. + * @note It never returns. + */ +void rb_mod_syserr_fail(VALUE mod, int err, const char *msg); + +RBIMPL_ATTR_NORETURN() +/** + * Identical to rb_mod_syserr_fail(), except it takes the message in Ruby's + * String instead of C's. + * + * @param[in] mod A ::rb_cModule instance. + * @param[in] err C level errno. + * @param[in] msg Additional message to raise. + * @exception rb_eSystemCallError An exception representing `err`. + * @note It never returns. + */ +void rb_mod_syserr_fail_str(VALUE mod, int err, VALUE msg); + +RBIMPL_ATTR_NORETURN() +/** + * Identical to rb_readwrite_sys_fail(), except it does not depend on C global + * variable errno. Pass it explicitly. + * + * @param[in] waiting Reason for the IO to wait. + * @param[in] err C level errno. + * @param[in] msg Additional message to raise. + * @exception rb_eEAGAINWaitWritable + * @exception rb_eEWOULDBLOCKWaitWritable + * @exception rb_eEINPROGRESSWaitWritable + * @exception rb_eEAGAINWaitReadable + * @exception rb_eEWOULDBLOCKWaitReadable + * @exception rb_eEINPROGRESSWaitReadable + * @exception rb_eSystemCallError + * @note It never returns. 
+ */ +void rb_readwrite_syserr_fail(enum rb_io_wait_readwrite waiting, int err, const char *msg); + +RBIMPL_ATTR_COLD() +RBIMPL_ATTR_NORETURN() +/** + * Fails with the given object's type incompatibility to the type. + * + * It seems this function is visible from extension libraries only because + * RTYPEDDATA_TYPE() uses it on RUBY_DEBUG. So you can basically ignore it; + * use some other fine-grained method instead. + * + * @param[in] self The object in question. + * @param[in] t Expected type of the object. + * @exception rb_eTypeError `self` not in type `t`. + * @note It never returns. + * @note The second argument must have been an enum ::ruby_value_type, + * but for historical reasons it remains to be an int (in other + * words we see no benefits fixing this bug). + */ +void rb_unexpected_type(VALUE self, int t); + +/** + * @private + * + * This is an implementation detail of #ruby_verbose. Please don't use it + * directly. + * + * @retval Qnil Interpreter is quiet. + * @retval Qfalse Interpreter is kind of chatty. + * @retval otherwise Interpreter is very verbose. + */ VALUE *rb_ruby_verbose_ptr(void); + +/** + * @private + * + * This is an implementation detail of #ruby_debug. Please don't use it + * directly. + * + * @retval Qnil Interpreter not in debug mode. + * @retval Qfalse Interpreter not in debug mode. + * @retval otherwise Interpreter is in debug mode. + */ VALUE *rb_ruby_debug_ptr(void); + +/** + * This variable controls whether the interpreter is in debug mode. Setting + * this to some truthy value is equivalent to passing `-W` flag to the + * interpreter. Setting this to ::Qfalse is equivalent to passing `-W1` flag + * to the interpreter. Setting this to ::Qnil is equivalent to passing `-W0` + * flag to the interpreter. + * + * @retval Qnil Interpreter is quiet. + * @retval Qfalse Interpreter is kind of chatty. + * @retval otherwise Interpreter is very verbose. 
+ */ #define ruby_verbose (*rb_ruby_verbose_ptr()) + +/** + * This variable controls whether the interpreter is in debug mode. Setting + * this to some truthy value is equivalent to passing `-d` flag to the + * interpreter. + * + * @retval Qnil Interpreter not in debug mode. + * @retval Qfalse Interpreter not in debug mode. + * @retval otherwise Interpreter is in debug mode. + */ #define ruby_debug (*rb_ruby_debug_ptr()) -/* reports if `-W' specified */ -PRINTF_ARGS(void rb_warning(const char*, ...), 1, 2); -PRINTF_ARGS(void rb_category_warning(rb_warning_category_t, const char*, ...), 2, 3); -PRINTF_ARGS(void rb_compile_warning(const char *, int, const char*, ...), 3, 4); -PRINTF_ARGS(void rb_category_compile_warn(rb_warning_category_t, const char *, int, const char*, ...), 4, 5); -PRINTF_ARGS(void rb_sys_warning(const char*, ...), 1, 2); -/* reports always */ -COLDFUNC PRINTF_ARGS(void rb_warn(const char*, ...), 1, 2); -COLDFUNC PRINTF_ARGS(void rb_category_warn(rb_warning_category_t, const char*, ...), 2, 3); -PRINTF_ARGS(void rb_compile_warn(const char *, int, const char*, ...), 3, 4); +/* reports if $VERBOSE is true */ +RBIMPL_ATTR_NONNULL((1)) +RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 1, 2) +/** + * Issues a warning. + * + * In ruby, warnings these days are tightly coupled with the rb_mWarning + * constant and its `warn` singleton method. This CAPI is just a thin wrapper + * of it; everything passed are formatted like what rb_sprintf() does, then + * passed through to the method. Programs can have their own `def + * Warning.warn` at will to do whatever they want, from ignoring the warnings + * at all to sinking them to some BigQuery data set via a Fluentd cluster. By + * default, the method just emits its passed contents to ::rb_stderr using + * rb_io_write(). + * + * @note This function is affected by the value of $VERBOSE, it does + * nothing unless $VERBOSE is true. + * @param[in] fmt Format specifier string compatible with rb_sprintf(). 
+ * + * @internal + * + * Above description is in fact inaccurate. This API interfaces with Ractors. + */ +void rb_warning(const char *fmt, ...); + +RBIMPL_ATTR_NONNULL((2)) +RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 2, 3) +/** + * Identical to rb_warning(), except it takes additional "category" parameter. + * + * @param[in] cat Name of a known category. + * @param[in] fmt Format specifier string compatible with rb_sprintf(). + */ +void rb_category_warning(rb_warning_category_t cat, const char *fmt, ...); + +RBIMPL_ATTR_NONNULL((1, 3)) +RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 3, 4) +/** + * Issues a compile-time warning that happens at `__file__:__line__`. Purpose + * of this function being exposed to CAPI is unclear. + * + * @note This function is affected by the value of $VERBOSE. + * @param[in] file The path corresponding to Ruby level `__FILE__`. + * @param[in] line The number corresponding to Ruby level `__LINE__`. + * @param[in] fmt Format specifier string compatible with rb_sprintf(). + */ +void rb_compile_warning(const char *file, int line, const char *fmt, ...); + +RBIMPL_ATTR_NONNULL((1)) +RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 1, 2) +/** + * Identical to rb_sys_fail(), except it does not raise an exception to render + * a warning instead. + * + * @note This function is affected by the value of $VERBOSE. + * @param[in] fmt Format specifier string compatible with rb_sprintf(). + */ +void rb_sys_warning(const char *fmt, ...); + +/* reports if $VERBOSE is not nil (so if it is true or false) */ +RBIMPL_ATTR_COLD() +RBIMPL_ATTR_NONNULL((1)) +RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 1, 2) +/** + * Identical to rb_warning(), except it reports unless $VERBOSE is nil. + * + * @note This function is affected by the value of $VERBOSE, it does + * nothing if $VERBOSE is nil. + * @param[in] fmt Format specifier string compatible with rb_sprintf(). 
+ */ +void rb_warn(const char *fmt, ...); + +RBIMPL_ATTR_COLD() +RBIMPL_ATTR_NONNULL((2)) +RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 2, 3) +/** + * Identical to rb_category_warning(), except it reports unless $VERBOSE is nil. + * + * @param[in] cat Category e.g. deprecated. + * @param[in] fmt Format specifier string compatible with rb_sprintf(). + */ +void rb_category_warn(rb_warning_category_t cat, const char *fmt, ...); + +RBIMPL_ATTR_NONNULL((1, 3)) +RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 3, 4) +/** + * Identical to rb_compile_warning(), except it reports unless $VERBOSE is nil. + * + * @param[in] file The path corresponding to Ruby level `__FILE__`. + * @param[in] line The number corresponding to Ruby level `__LINE__`. + * @param[in] fmt Format specifier string compatible with rb_sprintf(). + */ +void rb_compile_warn(const char *file, int line, const char *fmt, ...); + +RBIMPL_ATTR_NONNULL((2, 4)) +RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 4, 5) +/** + * Identical to rb_compile_warn(), except it also accepts category. + * + * @param[in] cat Category e.g. deprecated. + * @param[in] file The path corresponding to Ruby level `__FILE__`. + * @param[in] line The number corresponding to Ruby level `__LINE__`. + * @param[in] fmt Format specifier string compatible with rb_sprintf(). + */ +void rb_category_compile_warn(rb_warning_category_t cat, const char *file, int line, const char *fmt, ...); + +/** @} */ RBIMPL_SYMBOL_EXPORT_END() diff --git a/include/ruby/internal/eval.h b/include/ruby/internal/eval.h index 934611fbb9..5bcbb97746 100644 --- a/include/ruby/internal/eval.h +++ b/include/ruby/internal/eval.h @@ -17,32 +17,382 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Declares ::rb_eval_string(). 
*/ #include "ruby/internal/dllexport.h" +#include "ruby/internal/attr/nonnull.h" #include "ruby/internal/value.h" RBIMPL_SYMBOL_EXPORT_BEGIN() -VALUE rb_eval_string(const char*); -VALUE rb_eval_string_protect(const char*, int*); -VALUE rb_eval_string_wrap(const char*, int*); -VALUE rb_funcall(VALUE, ID, int, ...); -VALUE rb_funcallv(VALUE, ID, int, const VALUE*); -VALUE rb_funcallv_kw(VALUE, ID, int, const VALUE*, int); -VALUE rb_funcallv_public(VALUE, ID, int, const VALUE*); -VALUE rb_funcallv_public_kw(VALUE, ID, int, const VALUE*, int); +RBIMPL_ATTR_NONNULL(()) +/** + * Evaluates the given string. + * + * In case it is called from within a C-backended method, the evaluation is + * done under the current binding. However there can be no method. On such + * situation this function evaluates in an isolated binding, like `require` + * runs in a separate one. + * + * `__FILE__` will be `"(eval)"`, and `__LINE__` starts from 1 in the + * evaluation. + * + * @param[in] str Ruby code to evaluate. + * @exception rb_eException Raises an exception on error. + * @return The evaluated result. + * + * @internal + * + * @shyouhei's old tale about the birth and growth of this function: + * + * At the beginning, there was no rb_eval_string(). @shyouhei heard that + * @shugo, author of Apache httpd's mod_ruby module, requested @matz for this + * API. He wanted a way so that mod_ruby can evaluate ruby scripts one by one, + * separately, in each different contexts. So this function was made. It was + * designed to be a global interpreter entry point like ruby_run_node(). + * + * The way it is implemented however allows extension libraries (not just + * programs like Apache httpd) to call this function. Because its name says + * nothing about the initial design, people started to think of it as an + * orthodox way to call ruby level `eval` method from their extension + * libraries. 
Even our `extension.rdoc` has had a description of this function + * basically according to this understanding. + * + * The old (mod_ruby like) usage still works. But over time, usages of this + * function from extension libraries got popular, while mod_ruby faded out; is + * no longer maintained now. Devs decided to actively support both. This + * function now auto-detects how it is called, and switches how it works + * depending on it. + * + * @see https://bugs.ruby-lang.org/issues/18780 + */ +VALUE rb_eval_string(const char *str); + +RBIMPL_ATTR_NONNULL((1)) +/** + * Identical to rb_eval_string(), except it avoids potential global escapes. + * Such global escapes include exceptions, `throw`, `break`, for example. + * + * It first evaluates the given string as rb_eval_string() does. If no global + * escape occurred during the evaluation, it returns the result and `*state` is + * zero. Otherwise, it returns some undefined value and sets `*state` to + * nonzero. If state is `NULL`, it is not set in both cases. + * + * @param[in] str Ruby code to evaluate. + * @param[out] state State of execution. + * @return The evaluated result if succeeded, an undefined value if + * otherwise. + * @post `*state` is set to zero if succeeded. Nonzero otherwise. + * @warning You have to clear the error info with `rb_set_errinfo(Qnil)` if + * you decide to ignore the caught exception. + * @see rb_eval_string + * @see rb_protect + * + * @internal + * + * The "undefined value" described above is in fact ::RUBY_Qnil for now. But + * @shyouhei doesn't think that we would never change that. + * + * Though not a part of our public API, `state` is in fact an + * enum ruby_tag_type. You can see the potential "nonzero" values by looking + * at vm_core.h. + */ +VALUE rb_eval_string_protect(const char *str, int *state); + +RBIMPL_ATTR_NONNULL((1)) +/** + * Identical to rb_eval_string_protect(), except it evaluates the given string + * under a module binding in an isolated binding. 
This is the same as a + * binding for loaded libraries on `rb_load(something, true)`. + * + * @param[in] str Ruby code to evaluate. + * @param[out] state State of execution. + * @return The evaluated result if succeeded, an undefined value if + * otherwise. + * @post `*state` is set to zero if succeeded. Nonzero otherwise. + * @warning You have to clear the error info with `rb_set_errinfo(Qnil)` if + * you decide to ignore the caught exception. + * @see rb_eval_string + */ +VALUE rb_eval_string_wrap(const char *str, int *state); + +/** + * Calls a method. Can call both public and private methods. + * + * @param[in,out] recv Receiver of the method. + * @param[in] mid Name of the method to call. + * @param[in] n Number of arguments that follow. + * @param[in] ... Arbitrary number of method arguments. + * @exception rb_eNoMethodError No such method. + * @exception rb_eException Any exceptions happen inside. + * @return What the method evaluates to. + */ +VALUE rb_funcall(VALUE recv, ID mid, int n, ...); + +/** + * Identical to rb_funcall(), except it takes the method arguments as a C + * array. + * + * @param[in,out] recv Receiver of the method. + * @param[in] mid Name of the method to call. + * @param[in] argc Number of arguments. + * @param[in] argv Arbitrary number of method arguments. + * @exception rb_eNoMethodError No such method. + * @exception rb_eException Any exceptions happen inside. + * @return What the method evaluates to. + */ +VALUE rb_funcallv(VALUE recv, ID mid, int argc, const VALUE *argv); + +/** + * Identical to rb_funcallv(), except you can specify how to handle the last + * element of the given array. + * + * @param[in,out] recv Receiver of the method. + * @param[in] mid Name of the method to call. + * @param[in] argc Number of arguments. + * @param[in] argv Arbitrary number of method arguments. + * @param[in] kw_splat Handling of keyword parameters: + * - RB_NO_KEYWORDS `argv`'s last is not a keyword argument. 
+ * - RB_PASS_KEYWORDS `argv`'s last is a keyword argument. + * - RB_PASS_CALLED_KEYWORDS it depends if there is a passed block. + * @exception rb_eNoMethodError No such method. + * @exception rb_eException Any exceptions happen inside. + * @return What the method evaluates to. + */ +VALUE rb_funcallv_kw(VALUE recv, ID mid, int argc, const VALUE *argv, int kw_splat); + +/** + * Identical to rb_funcallv(), except it only takes public methods into + * account. This is roughly Ruby's `Object#public_send`. + * + * @param[in,out] recv Receiver of the method. + * @param[in] mid Name of the method to call. + * @param[in] argc Number of arguments. + * @param[in] argv Arbitrary number of method arguments. + * @exception rb_eNoMethodError No such method. + * @exception rb_eNoMethodError The method is private or protected. + * @exception rb_eException Any exceptions happen inside. + * @return What the method evaluates to. + */ +VALUE rb_funcallv_public(VALUE recv, ID mid, int argc, const VALUE *argv); + +/** + * Identical to rb_funcallv_public(), except you can specify how to handle the + * last element of the given array. It can also be seen as a routine identical + * to rb_funcallv_kw(), except it only takes public methods into account. + * + * @param[in,out] recv Receiver of the method. + * @param[in] mid Name of the method to call. + * @param[in] argc Number of arguments. + * @param[in] argv Arbitrary number of method arguments. + * @param[in] kw_splat Handling of keyword parameters: + * - RB_NO_KEYWORDS `argv`'s last is not a keyword argument. + * - RB_PASS_KEYWORDS `argv`'s last is a keyword argument. + * - RB_PASS_CALLED_KEYWORDS it depends if there is a passed block. + * @exception rb_eNoMethodError No such method. + * @exception rb_eNoMethodError The method is private or protected. + * @exception rb_eException Any exceptions happen inside. + * @return What the method evaluates to. 
+ */ +VALUE rb_funcallv_public_kw(VALUE recv, ID mid, int argc, const VALUE *argv, int kw_splat); + +/** + * @deprecated This is an old name of rb_funcallv(). Provided here for + * backwards compatibility to 2.x programs (introduced in 2.1). + * It is not a good name. Please don't use it any longer. + */ #define rb_funcall2 rb_funcallv + +/** + * @deprecated This is an old name of rb_funcallv_public(). Provided here + * for backwards compatibility to 2.x programs (introduced in + * 2.1). It is not a good name. Please don't use it any longer. + */ #define rb_funcall3 rb_funcallv_public -VALUE rb_funcall_passing_block(VALUE, ID, int, const VALUE*); -VALUE rb_funcall_passing_block_kw(VALUE, ID, int, const VALUE*, int); -VALUE rb_funcall_with_block(VALUE, ID, int, const VALUE*, VALUE); -VALUE rb_funcall_with_block_kw(VALUE, ID, int, const VALUE*, VALUE, int); -VALUE rb_call_super(int, const VALUE*); -VALUE rb_call_super_kw(int, const VALUE*, int); + +/** + * Identical to rb_funcallv_public(), except you can pass the passed block. + * + * Sometimes you want to "pass" a block parameter from one method to another. + * Suppose you have this Ruby method `foo`: + * + * ```ruby + * def foo(x, y, &z) + * x.open(y, &z) + * end + * ``` + * + * And suppose you want to translate this into C. Then + * rb_funcall_passing_block() function is usable in this situation. + * + * ```CXX + * VALUE + * foo_translated_into_C(VALUE self, VALUE x, VALUE y) + * { + * const auto open = rb_intern("open"); + * + * return rb_funcall_passing_block(x, open, 1, &y); + * } + * ``` + * + * @see rb_yield_block + * @param[in,out] recv Receiver of the method. + * @param[in] mid Name of the method to call. + * @param[in] argc Number of arguments. + * @param[in] argv Arbitrary number of method arguments. + * @exception rb_eNoMethodError No such method. + * @exception rb_eNoMethodError The method is private or protected. + * @exception rb_eException Any exceptions happen inside.
+ * @return What the method evaluates to. + */ +VALUE rb_funcall_passing_block(VALUE recv, ID mid, int argc, const VALUE *argv); + +/** + * Identical to rb_funcall_passing_block(), except you can specify how to + * handle the last element of the given array. It can also be seen as a + * routine identical to rb_funcallv_public_kw(), except you can pass the passed + * block. + * + * @param[in,out] recv Receiver of the method. + * @param[in] mid Name of the method to call. + * @param[in] argc Number of arguments. + * @param[in] argv Arbitrary number of method arguments. + * @param[in] kw_splat Handling of keyword parameters: + * - RB_NO_KEYWORDS `argv`'s last is not a keyword argument. + * - RB_PASS_KEYWORDS `argv`'s last is a keyword argument. + * - RB_PASS_CALLED_KEYWORDS it depends if there is a passed block. + * @exception rb_eNoMethodError No such method. + * @exception rb_eNoMethodError The method is private or protected. + * @exception rb_eException Any exceptions happen inside. + * @return What the method evaluates to. + */ +VALUE rb_funcall_passing_block_kw(VALUE recv, ID mid, int argc, const VALUE *argv, int kw_splat); + +/** + * Identical to rb_funcallv_public(), except you can pass a block. A block + * here basically is an instance of ::rb_cProc. If you want to exercise + * `to_proc` conversion, do so before passing it here. However nil and symbols + * are special-case allowed. + * + * @param[in,out] recv Receiver of the method. + * @param[in] mid Name of the method to call. + * @param[in] argc Number of arguments. + * @param[in] argv Arbitrary number of method arguments. + * @param[in] procval An instance of Proc, Symbol, or NilClass. + * @exception rb_eNoMethodError No such method. + * @exception rb_eNoMethodError The method is private or protected. + * @exception rb_eException Any exceptions happen inside. + * @return What the method evaluates to. + * + * @internal + * + * Implementation-wise, `procval` is in fact a "block handler" object.
You + * could also pass an IFUNC (block_handler_ifunc) here to be precise. --- But + * AFAIK there is no 3rd party way to even know that there are objects called + * IFUNC behind-the-scene. + */ +VALUE rb_funcall_with_block(VALUE recv, ID mid, int argc, const VALUE *argv, VALUE procval); + +/** + * Identical to rb_funcall_with_block(), except you can specify how to handle + * the last element of the given array. It can also be seen as a routine + * identical to rb_funcallv_public_kw(), except you can pass a block. + * + * @param[in,out] recv Receiver of the method. + * @param[in] mid Name of the method to call. + * @param[in] argc Number of arguments. + * @param[in] argv Arbitrary number of method arguments. + * @param[in] procval An instance of Proc, Symbol, or NilClass. + * @param[in] kw_splat Handling of keyword parameters: + * - RB_NO_KEYWORDS `argv`'s last is not a keyword argument. + * - RB_PASS_KEYWORDS `argv`'s last is a keyword argument. + * - RB_PASS_CALLED_KEYWORDS it depends if there is a passed block. + * @exception rb_eNoMethodError No such method. + * @exception rb_eNoMethodError The method is private or protected. + * @exception rb_eException Any exceptions happen inside. + * @return What the method evaluates to. + */ +VALUE rb_funcall_with_block_kw(VALUE recv, ID mid, int argc, const VALUE *argv, VALUE procval, int kw_splat); + +/** + * This resembles ruby's `super`. + * + * @param[in] argc Number of arguments. + * @param[in] argv Arbitrary number of method arguments. + * @exception rb_eNoMethodError No super method are there. + * @exception rb_eException Any exceptions happen inside. + * @return What the super method evaluates to. + */ +VALUE rb_call_super(int argc, const VALUE *argv); + +/** + * Identical to rb_call_super(), except you can specify how to handle the last + * element of the given array. + * + * @param[in] argc Number of arguments. + * @param[in] argv Arbitrary number of method arguments.
+ * @param[in] kw_splat Handling of keyword parameters: + * - RB_NO_KEYWORDS `argv`'s last is not a keyword argument. + * - RB_PASS_KEYWORDS `argv`'s last is a keyword argument. + * - RB_PASS_CALLED_KEYWORDS it depends if there is a passed block. + * @exception rb_eNoMethodError No super method are there. + * @exception rb_eException Any exceptions happen inside. + * @return What the super method evaluates to. + */ +VALUE rb_call_super_kw(int argc, const VALUE *argv, int kw_splat); + +/** + * This resembles ruby's `self`. + * + * @exception rb_eRuntimeError Called from outside of method context. + * @return Current receiver. + */ VALUE rb_current_receiver(void); -int rb_get_kwargs(VALUE keyword_hash, const ID *table, int required, int optional, VALUE *); + +RBIMPL_ATTR_NONNULL((2)) +/** + * Keyword argument deconstructor. + * + * Retrieves argument values bound to keywords, which directed by `table` into + * `values`, deleting retrieved entries from `keyword_hash` along the way. + * First `required` number of IDs referred by `table` are mandatory, and + * succeeding `optional` (`-optional-1` if `optional` is negative) number of + * IDs are optional. If a mandatory key is not contained in `keyword_hash`, + * raises ::rb_eArgError. If an optional key is not present in `keyword_hash`, + * the corresponding element in `values` is set to ::RUBY_Qundef. If + * `optional` is negative, rest of `keyword_hash` are ignored, otherwise raises + * ::rb_eArgError. + * + * @warning Handling keyword arguments in the C API is less efficient than + * handling them in Ruby. Consider using a Ruby wrapper method + * around a non-keyword C function. + * @see https://bugs.ruby-lang.org/issues/11339 + * @param[out] keyword_hash Target hash to deconstruct. + * @param[in] table List of keywords that you are interested in. + * @param[in] required Number of mandatory keywords. + * @param[in] optional Number of optional keywords (can be negative). + * @param[out] values Buffer to be filled. 
+ * @exception rb_eArgError Absence of a mandatory keyword. + * @exception rb_eArgError Found an unknown keyword. + * @return Number of found values that are stored into `values`. + */ +int rb_get_kwargs(VALUE keyword_hash, const ID *table, int required, int optional, VALUE *values); + +RBIMPL_ATTR_NONNULL(()) +/** + * Splits a hash into two. + * + * Takes a hash of various keys, and split it into symbol-keyed parts and + * others. Symbol-keyed part becomes the return value. What remains are + * returned as a new hash object stored at the argument pointer. + * + * @param[in,out] orighash Pointer to a target hash to split. + * @return An extracted keyword hash. + * @post Upon successful return `orighash` points to another hash + * object, whose contents are the remainder of the operation. + * @note The argument hash object is not modified. + */ VALUE rb_extract_keywords(VALUE *orighash); RBIMPL_SYMBOL_EXPORT_END() diff --git a/include/ruby/internal/event.h b/include/ruby/internal/event.h index f20e01dc01..1d194ed618 100644 --- a/include/ruby/internal/event.h +++ b/include/ruby/internal/event.h @@ -17,59 +17,143 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Debugging and tracing APIs. */ #include "ruby/internal/dllexport.h" #include "ruby/internal/value.h" -RBIMPL_SYMBOL_EXPORT_BEGIN() +#ifdef HAVE_STDINT_H +#include <stdint.h> +#endif + +/* These macros are not enums because they are wider than int.*/ + +/** + * @name Traditional set_trace_func events + * + * @{ + */ +#define RUBY_EVENT_NONE 0x0000 /**< No events. */ +#define RUBY_EVENT_LINE 0x0001 /**< Encountered a new line. */ +#define RUBY_EVENT_CLASS 0x0002 /**< Encountered a new class. 
*/ +#define RUBY_EVENT_END 0x0004 /**< Encountered an end of a class clause. */ +#define RUBY_EVENT_CALL 0x0008 /**< A method, written in Ruby, is called. */ +#define RUBY_EVENT_RETURN 0x0010 /**< Encountered a `return` statement. */ +#define RUBY_EVENT_C_CALL 0x0020 /**< A method, written in C, is called. */ +#define RUBY_EVENT_C_RETURN 0x0040 /**< Return from a method, written in C. */ +#define RUBY_EVENT_RAISE 0x0080 /**< Encountered a `raise` statement. */ +#define RUBY_EVENT_ALL 0x00ff /**< Bitmask of traditional events. */ + +/** @} */ + +/** + * @name TracePoint extended events + * + * @{ + */ +#define RUBY_EVENT_B_CALL 0x0100 /**< Encountered an `yield` statement. */ +#define RUBY_EVENT_B_RETURN 0x0200 /**< Encountered a `next` statement. */ +#define RUBY_EVENT_THREAD_BEGIN 0x0400 /**< Encountered a new thread. */ +#define RUBY_EVENT_THREAD_END 0x0800 /**< Encountered an end of a thread. */ +#define RUBY_EVENT_FIBER_SWITCH 0x1000 /**< Encountered a `Fiber#yield`. */ +#define RUBY_EVENT_SCRIPT_COMPILED 0x2000 /**< Encountered an `eval`. */ +#define RUBY_EVENT_RESCUE 0x4000 /**< Encountered a `rescue` statement. */ +#define RUBY_EVENT_TRACEPOINT_ALL 0xffff /**< Bitmask of extended events. 
*/ -/* traditional set_trace_func events */ -#define RUBY_EVENT_NONE 0x0000 -#define RUBY_EVENT_LINE 0x0001 -#define RUBY_EVENT_CLASS 0x0002 -#define RUBY_EVENT_END 0x0004 -#define RUBY_EVENT_CALL 0x0008 -#define RUBY_EVENT_RETURN 0x0010 -#define RUBY_EVENT_C_CALL 0x0020 -#define RUBY_EVENT_C_RETURN 0x0040 -#define RUBY_EVENT_RAISE 0x0080 -#define RUBY_EVENT_ALL 0x00ff - -/* for TracePoint extended events */ -#define RUBY_EVENT_B_CALL 0x0100 -#define RUBY_EVENT_B_RETURN 0x0200 -#define RUBY_EVENT_THREAD_BEGIN 0x0400 -#define RUBY_EVENT_THREAD_END 0x0800 -#define RUBY_EVENT_FIBER_SWITCH 0x1000 -#define RUBY_EVENT_SCRIPT_COMPILED 0x2000 -#define RUBY_EVENT_TRACEPOINT_ALL 0xffff - -/* special events */ -#define RUBY_EVENT_RESERVED_FOR_INTERNAL_USE 0x030000 - -/* internal events */ -#define RUBY_INTERNAL_EVENT_SWITCH 0x040000 -#define RUBY_EVENT_SWITCH 0x040000 /* obsolete name. this macro is for compatibility */ +/** @} */ + +/** + * @name Special events + * + * @internal + * + * These bits are actually used internally. See vm_core.h if you are curious. + * + * @endinternal + * + * @{ + */ +#define RUBY_EVENT_RESERVED_FOR_INTERNAL_USE 0x030000 /**< Opaque bits. */ + +/** @} */ + +/** + * @name Internal events + * + * @shyouhei's understanding is that some of them are visible from extension + * libraries because of `ext/objspace`. But it seems that doesn't describe + * everything? The ultimate reason why they are here remains unclear. + * + * @{ + */ +#define RUBY_INTERNAL_EVENT_SWITCH 0x040000 /**< Thread switched. 
*/ +#define RUBY_EVENT_SWITCH 0x040000 /**< @old{RUBY_INTERNAL_EVENT_SWITCH} */ /* 0x080000 */ -#define RUBY_INTERNAL_EVENT_NEWOBJ 0x100000 -#define RUBY_INTERNAL_EVENT_FREEOBJ 0x200000 -#define RUBY_INTERNAL_EVENT_GC_START 0x400000 -#define RUBY_INTERNAL_EVENT_GC_END_MARK 0x800000 -#define RUBY_INTERNAL_EVENT_GC_END_SWEEP 0x1000000 -#define RUBY_INTERNAL_EVENT_GC_ENTER 0x2000000 -#define RUBY_INTERNAL_EVENT_GC_EXIT 0x4000000 -#define RUBY_INTERNAL_EVENT_OBJSPACE_MASK 0x7f00000 -#define RUBY_INTERNAL_EVENT_MASK 0xffff0000 +#define RUBY_INTERNAL_EVENT_NEWOBJ 0x100000 /**< Object allocated. */ +#define RUBY_INTERNAL_EVENT_FREEOBJ 0x200000 /**< Object swept. */ +#define RUBY_INTERNAL_EVENT_GC_START 0x400000 /**< GC started. */ +#define RUBY_INTERNAL_EVENT_GC_END_MARK 0x800000 /**< GC ended mark phase. */ +#define RUBY_INTERNAL_EVENT_GC_END_SWEEP 0x1000000 /**< GC ended sweep phase. */ +#define RUBY_INTERNAL_EVENT_GC_ENTER 0x2000000 /**< `gc_enter()` is called. */ +#define RUBY_INTERNAL_EVENT_GC_EXIT 0x4000000 /**< `gc_exit()` is called. */ +#define RUBY_INTERNAL_EVENT_OBJSPACE_MASK 0x7f00000 /**< Bitmask of GC events. */ +#define RUBY_INTERNAL_EVENT_MASK 0xffff0000 /**< Bitmask of internal events. */ + +/** @} */ +/** + * Represents event(s). As the name implies events are bit flags. + */ typedef uint32_t rb_event_flag_t; + +/** + * Type of event hooks. When an event happens registered functions are kicked + * with appropriate parameters. + * + * @param[in] evflag The kind of event that happened. + * @param[in] data The `data` passed to rb_add_event_hook(). + * @param[in] self Current receiver. + * @param[in] mid Name of the current method. + * @param[in] klass Current class. + */ typedef void (*rb_event_hook_func_t)(rb_event_flag_t evflag, VALUE data, VALUE self, ID mid, VALUE klass); +/** + * @private + * + * @deprecated This macro once was a thing in the old days, but makes no sense + * any longer today. Exists here for backwards compatibility + * only. 
You can safely forget about it. + */ #define RB_EVENT_HOOKS_HAVE_CALLBACK_DATA 1 + +RBIMPL_SYMBOL_EXPORT_BEGIN() + +/** + * Registers an event hook function. + * + * @param[in] func A callback. + * @param[in] events A set of events that `func` should run. + * @param[in] data Passed as-is to `func`. + */ void rb_add_event_hook(rb_event_hook_func_t func, rb_event_flag_t events, VALUE data); -int rb_remove_event_hook(rb_event_hook_func_t func); +/** + * Removes the passed function from the list of event hooks. + * + * @param[in] func A callback. + * @return Number of deleted event hooks. + * @note As multiple events can share the same `func` it is quite + * possible for the return value to become more than one. + * + * @internal + * + * @shyouhei doesn't know if this is an Easter egg or an official feature, but + * you can pass 0 to the argument. That effectively swipes everything out from + * the hook list. + */ +int rb_remove_event_hook(rb_event_hook_func_t func); RBIMPL_SYMBOL_EXPORT_END() #endif /* RBIMPL_EVENT_H */ diff --git a/include/ruby/internal/fl_type.h b/include/ruby/internal/fl_type.h index 1d570c4743..0a05166784 100644 --- a/include/ruby/internal/fl_type.h +++ b/include/ruby/internal/fl_type.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Defines enum ::ruby_fl_type. 
*/ #include "ruby/internal/config.h" /* for ENUM_OVER_INT */ @@ -55,46 +55,45 @@ #endif /** @endcond */ -#define FL_SINGLETON RBIMPL_CAST((VALUE)RUBY_FL_SINGLETON) -#define FL_WB_PROTECTED RBIMPL_CAST((VALUE)RUBY_FL_WB_PROTECTED) -#define FL_PROMOTED0 RBIMPL_CAST((VALUE)RUBY_FL_PROMOTED0) -#define FL_PROMOTED1 RBIMPL_CAST((VALUE)RUBY_FL_PROMOTED1) -#define FL_FINALIZE RBIMPL_CAST((VALUE)RUBY_FL_FINALIZE) -#define FL_TAINT RBIMPL_CAST((VALUE)RUBY_FL_TAINT) -#define FL_SHAREABLE RBIMPL_CAST((VALUE)RUBY_FL_SHAREABLE) -#define FL_UNTRUSTED RBIMPL_CAST((VALUE)RUBY_FL_UNTRUSTED) -#define FL_SEEN_OBJ_ID RBIMPL_CAST((VALUE)RUBY_FL_SEEN_OBJ_ID) -#define FL_EXIVAR RBIMPL_CAST((VALUE)RUBY_FL_EXIVAR) -#define FL_FREEZE RBIMPL_CAST((VALUE)RUBY_FL_FREEZE) - -#define FL_USHIFT RBIMPL_CAST((VALUE)RUBY_FL_USHIFT) - -#define FL_USER0 RBIMPL_CAST((VALUE)RUBY_FL_USER0) -#define FL_USER1 RBIMPL_CAST((VALUE)RUBY_FL_USER1) -#define FL_USER2 RBIMPL_CAST((VALUE)RUBY_FL_USER2) -#define FL_USER3 RBIMPL_CAST((VALUE)RUBY_FL_USER3) -#define FL_USER4 RBIMPL_CAST((VALUE)RUBY_FL_USER4) -#define FL_USER5 RBIMPL_CAST((VALUE)RUBY_FL_USER5) -#define FL_USER6 RBIMPL_CAST((VALUE)RUBY_FL_USER6) -#define FL_USER7 RBIMPL_CAST((VALUE)RUBY_FL_USER7) -#define FL_USER8 RBIMPL_CAST((VALUE)RUBY_FL_USER8) -#define FL_USER9 RBIMPL_CAST((VALUE)RUBY_FL_USER9) -#define FL_USER10 RBIMPL_CAST((VALUE)RUBY_FL_USER10) -#define FL_USER11 RBIMPL_CAST((VALUE)RUBY_FL_USER11) -#define FL_USER12 RBIMPL_CAST((VALUE)RUBY_FL_USER12) -#define FL_USER13 RBIMPL_CAST((VALUE)RUBY_FL_USER13) -#define FL_USER14 RBIMPL_CAST((VALUE)RUBY_FL_USER14) -#define FL_USER15 RBIMPL_CAST((VALUE)RUBY_FL_USER15) -#define FL_USER16 RBIMPL_CAST((VALUE)RUBY_FL_USER16) -#define FL_USER17 RBIMPL_CAST((VALUE)RUBY_FL_USER17) -#define FL_USER18 RBIMPL_CAST((VALUE)RUBY_FL_USER18) -#define FL_USER19 RBIMPL_CAST((VALUE)(unsigned int)RUBY_FL_USER19) - -#define ELTS_SHARED RUBY_ELTS_SHARED -#define RUBY_ELTS_SHARED RUBY_ELTS_SHARED -#define RB_OBJ_FREEZE 
rb_obj_freeze_inline +#define FL_SINGLETON RBIMPL_CAST((VALUE)RUBY_FL_SINGLETON) /**< @old{RUBY_FL_SINGLETON} */ +#define FL_WB_PROTECTED RBIMPL_CAST((VALUE)RUBY_FL_WB_PROTECTED) /**< @old{RUBY_FL_WB_PROTECTED} */ +#define FL_PROMOTED RBIMPL_CAST((VALUE)RUBY_FL_PROMOTED) /**< @old{RUBY_FL_PROMOTED} */ +#define FL_FINALIZE RBIMPL_CAST((VALUE)RUBY_FL_FINALIZE) /**< @old{RUBY_FL_FINALIZE} */ +#define FL_TAINT RBIMPL_CAST((VALUE)RUBY_FL_TAINT) /**< @old{RUBY_FL_TAINT} */ +#define FL_SHAREABLE RBIMPL_CAST((VALUE)RUBY_FL_SHAREABLE) /**< @old{RUBY_FL_SHAREABLE} */ +#define FL_UNTRUSTED RBIMPL_CAST((VALUE)RUBY_FL_UNTRUSTED) /**< @old{RUBY_FL_UNTRUSTED} */ +#define FL_SEEN_OBJ_ID RBIMPL_CAST((VALUE)RUBY_FL_SEEN_OBJ_ID) /**< @old{RUBY_FL_SEEN_OBJ_ID} */ +#define FL_EXIVAR RBIMPL_CAST((VALUE)RUBY_FL_EXIVAR) /**< @old{RUBY_FL_EXIVAR} */ +#define FL_FREEZE RBIMPL_CAST((VALUE)RUBY_FL_FREEZE) /**< @old{RUBY_FL_FREEZE} */ + +#define FL_USHIFT RBIMPL_CAST((VALUE)RUBY_FL_USHIFT) /**< @old{RUBY_FL_USHIFT} */ + +#define FL_USER0 RBIMPL_CAST((VALUE)RUBY_FL_USER0) /**< @old{RUBY_FL_USER0} */ +#define FL_USER1 RBIMPL_CAST((VALUE)RUBY_FL_USER1) /**< @old{RUBY_FL_USER1} */ +#define FL_USER2 RBIMPL_CAST((VALUE)RUBY_FL_USER2) /**< @old{RUBY_FL_USER2} */ +#define FL_USER3 RBIMPL_CAST((VALUE)RUBY_FL_USER3) /**< @old{RUBY_FL_USER3} */ +#define FL_USER4 RBIMPL_CAST((VALUE)RUBY_FL_USER4) /**< @old{RUBY_FL_USER4} */ +#define FL_USER5 RBIMPL_CAST((VALUE)RUBY_FL_USER5) /**< @old{RUBY_FL_USER5} */ +#define FL_USER6 RBIMPL_CAST((VALUE)RUBY_FL_USER6) /**< @old{RUBY_FL_USER6} */ +#define FL_USER7 RBIMPL_CAST((VALUE)RUBY_FL_USER7) /**< @old{RUBY_FL_USER7} */ +#define FL_USER8 RBIMPL_CAST((VALUE)RUBY_FL_USER8) /**< @old{RUBY_FL_USER8} */ +#define FL_USER9 RBIMPL_CAST((VALUE)RUBY_FL_USER9) /**< @old{RUBY_FL_USER9} */ +#define FL_USER10 RBIMPL_CAST((VALUE)RUBY_FL_USER10) /**< @old{RUBY_FL_USER10} */ +#define FL_USER11 RBIMPL_CAST((VALUE)RUBY_FL_USER11) /**< @old{RUBY_FL_USER11} */ +#define FL_USER12 
RBIMPL_CAST((VALUE)RUBY_FL_USER12) /**< @old{RUBY_FL_USER12} */ +#define FL_USER13 RBIMPL_CAST((VALUE)RUBY_FL_USER13) /**< @old{RUBY_FL_USER13} */ +#define FL_USER14 RBIMPL_CAST((VALUE)RUBY_FL_USER14) /**< @old{RUBY_FL_USER14} */ +#define FL_USER15 RBIMPL_CAST((VALUE)RUBY_FL_USER15) /**< @old{RUBY_FL_USER15} */ +#define FL_USER16 RBIMPL_CAST((VALUE)RUBY_FL_USER16) /**< @old{RUBY_FL_USER16} */ +#define FL_USER17 RBIMPL_CAST((VALUE)RUBY_FL_USER17) /**< @old{RUBY_FL_USER17} */ +#define FL_USER18 RBIMPL_CAST((VALUE)RUBY_FL_USER18) /**< @old{RUBY_FL_USER18} */ +#define FL_USER19 RBIMPL_CAST((VALUE)(unsigned int)RUBY_FL_USER19) /**< @old{RUBY_FL_USER19} */ + +#define ELTS_SHARED RUBY_ELTS_SHARED /**< @old{RUBY_ELTS_SHARED} */ +#define RB_OBJ_FREEZE rb_obj_freeze_inline /**< @alias{rb_obj_freeze_inline} */ /** @cond INTERNAL_MACRO */ +#define RUBY_ELTS_SHARED RUBY_ELTS_SHARED #define RB_FL_ABLE RB_FL_ABLE #define RB_FL_ALL RB_FL_ALL #define RB_FL_ALL_RAW RB_FL_ALL_RAW @@ -111,69 +110,139 @@ #define RB_OBJ_FREEZE_RAW RB_OBJ_FREEZE_RAW #define RB_OBJ_FROZEN RB_OBJ_FROZEN #define RB_OBJ_FROZEN_RAW RB_OBJ_FROZEN_RAW -#define RB_OBJ_INFECT RB_OBJ_INFECT -#define RB_OBJ_INFECT_RAW RB_OBJ_INFECT_RAW -#define RB_OBJ_TAINT RB_OBJ_TAINT -#define RB_OBJ_TAINTABLE RB_OBJ_TAINTABLE -#define RB_OBJ_TAINTED RB_OBJ_TAINTED -#define RB_OBJ_TAINTED_RAW RB_OBJ_TAINTED_RAW -#define RB_OBJ_TAINT_RAW RB_OBJ_TAINT_RAW #define RB_OBJ_UNTRUST RB_OBJ_TAINT #define RB_OBJ_UNTRUSTED RB_OBJ_TAINTED /** @endcond */ /** - * @defgroup deprecated_macros deprecated macro APIs + * @defgroup deprecated_macros Deprecated macro APIs * @{ - * These macros are deprecated. Prefer their `RB_`-prefixed versions. + * These macros are deprecated. Prefer their `RB_`-prefixed versions. 
*/ -#define FL_ABLE RB_FL_ABLE -#define FL_ALL RB_FL_ALL -#define FL_ALL_RAW RB_FL_ALL_RAW -#define FL_ANY RB_FL_ANY -#define FL_ANY_RAW RB_FL_ANY_RAW -#define FL_REVERSE RB_FL_REVERSE -#define FL_REVERSE_RAW RB_FL_REVERSE_RAW -#define FL_SET RB_FL_SET -#define FL_SET_RAW RB_FL_SET_RAW -#define FL_TEST RB_FL_TEST -#define FL_TEST_RAW RB_FL_TEST_RAW -#define FL_UNSET RB_FL_UNSET -#define FL_UNSET_RAW RB_FL_UNSET_RAW -#define OBJ_FREEZE RB_OBJ_FREEZE -#define OBJ_FREEZE_RAW RB_OBJ_FREEZE_RAW -#define OBJ_FROZEN RB_OBJ_FROZEN -#define OBJ_FROZEN_RAW RB_OBJ_FROZEN_RAW -#define OBJ_INFECT RB_OBJ_INFECT -#define OBJ_INFECT_RAW RB_OBJ_INFECT_RAW -#define OBJ_TAINT RB_OBJ_TAINT -#define OBJ_TAINTABLE RB_OBJ_TAINTABLE -#define OBJ_TAINTED RB_OBJ_TAINTED -#define OBJ_TAINTED_RAW RB_OBJ_TAINTED_RAW -#define OBJ_TAINT_RAW RB_OBJ_TAINT_RAW -#define OBJ_UNTRUST RB_OBJ_UNTRUST -#define OBJ_UNTRUSTED RB_OBJ_UNTRUSTED +#define FL_ABLE RB_FL_ABLE /**< @old{RB_FL_ABLE} */ +#define FL_ALL RB_FL_ALL /**< @old{RB_FL_ALL} */ +#define FL_ALL_RAW RB_FL_ALL_RAW /**< @old{RB_FL_ALL_RAW} */ +#define FL_ANY RB_FL_ANY /**< @old{RB_FL_ANY} */ +#define FL_ANY_RAW RB_FL_ANY_RAW /**< @old{RB_FL_ANY_RAW} */ +#define FL_REVERSE RB_FL_REVERSE /**< @old{RB_FL_REVERSE} */ +#define FL_REVERSE_RAW RB_FL_REVERSE_RAW /**< @old{RB_FL_REVERSE_RAW} */ +#define FL_SET RB_FL_SET /**< @old{RB_FL_SET} */ +#define FL_SET_RAW RB_FL_SET_RAW /**< @old{RB_FL_SET_RAW} */ +#define FL_TEST RB_FL_TEST /**< @old{RB_FL_TEST} */ +#define FL_TEST_RAW RB_FL_TEST_RAW /**< @old{RB_FL_TEST_RAW} */ +#define FL_UNSET RB_FL_UNSET /**< @old{RB_FL_UNSET} */ +#define FL_UNSET_RAW RB_FL_UNSET_RAW /**< @old{RB_FL_UNSET_RAW} */ +#define OBJ_FREEZE RB_OBJ_FREEZE /**< @old{RB_OBJ_FREEZE} */ +#define OBJ_FREEZE_RAW RB_OBJ_FREEZE_RAW /**< @old{RB_OBJ_FREEZE_RAW} */ +#define OBJ_FROZEN RB_OBJ_FROZEN /**< @old{RB_OBJ_FROZEN} */ +#define OBJ_FROZEN_RAW RB_OBJ_FROZEN_RAW /**< @old{RB_OBJ_FROZEN_RAW} */ +#define OBJ_INFECT RB_OBJ_INFECT /**< 
@old{RB_OBJ_INFECT} */ +#define OBJ_INFECT_RAW RB_OBJ_INFECT_RAW /**< @old{RB_OBJ_INFECT_RAW} */ +#define OBJ_TAINT RB_OBJ_TAINT /**< @old{RB_OBJ_TAINT} */ +#define OBJ_TAINTABLE RB_OBJ_TAINTABLE /**< @old{RB_OBJ_TAINTABLE} */ +#define OBJ_TAINTED RB_OBJ_TAINTED /**< @old{RB_OBJ_TAINTED} */ +#define OBJ_TAINTED_RAW RB_OBJ_TAINTED_RAW /**< @old{RB_OBJ_TAINTED_RAW} */ +#define OBJ_TAINT_RAW RB_OBJ_TAINT_RAW /**< @old{RB_OBJ_TAINT_RAW} */ +#define OBJ_UNTRUST RB_OBJ_UNTRUST /**< @old{RB_OBJ_TAINT} */ +#define OBJ_UNTRUSTED RB_OBJ_UNTRUSTED /**< @old{RB_OBJ_TAINTED} */ /** @} */ -/* This is an enum because GDB wants it (rather than a macro) */ -enum ruby_fl_ushift { RUBY_FL_USHIFT = 12 }; +/** + * This is an enum because GDB wants it (rather than a macro). People need not + * bother. + */ +enum ruby_fl_ushift { + /** + * Number of bits in ::ruby_fl_type that are _not_ open to users. This is + * an implementation detail. Please ignore. + */ + RUBY_FL_USHIFT = 12 +}; /* > The expression that defines the value of an enumeration constant shall be * > an integer constant expression that has a value representable as an `int`. * * -- ISO/IEC 9899:2018 section 6.7.2.2 * - * So ENUM_OVER_INT situation is an extension to the standard. Note however + * So ENUM_OVER_INT situation is an extension to the standard. Note however * that we do not support 16 bit `int` environment. */ RB_GNUC_EXTENSION +/** + * The flags. Each ruby object has its own characteristics apart from + * its class. For instance whether an object is frozen or not is not + * controlled by its class. This is the type that represents such properties. + * + * @note About the `FL_USER` terminology: the "user" here does not necessarily + * mean only you. For instance struct ::RString instances use these + * bits to cache their encodings etc. 
Devs discussed about this topic, + * reached their consensus that ::RUBY_T_DATA is the only valid data + * structure that can use these bits; other data structures including + * ::RUBY_T_OBJECT use these bits for their own purpose. See also + * https://bugs.ruby-lang.org/issues/18059 + */ enum RBIMPL_ATTR_FLAG_ENUM() ruby_fl_type { + + /** + * @deprecated This flag once was a thing back in the old days, but makes + * no sense any longer today. Exists here for backwards + * compatibility only. You can safely forget about it. + * + * @internal + * + * The reality is our GC no longer remembers write barriers inside of each + * objects, to use dedicated bitmap instead. But this flag is still used + * internally. The current usages of this flag should be something + * different, which is unclear to @shyouhei. + */ RUBY_FL_WB_PROTECTED = (1<<5), - RUBY_FL_PROMOTED0 = (1<<5), - RUBY_FL_PROMOTED1 = (1<<6), - RUBY_FL_PROMOTED = RUBY_FL_PROMOTED0 | RUBY_FL_PROMOTED1, + + /** + * Ruby objects are "generational". There are young objects & old objects. + * Young objects are prone to die & monitored relatively extensively by the + * garbage collector. Old objects tend to live longer & are monitored less + * frequently. When an object survives a GC, its age is incremented. When + * age is equal to RVALUE_OLD_AGE, the object becomes Old. This flag is set + * when an object becomes old, and is used by the write barrier to check if + * an old object should be considered for marking more frequently - as old + * objects that have references added between major GCs need to be remarked + * to prevent the referred object being mistakenly swept. + * + * @internal + * + * But honestly, @shyouhei doesn't think this flag should be visible from + * 3rd parties. It must be an implementation detail that they should never + * know. Might better be hidden. 
+ */ + RUBY_FL_PROMOTED = (1<<5), + + /** + * This flag is no longer in use + * + * @internal + */ + RUBY_FL_UNUSED6 = (1<<6), + + /** + * This flag has something to do with finalisers. A ruby object can have + * its finaliser, which is another object that evaluates when the target + * object is about to die. This flag is used to denote that there is an + * attached finaliser. + * + * @internal + * + * But honestly, @shyouhei doesn't think this flag should be visible from + * 3rd parties. It must be an implementation detail that they should never + * know. Might better be hidden. + */ RUBY_FL_FINALIZE = (1<<7), + + /** + * @deprecated This flag once was a thing back in the old days, but makes + * no sense any longer today. Exists here for backwards + * compatibility only. You can safely forget about it. + */ RUBY_FL_TAINT #if defined(RBIMPL_HAVE_ENUM_ATTRIBUTE) @@ -182,8 +251,25 @@ ruby_fl_type { # pragma deprecated(RUBY_FL_TAINT) #endif - = (1<<8), + = 0, + + /** + * This flag has something to do with Ractor. Multiple Ractors run without + * protecting each other. Sharing an object among Ractors are basically + * dangerous, disabled by default. This flag is used to bypass that + * restriction. Of course, you have to manually prevent race conditions + * then. + * + * This flag needs deep understanding of multithreaded programming. You + * would better not use it. + */ RUBY_FL_SHAREABLE = (1<<8), + + /** + * @deprecated This flag once was a thing back in the old days, but makes + * no sense any longer today. Exists here for backwards + * compatibility only. You can safely forget about it. + */ RUBY_FL_UNTRUSTED #if defined(RBIMPL_HAVE_ENUM_ATTRIBUTE) @@ -192,44 +278,132 @@ ruby_fl_type { # pragma deprecated(RUBY_FL_UNTRUSTED) #endif - = (1<<8), + = 0, + + /** + * This flag has something to do with object IDs. 
Unlike in the old days, + * an object's object ID (that a user can query using `Object#object_id`) + * is no longer its physical address represented using Ruby level integers. + * It is now a monotonic-increasing integer unrelated to the underlying + * memory arrangement. Object IDs are assigned when necessary; objects are + * born without one, and will eventually have such property when queried. + * The interpreter has to manage which one is which. This is the flag that + * helps the management. Objects with this flag set are the ones with + * object IDs assigned. + * + * @internal + * + * But honestly, @shyouhei doesn't think this flag should be visible from + * 3rd parties. It must be an implementation detail that they should never + * know. Might better be hidden. + */ RUBY_FL_SEEN_OBJ_ID = (1<<9), + + /** + * This flag has something to do with instance variables. 3rd parties need + * not know, but there are several ways to store an object's instance + * variables. Objects with this flag use so-called "generic" backend + * storage. This distinction is purely an implementation detail. People + * need not be aware of this working behind-the-scene. + * + * @internal + * + * As of writing everything except ::RObject and RModule use this scheme. + */ RUBY_FL_EXIVAR = (1<<10), + + /** + * This flag has something to do with data immutability. When this flag is + * set an object is considered "frozen". No modifications are expected to + * happen beyond that point for the particular object. Immutability is + * basically considered to be a good property these days. Library authors + * are expected to obey. Test this bit before you touch a data structure. + * + * @see rb_check_frozen() + */ RUBY_FL_FREEZE = (1<<11), +/** (@shyouhei doesn't know how to exclude this macro from doxygen). 
*/ #define RBIMPL_FL_USER_N(n) RUBY_FL_USER##n = (1<<(RUBY_FL_USHIFT+n)) - RBIMPL_FL_USER_N(0), - RBIMPL_FL_USER_N(1), - RBIMPL_FL_USER_N(2), - RBIMPL_FL_USER_N(3), - RBIMPL_FL_USER_N(4), - RBIMPL_FL_USER_N(5), - RBIMPL_FL_USER_N(6), - RBIMPL_FL_USER_N(7), - RBIMPL_FL_USER_N(8), - RBIMPL_FL_USER_N(9), - RBIMPL_FL_USER_N(10), - RBIMPL_FL_USER_N(11), - RBIMPL_FL_USER_N(12), - RBIMPL_FL_USER_N(13), - RBIMPL_FL_USER_N(14), - RBIMPL_FL_USER_N(15), - RBIMPL_FL_USER_N(16), - RBIMPL_FL_USER_N(17), - RBIMPL_FL_USER_N(18), + RBIMPL_FL_USER_N(0), /**< User-defined flag. */ + RBIMPL_FL_USER_N(1), /**< User-defined flag. */ + RBIMPL_FL_USER_N(2), /**< User-defined flag. */ + RBIMPL_FL_USER_N(3), /**< User-defined flag. */ + RBIMPL_FL_USER_N(4), /**< User-defined flag. */ + RBIMPL_FL_USER_N(5), /**< User-defined flag. */ + RBIMPL_FL_USER_N(6), /**< User-defined flag. */ + RBIMPL_FL_USER_N(7), /**< User-defined flag. */ + RBIMPL_FL_USER_N(8), /**< User-defined flag. */ + RBIMPL_FL_USER_N(9), /**< User-defined flag. */ + RBIMPL_FL_USER_N(10), /**< User-defined flag. */ + RBIMPL_FL_USER_N(11), /**< User-defined flag. */ + RBIMPL_FL_USER_N(12), /**< User-defined flag. */ + RBIMPL_FL_USER_N(13), /**< User-defined flag. */ + RBIMPL_FL_USER_N(14), /**< User-defined flag. */ + RBIMPL_FL_USER_N(15), /**< User-defined flag. */ + RBIMPL_FL_USER_N(16), /**< User-defined flag. */ + RBIMPL_FL_USER_N(17), /**< User-defined flag. */ + RBIMPL_FL_USER_N(18), /**< User-defined flag. */ #ifdef ENUM_OVER_INT - RBIMPL_FL_USER_N(19), + RBIMPL_FL_USER_N(19), /**< User-defined flag. */ #else # define RUBY_FL_USER19 (RBIMPL_VALUE_ONE<<(RUBY_FL_USHIFT+19)) #endif #undef RBIMPL_FL_USER_N #undef RBIMPL_WIDER_ENUM + /** + * This flag has something to do with data structures. Over time, ruby + * evolved to reduce memory footprints. One of such attempt is so-called + * copy-on-write, which delays duplication of resources until ultimately + * necessary. Some data structures share this scheme. 
For example + * multiple instances of struct ::RArray could point identical memory + * region in common, as long as they don't differ. As people favour + * immutable style of programming than before, this situation is getting + * more and more common. Because such "shared" memory regions have nuanced + * ownership by nature, each structures need special care for them. This + * flag is used to distinguish such shared constructs. + * + * @internal + * + * But honestly, @shyouhei doesn't think this flag should be visible from + * 3rd parties. It must be an implementation detail that they should never + * know. Might better be hidden. + */ RUBY_ELTS_SHARED = RUBY_FL_USER2, - RUBY_FL_SINGLETON = RUBY_FL_USER0, + + /** + * This flag has something to do with an object's class. There are kind of + * classes called "singleton class", each of which have exactly one + * instance. What is interesting about singleton classes is that they are + * created _after_ their instance were instantiated, like this: + * + * ```ruby + * foo = Object.new # foo is an instance of Object... + * bar = foo.singleton_class # foo is now an instance of bar. + * ``` + * + * Here as you see `bar` is a singleton class of `foo`, which is injected + * into `foo`'s inheritance tree in a different statement (== distinct + * sequence point). In order to achieve this property singleton classes + * are special-cased in the interpreter. There is one bit flag that + * distinguishes if a class is a singleton class or not, and this is it. + * + * @internal + * + * But honestly, @shyouhei doesn't think this flag should be visible from + * 3rd parties. It must be an implementation detail that they should never + * know. Might better be hidden. + */ + RUBY_FL_SINGLETON = RUBY_FL_USER1, }; enum { + /** + * @deprecated This flag once was a thing back in the old days, but makes + * no sense any longer today. Exists here for backwards + * compatibility only. You can safely forget about it. 
+ */ RUBY_FL_DUPPED #if defined(RBIMPL_HAVE_ENUM_ATTRIBUTE) @@ -238,19 +412,34 @@ enum { # pragma deprecated(RUBY_FL_DUPPED) #endif - = RUBY_T_MASK | RUBY_FL_EXIVAR + = (int)RUBY_T_MASK | (int)RUBY_FL_EXIVAR }; #undef RBIMPL_HAVE_ENUM_ATTRIBUTE RBIMPL_SYMBOL_EXPORT_BEGIN() -void rb_obj_infect(VALUE victim, VALUE carrier); +/** + * This is an implementation detail of #RB_OBJ_FREEZE(). People don't use it + * directly. + * + * @param[out] klass A singleton class. + * @post `klass` gets frozen. + */ void rb_freeze_singleton_class(VALUE klass); RBIMPL_SYMBOL_EXPORT_END() RBIMPL_ATTR_PURE_UNLESS_DEBUG() RBIMPL_ATTR_ARTIFICIAL() RBIMPL_ATTR_FORCEINLINE() +/** + * Checks if the object is flaggable. There are some special cases (most + * notably ::RUBY_Qfalse) where appending a flag to an object is not possible. + * This function can detect that. + * + * @param[in] obj Object in question + * @retval true It is flaggable. + * @retval false No it isn't. + */ static bool RB_FL_ABLE(VALUE obj) { @@ -267,6 +456,15 @@ RB_FL_ABLE(VALUE obj) RBIMPL_ATTR_PURE_UNLESS_DEBUG() RBIMPL_ATTR_ARTIFICIAL() +/** + * This is an implementation detail of RB_FL_TEST(). 3rd parties need not use + * this. Just always use RB_FL_TEST(). + * + * @param[in] obj Object in question. + * @param[in] flags A set of enum ::ruby_fl_type. + * @pre The object must not be an enum ::ruby_special_consts. + * @return `obj`'s flags, masked by `flags`. + */ static inline VALUE RB_FL_TEST_RAW(VALUE obj, VALUE flags) { @@ -276,6 +474,23 @@ RB_FL_TEST_RAW(VALUE obj, VALUE flags) RBIMPL_ATTR_PURE_UNLESS_DEBUG() RBIMPL_ATTR_ARTIFICIAL() +/** + * Tests if the given flag(s) are set or not. You can pass multiple flags at + * once: + * + * ```CXX + * auto obj = rb_eval_string("..."); + * if (RB_FL_TEST(obj, RUBY_FL_FREEZE | RUBY_FL_SHAREABLE)) { + * printf("Ractor ready!\n"); + * } + * ``` + * + * @param[in] obj Object in question. + * @param[in] flags A set of enum ::ruby_fl_type. 
+ * @return `obj`'s flags, masked by `flags`. + * @note It is intentional for this function to return ::VALUE. The + * return value could be passed to RB_FL_SET() etc. + */ static inline VALUE RB_FL_TEST(VALUE obj, VALUE flags) { @@ -289,6 +504,16 @@ RB_FL_TEST(VALUE obj, VALUE flags) RBIMPL_ATTR_PURE_UNLESS_DEBUG() RBIMPL_ATTR_ARTIFICIAL() +/** + * This is an implementation detail of RB_FL_ANY(). 3rd parties need not use + * this. Just always use RB_FL_ANY(). + * + * @param[in] obj Object in question. + * @param[in] flags A set of enum ::ruby_fl_type. + * @retval true The object has any of the flags set. + * @retval false No it doesn't at all. + * @pre The object must not be an enum ::ruby_special_consts. + */ static inline bool RB_FL_ANY_RAW(VALUE obj, VALUE flags) { @@ -297,6 +522,14 @@ RB_FL_ANY_RAW(VALUE obj, VALUE flags) RBIMPL_ATTR_PURE_UNLESS_DEBUG() RBIMPL_ATTR_ARTIFICIAL() +/** + * Identical to RB_FL_TEST(), except it returns bool. + * + * @param[in] obj Object in question. + * @param[in] flags A set of enum ::ruby_fl_type. + * @retval true The object has any of the flags set. + * @retval false No it doesn't at all. + */ static inline bool RB_FL_ANY(VALUE obj, VALUE flags) { @@ -305,6 +538,16 @@ RB_FL_ANY(VALUE obj, VALUE flags) RBIMPL_ATTR_PURE_UNLESS_DEBUG() RBIMPL_ATTR_ARTIFICIAL() +/** + * This is an implementation detail of RB_FL_ALL(). 3rd parties need not use + * this. Just always use RB_FL_ALL(). + * + * @param[in] obj Object in question. + * @param[in] flags A set of enum ::ruby_fl_type. + * @retval true The object has all of the flags set. + * @retval false The object lacks any of the flags. + * @pre The object must not be an enum ::ruby_special_consts. + */ static inline bool RB_FL_ALL_RAW(VALUE obj, VALUE flags) { @@ -313,6 +556,14 @@ RB_FL_ALL_RAW(VALUE obj, VALUE flags) RBIMPL_ATTR_PURE_UNLESS_DEBUG() RBIMPL_ATTR_ARTIFICIAL() +/** + * Identical to RB_FL_ANY(), except it mandates all passed flags be set. 
+ * + * @param[in] obj Object in question. + * @param[in] flags A set of enum ::ruby_fl_type. + * @retval true The object has all of the flags set. + * @retval false The object lacks any of the flags. + */ static inline bool RB_FL_ALL(VALUE obj, VALUE flags) { @@ -321,6 +572,21 @@ RB_FL_ALL(VALUE obj, VALUE flags) RBIMPL_ATTR_NOALIAS() RBIMPL_ATTR_ARTIFICIAL() +/** + * @private + * + * This is an implementation detail of RB_FL_SET(). 3rd parties need not use + * this. Just always use RB_FL_SET(). + * + * @param[out] obj Object in question. + * @param[in] flags A set of enum ::ruby_fl_type. + * @post `obj` has `flags` set. + * + * @internal + * + * This function is here to annotate a part of RB_FL_SET_RAW() as + * `__declspec(noalias)`. + */ static inline void rbimpl_fl_set_raw_raw(struct RBasic *obj, VALUE flags) { @@ -328,6 +594,14 @@ rbimpl_fl_set_raw_raw(struct RBasic *obj, VALUE flags) } RBIMPL_ATTR_ARTIFICIAL() +/** + * This is an implementation detail of RB_FL_SET(). 3rd parties need not use + * this. Just always use RB_FL_SET(). + * + * @param[out] obj Object in question. + * @param[in] flags A set of enum ::ruby_fl_type. + * @post `obj` has `flags` set. + */ static inline void RB_FL_SET_RAW(VALUE obj, VALUE flags) { @@ -336,6 +610,18 @@ RB_FL_SET_RAW(VALUE obj, VALUE flags) } RBIMPL_ATTR_ARTIFICIAL() +/** + * Sets the given flag(s). + * + * ```CXX + * auto v = rb_eval_string("..."); + * RB_FL_SET(v, RUBY_FL_FREEZE); + * ``` + * + * @param[out] obj Object in question. + * @param[in] flags A set of enum ::ruby_fl_type. + * @post `obj` has `flags` set. + */ static inline void RB_FL_SET(VALUE obj, VALUE flags) { @@ -346,6 +632,21 @@ RB_FL_SET(VALUE obj, VALUE flags) RBIMPL_ATTR_NOALIAS() RBIMPL_ATTR_ARTIFICIAL() +/** + * @private + * + * This is an implementation detail of RB_FL_UNSET(). 3rd parties need not use + * this. Just always use RB_FL_UNSET(). + * + * @param[out] obj Object in question. + * @param[in] flags A set of enum ::ruby_fl_type. 
+ * @post `obj` has `flags` cleared. + * + * @internal + * + * This function is here to annotate a part of RB_FL_UNSET_RAW() as + * `__declspec(noalias)`. + */ static inline void rbimpl_fl_unset_raw_raw(struct RBasic *obj, VALUE flags) { @@ -353,6 +654,14 @@ rbimpl_fl_unset_raw_raw(struct RBasic *obj, VALUE flags) } RBIMPL_ATTR_ARTIFICIAL() +/** + * This is an implementation detail of RB_FL_UNSET(). 3rd parties need not use + * this. Just always use RB_FL_UNSET(). + * + * @param[out] obj Object in question. + * @param[in] flags A set of enum ::ruby_fl_type. + * @post `obj` has `flags` cleared. + */ static inline void RB_FL_UNSET_RAW(VALUE obj, VALUE flags) { @@ -361,6 +670,13 @@ RB_FL_UNSET_RAW(VALUE obj, VALUE flags) } RBIMPL_ATTR_ARTIFICIAL() +/** + * Clears the given flag(s). + * + * @param[out] obj Object in question. + * @param[in] flags A set of enum ::ruby_fl_type. + * @post `obj` has `flags` cleared. + */ static inline void RB_FL_UNSET(VALUE obj, VALUE flags) { @@ -371,6 +687,21 @@ RB_FL_UNSET(VALUE obj, VALUE flags) RBIMPL_ATTR_NOALIAS() RBIMPL_ATTR_ARTIFICIAL() +/** + * @private + * + * This is an implementation detail of RB_FL_REVERSE(). 3rd parties need not + * use this. Just always use RB_FL_REVERSE(). + * + * @param[out] obj Object in question. + * @param[in] flags A set of enum ::ruby_fl_type. + * @post `obj` has `flags` reversed. + * + * @internal + * + * This function is here to annotate a part of RB_FL_REVERSE_RAW() as + * `__declspec(noalias)`. + */ static inline void rbimpl_fl_reverse_raw_raw(struct RBasic *obj, VALUE flags) { @@ -378,6 +709,14 @@ rbimpl_fl_reverse_raw_raw(struct RBasic *obj, VALUE flags) } RBIMPL_ATTR_ARTIFICIAL() +/** + * This is an implementation detail of RB_FL_REVERSE(). 3rd parties need not + * use this. Just always use RB_FL_REVERSE(). + * + * @param[out] obj Object in question. + * @param[in] flags A set of enum ::ruby_fl_type. + * @post `obj` has `flags` reversed. 
+ */ static inline void RB_FL_REVERSE_RAW(VALUE obj, VALUE flags) { @@ -386,6 +725,14 @@ RB_FL_REVERSE_RAW(VALUE obj, VALUE flags) } RBIMPL_ATTR_ARTIFICIAL() +/** + * Reverses the flags. This function is here mainly for symmetry on set/unset. + * Rarely used in practice. + * + * @param[out] obj Object in question. + * @param[in] flags A set of enum ::ruby_fl_type. + * @post `obj` has `flags` reversed. + */ static inline void RB_FL_REVERSE(VALUE obj, VALUE flags) { @@ -397,67 +744,141 @@ RB_FL_REVERSE(VALUE obj, VALUE flags) RBIMPL_ATTR_PURE_UNLESS_DEBUG() RBIMPL_ATTR_ARTIFICIAL() RBIMPL_ATTR_DEPRECATED(("taintedness turned out to be a wrong idea.")) +/** + * @deprecated This function once was a thing in the old days, but makes no + * sense any longer today. Exists here for backwards + * compatibility only. You can safely forget about it. + * + * @param[in] obj Object in question. + * @return false always. + */ static inline bool RB_OBJ_TAINTABLE(VALUE obj) { + (void)obj; return false; } RBIMPL_ATTR_PURE_UNLESS_DEBUG() RBIMPL_ATTR_ARTIFICIAL() RBIMPL_ATTR_DEPRECATED(("taintedness turned out to be a wrong idea.")) +/** + * @deprecated This function once was a thing in the old days, but makes no + * sense any longer today. Exists here for backwards + * compatibility only. You can safely forget about it. + * + * @param[in] obj Object in question. + * @return false always. + */ static inline VALUE RB_OBJ_TAINTED_RAW(VALUE obj) { + (void)obj; return false; } RBIMPL_ATTR_PURE_UNLESS_DEBUG() RBIMPL_ATTR_ARTIFICIAL() RBIMPL_ATTR_DEPRECATED(("taintedness turned out to be a wrong idea.")) +/** + * @deprecated This function once was a thing in the old days, but makes no + * sense any longer today. Exists here for backwards + * compatibility only. You can safely forget about it. + * + * @param[in] obj Object in question. + * @return false always. 
+ */ static inline bool RB_OBJ_TAINTED(VALUE obj) { + (void)obj; return false; } RBIMPL_ATTR_ARTIFICIAL() RBIMPL_ATTR_DEPRECATED(("taintedness turned out to be a wrong idea.")) +/** + * @deprecated This function once was a thing in the old days, but makes no + * sense any longer today. Exists here for backwards + * compatibility only. You can safely forget about it. + * + * @param[in] obj Object in question. + */ static inline void RB_OBJ_TAINT_RAW(VALUE obj) { + (void)obj; return; } RBIMPL_ATTR_ARTIFICIAL() RBIMPL_ATTR_DEPRECATED(("taintedness turned out to be a wrong idea.")) +/** + * @deprecated This function once was a thing in the old days, but makes no + * sense any longer today. Exists here for backwards + * compatibility only. You can safely forget about it. + * + * @param[in] obj Object in question. + */ static inline void RB_OBJ_TAINT(VALUE obj) { + (void)obj; return; } RBIMPL_ATTR_ARTIFICIAL() RBIMPL_ATTR_DEPRECATED(("taintedness turned out to be a wrong idea.")) +/** + * @deprecated This function once was a thing in the old days, but makes no + * sense any longer today. Exists here for backwards + * compatibility only. You can safely forget about it. + * + * @param[in] dst Victim object. + * @param[in] src Infectant object. + */ static inline void RB_OBJ_INFECT_RAW(VALUE dst, VALUE src) { + (void)dst; + (void)src; return; } RBIMPL_ATTR_ARTIFICIAL() RBIMPL_ATTR_DEPRECATED(("taintedness turned out to be a wrong idea.")) +/** + * @deprecated This function once was a thing in the old days, but makes no + * sense any longer today. Exists here for backwards + * compatibility only. You can safely forget about it. + * + * @param[in] dst Victim object. + * @param[in] src Infectant object. + */ static inline void RB_OBJ_INFECT(VALUE dst, VALUE src) { + (void)dst; + (void)src; return; } RBIMPL_ATTR_PURE_UNLESS_DEBUG() RBIMPL_ATTR_ARTIFICIAL() -/* It is intentional not to return bool here. 
There is a place in ruby core - * (namely class.c:singleton_class_of()) where return value of this function is - * verbatimly passed to RB_FL_SET_RAW. */ +/** + * This is an implementation detail of RB_OBJ_FROZEN(). 3rd parties need not + * use this. Just always use RB_OBJ_FROZEN(). + * + * @param[in] obj Object in question. + * @retval RUBY_FL_FREEZE Yes it is. + * @retval 0 No it isn't. + * + * @internal + * + * It is intentional not to return bool here. There is a place in ruby core + * (namely `class.c:singleton_class_of()`) where return value of this function + * is passed to RB_FL_SET_RAW(). + */ static inline VALUE RB_OBJ_FROZEN_RAW(VALUE obj) { @@ -466,6 +887,13 @@ RB_OBJ_FROZEN_RAW(VALUE obj) RBIMPL_ATTR_PURE_UNLESS_DEBUG() RBIMPL_ATTR_ARTIFICIAL() +/** + * Checks if an object is frozen. + * + * @param[in] obj Object in question. + * @retval true Yes it is. + * @retval false No it isn't. + */ static inline bool RB_OBJ_FROZEN(VALUE obj) { @@ -477,22 +905,21 @@ RB_OBJ_FROZEN(VALUE obj) } } +RUBY_SYMBOL_EXPORT_BEGIN +void rb_obj_freeze_inline(VALUE obj); +RUBY_SYMBOL_EXPORT_END + RBIMPL_ATTR_ARTIFICIAL() +/** + * This is an implementation detail of RB_OBJ_FREEZE(). 3rd parties need not + * use this. Just always use RB_OBJ_FREEZE(). + * + * @param[out] obj Object in question. + */ static inline void RB_OBJ_FREEZE_RAW(VALUE obj) { - RB_FL_SET_RAW(obj, RUBY_FL_FREEZE); -} - -static inline void -rb_obj_freeze_inline(VALUE x) -{ - if (RB_FL_ABLE(x)) { - RB_OBJ_FREEZE_RAW(x); - if (RBASIC_CLASS(x) && !(RBASIC(x)->flags & RUBY_FL_SINGLETON)) { - rb_freeze_singleton_class(x); - } - } + rb_obj_freeze_inline(obj); } #endif /* RBIMPL_FL_TYPE_H */ diff --git a/include/ruby/internal/gc.h b/include/ruby/internal/gc.h index d94f8a3736..462f416af2 100644 --- a/include/ruby/internal/gc.h +++ b/include/ruby/internal/gc.h @@ -17,19 +17,382 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. 
* We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Registering values to the GC. */ +#include "ruby/internal/config.h" + +#ifdef STDC_HEADERS +# include <stddef.h> /* size_t */ +#endif + +#ifdef HAVE_SYS_TYPES_H +# include <sys/types.h> /* ssize_t */ +#endif + +#include "ruby/assert.h" +#include "ruby/internal/attr/cold.h" +#include "ruby/internal/attr/nonnull.h" +#include "ruby/internal/attr/noreturn.h" +#include "ruby/internal/attr/artificial.h" +#include "ruby/internal/attr/maybe_unused.h" +#include "ruby/internal/attr/pure.h" #include "ruby/internal/dllexport.h" +#include "ruby/internal/special_consts.h" +#include "ruby/internal/stdbool.h" #include "ruby/internal/value.h" RBIMPL_SYMBOL_EXPORT_BEGIN() +#define RUBY_REF_EDGE(s, p) offsetof(s, p) +#define RUBY_REFS_LIST_PTR(l) (RUBY_DATA_FUNC)(l) +#define RUBY_REF_END SIZE_MAX +#define RUBY_REFERENCES(t) static const size_t t[] +#define RUBY_REFERENCES_START(t) RUBY_REFERENCES(t) = { +#define RUBY_REFERENCES_END RUBY_REF_END, }; + +/* gc.c */ + +RBIMPL_ATTR_COLD() +RBIMPL_ATTR_NORETURN() +/** + * Triggers out-of-memory error. If possible it raises ::rb_eNoMemError. But + * because we are running out of memory that is not always doable. This + * function tries hard to show something, but ultimately can die silently. + * + * @exception rb_eNoMemError Raises it if possible. + */ +void rb_memerror(void); + +RBIMPL_ATTR_PURE() +/** + * Queries if the GC is busy. + * + * @retval 0 It isn't. + * @retval 1 It is. + */ +int rb_during_gc(void); + +RBIMPL_ATTR_NONNULL((1)) +/** + * Marks objects between the two pointers. This is one of the GC utility + * functions that you can call when you design your own + * ::rb_data_type_struct::dmark. + * + * @pre Continuous memory region from `start` to `end` shall be fully + * addressable. + * @param[out] start Pointer to an array of objects. 
+ * @param[out] end Pointer that terminates the array of objects. + * @post Objects from `start` (included) to `end` (excluded) are marked. + * + * @internal + * + * `end` can be NULL... But that just results in no-op. + */ +void rb_gc_mark_locations(const VALUE *start, const VALUE *end); + +/** + * Identical to rb_mark_hash(), except it marks only values of the table and + * leave their associated keys unmarked. This is one of the GC utility + * functions that you can call when you design your own + * ::rb_data_type_struct::dmark. + * + * @warning Of course it can break GC. Leave it unused if unsure. + * @param[in] tbl A table to mark. + * @post Values stored in `tbl` are marked. + */ +void rb_mark_tbl(struct st_table *tbl); + +/** + * Identical to rb_mark_tbl(), except it marks objects using + * rb_gc_mark_movable(). This is one of the GC utility functions that you can + * call when you design your own ::rb_data_type_struct::dmark. + * + * @warning Of course it can break GC. Leave it unused if unsure. + * @param[in] tbl A table to mark. + * @post Values stored in `tbl` are marked. + */ +void rb_mark_tbl_no_pin(struct st_table *tbl); + +/** + * Identical to rb_mark_hash(), except it marks only keys of the table and + * leave their associated values unmarked. This is one of the GC utility + * functions that you can call when you design your own + * ::rb_data_type_struct::dmark. + * + * @warning Of course it can break GC. Leave it unused if unsure. + * @param[in] tbl A table to mark. + * @post Keys stored in `tbl` are marked. + */ +void rb_mark_set(struct st_table *tbl); + /** - * Inform the garbage collector that `valptr` points to a live Ruby object that - * should not be moved. Note that extensions should use this API on global - * constants instead of assuming constants defined in Ruby are always alive. - * Ruby code can remove global constants. + * Marks keys and values associated inside of the given table. 
This is one of + * the GC utility functions that you can call when you design your own + * ::rb_data_type_struct::dmark. + * + * @param[in] tbl A table to mark. + * @post Objects stored in `tbl` are marked. + */ +void rb_mark_hash(struct st_table *tbl); + +/** + * Updates references inside of tables. After you marked values using + * rb_mark_tbl_no_pin(), the objects inside of the table could of course be + * moved. This function is to fixup those references. You can call this from + * your ::rb_data_type_struct::dcompact. + * + * @param[out] ptr A table that potentially includes moved references. + * @post Moved references, if any, are corrected. + */ +void rb_gc_update_tbl_refs(st_table *ptr); + +/** + * Identical to rb_gc_mark(), except it allows the passed value be a + * non-object. For instance pointers to different type of memory regions are + * allowed here. Such values are silently ignored. This is one of the GC + * utility functions that you can call when you design your own + * ::rb_data_type_struct::dmark. + * + * @param[out] obj A possible object. + * @post `obj` is marked, if possible. + */ +void rb_gc_mark_maybe(VALUE obj); + +/** + * Marks an object. This is one of the GC utility functions that you can call + * when you design your own ::rb_data_type_struct::dmark. + * + * @param[out] obj Arbitrary Ruby object. + * @post `obj` is marked. + */ +void rb_gc_mark(VALUE obj); + +/** + * Maybe this is the only function provided for C extensions to control the + * pinning of objects, so let us describe it in detail. These days Ruby's GC + * is copying. As far as an object's physical address is guaranteed unused, it + * can move around the object space. Our GC engine rearranges these objects + * after it reclaims unreachable objects from our object space, so that the + * space is compact (improves memory locality). This is called the + * "compaction" phase, and works well most of the time... as far as there are + * no C extensions. 
C extensions complicate the scenario because Ruby core + * cannot detect any use of the physical address of an object inside of C + * functions. In order to prevent memory corruptions, objects observable from + * C extensions are "pinned"; they stick to where they are born until they die, + * just in case any C extensions touch their raw pointers. This variant of + * scheme is called "Mostly-Copying" garbage collector. Authors of C + * extensions, however, can extremely carefully write them to become + * compaction-aware. To do so avoid referring to a Ruby object from inside of + * your struct in the first place. But if that is not possible, use this + * function from your ::rb_data_type_struct::dmark then. This way objects + * marked using it are considered movable. If you choose this way you have to + * manually fix up locations of such moved pointers using rb_gc_location(). + * + * @see Bartlett, Joel F., "Compacting Garbage Collection with Ambiguous + * Roots", ACM SIGPLAN Lisp Pointers Volume 1 Issue 6 pp. 3-12, + * April-May-June, 1988. https://doi.org/10.1145/1317224.1317225 + * + * @param[in] obj Object that is movable. + * @post `obj` is marked. + */ +void rb_gc_mark_movable(VALUE obj); + +/** + * Finds a new "location" of an object. An object can be moved on compaction. + * This function projects its new abode, or just returns the passed object if + * not moved. This is one of the GC utility functions that you can call when + * you design your own ::rb_data_type_struct::dcompact. + * + * @param[in] obj An object, possibly already moved to somewhere else. + * @return An object, which holds the current contents of former `obj`. + */ +VALUE rb_gc_location(VALUE obj); + +/** + * Triggers a GC process. This was the only GC entry point that we had at the + * beginning. Over time our GC evolved. Now what this function does is just a + * very simplified variation of the entire GC algorithms. 
A series of + * procedures kicked by this API is called a "full" GC. + * + * - It immediately scans the entire object space to sort the dead. + * - It immediately reclaims any single dead bodies to reuse later. + * + * It is worth noting that the procedures above do not include evaluations of + * finalisers. They run later. + * + * @internal + * + * Finalisers are deferred until we can handle interrupts. See + * `rb_postponed_job_flush` in vm_trace.c. + * + * Of course there are GC that are not "full". For instance this one and the + * GC which runs when we are running out of memory are different. See + * `gc_profile_record_flag` defined in gc.c for the kinds of GC. + * + * In spite of the name this is not what everything that a GC can trigger. As + * of writing it seems this function does not trigger compaction. But this + * might change in future. + */ +void rb_gc(void); + +/** + * Copy&paste an object's finaliser to another. This is one of the GC utility + * functions that you can call when you design your own `initialize_copy`, + * `initialize_dup`, `initialize_clone`. + * + * @param[out] dst Destination object. + * @param[in] src Source object. + * @post `dst` and `src` share the same finaliser. + * + * @internal + * + * But isn't it easier for you to call super, and let `Object#initialize_copy` + * call this function instead? + */ +void rb_gc_copy_finalizer(VALUE dst, VALUE src); + +/** + * (Re-) enables GC. This makes sense only after you called rb_gc_disable(). + * + * @retval RUBY_Qtrue GC was disabled before. + * @retval RUBY_Qfalse GC was enabled before. + * @post GC is enabled. + * + * @internal + * + * This is one of such exceptional functions that does not raise both Ruby + * exceptions and C++ exceptions. + */ +VALUE rb_gc_enable(void); + +/** + * Disables GC. This prevents automatic GC runs when the process is running + * out of memory. Such situations shall result in rb_memerror(). 
However this + * does not prevent users from manually invoking rb_gc(). That should work. + * People abused this by disabling GC at the beginning of an event loop, + * process events without GC overheads, then manually force reclaiming garbage + * at the bottom of the loop. However because our GC is now much smarter than + * just calling rb_gc(), this technique is proven to be sub-optimal these days. + * It is believed that there is currently practically no need for this + * function. + * + * @retval RUBY_Qtrue GC was disabled before. + * @retval RUBY_Qfalse GC was enabled before. + * @post GC is disabled. + */ +VALUE rb_gc_disable(void); + +/** + * Identical to rb_gc(), except the return value. + * + * @return Always returns ::RUBY_Qnil. + */ +VALUE rb_gc_start(void); + +/** + * Assigns a finaliser for an object. Each object can have objects (typically + * blocks) that run immediately after that object dies. They are called + * finalisers of an object. This function associates a finaliser object with a + * target object. + * + * @note Note that finalisers run _after_ the object they finalise dies. You + * cannot for instance call its methods. + * @note If your finaliser references the object it finalises that object + * loses any chance to become garbage; effectively leaks memory until + * the end of the process. + * + * @param[in] obj Target to finalise. + * @param[in] block Something `call`able. + * @exception rb_eRuntimeError Somehow `obj` cannot have finalisers. + * @exception rb_eFrozenError `obj` is frozen. + * @exception rb_eArgError `block` doesn't respond to `call`. + * @return The passed `block`. + * @post `block` runs after `obj` dies. + */ +VALUE rb_define_finalizer(VALUE obj, VALUE block); + +/** + * Modifies the object so that it has no finalisers at all. This function is + * mainly provided for symmetry. No practical usages can be thought of. + * + * @param[out] obj Object to clear its finalisers. + * @exception rb_eFrozenError `obj` is frozen. 
+ * @return The passed `obj`. + * @post `obj` has no finalisers. + * @note There is no way to undefine a specific part of many finalisers + * that `obj` could have. All you can do is to clear them all. + */ +VALUE rb_undefine_finalizer(VALUE obj); + +/** + * Identical to rb_gc_stat(), with "count" parameter. + * + * @return Lifetime total number of runs of GC. + */ +size_t rb_gc_count(void); + +/** + * Obtains various GC related profiles. The parameter can be either a Symbol + * or a Hash. If a Hash is passed, it is filled with everything currently + * available. If a Symbol is passed just that portion is returned. + * + * Possible variations of keys you can pass here change from version to + * version. You can get the list of known keys by passing an empty hash and + * let it be filled. + * + * @param[in,out] key_or_buf A Symbol, or a Hash. + * @exception rb_eTypeError Neither Symbol nor Hash. + * @exception rb_eFrozenError Frozen hash is passed. + * @return In case a Hash is passed it returns 0. Otherwise the + * profile value associated with the given key is returned. + * @post In case a Hash is passed it is filled with values. + */ +size_t rb_gc_stat(VALUE key_or_buf); + +/** + * Obtains various info regarding the most recent GC run. This includes for + * instance the reason of the GC. The parameter can be either a Symbol or a + * Hash. If a Hash is passed, it is filled with everything currently + * available. If a Symbol is passed just that portion is returned. + * + * Possible variations of keys you can pass here change from version to + * version. You can get the list of known keys by passing an empty hash and + * let it be filled. + * + * @param[in,out] key_or_buf A Symbol, or a Hash. + * @exception rb_eTypeError Neither Symbol nor Hash. + * @exception rb_eFrozenError Frozen hash is passed. + * @return In case a Hash is passed it returns that hash. Otherwise + * the profile value associated with the given key is returned. 
+ * @post In case a Hash is passed it is filled with values. + */ +VALUE rb_gc_latest_gc_info(VALUE key_or_buf); + +/** + * Informs that there are external memory usages. Our GC runs when we are + * running out of memory. The amount of memory, however, can increase/decrease + * behind-the-scene. For instance DLLs can allocate memories using `mmap(2)` + * etc, which are opaque to us. Registering such external allocations using + * this function enables proper detection of how much memories an object used + * as a whole. That will trigger GCs more often than it would otherwise. You + * can also pass negative numbers here, to indicate that such external + * allocations are gone. + * + * @param[in] diff Amount of memory increased(+)/decreased(-). + */ +void rb_gc_adjust_memory_usage(ssize_t diff); + +/** + * Inform the garbage collector that the global or static variable pointed by + * `valptr` stores a live Ruby object that should not be moved. Note that + * extensions should use this API on global constants instead of assuming + * constants defined in Ruby are always alive. Ruby code can remove global + * constants. + * + * Because this registration itself has a possibility to trigger a GC, this + * function must be called before any GC-able objects is assigned to the + * address pointed by `valptr`. */ void rb_gc_register_address(VALUE *valptr); @@ -54,4 +417,413 @@ void rb_gc_register_mark_object(VALUE object); RBIMPL_SYMBOL_EXPORT_END() +/** + * @private + * + * @deprecated This macro once was a thing in the old days, but makes no sense + * any longer today. Exists here for backwards compatibility + * only. You can safely forget about it. + */ +#undef USE_RGENGC +#define USE_RGENGC 1 + +/** + * @deprecated This macro seems broken. Setting this to anything other than + * zero just doesn't compile. We need to KonMari. 
+ */ +#ifndef USE_RGENGC_LOGGING_WB_UNPROTECT +# define USE_RGENGC_LOGGING_WB_UNPROTECT 0 +#endif + +/** + * @private + * + * This is a compile-time flag to enable/disable write barrier for + * struct ::RArray. It has to be set at the time ruby itself compiles. Makes + * no sense for 3rd parties. + */ +#ifndef RGENGC_WB_PROTECTED_ARRAY +# define RGENGC_WB_PROTECTED_ARRAY 1 +#endif + +/** + * @private + * + * This is a compile-time flag to enable/disable write barrier for + * struct ::RHash. It has to be set at the time ruby itself compiles. Makes + * no sense for 3rd parties. + */ +#ifndef RGENGC_WB_PROTECTED_HASH +# define RGENGC_WB_PROTECTED_HASH 1 +#endif + +/** + * @private + * + * This is a compile-time flag to enable/disable write barrier for + * struct ::RStruct. It has to be set at the time ruby itself compiles. Makes + * no sense for 3rd parties. + */ +#ifndef RGENGC_WB_PROTECTED_STRUCT +# define RGENGC_WB_PROTECTED_STRUCT 1 +#endif + +/** + * @private + * + * This is a compile-time flag to enable/disable write barrier for + * struct ::RString. It has to be set at the time ruby itself compiles. Makes + * no sense for 3rd parties. + */ +#ifndef RGENGC_WB_PROTECTED_STRING +# define RGENGC_WB_PROTECTED_STRING 1 +#endif + +/** + * @private + * + * This is a compile-time flag to enable/disable write barrier for + * struct ::RObject. It has to be set at the time ruby itself compiles. Makes + * no sense for 3rd parties. + */ +#ifndef RGENGC_WB_PROTECTED_OBJECT +# define RGENGC_WB_PROTECTED_OBJECT 1 +#endif + +/** + * @private + * + * This is a compile-time flag to enable/disable write barrier for + * struct ::RRegexp. It has to be set at the time ruby itself compiles. Makes + * no sense for 3rd parties. + */ +#ifndef RGENGC_WB_PROTECTED_REGEXP +# define RGENGC_WB_PROTECTED_REGEXP 1 +#endif + +/** + * @private + * + * This is a compile-time flag to enable/disable write barrier for + * struct ::RMatch. It has to be set at the time ruby itself compiles. 
Makes + * no sense for 3rd parties. + */ +#ifndef RGENGC_WB_PROTECTED_MATCH +# define RGENGC_WB_PROTECTED_MATCH 1 +#endif + +/** + * @private + * + * This is a compile-time flag to enable/disable write barrier for + * struct ::RClass. It has to be set at the time ruby itself compiles. Makes + * no sense for 3rd parties. + */ +#ifndef RGENGC_WB_PROTECTED_CLASS +# define RGENGC_WB_PROTECTED_CLASS 1 +#endif + +/** + * @private + * + * This is a compile-time flag to enable/disable write barrier for + * struct ::RFloat. It has to be set at the time ruby itself compiles. Makes + * no sense for 3rd parties. + */ +#ifndef RGENGC_WB_PROTECTED_FLOAT +# define RGENGC_WB_PROTECTED_FLOAT 1 +#endif + +/** + * @private + * + * This is a compile-time flag to enable/disable write barrier for + * struct ::RComplex. It has to be set at the time ruby itself compiles. + * Makes no sense for 3rd parties. + */ +#ifndef RGENGC_WB_PROTECTED_COMPLEX +# define RGENGC_WB_PROTECTED_COMPLEX 1 +#endif + +/** + * @private + * + * This is a compile-time flag to enable/disable write barrier for + * struct ::RRational. It has to be set at the time ruby itself compiles. + * Makes no sense for 3rd parties. + */ +#ifndef RGENGC_WB_PROTECTED_RATIONAL +# define RGENGC_WB_PROTECTED_RATIONAL 1 +#endif + +/** + * @private + * + * This is a compile-time flag to enable/disable write barrier for + * struct ::RBignum. It has to be set at the time ruby itself compiles. Makes + * no sense for 3rd parties. + */ +#ifndef RGENGC_WB_PROTECTED_BIGNUM +# define RGENGC_WB_PROTECTED_BIGNUM 1 +#endif + +/** + * @private + * + * @deprecated This macro once was a thing in the old days, but makes no sense + * any longer today. Exists here for backwards compatibility + * only. You can safely forget about it. + * + * @internal + * + * @shyouhei doesn't think anybody uses this right now. 
+ */ +#ifndef RGENGC_WB_PROTECTED_NODE_CREF +# define RGENGC_WB_PROTECTED_NODE_CREF 1 +#endif + +/** + * @defgroup rgengc Write barrier (WB) interfaces: + * + * @note The following core interfaces can be changed in the future. Please + * catch up if you want to insert WB into C-extensions correctly. + * + * @{ + */ + +/** + * Declaration of a "back" pointer. This is a write barrier for new reference + * from "old" generation to "young" generation. It writes `young` into + * `*slot`, which is a pointer inside of `old`. + * + * @param[in] old An old object. + * @param[in] slot A pointer inside of `old`. + * @param[out] young A young object. + */ +#define RB_OBJ_WRITE(old, slot, young) \ + RBIMPL_CAST(rb_obj_write((VALUE)(old), (VALUE *)(slot), (VALUE)(young), __FILE__, __LINE__)) + +/** + * Identical to #RB_OBJ_WRITE(), except it doesn't write any values, but only a + * WB declaration. `oldv` is the value replaced by `young` (not used in + * current Ruby). + * + * @param[in] old An old object. + * @param[in] oldv An object previously stored inside of `old`. + * @param[out] young A young object. + */ +#define RB_OBJ_WRITTEN(old, oldv, young) \ + RBIMPL_CAST(rb_obj_written((VALUE)(old), (VALUE)(oldv), (VALUE)(young), __FILE__, __LINE__)) +/** @} */ + +#define OBJ_PROMOTED_RAW RB_OBJ_PROMOTED_RAW /**< @old{RB_OBJ_PROMOTED_RAW} */ +#define OBJ_PROMOTED RB_OBJ_PROMOTED /**< @old{RB_OBJ_PROMOTED} */ +#define OBJ_WB_UNPROTECT RB_OBJ_WB_UNPROTECT /**< @old{RB_OBJ_WB_UNPROTECT} */ + +/** + * Asserts that the passed object is not fenced by write barriers. Objects of + * such property do not contribute to generational GCs. They are scanned + * always. + * + * @param[out] x An object that would not be protected by the barrier. + */ +#define RB_OBJ_WB_UNPROTECT(x) rb_obj_wb_unprotect(x, __FILE__, __LINE__) + +/** + * Identical to #RB_OBJ_WB_UNPROTECT(), except it can also assert that the + * given object is of given type. + * + * @param[in] type One of `ARRAY`, `STRING`, etc. 
+ * @param[out] obj An object of `type` that would not be protected. + * + * @internal + * + * @shyouhei doesn't understand why this has to be visible from extensions. + */ +#define RB_OBJ_WB_UNPROTECT_FOR(type, obj) \ + (RGENGC_WB_PROTECTED_##type ? OBJ_WB_UNPROTECT(obj) : obj) + +/** + * @private + * + * This is an implementation detail of rb_obj_wb_unprotect(). People don't use + * it directly. + */ +#define RGENGC_LOGGING_WB_UNPROTECT rb_gc_unprotect_logging + +/** @cond INTERNAL_MACRO */ +#define RB_OBJ_PROMOTED_RAW RB_OBJ_PROMOTED_RAW +#define RB_OBJ_PROMOTED RB_OBJ_PROMOTED +/** @endcond */ + +RBIMPL_SYMBOL_EXPORT_BEGIN() +/** + * This is the implementation of #RB_OBJ_WRITE(). People don't use it + * directly. + * + * @param[in] old An object that points to `young`. + * @param[out] young An object that is referenced from `old`. + */ +void rb_gc_writebarrier(VALUE old, VALUE young); + +/** + * This is the implementation of #RB_OBJ_WB_UNPROTECT(). People don't use it + * directly. + * + * @param[out] obj An object that does not participate in WB. + */ +void rb_gc_writebarrier_unprotect(VALUE obj); + +#if USE_RGENGC_LOGGING_WB_UNPROTECT +/** + * @private + * + * This is the implementation of #RGENGC_LOGGING_WB_UNPROTECT(). People + * don't use it directly. + * + * @param[in] objptr Don't know why this is a pointer to void but in + * reality this is a pointer to an object that is about + * to be un-protected. + * @param[in] filename Pass C's `__FILE__` here. + * @param[in] line Pass C's `__LINE__` here. + */ +void rb_gc_unprotect_logging(void *objptr, const char *filename, int line); +#endif + +RBIMPL_SYMBOL_EXPORT_END() + +RBIMPL_ATTR_PURE_UNLESS_DEBUG() +RBIMPL_ATTR_ARTIFICIAL() +/** + * This is the implementation of #RB_OBJ_PROMOTED(). People don't use it + * directly. + * + * @param[in] obj An object to query. + * @retval true The object is "promoted". + * @retval false The object is young. Have not experienced GC at all. 
+ */ +static inline bool +RB_OBJ_PROMOTED_RAW(VALUE obj) +{ + RBIMPL_ASSERT_OR_ASSUME(RB_FL_ABLE(obj)); + return RB_FL_ANY_RAW(obj, RUBY_FL_PROMOTED); +} + +RBIMPL_ATTR_PURE_UNLESS_DEBUG() +RBIMPL_ATTR_ARTIFICIAL() +/** + * Tests if the object is "promoted" -- that is, whether the object experienced + * one or more GC marks. + * + * @param[in] obj An object to query. + * @retval true The object is "promoted". + * @retval false The object is young. Have not experienced GC at all. + * @note Hello, is anyone actively calling this function? @shyouhei have + * never seen any actual usages outside of the GC implementation + * itself. + */ +static inline bool +RB_OBJ_PROMOTED(VALUE obj) +{ + if (! RB_FL_ABLE(obj)) { + return false; + } + else { + return RB_OBJ_PROMOTED_RAW(obj); + } +} + +/** + * This is the implementation of #RB_OBJ_WB_UNPROTECT(). People don't use it + * directly. + * + * @param[out] x An object that does not participate in WB. + * @param[in] filename C's `__FILE__` of the caller function. + * @param[in] line C's `__LINE__` of the caller function. + * @return x + */ +static inline VALUE +rb_obj_wb_unprotect( + VALUE x, + RBIMPL_ATTR_MAYBE_UNUSED() + const char *filename, + RBIMPL_ATTR_MAYBE_UNUSED() + int line) +{ +#if USE_RGENGC_LOGGING_WB_UNPROTECT + RGENGC_LOGGING_WB_UNPROTECT(RBIMPL_CAST((void *)x), filename, line); +#endif + rb_gc_writebarrier_unprotect(x); + return x; +} + +/** + * @private + * + * This is the implementation of #RB_OBJ_WRITTEN(). People don't use it + * directly. + * + * @param[in] a An old object. + * @param[in] oldv An object previously stored inside of `old`. + * @param[out] b A young object. + * @param[in] filename C's `__FILE__` of the caller function. + * @param[in] line C's `__LINE__` of the caller function. 
+ * @return a + */ +static inline VALUE +rb_obj_written( + VALUE a, + RBIMPL_ATTR_MAYBE_UNUSED() + VALUE oldv, + VALUE b, + RBIMPL_ATTR_MAYBE_UNUSED() + const char *filename, + RBIMPL_ATTR_MAYBE_UNUSED() + int line) +{ +#if USE_RGENGC_LOGGING_WB_UNPROTECT + RGENGC_LOGGING_OBJ_WRITTEN(a, oldv, b, filename, line); +#endif + + if (!RB_SPECIAL_CONST_P(b)) { + rb_gc_writebarrier(a, b); + } + + return a; +} + +/** + * @private + * + * This is the implementation of #RB_OBJ_WRITE(). People don't use it + * directly. + * + * @param[in] a An old object. + * @param[in] slot A pointer inside of `old`. + * @param[out] b A young object. + * @param[in] filename C's `__FILE__` of the caller function. + * @param[in] line C's `__LINE__` of the caller function. + * @return a + */ +static inline VALUE +rb_obj_write( + VALUE a, VALUE *slot, VALUE b, + RBIMPL_ATTR_MAYBE_UNUSED() + const char *filename, + RBIMPL_ATTR_MAYBE_UNUSED() + int line) +{ +#ifdef RGENGC_LOGGING_WRITE + RGENGC_LOGGING_WRITE(a, slot, b, filename, line); +#endif + + *slot = b; + + rb_obj_written(a, RUBY_Qundef /* ignore `oldv' now */, b, filename, line); + return a; +} + +RBIMPL_ATTR_DEPRECATED(("Will be removed soon")) +static inline void rb_gc_force_recycle(VALUE obj){} + #endif /* RBIMPL_GC_H */ diff --git a/include/ruby/internal/glob.h b/include/ruby/internal/glob.h index b78bb75b88..adbccbdc5e 100644 --- a/include/ruby/internal/glob.h +++ b/include/ruby/internal/glob.h @@ -17,18 +17,96 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Declares ::rb_glob(). 
*/ +#include "ruby/internal/attr/nonnull.h" #include "ruby/internal/dllexport.h" #include "ruby/internal/value.h" RBIMPL_SYMBOL_EXPORT_BEGIN() -typedef int ruby_glob_func(const char*,VALUE, void*); -void rb_glob(const char*,void(*)(const char*,VALUE,void*),VALUE); -int ruby_glob(const char*,int,ruby_glob_func*,VALUE); -int ruby_brace_glob(const char*,int,ruby_glob_func*,VALUE); +/** + * Type of a glob callback function. Called every time glob scans a path. + * + * @param[in] path The path in question. + * @param[in] arg The argument passed to rb_glob(). + * @param[in] enc Encoding of the path. + * @retval -1 Not enough memory to do the operation. + * @retval 0 Operation successful. + * @retval otherwise Opaque exception state. + * @note You can use rb_protect() to generate the return value. + * + * @internal + * + * This is a wrong design. Type of `enc` should have been `rb_encoding*` + * instead of just `void*`. But we cannot change the API any longer. + * + * Though not a part of our public API, the "opaque exception state" is in fact + * an enum ruby_tag_type. You can see the potential "otherwise" values by + * looking at vm_core.h. + */ +typedef int ruby_glob_func(const char *path, VALUE arg, void *enc); + +RBIMPL_ATTR_NONNULL(()) +/** + * The "glob" operator. Expands the given pattern against the actual local + * filesystem, then iterates over the expanded filenames by calling the + * callback function. + * + * @param[in] pattern A glob pattern. + * @param[in] func Identical to ruby_glob_func, except it can raise + * exceptions instead of returning opaque state. + * @param[in] arg Extra argument passed to func. + * @exception rb_eException Can propagate what `func` raises. + * @note The language accepted as the pattern is not a regular + * expression. It resembles shell's glob. 
+ */ +void rb_glob(const char *pattern, void (*func)(const char *path, VALUE arg, void *enc), VALUE arg); + +RBIMPL_ATTR_NONNULL(()) +/** + * Identical to rb_glob(), except it returns opaque exception states instead of + * raising exceptions. + * + * @param[in] pattern A glob pattern. + * @param[in] flags No, you are not allowed to use this. Just pass 0. + * @param[in] func A callback function. + * @param[in] arg Extra argument passed to func. + * @return Return value of `func`. + * + * @internal + * + * This function is completely broken by design... Not only is there no sane + * way to pass flags, but there also is no sane way to know what a return value + * is meant to be. + * + * Though not a part of our public API, and @shyouhei thinks it's a failure not + * to be a public API, the flags can be `FNM_EXTGLOB`, `FNM_DOTMATCH` etc. + * Look at dir.c for the list. + * + * Though not a part of our public API, the return value is in fact an + * enum ruby_tag_type. You can see the potential values by looking at + * vm_core.h. + */ +int ruby_glob(const char *pattern, int flags, ruby_glob_func *func, VALUE arg); + +RBIMPL_ATTR_NONNULL(()) +/** + * Identical to ruby_glob(), @shyouhei currently suspects. Historically you + * had to call this function instead of ruby_glob() if the pattern included + * "{x,y,...}" syntax. However since commit 0f63d961169989a7f6dcf7c0487fe29da, + * ruby_glob() also supports that syntax. It seems as of writing these two + * functions provide basically the same functionality in a different + * implementation. Is this analysis right? Correct me! :FIXME: + * + * @param[in] pattern A glob pattern. + * @param[in] flags No, you are not allowed to use this. Just pass 0. + * @param[in] func A callback function. + * @param[in] arg Extra argument passed to func. + * @return Return value of `func`. 
+ */ +int ruby_brace_glob(const char *pattern, int flags, ruby_glob_func *func, VALUE arg); RBIMPL_SYMBOL_EXPORT_END() diff --git a/include/ruby/internal/globals.h b/include/ruby/internal/globals.h index ddd731349e..60d8e5309a 100644 --- a/include/ruby/internal/globals.h +++ b/include/ruby/internal/globals.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Ruby-level global variables / constants, visible from C. */ #include "ruby/internal/attr/pure.h" @@ -27,97 +27,147 @@ #include "ruby/internal/value.h" #include "ruby/internal/value_type.h" +/** + * @defgroup object Core objects and their operations + * + * @internal + * + * There are several questionable constants listed in this header file. They + * are intentionally left untouched for purely academic backwards compatibility + * concerns. But for instance do any one of 3rd party extension libraries even + * need to know that there is NameError::Message? + * + * @endinternal + * + * @{ + */ + RBIMPL_SYMBOL_EXPORT_BEGIN() +/** + * @private + * + * @deprecated This macro once was a thing in the old days, but makes no sense + * any longer today. Exists here for backwards compatibility + * only. You can safely forget about it. 
+ */ #define RUBY_INTEGER_UNIFICATION 1 -RUBY_EXTERN VALUE rb_mKernel; -RUBY_EXTERN VALUE rb_mComparable; -RUBY_EXTERN VALUE rb_mEnumerable; -RUBY_EXTERN VALUE rb_mErrno; -RUBY_EXTERN VALUE rb_mFileTest; -RUBY_EXTERN VALUE rb_mGC; -RUBY_EXTERN VALUE rb_mMath; -RUBY_EXTERN VALUE rb_mProcess; -RUBY_EXTERN VALUE rb_mWaitReadable; -RUBY_EXTERN VALUE rb_mWaitWritable; - -RUBY_EXTERN VALUE rb_cBasicObject; -RUBY_EXTERN VALUE rb_cObject; -RUBY_EXTERN VALUE rb_cArray; -RUBY_EXTERN VALUE rb_cBinding; -RUBY_EXTERN VALUE rb_cClass; -RUBY_EXTERN VALUE rb_cDir; -RUBY_EXTERN VALUE rb_cEncoding; -RUBY_EXTERN VALUE rb_cEnumerator; -RUBY_EXTERN VALUE rb_cFalseClass; -RUBY_EXTERN VALUE rb_cFile; -RUBY_EXTERN VALUE rb_cComplex; -RUBY_EXTERN VALUE rb_cFloat; -RUBY_EXTERN VALUE rb_cHash; -RUBY_EXTERN VALUE rb_cIO; -RUBY_EXTERN VALUE rb_cInteger; -RUBY_EXTERN VALUE rb_cMatch; -RUBY_EXTERN VALUE rb_cMethod; -RUBY_EXTERN VALUE rb_cModule; -RUBY_EXTERN VALUE rb_cNameErrorMesg; -RUBY_EXTERN VALUE rb_cNilClass; -RUBY_EXTERN VALUE rb_cNumeric; -RUBY_EXTERN VALUE rb_cProc; -RUBY_EXTERN VALUE rb_cRandom; -RUBY_EXTERN VALUE rb_cRange; -RUBY_EXTERN VALUE rb_cRational; -RUBY_EXTERN VALUE rb_cRegexp; -RUBY_EXTERN VALUE rb_cStat; -RUBY_EXTERN VALUE rb_cString; -RUBY_EXTERN VALUE rb_cStruct; -RUBY_EXTERN VALUE rb_cSymbol; -RUBY_EXTERN VALUE rb_cThread; -RUBY_EXTERN VALUE rb_cTime; -RUBY_EXTERN VALUE rb_cTrueClass; -RUBY_EXTERN VALUE rb_cUnboundMethod; - -RUBY_EXTERN VALUE rb_eException; -RUBY_EXTERN VALUE rb_eStandardError; -RUBY_EXTERN VALUE rb_eSystemExit; -RUBY_EXTERN VALUE rb_eInterrupt; -RUBY_EXTERN VALUE rb_eSignal; -RUBY_EXTERN VALUE rb_eFatal; -RUBY_EXTERN VALUE rb_eArgError; -RUBY_EXTERN VALUE rb_eEOFError; -RUBY_EXTERN VALUE rb_eIndexError; -RUBY_EXTERN VALUE rb_eStopIteration; -RUBY_EXTERN VALUE rb_eKeyError; -RUBY_EXTERN VALUE rb_eRangeError; -RUBY_EXTERN VALUE rb_eIOError; -RUBY_EXTERN VALUE rb_eRuntimeError; -RUBY_EXTERN VALUE rb_eFrozenError; -RUBY_EXTERN VALUE rb_eSecurityError; 
-RUBY_EXTERN VALUE rb_eSystemCallError; -RUBY_EXTERN VALUE rb_eThreadError; -RUBY_EXTERN VALUE rb_eTypeError; -RUBY_EXTERN VALUE rb_eZeroDivError; -RUBY_EXTERN VALUE rb_eNotImpError; -RUBY_EXTERN VALUE rb_eNoMemError; -RUBY_EXTERN VALUE rb_eNoMethodError; -RUBY_EXTERN VALUE rb_eFloatDomainError; -RUBY_EXTERN VALUE rb_eLocalJumpError; -RUBY_EXTERN VALUE rb_eSysStackError; -RUBY_EXTERN VALUE rb_eRegexpError; -RUBY_EXTERN VALUE rb_eEncodingError; -RUBY_EXTERN VALUE rb_eEncCompatError; -RUBY_EXTERN VALUE rb_eNoMatchingPatternError; - -RUBY_EXTERN VALUE rb_eScriptError; -RUBY_EXTERN VALUE rb_eNameError; -RUBY_EXTERN VALUE rb_eSyntaxError; -RUBY_EXTERN VALUE rb_eLoadError; - -RUBY_EXTERN VALUE rb_eMathDomainError; - -RUBY_EXTERN VALUE rb_stdin, rb_stdout, rb_stderr; +RUBY_EXTERN VALUE rb_mKernel; /**< `Kernel` module. */ +RUBY_EXTERN VALUE rb_mComparable; /**< `Comparable` module. */ +RUBY_EXTERN VALUE rb_mEnumerable; /**< `Enumerable` module. */ +RUBY_EXTERN VALUE rb_mErrno; /**< `Errno` module. */ +RUBY_EXTERN VALUE rb_mFileTest; /**< `FileTest` module. */ +RUBY_EXTERN VALUE rb_mGC; /**< `GC` module. */ +RUBY_EXTERN VALUE rb_mMath; /**< `Math` module. */ +RUBY_EXTERN VALUE rb_mProcess; /**< `Process` module. */ +RUBY_EXTERN VALUE rb_mWaitReadable; /**< `IO::WaitReadable` module. */ +RUBY_EXTERN VALUE rb_mWaitWritable; /**< `IO::WaitWritable` module. */ + +RUBY_EXTERN VALUE rb_cBasicObject; /**< `BasicObject` class. */ +RUBY_EXTERN VALUE rb_cObject; /**< `Object` class. */ +RUBY_EXTERN VALUE rb_cArray; /**< `Array` class. */ +RUBY_EXTERN VALUE rb_cBinding; /**< `Binding` class. */ +RUBY_EXTERN VALUE rb_cClass; /**< `Class` class. */ +RUBY_EXTERN VALUE rb_cDir; /**< `Dir` class. */ +RUBY_EXTERN VALUE rb_cEncoding; /**< `Encoding` class. */ +RUBY_EXTERN VALUE rb_cEnumerator; /**< `Enumerator` class. */ +RUBY_EXTERN VALUE rb_cFalseClass; /**< `FalseClass` class. */ +RUBY_EXTERN VALUE rb_cFile; /**< `File` class. */ +RUBY_EXTERN VALUE rb_cComplex; /**< `Complex` class. 
*/ +RUBY_EXTERN VALUE rb_cFloat; /**< `Float` class. */ +RUBY_EXTERN VALUE rb_cHash; /**< `Hash` class. */ +RUBY_EXTERN VALUE rb_cIO; /**< `IO` class. */ +RUBY_EXTERN VALUE rb_cInteger; /**< `Integer` class. */ +RUBY_EXTERN VALUE rb_cMatch; /**< `MatchData` class. */ +RUBY_EXTERN VALUE rb_cMethod; /**< `Method` class. */ +RUBY_EXTERN VALUE rb_cModule; /**< `Module` class. */ +RUBY_EXTERN VALUE rb_cRefinement; /**< `Refinement` class. */ +RUBY_EXTERN VALUE rb_cNameErrorMesg; /**< `NameError::Message` class. */ +RUBY_EXTERN VALUE rb_cNilClass; /**< `NilClass` class. */ +RUBY_EXTERN VALUE rb_cNumeric; /**< `Numeric` class. */ +RUBY_EXTERN VALUE rb_cProc; /**< `Proc` class. */ +RUBY_EXTERN VALUE rb_cRandom; /**< `Random` class. */ +RUBY_EXTERN VALUE rb_cRange; /**< `Range` class. */ +RUBY_EXTERN VALUE rb_cRational; /**< `Rational` class. */ +RUBY_EXTERN VALUE rb_cRegexp; /**< `Regexp` class. */ +RUBY_EXTERN VALUE rb_cStat; /**< `File::Stat` class. */ +RUBY_EXTERN VALUE rb_cString; /**< `String` class. */ +RUBY_EXTERN VALUE rb_cStruct; /**< `Struct` class. */ +RUBY_EXTERN VALUE rb_cSymbol; /**< `Symbol` class. */ +RUBY_EXTERN VALUE rb_cThread; /**< `Thread` class. */ +RUBY_EXTERN VALUE rb_cTime; /**< `Time` class. */ +RUBY_EXTERN VALUE rb_cTrueClass; /**< `TrueClass` class. */ +RUBY_EXTERN VALUE rb_cUnboundMethod; /**< `UnboundMethod` class. */ + +/** + * @} + * @addtogroup exception + * @{ + */ + +RUBY_EXTERN VALUE rb_eException; /**< Mother of all exceptions. */ +RUBY_EXTERN VALUE rb_eStandardError; /**< `StandardError` exception. */ +RUBY_EXTERN VALUE rb_eSystemExit; /**< `SystemExit` exception. */ +RUBY_EXTERN VALUE rb_eInterrupt; /**< `Interrupt` exception. */ +RUBY_EXTERN VALUE rb_eSignal; /**< `SignalException` exception. */ +RUBY_EXTERN VALUE rb_eFatal; /**< `fatal` exception. */ +RUBY_EXTERN VALUE rb_eArgError; /**< `ArgumentError` exception. */ +RUBY_EXTERN VALUE rb_eEOFError; /**< `EOFError` exception. 
*/ +RUBY_EXTERN VALUE rb_eIndexError; /**< `IndexError` exception. */ +RUBY_EXTERN VALUE rb_eStopIteration; /**< `StopIteration` exception. */ +RUBY_EXTERN VALUE rb_eKeyError; /**< `KeyError` exception. */ +RUBY_EXTERN VALUE rb_eRangeError; /**< `RangeError` exception. */ +RUBY_EXTERN VALUE rb_eIOError; /**< `IOError` exception. */ +RUBY_EXTERN VALUE rb_eRuntimeError; /**< `RuntimeError` exception. */ +RUBY_EXTERN VALUE rb_eFrozenError; /**< `FrozenError` exception. */ +RUBY_EXTERN VALUE rb_eSecurityError; /**< `SecurityError` exception. */ +RUBY_EXTERN VALUE rb_eSystemCallError; /**< `SystemCallError` exception. */ +RUBY_EXTERN VALUE rb_eThreadError; /**< `ThreadError` exception. */ +RUBY_EXTERN VALUE rb_eTypeError; /**< `TypeError` exception. */ +RUBY_EXTERN VALUE rb_eZeroDivError; /**< `ZeroDivisionError` exception. */ +RUBY_EXTERN VALUE rb_eNotImpError; /**< `NotImplementedError` exception. */ +RUBY_EXTERN VALUE rb_eNoMemError; /**< `NoMemoryError` exception. */ +RUBY_EXTERN VALUE rb_eNoMethodError; /**< `NoMethodError` exception. */ +RUBY_EXTERN VALUE rb_eFloatDomainError; /**< `FloatDomainError` exception. */ +RUBY_EXTERN VALUE rb_eLocalJumpError; /**< `LocalJumpError` exception. */ +RUBY_EXTERN VALUE rb_eSysStackError; /**< `SystemStackError` exception. */ +RUBY_EXTERN VALUE rb_eRegexpError; /**< `RegexpError` exception. */ +RUBY_EXTERN VALUE rb_eEncodingError; /**< `EncodingError` exception. */ +RUBY_EXTERN VALUE rb_eEncCompatError; /**< `Encoding::CompatibilityError` exception. */ +RUBY_EXTERN VALUE rb_eNoMatchingPatternError; /**< `NoMatchingPatternError` exception. */ +RUBY_EXTERN VALUE rb_eNoMatchingPatternKeyError; /**< `NoMatchingPatternKeyError` exception. */ + +RUBY_EXTERN VALUE rb_eScriptError; /**< `ScriptError` exception. */ +RUBY_EXTERN VALUE rb_eNameError; /**< `NameError` exception. */ +RUBY_EXTERN VALUE rb_eSyntaxError; /**< `SyntaxError` exception. */ +RUBY_EXTERN VALUE rb_eLoadError; /**< `LoadError` exception. 
*/ + +RUBY_EXTERN VALUE rb_eMathDomainError; /**< `Math::DomainError` exception. */ + +/** + * @} + * @addtogroup object + * @{ + */ + +RUBY_EXTERN VALUE rb_stdin; /**< `STDIN` constant. */ +RUBY_EXTERN VALUE rb_stdout; /**< `STDOUT` constant. */ +RUBY_EXTERN VALUE rb_stderr; /**< `STDERR` constant. */ RBIMPL_ATTR_PURE() +/** + * Object to class mapping function. Every object have its class. This + * function obtains that. + * + * @param[in] obj Target object to query. + * @return The class of the given object. + * + * @internal + * + * This function is a super-duper hot path. Optimised targeting modern C + * compilers and x86_64 architecture. + */ static inline VALUE rb_class_of(VALUE obj) { @@ -150,8 +200,10 @@ rb_class_of(VALUE obj) #endif } -#define CLASS_OF rb_class_of +#define CLASS_OF rb_class_of /**< @old{rb_class_of} */ RBIMPL_SYMBOL_EXPORT_END() +/** @} */ + #endif /* RBIMPL_GLOBALS_H */ diff --git a/include/ruby/internal/has/attribute.h b/include/ruby/internal/has/attribute.h index 5231d053d5..f068a65caf 100644 --- a/include/ruby/internal/has/attribute.h +++ b/include/ruby/internal/has/attribute.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Defines #RBIMPL_HAS_ATTRIBUTE. */ #include "ruby/internal/config.h" diff --git a/include/ruby/internal/has/builtin.h b/include/ruby/internal/has/builtin.h index 01b1861eef..243ba2a34c 100644 --- a/include/ruby/internal/has/builtin.h +++ b/include/ruby/internal/has/builtin.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. 
They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Defines #RBIMPL_HAS_BUILTIN. */ #include "ruby/internal/config.h" @@ -53,8 +53,10 @@ # define RBIMPL_HAS_BUILTIN___builtin_assume 0 # /* See http://gcc.gnu.org/bugzilla/show_bug.cgi?id=52624 for bswap16. */ # define RBIMPL_HAS_BUILTIN___builtin_bswap16 RBIMPL_COMPILER_SINCE(GCC, 4, 8, 0) +#ifndef __OpenBSD__ # define RBIMPL_HAS_BUILTIN___builtin_bswap32 RBIMPL_COMPILER_SINCE(GCC, 3, 6, 0) # define RBIMPL_HAS_BUILTIN___builtin_bswap64 RBIMPL_COMPILER_SINCE(GCC, 3, 6, 0) +#endif # define RBIMPL_HAS_BUILTIN___builtin_clz RBIMPL_COMPILER_SINCE(GCC, 3, 6, 0) # define RBIMPL_HAS_BUILTIN___builtin_clzl RBIMPL_COMPILER_SINCE(GCC, 3, 6, 0) # define RBIMPL_HAS_BUILTIN___builtin_clzll RBIMPL_COMPILER_SINCE(GCC, 3, 6, 0) @@ -76,10 +78,6 @@ # define RBIMPL_HAS_BUILTIN___builtin_unreachable RBIMPL_COMPILER_SINCE(GCC, 4, 5, 0) # /* Note that "0, 0, 0" might be inaccurate. */ -#elif RBIMPL_COMPILER_IS(MSVC) -# /* MSVC has UNREACHABLE, but that is not __builtin_unreachable. */ -# define RBIMPL_HAS_BUILTIN(_) 0 - #else # /* Take config.h definition when available */ # define RBIMPL_HAS_BUILTIN(_) ((RBIMPL_HAS_BUILTIN_ ## _)+0) @@ -109,7 +107,7 @@ # define RBIMPL_HAS_BUILTIN___builtin_rotateright64 0 # define RBIMPL_HAS_BUILTIN___builtin_popcountll HAVE_BUILTIN___BUILTIN_POPCOUNTLL # define RBIMPL_HAS_BUILTIN___builtin_sub_overflow HAVE_BUILTIN___BUILTIN_SUB_OVERFLOW -# if defined(UNREACHABLE) +# if defined(HAVE___BUILTIN_UNREACHABLE) # define RBIMPL_HAS_BUILTIN___builtin_unreachable 1 # else # define RBIMPL_HAS_BUILTIN___builtin_unreachable 0 diff --git a/include/ruby/internal/has/c_attribute.h b/include/ruby/internal/has/c_attribute.h index b7eb94d22a..69b0f402cd 100644 --- a/include/ruby/internal/has/c_attribute.h +++ b/include/ruby/internal/has/c_attribute.h @@ -17,15 +17,27 @@ * recursively included from extension libraries written in C++. 
* Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Defines #RBIMPL_HAS_C_ATTRIBUTE. */ +#include "ruby/internal/has/extension.h" +#include "ruby/internal/has/warning.h" + /** Wraps (or simulates) `__has_c_attribute`. */ #if defined(__cplusplus) # /* Makes no sense. */ # define RBIMPL_HAS_C_ATTRIBUTE(_) 0 +#elif RBIMPL_HAS_EXTENSION(c_attributes) +# /* Hmm. It seems Clang 17 has this macro defined even when -std=c99 mode, +# * _and_ fails to compile complaining that attributes are C2X feature. We +# * need to work around this nonsense. */ +# define RBIMPL_HAS_C_ATTRIBUTE(_) __has_c_attribute(_) + +#elif RBIMPL_HAS_WARNING("-Wc2x-extensions") +# define RBIMPL_HAS_C_ATTRIBUTE(_) 0 + #elif defined(__has_c_attribute) # define RBIMPL_HAS_C_ATTRIBUTE(_) __has_c_attribute(_) diff --git a/include/ruby/internal/has/cpp_attribute.h b/include/ruby/internal/has/cpp_attribute.h index f42a4f5296..6a393b1a24 100644 --- a/include/ruby/internal/has/cpp_attribute.h +++ b/include/ruby/internal/has/cpp_attribute.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Defines #RBIMPL_HAS_CPP_ATTRIBUTE. */ #include "ruby/internal/compiler_is.h" diff --git a/include/ruby/internal/has/declspec_attribute.h b/include/ruby/internal/has/declspec_attribute.h index f813da4f5e..1c526daf78 100644 --- a/include/ruby/internal/has/declspec_attribute.h +++ b/include/ruby/internal/has/declspec_attribute.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. 
* Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Defines #RBIMPL_HAS_DECLSPEC_ATTRIBUTE. */ #include "ruby/internal/compiler_since.h" diff --git a/include/ruby/internal/has/extension.h b/include/ruby/internal/has/extension.h index 9ceb365ab9..da8c0d3cc2 100644 --- a/include/ruby/internal/has/extension.h +++ b/include/ruby/internal/has/extension.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Defines #RBIMPL_HAS_EXTENSION. */ #include "ruby/internal/has/feature.h" diff --git a/include/ruby/internal/has/feature.h b/include/ruby/internal/has/feature.h index b827590c00..7be8d27314 100644 --- a/include/ruby/internal/has/feature.h +++ b/include/ruby/internal/has/feature.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Defines #RBIMPL_HAS_FEATURE. */ diff --git a/include/ruby/internal/has/warning.h b/include/ruby/internal/has/warning.h index 03975ecc2f..12d7db183b 100644 --- a/include/ruby/internal/has/warning.h +++ b/include/ruby/internal/has/warning.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. 
They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Defines #RBIMPL_HAS_WARNING. */ diff --git a/include/ruby/internal/intern/array.h b/include/ruby/internal/intern/array.h index aafe0d1350..1909fdf17b 100644 --- a/include/ruby/internal/intern/array.h +++ b/include/ruby/internal/intern/array.h @@ -17,61 +17,640 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Public APIs related to ::rb_cArray. */ +#include "ruby/internal/attr/noalias.h" +#include "ruby/internal/attr/noexcept.h" +#include "ruby/internal/attr/nonnull.h" +#include "ruby/internal/attr/pure.h" #include "ruby/internal/dllexport.h" #include "ruby/internal/value.h" RBIMPL_SYMBOL_EXPORT_BEGIN() /* array.c */ -void rb_mem_clear(VALUE*, long); -VALUE rb_assoc_new(VALUE, VALUE); -VALUE rb_check_array_type(VALUE); + +RBIMPL_ATTR_NONNULL(()) +RBIMPL_ATTR_NOALIAS() +/** + * Fills the memory region with a series of ::RUBY_Qnil. + * + * @param[out] buf Buffer to squash. + * @param[in] len Number of objects of `buf`. + * @post `buf` is filled with ::RUBY_Qnil. + */ +void rb_mem_clear(VALUE *buf, long len) + RBIMPL_ATTR_NOEXCEPT(true) + ; + +/** + * Identical to rb_ary_new_from_values(), except it expects exactly two + * parameters. + * + * @param[in] car Arbitrary ruby object. + * @param[in] cdr Arbitrary ruby object. + * @return An allocated new array, of length 2, whose contents are the + * passed objects. + */ +VALUE rb_assoc_new(VALUE car, VALUE cdr); + +/** + * Try converting an object to its array representation using its `to_ary` + * method, if any. If there is no such thing, returns ::RUBY_Qnil. + * + * @param[in] obj Arbitrary ruby object to convert. 
+ * @exception rb_eTypeError `obj.to_ary` returned something non-Array. + * @retval RUBY_Qnil No conversion from `obj` to array defined. + * @retval otherwise Converted array representation of `obj`. + * @see rb_io_check_io + * @see rb_check_string_type + * @see rb_check_hash_type + */ +VALUE rb_check_array_type(VALUE obj); + +/** + * Allocates a new, empty array. + * + * @return An allocated new array, whose length is 0. + */ VALUE rb_ary_new(void); + +/** + * Identical to rb_ary_new(), except it additionally specifies how many rooms + * of objects it should allocate. This way you can create an array whose + * capacity is bigger than the length of it. If you can say that an array + * grows to a specific amount, this could be effective than resizing an array + * over and over again and again. + * + * @param[in] capa Designed capacity of the generating array. + * @return An empty array, whose capacity is `capa`. + */ VALUE rb_ary_new_capa(long capa); + +/** + * Constructs an array from the passed objects. + * + * @param[in] n Number of passed objects. + * @param[in] ... Arbitrary ruby objects, filled into the returning array. + * @return An array of size `n`, whose contents are the passed objects. + */ VALUE rb_ary_new_from_args(long n, ...); + +/** + * Identical to rb_ary_new_from_args(), except how objects are passed. + * + * @param[in] n Number of objects of `elts`. + * @param[in] elts Arbitrary ruby objects, filled into the returning array. + * @return An array of size `n`, whose contents are the passed objects. + */ VALUE rb_ary_new_from_values(long n, const VALUE *elts); -VALUE rb_ary_tmp_new(long); -void rb_ary_free(VALUE); -void rb_ary_modify(VALUE); -VALUE rb_ary_freeze(VALUE); -VALUE rb_ary_shared_with_p(VALUE, VALUE); -VALUE rb_ary_aref(int, const VALUE*, VALUE); -VALUE rb_ary_subseq(VALUE, long, long); -void rb_ary_store(VALUE, long, VALUE); -VALUE rb_ary_dup(VALUE); + +/** + * Allocates a hidden (no class) empty array. 
+ * + * @param[in] capa Designed capacity of the array. + * @return A hidden, empty array. + * @see rb_obj_hide() + */ +VALUE rb_ary_hidden_new(long capa); +#define rb_ary_tmp_new rb_ary_hidden_new + +/** + * Destroys the given array for no reason. + * + * @warning DO NOT USE IT. + * @warning Leave this task to our GC. + * @warning It was a wrong idea in the first place to let you know about it. + * + * @param[out] ary The array to be executed. + * @post The given array no longer exists. + * @note Maybe `Array#clear` could be what you want. + * + * @internal + * + * Should have moved this to `internal/array.h`. + */ +void rb_ary_free(VALUE ary); + +/** + * Declares that the array is about to be modified. This for instance lets the + * array have a dedicated backend storage. + * + * @param[out] ary Array about to be modified. + * @exception rb_eFrozenError `ary` is frozen. + * @post Upon successful return the passed array is eligible to be + * modified. + */ +void rb_ary_modify(VALUE ary); + +/** @alias{rb_obj_freeze} */ +VALUE rb_ary_freeze(VALUE obj); + +RBIMPL_ATTR_PURE() +/** + * Queries if the passed two arrays share the same backend storage. A use-case + * for knowing such property is to take a snapshot of an array (using + * e.g. rb_ary_replace()), then check later if that snapshot still shares the + * storage with the original. Taking a snapshot is ultra-cheap. If nothing + * happens the impact shall be minimal. But if someone modifies the original, + * that entity shall pay the cost of copy-on-write. You can detect that using + * this API. + * + * @param[in] lhs Comparison LHS. + * @param[in] rhs Comparison RHS. + * @retval RUBY_Qtrue They share the same backend storage. + * @retval RUBY_Qfalse They are distinct. + * @pre Both arguments must be of ::RUBY_T_ARRAY. + */ +VALUE rb_ary_shared_with_p(VALUE lhs, VALUE rhs); + +/** + * Queries element(s) of an array. This is complicated!
Refer `Array#slice` + * document for the complete description of how it behaves. + * + * @param[in] argc Number of objects of `argv`. + * @param[in] argv Up to 2 objects. + * @param[in] ary Target array. + * @exception rb_eTypeError `argv` (or its part) includes non-Integer. + * @exception rb_eRangeError rb_cArithSeq is passed, and is OOB. + * @return An element (if requested), or an array of elements (if + * requested), or ::RUBY_Qnil (if index OOB). + * + * @internal + * + * ```rbs + * # "int" is ::Integer or `#to_int`, defined in builtin.rbs + * + * class ::Array[unchecked out T] + * def slice + * : (int i) -> T? + * | (int beg, int len) -> ::Array[T]? + * | (Range[int] r) -> ::Array[T]? + * | (ArithmeticSequence as) -> ::Array[T]? # This also raises RangeError. + * end + * ``` + */ +VALUE rb_ary_aref(int argc, const VALUE *argv, VALUE ary); + +/** + * Obtains a part of the passed array. + * + * @param[in] ary Target array. + * @param[in] beg Subpart index. + * @param[in] len Requested length of returning array. + * @retval RUBY_Qnil Requested range out of bounds of `ary`. + * @retval otherwise An allocated new array whose contents are `ary`'s + * `beg` to `len`. + * @note Return array can be shorter than `len` when for instance + * `[0, 1, 2, 3]`'s 4th to 1,000,000,000th is requested. + */ +VALUE rb_ary_subseq(VALUE ary, long beg, long len); + +/** + * Destructively stores the passed value to the passed array's passed index. + * It also resizes the array's backend storage so that the requested index is + * not out of bounds. + * + * @param[out] ary Target array to modify. + * @param[in] key Where to store `val`. + * @param[in] val What to store at `key`. + * @exception rb_eFrozenError `ary` is frozen. + * @exception rb_eIndexError `key` is negative. + * @post `ary`'s `key`th position is occupied with `val`. + * @post Depending on `key` and previous length of `ary` this operation + * can also create a series of "hole" positions inside of the + * backend storage. 
They are filled with ::RUBY_Qnil. + */ +void rb_ary_store(VALUE ary, long key, VALUE val); + +/** + * Duplicates an array. + * + * @param[in] ary Target to duplicate. + * @return An allocated new array whose contents are identical to `ary`. + * + * @internal + * + * Not sure why this has to be something different from `ary_make_shared_copy`, + * which seems much efficient. + */ +VALUE rb_ary_dup(VALUE ary); + +/** + * I guess there is no use case of this function in extension libraries, but + * this is a routine identical to rb_ary_dup(). This makes the most sense when + * the passed array is formerly hidden by rb_obj_hide(). + * + * @param[in] ary An array, possibly hidden. + * @return A duplicated new instance of ::rb_cArray. + */ VALUE rb_ary_resurrect(VALUE ary); -VALUE rb_ary_to_ary(VALUE); -VALUE rb_ary_to_s(VALUE); -VALUE rb_ary_cat(VALUE, const VALUE *, long); -VALUE rb_ary_push(VALUE, VALUE); -VALUE rb_ary_pop(VALUE); -VALUE rb_ary_shift(VALUE); -VALUE rb_ary_unshift(VALUE, VALUE); -VALUE rb_ary_entry(VALUE, long); -VALUE rb_ary_each(VALUE); -VALUE rb_ary_join(VALUE, VALUE); -VALUE rb_ary_reverse(VALUE); -VALUE rb_ary_rotate(VALUE, long); -VALUE rb_ary_sort(VALUE); -VALUE rb_ary_sort_bang(VALUE); -VALUE rb_ary_delete(VALUE, VALUE); -VALUE rb_ary_delete_at(VALUE, long); -VALUE rb_ary_clear(VALUE); -VALUE rb_ary_plus(VALUE, VALUE); -VALUE rb_ary_concat(VALUE, VALUE); -VALUE rb_ary_assoc(VALUE, VALUE); -VALUE rb_ary_rassoc(VALUE, VALUE); -VALUE rb_ary_includes(VALUE, VALUE); -VALUE rb_ary_cmp(VALUE, VALUE); + +/** + * Force converts an object to an array. It first tries its `#to_ary` method. + * Takes the result if any. Otherwise creates an array of size 1 whose sole + * element is the passed object. + * + * @param[in] obj Arbitrary ruby object. + * @return An array representation of `obj`. + * @note Unlike rb_str_to_str() which is a variant of + * rb_check_string_type(), rb_ary_to_ary() is not a variant of + * rb_check_array_type(). 
+ */ +VALUE rb_ary_to_ary(VALUE obj); + +/** + * Converts an array into a human-readable string. Historically its behaviour + * changed over time. Currently it is identical to calling `inspect` method. + * This behaviour is from that of python (!!) circa 2006. + * + * @param[in] ary Array to inspect. + * @return Recursively inspected representation of `ary`. + * @see `[ruby-dev:29520]` + */ +VALUE rb_ary_to_s(VALUE ary); + +/** + * Destructively appends multiple elements at the end of the array. + * + * @param[out] ary Where to push `train`. + * @param[in] train Arbitrary ruby objects to push to `ary`. + * @param[in] len Number of objects of `train`. + * @exception rb_eIndexError `len` too large. + * @exception rb_eFrozenError `ary` is frozen. + * @return The passed `ary`. + * @post `ary` has contents from `train` appended at its end. + */ +VALUE rb_ary_cat(VALUE ary, const VALUE *train, long len); + +/** + * Special case of rb_ary_cat() that it adds only one element. + * + * @param[out] ary Where to push `elem`. + * @param[in] elem Arbitrary ruby object to push. + * @exception rb_eFrozenError `ary` is frozen. + * @return The passed `ary`. + * @post `ary` has `elem` appended at its end. + */ +VALUE rb_ary_push(VALUE ary, VALUE elem); + +/** + * Destructively deletes an element from the end of the passed array and + * returns what was deleted. + * + * @param[out] ary Target array to modify. + * @exception rb_eFrozenError `ary` is frozen. + * @return What was at the end of `ary`, or ::RUBY_Qnil if there is + * nothing to remove. + * @post `ary`'s last element, if any, is removed. + * @note There is no way to distinguish whether `ary` was an 1-element + * array whose content was ::RUBY_Qnil, or was empty. + */ +VALUE rb_ary_pop(VALUE ary); + +/** + * Destructively deletes an element from the beginning of the passed array and + * returns what was deleted. It can also be seen as a routine identical to + * rb_ary_pop(), except which side of the array to scrub. 
+ * + * @param[out] ary Target array to modify. + * @exception rb_eFrozenError `ary` is frozen. + * @return What was at the beginning of `ary`, or ::RUBY_Qnil if there is + * nothing to remove. + * @post `ary`'s first element, if any, is removed. As the name implies + * everything else remaining in `ary` gets moved towards `ary`'s + * beginning. + * @note There is no way to distinguish whether `ary` was an 1-element + * array whose content was ::RUBY_Qnil, or was empty. + */ +VALUE rb_ary_shift(VALUE ary); + +/** + * Destructively prepends the passed item at the beginning of the passed array. + * It can also be seen as a routine identical to rb_ary_push(), except which + * side of the array to modify. + * + * @param[out] ary Target array to modify. + * @param[in] elem Arbitrary ruby object to unshift. + * @exception rb_eFrozenError `ary` is frozen. + * @return The passed `ary`. + * @post `ary` has `elem` prepended at its beginning. + */ +VALUE rb_ary_unshift(VALUE ary, VALUE elem); + +RBIMPL_ATTR_PURE() +/** + * Queries an element of an array. When passed offset is negative it counts + * backwards. + * + * @param[in] ary An array to look into. + * @param[in] off Offset (can be negative). + * @return ::RUBY_Qnil when `off` is out of bounds of `ary`. Otherwise + * what is stored at `off`-th position of `ary`. + * @note `ary`'s `off`-th element can happen to be ::RUBY_Qnil. + */ +VALUE rb_ary_entry(VALUE ary, long off); + +/** + * Iteratively yields each element of the passed array to the implicitly passed + * block if any. In case there is no block given, an enumerator that does the + * thing is generated instead. + * + * @param[in] ary Array to iterate over. + * @retval ary Passed block was evaluated. + * @retval otherwise An instance of ::rb_cEnumerator for `Array#each`. + */ +VALUE rb_ary_each(VALUE ary); + +/** + * Recursively stringises the elements of the passed array, flattens that + * result, then joins the sequence using the passed separator.
+ * + * @param[in] ary Target array to convert. + * @param[in] sep Separator. Either a string, or ::RUBY_Qnil + * if you want no separator. + * @exception rb_eArgError Infinite recursion in `ary`. + * @exception rb_eTypeError `sep` is not a string. + * @exception rb_eEncCompatError Strings do not agree with their encodings. + * @return An instance of ::rb_cString which concatenates stringised + * contents of `ary`, using `sep` as separator. + */ +VALUE rb_ary_join(VALUE ary, VALUE sep); + +/** + * _Destructively_ reverses the passed array in-place. + * + * @warning This is `Array#reverse!`, not `Array#reverse`. + * @param[out] ary Target array to modify. + * @exception rb_eFrozenError `ary` is frozen. + * @return Passed `ary`. + * @post `ary` is reversed. + */ +VALUE rb_ary_reverse(VALUE ary); + +/** + * _Destructively_ rotates the passed array in-place towards its end. The + * amount can be negative. Would rotate to the opposite direction then. + * + * @warning This is `Array#rotate!`, not `Array#rotate`. + * @param[out] ary Target array to modify. + * @param[in] rot Amount of rotation. + * @exception rb_eFrozenError `ary` is frozen. + * @retval RUBY_Qnil Not rotated. + * @retval ary Rotated. + * @post `ary` is rotated. + */ +VALUE rb_ary_rotate(VALUE ary, long rot); + +/** + * Creates a copy of the passed array, whose elements are sorted according to + * their `<=>` result. + * + * @param[in] ary Array to sort. + * @exception rb_eArgError Comparison not defined among elements. + * @exception rb_eRuntimeError Infinite recursion in `<=>`. + * @return A copy of `ary`, sorted. + * @note As of writing this function uses `qsort` as backend algorithm, + * which means the result is unstable (in terms of sort stability). + */ +VALUE rb_ary_sort(VALUE ary); + +/** + * Destructively sorts the passed array in-place, according to each elements' + * `<=>` result. + * + * @param[in] ary Target array to modify.
+ * @exception rb_eArgError Comparison not defined among elements. + * @exception rb_eRuntimeError Infinite recursion in `<=>`. + * @return Passed `ary`. + * @post `ary` is sorted. + * @note As of writing this function uses `qsort` as backend algorithm, + * which means the result is unstable (in terms of sort stability). + */ +VALUE rb_ary_sort_bang(VALUE ary); + +/** + * Destructively removes elements from the passed array, so that there would be + * no elements inside that satisfy `==` relationship with the passed object. + * Returns the last deleted element if any. But in case there was nothing to + * delete it gets complicated. It checks for the implicitly passed block. If + * there is a block the return value would be what the block evaluates to. + * Otherwise it resorts to ::RUBY_Qnil. + * + * @param[out] ary Target array to modify. + * @param[in] elem Template object to match against each element. + * @exception rb_eFrozenError `ary` is frozen. + * @return What was deleted, or what the block returned, or + * ::RUBY_Qnil (see above). + * @post All elements that have `==` relationship with `elem` are purged + * from `ary`. Elements shift their positions so that `ary` gets + * compact. + * + * @internal + * + * Internally there also is `rb_ary_delete_same`, which compares by identity. + */ +VALUE rb_ary_delete(VALUE ary, VALUE elem); + +/** + * Destructively removes an element which resides at the specific index of the + * passed array. Unlike rb_ary_store() the index can be negative, which means + * the index counts backwards from the array's tail. + * + * @param[out] ary Target array to modify. + * @param[in] pos Position (can be negative). + * @exception rb_eFrozenError `ary` is frozen. + * @return What was deleted, or ::RUBY_Qnil in case of OOB. + * @post `ary`'s `pos`-th element is deleted if any. + * @note There is no way to distinguish whether `pos` is out of bound, + * or `pos` did exist but stored ::RUBY_Qnil as an ordinary value.
+ */ +VALUE rb_ary_delete_at(VALUE ary, long pos); + +/** + * Destructively removes everything from an array. + * + * @param[out] ary Target array to modify. + * @exception rb_eFrozenError `ary` is frozen. + * @return The passed `ary`. + * @post `ary` is an empty array. + */ +VALUE rb_ary_clear(VALUE ary); + +/** + * Creates a new array, concatenating the former to the latter. + * + * @param[in] lhs Source array #1. + * @param[in] rhs Source array #2. + * @exception rb_eIndexError Result array too big. + * @return A new array containing `rhs` concatenated to `lhs`. + * @note This operation doesn't commute. Don't get confused by the + * "plus" terminology. For historical reasons there are some + * noncommutative `+`s in Ruby. This is one of such things. There + * has been a long discussion around `+`s in programming languages. + * + * @internal + * + * rb_ary_concat() is not a destructive version of rb_ary_plus(). They raise + * different exceptions. Don't know why though. + */ +VALUE rb_ary_plus(VALUE lhs, VALUE rhs); + +/** + * Destructively appends the contents of latter into the end of former. + * + * @param[out] lhs Destination array. + * @param[in] rhs Source array. + * @exception rb_eFrozenError `lhs` is frozen. + * @exception rb_eIndexError Result array too big. + * @exception rb_eTypeError `rhs` doesn't respond to `#to_ary`. + * @return The passed `lhs`. + * @post `lhs` has contents of `rhs` appended to its end. + */ +VALUE rb_ary_concat(VALUE lhs, VALUE rhs); + +/** + * Looks up the passed key, assuming the passed array is an alist. An "alist" + * here is a list of "association"s, much like that of Emacs. Emacs has + * `assoc` function that behaves exactly the same as this one. + * + * ```ruby + * # This is an example of alist.
+ * auto_mode_alist = [ + * [ /\.[ch]\z/, :"c-mode" ], + * [ /\.[ch]pp\z/, :"c++-mode" ], + * [ /\.awk\z/, :"awk-mode" ], + * [ /\.cs\z/, :"csharp-mode" ], + * [ /\.go\z/, :"go-mode" ], + * [ /\.java\z/, :"java-mode" ], + * [ /\.pas\z/, :"pascal-mode" ], + * [ /\.rs\z/, :"rust-mode" ], + * [ /\.txt\z/, :"text-mode" ], + * ] + * ``` + * + * This function scans the passed array looking for an element, which itself is + * an array, whose first element is the passed key. If no such element is + * found, returns ::RUBY_Qnil. + * + * Although this function expects the passed array be an array of arrays, it + * can happily accept non-array elements; it just ignores such things. + * + * @param[in] alist An array of arrays. + * @param[in] key Needle. + * @retval RUBY_Qnil Nothing was found. + * @retval otherwise An element in `alist` whose first element is in `==` + * relationship with `key`. + */ +VALUE rb_ary_assoc(VALUE alist, VALUE key); + +/** + * Identical to rb_ary_assoc(), except it scans the passed array from the + * opposite direction. + * + * @param[in] alist An array of arrays. + * @param[in] key Needle. + * @retval RUBY_Qnil Nothing was found. + * @retval otherwise An element in `alist` whose first element is in `==` + * relationship with `key`. + */ +VALUE rb_ary_rassoc(VALUE alist, VALUE key); + +/** + * Queries if the passed array has the passed entry. + * + * @param[in] ary Target array to scan. + * @param[in] elem Target array to find. + * @retval RUBY_Qfalse No element in `ary` is in `==` relationship with + * `elem`. + * @retval RUBY_Qtrue There is at least one element in `ary` which is in + * `==` relationship with `elem`. + * + * @internal + * + * This is the only function in the entire C API that is named using third + * person singular form of a verb (except #ISASCII etc., which are not our + * naming). The counterpart Ruby API of this function is `Array#include?`. 
+ */ +VALUE rb_ary_includes(VALUE ary, VALUE elem); + +/** + * Recursively compares each element of the two arrays one-by-one using `<=>`. + * + * @param[in] lhs Comparison LHS. + * @param[in] rhs Comparison RHS. + * @retval RUBY_Qnil `lhs` and `rhs` are not comparable. + * @retval -1 `lhs` is less than `rhs`. + * @retval 0 They are equal. + * @retval 1 `rhs` is less than `lhs`. + */ +VALUE rb_ary_cmp(VALUE lhs, VALUE rhs); + +/** + * Replaces the contents of the former object with the contents of the latter. + * + * @param[out] copy Destination object. + * @param[in] orig Source object. + * @exception rb_eTypeError `orig` has no implicit conversion to Array. + * @exception rb_eFrozenError `copy` is frozen. + * @return The passed `copy`. + * @post `copy`'s former components are abandoned. It now has the + * identical length and contents to `orig`. + */ VALUE rb_ary_replace(VALUE copy, VALUE orig); -VALUE rb_get_values_at(VALUE, long, int, const VALUE*, VALUE(*)(VALUE,long)); + +/** + * This _was_ a generalisation of `Array#values_at`, `Struct#values_at`, and + * `MatchData#values_at`. It began its life as a refactoring effort. However + * as Ruby evolves over time, as of writing none of aforementioned methods + * share their implementations at all. This function is not deprecated; still + * works as it has been. But it is now kind of like a rudimentum. + * + * This function takes an object, which is a receiver, and a series of + * "indices", which are either integers, or ranges of integers. Calls the + * passed callback for each of those indices, along with the receiver. This + * callback is expected to do something like rb_ary_aref(), rb_struct_aref(), + * etc. In case of a range index rb_range_beg_len() expands the range. + * Finally return values of the callback are gathered as an array, then + * returned. + * + * @param[in] obj Arbitrary ruby object. + * @param[in] olen "Length" of `obj`. + * @param[in] argc Number of objects of `argv`.
+ * @param[in] argv List of "indices", described above. + * @param[in] func Callback function. + * @return A new instance of ::rb_cArray gathering `func` outputs. + * + * @internal + * + * `Array#values_at` no longer uses this function. There is no reason apart + * from historical ones to list this function here. + */ +VALUE rb_get_values_at(VALUE obj, long olen, int argc, const VALUE *argv, VALUE (*func)(VALUE obj, long oidx)); + +/** + * Expands or shrinks the passed array to the passed length. + * + * @param[out] ary An array to modify. + * @param[in] len Desired length of `ary`. + * @exception rb_eFrozenError `ary` is frozen. + * @exception rb_eIndexError `len` too long. + * @return The passed `ary`. + * @post `ary`'s length is `len`. + * @post Depending on `len` and previous length of `ary` this operation + * can also create a series of "hole" positions inside of the + * backend storage. They are filled with ::RUBY_Qnil. + * + * @internal + * + * `len` is signed. Intentional or...? + */ VALUE rb_ary_resize(VALUE ary, long len); -#define rb_ary_new2 rb_ary_new_capa -#define rb_ary_new3 rb_ary_new_from_args -#define rb_ary_new4 rb_ary_new_from_values + +#define rb_ary_new2 rb_ary_new_capa /**< @old{rb_ary_new_capa} */ +#define rb_ary_new3 rb_ary_new_from_args /**< @old{rb_ary_new_from_args} */ +#define rb_ary_new4 rb_ary_new_from_values /**< @old{rb_ary_new_from_values} */ RBIMPL_SYMBOL_EXPORT_END() diff --git a/include/ruby/internal/intern/bignum.h b/include/ruby/internal/intern/bignum.h index 1ac92e9c90..c27f77a1fb 100644 --- a/include/ruby/internal/intern/bignum.h +++ b/include/ruby/internal/intern/bignum.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98.
* @brief Public APIs related to so-called rb_cBignum. */ #include "ruby/internal/config.h" @@ -26,6 +26,7 @@ # include <stddef.h> #endif +#include "ruby/internal/attr/nonnull.h" #include "ruby/internal/dllexport.h" #include "ruby/internal/value.h" #include "ruby/backward/2/long_long.h" @@ -33,71 +34,811 @@ RBIMPL_SYMBOL_EXPORT_BEGIN() /* bignum.c */ -VALUE rb_big_new(size_t, int); + +/** + * Allocates a bignum object. + * + * @param[in] len Length of the bignum's backend storage, in words. + * @param[in] sign Sign of the bignum. + * @return An allocated new bignum instance. + * @note This only allocates an object, doesn't fill its value in. + * + * @internal + * + * @shyouhei finds it hard to use from extension libraries. `len` is per + * `BDIGIT` but its definition is hidden. + */ +VALUE rb_big_new(size_t len, int sign); + +/** + * Queries if the passed bignum instance is a "bigzero". What is a bigzero? + * Well, bignums are for very big integers, but can also represent tiny ones + * like -1, 0, 1. Bigzero are instances of bignums whose values are zero. + * Knowing if a bignum is bigzero can be handy on occasions, like for instance + * detecting division by zero situation. + * + * @param[in] x A bignum. + * @retval 1 It is a bigzero. + * @retval 0 Otherwise. + */ int rb_bigzero_p(VALUE x); -VALUE rb_big_clone(VALUE); -void rb_big_2comp(VALUE); -VALUE rb_big_norm(VALUE); + +/** + * Duplicates the given bignum. + * + * @param[in] num A bignum. + * @return An allocated bignum, who is equivalent to `num`. + */ +VALUE rb_big_clone(VALUE num); + +/** + * Destructively modify the passed bignum into 2's complement representation. + * + * @note By default bignums are in signed magnitude system. + * + * @param[out] num A bignum to modify. + */ +void rb_big_2comp(VALUE num); + +/** + * Normalises the passed bignum. It for instance returns a fixnum of the same + * value if fixnum can represent that number. + * + * @param[out] x Target bignum (can be destructively modified). 
+ * @return An integer of the identical value (can be `x` itself). + */ +VALUE rb_big_norm(VALUE x); + +/** + * Destructively resizes the backend storage of the passed bignum. + * + * @param[out] big A bignum. + * @param[in] len New length of `big`'s backend, in words. + */ void rb_big_resize(VALUE big, size_t len); -VALUE rb_cstr_to_inum(const char*, int, int); -VALUE rb_str_to_inum(VALUE, int, int); -VALUE rb_cstr2inum(const char*, int); -VALUE rb_str2inum(VALUE, int); -VALUE rb_big2str(VALUE, int); -long rb_big2long(VALUE); + +RBIMPL_ATTR_NONNULL(()) +/** + * Parses C's string to convert into a Ruby's integer. It understands prefixes + * (e.g. `0x`) and underscores. + * + * @param[in] str Stringised representation of the return value. + * @param[in] base Base of conversion. Must be `-36..36` inclusive, + * except `1`. `2..36` means the conversion is done + * according to it, with unmatched prefix understood + * as a part of the result. `-36..-2` means the + * conversion honours prefix when present, or use + * `-base` when absent. `0` is equivalent to `-10`. + * `-1` mandates a prefix. `1` is an error. + * @param[in] badcheck Whether to raise ::rb_eArgError on failure. If + * `0` is passed here this function can return + * `INT2FIX(0)` for parse errors. + * @exception rb_eArgError Failed to parse (and `badcheck` is truthy). + * @return An instance of ::rb_cInteger, which is a numeric interpretation + * of what is written in `str`. + * + * @internal + * + * Not sure if it intentionally accepts `base == -1` or is just buggy. Nobody + * practically uses negative bases these days. + */ +VALUE rb_cstr_to_inum(const char *str, int base, int badcheck); + +/** + * Identical to rb_cstr2inum(), except it takes Ruby's strings instead of C's. + * + * @param[in] str Stringised representation of the return + * value. + * @param[in] base Base of conversion. Must be `-36..36` + * inclusive, except `1`. 
`2..36` means the + * conversion is done according to it, with + * unmatched prefix understood as a part of the + * result. `-36..-2` means the conversion + * honours prefix when present, or use `-base` + * when absent. `0` is equivalent to `-10`. + * `-1` mandates a prefix. `1` is an error. + * @param[in] badcheck Whether to raise ::rb_eArgError on failure. + * If `0` is passed here this function can + * return `INT2FIX(0)` for parse errors. + * @exception rb_eArgError Failed to parse (and `badcheck` is truthy). + * @exception rb_eTypeError `str` is not a string. + * @exception rb_eEncCompatError `str` is not ASCII compatible. + * @return An instance of ::rb_cInteger, which is a numeric interpretation + * of what is written in `str`. + */ +VALUE rb_str_to_inum(VALUE str, int base, int badcheck); + +RBIMPL_ATTR_NONNULL(()) +/** + * Identical to rb_cstr_to_inum(), except the second argument controls the base + * and badcheck at once. It basically doesn't raise for parse errors, unless + * the base is zero. + * + * This is an older API. New codes might prefer rb_cstr_to_inum(). + * + * @param[in] str Stringised representation of the return value. + * @param[in] base Base of conversion. Must be `-36..36` inclusive, + * except `1`. `2..36` means the conversion is done + * according to it, with unmatched prefix understood + * as a part of the result. `-36..-2` means the + * conversion honours prefix when present, or use + * `-base` when absent. `0` is equivalent to `-10`. + * `-1` mandates a prefix. `1` is an error. + * @exception rb_eArgError Failed to parse (and `base` is zero). + * @return An instance of ::rb_cInteger, which is a numeric interpretation + * of what is written in `str`. + */ +VALUE rb_cstr2inum(const char *str, int base); + +/** + * Identical to rb_str_to_inum(), except the second argument controls the base + * and badcheck at once. It can also be seen as a routine identical to + * rb_cstr2inum(), except it takes Ruby's strings instead of C's. 
+ * + * This is an older API. New codes might prefer rb_cstr_to_inum(). + * + * @param[in] str Stringised representation of the return + * value. + * @param[in] base Base of conversion. Must be `-36..36` + * inclusive, except `1`. `2..36` means the + * conversion is done according to it, with + * unmatched prefix understood as a part of the + * result. `-36..-2` means the conversion + * honours prefix when present, or use `-base` + * when absent. `0` is equivalent to `-10`. + * `-1` mandates a prefix. `1` is an error. + * @exception rb_eArgError Failed to parse (and `base` is zero). + * @exception rb_eTypeError `str` is not a string. + * @exception rb_eEncCompatError `str` is not ASCII compatible. + * @return An instance of ::rb_cInteger, which is a numeric interpretation + * of what is written in `str`. + */ +VALUE rb_str2inum(VALUE str, int base); + +/** + * Generates a place-value representation of the passed integer. + * + * @param[in] x An integer to stringify. + * @param[in] base `2` to `36` inclusive for each radix. + * @exception rb_eArgError `base` is out of range. + * @exception rb_eRangeError `x` is too big, cannot represent in string. + * @return An instance of ::rb_cString which represents `x`. + */ +VALUE rb_big2str(VALUE x, int base); + +/** + * Converts a bignum into C's `long`. + * + * @param[in] x A bignum. + * @exception rb_eRangeError `x` is out of range of `long`. + * @return The passed value converted into C's `long`. + */ +long rb_big2long(VALUE x); + +/** @alias{rb_big2long} */ #define rb_big2int(x) rb_big2long(x) -unsigned long rb_big2ulong(VALUE); + +/** + * Converts a bignum into C's `unsigned long`. + * + * @param[in] x A bignum. + * @exception rb_eRangeError `x` is out of range of `unsigned long`. + * @return The passed value converted into C's `unsigned long`. + * + * @internal + * + * This function can generate a very large positive integer for a negative + * input. 
For instance applying Ruby's -4,611,686,018,427,387,905 to this + * function yields C's 13,835,058,055,282,163,711 on my machine. This is how + * it has been. Cannot change any longer. + */ +unsigned long rb_big2ulong(VALUE x); + +/** @alias{rb_big2ulong} */ #define rb_big2uint(x) rb_big2ulong(x) + #if HAVE_LONG_LONG +/** + * Converts a bignum into C's `long long`. + * + * @param[in] x A bignum. + * @exception rb_eRangeError `x` is out of range of `long long`. + * @return The passed value converted into C's `long long`. + */ LONG_LONG rb_big2ll(VALUE); + +/** + * Converts a bignum into C's `unsigned long long`. + * + * @param[in] x A bignum. + * @exception rb_eRangeError `x` is out of range of `unsigned long long`. + * @return The passed value converted into C's `unsigned long long`. + * + * @internal + * + * This function can generate a very large positive integer for a negative + * input. For instance applying Ruby's -4,611,686,018,427,387,905 to this + * function yields C's 13,835,058,055,282,163,711 on my machine. This is how + * it has been. Cannot change any longer. + */ unsigned LONG_LONG rb_big2ull(VALUE); + #endif /* HAVE_LONG_LONG */ + +RBIMPL_ATTR_NONNULL(()) +/** + * Converts a bignum into a series of its parts. + * + * @param[in] val An integer. + * @param[out] buf Return buffer. + * @param[in] num_longs Number of words of `buf`. + * @exception rb_eTypeError `val` doesn't respond to `#to_int`. + * @post `buf` is filled with `val`'s 2's complement representation, in + * the host CPU's native byte order, from least significant word + * towards the most significant one, for `num_longs` words. + * @note The "pack" terminology comes from `Array#pack`. + */ void rb_big_pack(VALUE val, unsigned long *buf, long num_longs); + +RBIMPL_ATTR_NONNULL(()) +/** + * Constructs a (possibly very big) bignum from a series of integers. `buf[0]` + * would be the return value's least significant word; `buf[num_longs-1]` would + * be that of most significant.
+ * + * @param[in] buf A series of integers. + * @param[in] num_longs Number of words of `buf`. + * @exception rb_eArgError Result would be too big. + * @return An instance of ::rb_cInteger which is an "unpack"-ed value of + * the parameters. + * @note The "unpack" terminology comes from `String#unpack`. + */ VALUE rb_big_unpack(unsigned long *buf, long num_longs); -int rb_uv_to_utf8(char[6],unsigned long); -VALUE rb_dbl2big(double); -double rb_big2dbl(VALUE); -VALUE rb_big_cmp(VALUE, VALUE); -VALUE rb_big_eq(VALUE, VALUE); -VALUE rb_big_eql(VALUE, VALUE); -VALUE rb_big_plus(VALUE, VALUE); -VALUE rb_big_minus(VALUE, VALUE); -VALUE rb_big_mul(VALUE, VALUE); -VALUE rb_big_div(VALUE, VALUE); -VALUE rb_big_idiv(VALUE, VALUE); -VALUE rb_big_modulo(VALUE, VALUE); -VALUE rb_big_divmod(VALUE, VALUE); -VALUE rb_big_pow(VALUE, VALUE); -VALUE rb_big_and(VALUE, VALUE); -VALUE rb_big_or(VALUE, VALUE); -VALUE rb_big_xor(VALUE, VALUE); -VALUE rb_big_lshift(VALUE, VALUE); -VALUE rb_big_rshift(VALUE, VALUE); - -/* For rb_integer_pack and rb_integer_unpack: */ -/* "MS" in MSWORD and MSBYTE means "most significant" */ -/* "LS" in LSWORD and LSBYTE means "least significant" */ + +/* pack.c */ + +RBIMPL_ATTR_NONNULL(()) +/** + * Encodes a Unicode codepoint into its UTF-8 representation. + * + * @param[out] buf Return buffer, must at least be 6 bytes wide. + * @param[in] uv A Unicode codepoint. + * @exception rb_eRangeError `uv` is out of Unicode. + * @return Number of bytes written to `buf`. + * @post `buf` holds a UTF-8 representation of `uv`. + */ +int rb_uv_to_utf8(char buf[6], unsigned long uv); + +/* bignum.c */ + +/** + * Converts a C's `double` into a bignum. + * + * @param[in] d A value to convert. + * @exception rb_eFloatDomainError `d` is Inf/NaN. + * @return An instance of ::rb_cInteger whose value is approximately `d`. + * + * @internal + * + * @shyouhei is not sure if the result is guaranteed to be the nearest integer + * of `d`.
+ */ +VALUE rb_dbl2big(double d); + +/** + * Converts a bignum into C's `double`. + * + * @param[in] x A bignum. + * @return The passed value converted into C's `double`. + * + * @internal + * + * @shyouhei is not sure if the result is guaranteed to be `x`'s nearest value + * that a `double` can represent. + */ +double rb_big2dbl(VALUE x); + +/** + * Compares the passed two bignums. + * + * @param[in] lhs Comparison LHS. + * @param[in] rhs Comparison RHS. + * @retval -1 `rhs` is bigger than `lhs`. + * @retval 0 They are identical. + * @retval 1 `lhs` is bigger than `rhs`. + * @see rb_num_coerce_cmp() + */ +VALUE rb_big_cmp(VALUE lhs, VALUE rhs); + +/** + * Equality, in terms of `==`. This checks if the _value_ is the same, not the + * identity. For instance `1 == 1.0` must hold. + * + * @param[in] lhs Comparison LHS. + * @param[in] rhs Comparison RHS. + * @retval RUBY_Qtrue They are the same. + * @retval RUBY_Qfalse They are different. + */ +VALUE rb_big_eq(VALUE lhs, VALUE rhs); + +/** + * Equality, in terms of `eql?`. Unlike rb_big_eq() it does not convert + * ::rb_cFloat etc. This function returns ::RUBY_Qtrue if and only if both + * parameters are bignums, which represent the identical numerical value. + * + * @param[in] lhs Comparison LHS. + * @param[in] rhs Comparison RHS. + * @retval RUBY_Qtrue They are identical. + * @retval RUBY_Qfalse They are distinct. + */ +VALUE rb_big_eql(VALUE lhs, VALUE rhs); + +/** + * Performs addition of the passed two objects. + * + * @param[in] x A bignum. + * @param[in] y Arbitrary ruby object. + * @return What `x + y` evaluates to. + * @see rb_num_coerce_bin() + */ +VALUE rb_big_plus(VALUE x, VALUE y); + +/** + * Performs subtraction of the passed two objects. + * + * @param[in] x A bignum. + * @param[in] y Arbitrary ruby object. + * @return What `x - y` evaluates to. + * @see rb_num_coerce_bin() + */ +VALUE rb_big_minus(VALUE x, VALUE y); + +/** + * Performs multiplication of the passed two objects. 
+ * + * @param[in] x A bignum. + * @param[in] y Arbitrary ruby object. + * @return What `x * y` evaluates to. + * @see rb_num_coerce_bin() + */ +VALUE rb_big_mul(VALUE x, VALUE y); + +/** + * Performs division of the passed two objects. + * + * @param[in] x A bignum. + * @param[in] y Arbitrary ruby object. + * @return What `x / y` evaluates to. + * @see rb_num_coerce_bin() + */ +VALUE rb_big_div(VALUE x, VALUE y); + +/** + * Performs "integer division". This is different from rb_big_div(). + * + * @param[in] x A bignum. + * @param[in] y Arbitrary ruby object. + * @return What `x.div y` evaluates to. + * @see rb_num_coerce_bin() + */ +VALUE rb_big_idiv(VALUE x, VALUE y); + +/** + * Performs modulo of the passed two objects. + * + * @param[in] x A bignum. + * @param[in] y Arbitrary ruby object. + * @return What `x % y` evaluates to. + * @see rb_num_coerce_bin() + * + * @internal + * + * There also is `rb_big_remainder()` internally, which is different from this + * one. + */ +VALUE rb_big_modulo(VALUE x, VALUE y); + +/** + * Performs "divmod" operation. The operation in bignum's context is that it + * calculates rb_big_idiv() and rb_big_modulo() at once. + * + * @param[in] x A bignum. + * @param[in] y Arbitrary ruby object. + * @return What `x.divmod y` evaluates to. + * @see rb_num_coerce_bin() + */ +VALUE rb_big_divmod(VALUE x, VALUE y); + +/** + * Raises `x` to the power of `y`. + * + * @param[in] x A bignum. + * @param[in] y Arbitrary ruby object. + * @return What `x ** y` evaluates to. + * @see rb_num_coerce_bin() + * @note This can return an instance of ::rb_cFloat, even when both `x` + * and `y` are bignums. Or an instance of ::rb_cRational, when for + * instance `y` is negative. + */ +VALUE rb_big_pow(VALUE x, VALUE y); + +/** + * Performs bitwise and of the passed two objects. + * + * @param[in] x A bignum. + * @param[in] y Arbitrary ruby object. + * @return What `x & y` evaluates to.
+ * @see rb_num_coerce_bit() + */ +VALUE rb_big_and(VALUE x, VALUE y); + +/** + * Performs bitwise or of the passed two objects. + * + * @param[in] x A bignum. + * @param[in] y Arbitrary ruby object. + * @return What `x | y` evaluates to. + * @see rb_num_coerce_bit() + */ +VALUE rb_big_or(VALUE x, VALUE y); + +/** + * Performs exclusive or of the passed two objects. + * + * @param[in] x A bignum. + * @param[in] y Arbitrary ruby object. + * @return What `x ^ y` evaluates to. + * @see rb_num_coerce_bit() + */ +VALUE rb_big_xor(VALUE x, VALUE y); + +/** + * Performs shift left. + * + * @param[in] x A bignum. + * @param[in] y Shift amount. + * @exception rb_eTypeError `y` is not an integer. + * @exception rb_eArgError `y` is too big. + * @return `x` shifted left to `y` bits. + * @note `y` can be negative. Shifts right then. + */ +VALUE rb_big_lshift(VALUE x, VALUE y); + +/** + * Performs shift right. + * + * @param[in] x A bignum. + * @param[in] y Shift amount. + * @exception rb_eTypeError `y` is not an integer. + * @return `x` shifted right to `y` bits. + * @note This is arithmetic. Because bignums are not bitfields there is + * no shift right logical operator. + */ +VALUE rb_big_rshift(VALUE x, VALUE y); + +/** + * @name Flags for rb_integer_pack()/rb_integer_unpack() + * @{ + */ + +/** Stores/interprets the most significant word as the first word. */ #define INTEGER_PACK_MSWORD_FIRST 0x01 + +/** Stores/interprets the least significant word as the first word. */ #define INTEGER_PACK_LSWORD_FIRST 0x02 + +/** + * Stores/interprets the most significant byte in a word as the first byte in + * the word. + */ #define INTEGER_PACK_MSBYTE_FIRST 0x10 + +/** + * Stores/interprets the least significant byte in a word as the first byte in + * the word. + */ #define INTEGER_PACK_LSBYTE_FIRST 0x20 + +/** + * Means either #INTEGER_PACK_MSBYTE_FIRST or #INTEGER_PACK_LSBYTE_FIRST, + * depending on the host processor's endian. 
+ */ #define INTEGER_PACK_NATIVE_BYTE_ORDER 0x40 + +/** Uses 2's complement representation. */ #define INTEGER_PACK_2COMP 0x80 + +/** Uses "generic" implementation (handy on test). */ #define INTEGER_PACK_FORCE_GENERIC_IMPLEMENTATION 0x400 -/* For rb_integer_unpack: */ + +/** + * Always generates a bignum object even if the integer can be representable + * using fixnum scheme (unpack only) + */ #define INTEGER_PACK_FORCE_BIGNUM 0x100 + +/** + * Interprets the input as a signed negative number (unpack only). If not + * specified returns a positive number. + */ #define INTEGER_PACK_NEGATIVE 0x200 -/* Combinations: */ + +/** Little endian combination. */ #define INTEGER_PACK_LITTLE_ENDIAN \ (INTEGER_PACK_LSWORD_FIRST | \ INTEGER_PACK_LSBYTE_FIRST) + +/** Big endian combination */ #define INTEGER_PACK_BIG_ENDIAN \ (INTEGER_PACK_MSWORD_FIRST | \ INTEGER_PACK_MSBYTE_FIRST) + +/** @} */ + +RBIMPL_ATTR_NONNULL(()) +/** + * Exports an integer into a buffer. This function fills the buffer specified + * by `words` and `numwords` as `val` in the format specified by `wordsize`, + * `nails` and `flags`. + * + * @param[in] val Integer or integer-like object which has + * `#to_int` method. + * @param[out] words Return buffer. + * @param[in] numwords Number of words of `words`. + * @param[in] wordsize Number of bytes per word. + * @param[in] nails Number of padding bits in a word. Most + * significant nails bits of each word are filled + * by zero. + * @param[in] flags Bitwise or of constants whose name starts + * "INTEGER_PACK_". + * @exception rb_eTypeError `val` doesn't respond to `#to_int`. + * + * Possible flags are: + * + * - #INTEGER_PACK_MSWORD_FIRST: + * Stores the most significant word as the first word. + * + * - #INTEGER_PACK_LSWORD_FIRST: + * Stores the least significant word as the first word. + * + * - #INTEGER_PACK_MSBYTE_FIRST: + * Stores the most significant byte in a word as the first byte in the + * word. 
+ * + * - #INTEGER_PACK_LSBYTE_FIRST: + * Stores the least significant byte in a word as the first byte in the + * word. + * + * - #INTEGER_PACK_NATIVE_BYTE_ORDER: + * Either #INTEGER_PACK_MSBYTE_FIRST or #INTEGER_PACK_LSBYTE_FIRST + * corresponding to the host's endian. + * + * - #INTEGER_PACK_2COMP: + * Uses 2's complement representation. + * + * - #INTEGER_PACK_LITTLE_ENDIAN: Shorthand of + * `INTEGER_PACK_LSWORD_FIRST|INTEGER_PACK_LSBYTE_FIRST`. + * + * - #INTEGER_PACK_BIG_ENDIAN: Shorthand of + * `INTEGER_PACK_MSWORD_FIRST|INTEGER_PACK_MSBYTE_FIRST`. + * + * - #INTEGER_PACK_FORCE_GENERIC_IMPLEMENTATION: + * Uses generic implementation (for test and debug). + * + * This function fills the buffer specified by `words` as `val`'s 2's + * complement representation if #INTEGER_PACK_2COMP is specified in `flags`. + * Otherwise it fills `words` as `abs(val)` and signedness is returned via the + * return value. + * + * @return The signedness and overflow condition. The overflow condition + * depends on #INTEGER_PACK_2COMP. + * + * When #INTEGER_PACK_2COMP is not specified: + * + * - `-2` : + * Negative overflow. `val <= -2**(numwords*(wordsize*CHAR_BIT-nails))` + * + * - `-1` : + * Negative without overflow. + * `-2**(numwords*(wordsize*CHAR_BIT-nails)) < val < 0` + * + * - `0` : zero. `val == 0` + * + * - `1` : + * Positive without overflow. + * `0 < val < 2**(numwords*(wordsize*CHAR_BIT-nails))` + * + * - `2` : + * Positive overflow. `2**(numwords*(wordsize*CHAR_BIT-nails)) <= val` + * + * When #INTEGER_PACK_2COMP is specified: + * + * - `-2` : + * Negative overflow. `val < -2**(numwords*(wordsize*CHAR_BIT-nails))` + * + * - `-1` : + * Negative without overflow. + * `-2**(numwords*(wordsize*CHAR_BIT-nails)) <= val < 0` + * + * - `0` : zero. `val == 0` + * + * - `1` : + * Positive without overflow. + * `0 < val < 2**(numwords*(wordsize*CHAR_BIT-nails))` + * + * - `2` : + * Positive overflow. 
`2**(numwords*(wordsize*CHAR_BIT-nails)) <= val` + * + * The value, `-2**(numwords*(wordsize*CHAR_BIT-nails))`, is representable in + * 2's complement representation but not representable in absolute value. So + * `-1` is returned for the value if #INTEGER_PACK_2COMP is specified but + * returns `-2` if #INTEGER_PACK_2COMP is not specified. + * + * The least significant words are filled in the buffer when overflow occurs. + */ int rb_integer_pack(VALUE val, void *words, size_t numwords, size_t wordsize, size_t nails, int flags); + +RBIMPL_ATTR_NONNULL(()) +/** + * Import an integer from a buffer. + * + * @param[in] words Buffer to import. + * @param[in] numwords Number of words of `words`. + * @param[in] wordsize Number of bytes per word. + * @param[in] nails Number of padding bits in a word. Most + * significant nails bits of each word are ignored. + * @param[in] flags Bitwise or of constants whose name starts + * "INTEGER_PACK_". + * @exception rb_eArgError `numwords * wordsize` too big. + * + * Possible flags are: + * + * - #INTEGER_PACK_MSWORD_FIRST: + * Interpret the first word as the most significant word. + * + * - #INTEGER_PACK_LSWORD_FIRST: + * Interpret the first word as the least significant word. + * + * - #INTEGER_PACK_MSBYTE_FIRST: + * Interpret the first byte in a word as the most significant byte in the + * word. + * + * - #INTEGER_PACK_LSBYTE_FIRST: + * Interpret the first byte in a word as the least significant byte in + * the word. + * + * - #INTEGER_PACK_NATIVE_BYTE_ORDER: + * Either #INTEGER_PACK_MSBYTE_FIRST or #INTEGER_PACK_LSBYTE_FIRST + * corresponding to the host's endian. + * + * - #INTEGER_PACK_2COMP: + * Uses 2's complement representation.
+ * + * - #INTEGER_PACK_LITTLE_ENDIAN: Shorthand of + * `INTEGER_PACK_LSWORD_FIRST|INTEGER_PACK_LSBYTE_FIRST` + * + * - #INTEGER_PACK_BIG_ENDIAN: Shorthand of + * `INTEGER_PACK_MSWORD_FIRST|INTEGER_PACK_MSBYTE_FIRST` + * + * - #INTEGER_PACK_FORCE_BIGNUM: + * Returns a bignum even if its value is representable as a fixnum. + * + * - #INTEGER_PACK_NEGATIVE: + * Returns a non-positive value. (Returns a non-negative value if not + * specified.) + * + * - #INTEGER_PACK_FORCE_GENERIC_IMPLEMENTATION: + * Uses generic implementation (for test and debug). + * + * @return An instance of ::rb_cInteger whose value is the interpreted + * `words`. The range of the result value depends on + * #INTEGER_PACK_2COMP and #INTEGER_PACK_NEGATIVE. + * + * When #INTEGER_PACK_2COMP is not set: + * + * - `0 <= val < 2**(numwords*(wordsize*CHAR_BIT-nails))` if + * `!INTEGER_PACK_NEGATIVE` + * + * - `-2**(numwords*(wordsize*CHAR_BIT-nails)) < val <= 0` if + * `INTEGER_PACK_NEGATIVE` + * + * When #INTEGER_PACK_2COMP is set: + * + * - `-2**(numwords*(wordsize*CHAR_BIT-nails)-1)` `<= val <=` + * `2**(numwords*(wordsize*CHAR_BIT-nails)-1)-1` if + * `!INTEGER_PACK_NEGATIVE` + * + * - `-2**(numwords*(wordsize*CHAR_BIT-nails)) <= val <= -1` if + * `INTEGER_PACK_NEGATIVE` + * + * Passing #INTEGER_PACK_2COMP without #INTEGER_PACK_NEGATIVE means sign + * extension. #INTEGER_PACK_2COMP with #INTEGER_PACK_NEGATIVE means assuming + * the higher bits are `1`. + * + * Note that this function returns 0 when `numwords` is zero and + * #INTEGER_PACK_2COMP is set but #INTEGER_PACK_NEGATIVE is not set. + */ VALUE rb_integer_unpack(const void *words, size_t numwords, size_t wordsize, size_t nails, int flags); + +/** + * Calculates the number of bytes needed to represent the absolute value of the + * passed integer. + * + * @param[in] val Integer or integer-like object which has + * `#to_int` method. 
+ * @param[out] nlz_bits_ret Number of leading zero bits in the most + * significant byte is returned if not `NULL`. + * @exception rb_eTypeError `val` doesn't respond to `#to_int`. + * @return `((val_numbits * CHAR_BIT + CHAR_BIT - 1) / CHAR_BIT)`, where + * val_numbits is the number of bits of `abs(val)`. + * @post If `nlz_bits_ret` is not `NULL`, + * `(return_value * CHAR_BIT - val_numbits)` is stored in + * `*nlz_bits_ret`. In this case, + * `0 <= *nlz_bits_ret < CHAR_BIT`. + * + * This function should not overflow. + */ size_t rb_absint_size(VALUE val, int *nlz_bits_ret); + +/** + * Calculates the number of words needed to represent the absolute value of the + * passed integer. Unlike rb_absint_size() this function can overflow. It + * returns `(size_t)-1` then. + * + * @param[in] val Integer or integer-like object which has + * `#to_int` method. + * @param[in] word_numbits Number of bits per word. + * @param[out] nlz_bits_ret Number of leading zero bits in the most + * significant word is returned if not `NULL`. + * @exception rb_eTypeError `val` doesn't respond to `#to_int`. + * @retval (size_t)-1 Overflowed. + * @retval otherwise + * `((val_numbits * CHAR_BIT + word_numbits - 1) / word_numbits)`, + * where val_numbits is the number of bits of `abs(val)`. + * @post If `nlz_bits_ret` is not `NULL` and there is no overflow, + * `(return_value * word_numbits - val_numbits)` is stored in + * `*nlz_bits_ret`. In this case, + * `0 <= *nlz_bits_ret < word_numbits`. + * + */ size_t rb_absint_numwords(VALUE val, size_t word_numbits, size_t *nlz_bits_ret); + +/** + * Tests whether `abs(val)` consists of only a single bit. + * + * @param[in] val Integer or integer-like object which has + * `#to_int` method. + * @exception rb_eTypeError `val` doesn't respond to `#to_int`. + * @retval 1 `abs(val) == 1 << n` for some `n >= 0`. + * @retval 0 Otherwise.
+ * + * rb_absint_singlebit_p() can be used to determine required buffer size for + * rb_integer_pack() used with #INTEGER_PACK_2COMP (two's complement). + * + * Following example calculates number of bits required to represent val in + * two's complement number, without sign bit. + * + * ```CXX + * size_t size; + * int neg = FIXNUM_P(val) ? FIX2LONG(val) < 0 : BIGNUM_NEGATIVE_P(val); + * size = rb_absint_numwords(val, 1, NULL); + * if (size == (size_t)-1) ...overflow... + * if (neg && rb_absint_singlebit_p(val)) + * size--; + * ``` + * + * Following example calculates number of bytes required to represent val in + * two's complement number, with sign bit. + * + * ```CXX + * size_t size; + * int neg = FIXNUM_P(val) ? FIX2LONG(val) < 0 : BIGNUM_NEGATIVE_P(val); + * int nlz_bits; + * size = rb_absint_size(val, &nlz_bits); + * if (nlz_bits == 0 && !(neg && rb_absint_singlebit_p(val))) + * size++; + * ``` + */ int rb_absint_singlebit_p(VALUE val); RBIMPL_SYMBOL_EXPORT_END() diff --git a/include/ruby/internal/intern/class.h b/include/ruby/internal/intern/class.h index d3be80d283..357af5d176 100644 --- a/include/ruby/internal/intern/class.h +++ b/include/ruby/internal/intern/class.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Public APIs related to ::rb_cClass/::rb_cModule.
*/ #include "ruby/internal/dllexport.h" @@ -27,30 +27,367 @@ RBIMPL_SYMBOL_EXPORT_BEGIN() /* class.c */ -VALUE rb_class_new(VALUE); -VALUE rb_mod_init_copy(VALUE, VALUE); -VALUE rb_singleton_class_clone(VALUE); -void rb_singleton_class_attached(VALUE,VALUE); -void rb_check_inheritable(VALUE); -VALUE rb_define_class_id(ID, VALUE); -VALUE rb_define_class_id_under(VALUE, ID, VALUE); + +/** + * Creates a new, anonymous class. + * + * @param[in] super What would become a parent class. + * @exception rb_eTypeError `super` is not something inheritable. + * @return An anonymous class that inherits `super`. + */ +VALUE rb_class_new(VALUE super); + +/** + * The comment that comes with this function says `:nodoc:`. Not sure what + * that means though. + * + * @param[out] clone Destination object. + * @param[in] orig Source object. + * @exception rb_eTypeError Cannot copy `orig`. + * @return The passed `clone`. + */ +VALUE rb_mod_init_copy(VALUE clone, VALUE orig); + +/** + * Asserts that the given class can derive a child class. A class might or + * might not be able to do so; for instance a singleton class cannot. + * + * @param[in] super Possible super class. + * @exception rb_eTypeError No it cannot. + * @post Upon successful return `super` can derive. + */ +void rb_check_inheritable(VALUE super); + +/** + * This is a very badly designed API that creates an anonymous class. + * + * @param[in] id Discarded for no reason (why...). + * @param[in] super What would become a parent class. 0 means + * ::rb_cObject. + * @exception rb_eTypeError `super` is not something inheritable. + * @return An anonymous class that inherits `super`. + * @warning You must explicitly name the return value. + */ +VALUE rb_define_class_id(ID id, VALUE super); + +/** + * Identical to rb_define_class_under(), except it takes the name in ::ID + * instead of C's string. + * + * @param[out] outer A class which contains the new class. 
+ * @param[in] id Name of the new class + * @param[in] super A class from which the new class will derive. + * 0 means ::rb_cObject. + * @exception rb_eTypeError The constant name `id` is already taken but the + * constant is not a class. + * @exception rb_eTypeError The class is already defined but the class can + * not be reopened because its superclass is not + * `super`. + * @exception rb_eArgError `super` is NULL. + * @return The created class. + * @post `outer::id` refers the returned class. + * @note If a class named `id` is already defined and its superclass is + * `super`, the function just returns the defined class. + * @note The GC does not collect nor move classes returned by this + * function. They are immortal. + */ +VALUE rb_define_class_id_under(VALUE outer, ID id, VALUE super); + +/** + * Creates a new, anonymous module. + * + * @return An anonymous module. + */ VALUE rb_module_new(void); -VALUE rb_define_module_id(ID); -VALUE rb_define_module_id_under(VALUE, ID); -VALUE rb_mod_included_modules(VALUE); -VALUE rb_mod_include_p(VALUE, VALUE); -VALUE rb_mod_ancestors(VALUE); -VALUE rb_class_instance_methods(int, const VALUE*, VALUE); -VALUE rb_class_public_instance_methods(int, const VALUE*, VALUE); -VALUE rb_class_protected_instance_methods(int, const VALUE*, VALUE); -VALUE rb_class_private_instance_methods(int, const VALUE*, VALUE); -VALUE rb_obj_singleton_methods(int, const VALUE*, VALUE); -void rb_define_method_id(VALUE, ID, VALUE (*)(ANYARGS), int); -void rb_undef(VALUE, ID); -void rb_define_protected_method(VALUE, const char*, VALUE (*)(ANYARGS), int); -void rb_define_private_method(VALUE, const char*, VALUE (*)(ANYARGS), int); -void rb_define_singleton_method(VALUE, const char*, VALUE(*)(ANYARGS), int); -VALUE rb_singleton_class(VALUE); + + +/** + * Creates a new, anonymous refinement. + * + * @return An anonymous refinement. + */ +VALUE rb_refinement_new(void); + +/** + * This is a very badly designed API that creates an anonymous module. 
+ * + * @param[in] id Discarded for no reason (why...). + * @return An anonymous module. + * @warning You must explicitly name the return value. + */ +VALUE rb_define_module_id(ID id); + +/** + * Identical to rb_define_module_under(), except it takes the name in ::ID + * instead of C's string. + * + * @param[out] outer A class which contains the new module. + * @param[in] id Name of the new module + * @exception rb_eTypeError The constant name `id` is already taken but the + * constant is not a module. + * @return The created module. + * @post `outer::id` refers the returned module. + * @note The GC does not collect nor move classes returned by this + * function. They are immortal. + */ +VALUE rb_define_module_id_under(VALUE outer, ID id); + +/** + * Queries the list of included modules. It can also be seen as a routine to + * first call rb_mod_ancestors(), then rejects non-modules from the return + * value. + * + * @param[in] mod Class or Module. + * @return An array of modules that are either included or prepended in any + * of `mod`'s ancestry tree (including itself). + */ +VALUE rb_mod_included_modules(VALUE mod); + +/** + * Queries if the passed module is included by the module. It can also be seen + * as a routine to first call rb_mod_included_modules(), then see if the return + * value contains the passed module. + * + * @param[in] child A Module. + * @param[in] parent Another Module. + * @exception rb_eTypeError `child` is not an instance of ::rb_cModule. + * @retval RUBY_Qtrue `parent` is either included or prepended in any + * of `child`'s ancestry tree (including itself). + * @retval RUBY_Qfalse Otherwise. + */ +VALUE rb_mod_include_p(VALUE child, VALUE parent); + +/** + * Queries the module's ancestors. This routine gathers classes and modules + * that the passed module either inherits, includes, or prepends, then + * recursively applies that routine again and again to the collected entries + * until the list doesn't grow up.
+ * + * @param[in] mod A module or a class. + * @return An array of classes or modules that `mod` possibly recursively + * inherits, includes, or prepends. + * + * @internal + * + * Above description is written in a recursive language but in practice it + * computes the return value iteratively. + */ +VALUE rb_mod_ancestors(VALUE mod); + +/** + * Queries the class's descendants. This routine gathers classes that are + * subclasses of the given class (or subclasses of those subclasses, etc.), + * returning an array of classes that have the given class as an ancestor. + * The returned array does not include the given class or singleton classes. + * + * @param[in] klass A class. + * @return An array of classes where `klass` is an ancestor. + * + * @internal + */ +VALUE rb_class_descendants(VALUE klass); + +/** + * Queries the class's direct descendants. This routine gathers classes that are + * direct subclasses of the given class, + * returning an array of classes that have the given class as a superclass. + * The returned array does not include singleton classes. + * + * @param[in] klass A class. + * @return An array of classes where `klass` is the `superclass`. + * + * @internal + */ +VALUE rb_class_subclasses(VALUE klass); + + +/** + * Returns the attached object for a singleton class. + * If the given class is not a singleton class, raises a TypeError. + * + * @param[in] klass A class. + * @return The object which has the singleton class `klass`. + * + * @internal + */ +VALUE rb_class_attached_object(VALUE klass); + +/** + * Generates an array of symbols, which are the list of method names defined in + * the passed class. + * + * @param[in] argc Number of objects of `argv`. + * @param[in] argv Array of at most one object, which controls (if + * any) whether the return array includes the names + * of methods defined in ancestors or not. + * @param[in] mod A module or a class. + * @exception rb_eArgError `argc` out of range. 
+ * @return An array of symbols collecting names of instance methods that + * are not private, defined at `mod`. + */ +VALUE rb_class_instance_methods(int argc, const VALUE *argv, VALUE mod); + +/** + * Identical to rb_class_instance_methods(), except it returns names of methods + * that are public only. + * + * @param[in] argc Number of objects of `argv`. + * @param[in] argv Array of at most one object, which controls (if + * any) whether the return array includes the names + * of methods defined in ancestors or not. + * @param[in] mod A module or a class. + * @exception rb_eArgError `argc` out of range. + * @return An array of symbols collecting names of instance methods that + * are public, defined at `mod`. + */ +VALUE rb_class_public_instance_methods(int argc, const VALUE *argv, VALUE mod); + +/** + * Identical to rb_class_instance_methods(), except it returns names of methods + * that are protected only. + * + * @param[in] argc Number of objects of `argv`. + * @param[in] argv Array of at most one object, which controls (if + * any) whether the return array includes the names + * of methods defined in ancestors or not. + * @param[in] mod A module or a class. + * @exception rb_eArgError `argc` out of range. + * @return An array of symbols collecting names of instance methods that + * are protected, defined at `mod`. + */ +VALUE rb_class_protected_instance_methods(int argc, const VALUE *argv, VALUE mod); + +/** + * Identical to rb_class_instance_methods(), except it returns names of methods + * that are private only. + * + * @param[in] argc Number of objects of `argv`. + * @param[in] argv Array of at most one object, which controls (if + * any) whether the return array includes the names + * of methods defined in ancestors or not. + * @param[in] mod A module or a class. + * @exception rb_eArgError `argc` out of range. + * @return An array of symbols collecting names of instance methods that + * are private, defined at `mod`.
+ */ +VALUE rb_class_private_instance_methods(int argc, const VALUE *argv, VALUE mod); + +/** + * Identical to rb_class_instance_methods(), except it returns names of + * singleton methods instead of instance methods. + * + * @param[in] argc Number of objects of `argv`. + * @param[in] argv Array of at most one object, which controls (if + * any) whether the return array includes the names + * of methods defined in ancestors or not. + * @param[in] obj Arbitrary ruby object. + * @exception rb_eArgError `argc` out of range. + * @return An array of symbols collecting names of instance methods that + * are not private, defined at the singleton class of `obj`. + */ +VALUE rb_obj_singleton_methods(int argc, const VALUE *argv, VALUE obj); + +/** + * Identical to rb_define_method(), except it takes the name of the method in + * ::ID instead of C's string. + * + * @param[out] klass A module or a class. + * @param[in] mid Name of the function. + * @param[in] func The method body. + * @param[in] arity The number of parameters. See @ref defmethod. + * @note There are in fact 18 different prototypes for func. + * @see ::ruby::backward::cxxanyargs::define_method::rb_define_method_id + */ +void rb_define_method_id(VALUE klass, ID mid, VALUE (*func)(ANYARGS), int arity); + +/* vm_method.c */ + +/** + * Inserts a method entry that hides previous method definition of the given + * name. This is not a deletion of a method. Method of the same name defined + * in a parent class is kept invisible in this way. + * + * @param[out] mod The module to insert an undef. + * @param[in] mid Name of the undef. + * @exception rb_eTypeError `klass` is a non-module. + * @exception rb_eFrozenError `klass` is frozen. + * @exception rb_eNameError No such method named `klass#name`. + * @post `klass#name` is undefined. 
+ * @see rb_undef_method + * + * @internal + * + * @shyouhei doesn't understand why this is not the ::ID -taking variant of + * rb_undef_method(), given rb_remove_method() has its ::ID -taking counterpart + * named rb_remove_method_id(). + */ +void rb_undef(VALUE mod, ID mid); + +/* class.c */ + +RBIMPL_ATTR_NONNULL(()) +/** + * Identical to rb_define_method(), except it defines a protected method. + * + * @param[out] klass A module or a class. + * @param[in] mid Name of the function. + * @param[in] func The method body. + * @param[in] arity The number of parameters. See @ref defmethod. + * @note There are in fact 18 different prototypes for func. + * @see ::ruby::backward::cxxanyargs::define_method::rb_define_protected_method + */ +void rb_define_protected_method(VALUE klass, const char *mid, VALUE (*func)(ANYARGS), int arity); + +RBIMPL_ATTR_NONNULL(()) +/** + * Identical to rb_define_method(), except it defines a private method. + * + * @param[out] klass A module or a class. + * @param[in] mid Name of the function. + * @param[in] func The method body. + * @param[in] arity The number of parameters. See @ref defmethod. + * @note There are in fact 18 different prototypes for func. + * @see ::ruby::backward::cxxanyargs::define_method::rb_define_private_method + */ +void rb_define_private_method(VALUE klass, const char *mid, VALUE (*func)(ANYARGS), int arity); + +RBIMPL_ATTR_NONNULL(()) +/** + * Identical to rb_define_method(), except it defines a singleton method. + * + * @param[out] obj Arbitrary ruby object. + * @param[in] mid Name of the function. + * @param[in] func The method body. + * @param[in] arity The number of parameters. See @ref defmethod. + * @note There are in fact 18 different prototypes for func. + * @see ::ruby::backward::cxxanyargs::define_method::rb_define_singleton_method + */ +void rb_define_singleton_method(VALUE obj, const char *mid, VALUE(*func)(ANYARGS), int arity); + +/** + * Finds or creates the singleton class of the passed object.
+ * + * @param[out] obj Arbitrary ruby object. + * @exception rb_eTypeError `obj` cannot have its singleton class. + * @return A (possibly newly allocated) instance of ::rb_cClass. + * @post `obj` has its singleton class, which is the return value. + * @post In case `obj` is a class, the returned singleton class also has + * its own singleton class in order to keep consistency of the + * inheritance structure of metaclasses. + * @note A new singleton class will be created if `obj` did not have + * one. + * @note The singleton classes for ::RUBY_Qnil, ::RUBY_Qtrue, and + * ::RUBY_Qfalse are ::rb_cNilClass, ::rb_cTrueClass, and + * ::rb_cFalseClass respectively. + * + * @internal + * + * You can _create_ a singleton class of a frozen object. Intentional or ...? + * + * Nowadays there are wider range of objects who cannot have singleton classes + * than before. For instance some string instances cannot for some reason. + */ +VALUE rb_singleton_class(VALUE obj); RBIMPL_SYMBOL_EXPORT_END() diff --git a/include/ruby/internal/intern/compar.h b/include/ruby/internal/intern/compar.h index d226ca37b1..dc3b377b01 100644 --- a/include/ruby/internal/intern/compar.h +++ b/include/ruby/internal/intern/compar.h @@ -17,17 +17,45 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Public APIs related to ::rb_mComparable. */ +#include "ruby/internal/attr/cold.h" +#include "ruby/internal/attr/noreturn.h" #include "ruby/internal/dllexport.h" #include "ruby/internal/value.h" RBIMPL_SYMBOL_EXPORT_BEGIN() +/* bignum.c */ + +/** + * Canonicalises the passed `val`, which is the return value of `a <=> b`, into + * C's `{-1, 0, 1}`. This can be handy when you implement a callback function + * to pass to `qsort(3)` etc. 
+ * + * @param[in] val Return value of a space ship operator. + * @param[in] a Comparison LHS. + * @param[in] b Comparison RHS. + * @exception rb_eArgError `a` and `b` are not comparable each other. + * @retval -1 `val` is less than zero. + * @retval 0 `val` is equal to zero. + * @retval 1 `val` is greater than zero. + */ +int rb_cmpint(VALUE val, VALUE a, VALUE b); + /* compar.c */ -int rb_cmpint(VALUE, VALUE, VALUE); -NORETURN(void rb_cmperr(VALUE, VALUE)); + +RBIMPL_ATTR_COLD() +RBIMPL_ATTR_NORETURN() +/** + * Raises "comparison failed" error. + * + * @param[in] a Comparison LHS. + * @param[in] b Comparison RHS. + * @exception rb_eArgError `a` and `b` are not comparable each other. + */ +void rb_cmperr(VALUE a, VALUE b); RBIMPL_SYMBOL_EXPORT_END() diff --git a/include/ruby/internal/intern/complex.h b/include/ruby/internal/intern/complex.h index 70343221f6..e111bd8ced 100644 --- a/include/ruby/internal/intern/complex.h +++ b/include/ruby/internal/intern/complex.h @@ -17,9 +17,11 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Public APIs related to ::rb_cComplex. */ +#include "ruby/internal/attr/deprecated.h" +#include "ruby/internal/attr/pure.h" #include "ruby/internal/dllexport.h" #include "ruby/internal/value.h" #include "ruby/internal/arithmetic/long.h" /* INT2FIX is here. */ @@ -27,32 +29,223 @@ RBIMPL_SYMBOL_EXPORT_BEGIN() /* complex.c */ -VALUE rb_complex_raw(VALUE, VALUE); + +/** + * Identical to rb_complex_new(), except it assumes both arguments are not + * instances of ::rb_cComplex. It is thus dangerous for extension libraries. + * + * @param[in] real Real part, in any numeric except Complex. + * @param[in] imag Imaginary part, in any numeric except Complex. 
+ * @return An instance of ::rb_cComplex whose value is `real + (imag)i`. + */ +VALUE rb_complex_raw(VALUE real, VALUE imag); + +/** + * Shorthand of `x+0i`. It practically converts `x` into a Complex of the + * identical value. + * + * @param[in] x Any numeric except Complex. + * @return An instance of ::rb_cComplex, whose value is `x + 0i`. + */ #define rb_complex_raw1(x) rb_complex_raw((x), INT2FIX(0)) + +/** @alias{rb_complex_raw} */ #define rb_complex_raw2(x,y) rb_complex_raw((x), (y)) -VALUE rb_complex_new(VALUE, VALUE); + +/** + * Constructs a Complex, by first multiplying the imaginary part with `1i` then + * adds it to the real part. This definition doesn't need both arguments be + * real numbers. It can happily combine two instances of ::rb_cComplex (with + * rotating the latter one). + * + * @param[in] real An instance of ::rb_cNumeric. + * @param[in] imag Another instance of ::rb_cNumeric. + * @return An instance of ::rb_cComplex whose value is `imag * 1i + real`. + */ +VALUE rb_complex_new(VALUE real, VALUE imag); + +/** + * Shorthand of `x+0i`. It practically converts `x` into a Complex of the + * identical value. + * + * @param[in] x Any numeric value. + * @return An instance of ::rb_cComplex, whose value is `x + 0i`. + */ #define rb_complex_new1(x) rb_complex_new((x), INT2FIX(0)) + +/** @alias{rb_complex_new} */ #define rb_complex_new2(x,y) rb_complex_new((x), (y)) + +/** + * Constructs a Complex using polar representations. Unlike rb_complex_new() + * it makes no sense to pass non-real instances to this function. + * + * @param[in] abs Magnitude, in any numeric except Complex. + * @param[in] arg Angle, in radians, in any numeric except Complex. + * @return An instance of ::rb_cComplex which denotes the given polar + * coordinates. 
+ */ VALUE rb_complex_new_polar(VALUE abs, VALUE arg); -DEPRECATED_BY(rb_complex_new_polar, VALUE rb_complex_polar(VALUE abs, VALUE arg)); + +RBIMPL_ATTR_DEPRECATED(("by: rb_complex_new_polar")) +/** @old{rb_complex_new_polar} */ +VALUE rb_complex_polar(VALUE abs, VALUE arg); + +RBIMPL_ATTR_PURE() +/** + * Queries the real part of the passed Complex. + * + * @param[in] z An instance of ::rb_cComplex. + * @return Its real part, which is an instance of ::rb_cNumeric. + */ VALUE rb_complex_real(VALUE z); + +RBIMPL_ATTR_PURE() +/** + * Queries the imaginary part of the passed Complex. + * + * @param[in] z An instance of ::rb_cComplex. + * @return Its imaginary part, which is an instance of ::rb_cNumeric. + */ VALUE rb_complex_imag(VALUE z); + +/** + * Performs addition of the passed two objects. + * + * @param[in] x An instance of ::rb_cComplex. + * @param[in] y Arbitrary ruby object. + * @return What `x + y` evaluates to. + * @see rb_num_coerce_bin() + */ VALUE rb_complex_plus(VALUE x, VALUE y); + +/** + * Performs subtraction of the passed two objects. + * + * @param[in] x An instance of ::rb_cComplex. + * @param[in] y Arbitrary ruby object. + * @return What `x - y` evaluates to. + * @see rb_num_coerce_bin() + */ VALUE rb_complex_minus(VALUE x, VALUE y); + +/** + * Performs multiplication of the passed two objects. + * + * @param[in] x An instance of ::rb_cComplex. + * @param[in] y Arbitrary ruby object. + * @return What `x * y` evaluates to. + * @see rb_num_coerce_bin() + */ VALUE rb_complex_mul(VALUE x, VALUE y); + +/** + * Performs division of the passed two objects. + * + * @param[in] x An instance of ::rb_cComplex. + * @param[in] y Arbitrary ruby object. + * @return What `x / y` evaluates to. + * @see rb_num_coerce_bin() + */ VALUE rb_complex_div(VALUE x, VALUE y); + +/** + * Performs negation of the passed object. + * + * @param[in] z An instance of ::rb_cComplex. + * @return What `-z` evaluates to. 
+ */ VALUE rb_complex_uminus(VALUE z); + +/** + * Performs complex conjugation of the passed object. + * + * @param[in] z An instance of ::rb_cComplex. + * @return Its complex conjugate, in ::rb_cComplex. + */ VALUE rb_complex_conjugate(VALUE z); + +/** + * Queries the absolute (or the magnitude) of the passed object. + * + * @param[in] z An instance of ::rb_cComplex. + * @return Its magnitude, in ::rb_cFloat. + */ VALUE rb_complex_abs(VALUE z); + +/** + * Queries the argument (or the angle) of the passed object. + * + * @param[in] z An instance of ::rb_cComplex. + * @return Its argument, in ::rb_cFloat. + */ VALUE rb_complex_arg(VALUE z); + +/** + * Performs exponentiation of the passed two objects. + * + * @param[in] base An instance of ::rb_cComplex. + * @param[in] exp Arbitrary ruby object. + * @return What `base ** exp` evaluates to. + * @see rb_num_coerce_bin() + */ VALUE rb_complex_pow(VALUE base, VALUE exp); + +/** + * Identical to rb_complex_new(), except it takes the arguments as C's double + * instead of Ruby's object. + * + * @param[in] real Real part. + * @param[in] imag Imaginary part. + * @return An instance of ::rb_cComplex whose value is `real + (imag)i`. + */ VALUE rb_dbl_complex_new(double real, double imag); + +/** @alias{rb_complex_plus} */ #define rb_complex_add rb_complex_plus + +/** @alias{rb_complex_minus} */ #define rb_complex_sub rb_complex_minus + +/** @alias{rb_complex_uminus} */ #define rb_complex_nagate rb_complex_uminus -VALUE rb_Complex(VALUE, VALUE); +/** + * Converts various values into a Complex. This function accepts: + * + * - Instances of ::rb_cComplex (taken as-is), + * - Instances of ::rb_cNumeric (adds `0i`), + * - Instances of ::rb_cString (parses), + * - Other objects that respond to `#to_c`. + * + * It (possibly recursively) applies `#to_c` until both sides become a Complex + * value, then computes `imag * 1i + real`. + * + * As a special case, passing ::RUBY_Qundef to `imag` is the same as passing + * `RB_INT2NUM(0)`.
+ * + * @param[in] real Real part (see above). + * @param[in] imag Imaginary part (see above). + * @exception rb_eTypeError Passed something not described above. + * @return An instance of ::rb_cComplex whose value is `1i * imag + real`. + * + * @internal + * + * This was the implementation of `Kernel#Complex` before, but they diverged. + */ +VALUE rb_Complex(VALUE real, VALUE imag); + +/** + * Shorthand of `x+0i`. It practically converts `x` into a Complex of the + * identical value. + * + * @param[in] x ::rb_cNumeric, ::rb_cString, or something that responds to + * `#to_c`. + * @return An instance of ::rb_cComplex, whose value is `x + 0i`. + */ #define rb_Complex1(x) rb_Complex((x), INT2FIX(0)) + +/** @alias{rb_Complex} */ #define rb_Complex2(x,y) rb_Complex((x), (y)) RBIMPL_SYMBOL_EXPORT_END() diff --git a/include/ruby/internal/intern/cont.h b/include/ruby/internal/intern/cont.h index cb651e682e..32647f48aa 100644 --- a/include/ruby/internal/intern/cont.h +++ b/include/ruby/internal/intern/cont.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Public APIs related to rb_cFiber. */ #include "ruby/internal/dllexport.h" @@ -27,20 +27,256 @@ RBIMPL_SYMBOL_EXPORT_BEGIN() /* cont.c */ -VALUE rb_fiber_new(rb_block_call_func_t, VALUE); + +/** + * Creates a Fiber instance from a C-backended block. + * + * @param[in] func A function, to become the fiber's body. + * @param[in] callback_obj Passed as-is to `func`. + * @return An allocated new instance of rb_cFiber, which is ready to be + * "resume"d. + */ +VALUE rb_fiber_new(rb_block_call_func_t func, VALUE callback_obj); + +/** + * Creates a Fiber instance from a C-backended block with the specified + * storage. 
+ * + * If the given storage is Qundef or Qtrue, this function is equivalent to + * rb_fiber_new() which inherits storage from the current fiber. + * + * Specifying Qtrue is experimental and may be changed in the future. + * + * If the given storage is Qnil, this function will lazily initialize the + * internal storage which starts off empty (without any inheritance). + * + * Otherwise, the given storage is used as the internal storage. + * + * @param[in] func A function, to become the fiber's body. + * @param[in] callback_obj Passed as-is to `func`. + * @param[in] storage The way to set up the storage for the fiber. + * @return An allocated new instance of rb_cFiber, which is ready to be + * "resume"d. + */ +VALUE rb_fiber_new_storage(rb_block_call_func_t func, VALUE callback_obj, VALUE storage); + +/** + * Queries the fiber which is calling this function. Any ruby execution + * context has its fiber, either explicitly or implicitly. + * + * @return The current fiber. + */ VALUE rb_fiber_current(void); -VALUE rb_fiber_alive_p(VALUE); -VALUE rb_obj_is_fiber(VALUE); +/** + * Queries the liveness of the passed fiber. "Alive" in this context means + * that the fiber can still be resumed. Once it reaches its end of + * execution, this function returns ::RUBY_Qfalse. + * + * @param[in] fiber A target fiber. + * @retval RUBY_Qtrue It is. + * @retval RUBY_Qfalse It isn't. + */ +VALUE rb_fiber_alive_p(VALUE fiber); + +/** + * Queries if an object is a fiber. + * + * @param[in] obj Arbitrary ruby object. + * @retval RUBY_Qtrue It is. + * @retval RUBY_Qfalse It isn't. + */ +VALUE rb_obj_is_fiber(VALUE obj); + +/** + * Resumes the execution of the passed fiber, either from the point at which + * the last rb_fiber_yield() was called if any, or at the beginning of the + * fiber body if it is the first call to this function.
+ * + * Other arguments are passed into the fiber's body, either as return values of + * rb_fiber_yield() in case it switches to there, or as the block parameter of + * the fiber body if it switches to the beginning of the fiber. + * + * The return value of this function is either the value passed to previous + * rb_fiber_yield() call, or the ultimate evaluated value of the entire fiber + * body if the execution reaches the end of it. + * + * When an exception happens inside of a fiber it propagates to this function. + * + * ```ruby + * f = Fiber.new do |i| + * puts "<x> =>> #{i}" + * puts "<y> <-- #{i + 1}" + * j = Fiber.yield(i + 1) + * puts "<z> =>> #{j}" + * puts "<w> <-- #{j + 1}" + * next j + 1 + * end + * + * puts "[a] <-- 1" + * p = f.resume(1) + * puts "[b] =>> #{p}" + * puts "[c] <-- #{p + 1}" + * q = f.resume(p + 1) + * puts "[d] =>> #{q}" + * ``` + * + * Above program executes in `[a] <x> <y> [b] [c] <z> <w> [d]`. + * + * @param[out] fiber The fiber to resume. + * @param[in] argc Number of objects of `argv`. + * @param[in] argv Passed (somehow) to `fiber`. + * @exception rb_eFiberError `fib` is terminated etc. + * @exception rb_eException Any exceptions happen in `fiber`. + * @return (See above) + * @note This function _does_ return. + * + * @internal + * + * @shyouhei expected this function to raise ::rb_eFrozenError for frozen + * fibers but it doesn't in practice. Intentional or ...? + */ VALUE rb_fiber_resume(VALUE fiber, int argc, const VALUE *argv); + +/** + * Identical to rb_fiber_resume(), except you can specify how to handle the + * last element of the given array. + * + * @param[out] fiber The fiber to resume. + * @param[in] argc Number of objects of `argv`. + * @param[in] argv Passed (somehow) to `fiber`. + * @param[in] kw_splat Handling of keyword parameters: + * - RB_NO_KEYWORDS `argv`'s last is not a keyword argument. + * - RB_PASS_KEYWORDS `argv`'s last is a keyword argument. 
+ * - RB_PASS_CALLED_KEYWORDS it depends if there is a passed block. + * @exception rb_eFiberError `fiber` is terminated etc. + * @exception rb_eException Any exceptions happen in `fiber`. + * @return Either what was yielded or the last value of the fiber body. + */ VALUE rb_fiber_resume_kw(VALUE fiber, int argc, const VALUE *argv, int kw_splat); +/** + * Yields the control back to the point where the current fiber was resumed. + * The passed objects would be the return value of rb_fiber_resume(). This + * fiber then suspends its execution until next time it is resumed. + * + * This function can also raise arbitrary exceptions injected from outside of + * the fiber using rb_fiber_raise(). + * + * ```ruby + * exc = Class.new Exception + * + * f = Fiber.new do + * Fiber.yield + * rescue exc => e + * puts e.message + * end + * + * f.resume + * f.raise exc, "Hi!" + * ``` + * + * @param[in] argc Number of objects of `argv`. + * @param[in] argv Passed to rb_fiber_resume(). + * @exception rb_eException (See above) + * @return (See rb_fiber_resume() for details) + */ VALUE rb_fiber_yield(int argc, const VALUE *argv); + +/** + * Identical to rb_fiber_yield(), except you can specify how to handle the last + * element of the given array. + * + * @param[in] argc Number of objects of `argv`. + * @param[in] argv Passed to rb_fiber_resume(). + * @param[in] kw_splat Handling of keyword parameters: + * - RB_NO_KEYWORDS `argv`'s last is not a keyword argument. + * - RB_PASS_KEYWORDS `argv`'s last is a keyword argument. + * - RB_PASS_CALLED_KEYWORDS it depends if there is a passed block. + * @exception rb_eException What was raised using `Fiber#raise`. + * @return (See rb_fiber_resume() for details) + */ VALUE rb_fiber_yield_kw(int argc, const VALUE *argv, int kw_splat); +/** + * Transfers control to another fiber, resuming it from where it last stopped + * or starting it if it was not resumed before. The calling fiber will be + * suspended much like in a call to rb_fiber_yield(). 
+ * + * The fiber which receives the transfer call treats it much like a resume + * call. Arguments passed to transfer are treated like those passed to resume. + * + * The two style of control passing to and from fiber (one is rb_fiber_resume() + * and rb_fiber_yield(), another is rb_fiber_transfer() to and from fiber) + * can't be freely mixed. + * + * - If the Fiber's lifecycle had started with transfer, it will never be + * able to yield or be resumed control passing, only finish or transfer + * back. (It still can resume other fibers that are allowed to be + * resumed.) + * + * - If the Fiber's lifecycle had started with resume, it can yield or + * transfer to another Fiber, but can receive control back only the way + * compatible with the way it was given away: if it had transferred, it + * only can be transferred back, and if it had yielded, it only can be + * resumed back. After that, it again can transfer or yield. + * + * If those rules are broken, rb_eFiberError is raised. + * + * For an individual Fiber design, yield/resume is easier to use (the Fiber + * just gives away control, it doesn't need to think about who the control is + * given to), while transfer is more flexible for complex cases, allowing to + * build arbitrary graphs of Fibers dependent on each other. + * + * @param[out] fiber Explicit control destination. + * @param[in] argc Number of objects of `argv`. + * @param[in] argv Passed to rb_fiber_resume(). + * @exception rb_eFiberError (See above) + * @exception rb_eException What was raised using `Fiber#raise`. + * @return (See rb_fiber_resume() for details) + */ VALUE rb_fiber_transfer(VALUE fiber, int argc, const VALUE *argv); + +/** + * Identical to rb_fiber_transfer(), except you can specify how to handle the + * last element of the given array. + * + * @param[out] fiber Explicit control destination. + * @param[in] argc Number of objects of `argv`. + * @param[in] argv Passed to rb_fiber_resume(). 
+ * @param[in] kw_splat Handling of keyword parameters: + * - RB_NO_KEYWORDS `argv`'s last is not a keyword argument. + * - RB_PASS_KEYWORDS `argv`'s last is a keyword argument. + * - RB_PASS_CALLED_KEYWORDS it depends if there is a passed block. + * @exception rb_eFiberError (See above) + * @exception rb_eException What was raised using `Fiber#raise`. + * @return (See rb_fiber_resume() for details) + */ VALUE rb_fiber_transfer_kw(VALUE fiber, int argc, const VALUE *argv, int kw_splat); +/** + * Identical to rb_fiber_resume() but instead of resuming normal execution of + * the passed fiber, it raises the given exception in it. From inside of the + * fiber this would be seen as if rb_fiber_yield() raised. + * + * This function does return in case the passed fiber gracefully handled the + * passed exception. But if it does not, the raised exception propagates out + * of the passed fiber; this function then does not return. + * + * Parameters are passed to rb_make_exception() to create an exception object. + * See its document for what are allowed here. + * + * It is a failure to call this function against a fiber which is resuming, + * have never run yet, or has already finished running. + * + * @param[out] fiber Where exception is raised. + * @param[in] argc Passed as-is to rb_make_exception(). + * @param[in] argv Passed as-is to rb_make_exception(). + * @exception rb_eFiberError `fiber` is terminated etc. + * @return (See rb_fiber_resume() for details) + */ +VALUE rb_fiber_raise(VALUE fiber, int argc, const VALUE *argv); + RBIMPL_SYMBOL_EXPORT_END() #endif /* RBIMPL_INTERN_CONT_H */ diff --git a/include/ruby/internal/intern/dir.h b/include/ruby/internal/intern/dir.h index 936f4e1f36..da1873e068 100644 --- a/include/ruby/internal/intern/dir.h +++ b/include/ruby/internal/intern/dir.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. 
* We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Public APIs related to ::rb_cDir. */ #include "ruby/internal/dllexport.h" @@ -26,6 +26,15 @@ RBIMPL_SYMBOL_EXPORT_BEGIN() /* dir.c */ + +/** + * Queries the path of the current working directory of the current process. + * + * @return An instance of ::rb_cString that holds the working directory. + * @note The returned string is in "filesystem" encoding. Most notably on + * Linux this is an alias of default external encoding. Most notably + * on Windows it can be an alias of OS codepage. + */ VALUE rb_dir_getwd(void); RBIMPL_SYMBOL_EXPORT_END() diff --git a/include/ruby/internal/intern/enum.h b/include/ruby/internal/intern/enum.h index 17c20c1c0a..215ad82672 100644 --- a/include/ruby/internal/intern/enum.h +++ b/include/ruby/internal/intern/enum.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Public APIs related to ::rb_mEnumerable. */ #include "ruby/internal/dllexport.h" @@ -26,7 +26,47 @@ RBIMPL_SYMBOL_EXPORT_BEGIN() /* enum.c */ -VALUE rb_enum_values_pack(int, const VALUE*); + +/** + * Basically identical to rb_ary_new_from_values(), except it returns something + * different when `argc` < 2. + * + * @param[in] argc Number of objects of `argv`. + * @param[in] argv Arbitrary objects. + * @retval RUBY_Qnil `argc` is zero. + * @retval argv[0] `argc` is one. + * @retval otherwise Otherwise. + * + * @internal + * + * What is this business? Well, this function is about `yield`'s taking + * multiple values.
Consider following user-defined class: + * + * ```ruby + * class Foo + * include Enumerable + * + * def each + * yield :q, :w, :e, :r + * end + * end + * + * Foo.new.each_with_object([]) do |i, j| + * j << i # ^^^ <- What to expect for `i`? + * end + * ``` + * + * Here, `Foo#each_with_object` is in fact `Enumerable#each_with_object`, which + * doesn't know what would be yielded. Yet, it has to take a block of arity 2. + * This function is used here, to "pack" arbitrary number of yielded objects + * into one. + * + * If people want to implement their own `Enumerable#each_with_object` this API + * can be handy. Though @shyouhei suspects it is relatively rare for 3rd party + * extension libraries to have such things. Also `Enumerable#each_entry` is + * basically this function exposed as a Ruby method. + */ +VALUE rb_enum_values_pack(int argc, const VALUE *argv); RBIMPL_SYMBOL_EXPORT_END() diff --git a/include/ruby/internal/intern/enumerator.h b/include/ruby/internal/intern/enumerator.h index c81485155c..20e5d7c6fc 100644 --- a/include/ruby/internal/intern/enumerator.h +++ b/include/ruby/internal/intern/enumerator.h @@ -17,9 +17,10 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Public APIs related to ::rb_cEnumerator. */ +#include "ruby/internal/attr/nonnull.h" #include "ruby/internal/dllexport.h" #include "ruby/internal/intern/eval.h" /* rb_frame_this_func */ #include "ruby/internal/iterator.h" /* rb_block_given_p */ @@ -28,52 +29,230 @@ RBIMPL_SYMBOL_EXPORT_BEGIN() -typedef VALUE rb_enumerator_size_func(VALUE, VALUE, VALUE); +/** + * This is the type of functions that rb_enumeratorize_with_size() expects. 
In + * theory an enumerator can have indefinite number of elements, but in practice + * it often is the case we can compute the size of an enumerator beforehand. + * If your enumerator has such property, supply a function that calculates such + * values. + * + * @param[in] recv The original receiver of the enumerator. + * @param[in] argv Arguments passed to `Object#enum_for` etc. + * @param[in] eobj The enumerator object. + * @return The size of `eobj`, in ::rb_cNumeric, or ::RUBY_Qnil if the size + * is not known until we actually iterate. + */ +typedef VALUE rb_enumerator_size_func(VALUE recv, VALUE argv, VALUE eobj); +/** + * Decomposed `Enumerator::ArithmeticSequence`. This is a subclass of + * ::rb_cEnumerator, which represents a sequence of numbers with common + * difference. Internal data structure of the class is opaque to users, but + * you can obtain a decomposed one using rb_arithmetic_sequence_extract(). + */ typedef struct { - VALUE begin; - VALUE end; - VALUE step; - int exclude_end; + VALUE begin; /**< "Left" or "lowest" endpoint of the sequence. */ + VALUE end; /**< "Right" or "highest" endpoint of the sequence.*/ + VALUE step; /**< Step between a sequence. */ + int exclude_end; /**< Whether the endpoint is open or closed. */ } rb_arithmetic_sequence_components_t; /* enumerator.c */ -VALUE rb_enumeratorize(VALUE, VALUE, int, const VALUE *); -VALUE rb_enumeratorize_with_size(VALUE, VALUE, int, const VALUE *, rb_enumerator_size_func *); -VALUE rb_enumeratorize_with_size_kw(VALUE, VALUE, int, const VALUE *, rb_enumerator_size_func *, int); -int rb_arithmetic_sequence_extract(VALUE, rb_arithmetic_sequence_components_t *); -VALUE rb_arithmetic_sequence_beg_len_step(VALUE, long *begp, long *lenp, long *stepp, long len, int err); + +/** + * Constructs an enumerator. This roughly resembles `Object#enum_for`. + * + * @param[in] recv A receiver of `meth`. + * @param[in] meth Method ID in a symbol object. + * @param[in] argc Number of objects of `argv`. 
+ * @param[in] argv Arguments passed to `meth`. + * @exception rb_eTypeError `meth` is not an instance of ::rb_cSymbol. + * @return A new instance of ::rb_cEnumerator which, when yielded, + * enumerates by calling `meth` on `recv` with `argv`. + */ +VALUE rb_enumeratorize(VALUE recv, VALUE meth, int argc, const VALUE *argv); + +/** + * Identical to rb_enumeratorize(), except you can additionally specify the + * size function of return value. + * + * @param[in] recv A receiver of `meth`. + * @param[in] meth Method ID in a symbol object. + * @param[in] argc Number of objects of `argv`. + * @param[in] argv Arguments passed to `meth`. + * @param[in] func Size calculator. + * @exception rb_eTypeError `meth` is not an instance of ::rb_cSymbol. + * @return A new instance of ::rb_cEnumerator which, when yielded, + * enumerates by calling `meth` on `recv` with `argv`. + * @note `func` can be zero, which means the size is unknown. + */ +VALUE rb_enumeratorize_with_size(VALUE recv, VALUE meth, int argc, const VALUE *argv, rb_enumerator_size_func *func); + +/** + * Identical to rb_enumeratorize_with_size(), except you can specify how to + * handle the last element of the given array. + * + * @param[in] recv A receiver of `meth`. + * @param[in] meth Method ID in a symbol object. + * @param[in] argc Number of objects of `argv`. + * @param[in] argv Arguments passed to `meth`. + * @param[in] func Size calculator. + * @param[in] kw_splat Handling of keyword parameters: + * - RB_NO_KEYWORDS `argv`'s last is not a keyword argument. + * - RB_PASS_KEYWORDS `argv`'s last is a keyword argument. + * - RB_PASS_CALLED_KEYWORDS it depends if there is a passed block. + * @exception rb_eTypeError `meth` is not an instance of ::rb_cSymbol. + * @return A new instance of ::rb_cEnumerator which, when yielded, + * enumerates by calling `meth` on `recv` with `argv`. + * @note `func` can be zero, which means the size is unknown. 
+ */ +VALUE rb_enumeratorize_with_size_kw(VALUE recv, VALUE meth, int argc, const VALUE *argv, rb_enumerator_size_func *func, int kw_splat); + +RBIMPL_ATTR_NONNULL(()) +/** + * Extracts components of the passed arithmetic sequence. This can be seen as + * an extended version of rb_range_values(). + * + * @param[in] as Target instance of `Enumerator::ArithmeticSequence`. + * @param[out] buf Decomposed results buffer. + * @return 0 `as` is not `Enumerator::ArithmeticSequence`. + * @return 1 Success. + * @post `buf` is filled. + */ +int rb_arithmetic_sequence_extract(VALUE as, rb_arithmetic_sequence_components_t *buf); + +RBIMPL_ATTR_NONNULL(()) +/** + * Identical to rb_range_beg_len(), except it takes an instance of + * `Enumerator::ArithmeticSequence`. + * + * @param[in] as An `Enumerator::ArithmeticSequence` instance. + * @param[out] begp Return value buffer. + * @param[out] lenp Return value buffer. + * @param[out] stepp Return value buffer. + * @param[in] len Updated length. + * @param[in] err In case `len` is out of range... + * - `0`: returns ::RUBY_Qnil. + * - `1`: raises ::rb_eRangeError. + * - `2`: `beg` and `len` expanded accordingly. + * @exception rb_eRangeError `as` cannot fit into `long`. + * @retval RUBY_Qfalse `as` is not `Enumerator::ArithmeticSequence`. + * @retval RUBY_Qnil `len` is out of `as` but `err` is zero. + * @retval RUBY_Qtrue Otherwise. + * @post `beg` is the (possibly updated) left endpoint. + * @post `len` is the (possibly updated) length of the range. + * + * @internal + * + * Currently no 3rd party applications of this function are found. But that can + * be because this function is relatively new. 
+ */ +VALUE rb_arithmetic_sequence_beg_len_step(VALUE as, long *begp, long *lenp, long *stepp, long len, int err); RBIMPL_SYMBOL_EXPORT_END() +/** @cond INTERNAL_MACRO */ #ifndef RUBY_EXPORT # define rb_enumeratorize_with_size(obj, id, argc, argv, size_fn) \ rb_enumeratorize_with_size(obj, id, argc, argv, (rb_enumerator_size_func *)(size_fn)) # define rb_enumeratorize_with_size_kw(obj, id, argc, argv, size_fn, kw_splat) \ rb_enumeratorize_with_size_kw(obj, id, argc, argv, (rb_enumerator_size_func *)(size_fn), kw_splat) #endif +/** @endcond */ +/** + * This is an implementation detail of #RETURN_SIZED_ENUMERATOR(). You could + * use it directly, but can hardly be handy. + * + * @param[in] obj A receiver. + * @param[in] argc Number of objects of `argv`. + * @param[in] argv Arguments passed to the current method. + * @param[in] size_fn Size calculator. + * @return A new instance of ::rb_cEnumerator which, when yielded, + * enumerates by calling the current method on `recv` with `argv`. + */ #define SIZED_ENUMERATOR(obj, argc, argv, size_fn) \ rb_enumeratorize_with_size((obj), ID2SYM(rb_frame_this_func()), \ (argc), (argv), (size_fn)) +/** + * This is an implementation detail of #RETURN_SIZED_ENUMERATOR_KW(). You + * could use it directly, but can hardly be handy. + * + * @param[in] obj A receiver. + * @param[in] argc Number of objects of `argv`. + * @param[in] argv Arguments passed to the current method. + * @param[in] size_fn Size calculator. + * @param[in] kw_splat Handling of keyword parameters: + * - RB_NO_KEYWORDS `argv`'s last is not a keyword argument. + * - RB_PASS_KEYWORDS `argv`'s last is a keyword argument. + * - RB_PASS_CALLED_KEYWORDS it depends if there is a passed block. + * @return A new instance of ::rb_cEnumerator which, when yielded, + * enumerates by calling the current method on `recv` with `argv`. 
+ */ #define SIZED_ENUMERATOR_KW(obj, argc, argv, size_fn, kw_splat) \ rb_enumeratorize_with_size_kw((obj), ID2SYM(rb_frame_this_func()), \ (argc), (argv), (size_fn), (kw_splat)) +/** + * This roughly resembles `return enum_for(__callee__) unless block_given?`. + * + * @param[in] obj A receiver. + * @param[in] argc Number of objects of `argv`. + * @param[in] argv Arguments passed to the current method. + * @param[in] size_fn Size calculator. + * @note This macro may return inside. + */ #define RETURN_SIZED_ENUMERATOR(obj, argc, argv, size_fn) do { \ if (!rb_block_given_p()) \ return SIZED_ENUMERATOR(obj, argc, argv, size_fn); \ } while (0) + +/** + * Identical to #RETURN_SIZED_ENUMERATOR(), except you can specify how to + * handle the last element of the given array. + * + * @param[in] obj A receiver. + * @param[in] argc Number of objects of `argv`. + * @param[in] argv Arguments passed to the current method. + * @param[in] size_fn Size calculator. + * @param[in] kw_splat Handling of keyword parameters: + * - RB_NO_KEYWORDS `argv`'s last is not a keyword argument. + * - RB_PASS_KEYWORDS `argv`'s last is a keyword argument. + * - RB_PASS_CALLED_KEYWORDS it depends if there is a passed block. + * @note This macro may return inside. + */ #define RETURN_SIZED_ENUMERATOR_KW(obj, argc, argv, size_fn, kw_splat) do { \ if (!rb_block_given_p()) \ return SIZED_ENUMERATOR_KW(obj, argc, argv, size_fn, kw_splat); \ } while (0) +/** + * Identical to #RETURN_SIZED_ENUMERATOR(), except its size is unknown. + * + * @param[in] obj A receiver. + * @param[in] argc Number of objects of `argv`. + * @param[in] argv Arguments passed to the current method. + * @note This macro may return inside. + */ #define RETURN_ENUMERATOR(obj, argc, argv) \ RETURN_SIZED_ENUMERATOR(obj, argc, argv, 0) +/** + * Identical to #RETURN_SIZED_ENUMERATOR_KW(), except its size is unknown. 
It + * can also be seen as a routine identical to #RETURN_ENUMERATOR(), except you + * can specify how to handle the last element of the given array. + * + * @param[in] obj A receiver. + * @param[in] argc Number of objects of `argv`. + * @param[in] argv Arguments passed to the current method. + * @param[in] kw_splat Handling of keyword parameters: + * - RB_NO_KEYWORDS `argv`'s last is not a keyword argument. + * - RB_PASS_KEYWORDS `argv`'s last is a keyword argument. + * - RB_PASS_CALLED_KEYWORDS it depends if there is a passed block. + * @note This macro may return inside. + */ #define RETURN_ENUMERATOR_KW(obj, argc, argv, kw_splat) \ RETURN_SIZED_ENUMERATOR_KW(obj, argc, argv, 0, kw_splat) diff --git a/include/ruby/internal/intern/error.h b/include/ruby/internal/intern/error.h index aa9fe2daba..11e147a121 100644 --- a/include/ruby/internal/intern/error.h +++ b/include/ruby/internal/intern/error.h @@ -17,44 +17,232 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Public APIs related to ::rb_eException. */ +#include "ruby/internal/attr/format.h" +#include "ruby/internal/attr/noreturn.h" #include "ruby/internal/dllexport.h" #include "ruby/internal/value.h" #include "ruby/internal/fl_type.h" #include "ruby/backward/2/assume.h" -#include "ruby/backward/2/attributes.h" +/** + * This macro is used in conjunction with rb_check_arity(). If you pass it to + * the function's last (max) argument, that means the function does not check + * upper limit. 
+ */ #define UNLIMITED_ARGUMENTS (-1) -#define rb_exc_new2 rb_exc_new_cstr -#define rb_exc_new3 rb_exc_new_str -#define rb_check_trusted rb_check_trusted -#define rb_check_trusted_inline rb_check_trusted + +#define rb_exc_new2 rb_exc_new_cstr /**< @old{rb_exc_new_cstr} */ +#define rb_exc_new3 rb_exc_new_str /**< @old{rb_exc_new_str} */ + +/** @cond INTERNAL_MACRO */ #define rb_check_arity rb_check_arity +/** @endcond */ RBIMPL_SYMBOL_EXPORT_BEGIN() /* error.c */ -VALUE rb_exc_new(VALUE, const char*, long); -VALUE rb_exc_new_cstr(VALUE, const char*); -VALUE rb_exc_new_str(VALUE, VALUE); -PRINTF_ARGS(NORETURN(void rb_loaderror(const char*, ...)), 1, 2); -PRINTF_ARGS(NORETURN(void rb_loaderror_with_path(VALUE path, const char*, ...)), 2, 3); -PRINTF_ARGS(NORETURN(void rb_name_error(ID, const char*, ...)), 2, 3); -PRINTF_ARGS(NORETURN(void rb_name_error_str(VALUE, const char*, ...)), 2, 3); -PRINTF_ARGS(NORETURN(void rb_frozen_error_raise(VALUE, const char*, ...)), 2, 3); -NORETURN(void rb_invalid_str(const char*, const char*)); -NORETURN(void rb_error_frozen(const char*)); -NORETURN(void rb_error_frozen_object(VALUE)); -void rb_error_untrusted(VALUE); -void rb_check_frozen(VALUE); -void rb_check_trusted(VALUE); + +/** + * Creates an instance of the passed exception class. + * + * @param[in] etype A subclass of ::rb_eException. + * @param[in] ptr Buffer contains error message. + * @param[in] len Length of `ptr`, in bytes, not including the + * terminating NUL character. + * @exception rb_eTypeError `etype` is not a class. + * @exception rb_eArgError `len` is negative. + * @return An instance of `etype`. + * @pre At least `len` bytes of continuous memory region shall be + * accessible via `ptr`. + * + * @internal + * + * This function works for non-exception classes as well, as long as they take + * one string argument. 
+ */ +VALUE rb_exc_new(VALUE etype, const char *ptr, long len); + +RBIMPL_ATTR_NONNULL(()) +/** + * Identical to rb_exc_new(), except it assumes the passed pointer is a pointer + * to a C string. + * + * @param[in] etype A subclass of ::rb_eException. + * @param[in] str A C string (becomes an error message). + * @exception rb_eTypeError `etype` is not a class. + * @return An instance of `etype`. + */ +VALUE rb_exc_new_cstr(VALUE etype, const char *str); + +/** + * Identical to rb_exc_new_cstr(), except it takes a Ruby's string instead of + * C's. + * + * @param[in] etype A subclass of ::rb_eException. + * @param[in] str An instance of ::rb_cString. + * @exception rb_eTypeError `etype` is not a class. + * @return An instance of `etype`. + */ +VALUE rb_exc_new_str(VALUE etype, VALUE str); + +RBIMPL_ATTR_NORETURN() +RBIMPL_ATTR_NONNULL((1)) +RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 1, 2) +/** + * Raises an instance of ::rb_eLoadError. + * + * @param[in] fmt Format specifier string compatible with rb_sprintf(). + * @exception rb_eLoadError Always raises this. + * @note It never returns. + * + * @internal + * + * Who needs this? Except ruby itself? + */ +void rb_loaderror(const char *fmt, ...); + +RBIMPL_ATTR_NORETURN() +RBIMPL_ATTR_NONNULL((2)) +RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 2, 3) +/** + * Identical to rb_loaderror(), except it additionally takes which file is + * unable to load. The path can be obtained later using `LoadError#path` of + * the raising exception. + * + * @param[in] path What failed. + * @param[in] fmt Format specifier string compatible with rb_sprintf(). + * @exception rb_eLoadError Always raises this. + * @note It never returns. + */ +void rb_loaderror_with_path(VALUE path, const char *fmt, ...); + +RBIMPL_ATTR_NORETURN() +RBIMPL_ATTR_NONNULL((2)) +RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 2, 3) +/** + * Raises an instance of ::rb_eNameError. The name can be obtained later using + * `NameError#name` of the raising exception. 
+ * + * @param[in] name What failed. + * @param[in] fmt Format specifier string compatible with rb_sprintf(). + * @exception rb_eNameError Always raises this. + * @note It never returns. + */ +void rb_name_error(ID name, const char *fmt, ...); + +RBIMPL_ATTR_NORETURN() +RBIMPL_ATTR_NONNULL((2)) +RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 2, 3) +/** + * Identical to rb_name_error(), except it takes a ::VALUE instead of ::ID. + * + * @param[in] name What failed. + * @param[in] fmt Format specifier string compatible with rb_sprintf(). + * @exception rb_eNameError Always raises this. + * @note It never returns. + */ +void rb_name_error_str(VALUE name, const char *fmt, ...); + +RBIMPL_ATTR_NORETURN() +RBIMPL_ATTR_NONNULL((2)) +RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 2, 3) +/** + * Raises an instance of ::rb_eFrozenError. The object can be obtained later + * using `FrozenError#receiver` of the raising exception. + * + * @param[in] recv What is frozen. + * @param[in] fmt Format specifier string compatible with rb_sprintf(). + * @exception rb_eFrozenError Always raises this. + * @note It never returns. + * + * @internal + * + * Note however, that it is often not possible to inspect a frozen object, + * because the inspection itself could be forbidden by the frozen-ness. + */ +void rb_frozen_error_raise(VALUE recv, const char *fmt, ...); + +RBIMPL_ATTR_NORETURN() +RBIMPL_ATTR_NONNULL(()) +/** + * Honestly I don't understand the name, but it raises an instance of + * ::rb_eArgError. + * + * @param[in] str A message. + * @param[in] type Another message. + * @exception rb_eArgError Always raises this. + * @note It never returns. + */ +void rb_invalid_str(const char *str, const char *type); + +RBIMPL_ATTR_NORETURN() +RBIMPL_ATTR_NONNULL(()) +/** + * Identical to rb_frozen_error_raise(), except its raising exception has a + * message like "can't modify frozen /what/". + * + * @param[in] what What was frozen. + * @exception rb_eFrozenError Always raises this. 
+ * @note It never returns. + */ +void rb_error_frozen(const char *what); + +/** + * Identical to rb_error_frozen(), except it takes arbitrary Ruby object + * instead of C's string. + * + * @param[in] what What was frozen. + * @exception rb_eFrozenError Always raises this. + * @note It never returns. + */ +void rb_error_frozen_object(VALUE what); + +/** + * Queries if the passed object is frozen. + * + * @param[in] obj Target object to test frozen-ness. + * @exception rb_eFrozenError It is frozen. + * @post Upon successful return it is guaranteed _not_ frozen. + */ +void rb_check_frozen(VALUE obj); + +/** + * Ensures that the passed object can be `initialize_copy` relationship. When + * you implement your own one you would better call this at the right beginning + * of your implementation. + * + * @param[in] obj Destination object. + * @param[in] orig Source object. + * @exception rb_eFrozenError `obj` is frozen. + * @post Upon successful return obj is guaranteed safe to copy orig. + */ void rb_check_copyable(VALUE obj, VALUE orig); -NORETURN(MJIT_STATIC void rb_error_arity(int, int, int)); + +RBIMPL_ATTR_NORETURN() +/** + * @private + * + * This is an implementation detail of rb_scan_args(). You don't have to + * bother. + * + * @pre `argc` is out of range of `min`..`max`, both inclusive. + * @param[in] argc Arbitrary integer. + * @param[in] min Minimum allowed `argc`. + * @param[in] max Maximum allowed `argc`. + * @exception rb_eArgError Always. + */ +void rb_error_arity(int argc, int min, int max); + RBIMPL_SYMBOL_EXPORT_END() -/* Does anyone use this? Remain not deleted for compatibility. */ +/** + * @deprecated + * + * Does anyone use this? Remain not deleted for compatibility. 
+ */ #define rb_check_frozen_internal(obj) do { \ VALUE frozen_obj = (obj); \ if (RB_UNLIKELY(RB_OBJ_FROZEN(frozen_obj))) { \ @@ -62,6 +250,7 @@ RBIMPL_SYMBOL_EXPORT_END() } \ } while (0) +/** @alias{rb_check_frozen} */ static inline void rb_check_frozen_inline(VALUE obj) { @@ -69,8 +258,23 @@ rb_check_frozen_inline(VALUE obj) rb_error_frozen_object(obj); } } + +/** @alias{rb_check_frozen} */ #define rb_check_frozen rb_check_frozen_inline +/** + * Ensures that the passed integer is in the passed range. When you can use + * rb_scan_args() that is preferred over this one (powerful, descriptive). But + * it can have its own application area. + * + * @param[in] argc Arbitrary integer. + * @param[in] min Minimum allowed `argc`. + * @param[in] max Maximum allowed `argc`, or `UNLIMITED_ARGUMENTS`. + * @exception rb_eArgError `argc` out of range. + * @return The passed `argc`. + * @post Upon successful return `argc` is in range of `min`..`max`, both + * inclusive. + */ static inline int rb_check_arity(int argc, int min, int max) { diff --git a/include/ruby/internal/intern/eval.h b/include/ruby/internal/intern/eval.h index 11957053d7..2230f7ab0c 100644 --- a/include/ruby/internal/intern/eval.h +++ b/include/ruby/internal/intern/eval.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Pre-1.9 era evaluator APIs (now considered miscellaneous). */ #include "ruby/internal/attr/noreturn.h" @@ -28,31 +28,194 @@ RBIMPL_SYMBOL_EXPORT_BEGIN() /* eval.c */ RBIMPL_ATTR_NORETURN() -void rb_exc_raise(VALUE); +/** + * Identical to rb_raise(), except it raises the passed exception instance as- + * is instead of creating new one. + * + * @param[in] exc An instance of a subclass of ::rb_eException. 
+ * @exception exc What is passed. + * @exception rb_eTypeError `exc` is not an exception. + * @note It never returns. + * + * @internal + * + * Wellll actually, it can take more than what is described above. This + * function tries to call `exception` method of the passed object. If that + * function returns an exception object that is used instead. + */ +void rb_exc_raise(VALUE exc); RBIMPL_ATTR_NORETURN() -void rb_exc_fatal(VALUE); +/** + * Identical to rb_fatal(), except it raises the passed exception instance as- + * is instead of creating new one. + * + * @param[in] exc An instance of a subclass of ::rb_eException. + * @exception exc What is passed. + * @note It never returns. + * + * @internal + * + * You know what...? Using this API you can make arbitrary exceptions, like + * `RuntimeError`, that doesn't interface with `rescue` clause. This is very + * confusing. + */ +void rb_exc_fatal(VALUE exc); + +/* process.c */ RBIMPL_ATTR_NORETURN() -VALUE rb_f_exit(int, const VALUE*); +/** + * Identical to rb_exit(), except how arguments are passed. + * + * @param[in] argc Number of objects of `argv`. + * @param[in] argv Contains at most one of the following: + * - ::RUBY_Qtrue - means `EXIT_SUCCESS`. + * - ::RUBY_Qfalse - means `EXIT_FAILURE`. + * - Numerical value - takes that value. + * @exception rb_eArgError Wrong `argc`. + * @exception rb_eSystemExit Exception representing the exit status. + * @note It never returns. + */ +VALUE rb_f_exit(int argc, const VALUE *argv); RBIMPL_ATTR_NORETURN() -VALUE rb_f_abort(int, const VALUE*); +/** + * This is similar to rb_f_exit(). In fact on some situation it internally + * calls rb_exit(). But can be very esoteric on occasions. + * + * It takes up to one argument. If an argument is passed, it tries to display + * that. Otherwise if there is `$!`, displays that exception instead. It + * finally raise ::rb_eSystemExit in both cases. + * + * @param[in] argc Number of objects of `argv`. 
+ * @param[in] argv Contains at most one string-ish object. + * @exception rb_eArgError Wrong `argc`. + * @exception rb_eTypeError No conversion from `argv[0]` to String. + * @exception rb_eSystemExit Exception representing `EXIT_FAILURE`. + * @note It never returns. + */ +VALUE rb_f_abort(int argc, const VALUE *argv); + +/* eval.c*/ RBIMPL_ATTR_NORETURN() +/** + * Raises an instance of ::rb_eInterrupt. + * + * @exception rb_eInterrupt Always raises this exception. + * @note It never returns. + */ void rb_interrupt(void); + +/** + * Queries the name of the Ruby level method that is calling this function. + * The "name" in this context is the one assigned to the function for the first + * time (note that methods can have multiple names via aliases). + * + * @retval 0 There is no method (e.g. toplevel context). + * @retval otherwise The name of the current method. + */ ID rb_frame_this_func(void); RBIMPL_ATTR_NORETURN() -void rb_jump_tag(int); -void rb_obj_call_init(VALUE, int, const VALUE*); +/** + * This function is to re-throw global escapes. Such global escapes include + * exceptions, `throw`, `break`, for example. + * + * It makes sense only when used in conjunction with "protect" series APIs + * e.g. rb_protect(), rb_load_protect(), rb_eval_string_protect(), etc. In + * case these functions experience global escapes, they fill their opaque + * `state` return buffer. You can ignore such escapes. But if you decide + * otherwise, you have to somehow escape globally again. This function is used + * for that purpose. + * + * @param[in] state Opaque state of execution. + * @note It never returns. + * + * @internal + * + * Though not a part of our public API, `state` is in fact an enum + * ruby_tag_type. You can see the potential values by looking at vm_core.h. + */ +void rb_jump_tag(int state); + +/** + * Calls `initialize` method of the passed object with the passed arguments. + * It also forwards the implicitly passed block to the method. 
+ * + * @param[in] obj Receiver object. + * @param[in] argc Number of objects of `argv`. + * @param[in] argv Passed as-is to `obj.initialize`. + * @exception rb_eException Any exceptions happen inside. + */ +void rb_obj_call_init(VALUE obj, int argc, const VALUE *argv); + +/** + * Identical to rb_obj_call_init(), except you can specify how to handle the + * last element of the given array. + * + * @param[in] obj Receiver object. + * @param[in] argc Number of objects of `argv`. + * @param[in] argv Passed as-is to `obj.initialize`. + * @param[in] kw_splat Handling of keyword parameters: + * - RB_NO_KEYWORDS `argv`'s last is not a keyword argument. + * - RB_PASS_KEYWORDS `argv`'s last is a keyword argument. + * - RB_PASS_CALLED_KEYWORDS it depends if there is a passed block. + * @exception rb_eNoMethodError No such method. + * @exception rb_eException Any exceptions happen inside. + */ void rb_obj_call_init_kw(VALUE, int, const VALUE*, int); -VALUE rb_protect(VALUE (*)(VALUE), VALUE, int*); + +/** + * Identical to rb_frame_this_func(), except it returns the name used to call + * the method. + * + * @retval 0 There is no method (e.g. toplevel context). + * @retval otherwise The name of the current method. + */ ID rb_frame_callee(void); -VALUE rb_make_exception(int, const VALUE*); + +/** + * Constructs an exception object from the list of arguments, in a manner + * similar to Ruby's `raise`. This function can take: + * + * - No arguments at all, i.e. `argc == 0`. This is not a failure. It + * returns ::RUBY_Qnil then. + * + * - An object, which is an instance of ::rb_cString. In this case an + * instance of ::rb_eRuntimeError whose message is the passed string is + * created then returned. + * + * - An object, which responds to `exception` method, and optionally its + * argument, and optionally its backtrace. For example instances of + * subclasses of ::rb_eException have this method. What is returned from + * the method is returned. 
+ * + * @param[in] argc Number of objects of `argv`. + * @param[in] argv 0 up to 3 objects. + * @exception rb_eArgError Wrong `argc`. + * @exception rb_eTypeError `argv[0].exception` returned non-exception. + * @return An instance of a subclass of ::rb_eException. + * + * @internal + * + * Historically this was _the_ way `raise` converted its arguments to an + * exception. However they diverged. + */ +VALUE rb_make_exception(int argc, const VALUE *argv); /* eval_jump.c */ -void rb_set_end_proc(void (*)(VALUE), VALUE); + +/** + * Registers a function that shall run on process exit. Registered functions + * run in reverse-chronological order, mixed with syntactic `END` block and + * `Kernel#at_exit`. + * + * @param[in] func Function to run at process exit. + * @param[in] arg Passed as-is to `func`. + */ +void rb_set_end_proc(void (*func)(VALUE arg), VALUE arg); RBIMPL_SYMBOL_EXPORT_END() diff --git a/include/ruby/internal/intern/file.h b/include/ruby/internal/intern/file.h index 9ebefece66..79820fdc61 100644 --- a/include/ruby/internal/intern/file.h +++ b/include/ruby/internal/intern/file.h @@ -17,25 +17,196 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Public APIs related to ::rb_cFile. */ +#include "ruby/internal/attr/nonnull.h" +#include "ruby/internal/attr/pure.h" #include "ruby/internal/dllexport.h" #include "ruby/internal/value.h" RBIMPL_SYMBOL_EXPORT_BEGIN() /* file.c */ -VALUE rb_file_s_expand_path(int, const VALUE *); -VALUE rb_file_expand_path(VALUE, VALUE); -VALUE rb_file_s_absolute_path(int, const VALUE *); -VALUE rb_file_absolute_path(VALUE, VALUE); + +RBIMPL_ATTR_NONNULL(()) +/** + * Identical to rb_file_expand_path(), except how arguments are passed. 
+ * + * @param[in] argc Number of objects of `argv`. + * @param[in] argv Filename, and base directory, in that order. + * @exception rb_eArgError Wrong `argc`. + * @exception rb_eTypeError Non-string passed. + * @exception rb_eEncCompatError No conversion from arguments to a path. + * @return Expanded path. + * + * @internal + * + * It seems nobody actually uses this function right now. Maybe delete it? + */ +VALUE rb_file_s_expand_path(int argc, const VALUE *argv); + +/** + * Identical to rb_file_absolute_path(), except it additionally understands + * `~`. If a given pathname starts with `~someone/`, that part expands to the + * user's home directory (or that of current process' owner's in case of `~/`). + * + * @param[in] fname Relative file name. + * @param[in] dname Lookup base directory name, or in case + * ::RUBY_Qnil is passed the process' current + * working directory is assumed. + * @exception rb_eArgError Home directory is not absolute. + * @exception rb_eTypeError Non-string passed. + * @exception rb_eEncCompatError No conversion from arguments to a path. + * @return Expanded path. + */ +VALUE rb_file_expand_path(VALUE fname, VALUE dname); + +RBIMPL_ATTR_NONNULL(()) +/** + * Identical to rb_file_absolute_path(), except how arguments are passed. + * + * @param[in] argc Number of objects of `argv`. + * @param[in] argv Filename, and base directory, in that order. + * @exception rb_eArgError Wrong `argc`. + * @exception rb_eTypeError Non-string passed. + * @exception rb_eEncCompatError No conversion from arguments to a path. + * @return Expanded path. + * + * @internal + * + * It seems nobody actually uses this function right now. Maybe delete it? + */ +VALUE rb_file_s_absolute_path(int argc, const VALUE *argv); + +/** + * Maps a relative path to its absolute representation. Relative paths are + * referenced from the passed directory name, or from the process' current + * working directory in case ::RUBY_Qnil is passed. 
+ * + * @param[in] fname Relative file name. + * @param[in] dname Lookup base directory name, or in case + * ::RUBY_Qnil is passed the process' current + * working directory is assumed. + * @exception rb_eArgError Strings contain NUL bytes. + * @exception rb_eTypeError Non-string passed. + * @exception rb_eEncCompatError No conversion from arguments to a path. + * @return Expanded path. + */ +VALUE rb_file_absolute_path(VALUE fname, VALUE dname); + +/** + * Strips a file path's last component (and trailing separators if any). This + * function is relatively simple on POSIX environments; just splits the input + * with `/`, strips the last one, if something remains joins them again, + * otherwise the return value is `"."`. However when it comes to Windows this + * function is quite very much complicated. We have to take UNC etc. into + * account. So for instance `"C:foo"`'s dirname is `"C:."`. + * + * @param[in] fname File name to strip. + * @exception rb_eTypeError `fname` is not a String. + * @exception rb_eArgError `fname` contains NUL bytes. + * @exception rb_eEncCompatError `fname`'s encoding is not path-compat. + * @return A dirname of `fname`. + * @note This is a "pure" operation; it computes the return value solely + * from the passed object and never does any file IO. + */ VALUE rb_file_dirname(VALUE fname); -int rb_find_file_ext(VALUE*, const char* const*); -VALUE rb_find_file(VALUE); -VALUE rb_file_directory_p(VALUE,VALUE); -VALUE rb_str_encode_ospath(VALUE); -int rb_is_absolute_path(const char *); + +RBIMPL_ATTR_NONNULL(()) +/** + * Resolves a feature's path. This function takes for instance `"json"` and + * `[".so", ".rb"]`, and iterates over the `$LOAD_PATH` to see if there is + * either `json.so` or `json.rb` in the directory. + * + * This is not what everything `require` does, but at least `require` is built + * on top of it. + * + * @param[in,out] feature File to search, and return buffer. + * @param[in] exts List of file extensions. 
+ * @exception rb_eTypeError `feature` is not a String. + * @exception rb_eArgError `feature` contains NUL bytes. + * @exception rb_eEncCompatError `feature`'s encoding is not path-compat. + * @retval 0 Not found + * @retval otherwise Found index in `exts`, plus one. + * @post `*feature` is a resolved path. + */ +int rb_find_file_ext(VALUE *feature, const char *const *exts); + +/** + * Identical to rb_find_file_ext(), except it takes a feature name and its + * extension at once, e.g. `"json.rb"`. This difference is much like how + * `require` and `load` are different. + * + * @param[in] path A path relative to `$LOAD_PATH`. + * @exception rb_eTypeError `path` is not a String. + * @exception rb_eArgError `path` contains NUL bytes. + * @exception rb_eEncCompatError `path`'s encoding is not path-compat. + * @return Expanded path. + */ +VALUE rb_find_file(VALUE path); + +/** + * Queries if the given path is either a directory, or a symlink that + * (potentially recursively) points to such a thing. + * + * @param[in] _ Ignored (why...?) + * @param[in] path String, or IO. In case of IO it issues + * `fstat(2)` instead of `stat(2)`. + * @exception rb_eFrozenError `path` is a frozen IO (why...?) + * @exception rb_eTypeError `path` is neither String nor IO. + * @exception rb_eArgError `path` contains NUL bytes. + * @exception rb_eEncCompatError `path`'s encoding is not path-compat. + * @retval RUBY_Qtrue `path` is a directory. + * @retval RUBY_Qfalse Otherwise. + */ +VALUE rb_file_directory_p(VALUE _, VALUE path); + +/** + * Converts a string into an "OS Path" encoding, if any. In most operating + * systems there are no such things like per-OS default encoding of filename. + * For them this function is no-op. However most notably on MacOS, pathnames + * are UTF-8 encoded. It converts the given string into such encoding. + * + * @param[in] path An instance of ::rb_cString. + * @exception rb_eEncCompatError `path`'s encoding is not path-compat.
+ * @return `path`'s contents converted to the OS' path encoding. + */ +VALUE rb_str_encode_ospath(VALUE path); + +RBIMPL_ATTR_NONNULL(()) +RBIMPL_ATTR_PURE() +/** + * Queries if the given path is an absolute path. On POSIX environments it is + * as easy as `path[0] == '/'`. However on Windows, drive letters and UNC + * paths are also taken into account. + * + * @param[in] path A possibly relative path string. + * @retval 1 `path` is absolute. + * @retval 0 `path` is relative. + */ +int rb_is_absolute_path(const char *path); + +/** + * Queries the file size of the given file. Because this function calls + * `fstat(2)` internally, it is a failure to pass a closed file to this + * function. + * + * This function flushes the passed file's buffer if any. Can take time. + * + * @param[in] file A file object. + * @exception rb_eFrozenError `file` is frozen. + * @exception rb_eIOError `file` is closed. + * @exception rb_eSystemCallError Permission denied etc. + * @exception rb_eNoMethodError The given non-file object doesn't respond + * to `#size`. + * @return The size of the passed file. + * @note Passing a non-regular file such as a UNIX domain socket to this + * function is not a failure. But the return value is + * unpredictable. POSIX's `<sys/stat.h>` states that "the use of + * this field is unspecified" then. + */ +rb_off_t rb_file_size(VALUE file); RBIMPL_SYMBOL_EXPORT_END() diff --git a/include/ruby/internal/intern/hash.h b/include/ruby/internal/intern/hash.h index 70c37917f1..af8dfd5d8f 100644 --- a/include/ruby/internal/intern/hash.h +++ b/include/ruby/internal/intern/hash.h @@ -17,9 +17,10 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Public APIs related to ::rb_cHash. 
*/ +#include "ruby/internal/attr/nonnull.h" #include "ruby/internal/dllexport.h" #include "ruby/internal/value.h" #include "ruby/st.h" @@ -27,31 +28,292 @@ RBIMPL_SYMBOL_EXPORT_BEGIN() /* hash.c */ -void rb_st_foreach_safe(struct st_table *, int (*)(st_data_t, st_data_t, st_data_t), st_data_t); + +RBIMPL_ATTR_NONNULL(()) +/** + * Identical to rb_st_foreach(), except it raises exceptions when the callback + * function tampers the table during iterating over it. + * + * @param[in] st Table to iterate over. + * @param[in] func Callback function to apply. + * @param[in] arg Passed as-is to `func`. + * @exception rb_eRuntimeError `st` was tampered during iterating. + * + * @internal + * + * This is declared here because exceptions are Ruby level concept. + * + * This is in fact a very thin wrapper of rb_st_foreach_check(). + */ +void rb_st_foreach_safe(struct st_table *st, st_foreach_callback_func *func, st_data_t arg); + +/** @alias{rb_st_foreach_safe} */ #define st_foreach_safe rb_st_foreach_safe -VALUE rb_check_hash_type(VALUE); -void rb_hash_foreach(VALUE, int (*)(VALUE, VALUE, VALUE), VALUE); -VALUE rb_hash(VALUE); + +/** + * Try converting an object to its hash representation using its `to_hash` + * method, if any. If there is no such thing, returns ::RUBY_Qnil. + * + * @param[in] obj Arbitrary ruby object to convert. + * @exception rb_eTypeError `obj.to_hash` returned something non-Hash. + * @retval RUBY_Qnil No conversion from `obj` to hash defined. + * @retval otherwise Converted hash representation of `obj`. + * @see rb_io_check_io + * @see rb_check_array_type + * @see rb_check_string_type + * + * @internal + * + * There is no rb_hash_to_hash() that analogous to rb_str_to_str(). + * Intentional or ...? + */ +VALUE rb_check_hash_type(VALUE obj); + +RBIMPL_ATTR_NONNULL(()) +/** + * Iterates over a hash. This basically does the same thing as + * rb_st_foreach(). But because the passed hash is a Ruby object, its keys and + * values are both Ruby objects. 
+ * + * @param[in] hash An instance of ::rb_cHash to iterate over. + * @param[in] func Callback function to yield. + * @param[in] arg Passed as-is to `func`. + * @exception rb_eRuntimeError `hash` was tampered during iterating. + */ +void rb_hash_foreach(VALUE hash, int (*func)(VALUE key, VALUE val, VALUE arg), VALUE arg); + +/** + * Calculates a hash value of the passed object. The return + * value is a very small integer used as an index of a key of a table. In + * order to calculate the value this function calls the `#hash` method of the + * passed object. Ruby provides you a default implementation. But if you + * implement your class in C, that default implementation cannot know the + * underlying data structure. You must implement your own `#hash` method then, + * which must return an integer of uniform distribution in a sufficiently + * instant manner. + * + * @param[in] obj Arbitrary Ruby object. + * @exception rb_eTypeError `obj.hash` returned something non-Integer. + * @return A small integer. + * @note `#hash` can return very big integers, but they get truncated. + */ +VALUE rb_hash(VALUE obj); + +/** + * Creates a new, empty hash object. + * + * @return An allocated new instance of ::rb_cHash. + */ VALUE rb_hash_new(void); -VALUE rb_hash_dup(VALUE); -VALUE rb_hash_freeze(VALUE); -VALUE rb_hash_aref(VALUE, VALUE); -VALUE rb_hash_lookup(VALUE, VALUE); -VALUE rb_hash_lookup2(VALUE, VALUE, VALUE); -VALUE rb_hash_fetch(VALUE, VALUE); -VALUE rb_hash_aset(VALUE, VALUE, VALUE); -VALUE rb_hash_clear(VALUE); -VALUE rb_hash_delete_if(VALUE); -VALUE rb_hash_delete(VALUE,VALUE); -VALUE rb_hash_set_ifnone(VALUE hash, VALUE ifnone); -void rb_hash_bulk_insert(long, const VALUE *, VALUE); + +/** + * Identical to rb_hash_new(), except it additionally specifies how many keys + * it is expected to contain. This way you can create a hash that is large enough + * for your need.
For large hashes it means it won't need to be reallocated and + * rehashed as much, improving performance. + * + * @param[in] capa Designed capacity of the hash. + * @return An empty Hash, whose capacity is `capa`. + */ +VALUE rb_hash_new_capa(long capa); + +/** + * Duplicates a hash. + * + * @param[in] hash An instance of ::rb_cHash. + * @return An allocated new instance of ::rb_cHash, whose contents are + * a verbatim copy of `hash`. + */ +VALUE rb_hash_dup(VALUE hash); + +/** @alias{rb_obj_freeze} */ +VALUE rb_hash_freeze(VALUE obj); + +/** + * Queries the given key in the given hash table. If there is the key in the + * hash, returns the value associated with the key. Otherwise it returns the + * "default" value (defined per hash table). + * + * @param[in] hash Hash table to look into. + * @param[in] key Hash key to look for. + * @return Either the value associated with the key, or the default one if + * absent. + */ +VALUE rb_hash_aref(VALUE hash, VALUE key); + +/** + * Identical to rb_hash_aref(), except it always returns ::RUBY_Qnil for + * misshits. + * + * @param[in] hash Hash table to look into. + * @param[in] key Hash key to look for. + * @return Either the value associated with the key, or ::RUBY_Qnil if + * absent. + * @note A hash can store ::RUBY_Qnil as an ordinary value. You cannot + * distinguish whether the key is missing, or just its associated + * value happens to be ::RUBY_Qnil, as far as you use this API. + */ +VALUE rb_hash_lookup(VALUE hash, VALUE key); + +/** + * Identical to rb_hash_lookup(), except you can specify what to return on + * misshits. This is much like the 2-argument version of `Hash#fetch`. + * + * ```CXX + * VALUE hash; + * VALUE key; + * VALUE tmp = rb_obj_alloc(rb_cObject); + * VALUE val = rb_hash_lookup2(hash, key, tmp); + * if (val == tmp) { + * printf("misshit"); + * } + * else { + * printf("hit"); + * } + * ``` + * + * @param[in] hash Hash table to look into. + * @param[in] key Hash key to look for.
+ * @param[in] def Default value. + * @retval def `hash` does not have `key`. + * @retval otherwise The value associated with `key`. + */ +VALUE rb_hash_lookup2(VALUE hash, VALUE key, VALUE def); + +/** + * Identical to rb_hash_lookup(), except it yields the (implicitly) passed + * block instead of returning ::RUBY_Qnil. + * + * @param[in] hash Hash table to look into. + * @param[in] key Hash key to look for. + * @exception rb_eKeyError No block given. + * @return Either the value associated with the key, or what the block + * evaluates to if absent. + */ +VALUE rb_hash_fetch(VALUE hash, VALUE key); + +/** + * Inserts or replaces ("upsert"s) the objects into the given hash table. This + * basically associates the given value with the given key. On duplicate key + * this function updates its associated value with the given one. Otherwise it + * inserts the association at the end of the table. + * + * @param[out] hash Target hash table to modify. + * @param[in] key Arbitrary Ruby object. + * @param[in] val A value to be associated with `key`. + * @exception rb_eFrozenError `hash` is frozen. + * @return The passed `val` + * @post `val` is associated with `key` in `hash`. + */ +VALUE rb_hash_aset(VALUE hash, VALUE key, VALUE val); + +/** + * Wipes everything out of the passed hash table. + * + * @param[out] hash Target to clear. + * @exception rb_eFrozenError `hash` is frozen. + * @return The passed `hash` + * @post `hash` has no contents. + */ +VALUE rb_hash_clear(VALUE hash); + +/** + * Deletes each entry for which the block returns a truthy value. If there is + * no block given, it returns an enumerator that does the thing. + * + * @param[out] hash Target hash to modify. + * @exception rb_eFrozenError `hash` is frozen. + * @retval hash The hash is modified. + * @retval otherwise An instance of ::rb_cEnumerator that does it. + */ +VALUE rb_hash_delete_if(VALUE hash); + +/** + * Deletes the passed key from the passed hash table, if any.
+ * + * @param[out] hash Target hash to modify. + * @param[in] key Key to delete. + * @retval RUBY_Qnil `hash` has no such key as `key`. + * @retval otherwise What was associated with `key`. + * @post `hash` has no such key as `key`. + */ +VALUE rb_hash_delete(VALUE hash, VALUE key); + +/** + * Inserts a list of key-value pairs into a hash table at once. It is + * semantically identical to repeatedly calling rb_hash_aset(), but can be + * faster than that. + * + * @param[in] argc Length of `argv`, must be even. + * @param[in] argv A list of key, value, key, value, ... + * @param[out] hash Target hash table to modify. + * @post `hash` has contents from `argv`. + * @note `argv` is allowed to be NULL as long as `argc` is zero. + * + * @internal + * + * What happens for duplicated keys? Well it silently discards older ones to + * accept the newest (rightmost) one. This behaviour also mimics repeated calls + * of rb_hash_aset(). + */ +void rb_hash_bulk_insert(long argc, const VALUE *argv, VALUE hash); + +/** + * Type of callback functions to pass to rb_hash_update_by(). + * + * @param[in] newkey A key of the table. + * @param[in] oldkey Value associated with `newkey` in hash1. + * @param[in] value Value associated with `newkey` in hash2. + * @return Either one of the passed values to take. + */ typedef VALUE rb_hash_update_func(VALUE newkey, VALUE oldkey, VALUE value); + +/** + * Destructively merges two hash tables into one. It resolves key conflicts by + * calling the passed function and taking its return value. + * + * @param[out] hash1 Target hash to be modified. + * @param[in] hash2 A hash to merge into `hash1`. + * @param[in] func Conflict reconciler. + * @exception rb_eFrozenError `hash1` is frozen. + * @exception rb_eRuntimeError `hash2` is updated instead. + * @return The passed `hash1`. + * @post Contents of `hash2` are merged into `hash1`. + * @note You can pass zero to `func`. This means values from `hash2` + * are always taken.
+ */ VALUE rb_hash_update_by(VALUE hash1, VALUE hash2, rb_hash_update_func *func); -struct st_table *rb_hash_tbl(VALUE, const char *file, int line); -int rb_path_check(const char*); -int rb_env_path_tainted(void); + +/* file.c */ + +/** + * This function is mysterious. What it does is not immediately obvious. Also + * what it does seems platform dependent. + * + * @param[in] path A local path. + * @retval 0 The "check" succeeded. + * @retval otherwise The "check" failed. + */ +int rb_path_check(const char *path); + +/* hash.c */ + +/** + * Destructively removes every environment variable of the running process. + * + * @return The `ENV` object. + * @post The process has no environment variables. + */ VALUE rb_env_clear(void); -VALUE rb_hash_size(VALUE); + +/** + * Identical to #RHASH_SIZE(), except it returns the size in Ruby's integer + * instead of C's. + * + * @param[in] hash A hash object. + * @return The size of the hash. + */ +VALUE rb_hash_size(VALUE hash); RBIMPL_SYMBOL_EXPORT_END() diff --git a/include/ruby/internal/intern/io.h b/include/ruby/internal/intern/io.h index d2f2e53486..02c249723e 100644 --- a/include/ruby/internal/intern/io.h +++ b/include/ruby/internal/intern/io.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Public APIs related to ::rb_cIO. */ #include "ruby/internal/dllexport.h" @@ -26,43 +26,634 @@ RBIMPL_SYMBOL_EXPORT_BEGIN() /* io.c */ + +/** + * @private + * + * @deprecated This macro once was a thing in the old days, but makes no sense + * any longer today. Exists here for backwards compatibility + * only. You can safely forget about it. + */ #define rb_defout rb_stdout + +/* string.c */ /* ...why?
moved in commit de7161526014b781468cea5d84411e23be */ + +/** + * The field separator character for inputs, or the `$;`. This affects how + * `String#split` works. You can set this via the `-F` command line option. + * You can also assign arbitrary ruby objects programmatically, but it makes + * best sense for you to assign a regular expression here. + * + * @internal + * + * Tidbit: "fs" comes from AWK's `FS` variable. + */ RUBY_EXTERN VALUE rb_fs; + +/* io.c */ /* ...why? given rb_fs is in string.c? */ + +/** + * The field separator character for outputs, or the `$,`. This affects how + * `Array#join` works. + * + * @deprecated Assigning anything other than ::RUBY_Qnil to this variable is + * deprecated. + */ RUBY_EXTERN VALUE rb_output_fs; + +/** + * The record separator character for inputs, or the `$/`. This affects how + * `IO#gets` works. You can set this via the `-0` command line option. + * + * @deprecated Assigning anything other than ::RUBY_Qnil to this variable is + * deprecated. + * + * @internal + * + * Tidbit: "rs" comes from AWK's `RS` variable. + */ RUBY_EXTERN VALUE rb_rs; + +/** + * This is the default value of ::rb_rs, i.e. `"\n"`. It seems it has always + * been just a newline string since the beginning. Not sure why C code has to + * use this, given there is no way for ruby programs to interface. + * + * Also it has not been deprecated for unknown reasons. + */ RUBY_EXTERN VALUE rb_default_rs; + +/** + * The record separator character for outputs, or the `$\`. This affects how + * `IO#print` works. + * + * @deprecated Assigning anything other than ::RUBY_Qnil to this variable is + * deprecated.
+ */ RUBY_EXTERN VALUE rb_output_rs; -VALUE rb_io_write(VALUE, VALUE); -VALUE rb_io_gets(VALUE); -VALUE rb_io_getbyte(VALUE); -VALUE rb_io_ungetc(VALUE, VALUE); -VALUE rb_io_ungetbyte(VALUE, VALUE); -VALUE rb_io_close(VALUE); -VALUE rb_io_flush(VALUE); -VALUE rb_io_eof(VALUE); -VALUE rb_io_binmode(VALUE); -VALUE rb_io_ascii8bit_binmode(VALUE); -VALUE rb_io_addstr(VALUE, VALUE); -VALUE rb_io_printf(int, const VALUE*, VALUE); -VALUE rb_io_print(int, const VALUE*, VALUE); -VALUE rb_io_puts(int, const VALUE*, VALUE); -VALUE rb_io_fdopen(int, int, const char*); -VALUE rb_io_get_io(VALUE); -VALUE rb_file_open(const char*, const char*); -VALUE rb_file_open_str(VALUE, const char*); + +/** + * Writes the given string to the given IO. + * + * @param[out] io An IO, opened for writing. + * @param[in] str A String-like object to write to `io`. + * @exception rb_eIOError `io` isn't opened for writing. + * @exception rb_eFrozenError `io` is frozen. + * @exception rb_eTypeError No conversion from `str` to String. + * @exception rb_eSystemCallError `write(2)` failed for some reason. + * @return The number of bytes written to the `io`. + * @post `str` (up to the length of return value) is written to `io`. + * @note This function blocks. + * @note Partial write is a thing. It must be at least questionable not + * to check the return value. + * + * @internal + * + * Above description is in fact inaccurate. This function can take arbitrary + * objects, and calls their `write` method. What is written above in fact + * describes how `IO#write` works. You can pass StringIO etc. here, and would + * work completely differently. + */ +VALUE rb_io_write(VALUE io, VALUE str); + +/** + * Reads a "line" from the given IO. A line here means a chunk of characters + * which is terminated by either `"\n"` or an EOF. + * + * @param[in,out] io An IO, opened for reading. + * @exception rb_eIOError `io` isn't opened for reading. + * @exception rb_eFrozenError `io` is frozen. 
+ * @retval RUBY_Qnil `io` is at EOF. + * @retval otherwise An instance of ::rb_cString. + * @post `io` is read. + * @note Unlike `IO#gets` it doesn't set `$_`. + * @note Unlike `IO#gets` it doesn't consider `$/`. + */ +VALUE rb_io_gets(VALUE io); + +/** + * Reads a byte from the given IO. + * + * @note In Ruby a "byte" always means an 8 bit integer ranging from + * 0 to 255 inclusive. + * @param[in,out] io An IO, opened for reading. + * @exception rb_eIOError `io` is not opened for reading. + * @exception rb_eFrozenError `io` is frozen. + * @retval RUBY_Qnil `io` is at EOF. + * @retval otherwise An instance of ::rb_cInteger. + * @post `io` is read. + * + * @internal + * + * Of course there was a function called `rb_io_getc()`. It was removed in + * commit a25fbe3b3e531bbe479f344af24eaf9d2eeae6ea. + */ +VALUE rb_io_getbyte(VALUE io); + +/** + * "Unget"s a string. This function pushes back the passed string onto the + * passed IO, such that a subsequent buffered read will return it. If the + * passed content is in fact an integer, a single character string of that + * codepoint of the encoding of the IO will be pushed back instead. + * + * It might be counter-intuitive but this function can push back multiple + * characters at once. Also this function can be called multiple times on a + * same IO. Also a "character" can be wider than a byte, depending on the + * encoding of the IO. + * + * @param[out] io An IO, opened for reading. + * @param[in] c Either a String, or an Integer. + * @exception rb_eIOError `io` is not opened for reading. + * @exception rb_eFrozenError `io` is frozen. + * @exception rb_eTypeError No conversion from `c` to ::rb_cString. + * @return Always returns ::RUBY_Qnil. + * + * @internal + * + * Why there is ungetc, given there is no getc? + */ +VALUE rb_io_ungetc(VALUE io, VALUE c); + +/** + * Identical to rb_io_ungetc(), except it doesn't take the encoding of the + * passed IO into account. 
When an integer is passed, it just casts that value + * to C's `unsigned char`, and pushes that back. + * + * @param[out] io An IO, opened for reading. + * @param[in] b Either a String, or an Integer. + * @exception rb_eIOError `io` is not opened for reading. + * @exception rb_eFrozenError `io` is frozen. + * @exception rb_eTypeError No conversion from `b` to ::rb_cString. + * @return Always returns ::RUBY_Qnil. + */ +VALUE rb_io_ungetbyte(VALUE io, VALUE b); + +/** + * Closes the IO. Any buffered contents are flushed to the operating system. + * Any future operations against the IO would raise ::rb_eIOError. In case the + * io was created using `IO.popen`, it also sets the `$?`. + * + * @param[out] io Target IO to close. + * @return Always returns ::RUBY_Qnil. + * @post `$?` is set in case IO is a pipe. + * @post No operations are possible against `io` any further. + * @note This can block to flush the contents. + * @note This can wake other threads up, especially those who are + * `select()`-ing the passed IO. + * @note Multiple invocations of this function over the same IO again + * and again is not an error, since Ruby 2.3. + * + * @internal + * + * You can close a frozen IO... Is this intentional? + */ +VALUE rb_io_close(VALUE io); + +/** + * Flushes any buffered data within the passed IO to the underlying operating + * system. + * + * @param[out] io Target IO to flush. + * @exception rb_eIOError `io` is closed. + * @exception rb_eFrozenError `io` is frozen. + * @exception rb_eSystemCallError `write(2)` failed for some reason. + * @return The passed `io`. + * @post `io`'s buffers are empty. + * @note This operation also discards the read buffer. Should basically + * be harmless, but in an esoteric situation like when user pushed + * something different from what was read using `ungetc`, this + * operation in fact changes the behaviour of the `io`. + * @note Buffering is difficult. 
This operation flushes the data from + * our userspace to the kernel, but that doesn't always mean you + * can expect them stored persistently onto your hard drive. + */ +VALUE rb_io_flush(VALUE io); + +/** + * Queries if the passed IO is at the end of file. "The end of file" here means + * that there are no more data to read. This function blocks until the read + * buffer is filled in, and if that operation reached the end of file, it still + * returns ::RUBY_Qfalse (because there are data yet in that buffer). It + * returns ::RUBY_Qtrue once after the buffer is cleared. + * + * @param[in,out] io Target io to query. + * @exception rb_eIOError `io` is not opened for reading. + * @exception rb_eFrozenError `io` is frozen. + * @retval RUBY_Qfalse There are things yet to be read. + * @retval RUBY_Qtrue "The end of file" situation. + */ +VALUE rb_io_eof(VALUE io); + +/** + * Sets the binmode. This operation nullifies the effect of textmode (newline + * conversion from `"\r\n"` to `"\n"` or vice versa). Note that it doesn't + * stop character encoding conversions. For instance an IO created using: + * + * ```ruby + * File.open( + * "/dev/urandom", + * textmode: true, + * external_encoding: Encoding::GB18030, + * internal_encoding: Encoding::Windows_31J) + * ``` + * + * has both newline and character conversions. If you pass such IO to this + * function, only the `textmode:true` part is cancelled. Texts read through + * the IO would still be encoded in Windows-31J; texts written to the IO will + * be encoded in GB18030. + * + * @param[out] io Target IO to modify. + * @exception rb_eFrozenError `io` is frozen. + * @return The passed `io`. + * @post `io` is in binmode. + * @note There is no equivalent operation in Ruby. You can do this only + * in C. + */ +VALUE rb_io_binmode(VALUE io); + +/** + * Forces no conversions be applied to the passed IO. Unlike rb_io_binmode(), + * this cancels any newline conversions as well as encoding conversions.
Any + * texts read/written through the IO will be the verbatim binary contents. + * + * @param[out] io Target IO to modify. + * @exception rb_eFrozenError `io` is frozen. + * @return The passed `io`. + * @post `io` is in binmode. Both external/internal encoding are set to + * rb_ascii8bit_encoding(). + * @note This is the implementation of `IO#binmode`. + */ +VALUE rb_io_ascii8bit_binmode(VALUE io); + +/** + * Identical to rb_io_write(), except it always returns the passed IO. + * + * @param[out] io An IO, opened for writing. + * @param[in] str A String-like object to write to `io`. + * @exception rb_eIOError `io` isn't opened for writing. + * @exception rb_eFrozenError `io` is frozen. + * @exception rb_eTypeError No conversion from `str` to String. + * @exception rb_eSystemCallError `write(2)` failed. + * @return The passed `io`. + * @post `str` is written to `io`. + * @note This function blocks. + * + * @internal + * + * As rb_io_write(), above description is a fake. + */ +VALUE rb_io_addstr(VALUE io, VALUE str); + +/** + * This is a rb_f_sprintf() + rb_io_write() combo. + * + * @param[in] argc Number of objects of `argv`. + * @param[in] argv A format string followed by its arguments. + * @param[out] io An IO, opened for writing. + * @exception rb_eIOError `io` isn't opened for writing. + * @exception rb_eFrozenError `io` is frozen. + * @exception rb_eTypeError No conversion from `str` to String. + * @exception rb_eSystemCallError `write(2)` failed. + * @return Always returns ::RUBY_Qnil. + * @post `argv` is formatted, then written to `io`. + * @note This function blocks. + * + * @internal + * + * As rb_io_write(), above descriptions include fakes. + */ +VALUE rb_io_printf(int argc, const VALUE *argv, VALUE io); + +/** + * Iterates over the passed array to apply rb_io_write() individually. If + * there is `$,`, this function inserts the string between + * iterations. If there is `$\`, this function appends the string at the end.
+ * If the array is empty, this function outputs `$_`. + * + * @param[in] argc Number of objects of `argv`. + * @param[in] argv An array of strings to display. + * @param[out] io An IO, opened for writing. + * @exception rb_eIOError `io` isn't opened for writing. + * @exception rb_eFrozenError `io` is frozen. + * @exception rb_eTypeError No conversion from `str` to String. + * @exception rb_eSystemCallError `write(2)` failed. + * @return Always returns ::RUBY_Qnil. + * @post `argv` is written to `io`. + * @note This function blocks. + * @note This function calls rb_io_write() multiple times. Which means, + * it is not an atomic operation. Outputs from multiple threads + * can interleave. + * + * @internal + * + * As rb_io_write(), above descriptions include fakes. + */ +VALUE rb_io_print(int argc, const VALUE *argv, VALUE io); + +/** + * Iterates over the passed array to apply rb_io_write() individually. Unlike + * rb_io_print(), this function prints a newline per each element. It also + * flattens the passed array (OTOH rb_io_print() just resorts to + * rb_ary_to_s()). + * + * @param[in] argc Number of objects of `argv`. + * @param[in] argv An array of strings to display. + * @param[out] io An IO, opened for writing. + * @exception rb_eIOError `io` isn't opened for writing. + * @exception rb_eFrozenError `io` is frozen. + * @exception rb_eTypeError No conversion from `str` to String. + * @exception rb_eSystemCallError `write(2)` failed. + * @return Always returns ::RUBY_Qnil. + * @post `argv` is written to `io`. + * @note This function blocks. + * @note This function calls rb_io_write() multiple times. Which means, + * it is not an atomic operation. Outputs from multiple threads + * can interleave. + * + * @internal + * + * As rb_io_write(), above descriptions include fakes. + */ +VALUE rb_io_puts(int argc, const VALUE *argv, VALUE io); + +/** + * Creates an IO instance whose backend is the given file descriptor. 
C + * extension libraries sometimes have file descriptors created elsewhere (maybe + * deep inside of another shared library), which they want ruby programs to + * handle. This function is handy for such situations. + * + * @param[in] fd Target file descriptor. + * @param[in] flags Flags, e.g. `O_CREAT|O_EXCL` + * @param[in] path The path of the file that backs `fd`, for diagnostics. + * @return An allocated instance of ::rb_cIO. + * @note Leave `path` NULL if you don't know. + */ +VALUE rb_io_fdopen(int fd, int flags, const char *path); + +RBIMPL_ATTR_NONNULL(()) +/** + * Opens a file located at the given path. + * + * `fmode` is a C string that represents the open mode. It can be one of: + * + * - `r` (means `O_RDONLY`), + * - `w` (means `O_WRONLY | O_TRUNC | O_CREAT`), + * - `a` (means `O_WRONLY | O_APPEND | O_CREAT`), + * + * Followed by zero or more combinations of: + * + * - `b` (means `_O_BINARY`), + * - `t` (means `_O_TEXT`), + * - `+` (means `O_RDWR`), + * - `x` (means `O_EXCL`), or + * - `:[BOM|]enc[:enc]` (see below). + * + * This last one specifies external (and internal if any) encodings, + * respectively. If optional `BOM|` is specified and the specified external + * encoding is capable of expressing BOMs, opening file's contents' byte order + * is auto-detected using the mechanism. + * + * So for instance, fmode of `"rt:BOM|utf-16le:utf-8"` specifies that... + * + * - the physical representation of the contents of the file is in UTF-16; + * - honours its BOM but assumes little endian if absent; + * - opens the file for reading; + * - what is read is converted into UTF-8; + * - with newlines canonicalised to `\n`. + * + * @param[in] fname Path to open. + * @param[in] fmode Mode specifier much like `fopen(3)`. + * @exception rb_eArgError `fmode` contradicted (e.g. `"bt"`). + * @exception rb_eSystemCallError `open(2)` failed for some reason. + * @return An instance of ::rb_cIO.
+ */ +VALUE rb_file_open(const char *fname, const char *fmode); + +RBIMPL_ATTR_NONNULL(()) +/** + * Identical to rb_file_open(), except it takes the pathname as a Ruby's string + * instead of C's. In case the passed Ruby object is a non-String it tries to + * call `#to_path`. + * + * @param[in] fname Path to open. + * @param[in] fmode Mode specifier much like `fopen(3)`. + * @exception rb_eTypeError `fname` is not a String. + * @exception rb_eEncCompatError `fname` is not ASCII-compatible. + * @exception rb_eArgError `fmode` contradicted (e.g. `"bt"`). + * @exception rb_eSystemCallError `open(2)` failed for some reason. + * @return An instance of ::rb_cIO. + */ +VALUE rb_file_open_str(VALUE fname, const char *fmode); + +/** + * Much like rb_io_gets(), but it reads from the mysterious ARGF object. ARGF + * in this context can be seen as a virtual IO which concatenates contents of + * the files passed to the process via the ARGV, or just STDIN if there are no + * such files. + * + * Unlike rb_io_gets() this function sets `$_`. + * + * @exception rb_eFrozenError ARGF resorts to STDIN but it is frozen. + * @retval RUBY_Qnil ARGF is at EOF. + * @retval otherwise An instance of ::rb_cString. + * @post ARGF is read. + * @post `$_` is set. + * + * @internal + * + * In reality, this function can call `ARGF.gets`. Its redefinition can affect + * the behaviour. + * + * Also, you can tamper ARGV on-the-fly in middle of ARGF usages: + * + * ``` + * gets # Reads the first file. + * ARGV << '/proc/self/limits' # Adds a file. + * gets # Can read from /proc/self/limits. + * ``` + */ VALUE rb_gets(void); -void rb_write_error(const char*); -void rb_write_error2(const char*, long); + +RBIMPL_ATTR_NONNULL(()) +/** + * Writes the given error message to somewhere applicable. On Windows it goes + * to the console. On POSIX environments it goes to the standard error. + * + * @warning IT IS A BAD IDEA to use this function from your C extensions.
+ * It is often annoying when GUI applications write to consoles; + * users don't want to look at there. Programmers also want to + * control the cause of the message itself, like by rescuing an + * exception. Just let ruby handle errors. That must be better than + * going your own way. + * + * @param[in] str Error message to display. + * @post `str` is written to somewhere. + * + * @internal + * + * AFAIK this function is listed here without marked deprecated because there + * are usages of this function in the wild. + */ +void rb_write_error(const char *str); + +/** + * Identical to rb_write_error(), except it additionally takes the message's + * length. Necessary when you want to handle wide characters. + * + * @param[in] str Error message to display. + * @param[in] len Length of `str`, in bytes. + * @post `str` is written to somewhere. + */ +void rb_write_error2(const char *str, long len); + +/** + * Closes everything. In case of POSIX environments, a child process inherits + * its parent's opened file descriptors. Which is nowadays considered as one + * of the UNIX mistakes. This function closes such inherited file descriptors. + * When your C extension needs to have a child process, don't forget to call + * this from your child process right before exec. + * + * @param[in] lowfd Lower bound of FDs (you want STDIN to remain, no?). + * @param[in] maxhint Hint of max FDs. + * @param[in] noclose_fds A hash, whose keys are an allowlist. + * + * @internal + * + * As of writing, in spite of the name, this function does not actually close + * anything. It just sets `FD_CLOEXEC` for everything and let `execve(2)` to + * atomically close them at once. This is because as far as we know there are + * no such platform that has `fork(2)` but lacks `FD_CLOEXEC`. + * + * Because this function is expected to run on a forked process it is entirely + * async-signal-safe. 
+ */ void rb_close_before_exec(int lowfd, int maxhint, VALUE noclose_fds); + +RBIMPL_ATTR_NONNULL(()) +/** + * This is an rb_cloexec_pipe() + rb_update_max_fd() combo. + * + * @param[out] pipes Return buffer. Must at least hold 2 elements. + * @retval 0 Successful creation of a pipe. + * @retval -1 Failure in underlying system call(s). + * @post `pipes` is filled with file descriptors. + * @post `errno` is set on failure. + */ int rb_pipe(int *pipes); + +/** + * Queries if the given FD is reserved or not. Occasionally Ruby interpreter + * opens files for its own purposes. Use this function to prevent touching + * such behind-the-scene descriptors. + * + * @param[in] fd Target file descriptor. + * @retval 1 `fd` is reserved. + * @retval 0 Otherwise. + */ int rb_reserved_fd_p(int fd); + +/** @alias{rb_reserved_fd_p} */ +#define RB_RESERVED_FD_P(fd) rb_reserved_fd_p(fd) + +/** + * Opens a file that closes on exec. In case of POSIX environments, a child + * process inherits its parent's opened file descriptors. Which is nowadays + * considered as one of the UNIX mistakes. This function opens a file + * descriptor as `open(2)` does, but additionally instructs the operating + * system that we don't want it be seen from child processes. + * + * @param[in] pathname File path to open. + * @param[in] flags Open mode, as in `open(2)`. + * @param[in] mode File mode, in case of `O_CREAT`. + * @retval -1 `open(2)` failed for some reason. + * @retval otherwise An allocated new file descriptor. + * @note This function does not raise. + * + * @internal + * + * Whether this function can take NULL or not depends on the underlying open(2) + * system call implementation but @shyouhei doesn't think it's worth trying. + */ int rb_cloexec_open(const char *pathname, int flags, mode_t mode); + +/** + * Identical to rb_cloexec_fcntl_dupfd(), except it implies minfd is 3. + * + * @param[in] oldfd File descriptor to duplicate. + * @retval -1 `dup2(2)` failed for some reason. 
+ * @retval otherwise An allocated new file descriptor. + * @note This function does not raise. + */ int rb_cloexec_dup(int oldfd); + +/** + * Identical to rb_cloexec_dup(), except you can specify the destination file + * descriptor. If the destination is already squatted by another file + * descriptor, that gets silently closed without any warnings. (This is a spec + * requested by POSIX.) + * + * @param[in] oldfd File descriptor to duplicate. + * @param[in] newfd Return value destination. + * @retval -1 `dup2(2)` failed for some reason. + * @retval newfd An allocated new file descriptor. + * @post Whatever sat at `newfd` gets closed with no notifications. + * @post In case return value is -1 `newfd` is untouched. + * @note This function does not raise. + */ int rb_cloexec_dup2(int oldfd, int newfd); + +RBIMPL_ATTR_NONNULL(()) +/** + * Opens a pipe with closing on exec. In case of POSIX environments, a child + * process inherits its parent's opened file descriptors. Which is nowadays + * considered as one of the UNIX mistakes. This function opens a pipe as + * `pipe(2)` does, but additionally instructs the operating system that we + * don't want the duplicated FDs be seen from child processes. + * + * @param[out] fildes Return buffer. Must at least hold 2 elements. + * @retval 0 Successful creation of a pipe. + * @retval -1 Failure in underlying system call(s). + * @post `fildes` is filled with file descriptors. + * @post `errno` is set on failure. + */ int rb_cloexec_pipe(int fildes[2]); + +/** + * Duplicates a file descriptor with closing on exec. In case of POSIX + * environments, a child process inherits its parent's opened file descriptors. + * Which is nowadays considered as one of the UNIX mistakes. This function + * duplicates a file descriptor as `dup(2)` does, but additionally instructs + * the operating system that we don't want the duplicated FD be seen from child + * processes. + * + * @param[in] fd File descriptor to duplicate.
+ * @param[in] minfd Minimum allowed FD to return. + * @retval -1 `dup(2)` failed for some reason. + * @retval otherwise An allocated new file descriptor. + * @note This function does not raise. + * + * `minfd` is handy when for instance STDERR is closed but you don't want to + * use fd 2. + */ int rb_cloexec_fcntl_dupfd(int fd, int minfd); -#define RB_RESERVED_FD_P(fd) rb_reserved_fd_p(fd) + +/** + * Informs the interpreter that the passed fd can be the max. This information + * is used from rb_close_before_exec(). + * + * @param[in] fd An open FD, which can be large. + */ void rb_update_max_fd(int fd); + +/** + * Sets or clears the close-on-exec flag of the passed file descriptor to the + * desired state. STDIN, STDOUT, STDERR are the exceptional file descriptors + * that shall remain open. All others are to be closed on exec. When a C + * extension library opens a file descriptor using anything other than + * rb_cloexec_open() etc., that file descriptor shall experience this function. + * + * @param[in] fd An open file descriptor. + */ void rb_fd_fix_cloexec(int fd); RBIMPL_SYMBOL_EXPORT_END() diff --git a/include/ruby/internal/intern/load.h b/include/ruby/internal/intern/load.h index 2cc5be0ebe..9ceb98c2e4 100644 --- a/include/ruby/internal/intern/load.h +++ b/include/ruby/internal/intern/load.h @@ -17,28 +17,239 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Public APIs related to ::rb_f_require(). 
*/ +#include "ruby/internal/attr/nonnull.h" #include "ruby/internal/dllexport.h" #include "ruby/internal/value.h" RBIMPL_SYMBOL_EXPORT_BEGIN() /* load.c */ -void rb_load(VALUE, int); -void rb_load_protect(VALUE, int, int*); -int rb_provided(const char*); -int rb_feature_provided(const char *, const char **); -void rb_provide(const char*); -VALUE rb_f_require(VALUE, VALUE); -VALUE rb_require_string(VALUE); - -// extension configuration + +/** + * Loads and executes the Ruby program in the given file. + * + * If the path is an absolute path (e.g. starts with `'/'`), the file will be + * loaded directly using the absolute path. If the path is an explicit + * relative path (e.g. starts with `'./'` or `'../'`), the file will be loaded + * using the relative path from the current directory. Otherwise, the file + * will be searched for in the library directories listed in the `$LOAD_PATH`. + * If the file is found in a directory, this function will attempt to load the + * file relative to that directory. If the file is not found in any of the + * directories in the `$LOAD_PATH`, the file will be loaded using the relative + * path from the current directory. + * + * If the file doesn't exist when there is an attempt to load it, a LoadError + * will be raised. + * + * If the `wrap` parameter is true, the loaded script will be executed under an + * anonymous module, protecting the calling program's global namespace. In no + * circumstance will any local variables in the loaded file be propagated to + * the loading environment. + * + * @param[in] path Pathname of a file to load. + * @param[in] wrap Either to load under an anonymous module. + * @exception rb_eTypeError `path` is not a string. + * @exception rb_eArgError `path` is broken as a pathname. + * @exception rb_eEncCompatError `path` is incompatible with pathnames. + * @exception rb_eLoadError `path` not found. + * @exception rb_eException Any exceptions while loading the contents. 
+ * + * @internal + * + * It seems this function is under the rule of bootsnap's regime? + */ +void rb_load(VALUE path, int wrap); + +/** + * Identical to rb_load(), except it avoids potential global escapes. Such + * global escapes include exceptions, `throw`, `break`, for example. + * + * It first evaluates the given file as rb_load() does. If no global escape + * occurred during the evaluation, `*state` is set to zero on return. + * Otherwise, it sets `*state` to nonzero. If state is `NULL`, it is not set + * in either case. + * + * @param[in] path Pathname of a file to load. + * @param[in] wrap Either to load under an anonymous module. + * @param[out] state State of execution. + * @post `*state` is set to zero if succeeded. Nonzero otherwise. + * @warning You have to clear the error info with `rb_set_errinfo(Qnil)` if + * you decide to ignore the caught exception. + * @see rb_load + * @see rb_protect + * + * @internal + * + * Though not a part of our public API, `state` is in fact an + * enum ruby_tag_type. You can see the potential "nonzero" values by looking + * at vm_core.h. + */ +void rb_load_protect(VALUE path, int wrap, int *state); + +RBIMPL_ATTR_NONNULL(()) +/** + * Queries if the given feature has already been loaded into the execution + * context. The "feature" here are things like `"json"` or `"socket"`. + * + * @param[in] feature Name of a library you want to know about. + * @retval 1 Yes there is. + * @retval 0 Not yet. + */ +int rb_provided(const char *feature); + +RBIMPL_ATTR_NONNULL((1)) +/** + * Identical to rb_provided(), except it additionally returns the "canonical" + * name of the loaded feature. This can be handy when for instance you want to + * know the actually loaded library is either `foo.rb` or `foo.so`. + * + * @param[in] feature Name of a library you want to know about. + * @param[out] loading Return buffer. + * @retval 1 Yes there is. + * @retval 0 Not yet.
+ */ +int rb_feature_provided(const char *feature, const char **loading); + +RBIMPL_ATTR_NONNULL(()) +/** + * Declares that the given feature is already provided by someone else. This + * API can be handy when you have an extension called `foo.so` which, when + * required, also provides functionality of `bar.so`. + * + * @param[in] feature Name of a library which had already been provided. + * @post No further `require` would search `feature`. + */ +void rb_provide(const char *feature); + +/** + * Identical to rb_require_string(), except it ignores the first argument for + * no reason. There seems to be no reason for 3rd party extension libraries to + * use it. + * + * @param[in] self Ignored. Can be anything. + * @param[in] feature Name of a feature, e.g. `"json"`. + * @exception rb_eLoadError No such feature. + * @exception rb_eRuntimeError `$"` is frozen; unable to push. + * @retval RUBY_Qtrue The feature is loaded for the first time. + * @retval RUBY_Qfalse The feature has already been loaded. + * @post `$"` is updated. + */ +VALUE rb_f_require(VALUE self, VALUE feature); + +/** + * Finds and loads the given feature, if absent. + * + * If the feature is an absolute path (e.g. starts with `'/'`), the feature + * will be loaded directly using the absolute path. If the feature is an + * explicit relative path (e.g. starts with `'./'` or `'../'`), the feature + * will be loaded using the relative path from the current directory. + * Otherwise, the feature will be searched for in the library directories + * listed in the `$LOAD_PATH`. + * + * If the feature has the extension `".rb"`, it is loaded as a source file; if + * the extension is `".so"`, `".o"`, or `".dll"`, or the default shared library + * extension on the current platform, Ruby loads the shared library as a Ruby + * extension. Otherwise, Ruby tries adding `".rb"`, `".so"`, and so on to the + * name until found. If the file named cannot be found, a LoadError will be + * raised. 
+ * + * For extension libraries the given feature may use any shared library + * extension. For example, on Linux you can require `"socket.dll"` to actually + * load `socket.so`. + * + * The absolute path of the loaded file is added to `$LOADED_FEATURES`. A file + * will not be loaded again if its path already appears in there. + * + * Any constants or globals within the loaded source file will be available in + * the calling program's global namespace. However, local variables will not + * be propagated to the loading environment. + * + * @param[in] feature Name of a feature, e.g. `"json"`. + * @exception rb_eLoadError No such feature. + * @exception rb_eRuntimeError `$"` is frozen; unable to push. + * @retval RUBY_Qtrue The feature is loaded for the first time. + * @retval RUBY_Qfalse The feature has already been loaded. + * @post `$"` is updated. + */ +VALUE rb_require_string(VALUE feature); + +/** + * Resolves and returns a symbol of a function in the native extension + * specified by the feature and symbol names. Extensions will use this function + * to access the symbols provided by other native extensions. + * + * @param[in] feature Name of a feature, e.g. `"json"`. + * @param[in] symbol Name of a symbol defined by the feature. + * @return The resolved symbol of a function, defined and externed by the + * specified feature. It may be NULL if the feature is not loaded, + * the feature is not extension, or the symbol is not found. + */ +void *rb_ext_resolve_symbol(const char *feature, const char *symbol); + +/** + * This macro is to provide backwards compatibility. It provides a way to + * define function prototypes and resolving function symbols in a safe way. 
+ * + * ```CXX + * // prototypes + * #ifdef HAVE_RB_EXT_RESOLVE_SYMBOL + * VALUE (*other_extension_func)(VALUE,VALUE); + * #else + * VALUE other_extension_func(VALUE,VALUE); + * #endif + * + * // in Init_xxx() + * #ifdef HAVE_RB_EXT_RESOLVE_SYMBOL + * other_extension_func = \ + * (VALUE(*)(VALUE,VALUE))rb_ext_resolve_symbol(fname, sym_name); + * if (other_extension_func == NULL) { + * // raise your own error + * } + * #endif + * ``` + */ +#define HAVE_RB_EXT_RESOLVE_SYMBOL 1 + +/** + * @name extension configuration + * @{ + */ + +/** + * Asserts that the extension library that calls this function is aware of + * Ractor. Multiple Ractors run without protecting each other. This doesn't + * interface well with C programs, unless designed with an in-depth + * understanding of how Ractors work. Extension libraries are shut out from + * Ractors by default. This API is to bypass that restriction. Once it has + * been called, successive calls to rb_define_method() etc. become definitions + * of methods that are aware of Ractors. The amendment would be in effect + * until the end of rb_require_string() etc. + * + * @param[in] flag Either the library is aware of Ractors or not. + * @post Methods would be callable from Ractors, if `flag` is true. + */ void rb_ext_ractor_safe(bool flag); + +/** @alias{rb_ext_ractor_safe} */ #define RB_EXT_RACTOR_SAFE(f) rb_ext_ractor_safe(f) + +/** + * This macro is to provide backwards compatibility. It must be safe to do + * something like: + * + * ```CXX + * #ifdef HAVE_RB_EXT_RACTOR_SAFE + * rb_ext_ractor_safe(true); + * #endif + * ``` + */ #define HAVE_RB_EXT_RACTOR_SAFE 1 +/** @} */ + RBIMPL_SYMBOL_EXPORT_END() #endif /* RBIMPL_INTERN_LOAD_H */ diff --git a/include/ruby/internal/intern/marshal.h b/include/ruby/internal/intern/marshal.h index 6b0243244e..118d78a4a0 100644 --- a/include/ruby/internal/intern/marshal.h +++ b/include/ruby/internal/intern/marshal.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++.
* Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Public APIs related to rb_mMarshal. */ #include "ruby/internal/dllexport.h" @@ -26,8 +26,85 @@ RBIMPL_SYMBOL_EXPORT_BEGIN() /* marshal.c */ -VALUE rb_marshal_dump(VALUE, VALUE); -VALUE rb_marshal_load(VALUE); + +/** + * Serialises the given object and all its referring objects, to write them + * down to the passed port. + * + * @param[in] obj Target object to dump. + * @param[out] port IO-like destination buffer. + * @exception rb_eTypeError `obj` cannot be dumped for some reason. + * @exception rb_eRuntimeError `obj` was tampered during dumping. + * @exception rb_eArgError Traversal too deep. + * @return The passed `port` as-is. + * @post Serialised representation of `obj` is written to `port`. + * @note `port` is basically an IO but StringIO is also possible. + */ +VALUE rb_marshal_dump(VALUE obj, VALUE port); + +/** + * Deserialises a previous output of rb_marshal_dump() into a network of + * objects. + * + * @param[in,out] port Either IO or String. + * @exception rb_eTypeError `port` is in unexpected type. + * @exception rb_eArgError Contents of `port` is broken. + * @return Object(s) rebuilt using the info from `port`. + * + * SECURITY CONSIDERATIONS + * ======================== + * + * @warning By design, rb_marshal_load() can deserialise almost any + * class loaded into the Ruby process. In many cases this can + * lead to remote code execution if the Marshal data is loaded + * from an untrusted source. + * @warning As a result, rb_marshal_load() is not suitable as a general + * purpose serialisation format and you should never unmarshal + * user supplied input or other untrusted data. 
+ * @warning If you need to deserialise untrusted data, use JSON or + * another serialisation format that is only able to load + * simple, 'primitive' types such as String, Array, Hash, etc. + * Never allow user input to specify arbitrary types to + * deserialise into. + */ +VALUE rb_marshal_load(VALUE port); + +/** + * Marshal format compatibility layer. Over time, classes evolve, so that + * their internal data structure change drastically. For instance an instance + * of ::rb_cRange was made of ::RUBY_T_OBJECT in 1.x., but in 3.x it is a + * ::RUBY_T_STRUCT now. In order to keep binary compatibility, we "fake" the + * marshalled representation to stick to old types. This is the API to enable + * that manoeuvre. Here is how: + * + * First, because you are going to keep backwards compatibility, you need to + * retain the old implementation of your class. Rename it, and keep the class + * somewhere (for instance rb_register_global_address() could help). Next + * create your new class. Do whatever you want. + * + * Then, this is the key point. Create two new "bridge" functions that convert + * the structs back and forth: + * + * - the "dumper" function that takes an instance of the new class, and + * returns an instance of the old one. This is called from + * rb_marshal_dump(), to keep it possible for old programs to read your new + * data. + * + * - the "loader" function that takes two arguments, new one and old one, in + * that order. rb_marshal_load() calls this function when it finds a + * representation of the retained old class. The old one passed to this + * function is the reconstructed instance of the old class. + * Reverse-engineer that to modify the new one, to have the identical + * contents. + * + * Finally, connect all of them using this function. + * + * @param[in] newclass The class that needs conversion. + * @param[in] oldclass Old implementation of `newclass`. + * @param[in] dumper Function that converts `newclass` to `oldclass`. 
+ * @param[in] loader Function that converts `oldclass` to `newclass`. + * @exception rb_eTypeError `newclass` has no allocator. + */ void rb_marshal_define_compat(VALUE newclass, VALUE oldclass, VALUE (*dumper)(VALUE), VALUE (*loader)(VALUE, VALUE)); RBIMPL_SYMBOL_EXPORT_END() diff --git a/include/ruby/internal/intern/numeric.h b/include/ruby/internal/intern/numeric.h index effc583756..30863fb0c8 100644 --- a/include/ruby/internal/intern/numeric.h +++ b/include/ruby/internal/intern/numeric.h @@ -17,25 +17,191 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Public APIs related to ::rb_cNumeric. */ +#include "ruby/internal/attr/cold.h" +#include "ruby/internal/attr/noreturn.h" #include "ruby/internal/dllexport.h" #include "ruby/internal/value.h" -#include "ruby/backward/2/attributes.h" + +/** + * @private + * + * @deprecated This macro once was a thing in the old days, but makes no sense + * any longer today. Exists here for backwards compatibility + * only. You can safely forget about it. + */ +#define RB_NUM_COERCE_FUNCS_NEED_OPID 1 RBIMPL_SYMBOL_EXPORT_BEGIN() /* numeric.c */ -NORETURN(void rb_num_zerodiv(void)); -#define RB_NUM_COERCE_FUNCS_NEED_OPID 1 -VALUE rb_num_coerce_bin(VALUE, VALUE, ID); -VALUE rb_num_coerce_cmp(VALUE, VALUE, ID); -VALUE rb_num_coerce_relop(VALUE, VALUE, ID); -VALUE rb_num_coerce_bit(VALUE, VALUE, ID); -VALUE rb_num2fix(VALUE); -VALUE rb_fix2str(VALUE, int); -CONSTFUNC(VALUE rb_dbl_cmp(double, double)); + +RBIMPL_ATTR_NORETURN() +RBIMPL_ATTR_COLD() +/** + * Just always raises an exception. + * + * @exception rb_eZeroDivError Division by zero error. + */ +void rb_num_zerodiv(void); + +/** + * @name Coercion operators. + * + * What is a coercion? 
Well Ruby is basically an OOPL but it also has + * arithmetic operators. They are implemented in OO manners. For instance + * `a+b` is a binary operation `+`, whose receiver is `a`, and whose (sole) + * argument is `b`. + * + * The problem is, you often want `a+b == b+a` to hold. That is easy if both + * `a` and `b` belongs to the same class... Ensuring `1 + 2 == 2 + 1` is kind + * of intuitive. But if you want `1.0 + 2 == 2 + 1.0`, things start getting + * complicated. `1.0+2` is `Float#+`, while `2+1.0` is `Integer#+`. In order + * to achieve the equality Float's and Integer's methods must agree with their + * behaviours. + * + * Now. Floats versus Integers situation is still controllable because they + * are both built-in. But in Ruby you can define your own numeric classes. + * BigDecimal, which is a rubygems gem distributed along with the interpreter, + * is one of such examples. Rational was another such example before. In + * short you cannot create list of all possible combination of the classes that + * could be the operand of `+` operator. Then how do we achieve the + * commutativity? + * + * Here comes the concept of coercion. If a definition of an operator + * encounters an object which is unknown to the author, just assumes that the + * unknown object knows how to handle the situation. So for instance when + * `1+x` has unknown `x`, it lets the `x` handle this. + * + * ```ruby + * class Foo + * def +(x) + * if we_know_what_is_x? then + * ... # handle here + * else + * y, z = x.coerce self + * return y + z + * end + * end + * end + * ``` + * + * The `x.coerce` method returns a 2-element array which are "casted" versions + * of `x` and `self`. + * + * @{ + */ + +/** + * Coerced binary operation. This function first coerces the two objects, then + * applies the operation. + * + * @param[in] lhs LHS operand. + * @param[in] rhs RHS operand. + * @param[in] op Operator method name. + * @exception rb_eTypeError Coercion failed for some reason. 
+ * @return `lhs op rhs`, in a coerced way. + */ +VALUE rb_num_coerce_bin(VALUE lhs, VALUE rhs, ID op); + +/** + * Identical to rb_num_coerce_bin(), except for return values. This function + * best suits for comparison operators e.g. `<=>`. + * + * @param[in] lhs LHS operand. + * @param[in] rhs RHS operand. + * @param[in] op Operator method name. + * @retval RUBY_Qnil Coercion failed for some reason. + * @retval otherwise `lhs op rhs`, in a coerced way. + */ +VALUE rb_num_coerce_cmp(VALUE lhs, VALUE rhs, ID op); + +/** + * Identical to rb_num_coerce_cmp(), except for return values. This function + * best suits for relationship operators e.g. `<=`. + * + * @param[in] lhs LHS operand. + * @param[in] rhs RHS operand. + * @param[in] op Operator method name. + * @exception rb_eArgError Coercion failed for some reason. + * @return `lhs op rhs`, in a coerced way. + */ +VALUE rb_num_coerce_relop(VALUE lhs, VALUE rhs, ID op); + +/** + * This one is optimised for bitwise operations, but the API is identical to + * rb_num_coerce_bin(). + * + * @param[in] lhs LHS operand. + * @param[in] rhs RHS operand. + * @param[in] op Operator method name. + * @exception rb_eArgError Coercion failed for some reason. + * @return `lhs op rhs`, in a coerced way. + */ +VALUE rb_num_coerce_bit(VALUE lhs, VALUE rhs, ID op); + +/** @} */ + +/** + * Converts a numeric value into a Fixnum. This is not a preserving + * conversion; for instance 1.5 would be converted into 1. + * + * @param[in] val A numeric object. + * @exception rb_eTypeError No conversion from `val` to Integer. + * @exception rb_eRangeError `val` out of range. + * @return A fixnum converted from `val`. + * + * @internal + * + * This seems used from nowhere? + */ +VALUE rb_num2fix(VALUE val); + +/** + * Generates a place-value representation of the given Fixnum, with given + * radix. + * + * @param[in] val A fixnum to stringify. + * @param[in] base `2` to `36` inclusive for each radix. 
+ * @exception rb_eArgError `base` is out of range. + * @return An instance of ::rb_cString representing `val`. + * @pre `val` must be a Fixnum (no checks performed). + */ +VALUE rb_fix2str(VALUE val, int base); + +RBIMPL_ATTR_CONST() +/** + * Compares two `double`s. Handy when implementing a spaceship operator. + * + * @param[in] lhs A value. + * @param[in] rhs Another value. + * @retval RB_INT2FIX(-1) `rhs` is "bigger than" `lhs`. + * @retval RB_INT2FIX(1) `lhs` is "bigger than" `rhs`. + * @retval RB_INT2FIX(0) They are equal. + * @retval RUBY_Qnil Not comparable, e.g. NaN. + */ +VALUE rb_dbl_cmp(double lhs, double rhs); + +/** + * Raises the passed `x` to the power of `y`. + * + * @note The return value can be really big. + * @note Also the return value can be really small, in case `x` is a + * negative number. + * @param[in] x A number. + * @param[in] y Another number. + * @retval Inf Cannot express the result. + * @retval 1 Either `y` is 0 or `x` is 1. + * @retval otherwise An instance of ::rb_cInteger whose value is `x ** y`. + * + * @internal + * + * This function returns Infinity when `y` is big enough not to fit into a + * Fixnum. Warning is issued then. + */ +RUBY_EXTERN VALUE rb_int_positive_pow(long x, unsigned long y); RBIMPL_SYMBOL_EXPORT_END() diff --git a/include/ruby/internal/intern/object.h b/include/ruby/internal/intern/object.h index d55178584b..9daad7d046 100644 --- a/include/ruby/internal/intern/object.h +++ b/include/ruby/internal/intern/object.h @@ -17,73 +17,483 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Public APIs related to ::rb_cObject.
*/ +#include "ruby/internal/attr/const.h" +#include "ruby/internal/attr/deprecated.h" +#include "ruby/internal/attr/nonnull.h" #include "ruby/internal/attr/pure.h" #include "ruby/internal/dllexport.h" #include "ruby/internal/value.h" RBIMPL_SYMBOL_EXPORT_BEGIN() +/** + * This macro is (used but) mysterious. Why on earth do we need this? + * + * - `obj != orig` check is done anyways inside of rb_obj_init_copy(). + * - rb_obj_init_copy() returns something. No need are there to add `, 1`. + */ #define RB_OBJ_INIT_COPY(obj, orig) \ ((obj) != (orig) && (rb_obj_init_copy((obj), (orig)), 1)) +/** @old{RB_OBJ_INIT_COPY} */ #define OBJ_INIT_COPY(obj, orig) RB_OBJ_INIT_COPY(obj, orig) -VALUE rb_class_new_instance_pass_kw(int, const VALUE *, VALUE); -VALUE rb_class_new_instance(int, const VALUE*, VALUE); -VALUE rb_class_new_instance_kw(int, const VALUE*, VALUE, int); +/* object.c */ + +/** + * Identical to rb_class_new_instance(), except it passes the passed keywords + * if any to the `#initialize` method. + * + * @param[in] argc Number of objects of `argv`. + * @param[in] argv Arbitrary number of method arguments. + * @param[in] klass An instance of ::rb_cClass. + * @exception rb_eTypeError `klass`'s allocator is undefined. + * @exception rb_eException Any exceptions can happen inside. + * @return An allocated new instance of `klass`. + * @note This is _the_ implementation of `Object.new`. + */ +VALUE rb_class_new_instance_pass_kw(int argc, const VALUE *argv, VALUE klass); + +/** + * Allocates, then initialises an instance of the given class. It first calls + * the passed class' allocator to obtain an uninitialised object, then calls + * its initialiser with the remaining arguments. + * + * @param[in] argc Number of objects of `argv`. + * @param[in] argv Arguments passed to `#initialize`. + * @param[in] klass An instance of ::rb_cClass. + * @exception rb_eTypeError `klass`'s allocator is undefined. + * @exception rb_eException Any exceptions can happen inside. 
+ * @return An allocated new instance of `klass`. + */ +VALUE rb_class_new_instance(int argc, const VALUE *argv, VALUE klass); + +/** + * Identical to rb_class_new_instance(), except you can specify how to handle + * the last element of the given array. + * + * @param[in] argc Number of objects of `argv`. + * @param[in] argv Arbitrary number of method arguments. + * @param[in] klass An instance of ::rb_cClass. + * @param[in] kw_splat Handling of keyword parameters: + * - RB_NO_KEYWORDS `argv`'s last is not a keyword argument. + * - RB_PASS_KEYWORDS `argv`'s last is a keyword argument. + * - RB_PASS_CALLED_KEYWORDS it depends if there is a passed block. + * @exception rb_eTypeError `klass`'s allocator is undefined. + * @exception rb_eException Any exceptions can happen inside. + * @return An allocated new instance of `klass`. + */ +VALUE rb_class_new_instance_kw(int argc, const VALUE *argv, VALUE klass, int kw_splat); + +/** + * Checks for equality of the passed objects, in terms of `Object#eql?`. + * + * @param[in] lhs Comparison left hand side. + * @param[in] rhs Comparison right hand side. + * @retval non-zero They are equal. + * @retval 0 Otherwise. + * @note This function actually calls `lhs.eql?(rhs)` so you cannot + * implement your class' `#eql?` method using it. + */ +int rb_eql(VALUE lhs, VALUE rhs); + +/** + * Generates a textual representation of the given object. + * + * @param[in] obj Arbitrary ruby object. + * @return An instance of ::rb_cString that represents `obj`. + * @note This is the default implementation of `Object#to_s` that each + * subclasses want to override. + */ +VALUE rb_any_to_s(VALUE obj); + +/** + * Generates a human-readable textual representation of the given object. This + * is largely similar to Ruby level `Object#inspect` but not the same; it + * additionally escapes the inspection result so that the string be compatible + * with that of default internal (or default external, if absent). 
+ * + * @param[in] obj Arbitrary ruby object. + * @return An instance of ::rb_cString that represents `obj`. + */ +VALUE rb_inspect(VALUE obj); + +/** + * Queries if the given object is a direct instance of the given class. + * + * @param[in] obj Arbitrary ruby object. + * @param[in] klass An instance of ::rb_cModule. + * @exception rb_eTypeError `klass` is neither module nor class. + * @retval RUBY_Qtrue `obj` is an instance of `klass`. + * @retval RUBY_Qfalse Otherwise. + */ +VALUE rb_obj_is_instance_of(VALUE obj, VALUE klass); + +/** + * Queries if the given object is an instance (of possibly descendants) of the + * given class. + * + * @param[in] obj Arbitrary ruby object. + * @param[in] klass An instance of ::rb_cModule. + * @exception rb_eTypeError `klass` is neither module nor class. + * @retval RUBY_Qtrue `obj` is a `klass`. + * @retval RUBY_Qfalse Otherwise. + */ +VALUE rb_obj_is_kind_of(VALUE obj, VALUE klass); + +/** + * Allocates an instance of the given class. + * + * @param[in] klass A class to instantiate. + * @exception rb_eTypeError `klass` is not a class. + * @return An allocated, not yet initialised instance of `klass`. + * @note It calls the allocator defined by rb_define_alloc_func(). You + * cannot use this function to define an allocator. Use + * TypedData_Make_Struct or others, instead. + * @note Usually prefer rb_class_new_instance() to rb_obj_alloc() and + * rb_obj_call_init(). + * @see rb_class_new_instance() + * @see rb_obj_call_init() + * @see rb_define_alloc_func() + * @see #TypedData_Make_Struct + */ +VALUE rb_obj_alloc(VALUE klass); + +/** + * Produces a shallow copy of the given object. Its list of instance variables + * are copied, but not the objects they reference. It also copies the frozen + * value state. + * + * @param[in] obj Arbitrary ruby object. + * @exception rb_eException `#initialize_copy` can raise anything. + * @return A "clone" of `obj`. 
+ * + * @internal + * + * Unlike ruby-level `Object#clone`, there is no way to control the frozen-ness + * of the return value. + */ +VALUE rb_obj_clone(VALUE obj); + +/** + * Duplicates the given object. This does almost the same thing as + * rb_obj_clone() does. However it does not copy the singleton class (if any). + * It also doesn't copy frozen-ness. + * + * @param[in] obj Arbitrary ruby object. + * @exception rb_eException `#initialize_copy` can raise anything. + * @return A shallow copy of `obj`. + */ +VALUE rb_obj_dup(VALUE obj); + +/** + * Default implementation of `#initialize_copy`, `#initialize_dup` and + * `#initialize_clone`. It does almost nothing. Just raises exceptions for + * checks. + * + * @param[in] dst The destination object. + * @param[in] src The source object. + * @exception rb_eFrozenError `dst` is frozen. + * @exception rb_eTypeError `dst` and `src` have different classes. + * @return Always returns `dst`. + */ +VALUE rb_obj_init_copy(VALUE src, VALUE dst); + +/** + * Just calls rb_obj_freeze_inline() inside. Does this make any sense to + * extension libraries? + * + * @param[out] obj Object to freeze. + * @return Verbatim `obj`. + */ +VALUE rb_obj_freeze(VALUE obj); + +RBIMPL_ATTR_PURE() +/** + * Just calls RB_OBJ_FROZEN() inside. Does this make any sense to extension + * libraries? + * + * @param[in] obj Object in question. + * @retval RUBY_Qtrue Yes it is. + * @retval RUBY_Qfalse No it isn't. + */ +VALUE rb_obj_frozen_p(VALUE obj); + +/* gc.c */ + +/** + * Finds or creates an integer primary key of the given object. In the old + * days this function was a purely arithmetic operation that maps the + * underlying memory address where the object resides into a Ruby's integer. + * Some time around 2.x this changed. It no longer relates its return values + * to C level pointers. This function assigns some random number to the given + * object if absent. The same number will be returned on all subsequent + * requests.
No two active objects share a number. + * + * @param[in] obj Arbitrary ruby object. + * @return An instance of ::rb_cInteger which is an "identifier" of `obj`. + * + * @internal + * + * The "some random number" is in fact a monotonic-increasing process-global + * unique integer, much like an `INTEGER AUTO_INCREMENT PRIMARY KEY` column in + * a MySQL table. + */ +VALUE rb_obj_id(VALUE obj); + +RBIMPL_ATTR_CONST() +/** + * Identical to rb_obj_id(), except it hesitates from allocating a new instance + * of ::rb_cInteger. rb_obj_id() could allocate ::RUBY_T_BIGNUM objects. That + * allocation might perhaps impact negatively. On such situations, this + * function instead returns one-shot temporary small integers that need no + * allocations at all. The values are guaranteed unique at the moment, but no + * future promise is made; could be reused. Use of this API should be very + * instant. It is a failure to store the returned integer to somewhere else. + * + * In short it is difficult to use. + * + * @param[in] obj Arbitrary ruby object. + * @return An instance of ::rb_cInteger unique at the moment. + * + * @internal + * + * This is roughly the old behaviour of rb_obj_id(). + */ +VALUE rb_memory_id(VALUE obj); /* object.c */ -int rb_eql(VALUE, VALUE); -VALUE rb_any_to_s(VALUE); -VALUE rb_inspect(VALUE); -VALUE rb_obj_is_instance_of(VALUE, VALUE); -VALUE rb_obj_is_kind_of(VALUE, VALUE); -VALUE rb_obj_alloc(VALUE); -VALUE rb_obj_clone(VALUE); -VALUE rb_obj_dup(VALUE); -VALUE rb_obj_init_copy(VALUE,VALUE); -VALUE rb_obj_taint(VALUE); RBIMPL_ATTR_PURE() -VALUE rb_obj_tainted(VALUE); -VALUE rb_obj_untaint(VALUE); -VALUE rb_obj_untrust(VALUE); +/** + * Finds a "real" class. As the name implies there are class objects that are + * surreal. This function takes a class, traverses its ancestry tree, and + * returns its nearest ancestor which is neither a module nor a singleton + * class. + * + * @param[in] klass An instance of ::rb_cClass. 
+ * @retval RUBY_Qfalse No real class in `klass`' ancestry tree. + * @retval klass `klass` itself is a real class. + * @retval otherwise Nearest ancestor of `klass` who is real. + */ +VALUE rb_class_real(VALUE klass); RBIMPL_ATTR_PURE() -VALUE rb_obj_untrusted(VALUE); -VALUE rb_obj_trust(VALUE); -VALUE rb_obj_freeze(VALUE); +/** + * Determines if the given two modules are relatives. + * + * @param[in] scion Possible subclass. + * @param[in] ascendant Possible superclass. + * @exception rb_eTypeError `ascendant` is not a module. + * @retval RUBY_Qtrue `scion` inherits, or is equal to `ascendant`. + * @retval RUBY_Qfalse `ascendant` inherits `scion`. + * @retval RUBY_Qnil They are not relatives. + */ +VALUE rb_class_inherited_p(VALUE scion, VALUE ascendant); RBIMPL_ATTR_PURE() -VALUE rb_obj_frozen_p(VALUE); +/** + * Queries the parent of the given class. + * + * @param[in] klass A child class. + * @exception rb_eTypeError `klass` is a `Class.allocate`. + * @retval RUBY_Qfalse `klass` has no superclass. + * @retval otherwise `klass`' superclass. + * + * @internal + * + * Is there any class except ::rb_cBasicObject, that has no superclass? + */ +VALUE rb_class_superclass(VALUE klass); -VALUE rb_obj_id(VALUE); -VALUE rb_memory_id(VALUE); -VALUE rb_obj_class(VALUE); +RBIMPL_ATTR_NONNULL(()) +/** + * Converts an object into another type. Calls the specified conversion method + * if necessary. + * + * @param[in] val An object to convert. + * @param[in] type A value of enum ::ruby_value_type. + * @param[in] name Name to display on error (e.g. "Array"). + * @param[in] mid Conversion method (e.g. "to_ary"). + * @exception rb_eTypeError Failed to convert. + * @return An object of the specified type. 
+ */ +VALUE rb_convert_type(VALUE val, int type, const char *name, const char *mid); -RBIMPL_ATTR_PURE() -VALUE rb_class_real(VALUE); +RBIMPL_ATTR_NONNULL(()) +/** + * Identical to rb_convert_type(), except it returns ::RUBY_Qnil instead of + * raising exceptions, in case of conversion failure. It still raises + * exceptions for various reasons, like when the conversion method itself + * raises, though. + * + * @param[in] val An object to convert. + * @param[in] type A value of enum ::ruby_value_type. + * @param[in] name Name to display on error (e.g. "Array"). + * @param[in] mid Conversion method (e.g. "to_ary"). + * @exception rb_eTypeError The `mid` does not generate `type`. + * @retval RUBY_Qnil No conversion defined. + * @retval otherwise An object of the specified type. + */ +VALUE rb_check_convert_type(VALUE val, int type, const char *name, const char *mid); -RBIMPL_ATTR_PURE() -VALUE rb_class_inherited_p(VALUE, VALUE); -VALUE rb_class_superclass(VALUE); -VALUE rb_class_get_superclass(VALUE); -VALUE rb_convert_type(VALUE,int,const char*,const char*); -VALUE rb_check_convert_type(VALUE,int,const char*,const char*); -VALUE rb_check_to_integer(VALUE, const char *); -VALUE rb_check_to_float(VALUE); -VALUE rb_to_int(VALUE); -VALUE rb_check_to_int(VALUE); -VALUE rb_Integer(VALUE); -VALUE rb_to_float(VALUE); -VALUE rb_Float(VALUE); -VALUE rb_String(VALUE); -VALUE rb_Array(VALUE); -VALUE rb_Hash(VALUE); -double rb_cstr_to_dbl(const char*, int); -double rb_str_to_dbl(VALUE, int); +RBIMPL_ATTR_NONNULL(()) +/** + * Identical to rb_check_convert_type(), except the return value type is fixed + * to ::rb_cInteger. + * + * @param[in] val An object to convert. + * @param[in] mid Conversion method (e.g. "to_ary"). + * @exception rb_eTypeError The `mid` does not generate an integer. + * @retval RUBY_Qnil No conversion defined. + * @retval otherwise An instance of ::rb_cInteger. + */ +VALUE rb_check_to_integer(VALUE val, const char *mid); + +/** + * This is complicated. 
+ * + * - When the passed object is already an instance of ::rb_cFloat, just + * returns it as-is. + * + * - When the passed object is something numeric, the function tries to + * convert it using `#to_f` method. + * + * - If that conversion fails (this happens for instance when the numeric + * is a complex) it returns ::RUBY_Qnil. + * + * - Otherwise returns the conversion result. + * + * - Otherwise it also returns ::RUBY_Qnil. + * + * @param[in] val An object to convert. + * @retval RUBY_Qnil Conversion from `val` to float is undefined. + * @retval otherwise Converted result. + */ +VALUE rb_check_to_float(VALUE val); + +/** + * Identical to rb_check_to_int(), except it raises in case of conversion + * mismatch. + * + * @param[in] val An object to convert. + * @exception rb_eTypeError `#to_int` does not generate an integer. + * @return An instance of ::rb_cInteger. + */ +VALUE rb_to_int(VALUE val); + +/** + * Identical to rb_check_to_integer(), except it uses `#to_int` for conversion. + * + * @param[in] val An object to convert. + * @exception rb_eTypeError `#to_int` does not return an integer. + * @retval RUBY_Qnil No conversion defined. + * @retval otherwise An instance of ::rb_cInteger. + */ +VALUE rb_check_to_int(VALUE val); + +/** + * This is the logic behind `Kernel#Integer`. Numeric types are converted + * directly, with floating point numbers being truncated. Strings are + * interpreted strictly; only leading/trailing whitespaces, plus/minus sign, + * radix indicators such as `0x`, digits, and underscores are allowed. + * Anything else are converted by first trying `#to_int`, then `#to_i`. + * + * This is slightly stricter than `String#to_i`. + * + * @param[in] val An object to convert. + * @exception rb_eArgError Malformed `val` passed. + * @exception rb_eTypeError No conversion defined. + * @return An instance of ::rb_cInteger. + */ +VALUE rb_Integer(VALUE val); + +/** + * Identical to rb_check_to_float(), except it raises on error. 
+ * + * @param[in] val An object to convert. + * @exception rb_eTypeError No conversion defined. + * @return An instance of ::rb_cFloat. + */ +VALUE rb_to_float(VALUE val); + +/** + * This is the logic behind `Kernel#Float`. Numeric types are converted + * directly to the nearest value that a Float can represent. Strings are + * interpreted strictly; only leading/trailing whitespaces are allowed except + * what `strtod` understands. Anything else are converted using `#to_f`. + * + * This is slightly stricter than `String#to_f`. + * + * @param[in] val An object to convert. + * @exception rb_eArgError Malformed `val` passed. + * @exception rb_eTypeError No conversion defined. + * @return An instance of ::rb_cFloat. + */ +VALUE rb_Float(VALUE val); + +/** + * This is the logic behind `Kernel#String`. Arguments are converted by first + * trying `#to_str`, then `#to_s`. + * + * @param[in] val An object to convert. + * @exception rb_eTypeError No conversion defined. + * @return An instance of ::rb_cString. + */ +VALUE rb_String(VALUE val); + +/** + * This is the logic behind `Kernel#Array`. Arguments are converted by first + * trying `#to_ary`, then `#to_a`, and if both failed, returns an array of + * length 1 that contains the passed argument as the sole contents. + * + * @param[in] val An object to convert. + * @return An instance of ::rb_cArray. + */ +VALUE rb_Array(VALUE val); + +/** + * This is the logic behind `Kernel#Hash`. Arguments are converted by first + * trying `#to_hash`. if it failed, and the argument is either ::RUBY_Qnil or + * an empty array, returns an empty hash. Otherwise an exception is raised. + * + * @param[in] val An object to convert. + * @exception rb_eTypeError No conversion defined. + * @return An instance of ::rb_cHash. 
+ */ +VALUE rb_Hash(VALUE val); + +RBIMPL_ATTR_NONNULL(()) +/** + * Converts a textual representation of a real number into a numeric, which is + * the nearest value that the return type can represent, of the value that the + * argument represents. This is in fact a 2-in-1 function whose behaviour can + * be controlled using the second (mode) argument. If the mode is zero, this + * function is in "historical" mode which only understands "floating-constant" + * defined at ISO/IEC 9899:1990 section 6.1.3.1. If the mode is nonzero, it is + * in "extended" mode, which also accepts "hexadecimal-floating-constant" + * defined at ISO/IEC 9899:2018 section 6.4.4.2. + * + * @param[in] str A textual representation of a real number. + * @param[in] mode Conversion mode, as described above. + * @exception rb_eArgError Malformed `str` passed. + * @see https://bugs.ruby-lang.org/issues/2969 + * @note Null pointers are allowed, and it returns 0.0 then. + */ +double rb_cstr_to_dbl(const char *str, int mode); + +/** + * Identical to rb_cstr_to_dbl(), except it accepts a Ruby's string instead of + * C's. + * + * @param[in] str A textual representation of a real number. + * @param[in] mode Conversion mode, as described in rb_cstr_to_dbl(). + * @exception rb_eArgError Malformed `str` passed. + * @see https://bugs.ruby-lang.org/issues/2969 + */ +double rb_str_to_dbl(VALUE str, int mode); RBIMPL_SYMBOL_EXPORT_END() diff --git a/include/ruby/internal/intern/parse.h b/include/ruby/internal/intern/parse.h index 9424657bbc..7c4e9925b9 100644 --- a/include/ruby/internal/intern/parse.h +++ b/include/ruby/internal/intern/parse.h @@ -17,45 +17,176 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. 
* @brief Public APIs related to ::rb_cSymbol. */ #include "ruby/internal/attr/const.h" +#include "ruby/internal/attr/nonnull.h" #include "ruby/internal/dllexport.h" #include "ruby/internal/value.h" RBIMPL_SYMBOL_EXPORT_BEGIN() -/* parse.y */ -ID rb_id_attrset(ID); +/* symbol.c */ + +/** + * Calculates an ID of attribute writer. For instance it returns `:foo=` when + * passed `:foo`. + * + * @param[in] id An id. + * @exception rb_eNameError `id` is not for attributes (e.g. operator). + * @return Calculated name of attribute writer. + */ +ID rb_id_attrset(ID id); RBIMPL_ATTR_CONST() -int rb_is_const_id(ID); +/** + * Classifies the given ID, then sees if it is a constant. In case an ID is in + * Unicode (likely), its "constant"-ness is determined if its first character + * is either upper case or title case. Otherwise it is detected if case- + * folding the first character changes its case or not. + * + * @param[in] id An id to classify. + * @retval 1 It is a constant. + * @retval 0 It isn't. + */ +int rb_is_const_id(ID id); RBIMPL_ATTR_CONST() -int rb_is_global_id(ID); +/** + * Classifies the given ID, then sees if it is a global variable. A global + * variable must start with `$`. + * + * @param[in] id An id to classify. + * @retval 1 It is a global variable. + * @retval 0 It isn't. + */ +int rb_is_global_id(ID id); RBIMPL_ATTR_CONST() -int rb_is_instance_id(ID); +/** + * Classifies the given ID, then sees if it is an instance variable. An + * instance variable must start with `@`, but not `@@`. + * + * @param[in] id An id to classify. + * @retval 1 It is an instance variable. + * @retval 0 It isn't. + */ +int rb_is_instance_id(ID id); RBIMPL_ATTR_CONST() -int rb_is_attrset_id(ID); +/** + * Classifies the given ID, then sees if it is an attribute writer. An + * attribute writer is otherwise a local variable, except it ends with `=`. + * + * @param[in] id An id to classify. + * @retval 1 It is an attribute writer. + * @retval 0 It isn't. 
+ */ +int rb_is_attrset_id(ID id); RBIMPL_ATTR_CONST() -int rb_is_class_id(ID); +/** + * Classifies the given ID, then sees if it is a class variable. A class + * variable must start with `@@`. + * + * @param[in] id An id to classify. + * @retval 1 It is a class variable. + * @retval 0 It isn't. + */ +int rb_is_class_id(ID id); RBIMPL_ATTR_CONST() -int rb_is_local_id(ID); +/** + * Classifies the given ID, then sees if it is a local variable. A local + * variable starts with a lowercase character, followed by some alphanumeric + * characters or `_`, then ends with anything other than `!`, `?`, or `=`. + * + * @param[in] id An id to classify. + * @retval 1 It is a local variable. + * @retval 0 It isn't. + */ +int rb_is_local_id(ID id); RBIMPL_ATTR_CONST() +/** + * Classifies the given ID, then sees if it is a junk ID. An ID with no + * special syntactic structure is considered junk. This category includes for + * instance punctuation. + * + * @param[in] id An id to classify. + * @retval 1 It is junk. + * @retval 0 It isn't. + */ int rb_is_junk_id(ID); -int rb_symname_p(const char*); + +RBIMPL_ATTR_NONNULL(()) +/** + * Sees if the passed C string constructs a valid syntactic symbol. Invalid + * ones for instance includes whitespaces. + * + * @param[in] str A C string to check. + * @retval 1 It is a valid symbol name. + * @retval 0 It is invalid as a symbol name. + */ +int rb_symname_p(const char *str); + +/* vm.c */ + +/** + * Queries the last match, or `Regexp.last_match`, or the `$~`. You don't have + * to use it, because in reality you can get `$~` using rb_gv_get() as usual. + * + * @retval RUBY_Qnil The method has not run a regular expression. + * @retval otherwise An instance of ::rb_cMatch. + */ VALUE rb_backref_get(void); -void rb_backref_set(VALUE); + +/** + * Updates `$~`. You don't have to use it, because in reality you can set `$~` + * using rb_gv_set() as usual. + * + * @param[in] md Arbitrary Ruby object.
+ * @post The passed object is assigned to `$~`. + * + * @internal + * + * Yes, this function bypasses the Check_Type() that would normally prevent + * evil souls from assigning evil objects to `$~`. Use of this function is a + * really bad smell. + */ +void rb_backref_set(VALUE md); + +/** + * Queries the last line, or the `$_`. You don't have to use it, because in + * reality you can get `$_` using rb_gv_get() as usual. + * + * @retval RUBY_Qnil There has never been a "line" yet. + * @retval otherwise The last set `$_` value. + */ VALUE rb_lastline_get(void); -void rb_lastline_set(VALUE); + +/** + * Updates `$_`. You don't have to use it, because in reality you can set `$_` + * using rb_gv_set() as usual. + * + * @param[in] str Arbitrary Ruby object. + * @post The passed object is assigned to `$_`. + * + * @internal + * + * Unlike `$~`, you can assign non-strings to `$_`, even from ruby scripts. + */ +void rb_lastline_set(VALUE str); /* symbol.c */ + +/** + * Collects every single bits of symbols that have ever interned in the entire + * history of the current process. + * + * @return An array that contains all symbols that have ever existed. + */ VALUE rb_sym_all_symbols(void); RBIMPL_SYMBOL_EXPORT_END() diff --git a/include/ruby/internal/intern/proc.h b/include/ruby/internal/intern/proc.h index d6f77cbd4d..b8c3c5e146 100644 --- a/include/ruby/internal/intern/proc.h +++ b/include/ruby/internal/intern/proc.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Public APIs related to ::rb_cProc. */ #include "ruby/internal/dllexport.h" @@ -27,26 +27,326 @@ RBIMPL_SYMBOL_EXPORT_BEGIN() /* proc.c */ + +/** + * Constructs a Proc object from implicitly passed components. 
When a ruby + * method is called with a block, that block is not explicitly passed around + * using C level function parameters. This function gathers all the necessary + * info to turn them into a Ruby level instance of ::rb_cProc. + * + * @exception rb_eArgError There is no passed block. + * @return An instance of ::rb_cProc. + */ VALUE rb_block_proc(void); + +/** + * Identical to rb_block_proc(), except it returns a lambda. + * + * @exception rb_eArgError There is no passed block. + * @return An instance of ::rb_cProc. + */ VALUE rb_block_lambda(void); -VALUE rb_proc_new(rb_block_call_func_t, VALUE); -VALUE rb_obj_is_proc(VALUE); -VALUE rb_proc_call(VALUE, VALUE); -VALUE rb_proc_call_kw(VALUE, VALUE, int); -VALUE rb_proc_call_with_block(VALUE, int argc, const VALUE *argv, VALUE); -VALUE rb_proc_call_with_block_kw(VALUE, int argc, const VALUE *argv, VALUE, int); -int rb_proc_arity(VALUE); -VALUE rb_proc_lambda_p(VALUE); + +/** + * This is an rb_iterate() + rb_block_proc() combo. + * + * ```CXX + * VALUE + * my_own_iterator(RB_BLOCK_CALL_FUNC_ARGLIST(y, c)) + * { + * const auto plus = rb_intern("+"); + * return rb_funcall(c, plus, 1, y); + * } + * + * VALUE + * my_own_method(VALUE self) + * { + * return rb_proc_new(my_own_iterator, self); + * } + * ``` + * + * @param[in] func A backend function of a proc. + * @param[in] callback_arg Passed to `func`'s callback_arg. + * @return A C-backended proc object. + * + */ +VALUE rb_proc_new(rb_block_call_func_t func, VALUE callback_arg); + +/** + * Queries if the given object is a proc. + * + * @note This is about the object's data structure, not its class etc. + * @param[in] recv Object in question. + * @retval RUBY_Qtrue It is a proc. + * @retval RUBY_Qfalse Otherwise. + */ +VALUE rb_obj_is_proc(VALUE recv); + +/** + * Evaluates the passed proc with the passed arguments. + * + * @param[in] recv The proc to call. + * @param[in] args An instance of ::RArray which is the arguments.
+ * @exception rb_eException Any exceptions happen inside. + * @return What the proc evaluates to. + */ +VALUE rb_proc_call(VALUE recv, VALUE args); + +/** + * Identical to rb_proc_call(), except you can specify how to handle the last + * element of the given array. + * + * @param[in] recv The proc to call. + * @param[in] args An instance of ::RArray which is the arguments. + * @param[in] kw_splat Handling of keyword parameters: + * - RB_NO_KEYWORDS `args`' last is not a keyword argument. + * - RB_PASS_KEYWORDS `args`' last is a keyword argument. + * - RB_PASS_CALLED_KEYWORDS it depends if there is a passed block. + * @exception rb_eException Any exceptions happen inside. + * @return What the proc evaluates to. + */ +VALUE rb_proc_call_kw(VALUE recv, VALUE args, int kw_splat); + +/** + * Identical to rb_proc_call(), except you can additionally pass another proc + * object, as a block. Nowadays procs can take blocks: + * + * ```ruby + * l = -> (positional, optional=nil, *rest, kwarg:, **kwrest, &block) { + * # ... how can we pass this `&block`? ^^^^^^ + * } + * ``` + * + * And this function is to pass one to such procs. + * + * @param[in] recv The proc to call. + * @param[in] argc Number of arguments. + * @param[in] argv Arbitrary number of proc arguments. + * @param[in] proc Proc as a passed block. + * @exception rb_eException Any exceptions happen inside. + * @return What the proc evaluates to. + */ +VALUE rb_proc_call_with_block(VALUE recv, int argc, const VALUE *argv, VALUE proc); + +/** + * Identical to rb_proc_call_with_block(), except you can specify how to handle + * the last element of the given array. It can also be seen as a routine + * identical to rb_proc_call_kw(), except you can additionally pass another + * proc object as a block. + * + * @param[in] recv The proc to call. + * @param[in] argc Number of arguments. + * @param[in] argv Arbitrary number of proc arguments. + * @param[in] proc Proc as a passed block. 
+ * @param[in] kw_splat Handling of keyword parameters: + * - RB_NO_KEYWORDS `args`' last is not a keyword argument. + * - RB_PASS_KEYWORDS `args`' last is a keyword argument. + * - RB_PASS_CALLED_KEYWORDS it depends if there is a passed block. + * @exception rb_eException Any exceptions happen inside. + * @return What the proc evaluates to. + */ +VALUE rb_proc_call_with_block_kw(VALUE recv, int argc, const VALUE *argv, VALUE proc, int kw_splat); + +/** + * Queries the number of mandatory arguments of the given Proc. If its block + * is declared to take no arguments, returns `0`. If the block is known to + * take exactly `n` arguments, returns `n`. If the block has optional + * arguments, returns `-n-1`, where `n` is the number of mandatory arguments, + * with the exception for blocks that are not lambdas and have only a finite + * number of optional arguments; in this latter case, returns `n`. Keyword + * arguments will be considered as a single additional argument, that argument + * being mandatory if any keyword argument is mandatory. + * + * @param[in] recv Target Proc object. + * @retval 0 It takes no arguments. + * @retval >0 It takes exactly this number of arguments. + * @retval <0 It takes optional arguments. + */ +int rb_proc_arity(VALUE recv); + +/** + * Queries if the given object is a lambda. Instances of ::rb_cProc are either + * lambda or proc. They differ in several points. This function can + * distinguish them without actually evaluating their contents. + * + * @param[in] recv Target proc object. + * @retval RUBY_Qtrue It is a lambda. + * @retval RUBY_Qfalse Otherwise. + */ +VALUE rb_proc_lambda_p(VALUE recv); + +/** + * Snapshots the current execution context and turn it into an instance of + * ::rb_cBinding. + * + * @return An instance of ::rb_cBinding. 
+ */ VALUE rb_binding_new(void); -VALUE rb_obj_method(VALUE, VALUE); -VALUE rb_obj_is_method(VALUE); -VALUE rb_method_call(int, const VALUE*, VALUE); -VALUE rb_method_call_kw(int, const VALUE*, VALUE, int); -VALUE rb_method_call_with_block(int, const VALUE *, VALUE, VALUE); -VALUE rb_method_call_with_block_kw(int, const VALUE *, VALUE, VALUE, int); -int rb_mod_method_arity(VALUE, ID); -int rb_obj_method_arity(VALUE, ID); -VALUE rb_protect(VALUE (*)(VALUE), VALUE, int*); + +/** + * Creates a method object. A method object is a proc-like object that you can + * "call". Note that a method object snapshots the method at the time the + * object is created: + * + * ```ruby + * class Foo + * def foo + * return 1 + * end + * end + * + * obj = Foo.new.method(:foo) + * + * class Foo + * def foo + * return 2 + * end + * end + * + * obj.call # => 1, not 2. + * ``` + * + * @param[in] recv Receiver of the method. + * @param[in] mid Method name, in either String or Symbol. + * @exception rb_eNoMethodError No such method. + * @return An instance of ::rb_cMethod. + */ +VALUE rb_obj_method(VALUE recv, VALUE mid); + +/** + * Queries if the given object is a method. + * + * @note This is about the object's data structure, not its class etc. + * @param[in] recv Object in question. + * @retval RUBY_Qtrue It is a method. + * @retval RUBY_Qfalse Otherwise. + */ +VALUE rb_obj_is_method(VALUE recv); + +/** + * Evaluates the passed method with the passed arguments. + * + * @param[in] argc Number of objects of `argv`. + * @param[in] argv Arbitrary number of method arguments. + * @param[in] recv The method object to call. + * @exception rb_eTypeError `recv` is not a method. + * @exception rb_eException Any exceptions happen inside. + * @return What the method returns. + */ +VALUE rb_method_call(int argc, const VALUE *argv, VALUE recv); + +/** + * Identical to rb_method_call(), except you can specify how to handle the last + * element of the given array. 
+ * + * @param[in] argc Number of objects of `argv`. + * @param[in] argv Arbitrary number of method arguments. + * @param[in] recv The method object to call. + * @param[in] kw_splat Handling of keyword parameters: + * - RB_NO_KEYWORDS `argv`'s last is not a keyword argument. + * - RB_PASS_KEYWORDS `argv`'s last is a keyword argument. + * - RB_PASS_CALLED_KEYWORDS it depends if there is a passed block. + * @exception rb_eTypeError `recv` is not a method. + * @exception rb_eException Any exceptions happen inside. + * @return What the method returns. + */ +VALUE rb_method_call_kw(int argc, const VALUE *argv, VALUE recv, int kw_splat); + +/** + * Identical to rb_method_call(), except you can additionally pass a proc as a + * block. + * + * @param[in] argc Number of objects of `argv`. + * @param[in] argv Arbitrary number of method arguments. + * @param[in] recv The method object to call. + * @param[in] proc Proc as a passed block. + * @exception rb_eTypeError `recv` is not a method. + * @exception rb_eException Any exceptions happen inside. + * @return What the method returns. + */ +VALUE rb_method_call_with_block(int argc, const VALUE *argv, VALUE recv, VALUE proc); + +/** + * Identical to rb_method_call_with_block(), except you can specify how to + * handle the last element of the given array. It can also be seen as a + * routine identical to rb_method_call_kw(), except you can additionally pass + * another proc object as a block. + * + * @param[in] argc Number of objects of `argv`. + * @param[in] argv Arbitrary number of method arguments. + * @param[in] recv The method object to call. + * @param[in] proc Proc as a passed block. + * @param[in] kw_splat Handling of keyword parameters: + * - RB_NO_KEYWORDS `argv`'s last is not a keyword argument. + * - RB_PASS_KEYWORDS `argv`'s last is a keyword argument. + * - RB_PASS_CALLED_KEYWORDS it depends if there is a passed block. + * @exception rb_eTypeError `recv` is not a method.
+ * @exception rb_eException Any exceptions happen inside. + * @return What the method returns. + */ +VALUE rb_method_call_with_block_kw(int argc, const VALUE *argv, VALUE recv, VALUE proc, int kw_splat); + +/** + * Queries the number of mandatory arguments of the method defined in the given + * module. If it is declared to take no arguments, returns `0`. If it takes + * exactly `n` arguments, returns `n`. If it has optional arguments, returns + * `-n-1`, where `n` is the number of mandatory arguments. Keyword arguments + * will be considered as a single additional argument, that argument being + * mandatory if any keyword argument is mandatory. + * + * @param[in] mod Namespace to search a method for. + * @param[in] mid Method id. + * @retval 0 It takes no arguments. + * @retval >0 It takes exactly this number of arguments. + * @retval <0 It takes optional arguments. + */ +int rb_mod_method_arity(VALUE mod, ID mid); + +/** + * Identical to rb_mod_method_arity(), except it searches for singleton methods + * rather than instance methods. + * + * @param[in] obj Object to search for a singleton method. + * @param[in] mid Method id. + * @retval 0 It takes no arguments. + * @retval >0 It takes exactly this number of arguments. + * @retval <0 It takes optional arguments. + */ +int rb_obj_method_arity(VALUE obj, ID mid); + +/* eval.c */ + +RBIMPL_ATTR_NONNULL((1)) +/** + * Protects a function call from potential global escapes from the function. + * Such global escapes include exceptions, `throw`, `break`, for example. + * + * It first calls the function func with `args` as the argument. If no global + * escape occurred during the function, it returns the result and `*state` is + * zero. Otherwise, it returns ::RUBY_Qnil and sets `*state` to nonzero. If + * `state` is `NULL`, it is not set in both cases. + * + * @param[in] func A function that potentially escapes globally. + * @param[in] args Passed as-is to `func`. + * @param[out] state State of execution. 
+ * @return What `func` returns, or an undefined value when it did not + * return. + * @post `*state` is set to zero if succeeded. Nonzero otherwise. + * @warning You have to clear the error info with `rb_set_errinfo(Qnil)` if + * you decide to ignore the caught exception. + * @see rb_eval_string_protect() + * @see rb_load_protect() + * + * @internal + * + * The "undefined value" described above is in fact ::RUBY_Qnil for now. But + * @shyouhei doesn't think that we would never change that. + * + * Though not a part of our public API, `state` is in fact an + * enum ruby_tag_type. You can see the potential "nonzero" values by looking + * at vm_core.h. + */ +VALUE rb_protect(VALUE (*func)(VALUE args), VALUE args, int *state); RBIMPL_SYMBOL_EXPORT_END() diff --git a/include/ruby/internal/intern/process.h b/include/ruby/internal/intern/process.h index 2b1005a205..cfa5e13162 100644 --- a/include/ruby/internal/intern/process.h +++ b/include/ruby/internal/intern/process.h @@ -17,9 +17,10 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Public APIs related to ::rb_mProcess. */ +#include "ruby/internal/attr/nonnull.h" #include "ruby/internal/attr/noreturn.h" #include "ruby/internal/config.h" /* rb_pid_t is defined here. */ #include "ruby/internal/dllexport.h" @@ -28,17 +29,252 @@ RBIMPL_SYMBOL_EXPORT_BEGIN() /* process.c */ + +/** + * Wait for the specified process to terminate, reap it, and return its status. + * + * @param[in] pid The process ID to wait for. + * @param[in] flags The flags to pass to waitpid(2). + * @return VALUE An instance of Process::Status. + */ +VALUE rb_process_status_wait(rb_pid_t pid, int flags); + +/** + * Sets the "last status", or the `$?`. 
+ * + * @param[in] status The termination status, as defined in `waitpid(3posix)`. + * @param[in] pid The last child of the current process. + * @post `$?` is updated. + */ void rb_last_status_set(int status, rb_pid_t pid); + +/** + * Queries the "last status", or the `$?`. + * + * @retval RUBY_Qnil The current thread has no dead children. + * @retval otherwise An instance of Process::Status describing the status of + * the child that was most recently `wait`-ed. + */ VALUE rb_last_status_get(void); -int rb_proc_exec(const char*); + +RBIMPL_ATTR_NONNULL(()) +/** + * Executes a shell command. + * + * @warning THIS FUNCTION RETURNS on error! + * @param[in] cmd Passed to the shell. + * @retval -1 Something prevented the command execution. + * @post Upon successful execution this function doesn't return. + * @post In case it returns the `errno` is set properly. + */ +int rb_proc_exec(const char *cmd); RBIMPL_ATTR_NORETURN() -VALUE rb_f_exec(int, const VALUE*); +/** + * Replaces the current process by running the given external command. This is + * the implementation of `Kernel#exec`. + * + * @param[in] argc Number of objects in `argv`. + * @param[in] argv Command and its options to execute. + * @exception rb_eTypeError Invalid options e.g. non-String argv. + * @exception rb_eArgError Invalid options e.g. redirection cycle. + * @exception rb_eNotImpError Not implemented e.g. no `setuid(2)`. + * @exception rb_eRuntimeError `Process::UID.switch` in operation. + * @exception rb_eSystemCallError `execve(2)` failed. + * @warning This function doesn't return. + * @warning On failure it raises. On success the process is replaced. + * + * @internal + * + * @shyouhei have to say that the rdoc for `Kernel#exec` is fairly incomplete. + * AFAIK this function ultimately takes the following signature: + * + * ```rbs + * type boolx = bool | nil # != `boolish` + * + * type rlim_t = Integer # rlim_cur + * | [ Integer, Integer ] # rlim_cur, rlim_max + * + * type uid_t = String # e.g. 
"root" + * | Integer # e.g. 0 + * + * type gid_t = String # e.g. "wheel" + * | Integer # e.g. 0 + * + * type fmode = String # e.g. "rb" + * | Integer # e.g. O_RDONLY | O_BINARY + * + * type mode_t = Integer # e.g. 0644 + * + * type pgrp = true # Creates a dedicated pgroup + * | 0 # ditto + * | nil # Uses the current one + * | Integer # Uses this specific pgroup + * + * type fd = :in # STDIN + * | :out # STDOUT + * | :err # STDERR + * | IO # This specific IO + * | Integer # A file descriptor of this # + * + * type src = fd | [ fd ] + * type dst = :close # Intuitive + * | fd # Intuitive + * | String # Open a file at this path + * | [ String ] # ... using O_RDONLY + * | [ String, fmode ] # ... using this mode + * | [ String, fmode, mode_t ] # ... with a permission + * | [ :child, fd ] # fd of child side + * + * type redir = Hash[ src, dst ] + * + * # ---- + * + * # Key-value pair of environment variables + * type envp = Hash[ String, String ] + * + * # Actual name (and the name passed to the subprocess if any) + * type arg0 = String | [ String, String ] + * + * # Arbitrary string parameters + * type argv = String + * + * # Exec options: + * type argh = redir | { + * chdir: String, # Working directory + * close_others: boolx, # O_CLOEXEC like behaviour + * gid: gid_t, # setegid(2) + * pgroup: pgrp, # setpgrp(2) + * rlimit_as: rlim_t, # setrlimit(2) + * rlimit_core: rlim_t, # ditto + * rlimit_cpu: rlim_t, # ditto + * rlimit_data: rlim_t, # ditto + * rlimit_fsize: rlim_t, # ditto + * rlimit_memlock: rlim_t, # ditto + * rlimit_msgqueue: rlim_t, # ditto + * rlimit_nice: rlim_t, # ditto + * rlimit_nofile: rlim_t, # ditto + * rlimit_nproc: rlim_t, # ditto + * rlimit_rss: rlim_t, # ditto + * rlimit_rtprio: rlim_t, # ditto + * rlimit_rttime: rlim_t, # ditto + * rlimit_sbsize: rlim_t, # ditto + * rlimit_sigpending: rlim_t, # ditto + * rlimit_stack: rlim_t, # ditto + * uid: uid_t, # seteuid(2) + * umask: mode_t, # umask(2) + * unsetenv_others: boolx # Unset everything except
the passed envp + * } + * + * # ==== + * + * class Kernel + * def self?.exec + * : ( arg0 cmd, *argv args ) -> void + * | ( arg0 cmd, *argv args, argh opts) -> void + * | (envp env, arg0 cmd, *argv args ) -> void + * | (envp env, arg0 cmd, *argv args, argh opts) -> void + * end + * ``` + */ +VALUE rb_f_exec(int argc, const VALUE *argv); + +/** + * Waits for a process, with releasing GVL. + * + * @param[in] pid Process ID. + * @param[out] status The wait status is filled back. + * @param[in] flags Wait options. + * @retval -1 System call failed, errno set. + * @retval 0 WNOHANG but no waitable children. + * @retval otherwise A process ID that was `wait()`-ed. + * @post Upon successful return `status` is updated to have the process' + * status. + * @note `status` can be NULL. + * @note The arguments are passed through to underlying system call(s). + * Can have special meanings. For instance passing `(rb_pid_t)-1` + * to `pid` means it waits for any processes, under + * POSIX-compliant situations. + */ rb_pid_t rb_waitpid(rb_pid_t pid, int *status, int flags); + +/** + * This is a shorthand of rb_waitpid without status and flags. It has been + * like this since the very beginning. The initial revision already did the + * same thing. Not sure why, then, it has been named `syswait`. AFAIK this is + * different from how `wait(3posix)` works. + * + * @param[in] pid Passed to rb_waitpid(). + */ void rb_syswait(rb_pid_t pid); -rb_pid_t rb_spawn(int, const VALUE*); -rb_pid_t rb_spawn_err(int, const VALUE*, char*, size_t); -VALUE rb_proc_times(VALUE); + +/** + * Identical to rb_f_exec(), except it spawns a child process instead of + * replacing the current one. + * + * @param[in] argc Number of objects in `argv`. + * @param[in] argv Command and its options to execute. + * @exception rb_eTypeError Invalid options e.g. non-String argv. + * @exception rb_eArgError Invalid options e.g. redirection cycle. + * @exception rb_eNotImpError Not implemented e.g. no `setuid(2)`. 
+ * @exception rb_eRuntimeError `Process::UID.switch` in operation. + * @retval -1 Child process died for some reason. + * @retval otherwise The ID of the born child. + * + * @internal + * + * This is _really_ identical to rb_f_exec() until ultimately calling the + * system call. Almost everything is shared between these two (and + * rb_f_system()). + */ +rb_pid_t rb_spawn(int argc, const VALUE *argv); + +/** + * Identical to rb_spawn(), except you can additionally know the detailed + * situation in case of abnormal parturitions. + * + * @param[in] argc Number of objects in `argv`. + * @param[in] argv Command and its options to execute. + * @param[out] errbuf Error description write-back buffer. + * @param[in] buflen Number of bytes of `errbuf`, including NUL. + * @exception rb_eTypeError Invalid options e.g. non-String argv. + * @exception rb_eArgError Invalid options e.g. redirection cycle. + * @exception rb_eNotImpError Not implemented e.g. no `setuid(2)`. + * @exception rb_eRuntimeError `Process::UID.switch` in operation. + * @retval -1 Child process died for some reason. + * @retval otherwise The ID of the born child. + * @post In case of `-1`, at most `buflen` bytes of the reason why is + * written back to `errbuf`. + */ +rb_pid_t rb_spawn_err(int argc, const VALUE *argv, char *errbuf, size_t buflen); + +/** + * Gathers info about resources consumed by the current process. + * + * @param[in] _ Not used. Pass anything. + * @return An instance of `Process::Tms`. + * + * @internal + * + * This function might or might not exist depending on `./configure` result. + * It must be a portability hell. Better not use. + */ +VALUE rb_proc_times(VALUE _); + +/** + * "Detaches" a subprocess. In POSIX systems every child process that a + * process creates must be `wait(2)`-ed. A child process that died yet has not + * been waited for so far is called a "zombie", which more or less consumes + * resources. This function automates reclamation of such processes.
Once + * after this function successfully returns you can basically forget about the + * child process. + * + * @param[in] pid Process to wait. + * @return An instance of ::rb_cThread which is `waitpid(2)`-ing `pid`. + * @post You can just forget about the return value. GC reclaims it. + * @post You can know the exit status by querying `#value` of the + * return value (which is a blocking operation). + */ VALUE rb_detach_process(rb_pid_t pid); RBIMPL_SYMBOL_EXPORT_END() diff --git a/include/ruby/internal/intern/random.h b/include/ruby/internal/intern/random.h index 6482a8f6e8..5577f53cb4 100644 --- a/include/ruby/internal/intern/random.h +++ b/include/ruby/internal/intern/random.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief MT19937 backended pseudo random number generator. * @see Matsumoto, M., Nishimura, T., "Mersenne Twister: A 623- * dimensionally equidistributed uniform pseudorandom number @@ -30,13 +30,85 @@ RBIMPL_SYMBOL_EXPORT_BEGIN() /* random.c */ + +/** + * Generates a 32 bit random number. + * + * @return A random number. + * @note Now that we have ractors, the RNG behind this function is + * per-ractor. + */ unsigned int rb_genrand_int32(void); + +/** + * Generates a `double` random number. + * + * @return A random number. + * @note This function shares the RNG with rb_genrand_int32(). + */ double rb_genrand_real(void); + +/** + * Resets the RNG behind rb_genrand_int32()/rb_genrand_real(). + * + * @post The (now per-ractor) default RNG's internal state is cleared. + */ void rb_reset_random_seed(void); + +/** + * Generates a String of random bytes. + * + * @param[in,out] rnd An instance of ::rb_cRandom. + * @param[in] n Requested number of bytes. 
+ * @return An instance of ::rb_cString, of binary, of `n` bytes length, + * whose contents are random bits. + * + * @internal + * + * @shyouhei doesn't know if this is an Easter egg or an official feature, but + * this function can take a wider range of objects, such as `Socket::Ifaddr`. + * The arguments are just silently ignored and the default RNG is used instead, + * if they are non-RNG. + */ VALUE rb_random_bytes(VALUE rnd, long n); + +/** + * Identical to rb_genrand_int32(), except it generates using the passed RNG. + * + * @param[in,out] rnd An instance of ::rb_cRandom. + * @return A random number. + */ unsigned int rb_random_int32(VALUE rnd); + +/** + * Identical to rb_genrand_real(), except it generates using the passed RNG. + * + * @param[in,out] rnd An instance of ::rb_cRandom. + * @return A random number. + */ double rb_random_real(VALUE rnd); + +/** + * Identical to rb_genrand_ulong_limited(), except it generates using the + * passed RNG. + * + * @param[in,out] rnd An instance of ::rb_cRandom. + * @param[in] limit Max possible return value. + * @return A random number, distributed in `[0, limit]` interval. + * @note Note it can return `limit`. + * @note Whether the return value distributes uniformly in the + * interval or not depends on how the argument RNG behaves; at + * least in case of MT19937 it does. + */ unsigned long rb_random_ulong_limited(VALUE rnd, unsigned long limit); + +/** + * Generates a random number whose upper limit is `i`. + * + * @param[in] i Max possible return value. + * @return A random number, uniformly distributed in `[0, i]` interval. + * @note Note it can return `i`.
+ */ unsigned long rb_genrand_ulong_limited(unsigned long i); RBIMPL_SYMBOL_EXPORT_END() diff --git a/include/ruby/internal/intern/range.h b/include/ruby/internal/intern/range.h index 7ca47915e2..1f7d7c313f 100644 --- a/include/ruby/internal/intern/range.h +++ b/include/ruby/internal/intern/range.h @@ -17,17 +17,71 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Public APIs related to ::rb_cRange. */ +#include "ruby/internal/attr/nonnull.h" #include "ruby/internal/dllexport.h" #include "ruby/internal/value.h" RBIMPL_SYMBOL_EXPORT_BEGIN() /* range.c */ -VALUE rb_range_new(VALUE, VALUE, int); -VALUE rb_range_beg_len(VALUE, long*, long*, long, int); + +/** + * Creates a new Range. + * + * @param[in] beg "Left" or "lowest" endpoint of the range. + * @param[in] end "Right" or "highest" endpoint of the range. + * @param[in] excl Whether the range is open-ended. + * @exception rb_eArgError `beg` and `end` are not comparable. + * @note These days both endpoints can be ::RUBY_Qnil, which means that + * endpoint is unbound. + */ +VALUE rb_range_new(VALUE beg, VALUE end, int excl); + +RBIMPL_ATTR_NONNULL(()) +/** + * Deconstructs a numerical range. As the arguments are `long` based, it + * expects everything are in the `long` domain. + * + * @param[in] range A range of numerical endpoints. + * @param[out] begp Return value buffer. + * @param[out] lenp Return value buffer. + * @param[in] len Updated length. + * @param[in] err In case `len` is out of range... + * - `0`: returns ::RUBY_Qnil. + * - `1`: raises ::rb_eRangeError. + * - `2`: `beg` and `len` expanded accordingly. + * @exception rb_eTypeError `range` is not a numerical range. + * @exception rb_eRangeError `range` cannot fit into `long`. 
+ * @retval RUBY_Qfalse `range` is not an ::rb_cRange. + * @retval RUBY_Qnil `len` is out of `range` but `err` is zero. + * @retval RUBY_Qtrue Otherwise. + * @post `beg` is the (possibly updated) left endpoint. + * @post `len` is the (possibly updated) length of the range. + * + * @internal + * + * The complex error handling switch reflects the fact that `Array#[]=` and + * `String#[]=` behave differently when they take ranges. + */ +VALUE rb_range_beg_len(VALUE range, long *begp, long *lenp, long len, int err); + +RBIMPL_ATTR_NONNULL(()) +/** + * Deconstructs a range into its components. + * + * @param[in] range Range or range-ish object. + * @param[out] begp Return value buffer. + * @param[out] endp Return value buffer. + * @param[out] exclp Return value buffer. + * @retval RUBY_Qfalse `range` is not an instance of ::rb_cRange. + * @retval RUBY_Qtrue Argument pointers are updated. + * @post `*begp` is the left endpoint of the range. + * @post `*endp` is the right endpoint of the range. + * @post `*exclp` is whether the range is open-ended or not. + */ int rb_range_values(VALUE range, VALUE *begp, VALUE *endp, int *exclp); RBIMPL_SYMBOL_EXPORT_END() diff --git a/include/ruby/internal/intern/rational.h b/include/ruby/internal/intern/rational.h index 30a87ff31f..ff4beca297 100644 --- a/include/ruby/internal/intern/rational.h +++ b/include/ruby/internal/intern/rational.h @@ -17,9 +17,10 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Public APIs related to ::rb_cRational. */ +#include "ruby/internal/attr/pure.h" #include "ruby/internal/dllexport.h" #include "ruby/internal/value.h" #include "ruby/internal/arithmetic/long.h" /* INT2FIX is here. 
*/ @@ -27,19 +28,144 @@ RBIMPL_SYMBOL_EXPORT_BEGIN() /* rational.c */ -VALUE rb_rational_raw(VALUE, VALUE); + +/** + * Identical to rb_rational_new(), except it skips argument validations. It is + * thus dangerous for extension libraries. For instance `1/0r` could be + * constructed using this. + * + * @param[in] num Numerator, an instance of ::rb_cInteger. + * @param[in] den Denominator, an instance of ::rb_cInteger. + * @exception rb_eTypeError Either argument is not an Integer. + * @return An instance of ::rb_cRational whose value is `(num/den)r`. + */ +VALUE rb_rational_raw(VALUE num, VALUE den); + +/** + * Shorthand of `(x/1)r`. As `x` is already an Integer, it practically + * converts it into a Rational of the identical value. + * + * @param[in] x An instance of ::rb_cInteger. + * @return An instance of ::rb_cRational, whose value is `(x/1)r`. + */ #define rb_rational_raw1(x) rb_rational_raw((x), INT2FIX(1)) + +/** @alias{rb_rational_raw} */ #define rb_rational_raw2(x,y) rb_rational_raw((x), (y)) -VALUE rb_rational_new(VALUE, VALUE); + +/** + * Constructs a Rational, with reduction. This returns for instance `(2/3)r` + * for `rb_rational_new(INT2NUM(-384), INT2NUM(-576))`. + * + * @param[in] num Numerator, an instance of ::rb_cInteger. + * @param[in] den Denominator, an instance of ::rb_cInteger. + * @exception rb_eZeroDivError `den` is zero. + * @return An instance of ::rb_cRational whose value is `(num/den)r`. + */ +VALUE rb_rational_new(VALUE num, VALUE den); + +/** + * Shorthand of `(x/1)r`. As `x` is already an Integer, it practically + * converts it into a Rational of the identical value. + * + * @param[in] x An instance of ::rb_cInteger. + * @return An instance of ::rb_cRational, whose value is `(x/1)r`. + */ #define rb_rational_new1(x) rb_rational_new((x), INT2FIX(1)) + +/** @alias{rb_rational_new} */ #define rb_rational_new2(x,y) rb_rational_new((x), (y)) -VALUE rb_Rational(VALUE, VALUE); + +/** + * Converts various values into a Rational. 
This function accepts: + * + * - Instances of ::rb_cInteger (taken as-is), + * - Instances of ::rb_cRational (taken as-is), + * - Instances of ::rb_cFloat (applies `#to_r`), + * - Instances of ::rb_cComplex (applies `#to_r`), + * - Instances of ::rb_cString (applies `#to_r`), + * - Other objects that respond to `#to_r`. + * + * It (possibly recursively) applies `#to_r` until both sides become either + * Integer or Rational, then divides them. + * + * As a special case, passing ::RUBY_Qundef to `den` is the same as passing + * `RB_INT2NUM(1)`. + * + * @param[in] num Numerator (see above). + * @param[in] den Denominator (see above). + * @exception rb_eTypeError Passed something not described above. + * @exception rb_eFloatDomainError `#to_r` produced NaN/Inf. + * @exception rb_eZeroDivError `#to_r` produced zero for `den`. + * @return An instance of ::rb_cRational whose value is `(num/den)r`. + * + * @internal + * + * This was the implementation of `Kernel#Rational` before, but they diverged. + */ +VALUE rb_Rational(VALUE num, VALUE den); + +/** + * Shorthand of `(x/1)r`. It practically converts it into a Rational of the + * identical value. + * + * @param[in] x ::rb_cInteger, ::rb_cRational, or something that responds to + * `#to_r`. + * @return An instance of ::rb_cRational, whose value is `(x/1)r`. + */ #define rb_Rational1(x) rb_Rational((x), INT2FIX(1)) + +/** @alias{rb_Rational} */ #define rb_Rational2(x,y) rb_Rational((x), (y)) + +RBIMPL_ATTR_PURE() +/** + * Queries the numerator of the passed Rational. + * + * @param[in] rat An instance of ::rb_cRational. + * @return Its numerator part, which is an instance of ::rb_cInteger. + */ VALUE rb_rational_num(VALUE rat); + +RBIMPL_ATTR_PURE() +/** + * Queries the denominator of the passed Rational. + * + * @param[in] rat An instance of ::rb_cRational. + * @return Its denominator part, which is an instance of ::rb_cInteger + * greater than or equal to one.
+ */ VALUE rb_rational_den(VALUE rat); -VALUE rb_flt_rationalize_with_prec(VALUE, VALUE); -VALUE rb_flt_rationalize(VALUE); + +/** + * Simplified approximation of a float. It returns a rational `rat` which + * satisfies: + * + * ``` + * flt - |prec| <= rat <= flt + |prec| + * ``` + * + * ```ruby + * 3.141592.rationalize(0.001) # => (201/64)r + * 3.141592.rationalize(0.01) # => (22/7)r + * 3.141592.rationalize(0.1) # => (16/5)r + * 3.141592.rationalize(1) # => (3/1)r + * ``` + * + * @param[in] flt An instance of ::rb_cFloat to rationalise. + * @param[in] prec Another ::rb_cFloat, which is the "precision". + * @return Approximation of `flt`, in ::rb_cRational. + */ +VALUE rb_flt_rationalize_with_prec(VALUE flt, VALUE prec); + +/** + * Identical to rb_flt_rationalize_with_prec(), except it auto-detects + * appropriate precision depending on the passed value. + * + * @param[in] flt An instance of ::rb_cFloat to rationalise. + * @return Approximation of `flt`, in ::rb_cRational. + */ +VALUE rb_flt_rationalize(VALUE flt); RBIMPL_SYMBOL_EXPORT_END() diff --git a/include/ruby/internal/intern/re.h b/include/ruby/internal/intern/re.h index dd7baef954..4dd58b469b 100644 --- a/include/ruby/internal/intern/re.h +++ b/include/ruby/internal/intern/re.h @@ -17,33 +17,227 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Public APIs related to ::rb_cRegexp. */ +#include "ruby/internal/attr/nonnull.h" #include "ruby/internal/dllexport.h" #include "ruby/internal/value.h" RBIMPL_SYMBOL_EXPORT_BEGIN() /* re.c */ + +/** + * @deprecated This macro once was a thing in the old days, but makes no sense + * any longer today. Exists here for backwards compatibility + * only. You can safely forget about it.
+ * + * @internal + * + * This was a function that switched between memcmp and rb_memcicmp depending + * on then-called `ruby_ignorecase`, or the `$=` global variable. That feature + * was abandoned in sometime around version 1.9.0. + */ #define rb_memcmp memcmp -int rb_memcicmp(const void*,const void*,long); -void rb_match_busy(VALUE); -VALUE rb_reg_nth_defined(int, VALUE); -VALUE rb_reg_nth_match(int, VALUE); + +/** + * Identical to st_locale_insensitive_strcasecmp(), except it is timing safe + * and returns something different. + * + * @param[in] s1 Comparison LHS. + * @param[in] s2 Comparison RHS. + * @param[in] n Comparison shall stop after first `n` bytes are scanned. + * @retval <0 `s1` is "less" than `s2`. + * @retval 0 Both sides converted into lowercase would be identical. + * @retval >0 `s1` is "greater" than `s2`. + * @note The "case" here means that of the POSIX Locale. + * + * @internal + * + * Can accept NULLs as long as n is also 0, and returns 0. + */ +int rb_memcicmp(const void *s1,const void *s2, long n); + +/** + * Asserts that the given MatchData is "occupied". MatchData shares its + * backend storages with its Regexp object. But programs can destructively + * tamper its contents. Calling this function beforehand shall prevent such + * modifications to spill over into other objects. + * + * @param[out] md Target instance of ::rb_cMatch. + * @post The object is "busy". + * + * @internal + * + * There is rb_match_unbusy internally, but extension libraries are left unable + * to do so. + */ +void rb_match_busy(VALUE md); + +/** + * Identical to rb_reg_nth_match(), except it just returns Boolean. This could + * skip allocating a returning string, resulting in reduced memory footprints + * if applicable. + * + * @param[in] n Match index. + * @param[in] md An instance of ::rb_cMatch. + * @exception rb_eTypeError `md` is not initialised. + * @retval RUBY_Qnil There is no `n`-th capture. + * @retval RUBY_Qfalse There is a `n`-th capture and is empty. 
+ * @retval RUBY_Qtrue There is a `n`-th capture that has something. + * + */ +VALUE rb_reg_nth_defined(int n, VALUE md); + +/** + * Queries the nth captured substring. + * + * @param[in] n Match index. + * @param[in] md An instance of ::rb_cMatch. + * @exception rb_eTypeError `md` is not initialised. + * @retval RUBY_Qnil There is no `n`-th capture. + * @retval otherwise An allocated instance of ::rb_cString containing + * the contents captured. + */ +VALUE rb_reg_nth_match(int n, VALUE md); + +/** + * Queries the index of the given named capture. Captures could be named. But + * that doesn't mean named ones are not indexed. A regular expression can mix + * named and non-named captures, and they are all indexed. This function + * converts from a name to its index. + * + * @param[in] match An instance of ::rb_cMatch. + * @param[in] backref Capture name, in String, Symbol, or Numeric. + * @exception rb_eIndexError No such named capture. + * @return The index of the given name. + */ int rb_reg_backref_number(VALUE match, VALUE backref); -VALUE rb_reg_last_match(VALUE); -VALUE rb_reg_match_pre(VALUE); -VALUE rb_reg_match_post(VALUE); -VALUE rb_reg_match_last(VALUE); + +/** + * This just returns the argument, stringified. What a poor name. + * + * @param[in] md An instance of ::rb_cMatch. + * @return Its 0th capture (i.e. entire matched string). + */ +VALUE rb_reg_last_match(VALUE md); + +/** + * The portion of the original string before the given match. + * + * @param[in] md An instance of ::rb_cMatch. + * @return Its "prematch". This is perl's ``$```. + */ +VALUE rb_reg_match_pre(VALUE md); + +/** + * The portion of the original string after the given match. + * + * @param[in] md An instance of ::rb_cMatch. + * @return Its "postmatch". This is perl's `$'`. + */ +VALUE rb_reg_match_post(VALUE md); + +/** + * The portion of the original string that captured at the very last. + * + * @param[in] md An instance of ::rb_cMatch. + * @return Its "lastmatch". 
This is perl's `$+`. + */ +VALUE rb_reg_match_last(VALUE md); + +/** + * @private + * + * @deprecated This macro once was a thing in the old days, but makes no sense + * any longer today. Exists here for backwards compatibility + * only. You can safely forget about it. + */ #define HAVE_RB_REG_NEW_STR 1 -VALUE rb_reg_new_str(VALUE, int); -VALUE rb_reg_new(const char *, long, int); + +/** + * Identical to rb_reg_new(), except it takes the expression in Ruby's string + * instead of C's. + * + * @param[in] src Source code in String. + * @param[in] opts Options e.g. ONIG_OPTION_MULTILINE. + * @exception rb_eRegexpError `src` and `opts` do not interface. + * @return Allocated new instance of ::rb_cRegexp. + */ +VALUE rb_reg_new_str(VALUE src, int opts); + +RBIMPL_ATTR_NONNULL(()) +/** + * Creates a new Regular expression. + * + * @param[in] src Source code. + * @param[in] len `strlen(src)`. + * @param[in] opts Options e.g. ONIG_OPTION_MULTILINE. + * @return Allocated new instance of ::rb_cRegexp. + */ +VALUE rb_reg_new(const char *src, long len, int opts); + +/** + * Allocates an instance of ::rb_cRegexp. + * + * @private + * + * Nobody should call this function. Regular expressions that are not + * initialised must not exist in the wild. + */ VALUE rb_reg_alloc(void); + +/** + * Initialises an instance of ::rb_cRegexp. + * + * @private + * + * This just raises for ordinary regexp objects. Extension libraries must not + * use. + */ VALUE rb_reg_init_str(VALUE re, VALUE s, int options); -VALUE rb_reg_match(VALUE, VALUE); -VALUE rb_reg_match2(VALUE); -int rb_reg_options(VALUE); + +/** + * This is the match operator. + * + * @param[in] re An instance of ::rb_cRegexp. + * @param[in] str An instance of ::rb_cString. + * @exception rb_eTypeError `str` is not a string. + * @exception rb_eRegexpError Error inside of Onigmo (unlikely). + * @retval RUBY_Qnil Match failed. + * @retval otherwise Matched position (character index inside of + * `str`).
+ * @post `Regexp.last_match` is updated. + * @post `$&`, `$~`, etc., are updated. + * @note If you do this in ruby, named captures are assigned to local + * variable of the local scope. But that doesn't happen here. The + * assignment is done by the interpreter. + */ +VALUE rb_reg_match(VALUE re, VALUE str); + +/** + * Identical to rb_reg_match(), except it matches against rb_lastline_get() + * (or, the `$_`). + * + * @param[in] re An instance of ::rb_cRegexp. + * @exception rb_eRegexpError Error inside of Onigmo (unlikely). + * @retval RUBY_Qnil Match failed or `$_` is absent. + * @retval otherwise Matched position (character index inside of + * `$_`). + * @post `Regexp.last_match` is updated. + * @post `$&`, `$~`, etc., are updated. + */ +VALUE rb_reg_match2(VALUE re); + +/** + * Queries the options of the passed regular expression. + * + * @param[in] re An instance of ::rb_cRegexp. + * @return Its options. + * @note Possible return values are defined in Onigmo.h. + */ +int rb_reg_options(VALUE re); RBIMPL_SYMBOL_EXPORT_END() diff --git a/include/ruby/internal/intern/ruby.h b/include/ruby/internal/intern/ruby.h index 9d9a71cf7a..efe61424ca 100644 --- a/include/ruby/internal/intern/ruby.h +++ b/include/ruby/internal/intern/ruby.h @@ -17,20 +17,60 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Process-global APIs. */ +#include "ruby/internal/attr/nonnull.h" #include "ruby/internal/dllexport.h" #include "ruby/internal/value.h" RBIMPL_SYMBOL_EXPORT_BEGIN() /* ruby.c */ +/** @alias{rb_get_argv} */ #define rb_argv rb_get_argv() + +/** + * The value of `$0` at process bootup. + * + * @note This is just a snapshot of `$0`, not the backend storage of it. 
`$0` + * could become something different because it is a writable global + * variable. Modifying it for instance affects `ps(1)` output. Don't + * assume they are synced. + */ RUBY_EXTERN VALUE rb_argv0; + +/* io.c */ + +/** + * Queries the arguments passed to the current process that you can access from + * Ruby as `ARGV`. + * + * @return An array of strings containing arguments passed to the process. + */ VALUE rb_get_argv(void); -void *rb_load_file(const char*); -void *rb_load_file_str(VALUE); + +/* ruby.c */ + +RBIMPL_ATTR_NONNULL(()) +/** + * Loads the given file. This function opens the given pathname for reading, + * parses the contents as a Ruby script, and returns an opaque "node" pointer. + * You can then pass it to ruby_run_node() for evaluation. + * + * @param[in] file File name, or "-" to read from stdin. + * @return Opaque "node" pointer. + */ +void *rb_load_file(const char *file); + +/** + * Identical to rb_load_file(), except it takes the argument as a Ruby's string + * instead of C's. + * + * @param[in] file File name, or "-" to read from stdin. + * @return Opaque "node" pointer. + */ +void *rb_load_file_str(VALUE file); RBIMPL_SYMBOL_EXPORT_END() diff --git a/include/ruby/internal/intern/select.h b/include/ruby/internal/intern/select.h index 43d4cf354c..6ba84c6e63 100644 --- a/include/ruby/internal/intern/select.h +++ b/include/ruby/internal/intern/select.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Public APIs to provide ::rb_fd_select(). * @note Functions and structs defined in this header file are not * necessarily ruby-specific. They don't need ::VALUE etc. 
@@ -35,9 +35,11 @@ # include "ruby/internal/intern/select/largesize.h" #elif defined(_WIN32) # include "ruby/internal/intern/select/win32.h" +# /** Does nothing (defined for compatibility). */ # define rb_fd_resize(n, f) ((void)(f)) #else # include "ruby/internal/intern/select/posix.h" +# /** Does nothing (defined for compatibility). */ # define rb_fd_resize(n, f) ((void)(f)) #endif @@ -45,7 +47,39 @@ RBIMPL_SYMBOL_EXPORT_BEGIN() struct timeval; -int rb_thread_fd_select(int, rb_fdset_t *, rb_fdset_t *, rb_fdset_t *, struct timeval *); +/** + * Waits for multiple file descriptors at once. This is basically a wrapper of + * system-provided select() with releasing GVL, to allow other Ruby threads run + * in parallel. + * + * @param[in] nfds Max FD in everything passed, plus one. + * @param[in,out] rfds Set of FDs to wait for reads. + * @param[in,out] wfds Set of FDs to wait for writes. + * @param[in,out] efds Set of FDs to wait for OOBs. + * @param[in,out] timeout Max blocking duration. + * @retval -1 Failed, errno set. + * @retval 0 Timeout exceeded. + * @retval otherwise Total number of file descriptors returned. + * @post `rfds` contains readable FDs. + * @post `wfds` contains writable FDs. + * @post `efds` contains exceptional FDs. + * @post `timeout` is the time left. + * @note All pointers are allowed to be null pointers. + * + * Although backend threads can run in parallel of this function, touching a + * file descriptor from multiple threads could be problematic. For instance + * what happens when a thread closes a file descriptor that is selected by + * someone else, vastly varies among operating systems. You would better avoid + * touching an fd from more than one threads. + * + * @internal + * + * Although any file descriptors are possible here, it makes completely no + * sense to pass a descriptor that is not `O_NONBLOCK`. 
If you want to know + * the reason for this limitation in detail, you might find this thread super + * interesting: https://lkml.org/lkml/2004/10/6/117 + */ +int rb_thread_fd_select(int nfds, rb_fdset_t *rfds, rb_fdset_t *wfds, rb_fdset_t *efds, struct timeval *timeout); RBIMPL_SYMBOL_EXPORT_END() diff --git a/include/ruby/internal/intern/select/largesize.h b/include/ruby/internal/intern/select/largesize.h index ba56a159b1..d65f088c06 100644 --- a/include/ruby/internal/intern/select/largesize.h +++ b/include/ruby/internal/intern/select/largesize.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Public APIs to provide ::rb_fd_select(). * * Several Unix platforms support file descriptors bigger than FD_SETSIZE in @@ -35,9 +35,6 @@ * `select(2)` documents how to allocate fd_set dynamically. * http://www.openbsd.org/cgi-bin/man.cgi?query=select&manpath=OpenBSD+4.4 * - * - HP-UX documents how to allocate fd_set dynamically. - * http://docs.hp.com/en/B2355-60105/select.2.html - * * - Solaris 8 has `select_large_fdset` * * - Mac OS X 10.7 (Lion) @@ -66,26 +63,134 @@ struct timeval; +/** + * The data structure which wraps the fd_set bitmap used by select(2). This + * allows Ruby to use FD sets larger than that allowed by historic limitations + * on modern platforms. + */ typedef struct { - int maxfd; - fd_set *fdset; + int maxfd; /**< Maximum allowed number of FDs. 
*/ + fd_set *fdset; /**< File descriptors buffer */ } rb_fdset_t; RBIMPL_SYMBOL_EXPORT_BEGIN() -void rb_fd_init(rb_fdset_t *); -void rb_fd_term(rb_fdset_t *); -void rb_fd_zero(rb_fdset_t *); -void rb_fd_set(int, rb_fdset_t *); -void rb_fd_clr(int, rb_fdset_t *); -int rb_fd_isset(int, const rb_fdset_t *); -void rb_fd_copy(rb_fdset_t *, const fd_set *, int); +RBIMPL_ATTR_NONNULL(()) +/** + * (Re-)initialises a fdset. One must be initialised before other `rb_fd_*` + * operations. Analogous to calling `malloc(3)` to allocate an `fd_set`. + * + * @param[out] f An fdset to squash. + * @post `f` holds no file descriptors. + */ +void rb_fd_init(rb_fdset_t *f); + +RBIMPL_ATTR_NONNULL(()) +/** + * Destroys the ::rb_fdset_t, releasing any memory and resources it used. It + * must be reinitialised using rb_fd_init() before future use. Analogous to + * calling `free(3)` to release memory for an `fd_set`. + * + * @param[out] f An fdset to squash. + * @post `f` holds no file descriptors. + */ +void rb_fd_term(rb_fdset_t *f); + +RBIMPL_ATTR_NONNULL(()) +/** + * Wipes out the current set of FDs. + * + * @param[out] f The fdset to clear. + * @post `f` has no FDs. + */ +void rb_fd_zero(rb_fdset_t *f); + +RBIMPL_ATTR_NONNULL(()) +/** + * Sets an fd to a fdset. + * + * @param[in] fd A file descriptor. + * @param[out] f Target fdset. + * @post `f` holds `fd`. + */ +void rb_fd_set(int fd, rb_fdset_t *f); + +RBIMPL_ATTR_NONNULL(()) +/** + * Releases a specific FD from the given fdset. + * + * @param[in] fd Target FD. + * @param[out] f The fdset that holds `fd`. + * @post `f` doesn't hold `fd`. + */ +void rb_fd_clr(int fd, rb_fdset_t *f); + +RBIMPL_ATTR_NONNULL(()) +RBIMPL_ATTR_PURE() +/** + * Queries if the given FD is in the given set. + * + * @param[in] fd Target FD. + * @param[in] f The fdset to scan. + * @retval 1 Yes there is. + * @retval 0 No there isn't.
+ * @see http://www.freebsd.org/cgi/query-pr.cgi?pr=91421 + */ +int rb_fd_isset(int fd, const rb_fdset_t *f); + +/** + * Destructively overwrites an fdset with another. + * + * @param[out] dst Target fdset. + * @param[in] src Source fdset. + * @param[in] max Maximum number of file descriptors to copy. + * @post `dst` is a copy of `src`. + */ +void rb_fd_copy(rb_fdset_t *dst, const fd_set *src, int max); + +/** + * Identical to rb_fd_copy(), except it copies unlimited number of file + * descriptors. + * + * @param[out] dst Target fdset. + * @param[in] src Source fdset. + * @post `dst` is a copy of `src`. + */ void rb_fd_dup(rb_fdset_t *dst, const rb_fdset_t *src); -int rb_fd_select(int, rb_fdset_t *, rb_fdset_t *, rb_fdset_t *, struct timeval *); + +/** + * Waits for multiple file descriptors at once. + * + * @param[in] nfds Max FD in everything passed, plus one. + * @param[in,out] rfds Set of FDs to wait for reads. + * @param[in,out] wfds Set of FDs to wait for writes. + * @param[in,out] efds Set of FDs to wait for OOBs. + * @param[in,out] timeout Max blocking duration. + * @retval -1 Failed, errno set. + * @retval 0 Timeout exceeded. + * @retval otherwise Total number of file descriptors returned. + * @post `rfds` contains readable FDs. + * @post `wfds` contains writable FDs. + * @post `efds` contains exceptional FDs. + * @post `timeout` is the time left. + * @note All pointers are allowed to be null pointers. + */ +int rb_fd_select(int nfds, rb_fdset_t *rfds, rb_fdset_t *wfds, rb_fdset_t *efds, struct timeval *timeout); RBIMPL_SYMBOL_EXPORT_END() RBIMPL_ATTR_NONNULL(()) RBIMPL_ATTR_PURE() -/* :TODO: can this function be __attribute__((returns_nonnull)) or not? */ +/** + * Raw pointer to `fd_set`. + * + * @param[in] f Target fdset. + * @retval NULL `f` is already terminated by rb_fd_term(). + * @retval otherwise Underlying fd_set. + * + * @internal + * + * Extension library must not touch raw pointers. It was a bad idea to let + * them use it. 
+ */ static inline fd_set * rb_fd_ptr(const rb_fdset_t *f) { @@ -94,6 +199,12 @@ rb_fd_ptr(const rb_fdset_t *f) RBIMPL_ATTR_NONNULL(()) RBIMPL_ATTR_PURE() +/** + * It seems this function has no use. Maybe just remove? + * + * @param[in] f A set. + * @return Number of file descriptors stored. + */ static inline int rb_fd_max(const rb_fdset_t *f) { diff --git a/include/ruby/internal/intern/select/posix.h b/include/ruby/internal/intern/select/posix.h index 6c1092b39d..0a9b0b2e51 100644 --- a/include/ruby/internal/intern/select/posix.h +++ b/include/ruby/internal/intern/select/posix.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Public APIs to provide ::rb_fd_select(). */ #include "ruby/internal/config.h" @@ -30,17 +30,41 @@ # include <unistd.h> /* for select(2) (archaic UNIX) */ #endif -#include "ruby/internal/attr/pure.h" #include "ruby/internal/attr/const.h" +#include "ruby/internal/attr/noalias.h" +#include "ruby/internal/attr/nonnull.h" +#include "ruby/internal/attr/pure.h" +/** + * The data structure which wraps the fd_set bitmap used by `select(2)`. This + * allows Ruby to use FD sets larger than what has been historically allowed on + * modern platforms. + * + * @internal + * + * ... but because this header file is included only when the system is with + * that "historic restrictions", this is nothing more than an alias of fd_set. + */ typedef fd_set rb_fdset_t; +/** Clears the given ::rb_fdset_t. */ #define rb_fd_zero FD_ZERO + +/** Sets the given fd to the ::rb_fdset_t. */ #define rb_fd_set FD_SET + +/** Unsets the given fd from the ::rb_fdset_t. */ #define rb_fd_clr FD_CLR + +/** Queries if the given fd is in the ::rb_fdset_t. 
*/ #define rb_fd_isset FD_ISSET + +/** Initialises the given ::rb_fdset_t. */ #define rb_fd_init FD_ZERO + +/** Waits for multiple file descriptors at once. */ #define rb_fd_select select + /**@cond INTERNAL_MACRO */ #define rb_fd_copy rb_fd_copy #define rb_fd_dup rb_fd_dup @@ -48,20 +72,50 @@ typedef fd_set rb_fdset_t; #define rb_fd_max rb_fd_max /** @endcond */ +RBIMPL_ATTR_NONNULL(()) +RBIMPL_ATTR_NOALIAS() +/** + * Destructively overwrites an fdset with another. + * + * @param[out] dst Target fdset. + * @param[in] src Source fdset. + * @param[in] n Unused parameter. + * @post `dst` is a copy of `src`. + */ static inline void rb_fd_copy(rb_fdset_t *dst, const fd_set *src, int n) { *dst = *src; } +RBIMPL_ATTR_NONNULL(()) +RBIMPL_ATTR_NOALIAS() +/** + * Destructively overwrites an fdset with another. + * + * @param[out] dst Target fdset. + * @param[in] src Source fdset. + * @post `dst` is a copy of `src`. + */ static inline void -rb_fd_dup(rb_fdset_t *dst, const fd_set *src, int n) +rb_fd_dup(rb_fdset_t *dst, const fd_set *src) { *dst = *src; } RBIMPL_ATTR_PURE() /* :TODO: can this function be __attribute__((returns_nonnull)) or not? */ +/** + * Raw pointer to `fd_set`. + * + * @param[in] f Target fdset. + * @return Underlying fd_set. + * + * @internal + * + * Extension library must not touch raw pointers. It was a bad idea to let + * them use it. + */ static inline fd_set * rb_fd_ptr(rb_fdset_t *f) { @@ -69,14 +123,22 @@ rb_fd_ptr(rb_fdset_t *f) } RBIMPL_ATTR_CONST() +/** + * It seems this function has no use. Maybe just remove? + * + * @param[in] f A set. + * @return Number of file descriptors stored. + */ static inline int rb_fd_max(const rb_fdset_t *f) { return FD_SETSIZE; } -/* :FIXME: What are these? They don't exist for shibling implementations. */ +/** @cond INTERNAL_MACRO */ +/* :FIXME: What are these? They don't exist for sibling implementations.
*/ #define rb_fd_init_copy(d, s) (*(d) = *(s)) #define rb_fd_term(f) ((void)(f)) +/** @endcond */ #endif /* RBIMPL_INTERN_SELECT_POSIX_H */ diff --git a/include/ruby/internal/intern/select/win32.h b/include/ruby/internal/intern/select/win32.h index ef75a0f760..edaf7a8523 100644 --- a/include/ruby/internal/intern/select/win32.h +++ b/include/ruby/internal/intern/select/win32.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Public APIs to provide ::rb_fd_select(). */ #include "ruby/internal/dosish.h" /* for rb_w32_select */ @@ -41,21 +41,83 @@ RBIMPL_SYMBOL_EXPORT_BEGIN() struct timeval; +/** + * The data structure which wraps the fd_set bitmap used by select(2). This + * allows Ruby to use FD sets larger than that allowed by historic limitations + * on modern platforms. + */ typedef struct { - int capa; - fd_set *fdset; + int capa; /**< Maximum allowed number of FDs. */ + fd_set *fdset; /**< File descriptors buffer. */ } rb_fdset_t; -void rb_fd_init(rb_fdset_t *); -void rb_fd_term(rb_fdset_t *); -void rb_fd_set(int, rb_fdset_t *); -void rb_w32_fd_copy(rb_fdset_t *, const fd_set *, int); +RBIMPL_ATTR_NONNULL(()) +/** + * (Re-)initialises a fdset. One must be initialised before other `rb_fd_*` + * operations. Analogous to calling `malloc(3)` to allocate an `fd_set`. + * + * @param[out] f An fdset to squash. + * @post `f` holds no file descriptors. + * + * @internal + * + * Can't this leak memory if the same `f` is passed twice...? + */ +void rb_fd_init(rb_fdset_t *f); + +RBIMPL_ATTR_NONNULL(()) +/** + * Destroys the ::rb_fdset_t, releasing any memory and resources it used. It + * must be reinitialised using rb_fd_init() before future use. 
Analogous to + * calling `free(3)` to release memory for an `fd_set`. + * + * @param[out] f An fdset to squash. + * @post `f` holds no file descriptors. + */ +void rb_fd_term(rb_fdset_t *f); + +RBIMPL_ATTR_NONNULL(()) +/** + * Sets an fd to a fdset. + * + * @param[in] fd A file descriptor. + * @param[out] f Target fdset. + * @post `f` holds `fd`. + */ +void rb_fd_set(int fd, rb_fdset_t *f); + +RBIMPL_ATTR_NONNULL(()) +/** + * Destructively overwrites an fdset with another. + * + * @param[out] dst Target fdset. + * @param[in] src Source fdset. + * @param[in] max Maximum number of file descriptors to copy. + * @post `dst` is a copy of `src`. + */ +void rb_w32_fd_copy(rb_fdset_t *dst, const fd_set *src, int max); + +RBIMPL_ATTR_NONNULL(()) +/** + * Identical to rb_w32_fd_copy(), except it copies unlimited number of file + * descriptors. + * + * @param[out] dst Target fdset. + * @param[in] src Source fdset. + * @post `dst` is a copy of `src`. + */ void rb_w32_fd_dup(rb_fdset_t *dst, const rb_fdset_t *src); RBIMPL_SYMBOL_EXPORT_END() RBIMPL_ATTR_NONNULL(()) RBIMPL_ATTR_NOALIAS() +/** + * Wipes out the current set of FDs. + * + * @param[out] f The fdset to clear. + * @post `f` has no FDs. + */ static inline void rb_fd_zero(rb_fdset_t *f) { @@ -63,6 +125,13 @@ rb_fd_zero(rb_fdset_t *f) } RBIMPL_ATTR_NONNULL(()) +/** + * Releases a specific FD from the given fdset. + * + * @param[in] n Target FD. + * @param[out] f The fdset that holds `n`. + * @post `f` doesn't hold n. + */ static inline void rb_fd_clr(int n, rb_fdset_t *f) { @@ -70,6 +139,14 @@ rb_fd_clr(int n, rb_fdset_t *f) } RBIMPL_ATTR_NONNULL(()) +/** + * Queries if the given FD is in the given set. + * + * @param[in] n Target FD. + * @param[in] f The fdset to scan. + * @retval 1 Yes there is. + * @retval 0 No there isn't. + */ static inline int rb_fd_isset(int n, rb_fdset_t *f) { @@ -77,6 +154,14 @@ rb_fd_isset(int n, rb_fdset_t *f) } RBIMPL_ATTR_NONNULL(()) +/** + * Destructively overwrites an fdset with another. 
+ * + * @param[out] dst Target fdset. + * @param[in] src Source fdset. + * @param[in] n Maximum number of file descriptors to copy. + * @post `dst` is a copy of `src`. + */ static inline void rb_fd_copy(rb_fdset_t *dst, const fd_set *src, int n) { @@ -84,12 +169,45 @@ rb_fd_copy(rb_fdset_t *dst, const fd_set *src, int n) } RBIMPL_ATTR_NONNULL(()) +/** + * Identical to rb_fd_copy(), except it copies unlimited number of file + * descriptors. + * + * @param[out] dst Target fdset. + * @param[in] src Source fdset. + * @post `dst` is a copy of `src`. + */ static inline void rb_fd_dup(rb_fdset_t *dst, const rb_fdset_t *src) { rb_w32_fd_dup(dst, src); } +/** + * Waits for multiple file descriptors at once. + * + * @param[in] n Max FD in everything passed, plus one. + * @param[in,out] rfds Set of FDs to wait for reads. + * @param[in,out] wfds Set of FDs to wait for writes. + * @param[in,out] efds Set of FDs to wait for OOBs. + * @param[in,out] timeout Max blocking duration. + * @retval -1 Failed, errno set. + * @retval 0 Timeout exceeded. + * @retval otherwise Total number of file descriptors returned. + * @post `rfds` contains readable FDs. + * @post `wfds` contains writable FDs. + * @post `efds` contains exceptional FDs. + * @post `timeout` is the time left. + * @note All pointers are allowed to be null pointers. + * + * @internal + * + * This can wait for `SOCKET` and `HANDLE` at once. In order to achieve that + * property we heavily touch the internals of MSVCRT. We `CreateFile` a + * `"NUL"` alongside of a socket and directly manipulate its `struct ioinfo`. + * This is of course a very dirty hack. If we could design the API today we + * could use `CancelIoEx`. But we are older than that Win32 API.
+ */ static inline int rb_fd_select(int n, rb_fdset_t *rfds, rb_fdset_t *wfds, rb_fdset_t *efds, struct timeval *timeout) { @@ -103,7 +221,18 @@ rb_fd_select(int n, rb_fdset_t *rfds, rb_fdset_t *wfds, rb_fdset_t *efds, struct RBIMPL_ATTR_NONNULL(()) RBIMPL_ATTR_PURE() -/* :TODO: can this function be __attribute__((returns_nonnull)) or not? */ +/** + * Raw pointer to `fd_set`. + * + * @param[in] f Target fdset. + * @retval NULL `f` is already terminated by rb_fd_term(). + * @retval otherwise Underlying fd_set. + * + * @internal + * + * Extension library must not touch raw pointers. It was a bad idea to let + * them use it. + */ static inline fd_set * rb_fd_ptr(const rb_fdset_t *f) { @@ -111,7 +240,13 @@ rb_fd_ptr(const rb_fdset_t *f) } RBIMPL_ATTR_NONNULL(()) -RBIMPL_ATTR_PURE() +RBIMPL_ATTR_PURE_UNLESS_DEBUG() +/** + * It seems this function has no use. Maybe just remove? + * + * @param[in] f A set. + * @return Number of file descriptors stored. + */ static inline int rb_fd_max(const rb_fdset_t *f) { diff --git a/include/ruby/internal/intern/signal.h b/include/ruby/internal/intern/signal.h index 8739c51f53..4773788651 100644 --- a/include/ruby/internal/intern/signal.h +++ b/include/ruby/internal/intern/signal.h @@ -17,22 +17,129 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Signal handling APIs. 
*/ +#include "ruby/internal/attr/nonnull.h" +#include "ruby/internal/attr/pure.h" #include "ruby/internal/dllexport.h" #include "ruby/internal/value.h" RBIMPL_SYMBOL_EXPORT_BEGIN() /* signal.c */ -VALUE rb_f_kill(int, const VALUE*); -#ifdef POSIX_SIGNAL -#define posix_signal ruby_posix_signal -void (*posix_signal(int, void (*)(int)))(int); -#endif -const char *ruby_signal_name(int); -void ruby_default_signal(int); + +RBIMPL_ATTR_NONNULL(()) +/** + * Sends a signal ("kills") to processes. + * + * The first argument is the signal, either in: + * + * - Numerical representation (e.g. `9`), or + * - Textual representation of canonical (e.g. `:SIGKILL`) name or + * abbreviated (e.g. `:KILL`) name, either in ::rb_cSymbol or ::rb_cString. + * + * All the remaining arguments are numerical representations of process IDs. + * This function iterates over them to send the specified signal. + * + * You can specify both negative PIDs and negative signo to this function: + * + * ``` + * sig \ pid | >= 1 | == 0 | == -1 | <= -2 + * ===========+======+======+=======+======= + * > 0 | #1 | #2 | #3 | #4 + * == 0 | #5 | #6 | #7 | #8 + * < 0 | #9 | #10 | #11 + * ``` + * + * - Case #1: When signo and PID are both positive, this function sends the + * specified signal to the specified process (intuitive). + * + * - Case #2: When signo is positive and PID is zero, this function sends + * that signal to the current process group. + * + * - Case #3: When signo is positive and PID is -1, this function sends that + * signal to everything that the current process is allowed to kill. + * + * - Case #4: When signo is positive and PID is negative (but not -1), this + * function sends that signal to every process in a process group, whose + * process group ID is the absolute value of the passed PID. + * + * - Case #5: When signo is zero and PID is positive, this function just + * checks for the existence of the specified process and doesn't send + * anything to anyone.
In case the process is absent `Errno::ESRCH` is + * raised. + * + * - Case #6: When signo and PID are both zero, this function checks for the + * existence of the current process group. And it must do. This function + * is effectively a no-op then. + * + * - Case #7: When signo is zero and PID is -1, this function checks if there + * is any other process that the current process can kill. At least init + * (PID 1) must exist, so this must not fail. + * + * - Case #8: When signo is zero and PID is negative (but not -1), this + * function checks if there is a process group whose process group ID is + * the absolute value of the passed PID. In case the process group is + * absent `Errno::ESRCH` is raised. + * + * - Case #9: When signo is negative and PID is positive, this function sends + * the absolute value of the passed signo to the process group specified as + * the PID. + * + * - Case #10: When signo is negative and PID is zero, it is highly expected + * that this function sends the absolute value of the passed signo to the + * current process group. Strictly speaking, IEEE Std 1003.1-2017 + * specifies that this (`killpg(3posix)` with an argument of zero) is an + * undefined behaviour. But no operating system is known so far that does + * things differently. + * + * - Case #11: When signo and PID are both negative, the behaviour of this + * function depends on how `killpg(3)` works. On Linux, it seems such + * attempt is strictly prohibited and `Errno::EINVAL` is raised. But on + * macOS, it seems it tries to send the signal actually to the process + * group. + * + * @note Above description is in fact different from how `kill(2)` works. + * We interpret the passed arguments before passing them through to + * system calls. + * @param[in] argc Number of objects in `argv`. + * @param[in] argv Signal, followed by target PIDs. + * @exception rb_eArgError Unknown signal name. + * @exception rb_eSystemCallError Various errors sending signal to processes. 
+ * @return Something numeric. The meaning of this return value is unclear. + * It seems in case of #1 above, this could be the body count. But + * other cases remain mysterious. + */ +VALUE rb_f_kill(int argc, const VALUE *argv); + +RBIMPL_ATTR_PURE() +/** + * Queries the name of the signal. It returns for instance `"KILL"` for + * SIGKILL. + * + * @param[in] signo Signal number to query. + * @retval 0 No such signal. + * @retval otherwise A pointer to a static C string that is the name of + * the signal. + * @warning Don't free the return value. + */ +const char *ruby_signal_name(int signo); + +/** + * Pretends as if there was no custom signal handler. This function sets the + * signal action to SIG_DFL, then kills itself. + * + * @param[in] sig The signal. + * @post Previous signal handler is lost. + * @post Passed signal is sent to the current process. + * + * @internal + * + * @shyouhei doesn't understand the needs of this function being visible from + * extension libraries. + */ +void ruby_default_signal(int sig); RBIMPL_SYMBOL_EXPORT_END() diff --git a/include/ruby/internal/intern/sprintf.h b/include/ruby/internal/intern/sprintf.h index 2c90548353..aedc0f9ab1 100644 --- a/include/ruby/internal/intern/sprintf.h +++ b/include/ruby/internal/intern/sprintf.h @@ -17,26 +17,142 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. - * @brief Our own private printf(3). + * extension libraries. They could be written in C++98. + * @brief Our own private `printf(3)`. 
*/ #include "ruby/internal/attr/format.h" +#include "ruby/internal/attr/nonnull.h" #include "ruby/internal/dllexport.h" #include "ruby/internal/value.h" RBIMPL_SYMBOL_EXPORT_BEGIN() /* sprintf.c */ -VALUE rb_f_sprintf(int, const VALUE*); +/** + * Identical to rb_str_format(), except how the arguments are arranged. + * + * @param[in] argc Number of objects of `argv`. + * @param[in] argv A format string, followed by its arguments. + * @return A rendered new instance of ::rb_cString. + * + * @internal + * + * You can safely pass NULL to `argv`. Doesn't make any sense though. + */ +VALUE rb_f_sprintf(int argc, const VALUE *argv); + +RBIMPL_ATTR_NONNULL((1)) RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 1, 2) -VALUE rb_sprintf(const char*, ...); -VALUE rb_vsprintf(const char*, va_list); +/** + * Ruby's extended `sprintf(3)`. We ended up reinventing the entire `printf` + * business because we don't want to depend on locales. OS-provided `printf` + * routines might or might not, which caused instabilities of the result + * strings. + * + * The format sequence is a mixture of format specifiers and other verbatim + * contents. Each format specifier starts with a `%`, and has the following + * structure: + * + * ``` + * %[flags][width][.precision][length]conversion + * ``` + * + * This function supports flags of ` `, `#`, `+`, `-`, `0`, width of + * non-negative decimal integer and `*`, precision of non-negative decimal + * integers and `*`, length of `L`, `h`, `t`, `z`, `l`, `ll`, `q`, conversions + * of `A`, `D`, `E`, `G`, `O`, `U`, `X`, `a`, `c`, `d`, `e`, `f`, `g`, `i`, + * `n`, `o`, `p`, `s`, `u`, `x`, and `%`. In case of `_WIN32` it also supports + * `I`. And additionally, it supports magical `PRIsVALUE` macro that can + * stringise arbitrary Ruby objects: + * + * ```CXX + * rb_sprintf("|%"PRIsVALUE"|", RUBY_Qtrue); // => "|true|" + * rb_sprintf("%+"PRIsVALUE, rb_stdin); // => "#<IO:<STDIN>>" + * ``` + * + * @param[in] fmt A `printf`-like format specifier. 
+ * @param[in] ... Variadic number of contents to format. + * @return A rendered new instance of ::rb_cString. + * + * @internal + * + * :FIXME: We can improve this document. + */ +VALUE rb_sprintf(const char *fmt, ...); + +RBIMPL_ATTR_NONNULL((1)) +RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 1, 0) +/** + * Identical to rb_sprintf(), except it takes a `va_list`. + * + * @param[in] fmt A `printf`-like format specifier. + * @param[in] ap Contents to format. + * @return A rendered new instance of ::rb_cString. + */ +VALUE rb_vsprintf(const char *fmt, va_list ap); +RBIMPL_ATTR_NONNULL((2)) RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 2, 3) -VALUE rb_str_catf(VALUE, const char*, ...); -VALUE rb_str_vcatf(VALUE, const char*, va_list); -VALUE rb_str_format(int, const VALUE *, VALUE); +/** + * Identical to rb_sprintf(), except it renders the output to the specified + * object rather than creating a new one. + * + * @param[out] dst String to modify. + * @param[in] fmt A `printf`-like format specifier. + * @param[in] ... Variadic number of contents to format. + * @exception rb_eTypeError `dst` is not a String. + * @return Passed `dst`. + * @post `dst` has the rendered output appended to its end. + */ +VALUE rb_str_catf(VALUE dst, const char *fmt, ...); + +RBIMPL_ATTR_NONNULL((2)) +RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 2, 0) +/** + * Identical to rb_str_catf(), except it takes a `va_list`. It can also be + * seen as a routine identical to rb_vsprintf(), except it renders the output + * to the specified object rather than creating a new one. + * + * @param[out] dst String to modify. + * @param[in] fmt A `printf`-like format specifier. + * @param[in] ap Contents to format. + * @exception rb_eTypeError `dst` is not a String. + * @return Passed `dst`. + * @post `dst` has the rendered output appended to its end. + */ +VALUE rb_str_vcatf(VALUE dst, const char *fmt, va_list ap); + +/** + * Formats a string. + * + * Returns the string resulting from applying `fmt` to `argv`. 
The format + * sequence is a mixture of format specifiers and other verbatim contents. + * Each format specifier starts with a `%`, and has the following structure: + * + * ``` + * %[flags][width][.precision]type + * ``` + * + * ... which is different from that of rb_sprintf(). Because ruby has no + * `short` or `long`, there is no way to specify a "length" of an argument. + * + * This function supports flags of ` `, `#`, `+`, `-`, `<>`, `{}`, width of + * non-negative decimal integer and `$`, `*`, precision of non-negative decimal + * integer and `$`, `*`, type of `A`, `B`, `E`, `G`, `X`, `a`, `b`, `c`, `d`, + * `e`, `f`, `g`, `i`, `o`, `p`, `s`, `u`, `x`, `%`. This list is also + * (largely the same but) not identical to that of rb_sprintf(). + * + * @param[in] argc Number of objects in `argv`. + * @param[in] argv Format arguments. + * @param[in] fmt A printf-like format specifier. + * @exception rb_eTypeError `fmt` is not a string. + * @exception rb_eArgError Failed to parse `fmt`. + * @return A rendered new instance of ::rb_cString. + * @note Everything it takes must be Ruby objects. + * + */ +VALUE rb_str_format(int argc, const VALUE *argv, VALUE fmt); RBIMPL_SYMBOL_EXPORT_END() diff --git a/include/ruby/internal/intern/string.h b/include/ruby/internal/intern/string.h index a590b2043e..6827563e8d 100644 --- a/include/ruby/internal/intern/string.h +++ b/include/ruby/internal/intern/string.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Public APIs related to ::rb_cString.
*/ #include "ruby/internal/config.h" @@ -34,6 +34,7 @@ # include <stdint.h> #endif +#include "ruby/internal/attr/deprecated.h" #include "ruby/internal/attr/nonnull.h" #include "ruby/internal/attr/pure.h" #include "ruby/internal/constant_p.h" @@ -45,107 +46,1335 @@ RBIMPL_SYMBOL_EXPORT_BEGIN() /* string.c */ -VALUE rb_str_new(const char*, long); -VALUE rb_str_new_cstr(const char*); -VALUE rb_str_new_shared(VALUE); -VALUE rb_str_new_frozen(VALUE); -VALUE rb_str_new_with_class(VALUE, const char*, long); -VALUE rb_tainted_str_new_cstr(const char*); -VALUE rb_tainted_str_new(const char*, long); -VALUE rb_external_str_new(const char*, long); -VALUE rb_external_str_new_cstr(const char*); -VALUE rb_locale_str_new(const char*, long); -VALUE rb_locale_str_new_cstr(const char*); -VALUE rb_filesystem_str_new(const char*, long); -VALUE rb_filesystem_str_new_cstr(const char*); -VALUE rb_str_buf_new(long); -VALUE rb_str_buf_new_cstr(const char*); -VALUE rb_str_buf_new2(const char*); -VALUE rb_str_tmp_new(long); -VALUE rb_usascii_str_new(const char*, long); -VALUE rb_usascii_str_new_cstr(const char*); -VALUE rb_utf8_str_new(const char*, long); -VALUE rb_utf8_str_new_cstr(const char*); + +/** + * Allocates an instance of ::rb_cString. + * + * @param[in] ptr A memory region of `len` bytes length. + * @param[in] len Length of `ptr`, in bytes, not including the + * terminating NUL character. + * @exception rb_eNoMemError Failed to allocate `len+1` bytes. + * @exception rb_eArgError `len` is negative. + * @return An instance of ::rb_cString, of `len` bytes length, of + * "binary" encoding, whose contents are verbatim copy of `ptr`. + * @pre At least `len` bytes of continuous memory region shall be + * accessible via `ptr`. + */ +VALUE rb_str_new(const char *ptr, long len); + +/** + * Identical to rb_str_new(), except it assumes the passed pointer is a pointer + * to a C string. + * + * @param[in] ptr A C string. + * @exception rb_eNoMemError Failed to allocate memory. 
+ * @exception rb_eArgError `ptr` is a null pointer. + * @return An instance of ::rb_cString, of "binary" encoding, whose + * contents are verbatim copy of `ptr`. + * @pre `ptr` must not be a null pointer. + */ +VALUE rb_str_new_cstr(const char *ptr); + +/** + * Identical to rb_str_new_cstr(), except it takes a Ruby's string instead of + * C's. Implementation wise it creates a string that shares the backend memory + * region with the receiver. So the name. But there is no way for extension + * libraries to know if a string is of such variant. + * + * @param[in] str An object of ::RString. + * @return An allocated instance of ::rb_cString, which shares the + * encoding, length, and contents with the passed string. + * @pre `str` must not be any arbitrary object except ::RString. + * @note Use #StringValue to enforce the precondition. + */ +VALUE rb_str_new_shared(VALUE str); + +/** + * Creates a frozen copy of the string, if necessary. This function does + * nothing when the passed string is already frozen. Otherwise, it allocates a + * copy of it, which is frozen. The passed string is untouched either ways. + * + * @param[in] str An object of ::RString. + * @return Something frozen. + * @pre `str` must not be any arbitrary object except ::RString. + * @note Use #StringValue to enforce the precondition. + */ +VALUE rb_str_new_frozen(VALUE str); + +/** + * Identical to rb_str_new(), except it takes the class of the allocating + * object. + * + * @param[in] obj A string-ish object. + * @param[in] ptr A memory region of `len` bytes length. + * @param[in] len Length of `ptr`, in bytes, not including the + * terminating NUL character. + * @exception rb_eNoMemError Failed to allocate `len+1` bytes. + * @exception rb_eArgError `len` is negative. + * @return An instance of the class of `obj`, of `len` bytes length, of + * "binary" encoding, whose contents are verbatim copy of `ptr`. + * @pre At least `len` bytes of continuous memory region shall be + * accessible via `ptr`. 
+ * + * @internal + * + * Why it doesn't take an instance of ::rb_cClass? + */ +VALUE rb_str_new_with_class(VALUE obj, const char *ptr, long len); + +/** + * Identical to rb_str_new(), except it generates a string of "default + * external" encoding. + * + * @param[in] ptr A memory region of `len` bytes length. + * @param[in] len Length of `ptr`, in bytes, not including the + * terminating NUL character. + * @exception rb_eNoMemError Failed to allocate `len+1` bytes. + * @exception rb_eArgError `len` is negative. + * @return An instance of ::rb_cString. In case encoding conversion from + * "default internal" to "default external" is fully defined over + * the given contents, then the return value is a string of + * "default external" encoding, whose contents are the converted + * ones. Otherwise the string is a junk. + * @warning It doesn't raise on a conversion failure and silently ends up in + * a corrupted output. You can know the failure by querying + * `valid_encoding?` of the result object. + */ +VALUE rb_external_str_new(const char *ptr, long len); + +RBIMPL_ATTR_NONNULL(()) /** + * Identical to rb_external_str_new(), except it assumes the passed pointer is + * a pointer to a C string. It can also be seen as a routine identical to + * rb_str_new_cstr(), except it generates a string of "default external" + * encoding. + * + * @param[in] ptr A C string. + * @exception rb_eNoMemError Failed to allocate memory. + * @return An instance of ::rb_cString. In case encoding conversion from + * "default internal" to "default external" is fully defined over + * the given contents, then the return value is a string of + * "default external" encoding, whose contents are the converted + * ones. Otherwise the string is a junk. + * @warning It doesn't raise on a conversion failure and silently ends up in + * a corrupted output. You can know the failure by querying + * `valid_encoding?` of the result object. + * @pre `ptr` must not be a null pointer. 
+ */ +VALUE rb_external_str_new_cstr(const char *ptr); + +/** + * Identical to rb_str_new(), except it generates a string of "locale" + * encoding. It can also be seen as a routine identical to + * rb_external_str_new(), except it generates a string of "locale" encoding + * instead of "default external" encoding. + * + * @param[in] ptr A memory region of `len` bytes length. + * @param[in] len Length of `ptr`, in bytes, not including the + * terminating NUL character. + * @exception rb_eNoMemError Failed to allocate `len+1` bytes. + * @exception rb_eArgError `len` is negative. + * @return An instance of ::rb_cString. In case encoding conversion from + * "default internal" to "locale" is fully defined over the given + * contents, then the return value is a string of "locale" + * encoding, whose contents are the converted ones. Otherwise the + * string is a junk. + * @warning It doesn't raise on a conversion failure and silently ends up in + * a corrupted output. You can know the failure by querying + * `valid_encoding?` of the result object. + */ +VALUE rb_locale_str_new(const char *ptr, long len); + +RBIMPL_ATTR_NONNULL(()) +/** + * Identical to rb_locale_str_new(), except it assumes the passed pointer is a + * pointer to a C string. It can also be seen as a routine identical to + * rb_external_str_new_cstr(), except it generates a string of "locale" + * encoding instead of "default external". + * + * @param[in] ptr A C string. + * @exception rb_eNoMemError Failed to allocate memory. + * @return An instance of ::rb_cString. In case encoding conversion from + * "default internal" to "locale" is fully defined over the given + * contents, then the return value is a string of "locale" + * encoding, whose contents are the converted ones. Otherwise the + * string is a junk. + * @warning It doesn't raise on a conversion failure and silently ends up in + * a corrupted output. You can know the failure by querying + * `valid_encoding?` of the result object. 
+ * @pre `ptr` must not be a null pointer. + */ +VALUE rb_locale_str_new_cstr(const char *ptr); + +/** + * Identical to rb_str_new(), except it generates a string of "filesystem" + * encoding. It can also be seen as a routine identical to + * rb_external_str_new(), except it generates a string of "filesystem" encoding + * instead of "default external" encoding. + * + * @param[in] ptr A memory region of `len` bytes length. + * @param[in] len Length of `ptr`, in bytes, not including the + * terminating NUL character. + * @exception rb_eNoMemError Failed to allocate `len+1` bytes. + * @exception rb_eArgError `len` is negative. + * @return An instance of ::rb_cString. In case encoding conversion from + * "default internal" to "filesystem" is fully defined over the + * given contents, then the return value is a string of + * "filesystem" encoding, whose contents are the converted ones. + * Otherwise the string is a junk. + * @warning It doesn't raise on a conversion failure and silently ends up in + * a corrupted output. You can know the failure by querying + * `valid_encoding?` of the result object. + */ +VALUE rb_filesystem_str_new(const char *ptr, long len); + +RBIMPL_ATTR_NONNULL(()) +/** + * Identical to rb_filesystem_str_new(), except it assumes the passed pointer + * is a pointer to a C string. It can also be seen as a routine identical to + * rb_external_str_new_cstr(), except it generates a string of "filesystem" + * encoding instead of "default external". + * + * @param[in] ptr A C string. + * @exception rb_eNoMemError Failed to allocate memory. + * @return An instance of ::rb_cString. In case encoding conversion from + * "default internal" to "filesystem" is fully defined over the + * given contents, then the return value is a string of + * "filesystem" encoding, whose contents are the converted ones. + * Otherwise the string is a junk. + * @warning It doesn't raise on a conversion failure and silently ends up in + * a corrupted output. 
You can know the failure by querying + * `valid_encoding?` of the result object. + * @pre `ptr` must not be a null pointer. + */ +VALUE rb_filesystem_str_new_cstr(const char *ptr); + +/** + * Allocates a "string buffer". A string buffer here is an instance of + * ::rb_cString, whose capacity is bigger than the length of it. If you can + * say that a string grows to a specific amount of bytes, this could be + * more effective than resizing a string over and over again. + * + * @param[in] capa Designed capacity of the generating string. + * @return An empty string, of "binary" encoding, whose capacity is `capa`. + */ +VALUE rb_str_buf_new(long capa); + +RBIMPL_ATTR_NONNULL(()) +/** + * This is a rb_str_buf_new() + rb_str_buf_cat() combo. + * + * @param[in] ptr A C string. + * @exception rb_eNoMemError Failed to allocate memory. + * @return An instance of ::rb_cString, of "binary" encoding, whose + * contents are verbatim copy of `ptr`. + * @pre `ptr` must not be a null pointer. + * + * @internal + * + * This must be identical to rb_str_new_cstr(), except done in inefficient way? + * @shyouhei doesn't understand why this is not a simple alias. + */ +VALUE rb_str_buf_new_cstr(const char *ptr); + +/** + * Allocates a "temporary" string. This is a hidden empty string. Handy on + * occasions. + * + * @param[in] len Designed length of the string. + * @return A hidden, empty string. + * @see rb_obj_hide() + */ +VALUE rb_str_tmp_new(long len); + +/** + * Identical to rb_str_new(), except it generates a string of "US ASCII" + * encoding. This is different from rb_external_str_new(), not only for the + * output encoding, but also it doesn't convert the contents. + * + * @param[in] ptr A memory region of `len` bytes length. + * @param[in] len Length of `ptr`, in bytes, not including the + * terminating NUL character. + * @exception rb_eNoMemError Failed to allocate `len+1` bytes. + * @exception rb_eArgError `len` is negative.
+ * @return An instance of ::rb_cString, of `len` bytes length, of + * "US ASCII" encoding, whose contents are verbatim copy of `ptr`. + */ +VALUE rb_usascii_str_new(const char *ptr, long len); + +/** + * Identical to rb_str_new_cstr(), except it generates a string of "US ASCII" + * encoding. It can also be seen as a routine identical to + * rb_usascii_str_new(), except it assumes the passed pointer is a pointer to a + * C string. + * + * @param[in] ptr A C string. + * @exception rb_eNoMemError Failed to allocate memory. + * @exception rb_eArgError `ptr` is a null pointer. + * @return An instance of ::rb_cString, of "US ASCII" encoding, whose + * contents are verbatim copy of `ptr`. + * @pre `ptr` must not be a null pointer. + */ +VALUE rb_usascii_str_new_cstr(const char *ptr); + +/** + * Identical to rb_str_new(), except it generates a string of "UTF-8" encoding. + * + * @param[in] ptr A memory region of `len` bytes length. + * @param[in] len Length of `ptr`, in bytes, not including the + * terminating NUL character. + * @exception rb_eNoMemError Failed to allocate `len+1` bytes. + * @exception rb_eArgError `len` is negative. + * @return An instance of ::rb_cString, of `len` bytes length, of + * "UTF-8" encoding, whose contents are verbatim copy of `ptr`. + */ +VALUE rb_utf8_str_new(const char *ptr, long len); + +/** + * Identical to rb_str_new_cstr(), except it generates a string of "UTF-8" + * encoding. It can also be seen as a routine identical to + * rb_utf8_str_new(), except it assumes the passed pointer is a pointer to a + * C string. + * + * @param[in] ptr A C string. + * @exception rb_eNoMemError Failed to allocate memory. + * @exception rb_eArgError `ptr` is a null pointer. + * @return An instance of ::rb_cString, of "UTF-8" encoding, whose contents + * are verbatim copy of `ptr`. + * @pre `ptr` must not be a null pointer. + */ +VALUE rb_utf8_str_new_cstr(const char *ptr); + +/** + * @name Special strings that are backended by C string literals.
+ * * *_str_new_static functions are intended for C string literals. * They require memory in the range [ptr, ptr+len] to always be readable. * Note that this range covers a total of len + 1 bytes. + * + * @{ + */ + +/** + * Identical to rb_str_new(), except it takes a C string literal. + * + * @param[in] ptr A C string literal. + * @param[in] len `strlen(ptr)`. + * @exception rb_eArgError `len` out of range of `size_t`. + * @pre `ptr` must be a C string constant. + * @return An instance of ::rb_cString, of "binary" encoding, whose backend + * storage is the passed C string literal. + * @warning It is a very bad idea to write to a C string literal (often + * immediate SEGV shall occur). Consider return values of this + * function be read-only. + * + * @internal + * + * Surprisingly it can take NULL, and generates an empty string. */ VALUE rb_str_new_static(const char *ptr, long len); + +/** + * Identical to rb_str_new_static(), except it generates a string of "US ASCII" + * encoding instead of "binary". It can also be seen as a routine identical to + * rb_usascii_str_new(), except it takes a C string literal. + * + * @param[in] ptr A C string literal. + * @param[in] len `strlen(ptr)`. + * @exception rb_eArgError `len` out of range of `size_t`. + * @pre `ptr` must be a C string constant. + * @return An instance of ::rb_cString, of "US ASCII" encoding, whose + * backend storage is the passed C string literal. + * @warning It is a very bad idea to write to a C string literal (often + * immediate SEGV shall occur). Consider return values of this + * function be read-only. + */ VALUE rb_usascii_str_new_static(const char *ptr, long len); + +/** + * Identical to rb_str_new_static(), except it generates a string of "UTF-8" + * encoding instead of "binary". It can also be seen as a routine identical to + * rb_utf8_str_new(), except it takes a C string literal. + * + * @param[in] ptr A C string literal. + * @param[in] len `strlen(ptr)`. 
+ * @exception rb_eArgError `len` out of range of `size_t`. + * @pre `ptr` must be a C string constant. + * @return An instance of ::rb_cString, of "UTF-8" encoding, whose backend + * storage is the passed C string literal. + * @warning It is a very bad idea to write to a C string literal (often + * immediate SEGV shall occur). Consider return values of this + * function be read-only. + */ VALUE rb_utf8_str_new_static(const char *ptr, long len); -VALUE rb_str_to_interned_str(VALUE); -VALUE rb_interned_str(const char *, long); -VALUE rb_interned_str_cstr(const char *); -void rb_str_free(VALUE); -void rb_str_shared_replace(VALUE, VALUE); -VALUE rb_str_buf_append(VALUE, VALUE); + +/** @} */ + +/** + * Identical to rb_interned_str(), except it takes a Ruby's string instead of + * C's. It can also be seen as a routine identical to rb_str_new_shared(), + * except it returns an infamous "f"string. + * + * @param[in] str An object of ::RString. + * @return An instance of ::rb_cString, either cached or allocated, which + * has the identical encoding, length, and contents with the passed + * string. + * @pre `str` must not be any arbitrary object except ::RString. + * @note Use #StringValue to enforce the precondition. + * + * @internal + * + * It actually finds or creates a fstring of the needed property, and + * destructively modifies the receiver behind-the-scene so that it becomes a + * shared string whose parent is the returning fstring. + */ +VALUE rb_str_to_interned_str(VALUE str); + +/** + * Identical to rb_str_new(), except it returns an infamous "f"string. What is + * a fstring? Well it is a special subkind of strings that is immutable, + * deduped globally, and managed by our GC. It is much like a Symbol (in fact + * Symbols are dynamic these days and are backended using fstrings). This + * concept has been silently introduced at some point in 2.x era. Since then + * it gained wider acceptance in the core. 
Starting from 3.x extension + * libraries can also generate ones. + * + * @param[in] ptr A memory region of `len` bytes length. + * @param[in] len Length of `ptr`, in bytes, not including the + * terminating NUL character. + * @exception rb_eArgError `len` is negative. + * @return A found or created instance of ::rb_cString, of `len` bytes + * length, of "binary" encoding, whose contents are identical to + * that of `ptr`. + * @pre At least `len` bytes of continuous memory region shall be + * accessible via `ptr`. + */ +VALUE rb_interned_str(const char *ptr, long len); + +RBIMPL_ATTR_NONNULL(()) +/** + * Identical to rb_interned_str(), except it assumes the passed pointer is a + * pointer to a C's string. It can also be seen as a routine identical to + * rb_str_to_interned_str(), except it takes a C's string instead of Ruby's. + * Or it can also be seen as a routine identical to rb_str_new_cstr(), except + * it returns an infamous "f"string. + * + * @param[in] ptr A C string. + * @exception rb_eNoMemError Failed to allocate memory. + * @return An instance of ::rb_cString, of "binary" encoding, whose + * contents are verbatim copy of `ptr`. + * @pre `ptr` must not be a null pointer. + */ +VALUE rb_interned_str_cstr(const char *ptr); + +/** + * Destroys the given string for no reason. + * + * @warning DO NOT USE IT. + * @warning Leave this task to our GC. + * @warning It was a bad idea at the first place to let you know about it. + * + * @param[out] str The string to be executed. + * @post The given string no longer exists. + * @note Maybe `String#clear` could be what you want. + * + * @internal + * + * Should have moved this to `internal/string.h`. + */ +void rb_str_free(VALUE str); + +/** + * Replaces the contents of the former with the latter. + * + * @param[out] dst Destination object. + * @param[in] src Source object. + * @pre Both objects must not be any arbitrary objects except + * ::RString. + * @post `dst`'s former components are abandoned. 
It now has the + * identical encoding, length, and contents to `src`. + * @see rb_str_replace() + * + * @internal + * + * @shyouhei doesn't understand why this is useful to extension libraries. + * Just use rb_str_replace(). What's wrong with that? + */ +void rb_str_shared_replace(VALUE dst, VALUE src); + +/** + * Identical to rb_str_cat_cstr(), except it takes Ruby's string instead of + * C's. It can also be seen as a routine identical to rb_str_shared_replace(), + * except it appends instead of replaces. + * + * @param[out] dst Destination object. + * @param[in] src Source object. + * @exception rb_eEncCompatError Can't mix the encodings. + * @exception rb_eArgError Result string too big. + * @return The passed `dst`. + * @pre Both objects must not be any arbitrary objects except + * ::RString. + * @post `dst` has the contents of `src` appended, with encoding + * converted into `dst`'s one, into the end of `dst`. + */ +VALUE rb_str_buf_append(VALUE dst, VALUE src); + +/** @alias{rb_str_cat} */ VALUE rb_str_buf_cat(VALUE, const char*, long); + +/** @alias{rb_str_cat_cstr} */ VALUE rb_str_buf_cat2(VALUE, const char*); -VALUE rb_str_buf_cat_ascii(VALUE, const char*); -VALUE rb_obj_as_string(VALUE); -VALUE rb_check_string_type(VALUE); -void rb_must_asciicompat(VALUE); -VALUE rb_str_dup(VALUE); + +RBIMPL_ATTR_NONNULL(()) +/** + * Identical to rb_str_cat_cstr(), except it additionally assumes the source + * string be a NUL terminated ASCII string. + * + * @param[out] dst Destination object. + * @param[in] src Source string. + * @exception rb_eArgError Result string too big. + * @return The passed `dst`. + * @pre `dst` must not be any arbitrary object except ::RString. + * @pre `src` must be a NUL terminated ASCII string. + * @post `dst` has the contents of `src` appended, with encoding + * converted into `dst`'s one, into the end of `dst`. 
+ */ +VALUE rb_str_buf_cat_ascii(VALUE dst, const char *src); + +/** + * Try converting an object to its stringised representation using its `to_s` + * method, if any. If there is no such thing, it resorts to rb_any_to_s() + * output. + * + * @param[in] obj Arbitrary ruby object to stringise. + * @return An instance of ::rb_cString. + */ +VALUE rb_obj_as_string(VALUE obj); + +/** + * Try converting an object to its stringised representation using its `to_str` + * method, if any. If there is no such thing, returns ::RUBY_Qnil. + * + * @param[in] obj Arbitrary ruby object to stringise. + * @exception rb_eTypeError `obj.to_str` returned something non-String. + * @retval RUBY_Qnil No conversion from obj to String defined. + * @return otherwise Stringised representation of `obj`. + * @see rb_io_check_io + * @see rb_check_array_type + * @see rb_check_hash_type + */ +VALUE rb_check_string_type(VALUE obj); + +/** + * Asserts that the given string's encoding is (Ruby's definition of) ASCII + * compatible. + * + * @param[in] obj An instance of ::rb_cString. + * @exception rb_eEncCompatError `obj` is ASCII incompatible. + * + * @internal + * + * @shyouhei doesn't know if this is an Easter egg or an official feature, but + * this function can in fact take non-strings such as Symbols, Regexps, IOs, + * etc. However if something unsupported is passed, it causes SEGV. It seems + * the feature is kind of untested. + */ +void rb_must_asciicompat(VALUE obj); + +/** + * Duplicates a string. + * + * @param[in] str String in question to duplicate. + * @return A duplicated new instance. + * @pre `str` must be of ::RString. + */ +VALUE rb_str_dup(VALUE str); + +/** + * I guess there is no use case of this function in extension libraries, but + * this is a routine identical to rb_str_dup(), except it always creates an + * instance of ::rb_cString regardless of the given object's class. This makes + * the most sense when the passed string is formerly hidden by rb_obj_hide(). 
+ * + * @param[in] str A string, possibly hidden. + * @return A duplicated new instance of ::rb_cString. + */ VALUE rb_str_resurrect(VALUE str); -VALUE rb_str_locktmp(VALUE); -VALUE rb_str_unlocktmp(VALUE); + +/** + * Returns whether a string is chilled or not. + * + * This function is temporary and users must check for its presence using + * #ifdef HAVE_RB_STR_CHILLED_P. If HAVE_RB_STR_CHILLED_P is not defined, then + * strings can't be chilled. + * + * @param[in] str A string. + * @retval 1 The string is chilled. + * @retval 0 Otherwise. + */ +bool rb_str_chilled_p(VALUE str); + +#define HAVE_RB_STR_CHILLED_P 1 + +/** + * Obtains a "temporary lock" of the string. This advisory locking mechanism + * prevents other cooperating threads from tampering the receiver. The same + * thing could be done via freeze mechanism, but this one can also be unlocked + * using rb_str_unlocktmp(). + * + * @param[out] str String to lock. + * @exception rb_eRuntimeError `str` already locked. + * @return The given string. + * @post The string is locked. + */ +VALUE rb_str_locktmp(VALUE str); + +/** + * Releases a lock formerly obtained by rb_str_locktmp(). + * + * @param[out] str String to unlock. + * @exception rb_eRuntimeError `str` already unlocked. + * @return The given string. + * @post The string is unlocked.
+ */ +VALUE rb_str_unlocktmp(VALUE str); + +/** @alias{rb_str_new_frozen} */ VALUE rb_str_dup_frozen(VALUE); + +/** @alias{rb_str_new_frozen} */ #define rb_str_dup_frozen rb_str_new_frozen -VALUE rb_str_plus(VALUE, VALUE); -VALUE rb_str_times(VALUE, VALUE); -long rb_str_sublen(VALUE, long); -VALUE rb_str_substr(VALUE, long, long); -VALUE rb_str_subseq(VALUE, long, long); -char *rb_str_subpos(VALUE, long, long*); -void rb_str_modify(VALUE); -void rb_str_modify_expand(VALUE, long); -VALUE rb_str_freeze(VALUE); -void rb_str_set_len(VALUE, long); -VALUE rb_str_resize(VALUE, long); -VALUE rb_str_cat(VALUE, const char*, long); -VALUE rb_str_cat_cstr(VALUE, const char*); + +/** + * Generates a new string, concatenating the former to the latter. It can also + * be seen as a routine identical to rb_str_append(), except it doesn't tamper + * the passed strings to create a new one instead. + * + * @param[in] lhs Source string #1. + * @param[in] rhs Source string #2. + * @exception rb_eEncCompatError Can't mix the encodings. + * @exception rb_eArgError Result string too big. + * @return A new string containing `rhs` concatenated to `lhs`. + * @pre Both objects must not be any arbitrary objects except ::RString. + * @note This operation doesn't commute. Don't get confused by the + * "plus" terminology. For historical reasons there are some + * noncommutative `+`s in Ruby. This is one of such things. There + * has been a long discussion around `+`s in programming languages. + */ +VALUE rb_str_plus(VALUE lhs, VALUE rhs); + +/** + * Repetition of a string. + * + * @param[in] str String to repeat. + * @param[in] num Count, something numeric. + * @exception rb_eArgError `num` is negative. + * @return A new string repeating `num` times of `str`. + */ +VALUE rb_str_times(VALUE str, VALUE num); + +/** + * Byte offset to character offset conversion. This makes sense when the + * receiver is in a multibyte encoding. The string's i-th character does not + * always sit at its i-th byte. 
This function scans the contents to find the + * character index that matches the byte index. Generally speaking this is an + * `O(n)` operation. Could be slow. + * + * @param[in] str The string to scan. + * @param[in] pos Offset, in bytes. + * @return Offset, in characters. + */ +long rb_str_sublen(VALUE str, long pos); + +/** + * This is the implementation of two-argumented `String#slice`. + * + * - Returns the substring of the given `len` found in `str` at offset `beg`: + * + * ```ruby + * 'foo'[0, 2] # => "fo" + * 'foo'[0, 0] # => "" + * ``` + * + * - Counts backward from the end of `str` if `beg` is negative: + * + * ```ruby + * 'foo'[-2, 2] # => "oo" + * ``` + * + * - Special case: returns a new empty string if `beg` is equal to the length + * of `str`: + * + * ```ruby + * 'foo'[3, 2] # => "" + * ``` + * + * - Returns ::RUBY_Qnil if `beg` is out of range: + * + * ```ruby + * 'foo'[4, 2] # => nil + * 'foo'[-4, 2] # => nil + * ``` + * + * - Returns the trailing substring of `str` if `len` is large: + * + * ```ruby + * 'foo'[1, 50] # => "oo" + * ``` + * + * - Returns ::RUBY_Qnil if `len` is negative: + * + * ```ruby + * 'foo'[0, -1] # => nil + * ``` + * + * @param[in] str The string to slice. + * @param[in] beg Requested offset of the substring. + * @param[in] len Requested length of the substring. + * @retval RUBY_Qnil Parameters out of range. + * @retval otherwise A new string whose contents is the specified + * substring of `str`. + * @pre `str` must not be any arbitrary objects except ::RString. + */ +VALUE rb_str_substr(VALUE str, long beg, long len); + +/** + * Identical to rb_str_substr(), except the numbers are interpreted as byte + * offsets instead of character offsets. + * + * @param[in] str The string to slice. + * @param[in] beg Requested offset of the substring. + * @param[in] len Requested length of the substring. + * @return A new string whose contents is the specified substring of `str`.
+ * @pre `str` must not be any arbitrary objects except ::RString. + * @pre `beg` and `len` must not point to OOB contents. + */ +VALUE rb_str_subseq(VALUE str, long beg, long len); + +/** + * Identical to rb_str_substr(), except it returns a C's string instead of + * Ruby's. + * + * @param[in] str The string to slice. + * @param[in] beg Requested offset of the substring. + * @param[in,out] len Requested length of the substring. + * @retval NULL Parameters out of range. + * @retval otherwise A pointer inside of `str`'s backend storage where + * the specified substring exist. + * @pre `str` must not be any arbitrary objects except ::RString. + * @post `len` is updated to have the length of the return value. + */ +char *rb_str_subpos(VALUE str, long beg, long *len); + +/** + * Declares that the string is about to be modified. This for instance let the + * string have a dedicated backend storage. + * + * @param[out] str String about to be modified. + * @exception rb_eRuntimeError `str` is `locktmp`-ed. + * @exception rb_eFrozenError `str` is frozen. + * @pre `str` must not be any arbitrary objects except ::RString. + * @post Upon successful return the passed string is eligible to be + * modified. + */ +void rb_str_modify(VALUE str); + +/** + * Identical to rb_str_modify(), except it additionally expands the capacity of + * the receiver. + * + * @param[out] str Target string to modify. + * @param[in] capa Additional capacity to add. + * @exception rb_eArgError `capa` is negative. + * @exception rb_eRuntimeError `str` is `locktmp`-ed. + * @exception rb_eFrozenError `str` is frozen. + * @pre `str` must not be any arbitrary objects except ::RString. + * @post Upon successful return the passed string is modified so that + * its capacity is increased for `capa` bytes. + */ +void rb_str_modify_expand(VALUE str, long capa); + +/** + * This is the implementation of `String#freeze`. + * + * @param[out] str Target string to freeze. + * @return The passed string. 
+ * @post Upon successful return the passed string is frozen. + */ +VALUE rb_str_freeze(VALUE str); + +/** + * Overwrites the length of the string. Typically this is used to shrink a + * string that was formerly expanded. + * + * ```CXX + * extern int fd; + * auto str = rb_eval_string("'...'"); + * rb_str_modify_expand(str, BUFSIZ); + * if (auto len = recv(fd, RSTRING_PTR(str), BUFSIZ, 0); len >= 0) { + * rb_str_set_len(str, len); + * } + * else { + * rb_sys_fail("recv(2)"); + * } + * ``` + * + * @param[out] str String to shrink. + * @param[in] len New length of the string. + * @exception rb_eRuntimeError `str` is `locktmp`-ed. + * @exception rb_eFrozenError `str` is frozen. + * @pre `str` must not be any arbitrary objects except ::RString. + * @post Upon successful return `str`'s length is set to `len`. + */ +void rb_str_set_len(VALUE str, long len); + +/** + * Overwrites the length of the string. In contrast to rb_str_set_len(), this + * function can also expand a string. + * + * @param[out] str String to resize. + * @param[in] len New length of the string. + * @exception rb_eArgError `len` is negative. + * @exception rb_eRuntimeError `str` is `locktmp`-ed. + * @exception rb_eFrozenError `str` is frozen. + * @return The passed `str`. + * @pre `str` must not be any arbitrary objects except ::RString. + * @post Upon successful return `str` is either expanded or shrunken to + * have its length be `len`. + */ +VALUE rb_str_resize(VALUE str, long len); + +/** + * Destructively appends the passed contents to the string. + * + * @param[out] dst Destination object. + * @param[in] src Contents to append. + * @param[in] srclen Length of `src`. + * @exception rb_eArgError `srclen` is negative. + * @return The passed `dst`. + * @pre `dst` must not be any arbitrary objects except ::RString. + * @post `dst` has the contents of `src` appended.
+ */ +VALUE rb_str_cat(VALUE dst, const char *src, long srclen); + +/** + * Identical to rb_str_cat(), except it assumes the passed pointer is a pointer + * to a C string. + * + * @param[out] dst Destination object. + * @param[in] src Contents to append. + * @exception rb_eArgError Result string too big. + * @exception rb_eArgError `src` is a null pointer. + * @return The passed `dst`. + * @pre `dst` must not be any arbitrary objects except ::RString. + * @pre `src` must not be a null pointer. + * @post `dst` has the contents of `src` appended. + */ +VALUE rb_str_cat_cstr(VALUE dst, const char *src); + +/** @alias{rb_str_cat_cstr} */ VALUE rb_str_cat2(VALUE, const char*); -VALUE rb_str_append(VALUE, VALUE); -VALUE rb_str_concat(VALUE, VALUE); + +/** + * Identical to rb_str_buf_append(), except it converts the right hand side + * before concatenating. + * + * @param[out] dst Destination object. + * @param[in] src Source object. + * @exception rb_eEncCompatError Can't mix the encodings. + * @exception rb_eArgError Result string too big. + * @return The passed `dst`. + * @pre `dst` must not be any arbitrary objects except ::RString. + * @post `dst` has the contents of `src` appended, with encoding + * converted into `dst`'s one, into the end of `dst`. + */ +VALUE rb_str_append(VALUE dst, VALUE src); + +/** + * Identical to rb_str_append(), except it also accepts an integer as a + * codepoint. This resembles `String#<<`. + * + * @param[out] dst Destination object. + * @param[in] src Source object, String or Numeric. + * @exception rb_eRangeError Source numeric is out of range. + * @exception rb_eEncCompatError Source string too long. + * @exception rb_eArgError Result string too big. + * @return The passed `dst`. + * @pre `dst` must not be any arbitrary objects except ::RString. + * @post `dst` has the contents of `src` appended, with encoding + * converted into `dst`'s one, into the end of `dst`. 
+ */ +VALUE rb_str_concat(VALUE dst, VALUE src); + +/* random.c */ + +/** + * This is a universal hash function. + * + * @warning This function changes its value per process. + * @param[in] ptr Target message. + * @param[in] len Length of `ptr` in bytes. + * @return A pseudorandom number suitable for Hash's hash value. + * @see Aumasson, JP., Bernstein, D.J., "SipHash: A Fast Short-Input + * PRF", In proceedings of 13th International Conference on + * Cryptology in India (INDOCRYPT 2012), LNCS 7668, pp. 489-508, + * 2012. http://doi.org/10.1007/978-3-642-34931-7_28 +*/ st_index_t rb_memhash(const void *ptr, long len); -st_index_t rb_hash_start(st_index_t); -st_index_t rb_hash_uint32(st_index_t, uint32_t); -st_index_t rb_hash_uint(st_index_t, st_index_t); -st_index_t rb_hash_end(st_index_t); + +/** + * Starts a series of hashing. Suppose you have a struct: + * + * ```CXX + * struct foo_tag { + * unsigned char bar; + * uint32_t baz; + * }; + * ``` + * + * It is not a wise idea to call rb_memhash() over it, because there could be + * padding bits. Instead you should explicitly iterate over each fields: + * + * ```CXX + * foo_tag foo = { 0, 0, }; + * st_index_t hash = 0; + * + * hash = rb_hash_start(0); + * hash = rb_hash_uint(hash, foo.bar); + * hash = rb_hash_uint32(hash, foo.baz); + * hash = rb_hash_end(hash); + * ``` + * + * @param[in] i Initial value. + * @return A hash value. + */ +st_index_t rb_hash_start(st_index_t i); + +/** @alias{st_hash_uint32} */ #define rb_hash_uint32(h, i) st_hash_uint32((h), (i)) + +/** @alias{st_hash_uint} */ #define rb_hash_uint(h, i) st_hash_uint((h), (i)) + +/** @alias{st_hash_end} */ #define rb_hash_end(h) st_hash_end(h) -st_index_t rb_str_hash(VALUE); -int rb_str_hash_cmp(VALUE,VALUE); -int rb_str_comparable(VALUE, VALUE); -int rb_str_cmp(VALUE, VALUE); + +/* string.c */ + +/** + * Calculates a hash value of a string. This is one of the two functions that + * constructs struct ::st_hash_type. 
+ * + * @param[in] str An object of ::RString. + * @return A hash value. + * @pre `str` must not be any arbitrary object except ::RString. + * + * @internal + * + * Although safe to call, there must be no particular use case of this function + * for extension libraries. Only ruby internals must know about it. + * + * This is not a simple alias of rb_memhash(), because it considers the passed + * string's encoding as well as its contents. + */ +st_index_t rb_str_hash(VALUE str); + +/** + * Compares two strings. This is one of the two functions that constructs + * struct ::st_hash_type. + * + * @param[in] str1 A string. + * @param[in] str2 Another string. + * @retval 1 They have identical contents, length, and encodings. + * @retval 0 Otherwise. + * @pre Both objects must not be any arbitrary objects except + * ::RString. + * + * @internal + * + * In contrast to rb_str_hash(), this could be handy for comparison that only + * concerns equality. rb_str_cmp() returns 1, 0, -1. + */ +int rb_str_hash_cmp(VALUE str1, VALUE str2); + +/** + * Checks if two strings are comparable to each other or not. Because + * rb_str_cmp() must return "lesser than" or "greater than" information, + * comparing two strings needs a stricter restriction. Both sides must be in a + * same set of strings which have total order. This is to check that property. + * Sounds intuitive? But they can have different encodings. A character + * and another might or might not appear in the same order in their codepoints. + * It is more complicated than you think. + * + * @param[in] str1 A string. + * @param[in] str2 Another string. + * @retval 1 They agree on a total order. + * @retval 0 Otherwise. + * @pre Both objects must not be any arbitrary objects except + * ::RString. + */ +int rb_str_comparable(VALUE str1, VALUE str2); + +/** + * Compares two strings, as in `strcmp(3)`. This does not consider the current + * locale, but considers the encodings of both sides instead. + * + * @param[in] lhs A string.
+ * @param[in] rhs Another string. + * @retval -1 `lhs` is "smaller than" `rhs`. + * @retval 1 `lhs` is "bigger than" `rhs`. + * @retval 0 Otherwise, e.g. not comparable. + * @pre Both objects must not be any arbitrary objects except + * ::RString. + */ +int rb_str_cmp(VALUE lhs, VALUE rhs); + +/** + * Equality of two strings. + * + * If `str2` is not a String, it resorts to `str2 == str1`. Otherwise if they + * are not comparable, returns ::RUBY_Qfalse. Otherwise if they have the same + * contents and the length, returns ::RUBY_Qtrue. Otherwise, returns + * ::RUBY_Qfalse. + * + * @param[in] str1 A string. + * @param[in] str2 Another string. + * @retval RUBY_Qtrue They are equal. + * @retval RUBY_Qfalse They are either different, or not comparable. + */ VALUE rb_str_equal(VALUE str1, VALUE str2); -VALUE rb_str_drop_bytes(VALUE, long); -void rb_str_update(VALUE, long, long, VALUE); -VALUE rb_str_replace(VALUE, VALUE); -VALUE rb_str_inspect(VALUE); -VALUE rb_str_dump(VALUE); -VALUE rb_str_split(VALUE, const char*); + +/** + * Shrinks the given string for the given number of bytes. + * + * @param[out] str String to squash. + * @param[in] len Number of bytes to reduce. + * @exception rb_eRuntimeError `str` is `locktmp`-ed. + * @exception rb_eFrozenError `str` is frozen. + * @return The passed `str`. + * @pre `str` must not be any arbitrary objects except ::RString. + * @post `str` is shrunken. + * @warning Can break a multibyte character in middle. + * + * @internal + * + * What if `len` is negative? + */ +VALUE rb_str_drop_bytes(VALUE str, long len); + +/** + * Replaces some (or all) of the contents of the given string. This is the + * implementation of three-argumented `String#[]=`. + * + * @param[out] dst Target string to update. + * @param[in] beg Offset of the affected portion. + * @param[in] len Length of the affected portion. + * @param[in] src Object to be assigned. + * @exception rb_eTypeError `src` has no implicit conversion to String.
+ * @exception rb_eIndexError `len` is negative, or `beg` is OOB. + * @exception rb_eRuntimeError `dst` is `locktmp`-ed. + * @exception rb_eFrozenError `dst` is frozen. + * @note Unlike rb_str_substr(), this function raises. + * @post A portion of `dst` from `beg` to `len` is the stringised + * representation of `src`. If that replacement string is not the + * same length as the portion it is replacing, `dst` will be + * resized accordingly. + */ +void rb_str_update(VALUE dst, long beg, long len, VALUE src); + +/** + * Replaces the contents of the former object with the stringised contents of + * the latter. + * + * @param[out] dst Destination object. + * @param[in] src Source object. + * @exception rb_eTypeError `src` has no implicit conversion to String. + * @exception rb_eRuntimeError `dst` is `locktmp`-ed. + * @exception rb_eFrozenError `dst` is frozen. + * @return The passed `dst`. + * @pre `dst` must not be any arbitrary object except ::RString. + * @post `dst`'s former components are abandoned. It now has the + * identical encoding, length, and contents to `src`. + */ +VALUE rb_str_replace(VALUE dst, VALUE src); + +/** + * Generates a "readable" version of the receiver. + * + * @warning The output is _insecure_. Never feed one to `eval`. + * @warning The output is not always in the same encoding as the given one. + * @warning A character might or might not be escaped, depending on the + * result encoding. + * @param[in] str String to inspect. + * @return Its inspection, either in default internal encoding if any, or + * in default external encoding otherwise. + * @see rb_str_dump() + * + * @internal + * + * This is a (silent) fix of an actual vulnerability feeding `inspect` output + * strings to `eval`: + * https://github.com/hiki/hiki/commit/8771a6e25198e264a2bf9dc1c102fea2cc8ff975 + * + * ... and its advisory: + * http://hikiwiki.org/en/advisory20040712.html + */ +VALUE rb_str_inspect(VALUE str); + +/** + * "Inverse" of rb_eval_string(). 
Returns a quoted version of the string. All + * non-printing characters are replaced by `\uNNNN` or `\xHH` notation and all + * special characters are escaped. The result string is guaranteed to render a + * string of the same contents when passed to `eval` and friends. + * + * @param[in] str String to dump. + * @exception rb_eRuntimeError Too many escape sequences causes integer + * overflow on the length of the string. + * @return An US-ASCII string that includes all the necessary info to + * reconstruct the original string. + */ +VALUE rb_str_dump(VALUE str); + +/** + * Divides the given string based on the given delimiter. This is the + * 1-argument 0-block version of `String#split`. + * + * @param[in] str Object in question to split. + * @param[in] delim Delimiter, in C string. + * @exception rb_eTypeError `str` has no implicit conversion to String. + * @exception rb_eArgError `delim` is a null pointer. + * @return An array of strings, which are substrings of the passed `str`. + * If `delim` is an empty C string (i.e. `""`), `str` is split into + * each characters. If `delim` is a C string whose sole content is + * a whitespace (i.e. `" "`), `str` is split on whitespaces, with + * leading and trailing whitespace and runs of contiguous + * whitespace characters ignored. Otherwise, `str` is split + * according to `delim`. + */ +VALUE rb_str_split(VALUE str, const char *delim); + +/** + * This is a ::rb_gvar_setter_t that refutes non-string assignments. + * + * @exception rb_eTypeError Passed something non-string. + */ rb_gvar_setter_t rb_str_setter; -VALUE rb_str_intern(VALUE); -VALUE rb_sym_to_s(VALUE); -long rb_str_strlen(VALUE); + +/* symbol.c */ + +/** + * Identical to rb_to_symbol(), except it assumes the receiver being an + * instance of ::RString. + * + * @param[in] str The name of the id. + * @exception rb_eRuntimeError Too many symbols. + * @return A (possibly new) id whose value is the given `str`. 
+ * @pre `str` must not be any arbitrary object except ::RString. + * @note These days Ruby internally has two kinds of symbols + * (static/dynamic). Symbols created using this function would + * become dynamic ones; i.e. would be garbage collected. It could + * be safer for you to use it than alternatives, when applicable. + */ +VALUE rb_str_intern(VALUE str); + +/* string.c */ + +/** + * This is an rb_sym2str() + rb_str_dup() combo. + * + * @param[in] sym A symbol to query. + * @return A string duplicating the symbol's backend storage. + * + * @internal + * + * This function causes SEGV when the passed value is a static symbol that + * doesn't exist. + */ +VALUE rb_sym_to_s(VALUE sym); + +/** + * Counts the number of characters (not bytes) that are stored inside of the + * given string. This of course depends on its encoding. Also this function + * generally runs in O(n), because for instance you have to scan the entire + * string to know how many characters are there in a UTF-8 string. + * + * @param[in] str Target string to query. + * @return Its number of characters. + */ +long rb_str_strlen(VALUE str); + +/** + * Identical to rb_str_strlen(), except it returns the value in ::rb_cInteger. + * + * @param[in] str Target string to query. + * @return Its number of characters. + */ VALUE rb_str_length(VALUE); -long rb_str_offset(VALUE, long); + +/** + * "Inverse" of rb_str_sublen(). This function scans the contents to find the + * byte index that matches the character index. Generally speaking this is an + * `O(n)` operation. Could be slow. + * + * @param[in] str The string to scan. + * @param[in] pos Offset, in characters. + * @return Offset, in bytes. + */ +long rb_str_offset(VALUE str, long pos); + RBIMPL_ATTR_PURE() -size_t rb_str_capacity(VALUE); -VALUE rb_str_ellipsize(VALUE, long); -VALUE rb_str_scrub(VALUE, VALUE); -VALUE rb_str_succ(VALUE); +/** + * Queries the capacity of the given string. 
+ * + * @see ::RString::capa + * @param[in] str String in question. + * @return Its capacity. + */ +size_t rb_str_capacity(VALUE str); + +/** + * Shortens `str` and adds three dots, an ellipsis, if it is longer than `len` + * characters. The length of the returned string in characters is less than or + * equal to `len`. If the length of `str` is less than or equal `len`, returns + * `str` itself. The encoding of returned string is equal to that of passed + * one. The class of returned string is equal to that of passed one. + * + * @param[in] str The string to shorten. + * @param[in] len The maximum string length. + * @exception rb_eIndexError `len` is negative. + * @retval str No need to add ellipsis. + * @retval otherwise A new, shortened string. + * @note The length is counted in characters. + */ +VALUE rb_str_ellipsize(VALUE str, long len); + +/** + * "Cleanses" the string. A string has its encoding and its contents. They, + * in practice, do not always fit. There are strings in the wild that are + * "broken"; include bit patterns that are not allowed by its encoding. That + * can happen when a user copy&pasted something bad, network input got + * clobbered by a middleman, cosmic rays hit the physical memory, and many more + * occasions. This function takes such strings, and fills the "broken" portion + * with the passed replacement bit pattern. + * + * This function also takes a ruby block. That is a neat way to do things, but + * can be annoying when the caller function want to use a block for another + * purpose. + * + * @param[in] str Target string to scrub. + * @param[in] repl Replacement string. When it is a string, + * this function takes that as a replacement. + * When it is ::RUBY_Qnil, this function tries + * to yield a block (if any) and takes its + * evaluated value as a replacement. In case + * of ::RUBY_Qnil without a block, this + * function takes an encoding-specific default + * character (`U+FFFD`, for instance) as a last + * resort. 
+ * @exception rb_eTypeError `repl` is neither string nor nil. + * @exception rb_eArgError `repl` itself is broken. + * @exception rb_eEncCompatError `repl` and `str` are incompatible. + * @retval RUBY_Qnil `str` is already clean. + * @retval otherwise A new, clean string. + */ +VALUE rb_str_scrub(VALUE str, VALUE repl); + +/** + * Searches for the "successor" of a string. This function is complicated! + * This is the only function in the entire ruby API (either C or Ruby) that + * generates a string out of thin air. First, the successor to an empty string + * is a new empty string: + * + * ```ruby + * ''.succ # => "" + * ``` + * + * Otherwise the successor is calculated by "incrementing" characters. The + * first character to be incremented is the rightmost alphanumeric: or, if no + * alphanumerics, the rightmost character: + * + * ```ruby + * 'THX1138'.succ # => "THX1139" + * '<<koala>>'.succ # => "<<koalb>>" + * '***'.succ # => '**+' + * ``` + * + * The successor to a digit is another digit, "carrying" to the next-left + * character for a "rollover" from 9 to 0, and prepending another digit if + * necessary: + * + * ```ruby + * '00'.succ # => "01" + * '09'.succ # => "10" + * '99'.succ # => "100" + * '-9'.succ # => "-10" + * ``` + * + * The successor to a letter is another letter of the same case, carrying to + * the next-left character for a rollover, and prepending another same-case + * letter if necessary: + * + * ```ruby + * 'aa'.succ # => "ab" + * 'az'.succ # => "ba" + * 'zz'.succ # => "aaa" + * 'AA'.succ # => "AB" + * 'AZ'.succ # => "BA" + * 'ZZ'.succ # => "AAA" + * ``` + * + * The successor to a non-alphanumeric character is the next character in the + * underlying character set's collating sequence, carrying to the next-left + * character for a rollover, and prepending another character if necessary: + * + * ```ruby + * s = "\u03A1" + * s.succ # => "\u03A3" # There is no such thing like \u03A2. 
+ * s = 255.chr * 3 + * s # => "\xFF\xFF\xFF" + * s.succ # => "\x01\x00\x00\x00" + * ``` + * + * Carrying can occur between and among mixtures of alphanumeric characters: + * + * ```ruby + * s = 'zz99zz99' + * s.succ # => "aaa00aa00" + * s = '99zz99zz' + * s.succ # => "100aa00aa" + * s = '1.9.9' + * s.succ # => "2.0.0" + * ``` + * + * @param[in] orig Predecessor string. + * @return Successor string. + */ +VALUE rb_str_succ(VALUE orig); RBIMPL_ATTR_NONNULL(()) +/** + * @private + * + * This is an implementation detail. Don't bother. + * + * @param[in] str A C string. + * @return `strlen`, casted to `long`. + */ static inline long rbimpl_strlen(const char *str) { return RBIMPL_CAST((long)strlen(str)); } +RBIMPL_ATTR_NONNULL(()) +/** + * @private + * + * This is an implementation detail. Don't bother. + * + * @param[in] str A C string literal. + * @return Corresponding Ruby string. + */ static inline VALUE rbimpl_str_new_cstr(const char *str) { @@ -153,13 +1382,15 @@ rbimpl_str_new_cstr(const char *str) return rb_str_new_static(str, len); } -static inline VALUE -rbimpl_tainted_str_new_cstr(const char *str) -{ - long len = rbimpl_strlen(str); - return rb_tainted_str_new(str, len); -} - +RBIMPL_ATTR_NONNULL(()) +/** + * @private + * + * This is an implementation detail. Don't bother. + * + * @param[in] str A C string literal. + * @return Corresponding Ruby string. + */ static inline VALUE rbimpl_usascii_str_new_cstr(const char *str) { @@ -167,6 +1398,15 @@ rbimpl_usascii_str_new_cstr(const char *str) return rb_usascii_str_new_static(str, len); } +RBIMPL_ATTR_NONNULL(()) +/** + * @private + * + * This is an implementation detail. Don't bother. + * + * @param[in] str A C string literal. + * @return Corresponding Ruby string. 
+ */ static inline VALUE rbimpl_utf8_str_new_cstr(const char *str) { @@ -174,6 +1414,15 @@ rbimpl_utf8_str_new_cstr(const char *str) return rb_utf8_str_new_static(str, len); } +RBIMPL_ATTR_NONNULL(()) +/** + * @private + * + * This is an implementation detail. Don't bother. + * + * @param[in] str A C string literal. + * @return Corresponding Ruby string. + */ static inline VALUE rbimpl_external_str_new_cstr(const char *str) { @@ -181,6 +1430,15 @@ rbimpl_external_str_new_cstr(const char *str) return rb_external_str_new(str, len); } +RBIMPL_ATTR_NONNULL(()) +/** + * @private + * + * This is an implementation detail. Don't bother. + * + * @param[in] str A C string literal. + * @return Corresponding Ruby string. + */ static inline VALUE rbimpl_locale_str_new_cstr(const char *str) { @@ -188,6 +1446,15 @@ rbimpl_locale_str_new_cstr(const char *str) return rb_locale_str_new(str, len); } +RBIMPL_ATTR_NONNULL(()) +/** + * @private + * + * This is an implementation detail. Don't bother. + * + * @param[in] str A C string literal. + * @return Corresponding Ruby string. + */ static inline VALUE rbimpl_str_buf_new_cstr(const char *str) { @@ -196,6 +1463,16 @@ rbimpl_str_buf_new_cstr(const char *str) return rb_str_buf_cat(buf, str, len); } +RBIMPL_ATTR_NONNULL(()) +/** + * @private + * + * This is an implementation detail. Don't bother. + * + * @param[out] buf A string buffer. + * @param[in] str A C string literal. + * @return `buf` itself. + */ static inline VALUE rbimpl_str_cat_cstr(VALUE buf, const char *str) { @@ -203,6 +1480,16 @@ rbimpl_str_cat_cstr(VALUE buf, const char *str) return rb_str_cat(buf, str, len); } +RBIMPL_ATTR_NONNULL(()) +/** + * @private + * + * This is an implementation detail. Don't bother. + * + * @param[in] exc An exception class. + * @param[in] str A C string literal. + * @return An instance of `exc`. 
+ */ static inline VALUE rbimpl_exc_new_cstr(VALUE exc, const char *str) { @@ -210,88 +1497,275 @@ rbimpl_exc_new_cstr(VALUE exc, const char *str) return rb_exc_new(exc, str, len); } +/** + * Allocates an instance of ::rb_cString. + * + * @param[in] str A memory region of `len` bytes length. + * @param[in] len Length of `str`, in bytes, not including the + * terminating NUL character. + * @exception rb_eNoMemError Failed to allocate `len+1` bytes. + * @exception rb_eArgError `len` is negative. + * @return An instance of ::rb_cString, of `len` bytes length, of + * "binary" encoding, whose contents are verbatim copy of `str`. + * @pre At least `len` bytes of continuous memory region shall be + * accessible via `str`. + */ #define rb_str_new(str, len) \ ((RBIMPL_CONSTANT_P(str) && \ RBIMPL_CONSTANT_P(len) ? \ rb_str_new_static : \ rb_str_new) ((str), (len))) +/** + * Identical to #rb_str_new, except it assumes the passed pointer is a pointer + * to a C string. + * + * @param[in] str A C string. + * @exception rb_eNoMemError Failed to allocate memory. + * @return An instance of ::rb_cString, of "binary" encoding, whose + * contents are verbatim copy of `str`. + * @pre `str` must not be a null pointer. + */ #define rb_str_new_cstr(str) \ ((RBIMPL_CONSTANT_P(str) ? \ rbimpl_str_new_cstr : \ rb_str_new_cstr) (str)) +/** + * Identical to #rb_str_new, except it generates a string of "US ASCII" + * encoding. This is different from rb_external_str_new(), not only for the + * output encoding, but also it doesn't convert the contents. + * + * @param[in] str A memory region of `len` bytes length. + * @param[in] len Length of `str`, in bytes, not including the + * terminating NUL character. + * @exception rb_eNoMemError Failed to allocate `len+1` bytes. + * @exception rb_eArgError `len` is negative. + * @return An instance of ::rb_cString, of `len` bytes length, of + * "US ASCII" encoding, whose contents are verbatim copy of `str`.
+ */ #define rb_usascii_str_new(str, len) \ ((RBIMPL_CONSTANT_P(str) && \ RBIMPL_CONSTANT_P(len) ? \ rb_usascii_str_new_static : \ rb_usascii_str_new) ((str), (len))) +/** + * Identical to #rb_str_new, except it generates a string of "UTF-8" encoding. + * + * @param[in] str A memory region of `len` bytes length. + * @param[in] len Length of `str`, in bytes, not including the + * terminating NUL character. + * @exception rb_eNoMemError Failed to allocate `len+1` bytes. + * @exception rb_eArgError `len` is negative. + * @return An instance of ::rb_cString, of `len` bytes length, of + * "UTF-8" encoding, whose contents are verbatim copy of `str`. + */ #define rb_utf8_str_new(str, len) \ ((RBIMPL_CONSTANT_P(str) && \ RBIMPL_CONSTANT_P(len) ? \ rb_utf8_str_new_static : \ rb_utf8_str_new) ((str), (len))) -#define rb_tainted_str_new_cstr(str) \ - ((RBIMPL_CONSTANT_P(str) ? \ - rbimpl_tainted_str_new_cstr : \ - rb_tainted_str_new_cstr) (str)) - +/** + * Identical to #rb_str_new_cstr, except it generates a string of "US ASCII" + * encoding. It can also be seen as a routine Identical to + * #rb_usascii_str_new, except it assumes the passed pointer is a pointer to a + * C string. + * + * @param[in] str A C string. + * @exception rb_eNoMemError Failed to allocate memory. + * @return An instance of ::rb_cString, of "US ASCII" encoding, whose + * contents are verbatim copy of `str`. + * @pre `str` must not be a null pointer. + */ #define rb_usascii_str_new_cstr(str) \ ((RBIMPL_CONSTANT_P(str) ? \ rbimpl_usascii_str_new_cstr : \ rb_usascii_str_new_cstr) (str)) +/** + * Identical to #rb_str_new_cstr, except it generates a string of "UTF-8" + * encoding. It can also be seen as a routine Identical to #rb_utf8_str_new, + * except it assumes the passed pointer is a pointer to a C string. + * + * @param[in] str A C string. + * @exception rb_eNoMemError Failed to allocate memory. + * @return An instance of ::rb_cString, of "UTF-8" encoding, whose contents + * are verbatim copy of `str`. 
+ * @pre `str` must not be a null pointer. + */ #define rb_utf8_str_new_cstr(str) \ ((RBIMPL_CONSTANT_P(str) ? \ rbimpl_utf8_str_new_cstr : \ rb_utf8_str_new_cstr) (str)) +/** + * Identical to #rb_str_new_cstr, except it generates a string of "default + * external" encoding. + * + * @param[in] str A C string. + * @exception rb_eNoMemError Failed to allocate memory. + * @return An instance of ::rb_cString. In case encoding conversion from + * "default internal" to "default external" is fully defined over + * the given contents, then the return value is a string of + * "default external" encoding, whose contents are the converted + * ones. Otherwise the string is a junk. + * @warning It doesn't raise on a conversion failure and silently ends up in + * a corrupted output. You can know the failure by querying + * `valid_encoding?` of the result object. + * @pre `str` must not be a null pointer. + */ #define rb_external_str_new_cstr(str) \ ((RBIMPL_CONSTANT_P(str) ? \ rbimpl_external_str_new_cstr : \ rb_external_str_new_cstr) (str)) +/** + * Identical to #rb_external_str_new_cstr, except it generates a string of + * "locale" encoding instead of "default external". + * + * @param[in] str A C string. + * @exception rb_eNoMemError Failed to allocate memory. + * @return An instance of ::rb_cString. In case encoding conversion from + * "default internal" to "locale" is fully defined over the given + * contents, then the return value is a string of "locale" + * encoding, whose contents are the converted ones. Otherwise the + * string is a junk. + * @warning It doesn't raise on a conversion failure and silently ends up in + * a corrupted output. You can know the failure by querying + * `valid_encoding?` of the result object. + * @pre `str` must not be a null pointer. + */ #define rb_locale_str_new_cstr(str) \ ((RBIMPL_CONSTANT_P(str) ? \ rbimpl_locale_str_new_cstr : \ rb_locale_str_new_cstr) (str)) +/** + * Identical to #rb_str_new_cstr, except done differently. 
+ * + * @param[in] str A C string. + * @exception rb_eNoMemError Failed to allocate memory. + * @return An instance of ::rb_cString, of "binary" encoding, whose + * contents are verbatim copy of `str`. + * @pre `str` must not be a null pointer. + */ #define rb_str_buf_new_cstr(str) \ ((RBIMPL_CONSTANT_P(str) ? \ rbimpl_str_buf_new_cstr : \ rb_str_buf_new_cstr) (str)) +/** + * Identical to rb_str_cat(), except it assumes the passed pointer is a pointer + * to a C string. + * + * @param[out] buf Destination object. + * @param[in] str Contents to append. + * @exception rb_eArgError Result string too big. + * @return The passed `buf`. + * @pre `buf` must not be any arbitrary objects except ::RString. + * @pre `str` must not be a null pointer. + * @post `buf` has the contents of `str` appended. + */ #define rb_str_cat_cstr(buf, str) \ ((RBIMPL_CONSTANT_P(str) ? \ rbimpl_str_cat_cstr : \ rb_str_cat_cstr) ((buf), (str))) +/** + * Identical to rb_exc_new(), except it assumes the passed pointer is a pointer + * to a C string. + * + * @param[out] exc A subclass of ::rb_eException. + * @param[in] str Message to raise. + * @return An instance of `exc` whose message is `str`. + * @pre `str` must not be a null pointer. + */ #define rb_exc_new_cstr(exc, str) \ ((RBIMPL_CONSTANT_P(str) ? 
\ rbimpl_exc_new_cstr : \ rb_exc_new_cstr) ((exc), (str))) -#define rb_str_new2 rb_str_new_cstr -#define rb_str_new3 rb_str_new_shared -#define rb_str_new4 rb_str_new_frozen -#define rb_str_new5 rb_str_new_with_class -#define rb_tainted_str_new2 rb_tainted_str_new_cstr -#define rb_str_buf_new2 rb_str_buf_new_cstr -#define rb_usascii_str_new2 rb_usascii_str_new_cstr -#define rb_str_buf_cat rb_str_cat -#define rb_str_buf_cat2 rb_str_cat_cstr -#define rb_str_cat2 rb_str_cat_cstr +#define rb_str_new2 rb_str_new_cstr /**< @old{rb_str_new_cstr} */ +#define rb_str_new3 rb_str_new_shared /**< @old{rb_str_new_shared} */ +#define rb_str_new4 rb_str_new_frozen /**< @old{rb_str_new_frozen} */ +#define rb_str_new5 rb_str_new_with_class /**< @old{rb_str_new_with_class} */ +#define rb_str_buf_new2 rb_str_buf_new_cstr /**< @old{rb_str_buf_new_cstr} */ +#define rb_usascii_str_new2 rb_usascii_str_new_cstr /**< @old{rb_usascii_str_new_cstr} */ +#define rb_str_buf_cat rb_str_cat /**< @alias{rb_str_cat} */ +#define rb_str_buf_cat2 rb_str_cat_cstr /**< @old{rb_str_cat_cstr} */ +#define rb_str_cat2 rb_str_cat_cstr /**< @old{rb_str_cat_cstr} */ + +/** + * Length of a string literal. + * + * @param[in] str A C String literal. + * @return An integer constant expression that represents `str`'s length, + * in bytes, not including the terminating NUL character. + */ #define rb_strlen_lit(str) (sizeof(str "") - 1) + +/** + * Identical to rb_str_new_static(), except it cannot take string variables. + * + * @param[in] str A C string literal. + * @pre `str` must not be a variable. + * @return An instance of ::rb_cString, of "binary" encoding, whose backend + * storage is the passed C string literal. + * @warning It is a very bad idea to write to a C string literal (often + * immediate SEGV shall occur). Consider return values of this + * function be read-only.
+ */ #define rb_str_new_lit(str) rb_str_new_static((str), rb_strlen_lit(str)) + +/** + * Identical to rb_usascii_str_new_static(), except it cannot take string + * variables. + * + * @param[in] str A C string literal. + * @pre `str` must not be a variable. + * @return An instance of ::rb_cString, of "US ASCII" encoding, whose + * backend storage is the passed C string literal. + * @warning It is a very bad idea to write to a C string literal (often + * immediate SEGV shall occur). Consider return values of this + * function be read-only. + */ #define rb_usascii_str_new_lit(str) rb_usascii_str_new_static((str), rb_strlen_lit(str)) + +/** + * Identical to rb_utf8_str_new_static(), except it cannot take string + * variables. + * + * @param[in] str A C string literal. + * @pre `str` must not be a variable. + * @return An instance of ::rb_cString, of "UTF-8" encoding, whose backend + * storage is the passed C string literal. + * @warning It is a very bad idea to write to a C string literal (often + * immediate SEGV shall occur). Consider return values of this + * function be read-only. + */ #define rb_utf8_str_new_lit(str) rb_utf8_str_new_static((str), rb_strlen_lit(str)) + +/** + * Identical to rb_enc_str_new_static(), except it cannot take string + * variables. + * + * @param[in] str A C string literal. + * @param[in] enc A pointer to an encoding. + * @pre `str` must not be a variable. + * @return An instance of ::rb_cString, of the passed encoding, whose + * backend storage is the passed C string literal. + * @warning It is a very bad idea to write to a C string literal (often + * immediate SEGV shall occur). Consider return values of this + * function be read-only. 
+ */ #define rb_enc_str_new_lit(str, enc) rb_enc_str_new_static((str), rb_strlen_lit(str), (enc)) -#define rb_str_new_literal(str) rb_str_new_lit(str) -#define rb_usascii_str_new_literal(str) rb_usascii_str_new_lit(str) -#define rb_utf8_str_new_literal(str) rb_utf8_str_new_lit(str) -#define rb_enc_str_new_literal(str, enc) rb_enc_str_new_lit(str, enc) + +#define rb_str_new_literal(str) rb_str_new_lit(str) /**< @alias{rb_str_new_lit} */ +#define rb_usascii_str_new_literal(str) rb_usascii_str_new_lit(str) /**< @alias{rb_usascii_str_new_lit} */ +#define rb_utf8_str_new_literal(str) rb_utf8_str_new_lit(str) /**< @alias{rb_utf8_str_new_lit} */ +#define rb_enc_str_new_literal(str, enc) rb_enc_str_new_lit(str, enc) /**< @alias{rb_enc_str_new_lit} */ RBIMPL_SYMBOL_EXPORT_END() diff --git a/include/ruby/internal/intern/struct.h b/include/ruby/internal/intern/struct.h index 8818da96c7..16b3fad4e0 100644 --- a/include/ruby/internal/intern/struct.h +++ b/include/ruby/internal/intern/struct.h @@ -17,9 +17,10 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Public APIs related to ::rb_cStruct. 
+ */ +#include "ruby/internal/attr/nonnull.h" #include "ruby/internal/dllexport.h" #include "ruby/internal/intern/vm.h" /* rb_alloc_func_t */ #include "ruby/internal/value.h" @@ -27,21 +28,198 @@ RBIMPL_SYMBOL_EXPORT_BEGIN() /* struct.c */ -VALUE rb_struct_new(VALUE, ...); -VALUE rb_struct_define(const char*, ...); -VALUE rb_struct_define_under(VALUE, const char*, ...); -VALUE rb_struct_alloc(VALUE, VALUE); -VALUE rb_struct_initialize(VALUE, VALUE); -VALUE rb_struct_aref(VALUE, VALUE); -VALUE rb_struct_aset(VALUE, VALUE, VALUE); -VALUE rb_struct_getmember(VALUE, ID); -VALUE rb_struct_s_members(VALUE); -VALUE rb_struct_members(VALUE); -VALUE rb_struct_size(VALUE s); -VALUE rb_struct_alloc_noinit(VALUE); -VALUE rb_struct_define_without_accessor(const char *, VALUE, rb_alloc_func_t, ...); + +/** + * Creates an instance of the given struct. + * + * @param[in] klass The class of the instance to allocate. + * @param[in] ... The fields. + * @return Allocated instance of `klass`. + * @pre `klass` must be a subclass of ::rb_cStruct. + * @note Number of variadic arguments must match that of the passed klass' + * fields. + */ +VALUE rb_struct_new(VALUE klass, ...); + +/** + * Defines a struct class. + * + * @param[in] name Name of the class. + * @param[in] ... Arbitrary number of `const char*`, terminated by + * NULL. Each of which are the name of fields. + * @exception rb_eNameError `name` is not a constant name. + * @exception rb_eTypeError `name` is already taken. + * @exception rb_eArgError Duplicated field name. + * @return The defined class. + * @post Global toplevel constant `name` is defined. + * @note `name` is allowed to be a null pointer. This function creates + * an anonymous struct class then. + * @note The GC does not collect nor move classes returned by this + * function. They are immortal. + * + * @internal + * + * Not seriously checked but it seems this function does not share its + * implementation with how `Struct.new` is implemented...?
+ */ +VALUE rb_struct_define(const char *name, ...); + +RBIMPL_ATTR_NONNULL((2)) +/** + * Identical to rb_struct_define(), except it defines the class under the + * specified namespace instead of global toplevel. + * + * @param[out] space Namespace that the defining class shall reside. + * @param[in] name Name of the class. + * @param[in] ... Arbitrary number of `const char*`, terminated by + * NULL. Each of which are the name of fields. + * @exception rb_eNameError `name` is not a constant name. + * @exception rb_eTypeError `name` is already taken. + * @exception rb_eArgError Duplicated field name. + * @return The defined class. + * @post `name` is a constant under `space`. + * @note In contrast to rb_struct_define(), it doesn't make any sense to + * pass a null pointer to this function. + * @note The GC does not collect nor move classes returned by this + * function. They are immortal. + */ +VALUE rb_struct_define_under(VALUE space, const char *name, ...); + +/** + * Identical to rb_struct_new(), except it takes the field values as a Ruby + * array. + * + * @param[in] klass The class of the instance to allocate. + * @param[in] values Field values. + * @return Allocated instance of `klass`. + * @pre `klass` must be a subclass of ::rb_cStruct. + * @pre `values` must be an instance of struct ::RArray. + */ +VALUE rb_struct_alloc(VALUE klass, VALUE values); + +/** + * Mass-assigns a struct's fields. + * + * @param[out] self An instance of a struct class to squash. + * @param[in] values New values. + * @return ::RUBY_Qnil. + */ +VALUE rb_struct_initialize(VALUE self, VALUE values); + +/** + * Identical to rb_struct_aref(), except it takes ::ID instead of ::VALUE. + * + * @param[in] self An instance of a struct class. + * @param[in] key Key to query. + * @exception rb_eTypeError `self` is not a struct. + * @exception rb_eNameError No such field. + * @return The value stored at `key` in `self`. 
+ */ +VALUE rb_struct_getmember(VALUE self, ID key); + +/** + * Queries the list of the names of the fields of the given struct class. + * + * @param[in] klass A subclass of ::rb_cStruct. + * @return The list of the names of the fields of `klass`. + */ +VALUE rb_struct_s_members(VALUE klass); + +/** + * Queries the list of the names of the fields of the class of the given struct + * object. This is almost the same as calling rb_struct_s_members() over the + * class of the receiver. + * + * @internal + * + * "Almost"? What exactly is the difference? + * + * @endinternal + * + * @param[in] self An instance of a subclass of ::rb_cStruct. + * @return The list of the names of the fields. + */ +VALUE rb_struct_members(VALUE self); + +/** + * Allocates an instance of the given class. This consequential name is of + * course because rb_struct_alloc() not only allocates but also initialises an + * instance. The API design is broken. + * + * @param[in] klass A subclass of ::rb_cStruct. + * @return An allocated instance of `klass`, not initialised. + */ +VALUE rb_struct_alloc_noinit(VALUE klass); + +/** + * Identical to rb_struct_define(), except it does not define accessor methods. + * You have to define them yourself. Forget about the allocator function + * parameter; it is for internal use only. Extension libraries are unable to + * properly allocate a ruby struct, because `RStruct` is opaque. + * + * @internal + * + * Several flags must be set up properly for ::RUBY_T_STRUCT objects, which are + * also missing for extension libraries. + * + * @endinternal + * + * @param[in] name Name of the class. + * @param[in] super Superclass of the defining class. + * @param[in] func Must be 0 for extension libraries. + * @param[in] ... Arbitrary number of `const char*`, terminated by + * NULL. Each of which are the name of fields. + * @exception rb_eNameError `name` is not a constant name. + * @exception rb_eTypeError `name` is already taken. 
+ * @exception rb_eArgError Duplicated field name. + * @return The defined class. + * @post Global toplevel constant `name` is defined. + * @note `name` is allowed to be a null pointer. This function creates + * an anonymous struct class then. + */ +VALUE rb_struct_define_without_accessor(const char *name, VALUE super, rb_alloc_func_t func, ...); + +RBIMPL_ATTR_NONNULL((2)) +/** + * Identical to rb_struct_define_without_accessor(), except it defines the + * class under the specified namespace instead of global toplevel. It can also + * be seen as a routine identical to rb_struct_define_under(), except it does + * not define accessor methods. + * + * @param[out] outer Namespace that the defining class shall reside. + * @param[in] class_name Name of the class. + * @param[in] super Superclass of the defining class. + * @param[in] alloc Must be 0 for extension libraries. + * @param[in] ... Arbitrary number of `const char*`, terminated by + * NULL. Each of which are the name of fields. + * @exception rb_eNameError `class_name` is not a constant name. + * @exception rb_eTypeError `class_name` is already taken. + * @exception rb_eArgError Duplicated field name. + * @return The defined class. + * @post `class_name` is a constant under `outer`. + * @note In contrast to rb_struct_define_without_accessor(), it doesn't + * make any sense to pass a null name. + * @note The GC does not collect nor move classes returned by this + * function. They are immortal. + */ VALUE rb_struct_define_without_accessor_under(VALUE outer, const char *class_name, VALUE super, rb_alloc_func_t alloc, ...); +/** + * Defines an anonymous data class. + * + * @endinternal + * + * @param[in] super Superclass of the defining class. Must be a + * descendant of ::rb_cData, or 0 as ::rb_cData. + * @param[in] ... Arbitrary number of `const char*`, terminated by + * NULL. Each of which are the name of fields. + * @exception rb_eArgError Duplicated field name. + * @return The defined class. 
+ * @note The GC does not collect nor move classes returned by this + * function. They are immortal. + */ +VALUE rb_data_define(VALUE super, ...); + RBIMPL_SYMBOL_EXPORT_END() #endif /* RBIMPL_INTERN_STRUCT_H */ diff --git a/include/ruby/internal/intern/thread.h b/include/ruby/internal/intern/thread.h index 7c656a628c..716375acd7 100644 --- a/include/ruby/internal/intern/thread.h +++ b/include/ruby/internal/intern/thread.h @@ -17,11 +17,12 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Public APIs related to ::rb_cThread. */ -#include "ruby/internal/config.h" +#include "ruby/internal/attr/nonnull.h" #include "ruby/internal/cast.h" +#include "ruby/internal/config.h" #include "ruby/internal/dllexport.h" #include "ruby/internal/value.h" @@ -30,45 +31,460 @@ RBIMPL_SYMBOL_EXPORT_BEGIN() struct timeval; /* thread.c */ + +/** + * Tries to switch to another thread. This function blocks until the current + * thread re-acquires the GVL. + * + * @exception rb_eInterrupt Operation interrupted. + */ void rb_thread_schedule(void); -int rb_thread_wait_fd(int); -int rb_thread_fd_writable(int); -void rb_thread_fd_close(int); + +/** + * Blocks the current thread until the given file descriptor is ready to be + * read. + * + * @param[in] fd A file descriptor. + * @exception rb_eIOError Closed stream. + * @exception rb_eSystemCallError Situations like EBADF. + */ +int rb_thread_wait_fd(int fd); + +/** + * Identical to rb_thread_wait_fd(), except it blocks the current thread until + * the given file descriptor is ready to be written. + * + * @param[in] fd A file descriptor. + * @exception rb_eIOError Closed stream. + * @exception rb_eSystemCallError Situations like EBADF. 
+ */ +int rb_thread_fd_writable(int fd); + +/** + * Notifies a closing of a file descriptor to other threads. Multiple threads + * can wait for the given file descriptor at once. If such file descriptor is + * closed, threads need to start propagating their exceptions. This is the API + * to kick that process. + * + * @param[in] fd A file descriptor. + * @note This function blocks until all the threads waiting for such fd + * have woken up. + */ +void rb_thread_fd_close(int fd); + +/** + * Checks if the thread this function is running is the only thread that is + * currently alive. + * + * @retval 1 Yes it is. + * @retval 0 No it isn't. + * + * @internal + * + * Above description is in fact inaccurate. There are Ractors these days. + */ int rb_thread_alone(void); -void rb_thread_sleep(int); + +/** + * Blocks for the given period of time. + * + * @warning This function can be interrupted by signals. + * @param[in] sec Duration in seconds. + * @exception rb_eInterrupt Interrupted. + */ +void rb_thread_sleep(int sec); + +/** + * Blocks indefinitely. + * + * @exception rb_eInterrupt Interrupted. + */ void rb_thread_sleep_forever(void); + +/** + * Identical to rb_thread_sleep_forever(), except the thread calling this + * function is considered "dead" when our deadlock checker is triggered. + * + * @exception rb_eInterrupt Interrupted. + */ void rb_thread_sleep_deadly(void); + +/** + * Stops the current thread. This is not the end of the thread's lifecycle. A + * stopped thread can later be woken up. + * + * @exception rb_eThreadError Stopping this thread would deadlock. + * @retval ::RUBY_Qnil Always. + * + * @internal + * + * The return value makes no sense at all. 
+ */ VALUE rb_thread_stop(void); -VALUE rb_thread_wakeup(VALUE); -VALUE rb_thread_wakeup_alive(VALUE); -VALUE rb_thread_run(VALUE); -VALUE rb_thread_kill(VALUE); -VALUE rb_thread_create(VALUE (*)(void *), void*); -void rb_thread_wait_for(struct timeval); + +/** + * Marks a given thread as eligible for scheduling. + * + * @note It may still remain blocked on I/O. + * @note This does not invoke the scheduler itself. + * + * @param[out] thread Thread in question to wake up. + * @exception rb_eThreadError Stop flogging a dead horse. + * @return The passed thread. + * @post The passed thread is made runnable. + */ +VALUE rb_thread_wakeup(VALUE thread); + +/** + * Identical to rb_thread_wakeup(), except it doesn't raise on an already + * killed thread. + * + * @param[out] thread A thread to wake up. + * @retval RUBY_Qnil `thread` is already killed. + * @retval otherwise `thread` is alive. + * @post The passed thread is made runnable, unless killed. + */ +VALUE rb_thread_wakeup_alive(VALUE thread); + +/** + * This is a rb_thread_wakeup() + rb_thread_schedule() combo. + * + * @note There is no guarantee that this function yields to the passed + * thread. It may still remain blocked on I/O. + * @param[out] thread Thread in question to wake up. + * @exception rb_eThreadError Stop flogging a dead horse. + * @return The passed thread. + */ +VALUE rb_thread_run(VALUE thread); + +/** + * Terminates the given thread. Unlike a stopped thread, a killed thread could + * never be revived. This function does return, when passed e.g. an already + * killed thread. But if the passed thread is the only one, or a special + * thread called "main", then it also terminates the entire process. + * + * @param[out] thread The thread to terminate. + * @exception rb_eFatal The passed thread is the running thread. + * @exception rb_eSystemExit The passed thread is the last thread. + * @return The passed thread. + * @post Either the passed thread, or the process entirely, is killed. 
+ * + * @internal + * + * It seems killing the main thread also kills the entire process even if there + * are multiple running ractors. No idea why. + */ +VALUE rb_thread_kill(VALUE thread); + +RBIMPL_ATTR_NONNULL((1)) +/** + * Creates a Ruby thread that is backended by a C function. + * + * @param[in] f The function to run on a thread. + * @param[in,out] g Passed through to `f`. + * @exception rb_eThreadError Could not create a ruby thread. + * @exception rb_eSystemCallError Situations like `EPERM`. + * @return Allocated instance of ::rb_cThread. + * @note This doesn't wait for anything. + */ +VALUE rb_thread_create(VALUE (*f)(void *g), void *g); + +/** + * Identical to rb_thread_sleep(), except it takes struct `timeval` instead. + * + * @warning This function can be interrupted by signals. + * @param[in] time Duration. + * @exception rb_eInterrupt Interrupted. + */ +void rb_thread_wait_for(struct timeval time); + +/** + * Obtains the "current" thread. + * + * @return The current thread of the current ractor of the current execution + * context. + * @pre This function must be called from a thread controlled by ruby. + */ VALUE rb_thread_current(void); + +/** + * Obtains the "main" thread. There are threads called main. Historically the + * (only) main thread was the one which runs when the process boots. Now that + * we have Ractor, there are more than one main threads. + * + * @return The main thread of the current ractor of the current execution + * context. + * @pre This function must be called from a thread controlled by ruby. + */ VALUE rb_thread_main(void); -VALUE rb_thread_local_aref(VALUE, ID); -VALUE rb_thread_local_aset(VALUE, ID, VALUE); + +/** + * This badly named function reads from a Fiber local storage. When this + * function was born there was no such thing like a Fiber. The world was + * innocent. But now... This is a Fiber local storage. Sorry. + * + * @param[in] thread Thread that the target Fiber is running. 
+ * @param[in] key The name of the Fiber local storage to read. + * @retval RUBY_Qnil No such storage. + * @retval otherwise The value stored at `key`. + * @note There in fact are "true" thread local storage, but Ruby doesn't + * provide any interface of them to you, C programmers. + */ +VALUE rb_thread_local_aref(VALUE thread, ID key); + +/** + * This badly named function writes to a Fiber local storage. When this + * function was born there was no such thing like a Fiber. The world was + * innocent. But now... This is a Fiber local storage. Sorry. + * + * @param[in] thread Thread that the target Fiber is running. + * @param[in] key The name of the Fiber local storage to write. + * @param[in] val The new value of the storage. + * @exception rb_eFrozenError `thread` is frozen. + * @return The passed `val` as-is. + * @post Fiber local storage `key` has value of `val`. + * @note There in fact are "true" thread local storage, but Ruby doesn't + * provide any interface of them to you, C programmers. + */ +VALUE rb_thread_local_aset(VALUE thread, ID key, VALUE val); + +/** + * A `pthread_atfork(3posix)`-like API. Ruby expects its child processes to + * call this function at the very beginning of their processes. If you plan to + * fork a process don't forget to call it. + */ void rb_thread_atfork(void); + +/** + * :FIXME: situation of this function is unclear. It seems nobody uses it. + * Maybe a good idea to KonMari. + */ void rb_thread_atfork_before_exec(void); -VALUE rb_exec_recursive(VALUE(*)(VALUE, VALUE, int),VALUE,VALUE); -VALUE rb_exec_recursive_paired(VALUE(*)(VALUE, VALUE, int),VALUE,VALUE,VALUE); -VALUE rb_exec_recursive_outer(VALUE(*)(VALUE, VALUE, int),VALUE,VALUE); -VALUE rb_exec_recursive_paired_outer(VALUE(*)(VALUE, VALUE, int),VALUE,VALUE,VALUE); +/** + * "Recursion" API entry point. This basically calls the given function with + * the given arguments, but additionally with recursion flag. 
The flag is set + * to 1 if the execution has already experienced the passed `g` parameter + * before. + * + * @param[in] f The function that possibly recurs. + * @param[in,out] g Passed as-is to `f`. + * @param[in,out] h Passed as-is to `f`. + * @return The return value of f. + */ +VALUE rb_exec_recursive(VALUE (*f)(VALUE g, VALUE h, int r), VALUE g, VALUE h); + +/** + * Identical to rb_exec_recursive(), except it checks for the recursion on the + * ordered pair of `{ g, p }` instead of just `g`. + * + * @param[in] f The function that possibly recurs. + * @param[in,out] g Passed as-is to `f`. + * @param[in] p Paired object for recursion detection. + * @param[in,out] h Passed as-is to `f`. + */ +VALUE rb_exec_recursive_paired(VALUE (*f)(VALUE g, VALUE h, int r), VALUE g, VALUE p, VALUE h); + +/** + * Identical to rb_exec_recursive(), except it calls `f` for outermost + * recursion only. Inner recursions yield calls to rb_throw_obj(). + * + * @param[in] f The function that possibly recurs. + * @param[in,out] g Passed as-is to `f`. + * @param[in,out] h Passed as-is to `f`. + * @return The return value of f. + * + * @internal + * + * It seems nobody uses the "it calls rb_throw_obj()" part of this function. + * @shyouhei doesn't understand the needs. + */ +VALUE rb_exec_recursive_outer(VALUE (*f)(VALUE g, VALUE h, int r), VALUE g, VALUE h); + +/** + * Identical to rb_exec_recursive_outer(), except it checks for the recursion + * on the ordered pair of `{ g, p }` instead of just `g`. It can also be seen + * as a routine identical to rb_exec_recursive_paired(), except it calls `f` + * for outermost recursion only. Inner recursions yield calls to + * rb_throw_obj(). + * + * @param[in] f The function that possibly recurs. + * @param[in,out] g Passed as-is to `f`. + * @param[in] p Paired object for recursion detection. + * @param[in,out] h Passed as-is to `f`. + * + * @internal + * + * It seems nobody uses the "it calls rb_throw_obj()" part of this function. 
+ * @shyouhei doesn't understand the needs. + */ +VALUE rb_exec_recursive_paired_outer(VALUE (*f)(VALUE g, VALUE h, int r), VALUE g, VALUE p, VALUE h); + +/** + * This is the type of UBFs. An UBF is a function that unblocks a blocking + * region. For instance when a thread is blocking due to `pselect(3posix)`, it + * is highly expected that `pthread_kill(3posix)` can interrupt the system call + * and the thread could revive. Or when a thread is blocking due to + * `waitpid(3posix)`, it is highly expected that killing the waited process + * should suffice. An UBF is a function that does such things. Designing your + * own UBF needs deep understanding of why your blocking region blocks, how + * threads work in ruby, and a matter of luck. It often is the case you simply + * cannot cancel something that had already begun. + * + * @see rb_thread_call_without_gvl() + */ typedef void rb_unblock_function_t(void *); + +/** + * @private + * + * This is an implementation detail. Must be a mistake to be here. + * + * @internal + * + * Why is this function type different from what rb_thread_call_without_gvl() + * takes? + */ typedef VALUE rb_blocking_function_t(void *); + +/** + * Checks for interrupts. In ruby, signals are masked by default. You can + * call this function at will to check if there are pending signals. In case + * there are, they would be handled in this function. + * + * If your extension library has a function that takes a long time, consider + * calling it periodically. + * + * @note It might switch to another thread. + */ void rb_thread_check_ints(void); + +/** + * Checks if the thread's execution was recently interrupted. If called from + * that thread, this function can be used to detect spurious wake-ups. + * + * @param[in] thval Thread in question. + * @retval 0 The thread was not interrupted. + * @retval otherwise The thread was interrupted recently. + * + * @internal + * + * Above description is not a lie. 
But actually the return value is an opaque + * trap vector. If you know which bit means which, you can know what happened. + */ int rb_thread_interrupted(VALUE thval); +/** + * A special UBF for blocking IO operations. You need deep understanding of + * what this actually does before using. Basically you should not use it from + * extension libraries. It is too easy to mess up. + */ #define RUBY_UBF_IO RBIMPL_CAST((rb_unblock_function_t *)-1) + +/** + * A special UBF for blocking process operations. You need deep understanding + * of what this actually does before using. Basically you should not use it from + * extension libraries. It is too easy to mess up. + */ #define RUBY_UBF_PROCESS RBIMPL_CAST((rb_unblock_function_t *)-1) + +/* thread_sync.c */ + +/** + * Creates a mutex. + * + * @return An allocated instance of rb_cMutex. + */ VALUE rb_mutex_new(void); + +/** + * Queries if there are any threads that hold the lock. + * + * @param[in] mutex The mutex in question. + * @retval RUBY_Qtrue The mutex is locked by someone. + * @retval RUBY_Qfalse The mutex is not locked by anyone. + */ VALUE rb_mutex_locked_p(VALUE mutex); + +/** + * Attempts to lock the mutex, without waiting for other threads to unlock it. + * Failure in locking the mutex can be detected by the return value. + * + * @param[out] mutex The mutex to lock. + * @retval RUBY_Qtrue Successfully locked by the current thread. + * @retval RUBY_Qfalse Otherwise. + * @note This function also returns ::RUBY_Qfalse when the mutex is + * already owned by the calling thread itself. + */ VALUE rb_mutex_trylock(VALUE mutex); + +/** + * Attempts to lock the mutex. It waits until the mutex gets available. + * + * @param[out] mutex The mutex to lock. + * @exception rb_eThreadError Recursive deadlock situation. + * @return The passed mutex. + * @post The mutex is owned by the current thread. + */ VALUE rb_mutex_lock(VALUE mutex); + +/** + * Releases the mutex. + * + * @param[out] mutex The mutex to unlock. 
+ * @exception rb_eThreadError The mutex is not owned by the current thread. + * @return The passed mutex. + * @post Upon successful return the passed mutex is no longer owned by + * the current thread. + */ VALUE rb_mutex_unlock(VALUE mutex); + +/** + * Releases the lock held in the mutex and waits for the period of time; + * reacquires the lock on wakeup. + * + * @pre The lock has to be owned by the current thread beforehand. + * @param[out] self The target mutex. + * @param[in] timeout Duration, in seconds, in ::rb_cNumeric. + * @exception rb_eArgError `timeout` is negative. + * @exception rb_eRangeError `timeout` is out of range of `time_t`. + * @exception rb_eThreadError The mutex is not owned by the current thread. + * @return Number of seconds it actually slept. + * @warning It is a failure not to check the return value. This function + * can return spuriously for various reasons. Maybe other threads + * can rb_thread_wakeup(). Maybe an end user can press the + * Control and C key from the interactive console. On the other + * hand it can also take longer than the specified. The mutex + * could be locked by someone else. It waits then. + * @post Upon successful return the passed mutex is owned by the current + * thread. + * + * @internal + * + * This function is called from `ConditionVariable#wait`. So it is not a + * deprecated feature. However @shyouhei have never seen any similar mutex + * primitive available in any other languages than Ruby. + * + * EDIT: In 2021, @shyouhei asked @ko1 in person about this API. He answered + * that it is his invention. The motivation behind its design is to eliminate + * needs of condition variables as primitives. Unlike other languages, Ruby's + * `ConditionVariable` class was written in pure-Ruby initially. We don't have + * to implement machine-native condition variables in assembly each time we + * port Ruby to a new architecture. This function made it possible. 
"I felt I + * was a genius when this idea came to me", said @ko1. + * + * `rb_cConditionVariable` is now written in C for speed, though. + */ VALUE rb_mutex_sleep(VALUE self, VALUE timeout); + +/** + * Obtains the lock, runs the passed function, and releases the lock when it + * completes. + * + * @param[out] mutex The mutex to lock. + * @param[in] func What to do during the mutex is locked. + * @param[in,out] arg Passed as-is to `func`. + */ VALUE rb_mutex_synchronize(VALUE mutex, VALUE (*func)(VALUE arg), VALUE arg); RBIMPL_SYMBOL_EXPORT_END() diff --git a/include/ruby/internal/intern/time.h b/include/ruby/internal/intern/time.h index c7ae6ec2f5..df482862eb 100644 --- a/include/ruby/internal/intern/time.h +++ b/include/ruby/internal/intern/time.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Public APIs related to ::rb_cTime. */ #include "ruby/internal/config.h" @@ -26,6 +26,7 @@ # include <time.h> /* for time_t */ #endif +#include "ruby/internal/attr/nonnull.h" #include "ruby/internal/dllexport.h" #include "ruby/internal/value.h" @@ -35,15 +36,124 @@ struct timespec; struct timeval; /* time.c */ -void rb_timespec_now(struct timespec *); -VALUE rb_time_new(time_t, long); -VALUE rb_time_nano_new(time_t, long); -VALUE rb_time_timespec_new(const struct timespec *, int); -VALUE rb_time_num_new(VALUE, VALUE); + +RBIMPL_ATTR_NONNULL(()) +/** + * Fills the current time into the given struct. + * + * @param[out] ts Return buffer. + * @exception rb_eSystemCallError Access denied for hardware clock. + * @post Current time is stored in `*ts`. + */ +void rb_timespec_now(struct timespec *ts); + +/** + * Creates an instance of ::rb_cTime with the given time and the local + * timezone. 
+ * + * @param[in] sec Seconds since the UNIX epoch. + * @param[in] usec Subsecond part, in microseconds resolution. + * @exception rb_eRangeError Cannot express the time. + * @return An allocated instance of ::rb_cTime. + */ +VALUE rb_time_new(time_t sec, long usec); + +/** + * Identical to rb_time_new(), except it accepts the time in nanoseconds + * resolution. + * + * @param[in] sec Seconds since the UNIX epoch. + * @param[in] nsec Subsecond part, in nanoseconds resolution. + * @exception rb_eRangeError Cannot express the time. + * @return An allocated instance of ::rb_cTime. + */ +VALUE rb_time_nano_new(time_t sec, long nsec); + +RBIMPL_ATTR_NONNULL(()) +/** + * Creates an instance of ::rb_cTime, with given time and offset. + * + * @param[in] ts Time specifier. + * @param[in] offset Offset specifier, can take following values: + * - `INT_MAX`: `ts` is in local time. + * - `INT_MAX - 1`: `ts` is in UTC. + * - `-86400` to `86400`: fixed timezone. + * @exception rb_eArgError Malformed `offset`. + * @return An allocated instance of ::rb_cTime. + */ +VALUE rb_time_timespec_new(const struct timespec *ts, int offset); + +/** + * Identical to rb_time_timespec_new(), except it takes Ruby values instead of + * C structs. + * + * @param[in] timev Something numeric. Currently Integers, Rationals, + * and Floats are accepted. + * @param[in] off Offset specifier. As of 2.7 this argument is + * heavily extended to take following kinds of + * objects: + * - ::RUBY_Qundef ... means UTC. + * - ::rb_cString ... "+12:34" etc. + * - A mysterious "zone" object. This is largely + * undocumented. However the initial intent was + * that we want to accept + * `ActiveSupport::TimeZone` here. Other gems + * could also be possible... But how to make an + * acceptable class is beyond this document. + * @exception rb_eArgError Malformed `off`. + * @return An allocated instance of ::rb_cTime. + */ +VALUE rb_time_num_new(VALUE timev, VALUE off); + +/** + * Creates a "time interval". 
This basically converts an instance of + * ::rb_cNumeric into a struct `timeval`, but for instance negative time + * interval must not exist. + * + * @param[in] num An instance of ::rb_cNumeric. + * @exception rb_eArgError `num` is negative. + * @exception rb_eRangeError `num` is out of range of `timeval::tv_sec`. + * @return A struct that represents the identical time to `num`. + */ struct timeval rb_time_interval(VALUE num); + +/** + * Converts an instance of rb_cTime to a struct timeval that represents the + * identical point of time. It can also take something numeric; would consider + * it as a UNIX time then. + * + * @param[in] time Instance of either ::rb_cTime or ::rb_cNumeric. + * @exception rb_eRangeError `time` is out of range of `timeval::tv_sec`. + * @return A struct that represents the identical time to `time`. + */ struct timeval rb_time_timeval(VALUE time); + +/** + * Identical to rb_time_timeval(), except for return type. + * + * @param[in] time Instance of either ::rb_cTime or ::rb_cNumeric. + * @exception rb_eRangeError `time` is out of range of `timeval::tv_sec`. + * @return A struct that represents the identical time to `time`. + */ struct timespec rb_time_timespec(VALUE time); + +/** + * Identical to rb_time_interval(), except for return type. + * + * @param[in] num An instance of ::rb_cNumeric. + * @exception rb_eArgError `num` is negative. + * @exception rb_eRangeError `num` is out of range of `timespec::tv_sec`. + * @return A struct that represents the identical time to `num`. + */ struct timespec rb_time_timespec_interval(VALUE num); + +/** + * Queries the offset, in seconds between the time zone of the time and the + * UTC. + * + * @param[in] time An instance of ::rb_cTime. + * @return Numeric offset. 
+ */ VALUE rb_time_utc_offset(VALUE time); RBIMPL_SYMBOL_EXPORT_END() diff --git a/include/ruby/internal/intern/variable.h b/include/ruby/internal/intern/variable.h index faa0cc004f..479c3950c1 100644 --- a/include/ruby/internal/intern/variable.h +++ b/include/ruby/internal/intern/variable.h @@ -17,9 +17,10 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Public APIs related to names inside of a Ruby program. */ +#include "ruby/internal/attr/nonnull.h" #include "ruby/internal/attr/noreturn.h" #include "ruby/internal/dllexport.h" #include "ruby/internal/value.h" @@ -28,56 +29,599 @@ RBIMPL_SYMBOL_EXPORT_BEGIN() /* variable.c */ -VALUE rb_mod_name(VALUE); -VALUE rb_class_path(VALUE); -VALUE rb_class_path_cached(VALUE); -void rb_set_class_path(VALUE, VALUE, const char*); -void rb_set_class_path_string(VALUE, VALUE, VALUE); -VALUE rb_path_to_class(VALUE); -VALUE rb_path2class(const char*); -VALUE rb_class_name(VALUE); -VALUE rb_autoload_load(VALUE, ID); -VALUE rb_autoload_p(VALUE, ID); -VALUE rb_f_trace_var(int, const VALUE*); -VALUE rb_f_untrace_var(int, const VALUE*); + +/** + * Queries the name of a module. + * + * @param[in] mod An instance of ::rb_cModule. + * @retval RUBY_Qnil `mod` is anonymous. + * @retval otherwise `mod` is onymous. + */ +VALUE rb_mod_name(VALUE mod); + +/** + * Identical to rb_mod_name(), except it returns `#<Class: ...>` style + * inspection for anonymous modules. + * + * @param[in] mod An instance of ::rb_cModule. + * @return An instance of ::rb_cString representing `mod`'s path. + */ +VALUE rb_class_path(VALUE mod); + +/** + * @alias{rb_mod_name} + * + * @internal + * + * Am I missing something? Why we have the same thing in different names? 
+ */ +VALUE rb_class_path_cached(VALUE mod); + +RBIMPL_ATTR_NONNULL(()) +/** + * Names a class. + * + * @param[out] klass Target module to name. + * @param[out] space Namespace that `klass` shall reside. + * @param[in] name Name of `klass`. + * @post `klass` has `space::klass` name. + */ +void rb_set_class_path(VALUE klass, VALUE space, const char *name); + +/** + * Identical to rb_set_class_path(), except it accepts the name as Ruby's + * string instead of C's. + * + * @param[out] klass Target module to name. + * @param[out] space Namespace that `klass` shall reside. + * @param[in] name Name of `klass`. + * @post `klass` has `space::klass` name. + */ +void rb_set_class_path_string(VALUE klass, VALUE space, VALUE name); + +/** + * Identical to rb_path2class(), except it accepts the path as Ruby's string + * instead of C's. + * + * @param[in] path Path to query. + * @exception rb_eArgError No such constant. + * @exception rb_eTypeError The path resolved to a non-module. + * @return Resolved class. + */ +VALUE rb_path_to_class(VALUE path); + +RBIMPL_ATTR_NONNULL(()) +/** + * Resolves a `Q::W::E::R`-style path string to the actual class it points. + * + * @param[in] path Path to query. + * @exception rb_eArgError No such constant. + * @exception rb_eTypeError The path resolved to a non-module. + * @return Resolved class. + */ +VALUE rb_path2class(const char *path); + +/** + * Queries the name of the given object's class. + * + * @param[in] obj Arbitrary object. + * @return An instance of ::rb_cString representing `obj`'s class' path. + */ +VALUE rb_class_name(VALUE obj); + +/** + * Kicks the autoload procedure as if it was "touched". + * + * @param[out] space Namespace where autoload is defined. + * @param[in] name Name of the autoloaded constant. + * @retval RUBY_Qfalse No such autoload. + * @retval RUBY_Qtrue Autoload successfully initiated. 
+ * @note As an autoloaded library is expected to define `space::name`, + * it is a nature of this function to have process-global side + * effects. + * @note Multiple threads can simultaneously call this API. It blocks + * then. That must not last indefinitely but can take longer than + * you expect. + * + * @internal + * + * @shyouhei has no idea why extension libraries should use this API. + */ +VALUE rb_autoload_load(VALUE space, ID name); + +/** + * Queries if an autoload is defined at a point. + * + * @param[in] space Namespace where autoload is defined. + * @param[in] name Name of the autoloaded constant. + * @retval RUBY_Qnil No such autoload. + * @retval otherwise The feature (path) registered at `space::name`. + */ +VALUE rb_autoload_p(VALUE space, ID name); + +/** + * Traces a global variable. + * + * @param[in] argc Either 1 or 2. + * @param[in] argv Variable name, optionally a Proc. + * @retval RUBY_Qnil No previous tracers. + * @retval otherwise Previous tracers. + * + * @internal + * + * @shyouhei has no idea why extension libraries should use this API. + */ +VALUE rb_f_trace_var(int argc, const VALUE *argv); + +/** + * Deletes the passed tracer from the passed global variable, or if omitted, + * deletes everything. + * + * @param[in] argc Either 1 or 2. + * @param[in] argv Variable name, optionally a Proc. + * @retval RUBY_Qnil No previous tracers. + * @retval otherwise Deleted tracers. + * + * @internal + * + * @shyouhei has no idea why extension libraries should use this API. + */ +VALUE rb_f_untrace_var(int argc, const VALUE *argv); + +/** + * Queries the list of global variables. + * + * @return The list of the name of the global variables. + * + * @internal + * + * Above description is in fact inaccurate. This API interfaces with Ractors. 
+ */ VALUE rb_f_global_variables(void); -void rb_alias_variable(ID, ID); -void rb_copy_generic_ivar(VALUE,VALUE); -void rb_free_generic_ivar(VALUE); -VALUE rb_ivar_get(VALUE, ID); -VALUE rb_ivar_set(VALUE, ID, VALUE); -VALUE rb_ivar_defined(VALUE, ID); -void rb_ivar_foreach(VALUE, int (*)(ID, VALUE, st_data_t), st_data_t); -st_index_t rb_ivar_count(VALUE); -VALUE rb_attr_get(VALUE, ID); -VALUE rb_obj_instance_variables(VALUE); -VALUE rb_obj_remove_instance_variable(VALUE, VALUE); + +/** + * Aliases a global variable. Did you know that you can alias a global + * variable? It is like aliasing methods: + * + * ```ruby + * alias $dst $src + * ``` + * + * This C function does the same thing. + * + * @param[in] dst Destination name. + * @param[in] src Source name. + * @post A global variable named `dst` is defined to be an alias of a + * global variable named `src`. + * + * @internal + * + * Above description is in fact inaccurate. This API interfaces with Ractors. + */ +void rb_alias_variable(ID dst, ID src); + +/** + * Frees the list of instance variables. 3rd parties need not know, but there + * are several ways to store an object's instance variables, depending on its + * internal structure. This function makes sense when the passed object is + * using so-called "generic" backend storage. People need not be aware of this + * working behind-the-scenes. + * + * @param[out] obj The object in question. + * + * @internal + * + * This just destroys the given object. @shyouhei has no idea why extension + * libraries should use this API. + */ +void rb_free_generic_ivar(VALUE obj); + +/** + * Identical to rb_iv_get(), except it accepts the name as an ::ID instead of a + * C string. + * + * @param[in] obj Target object. + * @param[in] name Target instance variable to query. + * @retval RUBY_Qnil No such instance variable. + * @retval otherwise The value assigned to the instance variable.
+ */ +VALUE rb_ivar_get(VALUE obj, ID name); + +/** + * Identical to rb_iv_set(), except it accepts the name as an ::ID instead of a + * C string. + * + * @param[out] obj Target object. + * @param[in] name Target instance variable. + * @param[in] val Value to assign. + * @exception rb_eFrozenError Can't modify `obj`. + * @exception rb_eArgError `obj` has too many instance variables. + * @return Passed value. + * @post An instance variable named `name` is defined if absent on + * `obj`, whose value is set to `val`. + */ +VALUE rb_ivar_set(VALUE obj, ID name, VALUE val); + +/** + * Queries if the instance variable is defined at the object. This roughly + * resembles `defined?(@name)` in `obj`'s context. + * + * @param[in] obj Target object. + * @param[in] name Target instance variable to query. + * @retval RUBY_Qtrue There is an instance variable. + * @retval RUBY_Qfalse No such instance variable. + */ +VALUE rb_ivar_defined(VALUE obj, ID name); + +/** + * Iterates over an object's instance variables. + * + * @param[in] obj Target object. + * @param[in] func Callback function. + * @param[in] arg Passed as-is to the last argument of `func`. + */ +void rb_ivar_foreach(VALUE obj, int (*func)(ID name, VALUE val, st_data_t arg), st_data_t arg); + +/** + * Number of instance variables defined on an object. + * + * @param[in] obj Target object. + * @return Number of instance variables defined on `obj`. + */ +st_index_t rb_ivar_count(VALUE obj); + +/** + * Identical to rb_ivar_get(). + * + * @param[in] obj Target object. + * @param[in] name Target instance variable to query. + * @retval RUBY_Qnil No such instance variable. + * @retval otherwise The value assigned to the instance variable. + * + * @internal + * + * Am I missing something? Why do we have the same thing in different names? + */ +VALUE rb_attr_get(VALUE obj, ID name); + +/** + * Resembles `Object#instance_variables`. + * + * @param[in] obj Target object to query.
+ * @return An array of instance variable names for the receiver. + * @note Simply defining an accessor does not create the corresponding + * instance variable. + */ +VALUE rb_obj_instance_variables(VALUE obj); + +/** + * Resembles `Object#remove_instance_variable`. + * + * @param[out] obj Target object. + * @param[in] name Variable name to remove, either in Symbol or String. + * @return What was removed. + * @post Instance variable named `name` is deleted from `obj`. + */ +VALUE rb_obj_remove_instance_variable(VALUE obj, VALUE name); + +/** + * This API is mysterious. It has been there since the initial revision. Not + * a single bit of documentation has ever been written. The function name doesn't + * describe anything. What should be passed to the argument, or what should be + * the return value, are not obvious. Yet it has evolved over time. The + * source code is written in a counter-intuitive way (as of 3.0). + * + * Simply put, don't try to understand this API. + */ void *rb_mod_const_at(VALUE, void*); + +/** + * This is a variant of rb_mod_const_at(). As a result, it is also mysterious. + * It _seems_ it iterates over the ancestry tree of the module. But what that + * means is beyond a human brain. + */ void *rb_mod_const_of(VALUE, void*); + +/** + * This is another mysterious API that comes with no documentation at all. It + * seems it expects some specific data structure for the passed pointer. But + * the details have never been made explicit. It seems nobody should use this + * API. + */ VALUE rb_const_list(void*); -VALUE rb_mod_constants(int, const VALUE *, VALUE); -VALUE rb_mod_remove_const(VALUE, VALUE); -int rb_const_defined(VALUE, ID); -int rb_const_defined_at(VALUE, ID); -int rb_const_defined_from(VALUE, ID); -VALUE rb_const_get(VALUE, ID); -VALUE rb_const_get_at(VALUE, ID); -VALUE rb_const_get_from(VALUE, ID); -void rb_const_set(VALUE, ID, VALUE); -VALUE rb_const_remove(VALUE, ID); + +/** + * Resembles `Module#constants`.
Lists the constants defined at the + * receiver. This includes the names of constants in any included modules, + * unless `argv[0]` is ::RUBY_Qfalse. + * + * The implementation makes no guarantees about the order in which the + * constants are yielded. + * + * @param[in] argc Either 0 or 1. + * @param[in] argv Pointer to ::RUBY_Qfalse, if `argc == 1`. + * @param[in] recv Target namespace. + * @return An array of symbols, which are constant names under `recv`. + */ +VALUE rb_mod_constants(int argc, const VALUE *argv, VALUE recv); + +/** + * Resembles `Module#remove_const`. + * + * @param[out] space Target namespace. + * @param[in] name Constant name to remove, either in Symbol or String. + * @return What was removed. + * @post Constant named `space::name` is deleted. + * @note In case what was removed was in fact a module or a class, this + * operation does not affect its name. Which means when people + * for instance look at it using `p` etc., it still introduces + * itself using the deleted name. Can confuse people. + */ +VALUE rb_mod_remove_const(VALUE space, VALUE name); + +/** + * Queries if the constant is defined at the namespace. + * + * @param[in] space Target namespace. + * @param[in] name Target name to query. + * @retval RUBY_Qtrue There is a constant. + * @retval RUBY_Qfalse No such constant. + * + * @internal + * + * The return values are not a typo! This function returns ruby values cast to + * `int`. Completely brain-damaged design. + */ +int rb_const_defined(VALUE space, ID name); + +/** + * Identical to rb_const_defined(), except it doesn't look for parent classes. + * For instance `Array` is a toplevel constant, which is visible from + * everywhere. But this function does not take such things into account. It + * concerns only what is directly defined inside of the given namespace. + * + * @param[in] space Target namespace. + * @param[in] name Target name to query. + * @retval RUBY_Qtrue There is a constant.
+ * @retval RUBY_Qfalse No such constant. + * + * @internal + * + * The return values are not a typo! This function returns ruby values cast to + * `int`. Completely brain-damaged design. + */ +int rb_const_defined_at(VALUE space, ID name); + +/** + * Identical to rb_const_defined(), except it returns false for private + * constants. + * + * @param[in] space Target namespace. + * @param[in] name Target name to query. + * @retval RUBY_Qtrue There is a constant. + * @retval RUBY_Qfalse No such constant. + * + * @internal + * + * What does "from" mean? The name sounds quite cryptic. + * + * The return values are not a typo! This function returns ruby values cast to + * `int`. Completely brain-damaged design. + */ +int rb_const_defined_from(VALUE space, ID name); + +/** + * Identical to rb_const_defined(), except it returns the actual defined value. + * + * @param[in] space Target namespace. + * @param[in] name Target name to query. + * @exception rb_eNameError No such constant. + * @return The defined constant. + * + * @internal + * + * Above description is in fact inaccurate. This API interfaces with Ractors. + */ +VALUE rb_const_get(VALUE space, ID name); + +/** + * Identical to rb_const_defined_at(), except it returns the actual defined + * value. It can also be seen as a routine identical to rb_const_get(), except + * it doesn't look for parent classes. + * + * @param[in] space Target namespace. + * @param[in] name Target name to query. + * @exception rb_eNameError No such constant. + * @return The defined constant. + * + * @internal + * + * Above description is in fact inaccurate. This API interfaces with Ractors. + */ +VALUE rb_const_get_at(VALUE space, ID name); + +/** + * Identical to rb_const_defined_from(), except it returns the actual defined + * value. It can also be seen as a routine identical to rb_const_get(), except + * it doesn't return a private constant. + * + * @param[in] space Target namespace. + * @param[in] name Target name to query.
+ * @exception rb_eNameError No such constant. + * @return The defined constant. + * + * @internal + * + * Above description is in fact inaccurate. This API interfaces with Ractors. + */ +VALUE rb_const_get_from(VALUE space, ID name); + +/** + * Names a constant. + * + * @param[out] space Target namespace. + * @param[in] name Target name to define. + * @param[in] val Value to define. + * @exception rb_eTypeError `space` is not a module. + * @post `name` is a constant under `space`, whose value is `val`. + * @note You can reassign. + * + * @internal + * + * Above description is in fact inaccurate. This API interfaces with Ractors. + */ +void rb_const_set(VALUE space, ID name, VALUE val); + +/** + * Identical to rb_mod_remove_const(), except it takes the name as ::ID instead + * of ::VALUE. + * + * @param[out] space Target namespace. + * @param[in] name Name of the constant to remove. + * @return What was removed. + * @post Constant named `space::name` is deleted. + * @note In case what was removed was in fact a module or a class, this + * operation does not affect its name. Which means when people + * for instance look at it using `p` etc., it still introduces + * itself using the deleted name. Can confuse people. + */ +VALUE rb_const_remove(VALUE space, ID name); + #if 0 /* EXPERIMENTAL: remove if no problem */ RBIMPL_ATTR_NORETURN() -VALUE rb_mod_const_missing(VALUE,VALUE); +/** + * This is the default implementation of `Module#const_missing`. + * + * @param[in] space Target namespace. + * @param[in] name Target name that is nonexistent. + * @exception rb_eNameError Always. + */ +VALUE rb_mod_const_missing(VALUE space, VALUE name); #endif -VALUE rb_cvar_defined(VALUE, ID); -void rb_cvar_set(VALUE, ID, VALUE); -VALUE rb_cvar_get(VALUE, ID); -VALUE rb_cvar_find(VALUE, ID, VALUE*); -void rb_cv_set(VALUE, const char*, VALUE); -VALUE rb_cv_get(VALUE, const char*); + +/** + * Queries if the given class has the given class variable.
+ * + * @param[in] klass Target class. + * @param[in] name Name to query. + * @retval RUBY_Qtrue Yes there is. + * @retval RUBY_Qfalse No there isn't. + * @pre `klass` must be an instance of rb_cModule. + * + * @internal + * + * Above description is in fact inaccurate. This API interfaces with Ractors. + */ +VALUE rb_cvar_defined(VALUE klass, ID name); + +/** + * Assigns a value to a class variable. + * + * @param[out] klass Target class. + * @param[in] name Variable name. + * @param[in] val Value to be assigned. + * @post `klass` has a class variable named `name` whose value is `val`. + * + * @internal + * + * Above description is in fact inaccurate. This API interfaces with Ractors. + */ +void rb_cvar_set(VALUE klass, ID name, VALUE val); + +/** + * Obtains a value from a class variable. + * + * @param[in] klass Target class. + * @param[in] name Variable name. + * @exception rb_eNameError Uninitialised class variable. + * @exception rb_eRuntimeError `[Bug#14541]` situation. + * @return Class variable named `name` under `klass`. + * + * @internal + * + * Above description is in fact inaccurate. This API interfaces with Ractors. + */ +VALUE rb_cvar_get(VALUE klass, ID name); + +RBIMPL_ATTR_NONNULL(()) +/** + * Identical to rb_cvar_get(), except it takes an additional "front" pointer. + * This extra parameter is a buffer, which will have the class where the + * queried class variable actually resides. + * + * @param[in] klass Target class. + * @param[in] name Variable name. + * @param[out] front Return buffer. + * @exception rb_eNameError Uninitialised class variable. + * @exception rb_eRuntimeError `[Bug#14541]` situation. + * @return Class variable named `name` under `klass`. + * @post `front` has the class object, which is an ancestor of `klass`, + * where the queried class variable actually resides. + * + * @internal + * + * Above description is in fact inaccurate. This API interfaces with Ractors.
+ */ +VALUE rb_cvar_find(VALUE klass, ID name, VALUE *front); + +RBIMPL_ATTR_NONNULL(()) +/** + * Identical to rb_cvar_set(), except it accepts C's string instead of ::ID. + * + * @param[out] klass Target class. + * @param[in] name Variable name. + * @param[in] val Value to be assigned. + * @post `klass` has a class variable named `name` whose value is `val`. + */ +void rb_cv_set(VALUE klass, const char *name, VALUE val); + +RBIMPL_ATTR_NONNULL(()) +/** + * Identical to rb_cvar_get(), except it accepts C's string instead of ::ID. + * + * @param[in] klass Target class. + * @param[in] name Variable name. + * @exception rb_eNameError Uninitialised class variable. + * @exception rb_eRuntimeError `[Bug#14541]` situation. + * @return Class variable named `name` under `klass`. + */ +VALUE rb_cv_get(VALUE klass, const char *name); + +RBIMPL_ATTR_NONNULL(()) +/** + * @alias{rb_cv_set} + * + * @internal + * + * Am I missing something? Why do we have the same thing in different names? + */ void rb_define_class_variable(VALUE, const char*, VALUE); -VALUE rb_mod_class_variables(int, const VALUE*, VALUE); -VALUE rb_mod_remove_cvar(VALUE, VALUE); + +/** + * Resembles `Module#class_variables`. Lists the class variables defined at the + * receiver. This includes the names of class variables in any included modules, + * unless `argv[0]` is ::RUBY_Qfalse. + * + * The implementation makes no guarantees about the order in which the + * class variables are yielded. + * + * @param[in] argc Either 0 or 1. + * @param[in] argv Pointer to ::RUBY_Qfalse, if `argc == 1`. + * @param[in] recv Target class. + * @return An array of symbols, which are class variable names under + * `recv`. + */ +VALUE rb_mod_class_variables(int argc, const VALUE *argv, VALUE recv); + +/** + * Resembles `Module#remove_class_variable`. + * + * @param[out] mod Target class. + * @param[in] name Variable name to remove, either in Symbol or String. + * @return What was removed. + * @post Class variable named `name` is deleted from `mod`.
+ */ +VALUE rb_mod_remove_cvar(VALUE mod, VALUE name); RBIMPL_SYMBOL_EXPORT_END() diff --git a/include/ruby/internal/intern/vm.h b/include/ruby/internal/intern/vm.h index 298a6ad2bb..29e0c7f534 100644 --- a/include/ruby/internal/intern/vm.h +++ b/include/ruby/internal/intern/vm.h @@ -17,9 +17,10 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. - * @brief Public APIs related to ::rb_cRubyVM. + * extension libraries. They could be written in C++98. + * @brief Public APIs related to rb_cRubyVM. */ +#include "ruby/internal/attr/nonnull.h" #include "ruby/internal/attr/noreturn.h" #include "ruby/internal/dllexport.h" #include "ruby/internal/value.h" @@ -27,40 +28,373 @@ RBIMPL_SYMBOL_EXPORT_BEGIN() /* vm.c */ + +/** + * Resembles `__LINE__`. + * + * @retval 0 Current execution context not in a ruby method. + * @retval otherwise The current line number of the current thread of the + * current ractor of the current execution context. + */ int rb_sourceline(void); + +/** + * Resembles `__FILE__`. + * + * @retval 0 Current execution context not in a ruby method. + * @retval otherwise The current source path of the current thread of the + * current ractor of the current execution context. + * @note This may or may not be an absolute path. + */ const char *rb_sourcefile(void); + +/** + * Resembles `__method__`. + * + * @param[out] idp Return buffer for method id. + * @param[out] klassp Return buffer for class. + * @retval 0 Current execution context not in a method. + * @retval 1 Successful return. + * @post Upon successful return `*idp` and `*klassp` are updated to have + * the current method name and its defined class respectively. + * @note Both parameters can be `NULL`. 
+ */ int rb_frame_method_id_and_class(ID *idp, VALUE *klassp); /* vm_eval.c */ -VALUE rb_check_funcall(VALUE, ID, int, const VALUE*); -VALUE rb_check_funcall_kw(VALUE, ID, int, const VALUE*, int); -void rb_remove_method(VALUE, const char*); -void rb_remove_method_id(VALUE, ID); -VALUE rb_eval_cmd_kw(VALUE, VALUE, int); -VALUE rb_apply(VALUE, ID, VALUE); +/** + * Identical to rb_funcallv(), except it returns ::RUBY_Qundef instead of + * raising ::rb_eNoMethodError. + * + * @param[in,out] recv Receiver of the method. + * @param[in] mid Name of the method to call. + * @param[in] argc Number of arguments. + * @param[in] argv Arbitrary number of method arguments. + * @retval RUBY_Qundef `recv` doesn't respond to `mid`. + * @retval otherwise What the method evaluates to. + */ +VALUE rb_check_funcall(VALUE recv, ID mid, int argc, const VALUE *argv); -VALUE rb_obj_instance_eval(int, const VALUE*, VALUE); -VALUE rb_obj_instance_exec(int, const VALUE*, VALUE); -VALUE rb_mod_module_eval(int, const VALUE*, VALUE); -VALUE rb_mod_module_exec(int, const VALUE*, VALUE); +/** + * Identical to rb_check_funcall(), except you can specify how to handle the + * last element of the given array. It can also be seen as a routine identical + * to rb_funcallv_kw(), except it returns ::RUBY_Qundef instead of raising + * ::rb_eNoMethodError. + * + * @param[in,out] recv Receiver of the method. + * @param[in] mid Name of the method to call. + * @param[in] argc Number of arguments. + * @param[in] argv Arbitrary number of method arguments. + * @param[in] kw_splat Handling of keyword parameters: + * - RB_NO_KEYWORDS `argv`'s last is not a keyword argument. + * - RB_PASS_KEYWORDS `argv`'s last is a keyword argument. + * - RB_PASS_CALLED_KEYWORDS it depends if there is a passed block. + * @retval RUBY_Qundef `recv` doesn't respond to `mid`. + * @retval otherwise What the method evaluates to. 
+ */ +VALUE rb_check_funcall_kw(VALUE recv, ID mid, int argc, const VALUE *argv, int kw_splat); + +/** + * This API is practically a variant of rb_proc_call_kw() now. Historically + * when there still was a concept called `$SAFE`, this was an API for that. + * But we no longer have that. This function basically ended its role. It + * just remains here because of no harm. + * + * @param[in] cmd A string, or something callable. + * @param[in] arg Argument passed to the call. + * @param[in] kw_splat Handling of keyword parameters: + * - RB_NO_KEYWORDS `arg`'s last is not a keyword argument. + * - RB_PASS_KEYWORDS `arg`'s last is a keyword argument. + * - RB_PASS_CALLED_KEYWORDS it depends if there is a passed block. + * @return What the command evaluates to. + */ +VALUE rb_eval_cmd_kw(VALUE cmd, VALUE arg, int kw_splat); + +/** + * Identical to rb_funcallv(), except it takes Ruby's array instead of C's. + * @param[in,out] recv Receiver of the method. + * @param[in] mid Name of the method to call. + * @param[in] args An instance of ::RArray. + * @exception rb_eNoMethodError No such method. + * @exception rb_eException Any exceptions happen inside. + * @return What the method evaluates to. + * @pre `args` must be an ::RArray. Call `to_ary` beforehand when + * necessary. + */ +VALUE rb_apply(VALUE recv, ID mid, VALUE args); + +/** + * Evaluates a string containing Ruby source code, or the given block, within + * the context of the receiver. In order to set the context, the variable + * `self` is set to `recv` while the code is executing, giving the code access + * to `recv`'s instance variables and private methods. + * + * When given a block, `recv` is also passed in as the block's only argument. + * + * When given a string, the optional second and third parameters supply a + * filename and starting line number that are used when reporting compilation + * errors. + * + * @param[in] argc Number of objects in `argv` + * @param[in] argv C array of 0 up to 3 elements. 
+ * @param[in] recv The object in question. + * @return What was evaluated. + */ +VALUE rb_obj_instance_eval(int argc, const VALUE *argv, VALUE recv); + +/** + * Executes the given block within the context of the receiver. In order to + * set the context, the variable `self` is set to `recv` while the code is + * executing, giving the code access to `recv`'s instance variables. Arguments + * are passed as block parameters. + * + * @param[in] argc Number of objects in `argv` + * @param[in] argv Arbitrary parameters to be passed to the block. + * @param[in] recv The object in question. + * @return What was evaluated. + * @note Don't confuse this with rb_obj_instance_eval(). The key + * difference is whether you can pass arbitrary parameters to the + * block, like this: + * + * ```ruby + * class Foo + * def initialize + * @foo = 5 + * end + * end + * Foo.new.instance_exec(7) {|i| @foo + i } # => 12 + * ``` + */ +VALUE rb_obj_instance_exec(int argc, const VALUE *argv, VALUE recv); + +/** + * Identical to rb_obj_instance_eval(), except it evaluates within the context + * of module. + * + * @param[in] argc Number of objects in `argv` + * @param[in] argv C array of 0 up to 3 elements. + * @param[in] mod The module in question. + * @pre `mod` must be a Module. + * @return What was evaluated. + */ +VALUE rb_mod_module_eval(int argc, const VALUE *argv, VALUE mod); + +/** + * Identical to rb_obj_instance_exec(), except it evaluates within the context + * of module. + * + * @param[in] argc Number of objects in `argv` + * @param[in] argv Arbitrary parameters to be passed to the block. + * @param[in] mod The module in question. + * @pre `mod` must be a Module. + * @return What was evaluated. + */ +VALUE rb_mod_module_exec(int argc, const VALUE *argv, VALUE mod); /* vm_method.c */ + +/** + * @private + * + * @deprecated This macro once was a thing in the old days, but makes no sense + * any longer today. Exists here for backwards compatibility + * only. 
You can safely forget about it. + */ #define HAVE_RB_DEFINE_ALLOC_FUNC 1 -typedef VALUE (*rb_alloc_func_t)(VALUE); -void rb_define_alloc_func(VALUE, rb_alloc_func_t); -void rb_undef_alloc_func(VALUE); -rb_alloc_func_t rb_get_alloc_func(VALUE); -void rb_clear_constant_cache(void); -void rb_alias(VALUE, ID, ID); -void rb_attr(VALUE,ID,int,int,int); -int rb_method_boundp(VALUE, ID, int); -int rb_method_basic_definition_p(VALUE, ID); - -int rb_obj_respond_to(VALUE, ID, int); -int rb_respond_to(VALUE, ID); + +/** + * This is the type of functions that ruby calls when trying to allocate an + * object. It is sometimes necessary to allocate extra memory regions for an + * object. When you define a class that uses ::RTypedData, it is typically the + * case. On such situations define a function of this type and pass it to + * rb_define_alloc_func(). + * + * @param[in] klass The class that this function is registered. + * @return A newly allocated instance of `klass`. + */ +typedef VALUE (*rb_alloc_func_t)(VALUE klass); + +/** + * Sets the allocator function of a class. + * + * @param[out] klass The class to modify. + * @param[in] func An allocator function for the class. + * @pre `klass` must be an instance of Class. + */ +void rb_define_alloc_func(VALUE klass, rb_alloc_func_t func); + +/** + * Deletes the allocator function of a class. It is sometimes desirable to + * restrict creation of an instance of a class. For example it rarely makes + * sense for a DB adaptor class to allow programmers creating DB row objects + * without querying the DB itself. You can kill sporadic creation of such + * objects then, by nullifying the allocator function using this API. + * + * @param[out] klass The class to modify. + * @pre `klass` must be an instance of Class. + */ +void rb_undef_alloc_func(VALUE klass); + +/** + * Queries the allocator function of a class. + * + * @param[in] klass The class in question. + * @pre `klass` must be an instance of Class. 
+ * @retval 0 No allocator function is registered. + * @retval otherwise The allocator function. + * + * @internal + * + * Who cares? @shyouhei finds no practical usage of the return value. Maybe we + * need KonMari. + */ +rb_alloc_func_t rb_get_alloc_func(VALUE klass); + +/** + * Clears the inline constant caches associated with a particular ID. Extension + * libraries should not bother with such things. Just forget about this API (or + * even, the presence of constant caches). + */ +void rb_clear_constant_cache_for_id(ID id); + +/** + * Resembles `alias`. + * + * @param[out] klass Where to define an alias. + * @param[in] dst New name. + * @param[in] src Existing name. + * @exception rb_eTypeError `klass` is not a class. + * @exception rb_eFrozenError `klass` is frozen. + * @exception rb_eNameError No such method named `src`. + * @post `klass` has a method named `dst`, which is the identical to its + * method named `src`. + */ +void rb_alias(VALUE klass, ID dst, ID src); + +/** + * This function resembles now-deprecated `Module#attr`. + * + * @param[out] klass Where to define an attribute. + * @param[in] name Name of an instance variable. + * @param[in] need_reader Whether attr_reader is needed. + * @param[in] need_writer Whether attr_writer is needed. + * @param[in] honour_visibility Whether to use the current visibility. + * @exception rb_eTypeError `klass` is not a class. + * @exception rb_eFrozenError `klass` is frozen. + * @post If `need_reader` is set `klass` has a method named `name`. + * @post If `need_writer` is set `klass` has a method named `name=`. + * + * @internal + * + * The three `int` arguments should have been bool, but there was no such thing + * like a bool when K&R was used in this project. + */ +void rb_attr(VALUE klass, ID name, int need_reader, int need_writer, int honour_visibility); + +RBIMPL_ATTR_NONNULL(()) +/** + * Removes a method. Don't confuse this to rb_undef_method(), which doesn't + * remove a method. 
This one resembles `Module#remove_method`. + * + * @param[out] klass The class to remove a method. + * @param[in] name Name of a method to be removed. + * @exception rb_eTypeError `klass` is a non-module. + * @exception rb_eFrozenError `klass` is frozen. + * @exception rb_eNameError No such method. + * @see rb_undef_method + */ +void rb_remove_method(VALUE klass, const char *name); + +/** + * Identical to rb_remove_method(), except it accepts the method name as ::ID. + * + * @param[out] klass The class to remove a method. + * @param[in] mid Name of a method to be removed. + * @exception rb_eTypeError `klass` is a non-module. + * @exception rb_eFrozenError `klass` is frozen. + * @exception rb_eNameError No such method. + * @see rb_undef + */ +void rb_remove_method_id(VALUE klass, ID mid); + +/** + * Queries if the klass has this method. This function has only one line of + * document in the implementation that states "// deprecated". Don't know what + * that means though. + * + * @param[in] klass The class in question. + * @param[in] id The method name to query. + * @param[in] ex Undocumented magic value. + * @retval false Method not found. + * @retval true There is a method. + * @pre `klass` must be a module. + * + * @internal + * + * @shyouhei has no motivation to describe what should be passed to `ex`. It + * seems this function should just be trashed. + */ +int rb_method_boundp(VALUE klass, ID id, int ex); + +/** + * Well... Let us hesitate from describing what a "basic definition" is. This + * nuanced concept should have been kept private. Just please. Don't touch + * it. This function is a badly distributed random number generator. Right? + * + * @param[in] klass The class in question. + * @param[in] mid The method name in question. + * @retval 1 It is. + * @retval 0 It isn't. + */ +int rb_method_basic_definition_p(VALUE klass, ID mid); + +/** + * Identical to rb_respond_to(), except it additionally takes the visibility + * parameter. 
This does not make difference unless the object has + * `respond_to?` undefined, but has `respond_to_missing?` defined. That case + * the passed argument becomes the second argument of `respond_to_missing?`. + * + * @param[in] obj The object in question. + * @param[in] mid The method name in question. + * @param[in] private_p This is the second argument of `obj`'s + * `respond_to_missing?`. + * @retval 1 Yes it does. + * @retval 0 No it doesn't. + */ +int rb_obj_respond_to(VALUE obj, ID mid, int private_p); + +/** + * Queries if the object responds to the method. This involves calling the + * object's `respond_to?` method. + * + * @param[in] obj The object in question. + * @param[in] mid The method name in question. + * @retval 1 Yes it does. + * @retval 0 No it doesn't. + */ +int rb_respond_to(VALUE obj, ID mid); RBIMPL_ATTR_NORETURN() +/** + * Raises ::rb_eNotImpError. This function is used as an argument to + * rb_define_method() etc. + * + * ```CXX + * rb_define_method(rb_cFoo, "foo", rb_f_notimplement, -1); + * ``` + * + * @param argc Unused parameter. + * @param argv Unused parameter. + * @param obj Unused parameter. + * @param marker Unused parameter. + * @exception rb_eNotImpError Always. + * @return Never returns. + * + * @internal + * + * See also the Q&A section of include/ruby/internal/anyargs.h. + */ VALUE rb_f_notimplement(int argc, const VALUE *argv, VALUE obj, VALUE marker); #if !defined(RUBY_EXPORT) && defined(_WIN32) RUBY_EXTERN VALUE (*const rb_f_notimplement_)(int, const VALUE *, VALUE, VALUE marker); @@ -68,7 +402,27 @@ RUBY_EXTERN VALUE (*const rb_f_notimplement_)(int, const VALUE *, VALUE, VALUE m #endif /* vm_backtrace.c */ + +/** + * Prints the backtrace out to the standard error. This just confuses people + * for no reason. Evil souls must only use it. + * + * @internal + * + * Actually it is very useful when called from an interactive GDB session. 
+ */ void rb_backtrace(void); + +/** + * Creates the good old fashioned array-of-strings style backtrace info. + * + * @return An array which contains strings, which are the textual + * representations of the backtrace locations of the current thread of + * the current ractor of the current execution context. + * @note Ruby scripts can access more sophisticated + * `Thread::Backtrace::Location`. But it seems there is no way for C + * extensions to use that API. + */ VALUE rb_make_backtrace(void); RBIMPL_SYMBOL_EXPORT_END() diff --git a/include/ruby/internal/interpreter.h b/include/ruby/internal/interpreter.h index 29dee60aab..a10e7ad2d8 100644 --- a/include/ruby/internal/interpreter.h +++ b/include/ruby/internal/interpreter.h @@ -17,10 +17,11 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Interpreter embedding APIs. */ #include "ruby/internal/attr/noreturn.h" +#include "ruby/internal/attr/nonnull.h" #include "ruby/internal/dllexport.h" #include "ruby/internal/value.h" @@ -28,65 +29,275 @@ RBIMPL_SYMBOL_EXPORT_BEGIN() /** * @defgroup embed CRuby Embedding APIs + * * CRuby interpreter APIs. These are APIs to embed MRI interpreter into your * program. * These functions are not a part of Ruby extension library API. * Extension libraries of Ruby should not depend on these functions. + * * @{ */ -/** @defgroup ruby1 ruby(1) implementation +/** + * @defgroup ruby1 ruby(1) implementation + * * A part of the implementation of ruby(1) command. * Other programs that embed Ruby interpreter do not always need to use these * functions. + * * @{ */ +RBIMPL_ATTR_NONNULL(()) +/** + * Initializes the process for libruby. + * + * This function assumes this process is `ruby(1)` and it has just started. 
+ * Usually programs that embed CRuby interpreter may not call this function, + * and may do their own initialization. + * + * @param[in] argc Pointer to process main's `argc`. + * @param[in] argv Pointer to process main's `argv`. + * @warning `argc` and `argv` cannot be `NULL`. + * + * @internal + * + * AFAIK Ruby does write to argv, especially `argv[0][0]`, via setproctitle(3). + * It is intentional that the argument is not const-qualified. + */ void ruby_sysinit(int *argc, char ***argv); + +/** + * Calls ruby_setup() and check error. + * + * Prints errors and calls exit(3) if an error occurred. + */ void ruby_init(void); + +/** + * Processes command line arguments and compiles the Ruby source to execute. + * + * This function does: + * - Processes the given command line flags and arguments for `ruby(1)` + * - Compiles the source code from the given argument, `-e` or `stdin`, and + * - Returns the compiled source as an opaque pointer to an internal data + * structure + * + * @param[in] argc Process main's `argc`. + * @param[in] argv Process main's `argv`. + * @return An opaque pointer to the compiled source or an internal special + * value. Pass it to ruby_executable_node() to detect which. + * @see ruby_executable_node + */ void* ruby_options(int argc, char** argv); + +/** + * Checks the return value of ruby_options(). + * + * ruby_options() sometimes returns a special value to indicate this process + * should immediately exit. This function checks if the case. Also stores the + * exit status that the caller have to pass to exit(3) into `*status`. + * + * @param[in] n A return value of ruby_options(). + * @param[out] status Pointer to the exit status of this process. + * @retval 0 The given value is such a special value. + * @retval otherwise The given opaque pointer is actually a compiled + * source. + */ int ruby_executable_node(void *n, int *status); + +/** + * Runs the given compiled source and exits this process. + * + * @param[in] n Opaque "node" pointer. 
+ * @retval EXIT_SUCCESS Successfully run the source. + * @retval EXIT_FAILURE An error occurred. + */ int ruby_run_node(void *n); /* version.c */ +/** Prints the version information of the CRuby interpreter to stdout. */ void ruby_show_version(void); + #ifndef ruby_show_copyright +/** Prints the copyright notice of the CRuby interpreter to stdout. */ void ruby_show_copyright(void); #endif -/*! A convenience macro to call ruby_init_stack(). Must be placed just after - * variable declarations */ +/** + * A convenience macro to call ruby_init_stack(). + * Must be placed just after variable declarations. + */ #define RUBY_INIT_STACK \ VALUE variable_in_this_stack_frame; \ ruby_init_stack(&variable_in_this_stack_frame); -/*! @} */ +/** @} */ -void ruby_init_stack(volatile VALUE*); +/** + * Set stack bottom of Ruby implementation. + * + * You must call this function before any heap allocation by Ruby + * implementation. Or GC will break living objects. + * + * @param[in] addr A pointer somewhere on the stack, near its bottom. + */ +void ruby_init_stack(void *addr); +/** + * Initializes the VM and builtin libraries. + * + * @retval 0 Initialization succeeded. + * @retval otherwise An error occurred. + * + * @internal + * + * Though not a part of our public API, the return value is in fact an enum + * ruby_tag_type. You can see the potential "otherwise" values by looking at + * vm_core.h. + */ int ruby_setup(void); -int ruby_cleanup(volatile int); +/** + * Destructs the VM. + * + * Runs the VM finalization processes as well as ruby_finalize(), and frees + * resources used by the VM. + * + * @param[in] ex Default value to the return value. + * @retval EXIT_FAILURE An error occurred. + * @retval ex Successful cleanup. + * @note This function does not raise any exception. + */ +int ruby_cleanup(int ex); + +/** + * Runs the VM finalization processes. + * + * `END{}` and procs registered by `Kernel.#at_exit` are executed here. See the + * Ruby language spec for more details. 
+ * + * @note This function is allowed to raise an exception if an error occurred. + */ void ruby_finalize(void); RBIMPL_ATTR_NORETURN() +/** Calls ruby_cleanup() and exits the process. */ void ruby_stop(int); +/** + * Checks for stack overflow. + * + * @retval true NG machine stack is about to overflow. + * @retval false OK there is still room in the stack. + * + * @internal + * + * Does anybody use it? So far @shyouhei has never seen any actual use-case. + */ int ruby_stack_check(void); -size_t ruby_stack_length(VALUE**); +/** + * Queries what Ruby thinks is the machine stack. Ruby manages a region of + * memory. It calls that area the "machine stack". By calling this function, + * in spite of its name, you can obtain both one end of the stack and its + * length at once. Which means you can know the entire region. + * + * @param[out] topnotch On return the pointer points to the upmost address of + * the machine stack that Ruby knows. + * @return Length of the machine stack that Ruby knows. + * + * @internal + * + * Does anybody use it? @shyouhei is quite skeptical if this is useful outside + * of the VM. Maybe it was a wrong idea to expose this API to 3rd parties. + */ +size_t ruby_stack_length(VALUE **topnotch); + +/** + * Identical to ruby_run_node(), except it returns an opaque execution status. + * You can pass it to ruby_cleanup(). + * + * @param[in] n Opaque "node" pointer. + * @retval 0 Successful end-of-execution. + * @retval otherwise An error occurred. + * + * @internal + * + * Though not a part of our public API, the return value is in fact an enum + * ruby_tag_type. You can see the potential "otherwise" values by looking at + * vm_core.h. + */ int ruby_exec_node(void *n); +/** + * Sets the current script name to this value. + * + * This is similar to `$0 = name` in Ruby level but also affects + * `Method#location` and others. + * + * @param[in] name File name to set.
+ */ void ruby_script(const char* name); + +/** + * Identical to ruby_script(), except it takes the name as a Ruby String + * instance. + * + * @param[in] name File name to set. + */ void ruby_set_script_name(VALUE name); +/** Defines built-in variables */ void ruby_prog_init(void); -void ruby_set_argv(int, char**); -void *ruby_process_options(int, char**); + +/** + * Sets argv that ruby understands. Your program might have its own command + * line parameters etc. Handle them as you wish, and pass remaining parts of + * argv here. + * + * @param[in] argc Number of elements of `argv`. + * @param[in] argv Command line arguments. + */ +void ruby_set_argv(int argc, char **argv); + +/** + * Identical to ruby_options(), except it raises ruby-level exceptions on + * failure. + * + * @param[in] argc Process main's `argc`. + * @param[in] argv Process main's `argv`. + * @return An opaque "node" pointer. + */ +void *ruby_process_options(int argc, char **argv); + +/** + * Sets up `$LOAD_PATH`. + * + * @internal + * + * @shyouhei guesses this has to be called at very later stage, at least after + * the birth of object system. But is not exactly sure when. + */ void ruby_init_loadpath(void); -void ruby_incpush(const char*); + +/** + * Appends the given path to the end of the load path. + * + * @pre ruby_init_loadpath() must be done beforehand. + * @param[in] path The path you want to push to the load path. + */ +void ruby_incpush(const char *path); + +/** + * Clear signal handlers. + * + * Ruby installs its own signal handler (apart from those which user scripts + * set). This is to clear that. Must be called when the ruby part terminates, + * before switching to your program's own logic. + */ void ruby_sig_finalize(void); -/*! 
@} */ +/** @} */ RBIMPL_SYMBOL_EXPORT_END() diff --git a/include/ruby/internal/iterator.h b/include/ruby/internal/iterator.h index a2aee15d31..5f706460f8 100644 --- a/include/ruby/internal/iterator.h +++ b/include/ruby/internal/iterator.h @@ -17,49 +17,496 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Block related APIs. */ +#include "ruby/internal/attr/deprecated.h" #include "ruby/internal/attr/noreturn.h" #include "ruby/internal/dllexport.h" #include "ruby/internal/value.h" RBIMPL_SYMBOL_EXPORT_BEGIN() +/** + * @private + * + * @deprecated This macro once was a thing in the old days, but makes no sense + * any longer today. Exists here for backwards compatibility + * only. You can safely forget about it. + */ #define RB_BLOCK_CALL_FUNC_STRICT 1 + +/** + * @private + * + * @deprecated This macro once was a thing in the old days, but makes no sense + * any longer today. Exists here for backwards compatibility + * only. You can safely forget about it. + */ #define RUBY_BLOCK_CALL_FUNC_TAKES_BLOCKARG 1 + +/** + * Shim for block function parameters. Historically ::rb_block_call_func_t had + * only two parameters. Over time it evolved to have much more than that. By + * using this macro you can absorb such API differences. + * + * ```CXX + * // This works since 2.1.0 + * VALUE my_own_iterator(RB_BLOCK_CALL_FUNC_ARGLIST(y, c)); + * ``` + */ #define RB_BLOCK_CALL_FUNC_ARGLIST(yielded_arg, callback_arg) \ VALUE yielded_arg, VALUE callback_arg, int argc, const VALUE *argv, VALUE blockarg + +/** + * This is the type of a function that the interpreter expect for C-backended + * blocks. Blocks are often written in Ruby. But C extensions might want to + * have their own blocks. 
In order to do so authors have to create a separate + * C function of this type, and pass its pointer to rb_block_call(). + * + * ```CXX + * VALUE + * my_own_iterator(RB_BLOCK_CALL_FUNC_ARGLIST(y, c)) + * { + * const auto plus = rb_intern("+"); + * return rb_funcall(c, plus, 1, y); + * } + * + * VALUE + * my_own_method(VALUE self) + * { + * const auto each = rb_intern("each"); + * return rb_block_call(self, each, 0, 0, my_own_iterator, self); + * } + * ``` + */ typedef VALUE rb_block_call_func(RB_BLOCK_CALL_FUNC_ARGLIST(yielded_arg, callback_arg)); + +/** + * Shorthand type that represents an iterator-written-in-C function pointer. + */ typedef rb_block_call_func *rb_block_call_func_t; -VALUE rb_each(VALUE); -VALUE rb_yield(VALUE); +/** + * This is a shorthand of calling `obj.each`. + * + * @param[in] obj The receiver. + * @return What `obj.each` returns. + * + * @internal + * + * Does anyone still need it? This API was to use with rb_iterate(), which is + * marked deprecated (see below). Old idiom to call an iterator was: + * + * ```CXX + * VALUE recv; + * VALUE iter_func(ANYARGS); + * VALUE iter_data; + * rb_iterate(rb_each, recv, iter_func, iter_data); + * ``` + */ +VALUE rb_each(VALUE obj); + +/** + * Yields the block. In Ruby there is a concept called a block. You can pass + * one to a method. In a method, when called with a block, you can yield it + * using this function. + * + * ```CXX + * VALUE + * iterate(VALUE self) + * { + * extern int get_n(VALUE); + * extern VALUE get_v(VALUE, VALUE); + * const auto n = get_n(self); + * + * for (int i=0; i<n; i++) { + * auto v = get_v(self, i); + * + * rb_yield(v); + * } + * return self; + * } + * ``` + * + * @param[in] val Passed to the block. + * @exception rb_eLocalJumpError There is no block given. + * @return Evaluated value of the given block. + */ +VALUE rb_yield(VALUE val); + +/** + * Identical to rb_yield(), except it takes variadic number of parameters and + * pass them to the block. 
+ * + * @param[in] n Number of parameters. + * @param[in] ... List of arguments passed to the block. + * @exception rb_eLocalJumpError There is no block given. + * @return Evaluated value of the given block. + */ VALUE rb_yield_values(int n, ...); + +/** + * Identical to rb_yield_values(), except it takes the parameters as a C array + * instead of variadic arguments. + * + * @param[in] n Number of parameters. + * @param[in] argv List of arguments passed to the block. + * @exception rb_eLocalJumpError There is no block given. + * @return Evaluated value of the given block. + */ VALUE rb_yield_values2(int n, const VALUE *argv); + +/** + * Identical to rb_yield_values2(), except you can specify how to handle the + * last element of the given array. + * + * @param[in] n Number of parameters. + * @param[in] argv List of arguments passed to the block. + * @param[in] kw_splat Handling of keyword parameters: + * - RB_NO_KEYWORDS `ary`'s last is not a keyword argument. + * - RB_PASS_KEYWORDS `ary`'s last is a keyword argument. + * - RB_PASS_CALLED_KEYWORDS makes no sense here. + * @exception rb_eLocalJumpError There is no block given. + * @return Evaluated value of the given block. + */ VALUE rb_yield_values_kw(int n, const VALUE *argv, int kw_splat); -VALUE rb_yield_splat(VALUE); -VALUE rb_yield_splat_kw(VALUE, int); + +/** + * Identical to rb_yield_values(), except it splats an array to generate the + * list of parameters. + * + * @param[in] ary Array to splat. + * @exception rb_eLocalJumpError There is no block given. + * @return Evaluated value of the given block. + */ +VALUE rb_yield_splat(VALUE ary); + +/** + * Identical to rb_yield_splat(), except you can specify how to handle the last + * element of the given array. + * + * @param[in] ary Array to splat. + * @param[in] kw_splat Handling of keyword parameters: + * - RB_NO_KEYWORDS `ary`'s last is not a keyword argument. + * - RB_PASS_KEYWORDS `ary`'s last is a keyword argument. 
+ * - RB_PASS_CALLED_KEYWORDS makes no sense here. + * @exception rb_eLocalJumpError There is no block given. + * @return Evaluated value of the given block. + */ +VALUE rb_yield_splat_kw(VALUE ary, int kw_splat); + +/** + * Pass a passed block. + * + * Sometimes you want to "pass" a block from one method to another. Suppose + * you have this Ruby method `foo`: + * + * ```ruby + * def foo(x, y) + * x.open(y) do |*z| + * yield(*z) + * end + * end + * ``` + * + * And suppose you want to translate this into C. Then the rb_yield_block() + * function is usable in this situation. + * + * ```CXX + * VALUE + * foo_translated_into_C(VALUE self, VALUE x, VALUE y) + * { + * const auto open = rb_intern("open"); + * + * return rb_block_call(x, open, 1, &y, rb_yield_block, Qfalse); + * // ^^^^^^^^^^^^^^ Here. + * } + * ``` + * + * @see rb_funcall_passing_block + * + * @internal + * + * @shyouhei honestly doesn't understand why this is needed, given there + * already was rb_funcall_passing_block() at the time it was implemented. If + * somebody knows its raison d'etre, please improve the document :FIXME: + */ VALUE rb_yield_block(RB_BLOCK_CALL_FUNC_ARGLIST(yielded_arg, callback_arg)); /* rb_block_call_func */ + +/** + * Determines if the current method is given a keyword argument. + * + * @retval false No keyword argument is given. + * @retval true Keyword argument(s) are given. + * @ingroup defmethod + */ int rb_keyword_given_p(void); + +/** + * Determines if the current method is given a block. + * + * @retval false No block is given. + * @retval true A block is given. + * @ingroup defmethod + * + * @internal + * + * This function should have returned a bool. But at the time it was designed + * the project was entirely written in K&R C. + */ int rb_block_given_p(void); + +/** + * Declares that the current method needs a block. + * + * @exception rb_eLocalJumpError No block given.
+ * @ingroup defmethod + */ void rb_need_block(void); -VALUE rb_iterate(VALUE(*)(VALUE),VALUE,rb_block_call_func_t,VALUE); -DEPRECATED_BY(rb_block_call since 1.9, VALUE rb_iterate(VALUE(*)(VALUE),VALUE,rb_block_call_func_t,VALUE)); -VALUE rb_block_call(VALUE,ID,int,const VALUE*,rb_block_call_func_t,VALUE); -VALUE rb_block_call_kw(VALUE,ID,int,const VALUE*,rb_block_call_func_t,VALUE,int); -VALUE rb_rescue(VALUE(*)(VALUE),VALUE,VALUE(*)(VALUE,VALUE),VALUE); -VALUE rb_rescue2(VALUE(*)(VALUE),VALUE,VALUE(*)(VALUE,VALUE),VALUE,...); -VALUE rb_vrescue2(VALUE(*)(VALUE),VALUE,VALUE(*)(VALUE,VALUE),VALUE,va_list); -VALUE rb_ensure(VALUE(*)(VALUE),VALUE,VALUE(*)(VALUE),VALUE); -VALUE rb_catch(const char*,rb_block_call_func_t,VALUE); -VALUE rb_catch_obj(VALUE,rb_block_call_func_t,VALUE); + +#ifndef __cplusplus +RBIMPL_ATTR_DEPRECATED(("by: rb_block_call since 1.9")) +#endif +/** + * Old way to iterate a block. + * + * @deprecated This is an old API. Use rb_block_call() instead. + * @warning The passed function must at least once call a ruby method + * (to handle interrupts etc.) + * @param[in] func1 A function that could yield a value. + * @param[in,out] data1 Passed to `func1` + * @param[in] proc A function acts as a block. + * @param[in,out] data2 Passed to `proc` as the data2 parameter. + * @return What `func1` returns. + */ +VALUE rb_iterate(VALUE (*func1)(VALUE), VALUE data1, rb_block_call_func_t proc, VALUE data2); + +#ifdef __cplusplus +namespace ruby { +namespace backward { +/** + * Old way to iterate a block. + * + * @deprecated This is an old API. Use rb_block_call() instead. + * @warning The passed function must at least once call a ruby method + * (to handle interrupts etc.) + * @param[in] iter A function that could yield a value. + * @param[in,out] data1 Passed to `func1` + * @param[in] bl A function acts as a block. + * @param[in,out] data2 Passed to `proc` as the data2 parameter. + * @return What `func1` returns. 
+ */ +static inline VALUE +rb_iterate_deprecated(VALUE (*iter)(VALUE), VALUE data1, rb_block_call_func_t bl, VALUE data2) +{ + return ::rb_iterate(iter, data1, bl, data2); +}}} + +RBIMPL_ATTR_DEPRECATED(("by: rb_block_call since 1.9")) +VALUE rb_iterate(VALUE (*func1)(VALUE), VALUE data1, rb_block_call_func_t proc, VALUE data2); +#endif + +/** + * Identical to rb_funcallv(), except it additionally passes a function as a + * block. When the method yields, `proc` is called with the yielded value as + * its first argument, and `data2` as the second. Yielded values would be + * packed into an array if multiple values are yielded at once. + * + * @param[in,out] obj Receiver. + * @param[in] mid Method signature. + * @param[in] argc Number of arguments. + * @param[in] argv Arguments passed to `obj.mid`. + * @param[in] proc A function acts as a block. + * @param[in,out] data2 Passed to `proc` as the data2 parameter. + * @return What `obj.mid` returns. + */ +VALUE rb_block_call(VALUE obj, ID mid, int argc, const VALUE *argv, rb_block_call_func_t proc, VALUE data2); + +/** + * Identical to rb_funcallv_kw(), except it additionally passes a function as a + * block. It can also be seen as a routine identical to rb_block_call(), + * except it handles keyword-ness of `argv[argc-1]`. + * + * @param[in,out] obj Receiver. + * @param[in] mid Method signature. + * @param[in] argc Number of arguments including the keywords. + * @param[in] argv Arguments passed to `obj.mid`. + * @param[in] proc A function acts as a block. + * @param[in,out] data2 Passed to `proc` as the data2 parameter. + * @param[in] kw_splat Handling of keyword parameters: + * - RB_NO_KEYWORDS `argv`'s last is not a keyword argument. + * - RB_PASS_KEYWORDS `argv`'s last is a keyword argument. + * - RB_PASS_CALLED_KEYWORDS it depends if there is a passed block. + * @return What `obj.mid` returns. 
+ */ +VALUE rb_block_call_kw(VALUE obj, ID mid, int argc, const VALUE *argv, rb_block_call_func_t proc, VALUE data2, int kw_splat); + +/** + * Identical to rb_rescue2(), except it does not take a list of exception + * classes. This is a shorthand of: + * + * ```CXX + * rb_rescue2(b_proc, data1, r_proc, data2, rb_eStandardError, (VALUE)0); + * ``` + * + * @param[in] b_proc A function which potentially raises an exception. + * @param[in,out] data1 Passed to `b_proc`. + * @param[in] r_proc A function which rescues an exception in `b_proc`. + * @param[in,out] data2 The first argument of `r_proc`. + * @return The return value of `b_proc` if no exception occurs, or the + * return value of `r_proc` otherwise. + * @see rb_rescue + * @see rb_ensure + * @see rb_protect + * @ingroup exception + */ +VALUE rb_rescue(VALUE (*b_proc)(VALUE), VALUE data1, VALUE (*r_proc)(VALUE, VALUE), VALUE data2); + +/** + * An equivalent of `rescue` clause. + * + * First it calls the function `b_proc` with `data1` as the argument. If + * nothing is thrown the function happily returns the return value of `b_proc`. + * When `b_proc` raises an exception, and the exception is a kind of one of the + * given exception classes, it then calls `r_proc` with `data2` and that + * exception. If the exception does not match any of them, it propagates. + * + * @param[in] b_proc A function which potentially raises an exception. + * @param[in,out] data1 Passed to `b_proc`. + * @param[in] r_proc A function which rescues an exception in `b_proc`. + * @param[in,out] data2 The first argument of `r_proc`. + * @param[in] ... 1 or more exception classes. Must be terminated by + * `(VALUE)0` + * @return The return value of `b_proc` if no exception occurs, or the + * return value of `r_proc` otherwise. 
+ * @see rb_rescue + * @see rb_ensure + * @see rb_protect + * @ingroup exception + */ +VALUE rb_rescue2(VALUE (*b_proc)(VALUE), VALUE data1, VALUE (*r_proc)(VALUE, VALUE), VALUE data2, ...); + +/** + * Identical to rb_rescue2(), except it takes `va_list` instead of variadic + * number of arguments. This is exposed to 3rd parties because inline + * functions use it. Basically you don't have to bother. + * + * @param[in] b_proc A function which potentially raises an exception. + * @param[in,out] data1 Passed to `b_proc`. + * @param[in] r_proc A function which rescues an exception in `b_proc`. + * @param[in,out] data2 The first argument of `r_proc`. + * @param[in] ap 1 or more exception classes. Must be terminated by + * `(VALUE)0` + * @return The return value of `b_proc` if no exception occurs, or the + * return value of `r_proc` otherwise. + * @see rb_rescue + * @see rb_ensure + * @see rb_protect + * @ingroup exception + */ +VALUE rb_vrescue2(VALUE (*b_proc)(VALUE), VALUE data1, VALUE (*r_proc)(VALUE, VALUE), VALUE data2, va_list ap); + +/** + * An equivalent to `ensure` clause. Calls the function `b_proc` with `data1` + * as the argument, then calls `e_proc` with `data2` when execution terminated. + * + * @param[in] b_proc A function representing begin clause. + * @param[in,out] data1 Passed to `b_proc`. + * @param[in] e_proc A function representing ensure clause. + * @param[in,out] data2 Passed to `e_proc`. + * @retval RUBY_Qnil exception occurred inside of `b_proc`. + * @retval otherwise The return value of `b_proc`. + * @see rb_rescue + * @see rb_rescue2 + * @see rb_protect + * @ingroup exception + */ +VALUE rb_ensure(VALUE (*b_proc)(VALUE), VALUE data1, VALUE (*e_proc)(VALUE), VALUE data2); + +/** + * Executes the passed block and catches values thrown from inside of it. + * + * In case the block does not contain any throw`, this function returns the + * value of the last expression evaluated. 
+ * + * ```CXX + * VALUE + * iter(RB_BLOCK_CALL_FUNC_ARGLIST(yielded, callback)) + * { + * return INT2FIX(123); + * } + * + * VALUE + * method(VALUE self) + * { + * return rb_catch("tag", iter, Qnil); // returns 123 + * } + * ``` + * + * In case there does exist a `throw`, Ruby searches up its execution context for a + * `catch` block. When a matching catch is found, the block stops executing + * and returns that thrown value instead. + * + * ```CXX + * VALUE + * iter(RB_BLOCK_CALL_FUNC_ARGLIST(yielded, callback)) + * { + * rb_throw("tag", 456); + * return INT2FIX(123); + * } + * + * VALUE + * method(VALUE self) + * { + * return rb_catch("tag", iter, Qnil); // returns 456 + * } + * ``` + * + * @param[in] tag Arbitrary tag string. + * @param[in] func Function pointer that acts as a block. + * @param[in,out] data Extra parameter passed to `func`. + * @return Either caught value for `tag`, or the return value of `func` + * if nothing is thrown. + */ +VALUE rb_catch(const char *tag, rb_block_call_func_t func, VALUE data); + +/** + * Identical to rb_catch(), except it catches arbitrary Ruby objects. + * + * @param[in] tag Arbitrary tag object. + * @param[in] func Function pointer that acts as a block. + * @param[in,out] data Extra parameter passed to `func`. + * @return Either caught value for `tag`, or the return value of `func` + * if nothing is thrown. + */ +VALUE rb_catch_obj(VALUE tag, rb_block_call_func_t func, VALUE data); RBIMPL_ATTR_NORETURN() -void rb_throw(const char*,VALUE); +/** + * Transfers control to the end of the active `catch` block waiting for `tag`. + * Raises rb_eUncaughtThrow if there is no `catch` block for the tag. The + * second parameter supplies a return value for the `catch` block, which + * otherwise defaults to ::RUBY_Qnil. For examples, see rb_catch(). + * + * @param[in] tag Tag string. + * @param[in] val Value to throw. + * @exception rb_eUncaughtThrow There is no corresponding `catch` clause. + * @note It never returns.
+ */ +void rb_throw(const char *tag, VALUE val); RBIMPL_ATTR_NORETURN() -void rb_throw_obj(VALUE,VALUE); +/** + * Identical to rb_throw(), except it allows an arbitrary Ruby object to become a + * tag. + * + * @param[in] tag Arbitrary object. + * @param[in] val Value to throw. + * @exception rb_eUncaughtThrow There is no corresponding `catch` clause. + * @note It never returns. + */ +void rb_throw_obj(VALUE tag, VALUE val); RBIMPL_SYMBOL_EXPORT_END() diff --git a/include/ruby/internal/memory.h b/include/ruby/internal/memory.h index 1f95387416..270cc1ac8b 100644 --- a/include/ruby/internal/memory.h +++ b/include/ruby/internal/memory.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Memory management stuff. */ #include "ruby/internal/config.h" @@ -38,7 +38,7 @@ # include <alloca.h> #endif -#if defined(_MSC_VER) && defined(_WIN64) +#if defined(_MSC_VER) && defined(_M_AMD64) # include <intrin.h> # pragma intrinsic(_umul128) #endif @@ -56,12 +56,15 @@ #include "ruby/internal/has/builtin.h" #include "ruby/internal/stdalign.h" #include "ruby/internal/stdbool.h" +#include "ruby/internal/stdckdint.h" #include "ruby/internal/xmalloc.h" #include "ruby/backward/2/limits.h" #include "ruby/backward/2/long_long.h" #include "ruby/backward/2/assume.h" #include "ruby/defines.h" +/** @cond INTERNAL_MACRO */ + /* Make alloca work the best possible way. */ #if defined(alloca) # /* Take that. */ @@ -75,18 +78,86 @@ extern "C" void *alloca(size_t); extern void *alloca(); #endif -#if defined(HAVE_INT128_T) && SIZEOF_SIZE_T <= 8 +/** @endcond */ + +#if defined(__DOXYGEN__) +/** + * @private + * + * Type that is twice as wide as size_t. This is an implementation detail of + * rb_mul_size_overflow().
People should not use it. This is not a good name + * either. + */ +typedef uint128_t DSIZE_T; +#elif defined(HAVE_INT128_T) && SIZEOF_SIZE_T <= 8 # define DSIZE_T uint128_t #elif SIZEOF_SIZE_T * 2 <= SIZEOF_LONG_LONG # define DSIZE_T unsigned LONG_LONG #endif +/** + * @private + * + * Maximum possible number of bytes that #RB_ALLOCV can allocate using + * `alloca`. Anything beyond this is allocated using rb_alloc_tmp_buffer(). + * This selection is transparent to users. People don't have to bother. + */ #ifdef C_ALLOCA # define RUBY_ALLOCV_LIMIT 0 #else # define RUBY_ALLOCV_LIMIT 1024 #endif +/** + * Prevents premature destruction of local objects. Ruby's garbage collector + * is conservative; it scans the C level machine stack as well. Possible in- + * use Ruby objects must remain visible on stack, to be properly marked as + * such. However contemporary C compilers do not interface well with this. + * Consider the following example: + * + * ```CXX + * auto s = rb_str_new_cstr(" world"); + * auto sptr = RSTRING_PTR(s); + * auto t = rb_str_new_cstr("hello,"); // Possible GC invocation + * auto u = rb_str_cat_cstr(t, sptr); + * + * RB_GC_GUARD(s); // ensure `s` (and thus `sptr`) do not get GC-ed + * ``` + * + * Here, without the #RB_GC_GUARD, the last use of `s` is _before_ the last use + * of `sptr`. Compilers could thus think `s` and `t` are allowed to overlap. + * That would eliminate `s` from the stack, while `sptr` is still in use. If + * our GC ran at that very moment, `s` gets swept out, which also destroys + * `sptr`. Boom! You got a SEGV. + * + * In order to prevent this scenario #RB_GC_GUARD must be placed _after_ the + * last use of `sptr`. Placing #RB_GC_GUARD before dereferencing `sptr` would + * be of no use. + * + * #RB_GC_GUARD would not be necessary at all in the above example if non- + * inlined function calls are made on the `s` variable after `sptr` is + * dereferenced. 
Thus, in the above example, calling any un-inlined function + * on `s` such as `rb_str_modify(s);` will ensure `s` stays on the stack or + * register to prevent a GC invocation from prematurely freeing it. + * + * Using the #RB_GC_GUARD macro is preferable to using the `volatile` keyword + * in C. #RB_GC_GUARD has the following advantages: + * + * - the intent of the macro use is clear. + * + * - #RB_GC_GUARD only affects its call site. OTOH `volatile` generates some + * extra code every time the variable is used, hurting optimisation. + * + * - `volatile` implementations may be buggy/inconsistent in some compilers + * and architectures. #RB_GC_GUARD is customisable for broken + * systems/compilers without negatively affecting other systems. + * + * - C++ since C++20 deprecates `volatile`. If you write your extension + * library in that language there is no escape but to use this macro. + * + * @param v A variable of ::VALUE type. + * @post `v` is still alive. + */ #ifdef __GNUC__ #define RB_GC_GUARD(v) \ (*__extension__ ({ \ @@ -101,65 +172,316 @@ extern void *alloca(); #define RB_GC_GUARD(v) (*rb_gc_guarded_ptr_val(&(v),(v))) #endif -/* Casts needed because void* is NOT compaible with others in C++. */ +/* Casts needed because void* is NOT compatible with others in C++. */ + +/** + * Convenient macro that allocates an array of n elements. + * + * @param type Type of array elements. + * @param n Length of the array. + * @exception rb_eNoMemError No space left for allocation. + * @exception rb_eArgError Integer overflow trying to calculate the length + * of continuous memory region of `n` elements of + * `type`. + * @return Storage instance that is capable of storing at least `n` + * elements of type `type`. + * @note It doesn't return NULL, even when `n` is zero. + * @warning The return value shall be invalidated exactly once by either + * ruby_xfree(), ruby_xrealloc(), or ruby_xrealloc2(). 
It is a + * failure to pass it to system free(), because the system and Ruby + * might or might not share the same malloc() implementation. + */ #define RB_ALLOC_N(type,n) RBIMPL_CAST((type *)ruby_xmalloc2((n), sizeof(type))) + +/** + * Shorthand of #RB_ALLOC_N with `n=1`. + * + * @param type Type of allocation. + * @exception rb_eNoMemError No space left for allocation. + * @return Storage instance that can hold an `type` object. + * @note It doesn't return NULL. + * @warning The return value shall be invalidated exactly once by either + * ruby_xfree(), ruby_xrealloc(), or ruby_xrealloc2(). It is a + * failure to pass it to system free(), because the system and Ruby + * might or might not share the same malloc() implementation. + */ #define RB_ALLOC(type) RBIMPL_CAST((type *)ruby_xmalloc(sizeof(type))) + +/** + * Identical to #RB_ALLOC_N() but also nullifies the allocated region before + * returning. + * + * @param type Type of array elements. + * @param n Length of the array. + * @exception rb_eNoMemError No space left for allocation. + * @exception rb_eArgError Integer overflow trying to calculate the length + * of continuous memory region of `n` elements of + * `type`. + * @return Storage instance that is capable of storing at least `n` + * elements of type `type`. + * @post Returned array is filled with zeros. + * @note It doesn't return NULL, even when `n` is zero. + * @warning The return value shall be invalidated exactly once by either + * ruby_xfree(), ruby_xrealloc(), or ruby_xrealloc2(). It is a + * failure to pass it to system free(), because the system and Ruby + * might or might not share the same malloc() implementation. + */ #define RB_ZALLOC_N(type,n) RBIMPL_CAST((type *)ruby_xcalloc((n), sizeof(type))) + +/** + * Shorthand of #RB_ZALLOC_N with `n=1`. + * + * @param type Type of allocation. + * @exception rb_eNoMemError No space left for allocation. + * @return Storage instance that can hold an `type` object. 
+ * @post Returned object is filled with zeros. + * @note It doesn't return NULL. + * @warning The return value shall be invalidated exactly once by either + * ruby_xfree(), ruby_xrealloc(), or ruby_xrealloc2(). It is a + * failure to pass it to system free(), because the system and Ruby + * might or might not share the same malloc() implementation. + */ #define RB_ZALLOC(type) (RB_ZALLOC_N(type, 1)) + +/** + * Convenient macro that reallocates an array with a new size. + * + * @param var A variable of `type`, which points to a storage + * instance that was previously returned from + * either + * - ruby_xmalloc(), + * - ruby_xmalloc2(), + * - ruby_xcalloc(), + * - ruby_xrealloc(), or + * - ruby_xrealloc2(). + * @param type Type of allocation. + * @param n Requested new size of each element. + * @exception rb_eNoMemError No space left for allocation. + * @exception rb_eArgError Integer overflow trying to calculate the length + * of continuous memory region of `n` elements of + * `type`. + * @return Storage instance that is capable of storing at least `n` + * elements of type `type`. + * @pre The passed variable must point to a valid live storage instance. + * It is a failure to pass a variable that holds an already-freed + * pointer. + * @note It doesn't return NULL, even when `n` is zero. + * @warning Do not assume anything on the alignment of the return value. + * There is no guarantee that it inherits the passed argument's + * one. + * @warning The return value shall be invalidated exactly once by either + * ruby_xfree(), ruby_xrealloc(), or ruby_xrealloc2(). It is a + * failure to pass it to system free(), because the system and Ruby + * might or might not share the same malloc() implementation. + */ #define RB_REALLOC_N(var,type,n) \ ((var) = RBIMPL_CAST((type *)ruby_xrealloc2((void *)(var), (n), sizeof(type)))) +/** + * @deprecated This macro is dangerous (does not bother stack overflow at + * all). #RB_ALLOCV is the modern way to do the same thing. 
+ * @param type Type of array elements. + * @param n Length of the array. + * @return A pointer on stack. + */ #define ALLOCA_N(type,n) \ RBIMPL_CAST((type *)alloca(rbimpl_size_mul_or_raise(sizeof(type), (n)))) -/* allocates _n_ bytes temporary buffer and stores VALUE including it - * in _v_. _n_ may be evaluated twice. */ +/** + * Identical to #RB_ALLOCV_N(), except that it allocates a number of bytes and + * returns a void* . + * + * @param v A variable to hold the just-in-case opaque Ruby object. + * @param n Size of allocation, in bytes. + * @return A void pointer to `n` bytes storage. + * @note `n` may be evaluated twice. + */ #define RB_ALLOCV(v, n) \ ((n) < RUBY_ALLOCV_LIMIT ? \ ((v) = 0, alloca(n)) : \ rb_alloc_tmp_buffer(&(v), (n))) + +/** + * Allocates a memory region, possibly on stack. If the given size exceeds + * #RUBY_ALLOCV_LIMIT, it allocates a dedicated opaque ruby object instead and + * let our GC sweep that region after use. Either way you can fire-and-forget. + * + * ```CXX + * #include <sys/types.h> + * + * VALUE + * foo(int n) + * { + * VALUE v; + * auto ptr = RB_ALLOCV(struct tms, v, n); + * ... + * // no need to free `ptr`. + * } + * ``` + * + * If you want to be super-duper polite you can also explicitly state the end + * of use of such memory region by calling #RB_ALLOCV_END(). + * + * @param type The type of array elements. + * @param v A variable to hold the just-in-case opaque Ruby object. + * @param n Number of elements requested to allocate. + * @return An array of `n` elements of `type`. + * @note `n` may be evaluated twice. + */ #define RB_ALLOCV_N(type, v, n) \ RBIMPL_CAST((type *) \ (((size_t)(n) < RUBY_ALLOCV_LIMIT / sizeof(type)) ? \ ((v) = 0, alloca((n) * sizeof(type))) : \ rb_alloc_tmp_buffer2(&(v), (n), sizeof(type)))) + +/** + * Polite way to declare that the given array is not used any longer. Calling + * this not mandatory. Our GC can baby-sit you. However it is not a very bad + * idea to use it when possible. 
Doing so could reduce memory footprint. + * + * @param v A variable previously passed to either #RB_ALLOCV/#RB_ALLOCV_N. + */ #define RB_ALLOCV_END(v) rb_free_tmp_buffer(&(v)) +/** + * Handy macro to erase a region of memory. + * + * @param p Target pointer. + * @param type Type of `p[0]` + * @param n Length of `p`. + * @return `p`. + * @post First `n` elements of `p` are squashed. + */ #define MEMZERO(p,type,n) memset((p), 0, rbimpl_size_mul_or_raise(sizeof(type), (n))) -#define MEMCPY(p1,p2,type,n) memcpy((p1), (p2), rbimpl_size_mul_or_raise(sizeof(type), (n))) + +/** + * Handy macro to call memcpy. + * + * @param p1 Destination pointer. + * @param p2 Source pointer. + * @param type Type of `p2[0]` + * @param n Length of `p2`. + * @return `p1`. + * @post First `n` elements of `p2` are copied into `p1`. + */ +#define MEMCPY(p1,p2,type,n) ruby_nonempty_memcpy((p1), (p2), rbimpl_size_mul_or_raise(sizeof(type), (n))) + +/** + * Handy macro to call memmove. + * + * @param p1 Destination pointer. + * @param p2 Source pointer. + * @param type Type of `p2[0]` + * @param n Length of `p2`. + * @return `p1`. + * @post First `n` elements of `p2` are copied into `p1`. + */ #define MEMMOVE(p1,p2,type,n) memmove((p1), (p2), rbimpl_size_mul_or_raise(sizeof(type), (n))) + +/** + * Handy macro to call memcmp + * + * @param p1 Target LHS. + * @param p2 Target RHS. + * @param type Type of `p1[0]` + * @param n Length of `p1`. + * @retval <0 `p1` is "less" than `p2`. + * @retval 0 `p1` is equal to `p2`. + * @retval >0 `p1` is "greater" than `p2`. 
+ */ #define MEMCMP(p1,p2,type,n) memcmp((p1), (p2), rbimpl_size_mul_or_raise(sizeof(type), (n))) -#define ALLOC_N RB_ALLOC_N -#define ALLOC RB_ALLOC -#define ZALLOC_N RB_ZALLOC_N -#define ZALLOC RB_ZALLOC -#define REALLOC_N RB_REALLOC_N -#define ALLOCV RB_ALLOCV -#define ALLOCV_N RB_ALLOCV_N -#define ALLOCV_END RB_ALLOCV_END +#define ALLOC_N RB_ALLOC_N /**< @old{RB_ALLOC_N} */ +#define ALLOC RB_ALLOC /**< @old{RB_ALLOC} */ +#define ZALLOC_N RB_ZALLOC_N /**< @old{RB_ZALLOC_N} */ +#define ZALLOC RB_ZALLOC /**< @old{RB_ZALLOC} */ +#define REALLOC_N RB_REALLOC_N /**< @old{RB_REALLOC_N} */ +#define ALLOCV RB_ALLOCV /**< @old{RB_ALLOCV} */ +#define ALLOCV_N RB_ALLOCV_N /**< @old{RB_ALLOCV_N} */ +#define ALLOCV_END RB_ALLOCV_END /**< @old{RB_ALLOCV_END} */ -/* Expecting this struct to be eliminated by function inlinings */ +/** + * @private + * + * This is an implementation detail of rbimpl_size_mul_overflow(). + * + * @internal + * + * Expecting this struct to be eliminated by function inlinings. This is + * nothing more than std::variant<std::size_t> if we could use recent C++, but + * reality is we cannot. + */ struct rbimpl_size_mul_overflow_tag { - bool left; - size_t right; + bool left; /**< Whether overflow happened or not. */ + size_t right; /**< Multiplication result. */ }; RBIMPL_SYMBOL_EXPORT_BEGIN() RBIMPL_ATTR_RESTRICT() RBIMPL_ATTR_RETURNS_NONNULL() RBIMPL_ATTR_ALLOC_SIZE((2)) +RBIMPL_ATTR_NONNULL(()) +/** + * @private + * + * This is an implementation detail of #RB_ALLOCV(). People don't use this + * directly. + * + * @param[out] store Pointer to a variable. + * @param[in] len Requested number of bytes to allocate. + * @return Allocated `len` bytes array. + * @post `store` holds the corresponding tmp buffer object. 
+ */ void *rb_alloc_tmp_buffer(volatile VALUE *store, long len); RBIMPL_ATTR_RESTRICT() RBIMPL_ATTR_RETURNS_NONNULL() RBIMPL_ATTR_ALLOC_SIZE((2,3)) +RBIMPL_ATTR_NONNULL(()) +/** + * @private + * + * This is an implementation detail of #RB_ALLOCV_N(). People don't use this + * directly. + * + * @param[out] store Pointer to a variable. + * @param[in] len Requested number of bytes to allocate. + * @param[in] count Number of elements in an array. + * @return Allocated `len` bytes array. + * @post `store` holds the corresponding tmp buffer object. + * + * @internal + * + * Although the meaning of `count` variable is clear, @shyouhei doesn't + * understand its needs. + */ void *rb_alloc_tmp_buffer_with_count(volatile VALUE *store, size_t len,size_t count); +/** + * @private + * + * This is an implementation detail of #RB_ALLOCV_END(). People don't use this + * directly. + * + * @param[out] store Pointer to a variable. + * @pre `store` is a NULL, or a pointer to a tmp buffer object. + * @post `*store` is ::RUBY_Qfalse. + * @post The object formerly stored in `store` is destroyed. + */ void rb_free_tmp_buffer(volatile VALUE *store); RBIMPL_ATTR_NORETURN() -void ruby_malloc_size_overflow(size_t, size_t); +/** + * @private + * + * This is an implementation detail of #RB_ALLOCV_N(). People don't use this + * directly. + * + * @param[in] x Arbitrary value. + * @param[in] y Arbitrary value. + * @exception rb_eArgError `x` * `y` would integer overflow. + */ +void ruby_malloc_size_overflow(size_t x, size_t y); #ifdef HAVE_RB_GC_GUARDED_PTR_VAL volatile VALUE *rb_gc_guarded_ptr_val(volatile VALUE *ptr, VALUE val); @@ -169,6 +491,15 @@ RBIMPL_SYMBOL_EXPORT_END() #ifdef _MSC_VER # pragma optimize("", off) +/** + * @private + * + * This is an implementation detail of #RB_GC_GUARD(). People don't use this + * directly. + * + * @param[in] ptr A pointer to an on-stack C variable. + * @return `ptr` as-is. 
+ */ static inline volatile VALUE * rb_gc_guarded_ptr(volatile VALUE *ptr) { @@ -178,7 +509,19 @@ rb_gc_guarded_ptr(volatile VALUE *ptr) # pragma optimize("", on) #endif -/* Does anyone use it? Just here for backwards compatibility. */ +/** + * @deprecated This function was an implementation detail of old + * #RB_ALLOCV_N(). We no longer use it. @shyouhei suspects that + * there are no actual usage now. However it was not marked as + * private before. We cannot delete it any longer. + * @param[in] a Arbitrary value. + * @param[in] b Arbitrary value. + * @param[in] max Possible maximum value. + * @param[out] c A pointer to return the computation result. + * @retval 1 `c` is insane. + * @retval 0 `c` is sane. + * @post `c` holds `a` * `b`, but could be overflowed. + */ static inline int rb_mul_size_overflow(size_t a, size_t b, size_t max, size_t *c) { @@ -196,18 +539,39 @@ rb_mul_size_overflow(size_t a, size_t b, size_t max, size_t *c) return 0; } -#if RBIMPL_COMPILER_SINCE(GCC, 7, 0, 0) +#if defined(__DOXYGEN__) +RBIMPL_ATTR_CONSTEXPR(CXX14) +#elif RBIMPL_COMPILER_SINCE(GCC, 7, 0, 0) RBIMPL_ATTR_CONSTEXPR(CXX14) /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70507 */ #elif RBIMPL_COMPILER_SINCE(Clang, 7, 0, 0) RBIMPL_ATTR_CONSTEXPR(CXX14) /* https://bugs.llvm.org/show_bug.cgi?id=37633 */ #endif RBIMPL_ATTR_CONST() +/** + * @private + * + * This is an implementation detail of #RB_ALLOCV_N(). People don't use this + * directly. + * + * @param[in] x Arbitrary value. + * @param[in] y Arbitrary value. + * @return `{ left, right }`, where `left` is whether there is an integer + * overflow or not, and `right` is a (possibly overflowed) result + * of `x` * `y`. + * + * @internal + * + * This is in fact also an implementation detail of ruby_xmalloc2() etc. 
+ */ static inline struct rbimpl_size_mul_overflow_tag rbimpl_size_mul_overflow(size_t x, size_t y) { struct rbimpl_size_mul_overflow_tag ret = { false, 0, }; -#if RBIMPL_HAS_BUILTIN(__builtin_mul_overflow) +#if defined(ckd_mul) + ret.left = ckd_mul(&ret.right, x, y); + +#elif RBIMPL_HAS_BUILTIN(__builtin_mul_overflow) ret.left = __builtin_mul_overflow(x, y, &ret.right); #elif defined(DSIZE_T) @@ -232,6 +596,21 @@ rbimpl_size_mul_overflow(size_t x, size_t y) return ret; } +/** + * @private + * + * This is an implementation detail of #RB_ALLOCV_N(). People don't use this + * directly. + * + * @param[in] x Arbitrary value. + * @param[in] y Arbitrary value. + * @exception rb_eArgError Multiplication could integer overflow. + * @return `x` * `y`. + * + * @internal + * + * This is in fact also an implementation detail of ruby_xmalloc2() etc. + */ static inline size_t rbimpl_size_mul_or_raise(size_t x, size_t y) { @@ -247,6 +626,20 @@ rbimpl_size_mul_or_raise(size_t x, size_t y) } } +/** + * This is an implementation detail of #RB_ALLOCV_N(). People don't use this + * directly. + * + * @param[out] store Pointer to a variable. + * @param[in] count Number of elements in an array. + * @param[in] elsize Size of each elements. + * @return Region of `count` * `elsize` bytes. + * @post `store` holds the corresponding tmp buffer object. + * + * @internal + * + * We might want to deprecate this function and make a `rbimpl_` counterpart. 
+ */ static inline void * rb_alloc_tmp_buffer2(volatile VALUE *store, long count, size_t elsize) { @@ -255,7 +648,6 @@ rb_alloc_tmp_buffer2(volatile VALUE *store, long count, size_t elsize) return rb_alloc_tmp_buffer_with_count(store, total_size, cnt); } -#ifndef __MINGW32__ RBIMPL_SYMBOL_EXPORT_BEGIN() RBIMPL_ATTR_NOALIAS() RBIMPL_ATTR_NONNULL((1)) @@ -274,8 +666,5 @@ ruby_nonempty_memcpy(void *dest, const void *src, size_t n) } } RBIMPL_SYMBOL_EXPORT_END() -#undef memcpy -#define memcpy ruby_nonempty_memcpy -#endif #endif /* RBIMPL_MEMORY_H */ diff --git a/include/ruby/internal/method.h b/include/ruby/internal/method.h index 67600e8732..19feb0c10b 100644 --- a/include/ruby/internal/method.h +++ b/include/ruby/internal/method.h @@ -17,22 +17,188 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Creation and modification of Ruby methods. */ +#include "ruby/internal/attr/nonnull.h" #include "ruby/internal/dllexport.h" #include "ruby/internal/value.h" #include "ruby/backward/2/stdarg.h" +/** + * @defgroup defmethod Defining methods + * + * There are some APIs to define a method from C. + * These API takes a C function as a method body. + * + * ### Method body functions + * + * Method body functions must return a VALUE and + * can be one of the following form: + * + * #### Fixed number of parameters + * + * This form is a normal C function, excepting it takes + * a receiver object as the first argument. + * + * ```CXX + * static VALUE my_method(VALUE self, VALUE x, VALUE y); + * ``` + * + * #### argc and argv style + * + * This form takes three parameters: argc, argv and self. + * self is the receiver. argc is the number of arguments. + * argv is a pointer to an array of the arguments. 
+ * + * ```CXX + * static VALUE my_method(int argc, VALUE *argv, VALUE self); + * ``` + * + * #### Ruby array style + * + * This form takes two parameters: self and args. + * self is the receiver. args is an Array object which + * contains the arguments. + * + * ```CXX + * static VALUE my_method(VALUE self, VALUE args); + * ``` + * + * ### Number of parameters + * + * Method defining APIs takes the number of parameters which the + * method will takes. This number is called argc. + * argc can be: + * + * - Zero or positive number. + * This means the method body function takes a fixed number of parameters. + * + * - `-1`. + * This means the method body function is "argc and argv" style. + * + * - `-2`. + * This means the method body function is "self and args" style. + * + * @{ + */ + RBIMPL_SYMBOL_EXPORT_BEGIN() -void rb_define_method(VALUE,const char*,VALUE(*)(ANYARGS),int); -void rb_define_module_function(VALUE,const char*,VALUE(*)(ANYARGS),int); -void rb_define_global_function(const char*,VALUE(*)(ANYARGS),int); +RBIMPL_ATTR_NONNULL(()) +/** + * Defines a method. + * + * @param[out] klass A module or a class. + * @param[in] mid Name of the function. + * @param[in] func The method body. + * @param[in] arity The number of parameters. See @ref defmethod. + * @note There are in fact 18 different prototypes for func. + * @see ::ruby::backward::cxxanyargs::define_method::rb_define_method + */ +void rb_define_method(VALUE klass, const char *mid, VALUE (*func)(ANYARGS), int arity); + +RBIMPL_ATTR_NONNULL(()) +/** + * Defines a module function for a module. + * + * @param[out] klass A module or a class. + * @param[in] mid Name of the function. + * @param[in] func The method body. + * @param[in] arity The number of parameters. See @ref defmethod. + * @note There are in fact 18 different prototypes for func. 
+ * @see ::ruby::backward::cxxanyargs::define_method::rb_define_module_function + */ +void rb_define_module_function(VALUE klass, const char *mid, VALUE (*func)(ANYARGS), int arity); + +RBIMPL_ATTR_NONNULL(()) +/** + * Defines a global function. + * + * @param[in] mid Name of the function. + * @param[in] func The method body. + * @param[in] arity The number of parameters. See @ref defmethod. + * @note There are in fact 18 different prototypes for func. + * @see ::ruby::backward::cxxanyargs::define_method::rb_define_global_function + */ +void rb_define_global_function(const char *mid, VALUE (*func)(ANYARGS), int arity); + +RBIMPL_ATTR_NONNULL(()) +/** + * Defines an undef of a method. -- What? + * + * In ruby, there are two separate concepts called "undef" and "remove_method". + * The thing you imagine when you "un-define" a method is remove_method. This + * one on the other hand is masking of a previous method definition. Suppose + * for instance: + * + * ```ruby + * class Foo + * def foo + * end + * end + * + * class Bar < Foo + * def bar + * foo + * end + * end + * + * class Baz < Foo + * undef foo # <--- (*1) + * end + * ``` + * + * This `undef foo` at `(*1)` must not eliminate `Foo#foo`, because that method + * is also used from `Bar#bar`. So instead of physically executing the target + * method, `undef` inserts a special filtering entry to the class (`Baz` this + * case). That entry, when called, acts as if there were no methods at all. + * But the original can still be accessible, via ways like `Bar#bar` above. + * + * @param[out] klass The class to insert an undef. + * @param[in] name Name of the undef. + * @exception rb_eTypeError `klass` is a non-module. + * @exception rb_eFrozenError `klass` is frozen. + * @see rb_remove_method + */ +void rb_undef_method(VALUE klass, const char *name); + +RBIMPL_ATTR_NONNULL(()) +/** + * Defines an alias of a method. 
+ * + * @param[in,out] klass The class which the original method belongs + * to; this is also where the new method will + * belong to. + * @param[in] dst A new name for the method. + * @param[in] src The original name of the method. + * @exception rb_eTypeError `klass` is a non-module. + * @exception rb_eFrozenError `klass` is frozen. + * @exception rb_eNameError There is no such method named as `src` in + * `klass`. + * + * @internal + * + * Above description is in fact a bit inaccurate because it ignores + * Refinements. + */ +void rb_define_alias(VALUE klass, const char *dst, const char *src); + +RBIMPL_ATTR_NONNULL(()) +/** + * Defines public accessor method(s) for an attribute. + * + * @param[out] klass The class which the attribute will belong to. + * @param[in] name Name of the attribute. + * @param[in] read Whether to define a getter method. + * @param[in] write Whether to define a setter method. + * @exception rb_eTypeError `klass` is a non-module. + * @exception rb_eFrozenError `klass` is frozen. + * @exception rb_eNameError `name` invalid as an attr e.g. an operator. + */ +void rb_define_attr(VALUE klass, const char *name, int read, int write); -void rb_undef_method(VALUE,const char*); -void rb_define_alias(VALUE,const char*,const char*); -void rb_define_attr(VALUE,const char*,int,int); +/** @} */ RBIMPL_SYMBOL_EXPORT_END() diff --git a/include/ruby/internal/module.h b/include/ruby/internal/module.h index 1dc6b820cc..97b0b2b8b0 100644 --- a/include/ruby/internal/module.h +++ b/include/ruby/internal/module.h @@ -17,26 +17,160 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Creation and modification of Ruby modules. 
*/ #include "ruby/internal/dllexport.h" #include "ruby/internal/value.h" +/** + * @defgroup class Classes and their hierarchy. + * + * @par Terminology + * - class: same as in Ruby. + * - singleton class: class for a particular object. + * - eigenclass: = singleton class + * - metaclass: class of a class. Metaclass is a kind of singleton class. + * - metametaclass: class of a metaclass. + * - meta^(n)-class: class of a meta^(n-1)-class. + * - attached object: A singleton class knows its unique instance. + * The instance is called the attached object for the singleton class. + * @{ + */ + RBIMPL_SYMBOL_EXPORT_BEGIN() +RBIMPL_ATTR_NONNULL(()) +/** + * Defines a top-level class. + * + * @param[in] name Name of the class. + * @param[in] super A class from which the new class will derive. + * @exception rb_eTypeError The constant name `name` is already taken but the + * constant is not a class. + * @exception rb_eTypeError The class is already defined but the class can + * not be reopened because its superclass is not + * `super`. + * @exception rb_eArgError `super` is NULL. + * @return The created class. + * @post Top-level constant named `name` refers the returned class. + * @note If a class named `name` is already defined and its superclass is + * `super`, the function just returns the defined class. + * @note The GC does not collect nor move classes returned by this + * function. They are immortal. + * + * @internal + * + * There are classes without names, but you can't pass NULL here. You have to + * use other ways to create one. + */ +VALUE rb_define_class(const char *name, VALUE super); + +RBIMPL_ATTR_NONNULL(()) +/** + * Defines a top-level module. + * + * @param[in] name Name of the module. + * @exception rb_eTypeError The constant name `name` is already taken but the + * constant is not a module. + * @return The created module. + * @post Top-level constant named `name` refers the returned module. 
+ * @note The GC does not collect nor move modules returned by this + * function. They are immortal. + * + * @internal + * + * There are modules without names, but you can't pass NULL here. You have to + * use other ways to create one. + */ +VALUE rb_define_module(const char *name); + +RBIMPL_ATTR_NONNULL(()) +/** + * Defines a class under the namespace of `outer`. + * + * @param[out] outer A class which contains the new class. + * @param[in] name Name of the new class + * @param[in] super A class from which the new class will derive. + * 0 means ::rb_cObject. + * @exception rb_eTypeError The constant name `name` is already taken but + * the constant is not a class. + * @exception rb_eTypeError The class is already defined but the class can + * not be reopened because its superclass is not + * `super`. + * @exception rb_eArgError `super` is NULL. + * @return The created class. + * @post `outer::name` refers the returned class. + * @note If a class named `name` is already defined and its superclass + * is `super`, the function just returns the defined class. + * @note The GC does not collect nor move classes returned by this + * function. They are immortal. + */ +VALUE rb_define_class_under(VALUE outer, const char *name, VALUE super); + +RBIMPL_ATTR_NONNULL(()) +/** + * Defines a module under the namespace of `outer`. + * + * @param[out] outer A class which contains the new module. + * @param[in] name Name of the new module + * @exception rb_eTypeError The constant name `name` is already taken but + * the constant is not a class. + * @return The created module. + * @post `outer::name` refers the returned module. + * @note The GC does not collect nor move modules returned by this + * function. They are immortal. + */ +VALUE rb_define_module_under(VALUE outer, const char *name); + +/** + * Includes a module to a class. + * + * @param[out] klass Inclusion destination. + * @param[in] module Inclusion source. + * @exception rb_eArgError Cyclic inclusion. 
+ * + * @internal + * + * :FIXME: @shyouhei suspects this function lacks assertion that the arguments + * being modules... Could silently SEGV if non-module was passed? + */ +void rb_include_module(VALUE klass, VALUE module); + +/** + * Extend the object with the module. + * + * @warning This is the same as `Module#extend_object`, not + * `Object#extend`! These two methods are very similar, but not + * identical. The difference is the hook. `Module#extend_object` + * does not invoke `Module#extended`, while `Object#extend` does. + * @param[out] obj Object to extend. + * @param[in] mod Module of extension. + */ +void rb_extend_object(VALUE obj, VALUE mod); + /** - * GC compaction note: class and modules returned by these four functions - * do not move. + * Identical to rb_include_module(), except it "prepends" the passed module to + * the klass, instead of includes. This affects how `super` resolves. For + * instance: + * + * ```ruby + * class Q; def foo; "<q/>" end end + * module W; def foo; "<w>#{super}</w>" end end + * class E < Q; include W; def foo; "<e>#{super}</e>" end end + * class R < Q; prepend W; def foo; "<r>#{super}</r>" end end + * + * E.new.foo # => "<e><w><q/></w></e>" + * r.new.foo # => "<W><r><q/></r></w>" + * ``` + * + * @param[out] klass Target class to modify. + * @param[in] module Module to prepend. + * @exception rb_eArgError Cyclic inclusion. 
*/ -VALUE rb_define_class(const char*,VALUE); -VALUE rb_define_module(const char*); -VALUE rb_define_class_under(VALUE, const char*, VALUE); -VALUE rb_define_module_under(VALUE, const char*); +void rb_prepend_module(VALUE klass, VALUE module); -void rb_include_module(VALUE,VALUE); -void rb_extend_object(VALUE,VALUE); -void rb_prepend_module(VALUE,VALUE); +/** @} */ RBIMPL_SYMBOL_EXPORT_END() diff --git a/include/ruby/internal/newobj.h b/include/ruby/internal/newobj.h index 1bb4f0efa3..6eee2fa5fa 100644 --- a/include/ruby/internal/newobj.h +++ b/include/ruby/internal/newobj.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Defines #NEWOBJ. */ #include "ruby/internal/attr/deprecated.h" @@ -29,36 +29,121 @@ #include "ruby/internal/value.h" #include "ruby/assert.h" -#define RB_NEWOBJ(obj,type) type *(obj) = RBIMPL_CAST((type *)rb_newobj()) -#define RB_NEWOBJ_OF(obj,type,klass,flags) type *(obj) = RBIMPL_CAST((type *)rb_newobj_of(klass, flags)) - -#define NEWOBJ RB_NEWOBJ -#define NEWOBJ_OF RB_NEWOBJ_OF /* core has special NEWOBJ_OF() in internal.h */ -#define OBJSETUP rb_obj_setup /* use NEWOBJ_OF instead of NEWOBJ()+OBJSETUP() */ -#define CLONESETUP rb_clone_setup -#define DUPSETUP rb_dup_setup +#define OBJSETUP rb_obj_setup /**< @old{rb_obj_setup} */ +#define CLONESETUP rb_clone_setup /**< @old{rb_clone_setup} */ +#define DUPSETUP rb_dup_setup /**< @old{rb_dup_setup} */ RBIMPL_SYMBOL_EXPORT_BEGIN() -VALUE rb_newobj(void); -VALUE rb_newobj_of(VALUE, VALUE); +/** + * Fills common fields in the object. + * + * @param[in,out] obj A Ruby object to be set up. + * @param[in] klass `obj` will belong to this class. + * @param[in] type One of ::ruby_value_type. + * @return The passed object. 
+ * + * @internal + * + * Historically, authors of Ruby has described the `type` argument as "one of + * ::ruby_value_type". In reality it accepts either ::ruby_value_type, + * ::ruby_fl_type, or any combinations of the two. For instance + * `RUBY_T_STRING | RUBY_FL_FREEZE` is a valid value that this function takes, + * and means this is a frozen string. + * + * 3rd party extension libraries rarely need to allocate Strings this way. + * They normally only concern ::RUBY_T_DATA. This argument is mainly used for + * specifying flags, @shyouhei suspects. + */ VALUE rb_obj_setup(VALUE obj, VALUE klass, VALUE type); -VALUE rb_obj_class(VALUE); -VALUE rb_singleton_class_clone(VALUE); -void rb_singleton_class_attached(VALUE,VALUE); -void rb_copy_generic_ivar(VALUE,VALUE); + +/** + * Queries the class of an object. This is not always identical to + * `RBASIC_CLASS(obj)`. It searches for the nearest ancestor skipping + * singleton classes or included modules. + * + * @param[in] obj Object in question. + * @return The object's class, in a normal sense. + */ +VALUE rb_obj_class(VALUE obj); + +/** + * Clones a singleton class. An object can have its own singleton class. OK. + * Then what happens when a program clones such object? The singleton class + * that is attached to the source object must also be cloned. Otherwise a + * singleton object gets shared with two objects, which breaks "singleton"-ness + * of such class. + * + * This is basically an implementation detail of rb_clone_setup(). People + * need not be aware of this working behind-the-scene. + * + * @param[in] obj The object that has its own singleton class. + * @return Cloned singleton class. + */ +VALUE rb_singleton_class_clone(VALUE obj); + +/** + * Attaches a singleton class to its corresponding object. + * + * This is basically an implementation detail of rb_clone_setup(). People + * need not be aware of this working behind-the-scene. + * + * @param[in] klass The singleton class. 
+ * @param[out] obj The object to attach a class. + * @pre The passed two objects must agree with each other that `klass` + * becomes a singleton class of `obj`. + * @post `klass` becomes the singleton class of `obj`. + */ +void rb_singleton_class_attached(VALUE klass, VALUE obj); + +/** + * Copies the list of instance variables. 3rd parties need not know, but there + * are several ways to store an object's instance variables, depending on its + * internal structure. This function makes sense when either of the passed + * objects are using so-called "generic" backend storage. This distinction is + * purely an implementation detail of rb_clone_setup(). People need not be + * aware of this working behind-the-scenes. + * + * @param[out] clone The destination object. + * @param[in] obj The source object. + */ +void rb_copy_generic_ivar(VALUE clone, VALUE obj); RBIMPL_SYMBOL_EXPORT_END() RBIMPL_ATTR_DEPRECATED(("This is no longer how Object#clone works.")) +/** + * @deprecated Not sure exactly when but at some time, the implementation of + * `Object#clone` stopped using this function. It remained + * untouched for a while, and then @shyouhei realised that they + * are no longer doing the same thing. It seems nobody seriously + * uses this function any longer. Let's just abandon it. + * + * @param[out] clone The destination object. + * @param[in] obj The source object. + */ static inline void rb_clone_setup(VALUE clone, VALUE obj) { + (void)clone; + (void)obj; return; } RBIMPL_ATTR_DEPRECATED(("This is no longer how Object#dup works.")) +/** + * @deprecated Not sure exactly when but at some time, the implementation of + * `Object#dup` stopped using this function. It remained + * untouched for a while, and then @shyouhei realised that they + * are no longer the same thing. It seems nobody seriously uses + * this function any longer. Let's just abandon it. + * + * @param[out] dup The destination object. + * @param[in] obj The source object. 
+ */ static inline void rb_dup_setup(VALUE dup, VALUE obj) { + (void)dup; + (void)obj; return; } diff --git a/include/ruby/internal/rgengc.h b/include/ruby/internal/rgengc.h deleted file mode 100644 index 2681d41844..0000000000 --- a/include/ruby/internal/rgengc.h +++ /dev/null @@ -1,199 +0,0 @@ -#ifndef RBIMPL_RGENGC_H /*-*-C++-*-vi:se ft=cpp:*/ -#define RBIMPL_RGENGC_H -/** - * @file - * @author Ruby developers <ruby-core@ruby-lang.org> - * @copyright This file is a part of the programming language Ruby. - * Permission is hereby granted, to either redistribute and/or - * modify this file, provided that the conditions mentioned in the - * file COPYING are met. Consult the file for details. - * @warning Symbols prefixed with either `RBIMPL` or `rbimpl` are - * implementation details. Don't take them as canon. They could - * rapidly appear then vanish. The name (path) of this header file - * is also an implementation detail. Do not expect it to persist - * at the place it is now. Developers are free to move it anywhere - * anytime at will. - * @note To ruby-core: remember that this header can be possibly - * recursively included from extension libraries written in C++. - * Do not expect for instance `__VA_ARGS__` is always available. - * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. - * @brief RGENGC write-barrier APIs. - * @see Sasada, K., "Gradual write-barrier insertion into a Ruby - * interpreter", in proceedings of the 2019 ACM SIGPLAN - * International Symposium on Memory Management (ISMM 2019), pp - * 115-121, 2019. 
https://doi.org/10.1145/3315573.3329986 - */ -#include "ruby/internal/attr/artificial.h" -#include "ruby/internal/attr/pure.h" -#include "ruby/internal/dllexport.h" -#include "ruby/internal/special_consts.h" -#include "ruby/internal/stdbool.h" -#include "ruby/internal/value.h" -#include "ruby/assert.h" -#include "ruby/backward/2/attributes.h" - -#undef USE_RGENGC -#define USE_RGENGC 1 - -#ifndef USE_RINCGC -# define USE_RINCGC 1 -#endif - -#ifndef USE_RGENGC_LOGGING_WB_UNPROTECT -# define USE_RGENGC_LOGGING_WB_UNPROTECT 0 -#endif - -#ifndef RGENGC_WB_PROTECTED_ARRAY -# define RGENGC_WB_PROTECTED_ARRAY 1 -#endif - -#ifndef RGENGC_WB_PROTECTED_HASH -# define RGENGC_WB_PROTECTED_HASH 1 -#endif - -#ifndef RGENGC_WB_PROTECTED_STRUCT -# define RGENGC_WB_PROTECTED_STRUCT 1 -#endif - -#ifndef RGENGC_WB_PROTECTED_STRING -# define RGENGC_WB_PROTECTED_STRING 1 -#endif - -#ifndef RGENGC_WB_PROTECTED_OBJECT -# define RGENGC_WB_PROTECTED_OBJECT 1 -#endif - -#ifndef RGENGC_WB_PROTECTED_REGEXP -# define RGENGC_WB_PROTECTED_REGEXP 1 -#endif - -#ifndef RGENGC_WB_PROTECTED_CLASS -# define RGENGC_WB_PROTECTED_CLASS 1 -#endif - -#ifndef RGENGC_WB_PROTECTED_FLOAT -# define RGENGC_WB_PROTECTED_FLOAT 1 -#endif - -#ifndef RGENGC_WB_PROTECTED_COMPLEX -# define RGENGC_WB_PROTECTED_COMPLEX 1 -#endif - -#ifndef RGENGC_WB_PROTECTED_RATIONAL -# define RGENGC_WB_PROTECTED_RATIONAL 1 -#endif - -#ifndef RGENGC_WB_PROTECTED_BIGNUM -# define RGENGC_WB_PROTECTED_BIGNUM 1 -#endif - -#ifndef RGENGC_WB_PROTECTED_NODE_CREF -# define RGENGC_WB_PROTECTED_NODE_CREF 1 -#endif - -/** - * @name Write barrier (WB) interfaces: - * @{ - * - * @note The following core interfaces can be changed in the future. Please - * catch up if you want to insert WB into C-extensions correctly. - */ - -/** - * WB for new reference from `a' to `b'. Write `b' into `*slot'. `slot' is a - * pointer in `a'. 
- */ -#define RB_OBJ_WRITE(a, slot, b) \ - RBIMPL_CAST(rb_obj_write((VALUE)(a), (VALUE *)(slot), (VALUE)(b), __FILE__, __LINE__)) -/** - * WB for new reference from `a' to `b'. This doesn't write any values, but - * only a WB declaration. `oldv' is replaced value with `b' (not used in - * current Ruby). - */ -#define RB_OBJ_WRITTEN(a, oldv, b) \ - RBIMPL_CAST(rb_obj_written((VALUE)(a), (VALUE)(oldv), (VALUE)(b), __FILE__, __LINE__)) -/** @} */ - -#define OBJ_PROMOTED_RAW RB_OBJ_PROMOTED_RAW -#define OBJ_PROMOTED RB_OBJ_PROMOTED -#define OBJ_WB_UNPROTECT RB_OBJ_WB_UNPROTECT - -#define RB_OBJ_WB_UNPROTECT(x) rb_obj_wb_unprotect(x, __FILE__, __LINE__) -#define RB_OBJ_WB_UNPROTECT_FOR(type, obj) \ - (RGENGC_WB_PROTECTED_##type ? OBJ_WB_UNPROTECT(obj) : obj) -#define RGENGC_LOGGING_WB_UNPROTECT rb_gc_unprotect_logging - -/** @cond INTERNAL_MACRO */ -#define RB_OBJ_PROMOTED_RAW RB_OBJ_PROMOTED_RAW -#define RB_OBJ_PROMOTED RB_OBJ_PROMOTED -/** @endcond */ - -RBIMPL_SYMBOL_EXPORT_BEGIN() -void rb_gc_writebarrier(VALUE a, VALUE b); -void rb_gc_writebarrier_unprotect(VALUE obj); -#if USE_RGENGC_LOGGING_WB_UNPROTECT -void rb_gc_unprotect_logging(void *objptr, const char *filename, int line); -#endif -RBIMPL_SYMBOL_EXPORT_END() - -RBIMPL_ATTR_PURE_UNLESS_DEBUG() -RBIMPL_ATTR_ARTIFICIAL() -static inline bool -RB_OBJ_PROMOTED_RAW(VALUE obj) -{ - RBIMPL_ASSERT_OR_ASSUME(RB_FL_ABLE(obj)); - return RB_FL_ANY_RAW(obj, RUBY_FL_PROMOTED); -} - -RBIMPL_ATTR_PURE_UNLESS_DEBUG() -RBIMPL_ATTR_ARTIFICIAL() -static inline bool -RB_OBJ_PROMOTED(VALUE obj) -{ - if (! 
RB_FL_ABLE(obj)) { - return false; - } - else { - return RB_OBJ_PROMOTED_RAW(obj); - } -} - -static inline VALUE -rb_obj_wb_unprotect(VALUE x, RB_UNUSED_VAR(const char *filename), RB_UNUSED_VAR(int line)) -{ -#if USE_RGENGC_LOGGING_WB_UNPROTECT - RGENGC_LOGGING_WB_UNPROTECT(RBIMPL_CAST((void *)x), filename, line); -#endif - rb_gc_writebarrier_unprotect(x); - return x; -} - -static inline VALUE -rb_obj_written(VALUE a, RB_UNUSED_VAR(VALUE oldv), VALUE b, RB_UNUSED_VAR(const char *filename), RB_UNUSED_VAR(int line)) -{ -#if USE_RGENGC_LOGGING_WB_UNPROTECT - RGENGC_LOGGING_OBJ_WRITTEN(a, oldv, b, filename, line); -#endif - - if (!RB_SPECIAL_CONST_P(b)) { - rb_gc_writebarrier(a, b); - } - - return a; -} - -static inline VALUE -rb_obj_write(VALUE a, VALUE *slot, VALUE b, RB_UNUSED_VAR(const char *filename), RB_UNUSED_VAR(int line)) -{ -#ifdef RGENGC_LOGGING_WRITE - RGENGC_LOGGING_WRITE(a, slot, b, filename, line); -#endif - - *slot = b; - - rb_obj_written(a, RUBY_Qundef /* ignore `oldv' now */, b, filename, line); - return a; -} - -#endif /* RBIMPL_RGENGC_H */ diff --git a/include/ruby/internal/scan_args.h b/include/ruby/internal/scan_args.h index d9329e7e98..1ed2bf6368 100644 --- a/include/ruby/internal/scan_args.h +++ b/include/ruby/internal/scan_args.h @@ -17,15 +17,17 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Compile-time static implementation of ::rb_scan_args(). * * This is a beast. It statically analyses the argument spec string, and * expands the assignment of variables into dedicated codes. 
*/ +#include "ruby/assert.h" #include "ruby/internal/attr/diagnose_if.h" #include "ruby/internal/attr/error.h" #include "ruby/internal/attr/forceinline.h" +#include "ruby/internal/attr/nonnull.h" #include "ruby/internal/attr/noreturn.h" #include "ruby/internal/config.h" #include "ruby/internal/dllexport.h" @@ -38,29 +40,155 @@ #include "ruby/internal/static_assert.h" #include "ruby/internal/stdbool.h" #include "ruby/internal/value.h" -#include "ruby/assert.h" +/** + * @name Possible values that you should pass to rb_scan_args_kw(). + * @{ + */ + +/** Same behaviour as rb_scan_args(). */ #define RB_SCAN_ARGS_PASS_CALLED_KEYWORDS 0 + +/** The final argument should be a hash treated as keywords.*/ #define RB_SCAN_ARGS_KEYWORDS 1 + +/** + * Treat a final argument as keywords if it is a hash, and not as keywords + * otherwise. + */ #define RB_SCAN_ARGS_LAST_HASH_KEYWORDS 3 + +/** @} */ + +/** + * @name Possible values that you should pass to rb_funcallv_kw(). + * @{ + */ + +/** Do not pass keywords. */ #define RB_NO_KEYWORDS 0 + +/** Pass keywords, final argument should be a hash of keywords. */ #define RB_PASS_KEYWORDS 1 + +/** + * Pass keywords if current method is called with keywords, useful for argument + * delegation + */ #define RB_PASS_CALLED_KEYWORDS rb_keyword_given_p() -/* rb_scan_args() format allows ':' for optional hash */ + +/** @} */ + +/** + * @private + * + * @deprecated This macro once was a thing in the old days, but makes no sense + * any longer today. Exists here for backwards compatibility + * only. You can safely forget about it. + */ #define HAVE_RB_SCAN_ARGS_OPTIONAL_HASH 1 RBIMPL_SYMBOL_EXPORT_BEGIN() -int rb_scan_args(int, const VALUE*, const char*, ...); -int rb_scan_args_kw(int, int, const VALUE*, const char*, ...); +RBIMPL_ATTR_NONNULL((2, 3)) +/** + * Retrieves argument from argc and argv to given ::VALUE references according + * to the format string. 
The format can be described in ABNF as follows: + * + * ``` + * scan-arg-spec := param-arg-spec [keyword-arg-spec] [block-arg-spec] + * + * param-arg-spec := pre-arg-spec [post-arg-spec] / post-arg-spec / + * pre-opt-post-arg-spec + * pre-arg-spec := num-of-leading-mandatory-args + * [num-of-optional-args] + * post-arg-spec := sym-for-variable-length-args + * [num-of-trailing-mandatory-args] + * pre-opt-post-arg-spec := num-of-leading-mandatory-args num-of-optional-args + * num-of-trailing-mandatory-args + * keyword-arg-spec := sym-for-keyword-arg + * block-arg-spec := sym-for-block-arg + * + * num-of-leading-mandatory-args := DIGIT ; The number of leading mandatory + * ; arguments + * num-of-optional-args := DIGIT ; The number of optional arguments + * sym-for-variable-length-args := "*" ; Indicates that variable length + * ; arguments are captured as a ruby + * ; array + * num-of-trailing-mandatory-args := DIGIT ; The number of trailing mandatory + * ; arguments + * sym-for-keyword-arg := ":" ; Indicates that keyword argument + * ; captured as a hash. + * ; If keyword arguments are not + * ; provided, returns nil. + * sym-for-block-arg := "&" ; Indicates that an iterator block + * ; should be captured if given + * ``` + * + * For example, "12" means that the method requires at least one argument, and + * at most receives three (1+2) arguments. So, the format string must be + * followed by three variable references, which are to be assigned to captured + * arguments. For omitted arguments, variables are set to ::RUBY_Qnil. `NULL` + * can be put in place of a variable reference, which means the corresponding + * captured argument(s) should be just dropped. + * + * The number of given arguments, excluding an option hash or iterator block, + * is returned. + * + * @param[in] argc Length of `argv`. + * @param[in] argv Pointer to the arguments to parse. + * @param[in] fmt Format, in the language described above. + * @param[out] ... Variables to fill in. 
+ * @exception rb_eFatal Malformed `fmt`. + * @exception rb_eArgError Arity mismatch. + * @return Actually parsed number of given arguments. + * @post Each value passed in `argv` is filled into the variadic + * arguments, according to the format. + */ +int rb_scan_args(int argc, const VALUE *argv, const char *fmt, ...); + +RBIMPL_ATTR_NONNULL((3, 4)) +/** + * Identical to rb_scan_args(), except it also accepts `kw_splat`. + * + * @param[in] kw_splat How to understand the keyword arguments. + * - RB_SCAN_ARGS_PASS_CALLED_KEYWORDS: Same behaviour as rb_scan_args(). + * - RB_SCAN_ARGS_KEYWORDS: The final argument is a kwarg. + * - RB_SCAN_ARGS_LAST_HASH_KEYWORDS: The final argument is a kwarg, iff it + * is a hash. + * @param[in] argc Length of `argv`. + * @param[in] argv Pointer to the arguments to parse. + * @param[in] fmt Format, in the language described above. + * @param[out] ... Variables to fill in. + * @exception rb_eFatal Malformed `fmt`. + * @exception rb_eArgError Arity mismatch. + * @return Actually parsed number of given arguments. + * @post Each value passed in `argv` is filled into the variadic + * arguments, according to the format. + */ +int rb_scan_args_kw(int kw_splat, int argc, const VALUE *argv, const char *fmt, ...); RBIMPL_ATTR_ERROR(("bad scan arg format")) +/** + * @private + * + * This is an implementation detail of rb_scan_args(). People don't use it + * directly. + */ void rb_scan_args_bad_format(const char*); RBIMPL_ATTR_ERROR(("variable argument length doesn't match")) +/** + * @private + * + * This is an implementation detail of rb_scan_args(). People don't use it + * directly. + */ void rb_scan_args_length_mismatch(const char*,int); RBIMPL_SYMBOL_EXPORT_END() +/** @cond INTERNAL_MACRO */ + /* If we could use constexpr the following macros could be inline functions * ... but sadly we cannot.
*/ @@ -106,13 +234,13 @@ RBIMPL_SYMBOL_EXPORT_END() # define rb_scan_args_verify(fmt, varc) RBIMPL_ASSERT_NOTHING #else # /* At one sight it _seems_ the expressions below could be written using -# * static assrtions. The reality is no, they don't. Because fmt is a string -# * literal, any operations against fmt cannot produce the "integer constant -# * expression"s, as defined in ISO/IEC 9899:2018 section 6.6 paragraph #6. -# * Static assertions need such integer constant expressions as defined in -# * ISO/IEC 9899:2018 section 6.7.10 paragraph #3. +# * static assertions. The reality is no, they don't. Because fmt is a +# * string literal, any operations against fmt cannot produce the "integer +# * constant expression"s, as defined in ISO/IEC 9899:2018 section 6.6 +# * paragraph #6. Static assertions need such integer constant expressions as +# * defined in ISO/IEC 9899:2018 section 6.7.10 paragraph #3. # * -# * GCC nonetheless constant-folds this into no-op, though. */ +# * GCC nonetheless constant-folds this into a no-op, though. */ # define rb_scan_args_verify(fmt, varc) \ (sizeof(char[1-2*(rb_scan_args_count(fmt)<0)])!=1 ? \ rb_scan_args_bad_format(fmt) : \ @@ -349,7 +477,12 @@ rb_scan_args_set(int kw_flag, int argc, const VALUE *argv, #undef rb_scan_args_next_param } -#if ! defined(HAVE_BUILTIN___BUILTIN_CHOOSE_EXPR_CONSTANT_P) +/** @endcond */ + +#if defined(__DOXYGEN__) +# /* don't bother */ + +#elif ! defined(HAVE_BUILTIN___BUILTIN_CHOOSE_EXPR_CONSTANT_P) # /* skip */ #elif ! defined(HAVE_VA_ARGS_MACRO) diff --git a/include/ruby/internal/special_consts.h b/include/ruby/internal/special_consts.h index f36a230af2..dc0a6b41d6 100644 --- a/include/ruby/internal/special_consts.h +++ b/include/ruby/internal/special_consts.h @@ -17,9 +17,9 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. 
They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Defines enum ::ruby_special_consts. - * @see Sasada, K., "A Lighweight Representation of Floting-Point + * @see Sasada, K., "A Lightweight Representation of Floating-Point * Numbers on Ruby Interpreter", in proceedings of 10th JSSST * SIGPPL Workshop on Programming and Programming Languages * (PPL2008), pp. 9-16, 2008. @@ -31,6 +31,14 @@ #include "ruby/internal/stdbool.h" #include "ruby/internal/value.h" +/** + * @private + * @warning Do not touch this macro. + * @warning It is an implementation detail. + * @warning The value of this macro must match for ruby itself and all + * extension libraries, otherwise serious memory corruption shall + * occur. + */ #if defined(USE_FLONUM) # /* Take that. */ #elif SIZEOF_VALUE >= SIZEOF_DOUBLE @@ -39,27 +47,28 @@ # define USE_FLONUM 0 #endif +/** This is an old name of #RB_TEST. Not sure which name is preferred. */ #define RTEST RB_TEST -#define FIXNUM_P RB_FIXNUM_P -#define IMMEDIATE_P RB_IMMEDIATE_P -#define NIL_P RB_NIL_P -#define SPECIAL_CONST_P RB_SPECIAL_CONST_P -#define STATIC_SYM_P RB_STATIC_SYM_P +#define FIXNUM_P RB_FIXNUM_P /**< @old{RB_FIXNUM_P} */ +#define IMMEDIATE_P RB_IMMEDIATE_P /**< @old{RB_IMMEDIATE_P} */ +#define NIL_P RB_NIL_P /**< @old{RB_NIL_P} */ +#define SPECIAL_CONST_P RB_SPECIAL_CONST_P /**< @old{RB_SPECIAL_CONST_P} */ +#define STATIC_SYM_P RB_STATIC_SYM_P /**< @old{RB_STATIC_SYM_P} */ -#define Qfalse RUBY_Qfalse -#define Qnil RUBY_Qnil -#define Qtrue RUBY_Qtrue -#define Qundef RUBY_Qundef +#define Qfalse RUBY_Qfalse /**< @old{RUBY_Qfalse} */ +#define Qnil RUBY_Qnil /**< @old{RUBY_Qnil} */ +#define Qtrue RUBY_Qtrue /**< @old{RUBY_Qtrue} */ +#define Qundef RUBY_Qundef /**< @old{RUBY_Qundef} */ -/** @cond INTERNAL_MACRO */ -#define FIXNUM_FLAG RUBY_FIXNUM_FLAG -#define FLONUM_FLAG RUBY_FLONUM_FLAG -#define FLONUM_MASK RUBY_FLONUM_MASK -#define FLONUM_P RB_FLONUM_P -#define IMMEDIATE_MASK 
RUBY_IMMEDIATE_MASK -#define SYMBOL_FLAG RUBY_SYMBOL_FLAG +#define FIXNUM_FLAG RUBY_FIXNUM_FLAG /**< @old{RUBY_FIXNUM_FLAG} */ +#define FLONUM_FLAG RUBY_FLONUM_FLAG /**< @old{RUBY_FLONUM_FLAG} */ +#define FLONUM_MASK RUBY_FLONUM_MASK /**< @old{RUBY_FLONUM_MASK} */ +#define FLONUM_P RB_FLONUM_P /**< @old{RB_FLONUM_P} */ +#define IMMEDIATE_MASK RUBY_IMMEDIATE_MASK /**< @old{RUBY_IMMEDIATE_MASK} */ +#define SYMBOL_FLAG RUBY_SYMBOL_FLAG /**< @old{RUBY_SYMBOL_FLAG} */ +/** @cond INTERNAL_MACRO */ #define RB_FIXNUM_P RB_FIXNUM_P #define RB_FLONUM_P RB_FLONUM_P #define RB_IMMEDIATE_P RB_IMMEDIATE_P @@ -67,17 +76,29 @@ #define RB_SPECIAL_CONST_P RB_SPECIAL_CONST_P #define RB_STATIC_SYM_P RB_STATIC_SYM_P #define RB_TEST RB_TEST +#define RB_UNDEF_P RB_UNDEF_P +#define RB_NIL_OR_UNDEF_P RB_NIL_OR_UNDEF_P /** @endcond */ /** special constants - i.e. non-zero and non-fixnum constants */ enum RBIMPL_ATTR_ENUM_EXTENSIBILITY(closed) ruby_special_consts { -#if USE_FLONUM +#if defined(__DOXYGEN__) + RUBY_Qfalse, /**< @see ::rb_cFalseClass */ + RUBY_Qtrue, /**< @see ::rb_cTrueClass */ + RUBY_Qnil, /**< @see ::rb_cNilClass */ + RUBY_Qundef, /**< Represents so-called undef. */ + RUBY_IMMEDIATE_MASK, /**< Bit mask detecting special consts. */ + RUBY_FIXNUM_FLAG, /**< Flag to denote a fixnum. */ + RUBY_FLONUM_MASK, /**< Bit mask detecting a flonum. */ + RUBY_FLONUM_FLAG, /**< Flag to denote a flonum. */ + RUBY_SYMBOL_FLAG, /**< Flag to denote a static symbol. 
*/ +#elif USE_FLONUM RUBY_Qfalse = 0x00, /* ...0000 0000 */ + RUBY_Qnil = 0x04, /* ...0000 0100 */ RUBY_Qtrue = 0x14, /* ...0001 0100 */ - RUBY_Qnil = 0x08, /* ...0000 1000 */ - RUBY_Qundef = 0x34, /* ...0011 0100 */ + RUBY_Qundef = 0x24, /* ...0010 0100 */ RUBY_IMMEDIATE_MASK = 0x07, /* ...0000 0111 */ RUBY_FIXNUM_FLAG = 0x01, /* ...xxxx xxx1 */ RUBY_FLONUM_MASK = 0x03, /* ...0000 0011 */ @@ -85,36 +106,53 @@ ruby_special_consts { RUBY_SYMBOL_FLAG = 0x0c, /* ...xxxx 1100 */ #else RUBY_Qfalse = 0x00, /* ...0000 0000 */ - RUBY_Qtrue = 0x02, /* ...0000 0010 */ - RUBY_Qnil = 0x04, /* ...0000 0100 */ - RUBY_Qundef = 0x06, /* ...0000 0110 */ + RUBY_Qnil = 0x02, /* ...0000 0010 */ + RUBY_Qtrue = 0x06, /* ...0000 0110 */ + RUBY_Qundef = 0x0a, /* ...0000 1010 */ RUBY_IMMEDIATE_MASK = 0x03, /* ...0000 0011 */ RUBY_FIXNUM_FLAG = 0x01, /* ...xxxx xxx1 */ RUBY_FLONUM_MASK = 0x00, /* any values ANDed with FLONUM_MASK cannot be FLONUM_FLAG */ RUBY_FLONUM_FLAG = 0x02, /* ...0000 0010 */ - RUBY_SYMBOL_FLAG = 0x0e, /* ...0000 1110 */ + RUBY_SYMBOL_FLAG = 0x0e, /* ...xxxx 1110 */ #endif - RUBY_SPECIAL_SHIFT = 8 /** Least significant 8 bits are reserved. */ + RUBY_SPECIAL_SHIFT = 8 /**< Least significant 8 bits are reserved. */ }; RBIMPL_ATTR_CONST() RBIMPL_ATTR_CONSTEXPR(CXX11) RBIMPL_ATTR_ARTIFICIAL() -/* - * :NOTE: rbimpl_test HAS to be `__attribute__((const))` in order for clang to - * properly deduce `__builtin_assume()`. +/** + * Emulates Ruby's "if" statement. + * + * @param[in] obj An arbitrary ruby object. + * @retval false `obj` is either ::RUBY_Qfalse or ::RUBY_Qnil. + * @retval true Anything else. + * + * @internal + * + * It HAS to be `__attribute__((const))` in order for clang to properly deduce + * `__builtin_assume()`. */ static inline bool RB_TEST(VALUE obj) { /* + * if USE_FLONUM + * Qfalse: ....0000 0000 + * Qnil: ....0000 0100 + * ~Qnil: ....1111 1011 + * v ....xxxx xxxx + * ---------------------------- + * RTEST(v) ....xxxx x0xx + * + * if ! 
USE_FLONUM * Qfalse: ....0000 0000 - * Qnil: ....0000 1000 - * ~Qnil: ....1111 0111 + * Qnil: ....0000 0010 + * ~Qnil: ....1111 1101 * v ....xxxx xxxx * ---------------------------- - * RTEST(v) ....xxxx 0xxx + * RTEST(v) ....xxxx xx0x * * RTEST(v) can be 0 if and only if (v == Qfalse || v == Qnil). */ @@ -124,6 +162,13 @@ RB_TEST(VALUE obj) RBIMPL_ATTR_CONST() RBIMPL_ATTR_CONSTEXPR(CXX11) RBIMPL_ATTR_ARTIFICIAL() +/** + * Checks if the given object is nil. + * + * @param[in] obj An arbitrary ruby object. + * @retval true `obj` is ::RUBY_Qnil. + * @retval false Anything else. + */ static inline bool RB_NIL_P(VALUE obj) { @@ -133,6 +178,71 @@ RB_NIL_P(VALUE obj) RBIMPL_ATTR_CONST() RBIMPL_ATTR_CONSTEXPR(CXX11) RBIMPL_ATTR_ARTIFICIAL() +/** + * Checks if the given object is undef. + * + * @param[in] obj An arbitrary ruby object. + * @retval true `obj` is ::RUBY_Qundef. + * @retval false Anything else. + */ +static inline bool +RB_UNDEF_P(VALUE obj) +{ + return obj == RUBY_Qundef; +} + +RBIMPL_ATTR_CONST() +RBIMPL_ATTR_CONSTEXPR(CXX14) +RBIMPL_ATTR_ARTIFICIAL() +/** + * Checks if the given object is nil or undef. Can be used to see if + * a keyword argument is not given or given `nil`. + * + * @param[in] obj An arbitrary ruby object. + * @retval true `obj` is ::RUBY_Qnil or ::RUBY_Qundef. + * @retval false Anything else. + */ +static inline bool +RB_NIL_OR_UNDEF_P(VALUE obj) +{ + /* + * if USE_FLONUM + * Qundef: ....0010 0100 + * Qnil: ....0000 0100 + * mask: ....1101 1111 + * common_bits: ....0000 0100 + * --------------------------------- + * Qnil & mask ....0000 0100 + * Qundef & mask ....0000 0100 + * + * if ! USE_FLONUM + * Qundef: ....0000 1010 + * Qnil: ....0000 0010 + * mask: ....1111 0111 + * common_bits: ....0000 0010 + * ---------------------------- + * Qnil & mask ....0000 0010 + * Qundef & mask ....0000 0010 + * + * NIL_OR_UNDEF_P(v) can be true only when v is Qundef or Qnil. 
+ */ + const VALUE mask = ~(RUBY_Qundef ^ RUBY_Qnil); + const VALUE common_bits = RUBY_Qundef & RUBY_Qnil; + return (obj & mask) == common_bits; +} + +RBIMPL_ATTR_CONST() +RBIMPL_ATTR_CONSTEXPR(CXX11) +RBIMPL_ATTR_ARTIFICIAL() +/** + * Checks if the given object is a so-called Fixnum. + * + * @param[in] obj An arbitrary ruby object. + * @retval true `obj` is a Fixnum. + * @retval false Anything else. + * @note Fixnum was a thing in the 20th century, but it is rather an + * implementation detail today. + */ static inline bool RB_FIXNUM_P(VALUE obj) { @@ -142,6 +252,17 @@ RB_FIXNUM_P(VALUE obj) RBIMPL_ATTR_CONST() RBIMPL_ATTR_CONSTEXPR(CXX14) RBIMPL_ATTR_ARTIFICIAL() +/** + * Checks if the given object is a static symbol. + * + * @param[in] obj An arbitrary ruby object. + * @retval true `obj` is a static symbol. + * @retval false Anything else. + * @see RB_DYNAMIC_SYM_P() + * @see RB_SYMBOL_P() + * @note These days there are static and dynamic symbols, just like we + * once had Fixnum/Bignum back in the old days. + */ static inline bool RB_STATIC_SYM_P(VALUE obj) { @@ -153,6 +274,16 @@ RB_STATIC_SYM_P(VALUE obj) RBIMPL_ATTR_CONST() RBIMPL_ATTR_CONSTEXPR(CXX11) RBIMPL_ATTR_ARTIFICIAL() +/** + * Checks if the given object is a so-called Flonum. + * + * @param[in] obj An arbitrary ruby object. + * @retval true `obj` is a Flonum. + * @retval false Anything else. + * @see RB_FLOAT_TYPE_P() + * @note These days there are Flonums and non-Flonum floats, just like we + * once had Fixnum/Bignum back in the old days. + */ static inline bool RB_FLONUM_P(VALUE obj) { @@ -166,6 +297,16 @@ RB_FLONUM_P(VALUE obj) RBIMPL_ATTR_CONST() RBIMPL_ATTR_CONSTEXPR(CXX11) RBIMPL_ATTR_ARTIFICIAL() +/** + * Checks if the given object is an immediate, i.e. an object which has no + * corresponding storage inside of the object space. + * + * @param[in] obj An arbitrary ruby object. + * @retval true `obj` is an immediate. + * @retval false Anything else.
+ * @see RB_FLOAT_TYPE_P() + * @note The concept of "immediate" is purely C specific. + */ static inline bool RB_IMMEDIATE_P(VALUE obj) { @@ -175,16 +316,33 @@ RB_IMMEDIATE_P(VALUE obj) RBIMPL_ATTR_CONST() RBIMPL_ATTR_CONSTEXPR(CXX11) RBIMPL_ATTR_ARTIFICIAL() +/** + * Checks if the given object is of enum ::ruby_special_consts. + * + * @param[in] obj An arbitrary ruby object. + * @retval true `obj` is a special constant. + * @retval false Anything else. + */ static inline bool RB_SPECIAL_CONST_P(VALUE obj) { - return RB_IMMEDIATE_P(obj) || ! RB_TEST(obj); + return RB_IMMEDIATE_P(obj) || obj == RUBY_Qfalse; } RBIMPL_ATTR_CONST() RBIMPL_ATTR_CONSTEXPR(CXX11) -/* This function is to mimic old rb_special_const_p macro but have anyone - * actually used its return value? Wasn't it just something no one needed? */ +/** + * Identical to RB_SPECIAL_CONST_P, except it returns a ::VALUE. + * + * @param[in] obj An arbitrary ruby object. + * @retval RUBY_Qtrue `obj` is a special constant. + * @retval RUBY_Qfalse Anything else. + * + * @internal + * + * This function is to mimic old rb_special_const_p macro but have anyone + * actually used its return value? Wasn't it just something no one needed? + */ static inline VALUE rb_special_const_p(VALUE obj) { diff --git a/include/ruby/internal/static_assert.h b/include/ruby/internal/static_assert.h index d4bdadf196..b9ff6646e7 100644 --- a/include/ruby/internal/static_assert.h +++ b/include/ruby/internal/static_assert.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Defines #RBIMPL_STATIC_ASSERT. 
*/ #include <assert.h> @@ -71,7 +71,7 @@ #else # define RBIMPL_STATIC_ASSERT(name, expr) \ - typedef int static_assert_ ## name ## _check[1 - 2 * !(expr)] + MAYBE_UNUSED(typedef int static_assert_ ## name ## _check[1 - 2 * !(expr)]) #endif #endif /* RBIMPL_STATIC_ASSERT_H */ diff --git a/include/ruby/internal/stdalign.h b/include/ruby/internal/stdalign.h index 02eb7ab959..ec68f6a882 100644 --- a/include/ruby/internal/stdalign.h +++ b/include/ruby/internal/stdalign.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Defines #RBIMPL_ALIGNAS / #RBIMPL_ALIGNOF */ #include "ruby/internal/config.h" @@ -83,7 +83,9 @@ * @see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69560 * @see https://bugs.llvm.org/show_bug.cgi?id=26547 */ -#if defined(__cplusplus) +#if defined(__DOXYGEN__) +# define RBIMPL_ALIGNOF alignof +#elif defined(__cplusplus) # /* C++11 `alignof()` can be buggy. */ # /* see: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69560 */ # /* But don't worry, we can use templates. */ @@ -117,7 +119,7 @@ struct rbimpl_alignof { # * There are 2 known pitfalls for this fallback implementation: # * # * First, it is either an undefined behaviour (C) or an explicit error (C++) -# * to define a struct inside of `offsetof`. C compilers tend to accept such +# * to define a struct inside of `offsetof`. C compilers tend to accept such # * things, but AFAIK C++ has no room to allow. # * # * Second, there exist T such that `struct { char _; T t; }` is invalid. 
A diff --git a/include/ruby/internal/stdbool.h b/include/ruby/internal/stdbool.h index 0cd5103a05..1ca61136ba 100644 --- a/include/ruby/internal/stdbool.h +++ b/include/ruby/internal/stdbool.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief C99 shim for <stdbool.h> */ #include "ruby/internal/config.h" @@ -39,7 +39,7 @@ # /* Take stdbool.h definition. */ # include <stdbool.h> -#else +#elif !defined(HAVE__BOOL) typedef unsigned char _Bool; # /* See also http://www.open-std.org/jtc1/sc22/wg14/www/docs/n2229.htm */ # define bool _Bool diff --git a/include/ruby/internal/stdckdint.h b/include/ruby/internal/stdckdint.h new file mode 100644 index 0000000000..d02530136e --- /dev/null +++ b/include/ruby/internal/stdckdint.h @@ -0,0 +1,60 @@ +#ifndef RBIMPL_STDCKDINT_H /*-*-C++-*-vi:se ft=cpp:*/ +#define RBIMPL_STDCKDINT_H +/** + * @author Ruby developers <ruby-core@ruby-lang.org> + * @copyright This file is a part of the programming language Ruby. + * Permission is hereby granted, to either redistribute and/or + * modify this file, provided that the conditions mentioned in the + * file COPYING are met. Consult the file for details. + * @warning Symbols prefixed with either `RBIMPL` or `rbimpl` are + * implementation details. Don't take them as canon. They could + * rapidly appear then vanish. The name (path) of this header file + * is also an implementation detail. Do not expect it to persist + * at the place it is now. Developers are free to move it anywhere + * anytime at will. + * @note To ruby-core: remember that this header can be possibly + * recursively included from extension libraries written in C++. + * Do not expect for instance `__VA_ARGS__` is always available. 
+ * We assume C99 for ruby itself but we don't assume languages of + * extension libraries. They could be written in C++98. + * @brief C23 shim for <stdckdint.h> + */ +#include "ruby/internal/config.h" +#include "ruby/internal/has/builtin.h" +#include "ruby/internal/stdbool.h" + +#ifdef __has_include +# if __has_include(<stdckdint.h>) +# /* Conforming C23 situation; e.g. recent clang */ +# define RBIMPL_HAVE_STDCKDINT_H +# endif +#endif + +#ifdef HAVE_STDCKDINT_H +# /* Some OSes (most notably FreeBSD) have this file. */ +# define RBIMPL_HAVE_STDCKDINT_H +#endif + +#ifdef RBIMPL_HAVE_STDCKDINT_H +# /* Take that. */ +# include <stdckdint.h> + +#elif RBIMPL_HAS_BUILTIN(__builtin_add_overflow) +# define ckd_add(x, y, z) ((bool)__builtin_add_overflow((y), (z), (x))) +# define ckd_sub(x, y, z) ((bool)__builtin_sub_overflow((y), (z), (x))) +# define ckd_mul(x, y, z) ((bool)__builtin_mul_overflow((y), (z), (x))) +# define __STDC_VERSION_STDCKDINT_H__ 202311L + +#/* elif defined(__cplusplus) */ +#/* :TODO: if we assume C++11 we can use `<type_traits>` to implement them. */ + +#else +# /* intentionally leave them undefined */ +# /* to make `#ifdef ckd_add` etc. work as intended. */ +# undef ckd_add +# undef ckd_sub +# undef ckd_mul +# undef __STDC_VERSION_STDCKDINT_H__ +#endif + +#endif /* RBIMPL_STDCKDINT_H */ diff --git a/include/ruby/internal/symbol.h b/include/ruby/internal/symbol.h index 762f1e8f9b..869a31115c 100644 --- a/include/ruby/internal/symbol.h +++ b/include/ruby/internal/symbol.h @@ -17,12 +17,12 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. 
* @brief Defines #rb_intern */ #include "ruby/internal/config.h" -#ifdef HAVE_STDDEF_H +#ifdef STDC_HEADERS # include <stddef.h> #endif @@ -30,43 +30,248 @@ # include <string.h> #endif +#include "ruby/internal/attr/noalias.h" #include "ruby/internal/attr/nonnull.h" #include "ruby/internal/attr/pure.h" -#include "ruby/internal/attr/noalias.h" #include "ruby/internal/cast.h" #include "ruby/internal/constant_p.h" #include "ruby/internal/dllexport.h" #include "ruby/internal/has/builtin.h" #include "ruby/internal/value.h" -#define RB_ID2SYM rb_id2sym -#define RB_SYM2ID rb_sym2id -#define ID2SYM RB_ID2SYM -#define SYM2ID RB_SYM2ID -#define CONST_ID_CACHE RUBY_CONST_ID_CACHE -#define CONST_ID RUBY_CONST_ID +#define RB_ID2SYM rb_id2sym /**< @alias{rb_id2sym} */ +#define RB_SYM2ID rb_sym2id /**< @alias{rb_sym2id} */ +#define ID2SYM RB_ID2SYM /**< @old{RB_ID2SYM} */ +#define SYM2ID RB_SYM2ID /**< @old{RB_SYM2ID} */ +#define CONST_ID_CACHE RUBY_CONST_ID_CACHE /**< @old{RUBY_CONST_ID_CACHE} */ +#define CONST_ID RUBY_CONST_ID /**< @old{RUBY_CONST_ID} */ /** @cond INTERNAL_MACRO */ #define rb_intern_const rb_intern_const /** @endcond */ RBIMPL_SYMBOL_EXPORT_BEGIN() -ID rb_sym2id(VALUE); -VALUE rb_id2sym(ID); -ID rb_intern(const char*); -ID rb_intern2(const char*, long); + +/** + * Converts an instance of ::rb_cSymbol into an ::ID. + * + * @param[in] obj An instance of ::rb_cSymbol. + * @exception rb_eTypeError `obj` is not an instance of ::rb_cSymbol. + * @return An ::ID of the identical symbol. + */ +ID rb_sym2id(VALUE obj); + +/** + * Allocates an instance of ::rb_cSymbol that has the given id. + * + * @param[in] id An id. + * @retval RUBY_Qfalse No such id ever existed in the history. + * @retval Otherwise An allocated ::rb_cSymbol instance. + */ +VALUE rb_id2sym(ID id); + +RBIMPL_ATTR_NONNULL(()) +/** + * Finds or creates a symbol of the given name. + * + * @param[in] name The name of the id. + * @exception rb_eRuntimeError Too many symbols. 
+ * @return A (possibly new) id whose value is the given name. + * @note These days Ruby internally has two kinds of symbols (static / + * dynamic). Symbols created using this function would become a + * static one; i.e. would never be garbage collected. It is up to + * you to avoid memory leaks. Think twice before using it. + */ +ID rb_intern(const char *name); + +/** + * Identical to rb_intern(), except it additionally takes the length of the + * string. This way you can have a symbol that contains NUL characters. + * + * @param[in] name The name of the id. + * @param[in] len Length of `name`. + * @exception rb_eRuntimeError Too many symbols. + * @return A (possibly new) id whose value is the given name. + * @note These days Ruby internally has two kinds of symbols + * (static/dynamic). Symbols created using this function would + * become static ones; i.e. would never be garbage collected. It + * is up to you to avoid memory leaks. Think twice before using + * it. + */ +ID rb_intern2(const char *name, long len); + +/** + * Identical to rb_intern(), except it takes an instance of ::rb_cString. + * + * @param[in] str The name of the id. + * @pre `str` must either be an instance of ::rb_cSymbol, or an instance + * of ::rb_cString, or responds to `#to_str` method. + * @exception rb_eTypeError Can't convert `str` into ::rb_cString. + * @exception rb_eRuntimeError Too many symbols. + * @return A (possibly new) id whose value is the given str. + * @note These days Ruby internally has two kinds of symbols + * (static/dynamic). Symbols created using this function would + * become static ones; i.e. would never be garbage collected. It + * is up to you to avoid memory leaks. Think twice before using + * it. + */ ID rb_intern_str(VALUE str); -const char *rb_id2name(ID); -ID rb_check_id(volatile VALUE *); -ID rb_to_id(VALUE); -VALUE rb_id2str(ID); -VALUE rb_sym2str(VALUE); + +/** + * Retrieves the name mapped to the given id. + * + * @param[in] id An id to query. 
+ * @retval NULL No such id ever existed in the history. + * @retval otherwise A name that the id represents. + * @note The return value is managed by the interpreter. Don't pass it + * to free(). + */ +const char *rb_id2name(ID id); + +RBIMPL_ATTR_NONNULL(()) +/** + * Detects if the given name is already interned or not. It first tries to + * convert the argument to an instance of ::rb_cString if it is neither an + * instance of ::rb_cString nor ::rb_cSymbol. The conversion result is written + * back to the variable. Then queries if that name was already interned + * before. If found it returns such id, otherwise zero. + * + * We eventually introduced this API to avoid inadvertent symbol pin-down. + * Before, there was no way to know if an ID was already interned or not + * without actually creating one (== leaking memory). By using this API you + * can avoid such situations: + * + * ```CXX + * bool does_interning_this_leak_memory(VALUE obj) + * { + * auto tmp = obj; + * if (auto id = rb_check_id(&tmp); id) { + * return false; + * } + * else { + * return true; // Let GC sweep tmp if necessary. + * } + * } + * ``` + * + * @param[in,out] namep A pointer to a name to query. + * @pre The object referred by `*namep` must either be an instance + * of ::rb_cSymbol, or an instance of ::rb_cString, or responds + * to `#to_str` method. + * @exception rb_eTypeError Can't convert `*namep` into ::rb_cString. + * @exception rb_eEncodingError Given string is non-ASCII. + * @retval 0 No such id ever existed in the history. + * @retval otherwise The id that represents the given name. + * @post The object that `*namep` points to is a converted result + * object, which is always an instance of either ::rb_cSymbol + * or ::rb_cString. + * @see https://bugs.ruby-lang.org/issues/5072 + * + * @internal + * + * @shyouhei doesn't know why this has to raise rb_eEncodingError. 
+ */ +ID rb_check_id(volatile VALUE *namep); + +/** + * @copydoc rb_intern_str() + * + * @internal + * + * :FIXME: Can anyone tell us what is the difference between this one and + * rb_intern_str()? As far as @shyouhei reads the implementation it seems what + * rb_to_id() does is just waste some CPU time, then call rb_intern_str(). + * He hopes he is wrong. + */ +ID rb_to_id(VALUE str); + +/** + * Identical to rb_id2name(), except it returns a Ruby's String instead of C's. + * + * @param[in] id An id to query. + * @retval RUBY_Qfalse No such id ever existed in the history. + * @retval otherwise An instance of ::rb_cString with the name of id. + * + * @internal + * + * In reality "rb_id2str() is identical to rb_id2name() except it returns Ruby + * string" is just describing things upside down; truth is `rb_id2name(foo)` is + * a shorthand of `RSTRING_PTR(rb_id2str(foo))`. + */ +VALUE rb_id2str(ID id); + +/** + * Identical to rb_id2str(), except it takes an instance of ::rb_cSymbol rather + * than an ::ID. + * + * @param[in] id An id to query. + * @retval RUBY_Qfalse No such id ever existed in the history. + * @retval otherwise An instance of ::rb_cString with the name of id. + */ +VALUE rb_sym2str(VALUE id); + +/** + * Identical to rb_intern_str(), except it generates a dynamic symbol if + * necessary. + * + * @param[in] name The name of the id. + * @pre `name` must either be an instance of ::rb_cSymbol, or an + * instance of ::rb_cString, or responds to `#to_str` method. + * @exception rb_eTypeError Can't convert `name` into ::rb_cString. + * @exception rb_eRuntimeError Too many symbols. + * @return A (possibly new) id whose value is the given name. + * @note These days Ruby internally has two kinds of symbols + * (static/dynamic). Symbols created using this function would + * become dynamic ones; i.e. would be garbage collected. It could + * be safer for you to use it than alternatives, when applicable.
+ */ VALUE rb_to_symbol(VALUE name); + +RBIMPL_ATTR_NONNULL(()) +/** + * Identical to rb_check_id(), except it returns an instance of ::rb_cSymbol + * instead. + * + * @param[in,out] namep A pointer to a name to query. + * @pre The object referred by `*namep` must either be an instance + * of ::rb_cSymbol, or an instance of ::rb_cString, or responds + * to `#to_str` method. + * @exception rb_eTypeError Can't convert `*namep` into ::rb_cString. + * @exception rb_eEncodingError Given string is non-ASCII. + * @retval RUBY_Qnil No such id ever existed in the history. + * @retval otherwise The id that represents the given name. + * @post The object that `*namep` points to is a converted result + * object, which is always an instance of either ::rb_cSymbol + * or ::rb_cString. + * @see https://bugs.ruby-lang.org/issues/5072 + * + * @internal + * + * @shyouhei doesn't know why this has to raise rb_eEncodingError. + */ VALUE rb_check_symbol(volatile VALUE *namep); RBIMPL_SYMBOL_EXPORT_END() RBIMPL_ATTR_PURE() RBIMPL_ATTR_NONNULL(()) +/** + * This is a "tiny optimisation" over rb_intern(). If you pass a string + * _literal_, and if your C compiler can special-case strlen of such literal to + * strength-reduce into an integer constant expression, then this inline + * function can precalc a part of conversion. + * + * @note This function also works happily for non-constant strings. Why + * bother then? Just apply liberally to everything. + * @note But #rb_intern() could be faster on compilers with statement + * expressions, because they can cache the created ::ID. + * @param[in] str The name of the id. + * @exception rb_eRuntimeError Too many symbols. + * @return A (possibly new) id whose value is the given str. + * @note These days Ruby internally has two kinds of symbols (static / + * dynamic). Symbols created using this function would become a + * static one; i.e. would never be garbage collected. It is up to + * you to avoid memory leaks. Think twice before using it. 
+ */ static inline ID rb_intern_const(const char *str) { @@ -76,6 +281,11 @@ rb_intern_const(const char *str) RBIMPL_ATTR_NOALIAS() RBIMPL_ATTR_NONNULL(()) +/** + * @private + * + * This is an implementation detail of #rb_intern(). Just don't use it. + */ static inline ID rbimpl_intern_const(ID *ptr, const char *str) { @@ -86,13 +296,21 @@ rbimpl_intern_const(ID *ptr, const char *str) return *ptr; } -/* Does anyone use it? Preserved for backward compat. */ +/** + * Old implementation detail of rb_intern(). + * @deprecated Does anyone use it? Preserved for backward compat. + */ #define RUBY_CONST_ID_CACHE(result, str) \ { \ static ID rb_intern_id_cache; \ rbimpl_intern_const(&rb_intern_id_cache, (str)); \ result rb_intern_id_cache; \ } + +/** + * Old implementation detail of rb_intern(). + * @deprecated Does anyone use it? Preserved for backward compat. + */ #define RUBY_CONST_ID(var, str) \ do { \ static ID rbimpl_id; \ diff --git a/include/ruby/internal/value.h b/include/ruby/internal/value.h index b87fe140af..805cd83513 100644 --- a/include/ruby/internal/value.h +++ b/include/ruby/internal/value.h @@ -17,14 +17,79 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Defines ::VALUE and ::ID. */ #include "ruby/internal/static_assert.h" #include "ruby/backward/2/long_long.h" #include "ruby/backward/2/limits.h" -#if defined HAVE_UINTPTR_T && 0 +#if defined(__DOXYGEN__) + +/** + * Type that represents a Ruby object. It is an unsigned integer of some kind, + * depending on platforms. + * + * ```CXX + * VALUE value = rb_eval_string("ARGF.readlines.map.with_index"); + * ``` + * + * @warning ::VALUE is not a pointer. + * @warning ::VALUE can be wider than `long`. 
+ */ +typedef uintptr_t VALUE; + +/** + * Type that represents a Ruby identifier such as a variable name. + * + * ```CXX + * ID method = rb_intern("method"); + * VALUE result = rb_funcall(obj, method, 0); + * ``` + * + * @note ::rb_cSymbol is a Ruby-level data type for the same thing. + */ +typedef uintptr_t ID; + +/** + * A signed integer type that has the same width with ::VALUE. + * + * @internal + * + * @shyouhei wonders: is it guaranteed that `uintptr_t` and `intptr_t` are the + * same width? As far as I read ISO/IEC 9899:2018 section 7.20.1.4 paragraph 1 + * no such description is given... or defined elsewhere? + */ +typedef intptr_t SIGNED_VALUE; + +/** + * Identical to `sizeof(VALUE)`, except it is a macro that can also be used + * inside of preprocessor directives such as `#if`. Handy on occasions. + */ +#define SIZEOF_VALUE SIZEOF_UINTPTR_T + +/** + * @private + * + * A compile-time constant of type ::VALUE whose value is 0. + */ +#define RBIMPL_VALUE_NULL UINTPTR_C(0) + +/** + * @private + * + * A compile-time constant of type ::VALUE whose value is 1. + */ +#define RBIMPL_VALUE_ONE UINTPTR_C(1) + +/** + * @private + * + * Maximum possible value that a ::VALUE can take. + */ +#define RBIMPL_VALUE_FULL UINTPTR_MAX + +#elif defined HAVE_UINTPTR_T && 0 typedef uintptr_t VALUE; typedef uintptr_t ID; # define SIGNED_VALUE intptr_t @@ -59,8 +124,10 @@ typedef unsigned LONG_LONG ID; # error ---->> ruby requires sizeof(void*) == sizeof(long) or sizeof(LONG_LONG) to be compiled. 
<<---- #endif +/** @cond INTERNAL_MACRO */ RBIMPL_STATIC_ASSERT(sizeof_int, SIZEOF_INT == sizeof(int)); RBIMPL_STATIC_ASSERT(sizeof_long, SIZEOF_LONG == sizeof(long)); RBIMPL_STATIC_ASSERT(sizeof_long_long, SIZEOF_LONG_LONG == sizeof(LONG_LONG)); RBIMPL_STATIC_ASSERT(sizeof_voidp, SIZEOF_VOIDP == sizeof(void *)); +/** @endcond */ #endif /* RBIMPL_VALUE_H */ diff --git a/include/ruby/internal/value_type.h b/include/ruby/internal/value_type.h index 52b284cb4a..977f60a009 100644 --- a/include/ruby/internal/value_type.h +++ b/include/ruby/internal/value_type.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Defines enum ::ruby_value_type. */ #include "ruby/internal/assume.h" @@ -30,6 +30,7 @@ #include "ruby/internal/constant_p.h" #include "ruby/internal/core/rbasic.h" #include "ruby/internal/dllexport.h" +#include "ruby/internal/error.h" #include "ruby/internal/has/builtin.h" #include "ruby/internal/special_consts.h" #include "ruby/internal/stdbool.h" @@ -52,41 +53,40 @@ # error Bail out due to conflicting definition of T_DATA. 
#endif -#define T_ARRAY RUBY_T_ARRAY -#define T_BIGNUM RUBY_T_BIGNUM -#define T_CLASS RUBY_T_CLASS -#define T_COMPLEX RUBY_T_COMPLEX -#define T_DATA RUBY_T_DATA -#define T_FALSE RUBY_T_FALSE -#define T_FILE RUBY_T_FILE -#define T_FIXNUM RUBY_T_FIXNUM -#define T_FLOAT RUBY_T_FLOAT -#define T_HASH RUBY_T_HASH -#define T_ICLASS RUBY_T_ICLASS -#define T_IMEMO RUBY_T_IMEMO -#define T_MASK RUBY_T_MASK -#define T_MATCH RUBY_T_MATCH -#define T_MODULE RUBY_T_MODULE -#define T_MOVED RUBY_T_MOVED -#define T_NIL RUBY_T_NIL -#define T_NODE RUBY_T_NODE -#define T_NONE RUBY_T_NONE -#define T_OBJECT RUBY_T_OBJECT -#define T_RATIONAL RUBY_T_RATIONAL -#define T_REGEXP RUBY_T_REGEXP -#define T_STRING RUBY_T_STRING -#define T_STRUCT RUBY_T_STRUCT -#define T_SYMBOL RUBY_T_SYMBOL -#define T_TRUE RUBY_T_TRUE -#define T_UNDEF RUBY_T_UNDEF -#define T_ZOMBIE RUBY_T_ZOMBIE -#define T_PAYLOAD RUBY_T_PAYLOAD +#define T_ARRAY RUBY_T_ARRAY /**< @old{RUBY_T_ARRAY} */ +#define T_BIGNUM RUBY_T_BIGNUM /**< @old{RUBY_T_BIGNUM} */ +#define T_CLASS RUBY_T_CLASS /**< @old{RUBY_T_CLASS} */ +#define T_COMPLEX RUBY_T_COMPLEX /**< @old{RUBY_T_COMPLEX} */ +#define T_DATA RUBY_T_DATA /**< @old{RUBY_T_DATA} */ +#define T_FALSE RUBY_T_FALSE /**< @old{RUBY_T_FALSE} */ +#define T_FILE RUBY_T_FILE /**< @old{RUBY_T_FILE} */ +#define T_FIXNUM RUBY_T_FIXNUM /**< @old{RUBY_T_FIXNUM} */ +#define T_FLOAT RUBY_T_FLOAT /**< @old{RUBY_T_FLOAT} */ +#define T_HASH RUBY_T_HASH /**< @old{RUBY_T_HASH} */ +#define T_ICLASS RUBY_T_ICLASS /**< @old{RUBY_T_ICLASS} */ +#define T_IMEMO RUBY_T_IMEMO /**< @old{RUBY_T_IMEMO} */ +#define T_MASK RUBY_T_MASK /**< @old{RUBY_T_MASK} */ +#define T_MATCH RUBY_T_MATCH /**< @old{RUBY_T_MATCH} */ +#define T_MODULE RUBY_T_MODULE /**< @old{RUBY_T_MODULE} */ +#define T_MOVED RUBY_T_MOVED /**< @old{RUBY_T_MOVED} */ +#define T_NIL RUBY_T_NIL /**< @old{RUBY_T_NIL} */ +#define T_NODE RUBY_T_NODE /**< @old{RUBY_T_NODE} */ +#define T_NONE RUBY_T_NONE /**< @old{RUBY_T_NONE} */ +#define T_OBJECT 
RUBY_T_OBJECT /**< @old{RUBY_T_OBJECT} */ +#define T_RATIONAL RUBY_T_RATIONAL /**< @old{RUBY_T_RATIONAL} */ +#define T_REGEXP RUBY_T_REGEXP /**< @old{RUBY_T_REGEXP} */ +#define T_STRING RUBY_T_STRING /**< @old{RUBY_T_STRING} */ +#define T_STRUCT RUBY_T_STRUCT /**< @old{RUBY_T_STRUCT} */ +#define T_SYMBOL RUBY_T_SYMBOL /**< @old{RUBY_T_SYMBOL} */ +#define T_TRUE RUBY_T_TRUE /**< @old{RUBY_T_TRUE} */ +#define T_UNDEF RUBY_T_UNDEF /**< @old{RUBY_T_UNDEF} */ +#define T_ZOMBIE RUBY_T_ZOMBIE /**< @old{RUBY_T_ZOMBIE} */ -#define BUILTIN_TYPE RB_BUILTIN_TYPE -#define DYNAMIC_SYM_P RB_DYNAMIC_SYM_P -#define RB_INTEGER_TYPE_P rb_integer_type_p -#define SYMBOL_P RB_SYMBOL_P -#define rb_type_p RB_TYPE_P +#define BUILTIN_TYPE RB_BUILTIN_TYPE /**< @old{RB_BUILTIN_TYPE} */ +#define DYNAMIC_SYM_P RB_DYNAMIC_SYM_P /**< @old{RB_DYNAMIC_SYM_P} */ +#define RB_INTEGER_TYPE_P rb_integer_type_p /**< @old{rb_integer_type_p} */ +#define SYMBOL_P RB_SYMBOL_P /**< @old{RB_SYMBOL_P} */ +#define rb_type_p RB_TYPE_P /**< @alias{RB_TYPE_P} */ /** @cond INTERNAL_MACRO */ #define RB_BUILTIN_TYPE RB_BUILTIN_TYPE @@ -103,6 +103,7 @@ #endif /** @endcond */ +/** @old{rb_type} */ #define TYPE(_) RBIMPL_CAST((int)rb_type(_)) /** C-level type of an object. */ @@ -133,7 +134,6 @@ ruby_value_type { RUBY_T_SYMBOL = 0x14, /**< @see struct ::RSymbol */ RUBY_T_FIXNUM = 0x15, /**< Integers formerly known as Fixnums. */ RUBY_T_UNDEF = 0x16, /**< @see ::RUBY_Qundef */ - RUBY_T_PAYLOAD = 0x17, /**< @see ::RPayload */ RUBY_T_IMEMO = 0x1a, /**< @see struct ::RIMemo */ RUBY_T_NODE = 0x1b, /**< @see struct ::RNode */ @@ -141,26 +141,64 @@ ruby_value_type { RUBY_T_ZOMBIE = 0x1d, /**< @see struct ::RZombie */ RUBY_T_MOVED = 0x1e, /**< @see struct ::RMoved */ - RUBY_T_MASK = 0x1f + RUBY_T_MASK = 0x1f /**< Bitmask of ::ruby_value_type. */ }; RBIMPL_SYMBOL_EXPORT_BEGIN() RBIMPL_ATTR_COLD() +/** + * @private + * + * This was the old implementation of Check_Type(), but they diverged. 
This + * one remains for theoretical backwards compatibility. People normally need + * not use it. + * + * @param[in] obj An object. + * @param[in] t A type. + * @exception rb_eTypeError `obj` is not of type `t`. + * @exception rb_eFatal `obj` is corrupt. + * @post Upon successful return `obj` is guaranteed to have type `t`. + * + * @internal + * + * The second argument shall have been enum ::ruby_value_type. But at the time + * matz designed this function he still used K&R C. There was no such thing + * like a function prototype. We can no longer change this API. + */ void rb_check_type(VALUE obj, int t); RBIMPL_SYMBOL_EXPORT_END() RBIMPL_ATTR_PURE_UNLESS_DEBUG() RBIMPL_ATTR_ARTIFICIAL() +/** + * Queries the type of the object. + * + * @param[in] obj Object in question. + * @pre `obj` must not be a special constant. + * @return The type of `obj`. + */ static inline enum ruby_value_type RB_BUILTIN_TYPE(VALUE obj) { RBIMPL_ASSERT_OR_ASSUME(! RB_SPECIAL_CONST_P(obj)); +#if 0 && defined __GNUC__ && !defined __clang__ + /* Don't move the access to `flags` before the preceding + * RB_SPECIAL_CONST_P check. */ + __asm volatile("": : :"memory"); +#endif VALUE ret = RBASIC(obj)->flags & RUBY_T_MASK; return RBIMPL_CAST((enum ruby_value_type)ret); } RBIMPL_ATTR_PURE_UNLESS_DEBUG() +/** + * Queries if the object is an instance of ::rb_cInteger. + * + * @param[in] obj Object in question. + * @retval true It is. + * @retval false It isn't. + */ static inline bool rb_integer_type_p(VALUE obj) { @@ -176,6 +214,12 @@ rb_integer_type_p(VALUE obj) } RBIMPL_ATTR_PURE_UNLESS_DEBUG() +/** + * Identical to RB_BUILTIN_TYPE(), except it can also accept special constants. + * + * @param[in] obj Object in question. + * @return The type of `obj`. + */ static inline enum ruby_value_type rb_type(VALUE obj) { @@ -208,6 +252,13 @@ rb_type(VALUE obj) RBIMPL_ATTR_PURE_UNLESS_DEBUG() RBIMPL_ATTR_ARTIFICIAL() +/** + * Queries if the object is an instance of ::rb_cFloat. 
+ * + * @param[in] obj Object in question. + * @retval true It is. + * @retval false It isn't. + */ static inline bool RB_FLOAT_TYPE_P(VALUE obj) { @@ -224,6 +275,13 @@ RB_FLOAT_TYPE_P(VALUE obj) RBIMPL_ATTR_PURE_UNLESS_DEBUG() RBIMPL_ATTR_ARTIFICIAL() +/** + * Queries if the object is a dynamic symbol. + * + * @param[in] obj Object in question. + * @retval true It is. + * @retval false It isn't. + */ static inline bool RB_DYNAMIC_SYM_P(VALUE obj) { @@ -237,6 +295,13 @@ RB_DYNAMIC_SYM_P(VALUE obj) RBIMPL_ATTR_PURE_UNLESS_DEBUG() RBIMPL_ATTR_ARTIFICIAL() +/** + * Queries if the object is an instance of ::rb_cSymbol. + * + * @param[in] obj Object in question. + * @retval true It is. + * @retval false It isn't. + */ static inline bool RB_SYMBOL_P(VALUE obj) { @@ -246,6 +311,16 @@ RB_SYMBOL_P(VALUE obj) RBIMPL_ATTR_PURE_UNLESS_DEBUG() RBIMPL_ATTR_ARTIFICIAL() RBIMPL_ATTR_FORCEINLINE() +/** + * @private + * + * This is an implementation detail of RB_TYPE_P(). Just don't use it. + * + * @param[in] obj An object. + * @param[in] t A type. + * @retval true `obj` is of type `t`. + * @retval false Otherwise. + */ static bool rbimpl_RB_TYPE_P_fastpath(VALUE obj, enum ruby_value_type t) { @@ -283,6 +358,19 @@ rbimpl_RB_TYPE_P_fastpath(VALUE obj, enum ruby_value_type t) RBIMPL_ATTR_PURE_UNLESS_DEBUG() RBIMPL_ATTR_ARTIFICIAL() +/** + * Queries if the given object is of given type. + * + * @param[in] obj An object. + * @param[in] t A type. + * @retval true `obj` is of type `t`. + * @retval false Otherwise. + * + * @internal + * + * This function is a super-duper hot path. Optimised targeting modern C + * compilers and x86_64 architecture. 
+ */ static inline bool RB_TYPE_P(VALUE obj, enum ruby_value_type t) { @@ -323,34 +411,39 @@ RB_TYPE_P(VALUE obj, enum ruby_value_type t) RBIMPL_ATTR_PURE() RBIMPL_ATTR_ARTIFICIAL() -/* Defined in ruby/internal/core/rtypeddata.h */ +/** + * @private + * Defined in ruby/internal/core/rtypeddata.h + */ static inline bool rbimpl_rtypeddata_p(VALUE obj); RBIMPL_ATTR_ARTIFICIAL() +/** + * Identical to RB_TYPE_P(), except it raises exceptions on predication + * failure. + * + * @param[in] v An object. + * @param[in] t A type. + * @exception rb_eTypeError `obj` is not of type `t`. + * @exception rb_eFatal `obj` is corrupt. + * @post Upon successful return `obj` is guaranteed to have type `t`. + */ static inline void Check_Type(VALUE v, enum ruby_value_type t) { if (RB_UNLIKELY(! RB_TYPE_P(v, t))) { - goto slowpath; + goto unexpected_type; } - else if (t != RUBY_T_DATA) { - goto fastpath; - } - else if (rbimpl_rtypeddata_p(v)) { - /* The intention itself is not necessarily clear to me, but at least it - * is intentional to rule out typed data here. See commit - * a7c32bf81d3391cfb78cfda278f469717d0fb794. */ - goto slowpath; + else if (t == RUBY_T_DATA && rbimpl_rtypeddata_p(v)) { + /* Typed data is not simple `T_DATA`, see `rb_check_type` */ + goto unexpected_type; } else { - goto fastpath; + return; } - fastpath: - return; - - slowpath: /* <- :TODO: mark this label as cold. */ - rb_check_type(v, t); + unexpected_type: + rb_unexpected_type(v, t); } #endif /* RBIMPL_VALUE_TYPE_H */ diff --git a/include/ruby/internal/variable.h b/include/ruby/internal/variable.h index b0cfa61a62..c017ffe3f7 100644 --- a/include/ruby/internal/variable.h +++ b/include/ruby/internal/variable.h @@ -17,45 +17,320 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. 
- * @brief C-function backended Ruby-global variables. + * extension libraries. They could be written in C++98. + * @brief Declares rb_define_variable(). */ #include "ruby/internal/dllexport.h" #include "ruby/internal/value.h" +#include "ruby/internal/attr/nonnull.h" #include "ruby/internal/attr/noreturn.h" RBIMPL_SYMBOL_EXPORT_BEGIN() +/** + * Type that represents a global variable getter function. + * + * @param[in] id The variable name. + * @param[in,out] data Where the value is stored. + * @return The value that shall be visible from Ruby. + */ typedef VALUE rb_gvar_getter_t(ID id, VALUE *data); + +/** + * Type that represents a global variable setter function. + * + * @param[in] val The value to set. + * @param[in] id The variable name. + * @param[in,out] data Where the value is to be stored. + */ typedef void rb_gvar_setter_t(VALUE val, ID id, VALUE *data); + +/** + * Type that represents a global variable marker function. + * + * @param[in] var Where the value is to be stored. + */ typedef void rb_gvar_marker_t(VALUE *var); +/** + * @deprecated + * + * This function has no actual usage (than in ruby itself). Please ignore. It + * was a bad idea to expose this function to 3rd parties, but we can no longer + * delete it. + */ rb_gvar_getter_t rb_gvar_undef_getter; + +/** + * @deprecated + * + * This function has no actual usage (than in ruby itself). Please ignore. It + * was a bad idea to expose this function to 3rd parties, but we can no longer + * delete it. + */ rb_gvar_setter_t rb_gvar_undef_setter; + +/** + * @deprecated + * + * This function has no actual usage (than in ruby itself). Please ignore. It + * was a bad idea to expose this function to 3rd parties, but we can no longer + * delete it. + */ rb_gvar_marker_t rb_gvar_undef_marker; +/** + * This is the getter function that backs global variables defined from a ruby + * script. Extension libraries can use this if its global variable needs no + * custom logic. 
+ */ rb_gvar_getter_t rb_gvar_val_getter; + +/** + * This is the setter function that backs global variables defined from a ruby + * script. Extension libraries can use this if its global variable needs no + * custom logic. + */ rb_gvar_setter_t rb_gvar_val_setter; + +/** + * This is the marker function that backs global variables defined from a ruby + * script. Extension libraries can use this if its global variable needs no + * custom logic. + */ rb_gvar_marker_t rb_gvar_val_marker; +/** + * @deprecated + * + * This function has no actual usage (than in ruby itself). Please ignore. It + * was a bad idea to expose this function to 3rd parties, but we can no longer + * delete it. + */ rb_gvar_getter_t rb_gvar_var_getter; + +/** + * @deprecated + * + * This function has no actual usage (than in ruby itself). Please ignore. It + * was a bad idea to expose this function to 3rd parties, but we can no longer + * delete it. + */ rb_gvar_setter_t rb_gvar_var_setter; + +/** + * @deprecated + * + * This function has no actual usage (than in ruby itself). Please ignore. It + * was a bad idea to expose this function to 3rd parties, but we can no longer + * delete it. + */ rb_gvar_marker_t rb_gvar_var_marker; RBIMPL_ATTR_NORETURN() +/** + * This function just raises ::rb_eNameError. Handy when you want to prohibit + * a global variable from being squashed by someone.
+ */ rb_gvar_setter_t rb_gvar_readonly_setter; -void rb_define_variable(const char*,VALUE*); -void rb_define_virtual_variable(const char*,rb_gvar_getter_t*,rb_gvar_setter_t*); -void rb_define_hooked_variable(const char*,VALUE*,rb_gvar_getter_t*,rb_gvar_setter_t*); -void rb_define_readonly_variable(const char*,const VALUE*); -void rb_define_const(VALUE,const char*,VALUE); -void rb_define_global_const(const char*,VALUE); - -VALUE rb_gv_set(const char*, VALUE); -VALUE rb_gv_get(const char*); -VALUE rb_iv_get(VALUE, const char*); -VALUE rb_iv_set(VALUE, const char*, VALUE); +RBIMPL_ATTR_NONNULL(()) +/** + * "Shares" a global variable between Ruby and C. Normally a Ruby-level global + * variable is stored somewhere deep inside of the interpreter's execution + * context, but this way you can explicitly specify its storage. + * + * ```CXX + * static VALUE foo; + * + * extern "C" void + * init_Foo(void) + * { + * foo = rb_eval_string("..."); + * rb_define_variable("$foo", &foo); + * } + * ``` + * + * In the above example a Ruby global variable named `$foo` is stored in a C + * global variable named `foo`. + * + * @param[in] name Variable (Ruby side). + * @param[in] var Variable (C side). + * @post Ruby level global variable named `name` is defined if absent, + * and its storage is set to `var`. + */ +void rb_define_variable(const char *name, VALUE *var); + +RBIMPL_ATTR_NONNULL((1)) +/** + * Defines a global variable that is purely function-backended. By using this + * API a programmer can define a global variable that dynamically changes from + * time to time. + * + * @param[in] name Variable name, in C's string. + * @param[in] getter A getter function. + * @param[in] setter A setter function. + * @post Ruby level global variable named `name` is defined if absent. + * + * @internal + * + * @shyouhei doesn't know if this is an Easter egg or an official feature, but + * you can pass 0 to the third argument (setter). 
That effectively nullifies + * any efforts to write to the defining global variable. + */ +void rb_define_virtual_variable(const char *name, rb_gvar_getter_t *getter, rb_gvar_setter_t *setter); + +RBIMPL_ATTR_NONNULL((1)) +/** + * Identical to rb_define_virtual_variable(), but can also specify a storage. + * A programmer can use the storage for e.g. memoisation, storing intermediate + * computation result, etc. + * + * Also you can pass 0 to this function, unlike other variants: + * + * - When getter is 0 ::rb_gvar_var_getter is used instead. + * - When setter is 0 ::rb_gvar_var_setter is used instead. + * - When data is 0, you must specify a non-zero setter function. Otherwise + * ::rb_gvar_var_setter tries to write to `*NULL`, and just causes SEGV. + * + * @param[in] name Variable name, in C's string. + * @param[in] var Variable storage. + * @param[in] getter A getter function. + * @param[in] setter A setter function. + * @post Ruby level global variable named `name` is defined if absent. + */ +void rb_define_hooked_variable(const char *name, VALUE *var, rb_gvar_getter_t *getter, rb_gvar_setter_t *setter); + +RBIMPL_ATTR_NONNULL(()) +/** + * Identical to rb_define_variable(), except it does not allow Ruby programs to + * assign values to such global variable. C codes can still set values at + * will. This could be handy for you when implementing an `errno`-like + * experience, where a method updates a read-only global variable as a side- + * effect. + * + * @param[in] name Variable (Ruby side). + * @param[in] var Variable (C side). + * @post Ruby level global variable named `name` is defined if absent, + * and its storage is set to `var`. + */ +void rb_define_readonly_variable(const char *name, const VALUE *var); + +RBIMPL_ATTR_NONNULL(()) +/** + * Defines a Ruby level constant under a namespace. + * + * @param[out] klass Namespace for the constant to reside. + * @param[in] name Name of the constant. + * @param[in] val Value of the constant. 
+ * @exception rb_eTypeError `klass` is not a kind of ::rb_cModule. + * @exception rb_eFrozenError `klass` is frozen. + * @post Ruby level constant `klass::name` is defined to be `val`. + * @note This API does not stop you from defining a constant that is + * unable to reach from ruby (like for instance passing + * non-capital letter to `name`). + * @note This API does not stop you from overwriting a constant that + * already exist. + * + * @internal + * + * Above description is in fact inaccurate. This API interfaces with Ractors. + */ +void rb_define_const(VALUE klass, const char *name, VALUE val); + +RBIMPL_ATTR_NONNULL(()) +/** + * Identical to rb_define_const(), except it defines that of "global", + * i.e. toplevel constant. + * + * @param[in] name Name of the constant. + * @param[in] val Value of the constant. + * @exception rb_eFrozenError ::rb_cObject is frozen. + * @post Ruby level constant \::name is defined to be `val`. + * @note This API does not stop you from defining a constant that is + * unable to reach from ruby (like for instance passing + * non-capital letter to `name`). + * @note This API does not stop you from overwriting a constant that + * already exist. + */ +void rb_define_global_const(const char *name, VALUE val); + +RBIMPL_ATTR_NONNULL(()) +/** + * Asserts that the given constant is deprecated. Attempt to refer such + * constant will produce a warning. + * + * @param[in] mod Namespace of the target constant. + * @param[in] name Name of the constant. + * @exception rb_eNameError No such constant. + * @exception rb_eFrozenError `mod` is frozen. + * @post `name` under `mod` is deprecated. + */ +void rb_deprecate_constant(VALUE mod, const char *name); + +RBIMPL_ATTR_NONNULL(()) +/** + * Assigns to a global variable. + * + * @param[in] name Target global variable. + * @param[in] val Value to assign. + * @return Passed value. + * @post Ruby level global variable named `name` is defined if absent, + * whose value is set to `val`. 
+ * + * @internal + * + * Above description is in fact inaccurate. This API interfaces with + * `set_trace_func`. + */ +VALUE rb_gv_set(const char *name, VALUE val); + +RBIMPL_ATTR_NONNULL(()) +/** + * Obtains a global variable. + * + * @param[in] name Global variable to query. + * @retval RUBY_Qnil The global variable does not exist. + * @retval otherwise The value assigned to the global variable. + * + * @internal + * + * Unlike rb_gv_set(), there is no way to trace this function. + */ +VALUE rb_gv_get(const char *name); + +RBIMPL_ATTR_NONNULL(()) +/** + * Obtains an instance variable. + * + * @param[in] obj Target object. + * @param[in] name Target instance variable to query. + * @exception rb_eEncodingError `name` is corrupt (contains Hanzi etc.). + * @retval RUBY_Qnil No such instance variable. + * @retval otherwise The value assigned to the instance variable. + */ +VALUE rb_iv_get(VALUE obj, const char *name); + +RBIMPL_ATTR_NONNULL(()) +/** + * Assigns to an instance variable. + * + * @param[out] obj Target object. + * @param[in] name Target instance variable. + * @param[in] val Value to assign. + * @exception rb_eFrozenError Can't modify `obj`. + * @exception rb_eArgError `obj` has too many instance variables. + * @return Passed value. + * @post An instance variable named `name` is defined if absent on + * `obj`, whose value is set to `val`. + * + * @internal + * + * This function does not stop you from creating an ASCII-incompatible instance + * variable, but there is no way to get one because rb_iv_get raises exceptions + * for such things. This design seems broken... But no idea why.
+ */ +VALUE rb_iv_set(VALUE obj, const char *name, VALUE val); RBIMPL_SYMBOL_EXPORT_END() diff --git a/include/ruby/internal/warning_push.h b/include/ruby/internal/warning_push.h index ca521290c9..f5981633f8 100644 --- a/include/ruby/internal/warning_push.h +++ b/include/ruby/internal/warning_push.h @@ -17,9 +17,8 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. - * @brief Defines RBIMPL_WARNING_PUSH. - * @cond INTERNAL_MACRO + * extension libraries. They could be written in C++98. + * @brief Defines #RBIMPL_WARNING_PUSH. * * ### Q&A ### * @@ -46,7 +45,41 @@ #include "ruby/internal/compiler_is.h" #include "ruby/internal/compiler_since.h" -#if RBIMPL_COMPILER_SINCE(MSVC, 12, 0, 0) +#if defined(__DOXYGEN__) + +/** + * @private + * + * Pushes compiler warning state. + */ +#define RBIMPL_WARNING_PUSH() __pragma(warning(push)) + +/** + * @private + * + * Pops compiler warning state. + */ +#define RBIMPL_WARNING_POP() __pragma(warning(pop)) + +/** + * @private + * + * Turns a warning into a fatal error. + * + * @param flag A flag that represents the kind of warnings. + */ +#define RBIMPL_WARNING_ERROR(flag) __pragma(warning(error: flag)) + +/** + * @private + * + * Suppresses a warning. + * + * @param flag A flag that represents the kind of warnings. 
+ */ +#define RBIMPL_WARNING_IGNORED(flag) __pragma(warning(disable: flag)) + +#elif RBIMPL_COMPILER_SINCE(MSVC, 12, 0, 0) # /* Not sure exactly when but it seems VC++ 6.0 is a version with it.*/ # define RBIMPL_WARNING_PUSH() __pragma(warning(push)) # define RBIMPL_WARNING_POP() __pragma(warning(pop)) diff --git a/include/ruby/internal/xmalloc.h b/include/ruby/internal/xmalloc.h index 76da1eb099..57552e4e7d 100644 --- a/include/ruby/internal/xmalloc.h +++ b/include/ruby/internal/xmalloc.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Declares ::ruby_xmalloc(). */ #include "ruby/internal/config.h" @@ -37,16 +37,25 @@ #include "ruby/internal/attr/returns_nonnull.h" #include "ruby/internal/dllexport.h" +/** + * @private + * @warning Do not touch this macro. + * @warning It is an implementation detail. + * @warning It was a failure at the first place to let you know about it. + * @warning The value of this macro must match for ruby itself and all + * extension libraries, otherwise serious memory corruption shall + * occur. 
+ */ #ifndef USE_GC_MALLOC_OBJ_INFO_DETAILS # define USE_GC_MALLOC_OBJ_INFO_DETAILS 0 #endif -#define xmalloc ruby_xmalloc -#define xmalloc2 ruby_xmalloc2 -#define xcalloc ruby_xcalloc -#define xrealloc ruby_xrealloc -#define xrealloc2 ruby_xrealloc2 -#define xfree ruby_xfree +#define xmalloc ruby_xmalloc /**< @old{ruby_xmalloc} */ +#define xmalloc2 ruby_xmalloc2 /**< @old{ruby_xmalloc2} */ +#define xcalloc ruby_xcalloc /**< @old{ruby_xcalloc} */ +#define xrealloc ruby_xrealloc /**< @old{ruby_xrealloc} */ +#define xrealloc2 ruby_xrealloc2 /**< @old{ruby_xrealloc2} */ +#define xfree ruby_xfree /**< @old{ruby_xfree} */ RBIMPL_SYMBOL_EXPORT_BEGIN() @@ -114,9 +123,9 @@ RBIMPL_ATTR_RESTRICT() RBIMPL_ATTR_RETURNS_NONNULL() RBIMPL_ATTR_ALLOC_SIZE((1,2)) /** - * Identical to ruby_xmalloc2(), except it zero-fills the region before it - * returns. This could also be seen as a routine identical to ruby_xmalloc(), - * except it calls calloc() instead of malloc() internally. + * Identical to ruby_xmalloc2(), except it returns a zero-filled storage + * instance. It can also be seen as a routine identical to ruby_xmalloc(), + * except it calls calloc() instead of malloc(). * * @param[in] nelems Number of elements. * @param[in] elemsiz Size of an element. @@ -125,6 +134,7 @@ RBIMPL_ATTR_ALLOC_SIZE((1,2)) * @return A valid pointer to an allocated storage instance; which has at * least `nelems` * `elemsiz` bytes width, with appropriate * alignment detected by the underlying calloc() routine. + * @post The returned storage instance is filled with zeros. * @note It doesn't return NULL. * @note Unlike some calloc() implementations, it allocates something and * returns a meaningful value even when `nelems` or `elemsiz` or @@ -145,22 +155,28 @@ RBIMPL_ATTR_ALLOC_SIZE((2)) * Resize the storage instance. 
* * @param[in] ptr A valid pointer to a storage instance that was - * previously returned from either ruby_xmalloc(), - * ruby_xmalloc2(), ruby_xcalloc(), - * ruby_xrealloc(), or ruby_xrealloc2(). + * previously returned from either: + * - ruby_xmalloc(), + * - ruby_xmalloc2(), + * - ruby_xcalloc(), + * - ruby_xrealloc(), or + * - ruby_xrealloc2(). * @param[in] newsiz Requested new amount of memory. * @exception rb_eNoMemError No space left for `newsiz` bytes allocation. - * @retval ptr In case the function returns the passed pointer - * as-is, the storage instance that the pointer - * holds is either grown or shrunken to have at - * least `newsiz` bytes. - * @retval otherwise A valid pointer to a newly allocated storage - * instance which has at least `newsiz` bytes - * width, and holds previous contents of `ptr`. In - * this case `ptr` is invalidated as if it was - * passed to ruby_xfree(). + * @return A valid pointer to a (possibly newly allocated) storage + * instance; which has at least `newsiz` bytes width, with + * appropriate alignment detected by the underlying realloc() + * routine. + * @pre The passed pointer must point to a valid live storage instance. + * It is a failure to pass an already freed pointer. + * @post In case the function returns the passed pointer as-is, the + * storage instance that the pointer holds is either grown or + * shrunken to have at least `newsiz` bytes. Otherwise a valid + * pointer to a newly allocated storage instance is returned. In + * this case `ptr` is invalidated as if it was passed to + * ruby_xfree(). * @note It doesn't return NULL. - * @warning Unlike some realloc() implementations, passing zero to `elemsiz` + * @warning Unlike some realloc() implementations, passing zero to `newsiz` * is not the same as calling ruby_xfree(), because this function * never returns NULL. Something meaningful still returns then. * @warning It is a failure not to check the return value. 
Do not assume @@ -193,23 +209,28 @@ RBIMPL_ATTR_ALLOC_SIZE((2,3)) * etc. provides, but also interacts with our GC. * * @param[in] ptr A valid pointer to a storage instance that was - * previously returned from either ruby_xmalloc(), - * ruby_xmalloc2(), ruby_xcalloc(), - * ruby_xrealloc(), or ruby_xrealloc2(). - + * previously returned from either: + * - ruby_xmalloc(), + * - ruby_xmalloc2(), + * - ruby_xcalloc(), + * - ruby_xrealloc(), or + * - ruby_xrealloc2(). * @param[in] newelems Requested new number of elements. * @param[in] newsiz Requested new size of each element. * @exception rb_eNoMemError No space left for allocation. * @exception rb_eArgError `newelems` * `newsiz` would overflow. - * @retval ptr In case the function returns the passed pointer - * as-is, the storage instance that the pointer - * holds is either grown or shrunken to have at - * least `newelems` * `newsiz` bytes. - * @retval otherwise A valid pointer to a newly allocated storage - * instance which has at least `newelems` * - * `newsiz` bytes width, and holds previous - * contents of `ptr`. In this case `ptr` is - * invalidated as if it was passed to ruby_xfree(). + * @return A valid pointer to a (possibly newly allocated) storage + * instance; which has at least `newelems` * `newsiz` bytes width, + * with appropriate alignment detected by the underlying realloc() + * routine. + * @pre The passed pointer must point to a valid live storage instance. + * It is a failure to pass an already freed pointer. + * @post In case the function returns the passed pointer as-is, the + * storage instance that the pointer holds is either grown or + * shrunken to have at least `newelems` * `newsiz` bytes. + * Otherwise a valid pointer to a newly allocated storage instance + * is returned. In this case `ptr` is invalidated as if it was + * passed to ruby_xfree(). * @note It doesn't return NULL. 
* @warning Unlike some realloc() implementations, passing zero to either * `newelems` or `elemsiz` are not the same as calling @@ -233,9 +254,18 @@ RBIMPL_ATTR_NOEXCEPT(realloc(ptr, newelems * newsiz)) /** * Deallocates a storage instance. * - * @param[out] ptr Either NULL, or a valid pointer previously returned from - * one of ruby_xmalloc(), ruby_xmalloc2(), ruby_xcalloc(), - * ruby_xrealloc(), or ruby_xrealloc2(). + * @param[out] ptr Either + * - NULL, or + * - a valid pointer previously returned from one of: + * - ruby_xmalloc(), + * - ruby_xmalloc2(), + * - ruby_xcalloc(), + * - ruby_xrealloc(), or + * - ruby_xrealloc2(). + * @pre The passed pointer must point to a valid live storage instance. + * It is a failure to pass an already freed pointer. + * @post The storage instance pointed by the passed pointer gets + * invalidated; it is no longer addressable. * @warning Every single storage instance that was previously allocated by * either ruby_xmalloc(), ruby_xmalloc2(), ruby_xcalloc(), * ruby_xrealloc(), or ruby_xrealloc2() shall be invalidated @@ -253,7 +283,7 @@ void ruby_xfree(void *ptr) RBIMPL_ATTR_NOEXCEPT(free(ptr)) ; -#if USE_GC_MALLOC_OBJ_INFO_DETAILS || defined(__DOXYGEN) +#if USE_GC_MALLOC_OBJ_INFO_DETAILS # define ruby_xmalloc(s1) ruby_xmalloc_with_location(s1, __FILE__, __LINE__) # define ruby_xmalloc2(s1, s2) ruby_xmalloc2_with_location(s1, s2, __FILE__, __LINE__) # define ruby_xcalloc(s1, s2) ruby_xcalloc_with_location(s1, s2, __FILE__, __LINE__) diff --git a/include/ruby/io.h b/include/ruby/io.h index dfef85bbe3..e9dfeda5b1 100644 --- a/include/ruby/io.h +++ b/include/ruby/io.h @@ -20,6 +20,8 @@ #endif #include <errno.h> + +/** @cond INTERNAL_MACRO */ #if defined(HAVE_POLL) # ifdef _AIX # define reqevents events @@ -33,153 +35,1037 @@ # undef revents # endif # define RB_WAITFD_IN POLLIN -# define RB_WAITFD_PRI POLLPRI +# if defined(POLLPRI) +# define RB_WAITFD_PRI POLLPRI +# else +# define RB_WAITFD_PRI 0 +# endif # define RB_WAITFD_OUT POLLOUT 
#else # define RB_WAITFD_IN 0x001 # define RB_WAITFD_PRI 0x002 # define RB_WAITFD_OUT 0x004 #endif +/** @endcond */ -typedef enum { - RUBY_IO_READABLE = RB_WAITFD_IN, - RUBY_IO_WRITABLE = RB_WAITFD_OUT, - RUBY_IO_PRIORITY = RB_WAITFD_PRI, -} rb_io_event_t; - +#include "ruby/internal/attr/const.h" +#include "ruby/internal/attr/packed_struct.h" +#include "ruby/internal/attr/pure.h" +#include "ruby/internal/attr/noreturn.h" #include "ruby/internal/dllexport.h" +#include "ruby/internal/value.h" + +// IO#wait, IO#wait_readable, IO#wait_writable, IO#wait_priority are defined by this implementation. +#define RUBY_IO_WAIT_METHODS + +// Used as the default timeout argument to `rb_io_wait` to use the `IO#timeout` value. +#define RUBY_IO_TIMEOUT_DEFAULT Qnil + RBIMPL_SYMBOL_EXPORT_BEGIN() -PACKED_STRUCT_UNALIGNED(struct rb_io_buffer_t { +struct stat; +struct timeval; + +/** + * Indicates that a timeout has occurred while performing an IO operation. + */ +RUBY_EXTERN VALUE rb_eIOTimeoutError; + +/** + * Type of events that an IO can wait. + * + * @internal + * + * This is visible from extension libraries because `io/wait` wants it. + */ +enum rb_io_event { + RUBY_IO_READABLE = RB_WAITFD_IN, /**< `IO::READABLE` */ + RUBY_IO_WRITABLE = RB_WAITFD_OUT, /**< `IO::WRITABLE` */ + RUBY_IO_PRIORITY = RB_WAITFD_PRI, /**< `IO::PRIORITY` */ +}; + +typedef enum rb_io_event rb_io_event_t; + +/** + * IO buffers. This is an implementation detail of ::rb_io_t::wbuf and + * ::rb_io_t::rbuf. People don't manipulate it directly. + */ +RBIMPL_ATTR_PACKED_STRUCT_UNALIGNED_BEGIN() +struct rb_io_internal_buffer { + + /** Pointer to the underlying memory region, of at least `capa` bytes. */ char *ptr; /* off + len <= capa */ + + /** Offset inside of `ptr`. */ int off; + + /** Length of the buffer. */ int len; + + /** Designed capacity of the buffer. 
*/ int capa; -}); -typedef struct rb_io_buffer_t rb_io_buffer_t; +} RBIMPL_ATTR_PACKED_STRUCT_UNALIGNED_END(); -typedef struct rb_io_t { +/** @alias{rb_io_buffer_t} */ +typedef struct rb_io_internal_buffer rb_io_buffer_t; + +/** Decomposed encoding flags (e.g. `"enc:enc2""`). */ +/* + * enc enc2 read action write action + * NULL NULL force_encoding(default_external) write the byte sequence of str + * e1 NULL force_encoding(e1) convert str.encoding to e1 + * e1 e2 convert from e2 to e1 convert str.encoding to e2 + */ +struct rb_io_encoding { + /** Internal encoding. */ + rb_encoding *enc; + /** External encoding. */ + rb_encoding *enc2; + /** + * Flags. + * + * @see enum ::ruby_econv_flag_type + */ + int ecflags; + /** + * Flags as Ruby hash. + * + * @internal + * + * This is set. But used from nowhere maybe? + */ + VALUE ecopts; +}; + +#ifndef HAVE_RB_IO_T +#define HAVE_RB_IO_T 1 +/** Ruby's IO, metadata and buffers. */ +struct rb_io { + /** The IO's Ruby level counterpart. */ + RBIMPL_ATTR_DEPRECATED(("with no replacement")) VALUE self; - FILE *stdio_file; /* stdio ptr for read/write if available */ - int fd; /* file descriptor */ - int mode; /* mode flags: FMODE_XXXs */ - rb_pid_t pid; /* child's pid (for pipes) */ - int lineno; /* number of lines read */ - VALUE pathv; /* pathname for file */ - void (*finalize)(struct rb_io_t*,int); /* finalize proc */ + /** stdio ptr for read/write, if available. */ + RBIMPL_ATTR_DEPRECATED(("with no replacement")) + FILE *stdio_file; - rb_io_buffer_t wbuf, rbuf; + /** file descriptor. 
*/ + RBIMPL_ATTR_DEPRECATED(("rb_io_descriptor")) + int fd; - VALUE tied_io_for_writing; + /** mode flags: FMODE_XXXs */ + RBIMPL_ATTR_DEPRECATED(("rb_io_mode")) + int mode; + + /** child's pid (for pipes) */ + RBIMPL_ATTR_DEPRECATED(("with no replacement")) + rb_pid_t pid; + + /** number of lines read */ + RBIMPL_ATTR_DEPRECATED(("with no replacement")) + int lineno; + + /** pathname for file */ + RBIMPL_ATTR_DEPRECATED(("rb_io_path")) + VALUE pathv; - /* - * enc enc2 read action write action - * NULL NULL force_encoding(default_external) write the byte sequence of str - * e1 NULL force_encoding(e1) convert str.encoding to e1 - * e1 e2 convert from e2 to e1 convert str.encoding to e2 + /** finalize proc */ + RBIMPL_ATTR_DEPRECATED(("with no replacement")) + void (*finalize)(struct rb_io*,int); + + /** Write buffer. */ + RBIMPL_ATTR_DEPRECATED(("with no replacement")) + rb_io_buffer_t wbuf; + + /** + * (Byte) read buffer. Note also that there is a field called + * ::rb_io_t::cbuf, which also concerns read IO. */ - struct rb_io_enc_t { - rb_encoding *enc; - rb_encoding *enc2; - int ecflags; - VALUE ecopts; - } encs; + RBIMPL_ATTR_DEPRECATED(("with no replacement")) + rb_io_buffer_t rbuf; + /** + * Duplex IO object, if set. + * + * @see rb_io_set_write_io() + */ + RBIMPL_ATTR_DEPRECATED(("rb_io_get_write_io")) + VALUE tied_io_for_writing; + + RBIMPL_ATTR_DEPRECATED(("with no replacement")) + struct rb_io_encoding encs; /**< Decomposed encoding flags. */ + + /** Encoding converter used when reading from this IO. */ + RBIMPL_ATTR_DEPRECATED(("with no replacement")) rb_econv_t *readconv; + + /** + * rb_io_ungetc() destination. This buffer is read before checking + * ::rb_io_t::rbuf + */ + RBIMPL_ATTR_DEPRECATED(("with no replacement")) rb_io_buffer_t cbuf; + /** Encoding converter used when writing to this IO. 
*/ + RBIMPL_ATTR_DEPRECATED(("with no replacement")) rb_econv_t *writeconv; + + /** + * This is, when set, an instance of ::rb_cString which holds the "common" + * encoding. Write conversion can convert strings twice... In case + * conversion from encoding X to encoding Y does not exist, Ruby finds an + * encoding Z that bridges the two, so that X to Z to Y conversion happens. + */ + RBIMPL_ATTR_DEPRECATED(("with no replacement")) VALUE writeconv_asciicompat; + + /** Whether ::rb_io_t::writeconv is already set up. */ + RBIMPL_ATTR_DEPRECATED(("with no replacement")) int writeconv_initialized; + + /** + * Value of ::rb_io_t::rb_io_enc_t::ecflags stored right before + * initialising ::rb_io_t::writeconv. + */ + RBIMPL_ATTR_DEPRECATED(("with no replacement")) int writeconv_pre_ecflags; + + /** + * Value of ::rb_io_t::rb_io_enc_t::ecopts stored right before initialising + * ::rb_io_t::writeconv. + */ + RBIMPL_ATTR_DEPRECATED(("with no replacement")) VALUE writeconv_pre_ecopts; + /** + * This is a Ruby level mutex. It avoids multiple threads to write to an + * IO at once; helps for instance rb_io_puts() to ensure newlines right + * next to its arguments. + * + * This of course doesn't help inter-process IO interleaves, though. + */ + RBIMPL_ATTR_DEPRECATED(("with no replacement")) VALUE write_lock; -} rb_io_t; -typedef struct rb_io_enc_t rb_io_enc_t; + /** + * The timeout associated with this IO when performing blocking operations. + */ + RBIMPL_ATTR_DEPRECATED(("rb_io_timeout/rb_io_set_timeout")) + VALUE timeout; +}; +#endif -#define HAVE_RB_IO_T 1 +typedef struct rb_io rb_io_t; + +/** @alias{rb_io_enc_t} */ +typedef struct rb_io_encoding rb_io_enc_t; + +/** + * @name Possible flags for ::rb_io_t::mode + * + * @{ + */ +/** The IO is opened for reading. */ #define FMODE_READABLE 0x00000001 + +/** The IO is opened for writing. */ #define FMODE_WRITABLE 0x00000002 + +/** The IO is opened for both read/write. 
*/ #define FMODE_READWRITE (FMODE_READABLE|FMODE_WRITABLE) + +/** + * The IO is in "binary mode". This is not what everything rb_io_binmode() + * concerns. This low-level flag is to stop CR <-> CRLF conversions that would + * happen in the underlying operating system. + * + * Setting this one and #FMODE_TEXTMODE at the same time is a contradiction. + * Setting this one and #ECONV_NEWLINE_DECORATOR_MASK at the same time is also + * a contradiction. + */ #define FMODE_BINMODE 0x00000004 + +/** + * The IO is in "sync mode". All output is immediately flushed to the + * underlying operating system then. Can be set via rb_io_synchronized(), but + * there is no way except calling `IO#sync=` to reset. + */ #define FMODE_SYNC 0x00000008 + +/** + * The IO is a TTY. What is a TTY and what isn't depends on the underlying + * operating system's `isatty(3)` output. You cannot change this. + */ #define FMODE_TTY 0x00000010 + +/** + * Ruby eventually detects that the IO is bidirectional. For instance a TTY + * has such property. There are several other things known to be duplexed. + * Additionally you (extension library authors) can also implement your own + * bidirectional IO subclasses. One of such example is `Socket`. + */ #define FMODE_DUPLEX 0x00000020 + +/** + * The IO is opened for appending. This mode always writes at the end of the + * IO. Ruby manages this flag for record but basically the logic behind this + * mode is at the underlying operating system. We almost do nothing. + */ #define FMODE_APPEND 0x00000040 + +/** + * The IO is opened for creating. This makes sense only when the destination + * file does not exist at the time the IO object was created. This is the + * default mode for writing, but you can pass `"r+"` to `IO.open` etc., to + * reroute this creation. 
+ */ #define FMODE_CREATE 0x00000080 /* #define FMODE_NOREVLOOKUP 0x00000100 */ + +/** + * This flag amends the effect of #FMODE_CREATE, so that if there already is a + * file at the given path the operation fails. Using this you can be sure that + * the file you get is a fresh new one. + */ #define FMODE_EXCL 0x00000400 + +/** + * This flag amends the effect of #FMODE_CREATE, so that if there already is a + * file at the given path it gets truncated. + */ #define FMODE_TRUNC 0x00000800 + +/** + * The IO is in "text mode". On systems where such mode make sense, this flag + * changes the way the IO handles the contents. On POSIX systems it is + * basically a no-op, but with this flag set you can optionally let Ruby + * manually convert newlines, unlike when in binary mode: + * + * ```ruby + * IO.open("/p/a/t/h", "wt", crlf_newline: true) # "wb" is NG. + * ``` + * + * Setting this one and #FMODE_BINMODE at the same time is a contradiction. + */ #define FMODE_TEXTMODE 0x00001000 -/* #define FMODE_PREP 0x00010000 */ +/** + * This flag means that an IO object is wrapping an "external" file descriptor, + * which is owned by something outside the Ruby interpreter (usually a C extension). + * Ruby will not close this file when the IO object is garbage collected. + * If this flag is set, then IO#autoclose? is false, and vice-versa. + * + * This flag was previously called FMODE_PREP internally. + */ +#define FMODE_EXTERNAL 0x00010000 + +/* #define FMODE_SIGNAL_ON_EPIPE 0x00020000 */ + +/** + * This flag amends the encoding of the IO so that the BOM of the contents of + * the IO takes effect. + */ #define FMODE_SETENC_BY_BOM 0x00100000 /* #define FMODE_UNIX 0x00200000 */ /* #define FMODE_INET 0x00400000 */ /* #define FMODE_INET6 0x00800000 */ +/** @} */ + +/** + * Allocate a new IO object, with the given file descriptor. 
+ */ +VALUE rb_io_open_descriptor(VALUE klass, int descriptor, int mode, VALUE path, VALUE timeout, struct rb_io_encoding *encoding); + +/** + * Returns whether or not the underlying IO is closed. + * + * @return Whether the underlying IO is closed. + */ +VALUE rb_io_closed_p(VALUE io); + +/** + * Queries the underlying IO pointer. + * + * @param[in] obj An IO object. + * @param[out] fp A variable of type ::rb_io_t. + * @exception rb_eFrozenError `obj` is frozen. + * @exception rb_eIOError `obj` is closed. + * @post `fp` holds `obj`'s underlying IO. + */ #define RB_IO_POINTER(obj,fp) rb_io_check_closed((fp) = RFILE(rb_io_taint_check(obj))->fptr) + +/** + * This is an old name of #RB_IO_POINTER. Not sure if we want to deprecate + * this macro. There still are tons of usages out there in the wild. + */ #define GetOpenFile RB_IO_POINTER +/** + * Fills an IO object. This makes the best sense when called from inside of an + * `#initialize` method of a 3rd party extension library that inherits + * ::rb_cIO. + * + * If the passed IO is already opened for something it first closes that and + * opens a new one instead. + * + * @param[out] obj An IO object to fill in. + * @param[out] fp A variable of type ::rb_io_t. + * @exception rb_eTypeError `obj` is not ::RUBY_T_FILE. + * @post `fp` holds `obj`'s underlying IO. + */ #define RB_IO_OPEN(obj, fp) do {\ (fp) = rb_io_make_open_file(obj);\ } while (0) + +/** + * This is an old name of #RB_IO_OPEN. Not sure if we want to deprecate this + * macro. There still are usages out there in the wild. + */ #define MakeOpenFile RB_IO_OPEN +/** + * @private + * + * This is an implementation detail of #RB_IO_OPEN. People don't use it + * directly. + * + * @param[out] obj An IO object to fill in. + * @exception rb_eTypeError `obj` is not ::RUBY_T_FILE. + * @return `obj`'s backend IO. + * @post `obj` is initialised. + */ rb_io_t *rb_io_make_open_file(VALUE obj); +/** + * Finds or creates a stdio's file structure from a Ruby's one. 
This can be + * handy if you want to call an external API that accepts `FILE *`. + * + * @note Note however, that `FILE`s can have their own buffer. Mixing Ruby's + * and stdio's file are basically dangerous. Use with care. + * + * @param[in,out] fptr Target IO. + * @return A stdio's file, created if absent. + * @post `fptr` has its corresponding stdio's file. + * + * @internal + * + * We had rich support for `FILE` before! In the days of 1.8.x ::rb_io_t was + * like this: + * + * ```CXX + * typedef struct rb_io { + * FILE *f; // stdio ptr for read/write + * FILE *f2; // additional ptr for rw pipes + * int mode; // mode flags + * int pid; // child's pid (for pipes) + * int lineno; // number of lines read + * char *path; // pathname for file + * void (*finalize) _((struct rb_io*,int)); // finalize proc + * } rb_io_t; + *``` + * + * But we eventually abandoned this layout. It was too difficult. We could + * not have fine-grained control over the `f` field. + * + * - `FILE` tends to be an opaque struct. It does not interface well with + * `select(2)` etc. This makes IO multiplexing quite hard. Using stdio, + * there is arguably no portable way to know if `fwrite(3)` blocks. + * + * - Nonblocking mode, which is another core concept that enables IO + * multiplexing, does not interface with stdio routines at all. + * + * - Detection of duplexed IO is also hard for the same reason. + * + * - `feof(3)` is not portable. + * https://mail.python.org/pipermail/python-dev/2001-January/011390.html + * + * - Solaris was a thing back then. They could not have more than 256 `FILE` + * structures at a time. Their file descriptors were stored in an + * `unsigned char`. + * + * - It is next to impossible to avoid SEGV, especially when a thread tries to + * `ungetc(3)`-ing from a `FILE` which is `fread(3)`-ed by another one. + * + * In short, it is a bad idea to let someone else manage IO buffers, especially + * someone you cannot control.
This still applies to extension libraries + * methinks. Ruby doesn't prevent you from shooting yourself in the foot, but + * consider yourself warned here. + */ FILE *rb_io_stdio_file(rb_io_t *fptr); -FILE *rb_fdopen(int, const char*); +/** + * Identical to rb_io_stdio_file(), except it takes file descriptors instead of + * Ruby's IO. It can also be seen as a compatibility layer to wrap + * `fdopen(3)`. Nowadays all supporting systems, including Windows, have + * `fdopen`. Why not use them. + * + * @param[in] fd A file descriptor. + * @param[in] modestr C string, something like `"r+"`. + * @exception rb_eSystemCallError `fdopen` failed for some reason. + * @return A stdio's file associated with `fd`. + * @note Interpretation of `modestr` depends on the underlying operating + * system. On glibc you might be able to pass e.g. `"rm"`, but + * that's an extension to POSIX. + */ +FILE *rb_fdopen(int fd, const char *modestr); + +/** + * Maps a file mode string (that rb_file_open() takes) into a mixture of + * `FMODE_` flags. This for instance returns + * `FMODE_WRITABLE | FMODE_TRUNC | FMODE_CREATE | FMODE_EXCL` for `"wx"`. + * + * @note You cannot pass this return value to OS provided `open(2)` etc. + * + * @param[in] modestr File mode, in C's string. + * @exception rb_eArgError `modestr` is broken. + * @return A set of flags. + * + * @internal + * + * rb_io_modestr_fmode() is not a pure function because it raises. + */ int rb_io_modestr_fmode(const char *modestr); + +/** + * Identical to rb_io_modestr_fmode(), except it returns a mixture of `O_` + * flags. This for instance returns `O_WRONLY | O_TRUNC | O_CREAT | O_EXCL` for + * `"wx"`. + * + * @param[in] modestr File mode, in C's string. + * @exception rb_eArgError `modestr` is broken. + * @return A set of flags. + * + * @internal + * + * rb_io_modestr_oflags() is not a pure function because it raises. 
+ */ int rb_io_modestr_oflags(const char *modestr); -CONSTFUNC(int rb_io_oflags_fmode(int oflags)); -void rb_io_check_writable(rb_io_t*); -void rb_io_check_readable(rb_io_t*); + +RBIMPL_ATTR_CONST() +/** + * Converts an oflags (that rb_io_modestr_oflags() returns) to a fmode (that + * rb_io_mode_flags() returns). This is a purely functional operation. + * + * @param[in] oflags A set of `O_` flags. + * @return Corresponding set of `FMODE_` flags. + */ +int rb_io_oflags_fmode(int oflags); + +/** + * Asserts that an IO is opened for writing. + * + * @param[in] fptr An IO you want to write to. + * @exception rb_eIOError `fptr` is not for writing. + * @post Upon successful return `fptr` is ready for writing. + * + * @internal + * + * The parameter must have been `const rb_io_t *`. + */ +void rb_io_check_writable(rb_io_t *fptr); + +/** @alias{rb_io_check_byte_readable} */ +void rb_io_check_readable(rb_io_t *fptr); + +/** + * Asserts that an IO is opened for character-based reading. A character can + * be wider than a byte. Because of this we have to buffer reads from + * descriptors. This function checks if that is possible. + * + * @param[in] fptr An IO you want to read characters from. + * @exception rb_eIOError `fptr` is not for reading. + * @post Upon successful return `fptr` is ready for reading characters. + * + * @internal + * + * Unlike rb_io_check_writable() the parameter cannot be `const rb_io_t *`. + * Behind the scene this operation flushes its write buffers. This is because + * of OpenSSL. They mandate this way. + * + * @see "Can I use OpenSSL's SSL library with non-blocking I/O?" + * https://www.openssl.org/docs/faq.html + */ void rb_io_check_char_readable(rb_io_t *fptr); + +/** + * Asserts that an IO is opened for byte-based reading. Byte-based and + * character-based reading operations cannot be mixed at a time. + * + * @param[in] fptr An IO you want to read bytes from. + * @exception rb_eIOError `fptr` is not for reading.
+ * @post Upon successful return `fptr` is ready for reading bytes. + */ void rb_io_check_byte_readable(rb_io_t *fptr); -int rb_io_fptr_finalize(rb_io_t*); -void rb_io_synchronized(rb_io_t*); -void rb_io_check_initialized(rb_io_t*); -void rb_io_check_closed(rb_io_t*); + +/** + * Destroys the given IO. Any pending operations are flushed. + * + * @note It makes no sense to call this function from anywhere outside of your + * class' ::rb_data_type_struct::dfree. + * + * @param[out] fptr IO to close. + * @post `fptr` is no longer a valid pointer. + */ +int rb_io_fptr_finalize(rb_io_t *fptr); + +/** + * Sets #FMODE_SYNC. + * + * @note There is no way for C extensions to undo this operation. + * + * @param[out] fptr IO to set the flag. + * @exception rb_eIOError `fptr` is not opened. + * @post `fptr` is in sync mode. + */ +void rb_io_synchronized(rb_io_t *fptr); + +/** + * Asserts that the passed IO is initialised. + * + * @param[in] fptr IO that you expect be initialised. + * @exception rb_eIOError `fptr` is not initialised. + * @post `fptr` is initialised. + */ +void rb_io_check_initialized(rb_io_t *fptr); + +/** + * This badly named function asserts that the passed IO is _open_. + * + * @param[in] fptr An IO + * @exception rb_eIOError `fptr` is closed. + * @post `fptr` is open. + */ +void rb_io_check_closed(rb_io_t *fptr); + +/** + * Identical to rb_io_check_io(), except it raises exceptions on conversion + * failures. + * + * @param[in] io Target object. + * @exception rb_eTypeError No implicit conversion to IO. + * @return Return value of `obj.to_io`. + * @see rb_str_to_str + * @see rb_ary_to_ary + */ VALUE rb_io_get_io(VALUE io); + +/** + * Try converting an object to its IO representation using its `to_io` method, + * if any. If there is no such thing, returns ::RUBY_Qnil. + * + * @param[in] io Arbitrary ruby object to convert. + * @exception rb_eTypeError `obj.to_io` returned something non-IO. + * @retval RUBY_Qnil No conversion from `obj` to IO defined. 
+ * @retval otherwise Converted IO representation of `obj`. + * @see rb_check_array_type + * @see rb_check_string_type + * @see rb_check_hash_type + */ VALUE rb_io_check_io(VALUE io); + +/** + * Queries the tied IO for writing. An IO can be duplexed. Fine. The thing + * is, that characteristics could sometimes be achieved by the underlying + * operating system (for instance a socket's duplexity is by nature) but + * sometimes by us. Notable example is a bidirectional pipe. Suppose you + * have: + * + * ```ruby + * fp = IO.popen("-", "r+") + * ``` + * + * This pipe is duplexed (the `"r+"`). You can both read from/write to it. + * However your operating system may or may not implement bidirectional pipes. + * FreeBSD is one of such operating systems known to have one; OTOH Linux is + * known to lack such things. So to achieve maximum portability, Ruby's + * bidirectional pipes are done purely in user land. A pipe in ruby can have + * multiple file descriptors; one for reading and the other for writing. This + * API is to obtain the IO port which corresponds to the passed one, for + * writing. + * + * @param[in] io An IO. + * @return Its tied IO for writing, if any, or `io` itself otherwise. + */ VALUE rb_io_get_write_io(VALUE io); + +/** + * Assigns the tied IO for writing. See rb_io_get_write_io() for what a "tied + * IO for writing" is. + * + * @param[out] io An IO. + * @param[in] w Another IO. + * @retval RUBY_Qnil There was no tied IO for writing for `io`. + * @retval otherwise The IO formerly tied to `io`. + * @post `io` ties `w` for writing. + * + * @internal + * + * @shyouhei doesn't think there is any needs of this function for 3rd party + * extension libraries. + */ VALUE rb_io_set_write_io(VALUE io, VALUE w); + +/** + * Instructs the OS to put its internal file structure into "nonblocking mode". + * This is an in-Kernel concept. Reading from/writing to that file using C + * function calls would return -1 with errno set. 
However when it comes to a + * ruby program, we hide that error behind our `IO#read` method. Ruby level + * `IO#read` blocks regardless of this flag. If you want to avoid blocking, + * you should consider using methods like `IO#readpartial`. + * + * ```ruby + * require 'io/nonblock' + * STDIN.nonblock = true + * STDIN.gets # blocks. + * ``` + * + * As of writing there is a room of this API in Fiber schedulers. A Fiber + * scheduler could be written in a way its behaviour depends on this property. + * You need an in-depth understanding of how schedulers work to properly + * leverage this, though. + * + * @note Note however that nonblocking-ness propagates across process + * boundaries. You must really carefully watch your step when turning + * for instance `stderr` into nonblock mode (it tends to be shared + * across many processes). Also it is a complete disaster to mix a + * nonblocking file and stdio, and `stderr` tends to be under control of + * stdio in other processes. + * + * @param[out] fptr An IO that is to be nonblocking. + * @post Descriptor that `fptr` describes is under nonblocking mode. + * + * @internal + * + * There is `O_NONBLOCK` but not `FMODE_NONBLOCK`. You cannot atomically + * create a nonblocking file descriptor using our API. + */ void rb_io_set_nonblock(rb_io_t *fptr); + +/** + * Returns the path for the given IO. + * + */ +VALUE rb_io_path(VALUE io); + +/** + * Returns an integer representing the numeric file descriptor for + * <em>io</em>. + * + * @param[in] io An IO. + * @retval int A file descriptor. + */ +int rb_io_descriptor(VALUE io); + +/** + * Get the mode of the IO. + * + */ +int rb_io_mode(VALUE io); + +/** + * This function breaks down the option hash that `IO#initialize` takes into + * components. This is an implementation detail of rb_io_extract_modeenc() + * today. People prefer that API instead. + * + * @param[in] opt The hash to decompose. + * @param[out] enc_p Return value buffer. + * @param[out] enc2_p Return value buffer.
+ * @param[out] fmode_p Return value buffer. + * @exception rb_eTypeError `opt` is broken. + * @exception rb_eArgError Specified encoding does not exist. + * @retval 1 Components got extracted. + * @retval 0 Otherwise. + * @post `enc_p` is the specified internal encoding. + * @post `enc2_p` is the specified external encoding. + * @post `fmode_p` is the specified set of `FMODE_` modes. + */ int rb_io_extract_encoding_option(VALUE opt, rb_encoding **enc_p, rb_encoding **enc2_p, int *fmode_p); + +/** + * This function can be seen as an extended version of + * rb_io_extract_encoding_option() that not only concerns the option hash but + * also mode string and so on. This should be mixed with rb_scan_args() like: + * + * ```CXX + * // This method mimics File.new + * static VALUE + * your_method(int argc, const VALUE *argv, VALUE self) + * { + * VALUE f; // file name + * VALUE m; // open mode + * VALUE p; // permission (O_CREAT) + * VALUE k; // keywords + * rb_io_enc_t c; // converter + * int oflags; + * int fmode; + * + * int n = rb_scan_args(argc, argv, "12:", &f, &m, &p, &k); + * rb_io_extract_modeenc(&m, &p, k, &oflags, &fmode, &c); + * + * // Every local variables declared so far has been properly filled here. + * ... + * } + * ``` + * + * @param[in,out] vmode_p Pointer to a mode object. + * @param[in,out] vperm_p Pointer to a permission object. + * @param[in] opthash Keyword arguments + * @param[out] oflags_p `O_` flags return buffer. + * @param[out] fmode_p `FMODE_` flags return buffer. + * @param[out] convconfig_p Encoding config return buffer. + * @exception rb_eTypeError Unexpected object (e.g. Time) passed. + * @exception rb_eArgError Contradiction inside of params. + * @post `*vmode_p` is a mode object (filled if any). + * @post `*vperm_p` is a permission object (filled if any). + * @post `*oflags_p` is filled with `O_` flags. + * @post `*fmode_p` is filled with `FMODE_` flags. + * @post `*convconfig_p` is filled with conversion instructions. 
+ * + * @internal + * + * ```rbs + * class File + * def initialize: ( + * (String | int) path, + * ?(String | int) fmode, + * ?(String | int) perm, + * ?mode: (String | int), + * ?flags: int, + * ?external_encoding: (Encoding | String), + * ?internal_encoding: (Encoding | String), + * ?encoding: String, + * ?textmode: bool, + * ?binmode: bool, + * ?autoclose: bool, + * ?invalid: :replace, + * ?undef: :replace, + * ?replace: String, + * ?fallback: (Hash | Proc | Method), + * ?xml: (:text | :attr), + * ?crlf_newline: bool, + * ?cr_newline: bool, + * ?universal_newline: bool + * ) -> void + * ``` + */ void rb_io_extract_modeenc(VALUE *vmode_p, VALUE *vperm_p, VALUE opthash, int *oflags_p, int *fmode_p, rb_io_enc_t *convconfig_p); + +/* :TODO: can this function be __attribute__((warn_unused_result)) or not? */ +/** + * Buffered write to the passed IO. + * + * @param[out] io Destination IO. + * @param[in] buf Contents to go to `io`. + * @param[in] size Number of bytes of `buf`. + * @exception rb_eFrozenError `io` is frozen. + * @exception rb_eIOError `io` is not open for writing. + * @exception rb_eSystemCallError `writev(2)` failed for some reason. + * @retval -1 Write failed. + * @retval otherwise Number of bytes actually written. + * @post `buf` is written to `io`. + * @note Partial write is a thing. It is a failure not to check the + * return value. + */ ssize_t rb_io_bufwrite(VALUE io, const void *buf, size_t size); //RBIMPL_ATTR_DEPRECATED(("use rb_io_maybe_wait_readable")) +/** + * Blocks until the passed file descriptor gets readable. + * + * @deprecated We now prefer rb_io_maybe_wait_readable() over this one. + * @param[in] fd The file descriptor to wait. + * @exception rb_eIOError Bad file descriptor. + * @return 0 or 1 (meaning unclear). + * @post `fd` is ready for reading. + */ int rb_io_wait_readable(int fd); //RBIMPL_ATTR_DEPRECATED(("use rb_io_maybe_wait_writable")) +/** + * Blocks until the passed file descriptor gets writable. 
+ * + * @deprecated We now prefer rb_io_maybe_wait_writable() over this one. + * @param[in] fd The file descriptor to wait. + * @exception rb_eIOError Bad file descriptor. + * @return 0 or 1 (meaning unclear). + */ int rb_io_wait_writable(int fd); //RBIMPL_ATTR_DEPRECATED(("use rb_io_wait")) +/** + * Blocks until the passed file descriptor is ready for the passed events. + * + * @deprecated We now prefer rb_io_maybe_wait() over this one. + * @param[in] fd The file descriptor to wait. + * @param[in] events A set of enum ::rb_io_event_t. + * @param[in,out] tv Timeout. + * @retval 0 Operation timed out. + * @retval -1 `select(2)` failed for some reason. + * @retval otherwise A set of enum ::rb_io_event_t. + * @note Depending on your operating system `tv` might or might not + * be updated (POSIX permits both). Portable programs must + * have no assumptions. + */ int rb_wait_for_single_fd(int fd, int events, struct timeval *tv); +/** + * Get the timeout associated with the specified io object. + * + * @param[in] io An IO object. + * @retval RUBY_Qnil There is no associated timeout. + * @retval Otherwise The timeout value. + */ +VALUE rb_io_timeout(VALUE io); + +/** + * Set the timeout associated with the specified io object. This timeout is + * used as a best effort timeout to prevent operations from blocking forever. + * + * @param[in] io An IO object. + * @param[in] timeout A timeout value. Must respond to #to_f. + * @ + */ +VALUE rb_io_set_timeout(VALUE io, VALUE timeout); + +/** + * Blocks until the passed IO is ready for the passed events. The "events" + * here is a Ruby level integer, which is an OR-ed value of `IO::READABLE`, + * `IO::WRITABLE`, and `IO::PRIORITY`. + * + * If timeout is `Qnil`, it will use the default timeout as given by + * `rb_io_timeout(io)`. + * + * @param[in] io An IO object to wait. + * @param[in] events See above. + * @param[in] timeout Time, or numeric seconds since UNIX epoch. + * If Qnil, use the default timeout. 
If Qfalse + * or Qundef, wait forever. + * @exception rb_eIOError `io` is not open. + * @exception rb_eRangeError `timeout` is out of range. + * @exception rb_eSystemCallError `select(2)` failed for some reason. + * @retval RUBY_Qfalse Operation timed out. + * @retval Otherwise Actual events reached. + */ VALUE rb_io_wait(VALUE io, VALUE events, VALUE timeout); + +/** + * Identical to rb_io_wait() except it additionally takes previous errno. If + * the passed errno indicates for instance `EINTR`, this function returns + * immediately. This is expected to be called in a loop. + * + * ```CXX + * while (true) { + * + * ... // Your interesting operation here + * // `errno` could be updated + * + * rb_io_maybe_wait(errno, io, ev, Qnil); + * } + * ``` + * + * @param[in] error System errno. + * @param[in] io An IO object to wait. + * @param[in] events An integer set of interests. + * @param[in] timeout Time, or numeric seconds since UNIX epoch. + * @exception rb_eIOError `io` is not open. + * @exception rb_eRangeError `timeout` is out of range. + * @exception rb_eSystemCallError `select(2)` failed for some reason. + * @retval RUBY_Qfalse Operation timed out. + * @retval Otherwise Actual events reached. + * + * @internal + * + * This function to return ::RUBY_Qfalse on timeout could be unintended. It + * seems timeout feature has some rough edge. + */ VALUE rb_io_maybe_wait(int error, VALUE io, VALUE events, VALUE timeout); + +/** + * Blocks until the passed IO is ready for reading, if that makes sense for the + * passed errno. This is a special case of rb_io_maybe_wait() that only + * concerns for reading. + * + * @param[in] error System errno. + * @param[in] io An IO object to wait. + * @param[in] timeout Time, or numeric seconds since UNIX epoch. + * @exception rb_eIOError `io` is not open. + * @exception rb_eRangeError `timeout` is out of range. + * @exception rb_eSystemCallError `select(2)` failed for some reason. + * @retval 0 Operation timed out. 
+ * @retval Otherwise Always returns ::RUBY_IO_READABLE. + */ int rb_io_maybe_wait_readable(int error, VALUE io, VALUE timeout); + +/** + * Blocks until the passed IO is ready for writing, if that makes sense for the + * passed errno. This is a special case of rb_io_maybe_wait() that only + * concerns for writing. + * + * @param[in] error System errno. + * @param[in] io An IO object to wait. + * @param[in] timeout Time, or numeric seconds since UNIX epoch. + * @exception rb_eIOError `io` is not open. + * @exception rb_eRangeError `timeout` is out of range. + * @exception rb_eSystemCallError `select(2)` failed for some reason. + * @retval 0 Operation timed out. + * @retval Otherwise Always returns ::RUBY_IO_WRITABLE. + */ int rb_io_maybe_wait_writable(int error, VALUE io, VALUE timeout); +/** @cond INTERNAL_MACRO */ /* compatibility for ruby 1.8 and older */ #define rb_io_mode_flags(modestr) [<"rb_io_mode_flags() is obsolete; use rb_io_modestr_fmode()">] #define rb_io_modenum_flags(oflags) [<"rb_io_modenum_flags() is obsolete; use rb_io_oflags_fmode()">] +/** @endcond */ -VALUE rb_io_taint_check(VALUE); -NORETURN(void rb_eof_error(void)); +/** + * @deprecated This function once was a thing in the old days, but makes no + * sense any longer today. Exists here for backwards + * compatibility only. You can safely forget about it. + * + * @param[in] obj Object in question. + * @exception rb_eFrozenError obj is frozen. + * @return The passed `obj` + */ +VALUE rb_io_taint_check(VALUE obj); -void rb_io_read_check(rb_io_t*); -int rb_io_read_pending(rb_io_t*); +RBIMPL_ATTR_NORETURN() +/** + * Utility function to raise ::rb_eEOFError. + * + * @exception rb_eEOFError End of file situation. + * @note It never returns. + */ +void rb_eof_error(void); -struct stat; -VALUE rb_stat_new(const struct stat *); +/** + * Blocks until there is a pending read in the passed IO. If there already is + * it just returns. + * + * @param[out] fptr An IO to wait for reading. 
+ * @post There are bytes to be read. + */ +void rb_io_read_check(rb_io_t *fptr); + +RBIMPL_ATTR_PURE() +/** + * Queries if the passed IO has any pending reads. Unlike rb_io_read_check() + * this doesn't block; has no side effects. + * + * @param[in] fptr An IO which can have pending reads. + * @retval 0 The IO is empty. + * @retval 1 There is something buffered. + */ +int rb_io_read_pending(rb_io_t *fptr); + +/** + * Constructs an instance of ::rb_cStat from the passed information. + * + * @param[in] st A stat. + * @return Allocated new instance of ::rb_cStat. + */ +VALUE rb_stat_new(const struct stat *st); /* gc.c */ diff --git a/include/ruby/io/buffer.h b/include/ruby/io/buffer.h new file mode 100644 index 0000000000..e4d98bf051 --- /dev/null +++ b/include/ruby/io/buffer.h @@ -0,0 +1,110 @@ +#ifndef RUBY_IO_BUFFER_H +#define RUBY_IO_BUFFER_H +/** + * @file + * @author Samuel Williams + * @date Fri 2 Jul 2021 16:29:01 NZST + * @copyright Copyright (C) 2021 Samuel Williams + * @copyright This file is a part of the programming language Ruby. + * Permission is hereby granted, to either redistribute and/or + * modify this file, provided that the conditions mentioned in the + * file COPYING are met. Consult the file for details. + */ + +#pragma once + +#include "ruby/ruby.h" +#include "ruby/internal/config.h" + +RBIMPL_SYMBOL_EXPORT_BEGIN() + +// WARNING: This entire interface is experimental and may change in the future! +#define RB_IO_BUFFER_EXPERIMENTAL 1 + +#define RUBY_IO_BUFFER_VERSION 2 + +// The `IO::Buffer` class. +RUBY_EXTERN VALUE rb_cIOBuffer; + +// The operating system page size. +RUBY_EXTERN size_t RUBY_IO_BUFFER_PAGE_SIZE; + +// The default buffer size, usually a (small) multiple of the page size. +// Can be overridden by the RUBY_IO_BUFFER_DEFAULT_SIZE environment variable. +RUBY_EXTERN size_t RUBY_IO_BUFFER_DEFAULT_SIZE; + +// Represents the internal state of the buffer. +// More than one flag can be set at a time. 
+enum rb_io_buffer_flags { + // The memory in the buffer is owned by someone else. + // More specifically, it means that someone else owns the buffer and we shouldn't try to resize it. + RB_IO_BUFFER_EXTERNAL = 1, + // The memory in the buffer is allocated internally. + RB_IO_BUFFER_INTERNAL = 2, + // The memory in the buffer is mapped. + // A non-private mapping is marked as external. + RB_IO_BUFFER_MAPPED = 4, + + // A mapped buffer that is also shared. + RB_IO_BUFFER_SHARED = 8, + + // The buffer is locked and cannot be resized. + // More specifically, it means we can't change the base address or size. + // A buffer is typically locked before a system call that uses the data. + RB_IO_BUFFER_LOCKED = 32, + + // The buffer mapping is private and will not impact other processes or the underlying file. + RB_IO_BUFFER_PRIVATE = 64, + + // The buffer is read-only and cannot be modified. + RB_IO_BUFFER_READONLY = 128, + + // The buffer is backed by a file. + RB_IO_BUFFER_FILE = 256, +}; + +// Represents the endianness of the data types. +enum rb_io_buffer_endian { + // The least significant units are put first. + RB_IO_BUFFER_LITTLE_ENDIAN = 4, + RB_IO_BUFFER_BIG_ENDIAN = 8, + +#if defined(WORDS_BIGENDIAN) + RB_IO_BUFFER_HOST_ENDIAN = RB_IO_BUFFER_BIG_ENDIAN, +#else + RB_IO_BUFFER_HOST_ENDIAN = RB_IO_BUFFER_LITTLE_ENDIAN, +#endif + + RB_IO_BUFFER_NETWORK_ENDIAN = RB_IO_BUFFER_BIG_ENDIAN +}; + +VALUE rb_io_buffer_new(void *base, size_t size, enum rb_io_buffer_flags flags); +VALUE rb_io_buffer_map(VALUE io, size_t size, rb_off_t offset, enum rb_io_buffer_flags flags); + +VALUE rb_io_buffer_lock(VALUE self); +VALUE rb_io_buffer_unlock(VALUE self); +int rb_io_buffer_try_unlock(VALUE self); + +VALUE rb_io_buffer_free(VALUE self); +VALUE rb_io_buffer_free_locked(VALUE self); + +// Access the internal buffer and flags. Validates the pointers. +// The pointers may not remain valid if the source buffer is manipulated. +// Consider using rb_io_buffer_lock if needed. 
+enum rb_io_buffer_flags rb_io_buffer_get_bytes(VALUE self, void **base, size_t *size); +void rb_io_buffer_get_bytes_for_reading(VALUE self, const void **base, size_t *size); +void rb_io_buffer_get_bytes_for_writing(VALUE self, void **base, size_t *size); + +VALUE rb_io_buffer_transfer(VALUE self); +void rb_io_buffer_resize(VALUE self, size_t size); +void rb_io_buffer_clear(VALUE self, uint8_t value, size_t offset, size_t length); + +// The length is the minimum required length. +VALUE rb_io_buffer_read(VALUE self, VALUE io, size_t length, size_t offset); +VALUE rb_io_buffer_pread(VALUE self, VALUE io, rb_off_t from, size_t length, size_t offset); +VALUE rb_io_buffer_write(VALUE self, VALUE io, size_t length, size_t offset); +VALUE rb_io_buffer_pwrite(VALUE self, VALUE io, rb_off_t from, size_t length, size_t offset); + +RBIMPL_SYMBOL_EXPORT_END() + +#endif /* RUBY_IO_BUFFER_H */ diff --git a/include/ruby/memory_view.h b/include/ruby/memory_view.h index 4492c06b38..42309d5afc 100644 --- a/include/ruby/memory_view.h +++ b/include/ruby/memory_view.h @@ -1,4 +1,4 @@ -#ifndef RUBY_MEMORY_VIEW_H +#ifndef RUBY_MEMORY_VIEW_H /*-*-C++-*-vi:se ft=cpp:*/ #define RUBY_MEMORY_VIEW_H 1 /** * @file @@ -10,11 +10,25 @@ * @brief Memory View. */ -#include "ruby/internal/dllexport.h" -#include "ruby/internal/stdbool.h" -#include "ruby/internal/value.h" -#include "ruby/intern.h" +#include "ruby/internal/config.h" +#ifdef STDC_HEADERS +# include <stddef.h> /* size_t */ +#endif + +#ifdef HAVE_SYS_TYPES_H +# include <sys/types.h> /* ssize_t */ +#endif + +#include "ruby/internal/attr/pure.h" /* RBIMPL_ATTR_PURE */ +#include "ruby/internal/core/rtypeddata.h" /* rb_data_type_t */ +#include "ruby/internal/dllexport.h" /* RUBY_EXTERN */ +#include "ruby/internal/stdbool.h" /* bool */ +#include "ruby/internal/value.h" /* VALUE */ + +/** + * Flags passed to rb_memory_view_get(), then to ::rb_memory_view_get_func_t. 
+ */ enum ruby_memory_view_flags { RUBY_MEMORY_VIEW_SIMPLE = 0, RUBY_MEMORY_VIEW_WRITABLE = (1<<0), @@ -27,29 +41,56 @@ enum ruby_memory_view_flags { RUBY_MEMORY_VIEW_INDIRECT = (1<<6) | RUBY_MEMORY_VIEW_STRIDES, }; +/** Memory view component metadata. */ typedef struct { + /** @see ::rb_memory_view_t::format */ char format; - unsigned native_size_p: 1; - unsigned little_endian_p: 1; + + /** :FIXME: what is a "native" size is unclear. */ + bool native_size_p; + + /** Endian of the component */ + bool little_endian_p; + + /** The component's offset. */ size_t offset; + + /** The component's size. */ size_t size; + + /** + * How many numbers of components are there. For instance "CCC"'s repeat is + * 3. + */ size_t repeat; } rb_memory_view_item_component_t; +/** + * A MemoryView structure, `rb_memory_view_t`, is used for exporting objects' + * MemoryView. + * + * This structure contains the reference of the object, which is the owner of + * the MemoryView, the pointer to the head of exported memory, and the metadata + * that describes the structure of the memory. The metadata can describe + * multidimensional arrays with strides. + */ typedef struct { - /* The original object that has the memory exported via this memory view. */ + /** + * The original object that has the memory exported via this memory view. + */ VALUE obj; - /* The pointer to the exported memory. */ + /** The pointer to the exported memory. */ void *data; - /* The number of bytes in data. */ + /** The number of bytes in data. */ ssize_t byte_size; - /* true for readonly memory, false for writable memory. */ + /** true for readonly memory, false for writable memory. */ bool readonly; - /* A string to describe the format of an element, or NULL for unsigned bytes. + /** + * A string to describe the format of an element, or NULL for unsigned bytes. 
* The format string is a sequence of the following pack-template specifiers: * * c, C, s, s!, S, S!, n, v, i, i!, I, I!, l, l!, L, L!, @@ -69,83 +110,204 @@ typedef struct { */ const char *format; - /* The number of bytes in each element. + /** + * The number of bytes in each element. * item_size should equal to rb_memory_view_item_size_from_format(format). */ ssize_t item_size; + /** Description of each components. */ struct { - /* The array of rb_memory_view_item_component_t that describes the + /** + * The array of rb_memory_view_item_component_t that describes the * item structure. rb_memory_view_prepare_item_desc and * rb_memory_view_get_item allocate this memory if needed, * and rb_memory_view_release frees it. */ const rb_memory_view_item_component_t *components; - /* The number of components in an item. */ + /** The number of components in an item. */ size_t length; } item_desc; - /* The number of dimension. */ + /** The number of dimension. */ ssize_t ndim; - /* ndim size array indicating the number of elements in each dimension. + /** + * ndim size array indicating the number of elements in each dimension. * This can be NULL when ndim == 1. */ const ssize_t *shape; - /* ndim size array indicating the number of bytes to skip to go to the + /** + * ndim size array indicating the number of bytes to skip to go to the * next element in each dimension. */ const ssize_t *strides; - /* The offset in each dimension when this memory view exposes a nested array. + /** + * The offset in each dimension when this memory view exposes a nested array. * Or, NULL when this memory view exposes a flat array. 
*/ const ssize_t *sub_offsets; - /* the private data for managing this exported memory */ - void *const private; + /** The private data for managing this exported memory */ + void *private_data; + + /** DO NOT TOUCH THIS: The memory view entry for the internal use */ + const struct rb_memory_view_entry *_memory_view_entry; } rb_memory_view_t; +/** Type of function of ::rb_memory_view_entry_t::get_func. */ typedef bool (* rb_memory_view_get_func_t)(VALUE obj, rb_memory_view_t *view, int flags); + +/** Type of function of ::rb_memory_view_entry_t::release_func. */ typedef bool (* rb_memory_view_release_func_t)(VALUE obj, rb_memory_view_t *view); + +/** Type of function of ::rb_memory_view_entry_t::available_p_func. */ typedef bool (* rb_memory_view_available_p_func_t)(VALUE obj); -typedef struct { +/** Operations applied to a specific kind of a memory view. */ +typedef struct rb_memory_view_entry { + /** + * Exports a memory view from a Ruby object. + */ rb_memory_view_get_func_t get_func; + + /** + * Releases a memory view that was previously generated using + * ::rb_memory_view_entry_t::get_func. + */ rb_memory_view_release_func_t release_func; + + /** + * Queries if an object understands memory view protocol. + */ rb_memory_view_available_p_func_t available_p_func; } rb_memory_view_entry_t; RBIMPL_SYMBOL_EXPORT_BEGIN() /* memory_view.c */ + +/** + * Associates the passed class with the passed memory view entry. This has to + * be called before actually creating a memory view from an instance. + */ bool rb_memory_view_register(VALUE klass, const rb_memory_view_entry_t *entry); RBIMPL_ATTR_PURE() +/** + * Return `true` if the data in the MemoryView `view` is row-major contiguous. + * + * Return `false` otherwise. + */ bool rb_memory_view_is_row_major_contiguous(const rb_memory_view_t *view); + RBIMPL_ATTR_PURE() +/** + * Return `true` if the data in the MemoryView `view` is column-major + * contiguous. + * + * Return `false` otherwise. 
+ */ bool rb_memory_view_is_column_major_contiguous(const rb_memory_view_t *view); + RBIMPL_ATTR_NOALIAS() +/** + * Fill the `strides` array with byte-strides of a contiguous array of the + * given shape with the given element size. + */ void rb_memory_view_fill_contiguous_strides(const ssize_t ndim, const ssize_t item_size, const ssize_t *const shape, const bool row_major_p, ssize_t *const strides); + RBIMPL_ATTR_NOALIAS() +/** + * Fill the members of `view` as a 1-dimensional byte array. + */ bool rb_memory_view_init_as_byte_array(rb_memory_view_t *view, VALUE obj, void *data, const ssize_t len, const bool readonly); + +/** + * Deconstructs the passed format string, as described in + * ::rb_memory_view_t::format. + */ ssize_t rb_memory_view_parse_item_format(const char *format, rb_memory_view_item_component_t **members, size_t *n_members, const char **err); + +/** + * Calculate the number of bytes occupied by an element. + * + * When the calculation fails, the failed location in `format` is stored into + * `err`, and returns `-1`. + */ ssize_t rb_memory_view_item_size_from_format(const char *format, const char **err); + +/** + * Calculate the location of the item indicated by the given `indices`. + * + * The length of `indices` must be equal to `view->ndim`. + * + * This function initializes `view->item_desc` if needed. + */ void *rb_memory_view_get_item_pointer(rb_memory_view_t *view, const ssize_t *indices); + +/** + * Return a value that consists of item members. + * + * When an item is a single member, the return value is a single value. + * + * When an item consists of multiple members, an array will be returned. + */ VALUE rb_memory_view_extract_item_members(const void *ptr, const rb_memory_view_item_component_t *members, const size_t n_members); + +/** Fill the `item_desc` member of `view`. */ void rb_memory_view_prepare_item_desc(rb_memory_view_t *view); + +/** * Return a value that consists of item members in the given memory view. 
*/ VALUE rb_memory_view_get_item(rb_memory_view_t *view, const ssize_t *indices); +/** + * Return `true` if `obj` supports exporting a MemoryView. Return `false` + * otherwise. + * + * If this function returns `true`, it doesn't mean the function + * `rb_memory_view_get` will succeed. + */ bool rb_memory_view_available_p(VALUE obj); + +/** + * If the given `obj` supports exporting a MemoryView that conforms to the given + * `flags`, this function fills `view` with the information of the MemoryView and + * returns `true`. In this case, the reference count of `obj` is increased. + * + * If the given combination of `obj` and `flags` cannot export a MemoryView, + * this function returns `false`. The content of `view` is not touched in this + * case. + * + * The exported MemoryView must be released by `rb_memory_view_release` when + * the MemoryView is no longer needed. + */ bool rb_memory_view_get(VALUE obj, rb_memory_view_t* memory_view, int flags); + +/** + * Release the given MemoryView `view` and decrement the reference count of + * `memory_view->obj`. + * + * Consumers must call this function when the MemoryView is no longer needed. + * Failing to call this function leads to a memory leak. + */ bool rb_memory_view_release(rb_memory_view_t* memory_view); /* for testing */ +/** @cond INTERNAL_MACRO */ RUBY_EXTERN VALUE rb_memory_view_exported_object_registry; RUBY_EXTERN const rb_data_type_t rb_memory_view_exported_object_registry_data_type; +/** @endcond */ RBIMPL_SYMBOL_EXPORT_END() RBIMPL_ATTR_PURE() +/** + * Return `true` if the data in the MemoryView `view` is row-major or + * column-major contiguous. + * + * Return `false` otherwise. 
+ */ static inline bool rb_memory_view_is_contiguous(const rb_memory_view_t *view) { diff --git a/include/ruby/missing.h b/include/ruby/missing.h index 7d551248ae..aea6c9088d 100644 --- a/include/ruby/missing.h +++ b/include/ruby/missing.h @@ -1,7 +1,6 @@ #ifndef RUBY_MISSING_H /*-*-C++-*-vi:se ft=cpp:*/ #define RUBY_MISSING_H 1 /** - * @file * @author $Author$ * @date Sat May 11 23:46:03 JST 2002 * @copyright This file is a part of the programming language Ruby. @@ -34,11 +33,24 @@ # include <sys/time.h> #endif +#ifdef HAVE_SYS_STAT_H +# include <sys/stat.h> +#endif + +#ifdef HAVE_UNISTD_H +# include <unistd.h> +#endif + +#ifdef HAVE_STDIO_H +# include <stdio.h> +#endif + #ifdef HAVE_IEEEFP_H # include <ieeefp.h> #endif #include "ruby/internal/dllexport.h" +#include "ruby/internal/attr/format.h" #ifndef M_PI # define M_PI 3.14159265358979323846 @@ -92,10 +104,6 @@ RUBY_EXTERN int eaccess(const char*, int); RUBY_EXTERN double round(double); /* numeric.c */ #endif -#ifndef HAVE_FINITE -RUBY_EXTERN int finite(double); -#endif - #ifndef HAVE_FLOCK RUBY_EXTERN int flock(int, int); #endif @@ -152,35 +160,9 @@ RUBY_EXTERN const union bytesequence4_or_float rb_nan; # define HUGE_VAL ((double)INFINITY) #endif -#if defined(isinf) -# /* Take that. */ -#elif defined(HAVE_ISINF) -# /* Take that. */ -#elif defined(HAVE_FINITE) && defined(HAVE_ISNAN) -# define isinf(x) (!finite(x) && !isnan(x)) -#elif defined(__cplusplus) && __cplusplus >= 201103L -# // <cmath> must include constexpr bool isinf(double); -#else -RUBY_EXTERN int isinf(double); -#endif - -#if defined(isnan) -# /* Take that. */ -#elif defined(HAVE_ISNAN) -# /* Take that. */ -#elif defined(__cplusplus) && __cplusplus >= 201103L -# // <cmath> must include constexpr bool isnan(double); -#else -RUBY_EXTERN int isnan(double); -#endif - -#if defined(isfinite) -# /* Take that. */ -#elif defined(HAVE_ISFINITE) -# /* Take that. 
*/ -#else -# define HAVE_ISFINITE 1 -# define isfinite(x) finite(x) +#ifndef HAVE_FINITE +# define HAVE_FINITE 1 +# define finite(x) isfinite(x) #endif #ifndef HAVE_NAN @@ -228,10 +210,6 @@ RUBY_EXTERN size_t strlcpy(char *, const char*, size_t); RUBY_EXTERN size_t strlcat(char *, const char*, size_t); #endif -#ifndef HAVE_SIGNBIT -RUBY_EXTERN int signbit(double x); -#endif - #ifndef HAVE_FFS RUBY_EXTERN int ffs(int); #endif @@ -246,6 +224,7 @@ RUBY_EXTERN int ruby_close(int); #endif #ifndef HAVE_SETPROCTITLE +RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 1, 2) RUBY_EXTERN void setproctitle(const char *fmt, ...); #endif @@ -257,6 +236,107 @@ RUBY_EXTERN void setproctitle(const char *fmt, ...); RUBY_EXTERN void explicit_bzero(void *b, size_t len); #endif +#ifndef HAVE_TZSET +RUBY_EXTERN void tzset(void); +#endif + +#ifndef HAVE_POSIX_MADVISE +RUBY_EXTERN int posix_madvise(void *, size_t, int); +#endif + +#ifndef HAVE_GETEUID +RUBY_EXTERN rb_uid_t geteuid(void); +#endif + +#ifndef HAVE_GETUID +RUBY_EXTERN rb_uid_t getuid(void); +#endif + +#ifndef HAVE_GETEGID +RUBY_EXTERN rb_gid_t getegid(void); +#endif + +#ifndef HAVE_GETGID +RUBY_EXTERN rb_gid_t getgid(void); +#endif + +#ifndef HAVE_GETLOGIN +RUBY_EXTERN char *getlogin(void); +#endif + +#ifndef HAVE_GETPPID +RUBY_EXTERN rb_pid_t getppid(void); +#endif + +#ifndef HAVE_UMASK +RUBY_EXTERN rb_mode_t umask(rb_mode_t); +#endif + +#ifndef HAVE_CHMOD +RUBY_EXTERN int chmod(const char *, rb_mode_t); +#endif + +#ifndef HAVE_CHOWN +RUBY_EXTERN int chown(const char *, rb_uid_t, rb_gid_t); +#endif + +#ifndef HAVE_PCLOSE +RUBY_EXTERN int pclose(FILE *); +#endif + +#ifndef HAVE_POPEN +RUBY_EXTERN FILE *popen(const char *, const char *); +#endif + +#ifndef HAVE_PIPE +RUBY_EXTERN int pipe(int [2]); +#endif + +#ifndef HAVE_DUP +RUBY_EXTERN int dup(int); +#endif + +#ifndef HAVE_DUP2 +RUBY_EXTERN int dup2(int, int); +#endif + +#ifndef HAVE_KILL +RUBY_EXTERN int kill(rb_pid_t, int); +#endif + +#ifndef HAVE_EXECL +RUBY_EXTERN int execl(const 
char *, const char *, ...); +#endif + +#ifndef HAVE_EXECLE +RUBY_EXTERN int execle(const char *, const char *, ...); +#endif + +#ifndef HAVE_EXECV +RUBY_EXTERN int execv(const char *, char *const []); +#endif + +#ifndef HAVE_EXECVE +RUBY_EXTERN int execve(const char *, char *const [], char *const []); +#endif + +#ifndef HAVE_SHUTDOWN +RUBY_EXTERN int shutdown(int, int); +#endif + +#ifndef HAVE_SYSTEM +RUBY_EXTERN int system(const char *); +#endif + +#ifndef WNOHANG +# define WNOHANG 0 +#endif + +#ifndef HAVE_WAITPID +# define HAVE_WAITPID 1 +RUBY_EXTERN rb_pid_t waitpid(rb_pid_t, int *, int); +#endif + RBIMPL_SYMBOL_EXPORT_END() #endif /* RUBY_MISSING_H */ diff --git a/include/ruby/onigmo.h b/include/ruby/onigmo.h index 6187b37dc3..d233336316 100644 --- a/include/ruby/onigmo.h +++ b/include/ruby/onigmo.h @@ -356,9 +356,9 @@ int onigenc_ascii_only_case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, c #define ONIGENC_PRECISE_MBC_ENC_LEN(enc,p,e) (enc)->precise_mbc_enc_len(p,e,enc) ONIG_EXTERN -int onigenc_mbclen_approximate(const OnigUChar* p,const OnigUChar* e, const struct OnigEncodingTypeST* enc); +int onigenc_mbclen(const OnigUChar* p,const OnigUChar* e, const struct OnigEncodingTypeST* enc); -#define ONIGENC_MBC_ENC_LEN(enc,p,e) onigenc_mbclen_approximate(p,e,enc) +#define ONIGENC_MBC_ENC_LEN(enc,p,e) onigenc_mbclen(p,e,enc) #define ONIGENC_MBC_MAXLEN(enc) ((enc)->max_enc_len) #define ONIGENC_MBC_MAXLEN_DIST(enc) ONIGENC_MBC_MAXLEN(enc) #define ONIGENC_MBC_MINLEN(enc) ((enc)->min_enc_len) @@ -793,6 +793,13 @@ typedef struct re_pattern_buffer { OnigDistance dmin; /* min-distance of exact or map */ OnigDistance dmax; /* max-distance of exact or map */ + /* rb_hrtime_t from hrtime.h */ +#ifdef MY_RUBY_BUILD_MAY_TIME_TRAVEL + int128_t timelimit; +#else + uint64_t timelimit; +#endif + /* regex_t link chain */ struct re_pattern_buffer* chain; /* escape compile-conflict */ } OnigRegexType; @@ -837,6 +844,8 @@ void onig_free(OnigRegex); ONIG_EXTERN void 
onig_free_body(OnigRegex); ONIG_EXTERN +int onig_reg_copy(OnigRegex* reg, OnigRegex orig_reg); +ONIG_EXTERN OnigPosition onig_scan(OnigRegex reg, const OnigUChar* str, const OnigUChar* end, OnigRegion* region, OnigOptionType option, int (*scan_callback)(OnigPosition, OnigPosition, OnigRegion*, void*), void* callback_arg); ONIG_EXTERN OnigPosition onig_search(OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* start, const OnigUChar* range, OnigRegion* region, OnigOptionType option); @@ -845,6 +854,8 @@ OnigPosition onig_search_gpos(OnigRegex, const OnigUChar* str, const OnigUChar* ONIG_EXTERN OnigPosition onig_match(OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* at, OnigRegion* region, OnigOptionType option); ONIG_EXTERN +int onig_check_linear_time(OnigRegex reg); +ONIG_EXTERN OnigRegion* onig_region_new(void); ONIG_EXTERN void onig_region_init(OnigRegion* region); diff --git a/include/ruby/ractor.h b/include/ruby/ractor.h index 1d6687456c..7811616f6d 100644 --- a/include/ruby/ractor.h +++ b/include/ruby/ractor.h @@ -1,4 +1,4 @@ -#ifndef RUBY_RACTOR_H +#ifndef RUBY_RACTOR_H /*-*-C++-*-vi:se ft=cpp:*/ #define RUBY_RACTOR_H 1 /** @@ -11,50 +11,246 @@ * modify this file, provided that the conditions mentioned in the * file COPYING are met. Consult the file for details. */ +#include "internal/dllexport.h" /* RUBY_EXTERN is here */ +#include "internal/fl_type.h" /* FL_TEST_RAW is here */ +#include "internal/special_consts.h" /* RB_SPECIAL_CONSTS_P is here */ +#include "internal/stdbool.h" /* bool is here */ +#include "internal/value.h" /* VALUE is here */ +/** Type that defines a ractor-local storage. */ struct rb_ractor_local_storage_type { + + /** + * A function to mark a ractor-local storage. + * + * @param[out] ptr A ractor-local storage. + * @post Ruby objects inside of `ptr` are marked. + */ void (*mark)(void *ptr); + + /** + * A function to destruct a ractor-local storage. 
+ * + * @param[out] ptr A ractor-local storage. + * @post `ptr` is not a valid pointer. + */ void (*free)(void *ptr); // TODO: update }; +/** (Opaque) struct that holds a ractor-local storage key. */ typedef struct rb_ractor_local_key_struct *rb_ractor_local_key_t; -RUBY_SYMBOL_EXPORT_BEGIN +RBIMPL_SYMBOL_EXPORT_BEGIN() + +/** + * `Ractor` class. + * + * @ingroup object + */ RUBY_EXTERN VALUE rb_cRactor; +/** + * Queries the standard input of the current Ractor that is calling this + * function. + * + * @return An IO. + * @note This can be different from the process-global one. + */ VALUE rb_ractor_stdin(void); + +/** + * Queries the standard output of the current Ractor that is calling this + * function. + * + * @return An IO. + * @note This can be different from the process-global one. + */ VALUE rb_ractor_stdout(void); + +/** + * Queries the standard error of the current Ractor that is calling this + * function. + * + * @return An IO. + * @note This can be different from the process-global one. + */ VALUE rb_ractor_stderr(void); -void rb_ractor_stdin_set(VALUE); -void rb_ractor_stdout_set(VALUE); -void rb_ractor_stderr_set(VALUE); +/** + * Assigns an IO to the standard input of the Ractor that is calling this + * function. + * + * @param[in] io An IO. + * @post `io` is the standard input of the current ractor. + * @post In case the calling Ractor is the main Ractor, it also updates + * the process global ::rb_stdin. + */ +void rb_ractor_stdin_set(VALUE io); + +/** + * Assigns an IO to the standard output of the Ractor that is calling this + * function. + * + * @param[in] io An IO. + * @post `io` is the standard output of the current ractor. + * @post In case the calling Ractor is the main Ractor, it also updates + * the process global ::rb_stdout. + */ +void rb_ractor_stdout_set(VALUE io); + +/** + * Assigns an IO to the standard error of the Ractor that is calling this + * function. + * + * @param[in] io An IO.
+ * @post `io` is the standard error of the current ractor. + * @post In case the calling Ractor is the main Ractor, it also updates + * the process global ::rb_stderr. + */ +void rb_ractor_stderr_set(VALUE io); + +/** + * Issues a new key. + * + * @return A newly issued ractor-local storage key. Keys issued using this + * key can be associated to a Ruby object per Ractor. + */ rb_ractor_local_key_t rb_ractor_local_storage_value_newkey(void); + +/** + * Queries the key. + * + * @param[in] key A ractor-local storage key to lookup. + * @retval RUBY_Qnil No such key. + * @retval otherwise A value corresponding to `key` in the current Ractor. + * @note This cannot distinguish between a nonexistent key and a key + * that exists and corresponds to ::RUBY_Qnil. + */ VALUE rb_ractor_local_storage_value(rb_ractor_local_key_t key); + +/** + * Queries the key. + * + * @param[in] key A ractor-local storage key to lookup. + * @param[out] val Return value buffer. + * @retval false `key` not found. + * @retval true `key` found. + * @post `val` is updated so that it has the value corresponding to `key` + * in the current Ractor. + */ bool rb_ractor_local_storage_value_lookup(rb_ractor_local_key_t key, VALUE *val); + +/** + * Associates the passed value to the passed key. + * + * @param[in] key A ractor-local storage key. + * @param[in] val Arbitrary ruby object. + * @post `val` corresponds to `key` in the current Ractor. + */ void rb_ractor_local_storage_value_set(rb_ractor_local_key_t key, VALUE val); +/** + * A type of ractor-local storage that destructs itself using ::ruby_xfree. + * + * @internal + * + * Why it is visible from 3rd party extension libraries is not obvious to + * @shyouhei. + */ RUBY_EXTERN const struct rb_ractor_local_storage_type rb_ractor_local_storage_type_free; + +/** @alias{rb_ractor_local_storage_type_free} */ #define RB_RACTOR_LOCAL_STORAGE_TYPE_FREE (&rb_ractor_local_storage_type_free) +/** + * Extended version of rb_ractor_local_storage_value_newkey().
It additionally + * takes the type of the issuing key. + * + * @param[in] type How the value associated with the issuing key should + * behave. + * @return A newly issued ractor-local storage key, of type `type`. + */ rb_ractor_local_key_t rb_ractor_local_storage_ptr_newkey(const struct rb_ractor_local_storage_type *type); + +/** + * Identical to rb_ractor_local_storage_value() except the return type. + * + * @param[in] key A ractor-local storage key to lookup. + * @retval NULL No such key. + * @retval otherwise A value corresponds to `key` in the current Ractor. + */ void *rb_ractor_local_storage_ptr(rb_ractor_local_key_t key); + +/** + * Identical to rb_ractor_local_storage_value_set() except the parameter type. + * + * @param[in] key A ractor-local storage key. + * @param[in] ptr A pointer that conforms `key`'s type. + * @post `ptr` corresponds to `key` in the current Ractor. + */ void rb_ractor_local_storage_ptr_set(rb_ractor_local_key_t key, void *ptr); +/** + * Destructively transforms the passed object so that multiple Ractors can + * share it. What is a shareable object and what is not is a nuanced concept, + * and @ko1 says the definition can still change. However extension library + * authors might interest to learn how to use #RUBY_TYPED_FROZEN_SHAREABLE. + * + * @param[out] obj Arbitrary ruby object to modify. + * @exception rb_eRactorError Ractors cannot share `obj` by nature. + * @return Passed `obj`. + * @post Multiple Ractors can share `obj`. + * + * @internal + * + * In case an exception is raised, `obj` remains in an intermediate state where + * some of its part is frozen and others are not. @shyouhei is not sure if it + * is either an intended behaviour, current implementation limitation, or + * simply a bug. Note also that there is no way to "melt" a frozen object. 
+ */ VALUE rb_ractor_make_shareable(VALUE obj); + +/** + * Identical to rb_ractor_make_shareable(), except it returns a (deep) copy of + * the passed one instead of modifying it in-place. + * + * @param[in] obj Arbitrary ruby object to duplicate. + * @exception rb_eRactorError Ractors cannot share `obj` by nature. + * @return A deep copy of `obj` which is shareable among Ractors. + */ VALUE rb_ractor_make_shareable_copy(VALUE obj); -RUBY_SYMBOL_EXPORT_END +RBIMPL_SYMBOL_EXPORT_END() +/** + * Queries if the passed object has previously been classified as shareable or not. + * This doesn't mean anything in practice... Objects can be shared later. + * Always use rb_ractor_shareable_p() instead. + * + * @param[in] obj Object in question. + * @retval RUBY_FL_SHAREABLE It once was shareable before. + * @retval 0 Otherwise. + */ #define RB_OBJ_SHAREABLE_P(obj) FL_TEST_RAW((obj), RUBY_FL_SHAREABLE) +/** + * Queries if multiple Ractors can share the passed object or not. Ractors run + * without protecting each other. Sharing an object among them is basically + * dangerous, disabled by default. However there are objects that are + * extremely carefully implemented to be Ractor-safe; for instance integers + * have such property. This function can classify that. + * + * @param[in] obj Arbitrary ruby object. + * @retval true `obj` is capable of being shared across ractors. + * @retval false `obj` cannot travel across ractor boundaries.
+ */ static inline bool rb_ractor_shareable_p(VALUE obj) { bool rb_ractor_shareable_p_continue(VALUE obj); - if (SPECIAL_CONST_P(obj)) { + if (RB_SPECIAL_CONST_P(obj)) { return true; } else if (RB_OBJ_SHAREABLE_P(obj)) { diff --git a/include/ruby/random.h b/include/ruby/random.h index 56b2dd413f..f3df0d96fb 100644 --- a/include/ruby/random.h +++ b/include/ruby/random.h @@ -1,4 +1,4 @@ -#ifndef RUBY_RANDOM_H +#ifndef RUBY_RANDOM_H /*-*-C++-*-vi:se ft=cpp:*/ #define RUBY_RANDOM_H 1 /** * @file @@ -8,69 +8,327 @@ * Permission is hereby granted, to either redistribute and/or * modify this file, provided that the conditions mentioned in the * file COPYING are met. Consult the file for details. + * + * This is a set of APIs to roll your own subclass of ::rb_cRandom. An + * illustrative example of such PRNG can be found at + * `ext/-test-/random/loop.c`. */ #include "ruby/ruby.h" +/* + * version + * 0: before versioning; deprecated + * 1: added version, flags and init_32bit function + */ +#define RUBY_RANDOM_INTERFACE_VERSION_MAJOR 1 +#define RUBY_RANDOM_INTERFACE_VERSION_MINOR 0 + +#define RUBY_RANDOM_PASTE_VERSION_SUFFIX(x, y, z) x##_##y##_##z +#define RUBY_RANDOM_WITH_VERSION_SUFFIX(name, major, minor) \ + RUBY_RANDOM_PASTE_VERSION_SUFFIX(name, major, minor) +#define rb_random_data_type \ + RUBY_RANDOM_WITH_VERSION_SUFFIX(rb_random_data_type, \ + RUBY_RANDOM_INTERFACE_VERSION_MAJOR, \ + RUBY_RANDOM_INTERFACE_VERSION_MINOR) +#define RUBY_RANDOM_INTERFACE_VERSION_INITIALIZER \ + {RUBY_RANDOM_INTERFACE_VERSION_MAJOR, RUBY_RANDOM_INTERFACE_VERSION_MINOR} +#define RUBY_RANDOM_INTERFACE_VERSION_MAJOR_MAX 0xff +#define RUBY_RANDOM_INTERFACE_VERSION_MINOR_MAX 0xff + RBIMPL_SYMBOL_EXPORT_BEGIN() +/** + * Base components of the random interface. + * + * @internal + * + * Ideally this could be an empty class if we could assume C++, but in C a + * struct must have at least one field. + */ struct rb_random_struct { + /** Seed, passed through e.g. 
`Random.new` */ VALUE seed; }; -typedef struct rb_random_struct rb_random_t; +typedef struct rb_random_struct rb_random_t; /**< @see ::rb_random_struct */ + +RBIMPL_ATTR_NONNULL(()) +/** + * This is the type of functions called when your random object is initialised. + * Passed buffer is the seed object basically. But in Ruby a number can be + * really big. This type of functions accept such big integers as a series of + * machine words. + * + * @param[out] rng Your random struct to fill in. + * @param[in] buf Seed, maybe converted from a bignum. + * @param[in] len Number of words of `buf`. + * @post `rng` is initialised using the passed seeds. + */ +typedef void rb_random_init_func(rb_random_t *rng, const uint32_t *buf, size_t len); + +RBIMPL_ATTR_NONNULL(()) +/** + * This is the type of functions called when your random object is initialised. + * Passed data is the seed integer. + * + * @param[out] rng Your random struct to fill in. + * @param[in] data Seed, single word. + * @post `rng` is initialised using the passed seeds. + */ +typedef void rb_random_init_int32_func(rb_random_t *rng, uint32_t data); + +RBIMPL_ATTR_NONNULL(()) +/** + * This is the type of functions called from your object's `#rand` method. + * + * @param[out] rng Your random struct to extract an integer from. + * @return A random number. + * @post `rng` is consumed somehow. + */ +typedef unsigned int rb_random_get_int32_func(rb_random_t *rng); + +RBIMPL_ATTR_NONNULL(()) +/** + * This is the type of functions called from your object's `#bytes` method. + * + * @param[out] rng Your random struct to extract an integer from. + * @param[out] buf Return buffer of at least `len` bytes length. + * @param[in] len Number of bytes of `buf`. + * @post `rng` is consumed somehow. + * @post `buf` is filled with random bytes. 
+ */ +typedef void rb_random_get_bytes_func(rb_random_t *rng, void *buf, size_t len); -typedef void rb_random_init_func(rb_random_t *, const uint32_t *, size_t); -typedef unsigned int rb_random_get_int32_func(rb_random_t *); -typedef void rb_random_get_bytes_func(rb_random_t *, void *, size_t); -typedef double rb_random_get_real_func(rb_random_t *, int); +RBIMPL_ATTR_NONNULL(()) +/** + * This is the type of functions called from your object's `#rand` method. + * + * @param[out] rng Your random struct to extract an integer from. + * @param[in] excl Pass nonzero value here to indicate you don't want 1.0. + * @return A random number of range 0.0 to 1.0. + * @post `rng` is consumed somehow. + */ +typedef double rb_random_get_real_func(rb_random_t *rng, int excl); +/** PRNG algorithmic interface, analogous to Ruby level classes. */ typedef struct { + /** Number of bits of seed numbers. */ size_t default_seed_bits; + + /** + * Major/minor versions of this interface + */ + struct { + uint8_t major, minor; + } version; + + /** + * Reserved flags + */ + uint16_t flags; + + /** Function to initialize from uint32_t array. */ rb_random_init_func *init; + + /** Function to initialize from single uint32_t. */ + rb_random_init_int32_func *init_int32; + + /** Function to obtain a random integer. */ rb_random_get_int32_func *get_int32; + + /** + * Function to obtain a series of random bytes. If your PRNG has a native + * method to yield arbitrary number of bytes use that to implement this. + * But in case you lack such things, you can do so by using + * rb_rand_bytes_int32(). + * + * ```CXX + * extern rb_random_get_int32_func your_get_int32_func; + * + * void + * your_get_bytes_func(rb_random_t *rng, void *buf, size_t len) + * { + * rb_rand_bytes_int32(your_get_int32_func, rng, buf, len); + * } + * ``` + */ rb_random_get_bytes_func *get_bytes; + + /** + * Function to obtain a random double.
If your PRNG has a native method + * to yield a floating point random number use that to implement this. But + * in case you lack such things, you can do so by using + * rb_int_pair_to_real(). + * + * ```CXX + * extern rb_random_get_int32_func your_get_int32_func; + * + * double + * your_get_real_func(rb_random_t *rng, int excl) + * { + * auto a = your_get_int32_func(rng); + * auto b = your_get_int32_func(rng); + * return rb_int_pair_to_real(a, b, excl); + * } + * ``` + */ rb_random_get_real_func *get_real; } rb_random_interface_t; +/** + * This utility macro defines 4 functions named prefix_init, prefix_init_int32, + * prefix_get_int32, prefix_get_bytes. + */ #define RB_RANDOM_INTERFACE_DECLARE(prefix) \ static void prefix##_init(rb_random_t *, const uint32_t *, size_t); \ + static void prefix##_init_int32(rb_random_t *, uint32_t); \ static unsigned int prefix##_get_int32(rb_random_t *); \ static void prefix##_get_bytes(rb_random_t *, void *, size_t) +/** + * Identical to #RB_RANDOM_INTERFACE_DECLARE except it also declares + * prefix_get_real. + */ #define RB_RANDOM_INTERFACE_DECLARE_WITH_REAL(prefix) \ RB_RANDOM_INTERFACE_DECLARE(prefix); \ static double prefix##_get_real(rb_random_t *, int) +/** + * This utility macro expands to the names declared using + * #RB_RANDOM_INTERFACE_DECLARE. Expected to be used inside of a + * ::rb_random_interface_t initialiser: + * + * ```CXX + * RB_RANDOM_INTERFACE_DECLARE(foo); + * + * static inline constexpr rb_random_interface_t foo_interface = { + * 32768, // bits + * RB_RANDOM_INTERFACE_DEFINE(foo), + * }; + * ``` + */ #define RB_RANDOM_INTERFACE_DEFINE(prefix) \ + RUBY_RANDOM_INTERFACE_VERSION_INITIALIZER, 0, \ prefix##_init, \ + prefix##_init_int32, \ prefix##_get_int32, \ prefix##_get_bytes +/** + * Identical to #RB_RANDOM_INTERFACE_DEFINE except it also defines + * prefix_get_real.
+ */ #define RB_RANDOM_INTERFACE_DEFINE_WITH_REAL(prefix) \ RB_RANDOM_INTERFACE_DEFINE(prefix), \ prefix##_get_real +#define RB_RANDOM_DEFINE_INIT_INT32_FUNC(prefix) \ + static void prefix##_init_int32(rb_random_t *rnd, uint32_t data) \ + { \ + prefix##_init(rnd, &data, 1); \ + } + #if defined _WIN32 && !defined __CYGWIN__ typedef rb_data_type_t rb_random_data_type_t; # define RB_RANDOM_PARENT 0 #else + +/** This is the type of ::rb_random_data_type. */ typedef const rb_data_type_t rb_random_data_type_t; + +/** + * This utility macro can be used when you define your own PRNG type: + * + * ```CXX + * static inline constexpr rb_random_interface_t your_if = { + * 0, RB_RANDOM_INTERFACE_DEFINE(your), + * }; + * + * static inline constexpr rb_random_data_type_t your_prng_type = { + * "your PRNG", + * { rb_random_mark, }, + * RB_RANDOM_PARENT, // <<-- HERE + * &your_if, + * 0, + * } + * ``` + */ # define RB_RANDOM_PARENT &rb_random_data_type #endif +/** + * This macro is expected to be called exactly once at the beginning of a + * program, possibly from inside of your `Init_Foo()` function. Depending on + * platforms #RB_RANDOM_PARENT can require a fixup. This routine does that + * when necessary. + */ #define RB_RANDOM_DATA_INIT_PARENT(random_data) \ rbimpl_random_data_init_parent(&random_data) +/** + * This is the implementation of ::rb_data_type_struct::dmark for + * ::rb_random_data_type. In case your PRNG does not involve Ruby objects at + * all (which is quite likely), you can simply reuse it. + * + * @param[out] ptr Target to mark, which is a ::rb_random_t this case. + */ void rb_random_mark(void *ptr); + +/** + * Initialises an allocated ::rb_random_t instance. Call it from your own + * initialiser appropriately. + * + * @param[out] rnd Your PRNG's base part. + * @post `rnd` is filled with an initial state. + */ void rb_random_base_init(rb_random_t *rnd); + +/** + * Generates a 64 bit floating point number by concatenating two 32bit unsigned + * integers. 
+ * + * @param[in] a Most significant 32 bits of the result. + * @param[in] b Least significant 32 bits of the result. + * @param[in] excl Whether the result should exclude 1.0 or not. + * @return A double, whose range is either `[0, 1)` or `[0, 1]`. + * @see ::rb_random_interface_t::get_real() + * + * @internal + * + * This in fact has nothing to do with PRNGs. + */ double rb_int_pair_to_real(uint32_t a, uint32_t b, int excl); -void rb_rand_bytes_int32(rb_random_get_int32_func *, rb_random_t *, void *, size_t); + +/** + * Repeatedly calls the passed function over and over again until the passed + * buffer is filled with random bytes. + * + * @param[in] func Generator function. + * @param[out] prng Passed as-is to `func`. + * @param[out] buff Return buffer. + * @param[in] size Number of words of `buff`. + * @post `buff` is filled with random bytes. + * @post `prng` is updated by `func`. + * @see ::rb_random_interface_t::get_bytes() + */ +void rb_rand_bytes_int32(rb_random_get_int32_func *func, rb_random_t *prng, void *buff, size_t size); + +/** + * The data that holds the backend type of ::rb_cRandom. Used as your PRNG's + * ::rb_data_type_struct::parent. + */ RUBY_EXTERN const rb_data_type_t rb_random_data_type; RBIMPL_SYMBOL_EXPORT_END() RBIMPL_ATTR_PURE_UNLESS_DEBUG() /* :TODO: can this function be __attribute__((returns_nonnull)) or not? */ +/** + * Queries the interface of the passed random object. + * + * @param[in] obj An instance (of a subclass) of ::rb_cRandom. + * @return Its corresponding ::rb_random_interface_t interface. + */ static inline const rb_random_interface_t * rb_rand_if(VALUE obj) { @@ -81,6 +339,15 @@ rb_rand_if(VALUE obj) } RBIMPL_ATTR_NOALIAS() +/** + * @private + * + * This is an implementation detail of #RB_RANDOM_DATA_INIT_PARENT. People + * don't use it directly. + * + * @param[out] random_data Region to fill. + * @post ::rb_random_data_type is filled appropriately. 
+ */ static inline void rbimpl_random_data_init_parent(rb_random_data_type_t *random_data) { diff --git a/include/ruby/re.h b/include/ruby/re.h index ec0f425db0..f86d6f26cf 100644 --- a/include/ruby/re.h +++ b/include/ruby/re.h @@ -11,23 +11,161 @@ * file COPYING are met. Consult the file for details. */ #include "ruby/internal/config.h" -#include <sys/types.h> + +#ifdef HAVE_SYS_TYPES_H +# include <sys/types.h> +#endif + #include <stdio.h> +#include "ruby/onigmo.h" #include "ruby/regex.h" #include "ruby/internal/core/rmatch.h" #include "ruby/internal/dllexport.h" +struct re_registers; /* Defined in onigmo.h */ + RBIMPL_SYMBOL_EXPORT_BEGIN() -VALUE rb_reg_regcomp(VALUE); -long rb_reg_search(VALUE, VALUE, long, int); -VALUE rb_reg_regsub(VALUE, VALUE, struct re_registers *, VALUE); -long rb_reg_adjust_startpos(VALUE, VALUE, long, int); -void rb_match_busy(VALUE); -VALUE rb_reg_quote(VALUE); +/** + * Creates a new instance of ::rb_cRegexp. It can be seen as a specialised + * version of rb_reg_new_str() where it does not take options. + * + * @param[in] str Source code in String. + * @return Allocated new instance of ::rb_cRegexp. + */ +VALUE rb_reg_regcomp(VALUE str); + +/** + * Runs the passed regular expression over the passed string. Unlike + * rb_reg_search() this function also takes position and direction of the + * search, which make it possible for this function to run from in middle of + * the string. + * + * @param[in] re Regular expression to execute. + * @param[in] str Target string to search. + * @param[in] pos Offset in `str` to start searching, in bytes. + * @param[in] dir `pos`' direction; 0 means left-to-right, 1 for + * the opposite. + * @exception rb_eArgError `re` is broken. + * @exception rb_eRegexpError `re` is malformed. + * @retval -1 Match failed. + * @retval otherwise Offset of first such byte where match happened. + * @post `Regexp.last_match` is updated. + * @post `$&`, `$~`, etc., are updated. 
+ * + * @internal + * + * Distinction between raising ::rb_eArgError and ::rb_eRegexpError is not + * obvious, at least to @shyouhei. + */ +long rb_reg_search(VALUE re, VALUE str, long pos, int dir); + +/** + * Substitution. This is basically the implementation of `String#sub`. Also + * `String#gsub` repeatedly calls this function. + * + * @param[in] repl Replacement string, e.g. `"\\1\\2"` + * @param[in] src Source string, to be replaced. + * @param[in] regs Matched data generated by applying `rexp` to `src`. + * @param[in] rexp Regular expression. + * @return A substituted string. + * + * @internal + * + * This function does not check for encoding compatibility. `String#sub!` + * etc. employ their own checker. + * + * `regs` should have been `const struct re_registers *` because it is read + * only. Kept as-is for compatibility. + */ +VALUE rb_reg_regsub(VALUE repl, VALUE src, struct re_registers *regs, VALUE rexp); + +/** + * Tell us if this is a wrong idea, but it seems this function has no usage at + * all. Just remains here for theoretical backwards compatibility. + * + * @param[in] re Regular expression to execute. + * @param[in] str Target string to search. + * @param[in] pos Offset in `str` to start searching, in bytes. + * @param[in] dir `pos`' direction; 0 means left-to-right, 1 for + * the opposite. + * @return Adjusted nearest offset to `pos` inside of `str`, where is a + * character boundary. + * + */ +long rb_reg_adjust_startpos(VALUE re, VALUE str, long pos, int dir); + +/** + * Escapes any characters that would have special meaning in a regular + * expression. + * + * @param[in] str Target string to escape. + * @return A copy of `str` whose contents are escaped. + */ +VALUE rb_reg_quote(VALUE str); + +/** + * Exercises various checks and preprocesses so that the given regular + * expression can be applied to the given string. The preprocess here includes + * (but not limited to) for instance encoding conversion. 
+ * + * @param[in] re Target regular expression. + * @param[in] str What `re` is about to run on. + * @exception rb_eArgError `re` does not fit for `str`. + * @exception rb_eEncCompatError `re` and `str` are incompatible. + * @exception rb_eRegexpError `re` is malformed. + * @return A preprocessed pattern buffer ready to be applied to `str`. + * @note The return value is managed by our GC. Don't free. + * + * @internal + * + * The return type, `regex_t *`, is defined in `<ruby/onigmo.h>`, _and_ + * _conflicts_ with POSIX's `<regex.h>`. We can no longer save the situation + * at this point. Just don't mix the two. + */ regex_t *rb_reg_prepare_re(VALUE re, VALUE str); -int rb_reg_region_copy(struct re_registers *, const struct re_registers *); + +/** + * Runs a regular expression match using function `match`. Performs preparation, + * error handling, and memory cleanup. + * + * @param[in] re Target regular expression. + * @param[in] str What `re` is about to run on. + * @param[in] match The function to run to match `str` against `re`. + * @param[in] args Pointer to arguments to pass into `match`. + * @param[out] regs Registers on a successful match. + * @exception rb_eArgError `re` does not fit for `str`. + * @exception rb_eEncCompatError `re` and `str` are incompatible. + * @exception rb_eRegexpError `re` is malformed. + * @return Match position on a successful match, `ONIG_MISMATCH` otherwise. + * + * @internal + * + * The type `regex_t *` is defined in `<ruby/onigmo.h>`, _and_ + * _conflicts_ with POSIX's `<regex.h>`. We can no longer save the situation + * at this point. Just don't mix the two. + */ +OnigPosition rb_reg_onig_match(VALUE re, VALUE str, + OnigPosition (*match)(regex_t *reg, VALUE str, struct re_registers *regs, void *args), + void *args, struct re_registers *regs); + +/** + * Duplicates a match data. This is roughly the same as `onig_region_copy()`, + * except it tries to GC when there is not enough memory.
+ * + * @param[out] dst Target registers to fill. + * @param[in] src Source registers to duplicate. + * @exception rb_eNoMemError Not enough memory. + * @retval 0 Successful + * @retval ONIGERR_MEMORY Not enough memory, even after GC (unlikely). + * @post `dst` has identical contents to `src`. + * + * @internal + * + * It seems this function is here for `ext/strscan` and nothing else. + */ +int rb_reg_region_copy(struct re_registers *dst, const struct re_registers *src); RBIMPL_SYMBOL_EXPORT_END() diff --git a/include/ruby/regex.h b/include/ruby/regex.h index 22dae3231d..53278173f8 100644 --- a/include/ruby/regex.h +++ b/include/ruby/regex.h @@ -1,7 +1,6 @@ #ifndef ONIGURUMA_REGEX_H /*-*-C++-*-vi:se ft=cpp:*/ #define ONIGURUMA_REGEX_H 1 /** - * @file * @author $Author$ * @copyright Copyright (C) 1993-2007 Yukihiro Matsumoto * @copyright This file is a part of the programming language Ruby. diff --git a/include/ruby/ruby.h b/include/ruby/ruby.h index 341a716953..035f02c70b 100644 --- a/include/ruby/ruby.h +++ b/include/ruby/ruby.h @@ -14,6 +14,8 @@ */ #include "ruby/internal/config.h" +/* @shyouhei doesn't understand why we need <intrinsics.h> at this very + * beginning of the entire <ruby.h> circus. */ #ifdef HAVE_INTRINSICS_H # include <intrinsics.h> #endif @@ -21,6 +23,7 @@ #include <stdarg.h> #include "defines.h" +#include "ruby/internal/abi.h" #include "ruby/internal/anyargs.h" #include "ruby/internal/arithmetic.h" #include "ruby/internal/core.h" @@ -40,7 +43,6 @@ #include "ruby/internal/method.h" #include "ruby/internal/module.h" #include "ruby/internal/newobj.h" -#include "ruby/internal/rgengc.h" #include "ruby/internal/scan_args.h" #include "ruby/internal/special_consts.h" #include "ruby/internal/symbol.h" @@ -55,19 +57,59 @@ RBIMPL_SYMBOL_EXPORT_BEGIN() /* Module#methods, #singleton_methods and so on return Symbols */ +/** + * @private + * + * @deprecated This macro once was a thing in the old days, but makes no sense + * any longer today. 
Exists here for backwards compatibility + * only. You can safely forget about it. + */ #define USE_SYMBOL_AS_METHOD_NAME 1 -VALUE rb_get_path(VALUE); +/** + * Converts an object to a path. It first tries `#to_path` method if any, then + * falls back to `#to_str` method. + * + * @param[in] obj Arbitrary ruby object. + * @exception rb_eArgError `obj` contains a NUL byte. + * @exception rb_eTypeError `obj` is not path-ish. + * @exception rb_eEncCompatError No encoding conversion from `obj` to path. + * @return Converted path object. + */ +VALUE rb_get_path(VALUE obj); + +/** + * Ensures that the parameter object is a path. + * + * @param[in,out] v Arbitrary ruby object. + * @exception rb_eArgError `v` contains a NUL byte. + * @exception rb_eTypeError `v` is not path-ish. + * @exception rb_eEncCompatError `v` is not path-compatible. + * @post `v` is a path. + */ #define FilePathValue(v) (RB_GC_GUARD(v) = rb_get_path(v)) +/** + * @deprecated This function is an alias of rb_get_path() now. The part that + * did "no_checksafe" was deleted. It remains here because of no + * harm. + */ VALUE rb_get_path_no_checksafe(VALUE); + +/** + * This macro actually does the same thing as #FilePathValue now. The "String" + * part indicates that this is for when a string is treated like a pathname, + * rather than the actual pathname on the file systems. For examples: + * `Dir.fnmatch?`, `File.join`, `File.basename`, etc. + */ #define FilePathStringValue(v) ((v) = rb_get_path(v)) +/** @cond INTERNAL_MACRO */ #if defined(HAVE_BUILTIN___BUILTIN_CONSTANT_P) && defined(HAVE_STMT_AND_DECL_IN_EXPR) # define rb_varargs_argc_check_runtime(argc, vargc) \ (((argc) <= (vargc)) ? (argc) : \ (rb_fatal("argc(%d) exceeds actual arguments(%d)", \ - argc, vargc), 0)) + argc, vargc), 0)) # define rb_varargs_argc_valid_p(argc, vargc) \ ((argc) == 0 ? 
(vargc) <= 1 : /* [ruby-core:85266] [Bug #14425] */ \ (argc) == (vargc)) @@ -76,64 +118,305 @@ VALUE rb_get_path_no_checksafe(VALUE); ERRORFUNC((" argument length doesn't match"), int rb_varargs_bad_length(int,int)); # else # define rb_varargs_bad_length(argc, vargc) \ - ((argc)/rb_varargs_argc_valid_p(argc, vargc)) + ((argc)/rb_varargs_argc_valid_p(argc, vargc)) # endif # define rb_varargs_argc_check(argc, vargc) \ __builtin_choose_expr(__builtin_constant_p(argc), \ - (rb_varargs_argc_valid_p(argc, vargc) ? (argc) : \ - rb_varargs_bad_length(argc, vargc)), \ - rb_varargs_argc_check_runtime(argc, vargc)) + (rb_varargs_argc_valid_p(argc, vargc) ? (argc) : \ + rb_varargs_bad_length(argc, vargc)), \ + rb_varargs_argc_check_runtime(argc, vargc)) # else # define rb_varargs_argc_check(argc, vargc) \ - rb_varargs_argc_check_runtime(argc, vargc) + rb_varargs_argc_check_runtime(argc, vargc) # endif #endif +/** @endcond */ -const char *rb_class2name(VALUE); -const char *rb_obj_classname(VALUE); +/** + * Queries the name of the passed class. + * + * @param[in] klass An instance of a class. + * @return The name of `klass`. + * @note Return value is managed by our GC. Don't free. + */ +const char *rb_class2name(VALUE klass); -void rb_p(VALUE); +/** + * Queries the name of the class of the passed object. + * + * @param[in] obj Arbitrary ruby object. + * @return The name of the class of `obj`. + * @note Return value is managed by our GC. Don't free. + */ +const char *rb_obj_classname(VALUE obj); -VALUE rb_equal(VALUE,VALUE); +/** + * Inspects an object. It first calls the argument's `#inspect` method, then + * feeds its result string into ::rb_stdout. + * + * This is identical to Ruby level `Kernel#p`, except it takes only one object. + * + * @internal + * + * Above description is in fact inaccurate. This API interfaces with Ractors. + */ +void rb_p(VALUE obj); -VALUE rb_require(const char*); +/** + * This function is an optimised version of calling `#==`. 
It checks equality + * between two objects by first doing a fast identity check using C's + * `==` (same as `BasicObject#equal?`). If that check fails, it calls `#==` + * dynamically. This optimisation actually affects semantics, because when + * `#==` returns false for the same object obj, `rb_equal(obj, obj)` would + * still return true. This happens for `Float::NAN`, where `Float::NAN == + * Float::NAN` is `false`, but `rb_equal(Float::NAN, Float::NAN)` is `true`. + * + * @param[in] lhs Comparison LHS. + * @param[in] rhs Comparison RHS. + * @retval RUBY_Qtrue They are the same. + * @retval RUBY_Qfalse They are different. + */ +VALUE rb_equal(VALUE lhs, VALUE rhs); + +/** + * Identical to rb_require_string(), except it takes C's string instead of + * Ruby's. + * + * @param[in] feature Name of a feature, e.g. `"json"`. + * @exception rb_eLoadError No such feature. + * @exception rb_eRuntimeError `$"` is frozen; unable to push. + * @retval RUBY_Qtrue The feature is loaded for the first time. + * @retval RUBY_Qfalse The feature has already been loaded. + * @post `$"` is updated. + */ +VALUE rb_require(const char *feature); #include "ruby/intern.h" +/** + * @private + * + * @deprecated This macro once was a thing in the old days, but makes no sense + * any longer today. Exists here for backwards compatibility + * only. You can safely forget about it. + */ #define RUBY_VM 1 /* YARV */ + +/** + * @private + * + * @deprecated This macro once was a thing in the old days, but makes no sense + * any longer today. Exists here for backwards compatibility + * only. You can safely forget about it. + */ #define HAVE_NATIVETHREAD + +/** + * Queries if the thread which calls this function is a ruby's thread. + * "Ruby's" in this context is a thread created using one of our APIs like + * rb_thread_create(). There are distinctions between ruby's and other + * threads. For instance calling ruby methods is allowed only from inside of + * a ruby's thread.
+ * + * @retval 1 The current thread is a Ruby's thread. + * @retval 0 The current thread is a random thread from outside of Ruby. + */ int ruby_native_thread_p(void); +/** + * @private + * + * This macro is for internal use. Must be a mistake to place here. + */ #define InitVM(ext) {void InitVM_##ext(void);InitVM_##ext();} -PRINTF_ARGS(int ruby_snprintf(char *str, size_t n, char const *fmt, ...), 3, 4); +RBIMPL_ATTR_NONNULL((3)) +RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 3, 4) +/** + * Our own locale-insensitive version of `snprintf(3)`. It can also be seen as + * a routine identical to rb_sprintf(), except it writes back to the passed + * buffer instead of allocating a new Ruby object. + * + * @param[out] str Return buffer + * @param[in] n Number of bytes of `str`. + * @param[in] fmt A `printf`-like format specifier. + * @param[in] ... Variadic number of contents to format. + * @return Number of bytes that would have been written to `str`, if `n` + * was large enough. Comparing this to `n` can give you insights + * that the buffer is too small or too big. Especially passing 0 + * to `n` gives you the exact number of bytes necessary to hold + * the result string without writing anything to anywhere. + * @post `str` holds up to `n-1` bytes of formatted contents (and the + * terminating NUL character.) + */ +int ruby_snprintf(char *str, size_t n, char const *fmt, ...); + +RBIMPL_ATTR_NONNULL((3)) +RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 3, 0) +/** + * Identical to ruby_snprintf(), except it takes a `va_list`. It can also be + * seen as a routine identical to rb_vsprintf(), except it writes back to the + * passed buffer instead of allocating a new Ruby object. + * + * @param[out] str Return buffer + * @param[in] n Number of bytes of `str`. + * @param[in] fmt A `printf`-like format specifier. + * @param[in] ap Contents to format. + * @return Number of bytes that would have been written to `str`, if `n` + * was large enough. 
Comparing this to `n` can give you insights + * that the buffer is too small or too big. Especially passing 0 + * to `n` gives you the exact number of bytes necessary to hold + * the result string without writing anything to anywhere. + * @post `str` holds up to `n-1` bytes of formatted contents (and the + * terminating NUL character.) + */ int ruby_vsnprintf(char *str, size_t n, char const *fmt, va_list ap); +#include <errno.h> + +/** + * @name Errno handling routines for userland threads + * @note POSIX chapter 2 section 3 states that for each thread of a process, + * the value of `errno` shall not be affected by function calls or + * assignments to `errno` by other threads. + * + * Soooo this `#define errno` below seems like a noob mistake at first sight. + * If you look at its actual implementation, the functions are just adding one + * level of indirection. It doesn't make any sense sorry? But yes! @ko1 told + * @shyouhei that this is inevitable. + * + * The ultimate reason is because Ruby now has N:M threads implemented. + * Threads of that sort change their context in user land. A function can be + * "transferred" between threads in middle of their executions. Let us for + * instance consider: + * + * ```cxx + * void foo() + * { + * auto i = errno; + * close(0); + * errno = i; + * } + * ``` + * + * This function (if ran under our Ractor) could change its running thread at + * the `close` function. But the two `errno` invocations are different! Look + * how the source code above is compiled by clang 17 with `-O3` flag @ Linux: + * + * ``` + * foo(int): # @foo(int) + * push rbp + * push r14 + * push rbx + * mov ebx, edi + * call __errno_location@PLT + * mov r14, rax + * mov ebp, dword ptr [rax] + * mov edi, ebx + * call close@PLT + * mov dword ptr [r14], ebp + * pop rbx + * pop r14 + * pop rbp + * ret + * ``` + * + * Notice how `__errno_location@PLT` is `call`-ed only once. 
The compiler + * assumes that the location of `errno` does not change during a function call. + * Sadly this is no longer true for us. The `close@PLT` now changes threads, + * which should also change where `errno` is stored. + * + * With the `#define errno` below the compilation result changes to this: + * + * ``` + * foo(int): # @foo(int) + * push rbp + * push rbx + * push rax + * mov ebx, edi + * call rb_errno_ptr()@PLT + * mov ebp, dword ptr [rax] + * mov edi, ebx + * call close@PLT + * call rb_errno_ptr()@PLT + * mov dword ptr [rax], ebp + * add rsp, 8 + * pop rbx + * pop rbp + * ret + * ``` + * + * Which fixes the problem. + */ + +/** + * Identical to system `errno`. + * + * @return The last set `errno` number. + */ +int rb_errno(void); + +/** + * Set the errno. + * + * @param err New `errno`. + * @post `errno` is now set to `err`. + */ +void rb_errno_set(int err); + +/** + * The location of `errno` + * + * @return The (thread-specific) location of `errno`. + */ +int *rb_errno_ptr(void); + +/** + * Not sure if it is necessary for extension libraries but this is where the + * "bare" errno is located. + * + * @return The location of `errno`. + */ +static inline int * +rb_orig_errno_ptr(void) +{ + return &errno; +} + +#define rb_orig_errno errno /**< System-provided original `errno`. */ +#undef errno +#define errno (*rb_errno_ptr()) /**< Ractor-aware version of `errno`. */ + +/** @} */ + + +/** @cond INTERNAL_MACRO */ #if RBIMPL_HAS_WARNING("-Wgnu-zero-variadic-macro-arguments") # /* Skip it; clang -pedantic doesn't like the following */ #elif defined(__GNUC__) && defined(HAVE_VA_ARGS_MACRO) && defined(__OPTIMIZE__) # define rb_yield_values(argc, ...) 
\ __extension__({ \ - const int rb_yield_values_argc = (argc); \ - const VALUE rb_yield_values_args[] = {__VA_ARGS__}; \ - const int rb_yield_values_nargs = \ - (int)(sizeof(rb_yield_values_args) / sizeof(VALUE)); \ - rb_yield_values2( \ - rb_varargs_argc_check(rb_yield_values_argc, rb_yield_values_nargs), \ - rb_yield_values_nargs ? rb_yield_values_args : NULL); \ + const int rb_yield_values_argc = (argc); \ + const VALUE rb_yield_values_args[] = {__VA_ARGS__}; \ + const int rb_yield_values_nargs = \ + (int)(sizeof(rb_yield_values_args) / sizeof(VALUE)); \ + rb_yield_values2( \ + rb_varargs_argc_check(rb_yield_values_argc, rb_yield_values_nargs), \ + rb_yield_values_nargs ? rb_yield_values_args : NULL); \ }) # define rb_funcall(recv, mid, argc, ...) \ __extension__({ \ - const int rb_funcall_argc = (argc); \ - const VALUE rb_funcall_args[] = {__VA_ARGS__}; \ - const int rb_funcall_nargs = \ - (int)(sizeof(rb_funcall_args) / sizeof(VALUE)); \ + const int rb_funcall_argc = (argc); \ + const VALUE rb_funcall_args[] = {__VA_ARGS__}; \ + const int rb_funcall_nargs = \ + (int)(sizeof(rb_funcall_args) / sizeof(VALUE)); \ rb_funcallv(recv, mid, \ - rb_varargs_argc_check(rb_funcall_argc, rb_funcall_nargs), \ - rb_funcall_nargs ? rb_funcall_args : NULL); \ + rb_varargs_argc_check(rb_funcall_argc, rb_funcall_nargs), \ + rb_funcall_nargs ? 
rb_funcall_args : NULL); \ }) #endif +/** @endcond */ #ifndef RUBY_DONT_SUBST #include "ruby/subst.h" diff --git a/include/ruby/st.h b/include/ruby/st.h index 1e4bb80686..f35ab43603 100644 --- a/include/ruby/st.h +++ b/include/ruby/st.h @@ -98,6 +98,8 @@ struct st_table { enum st_retval {ST_CONTINUE, ST_STOP, ST_DELETE, ST_CHECK, ST_REPLACE}; +size_t rb_st_table_size(const struct st_table *tbl); +#define st_table_size rb_st_table_size st_table *rb_st_init_table(const struct st_hash_type *); #define st_init_table rb_st_init_table st_table *rb_st_init_table_with_size(const struct st_hash_type *, st_index_t); diff --git a/include/ruby/subst.h b/include/ruby/subst.h index cf48a3909c..d7b9a63050 100644 --- a/include/ruby/subst.h +++ b/include/ruby/subst.h @@ -1,7 +1,6 @@ #ifndef RUBY_SUBST_H /*-*-C++-*-vi:se ft=cpp:*/ #define RUBY_SUBST_H 1 /** - * @file * @author Ruby developers <ruby-core@ruby-lang.org> * @copyright This file is a part of the programming language Ruby. * Permission is hereby granted, to either redistribute and/or diff --git a/include/ruby/thread.h b/include/ruby/thread.h index b05537badb..337f477fd0 100644 --- a/include/ruby/thread.h +++ b/include/ruby/thread.h @@ -10,33 +10,316 @@ * modify this file, provided that the conditions mentioned in the * file COPYING are met. Consult the file for details. */ -#include "ruby/intern.h" +#include "ruby/internal/attr/nonnull.h" +#include "ruby/internal/intern/thread.h" /* rb_unblock_function_t */ #include "ruby/internal/dllexport.h" -/* flags for rb_nogvl */ +/** + * @name Flags for rb_nogvl() + * + * @{ + */ + +/** + * Passing this flag to rb_nogvl() prevents it from checking interrupts. + * Interrupts can impact your program negatively. For instance consider + * following callback function: + * + * ```CXX + * static inline int fd; // set elsewhere. 
+ * static inline auto callback(auto buf) { + * auto tmp = ruby_xmalloc(BUFSIZ); + * auto ret = ruby_xmalloc(sizeof(ssize_t)); // (a) + * auto n = read(fd, tmp, BUFSIZ); // (b) + * memcpy(buf, tmp, n); // (c) + * memcpy(ret, &n, sizeof(n)); + * ruby_xfree(tmp); + * return ret; + * } + * ``` + * + * Here, if it gets interrupted at (a) or (b), `read(2)` is cancelled and this + * function leaks memory (which is not a good thing of course, but...). But if + * it gets interrupted at (c), where `read(2)` is already done, interruption is + * way more catastrophic because what was read gets lost. To reroute this kind + * of problem you should set this flag. And check interrupts elsewhere at your + * own risk. + */ #define RB_NOGVL_INTR_FAIL (0x1) + +/** + * Passing this flag to rb_nogvl() indicates that the passed UBF is + * async-signal-safe. An UBF could be async safe, and that makes things + * simpler. However async unsafe UBFs are just okay. If unsure, you can + * safely leave it unspecified. + * + * @internal + * + * This makes sense only in case of POSIX threads. + */ #define RB_NOGVL_UBF_ASYNC_SAFE (0x2) +/** @} */ + RBIMPL_SYMBOL_EXPORT_BEGIN() +RBIMPL_ATTR_NONNULL((1)) +/** + * (Re-)acquires the GVL. This manoeuvre makes it possible for an out-of-GVL + * routine to one-shot call a ruby method. + * + * What this function does: + * + * 1. Blocks until it acquires the GVL. + * 2. Calls the passed function. + * 3. Releases the GVL. + * 4. Returns what was returned from the passed function. + * + * @param[in] func What to call with GVL. + * @param[in,out] data1 Passed as-is to `func`. + * @return What was returned from `func`. + * @warning `func` must not return a Ruby object. If it did such return + * value would escape from GC's scope; would not be marked. + * @warning Global escapes from this function just yield whatever fatal + * undefined behaviours. You must make sure that `func` does + * not raise, by properly rescuing everything using + * e.g. rb_protect().
+ * @warning You cannot convert a non-Ruby thread into a Ruby thread + * using this API. This function makes sense only from inside + * of a rb_thread_call_without_gvl()'s callback. + */ void *rb_thread_call_with_gvl(void *(*func)(void *), void *data1); +RBIMPL_ATTR_NONNULL((1)) +/** + * Allows the passed function to run in parallel with other Ruby threads. + * + * What this function does: + * + * 1. Checks (and handles) pending interrupts. + * 2. Releases the GVL. (Others can run here in parallel...) + * 3. Calls the passed function. + * 4. Blocks until it re-acquires the GVL. + * 5. Checks interrupts that happened between 2 to 4. + * + * In case other threads interfaced with this thread using rb_thread_kill() + * etc., the passed UBF is additionally called. See ::rb_unblock_function_t + * for details. + * + * Unlike rb_thread_call_without_gvl2() this function also reacts to signals + * etc. + * + * @param[in] func A function to call without GVL. + * @param[in,out] data1 Passed as-is to `func`. + * @param[in] ubf An UBF to cancel `func`. + * @param[in,out] data2 Passed as-is to `ubf`. + * @return What `func` returned, or 0 in case `ubf` cancelled `func`. + * @warning You cannot use most of Ruby C APIs like calling methods or + * raising exceptions from any of the functions passed to it. + * If that is dead necessary use rb_thread_call_with_gvl() to + * re-acquire the GVL. + * @warning In short, this API is difficult. @ko1 recommends you to use + * other ways if any. We lack experiences to use this API. If + * you find any corner cases etc., please report it to the + * devs. + * @warning Releasing and re-acquiring the GVL are expensive operations. + * For a short-running `func`, it might be faster to just call + * `func` with blocking everything else. Be sure to benchmark + * your code to see if it is actually worth releasing the GVL. 
+ */ void *rb_thread_call_without_gvl(void *(*func)(void *), void *data1, - rb_unblock_function_t *ubf, void *data2); + rb_unblock_function_t *ubf, void *data2); + +RBIMPL_ATTR_NONNULL((1)) +/** + * Identical to rb_thread_call_without_gvl(), except it does not interface with + * signals etc. As described in #RB_NOGVL_INTR_FAIL, interrupts can hurt you. + * In case this function detects an interrupt, it returns immediately. You can + * record progress of your callback and check it after returning from this + * function. + * + * What this function does: + * + * 1. Checks for pending interrupts and if any, just returns. + * 2. Releases the GVL. (Others can run here in parallel...) + * 3. Calls the passed function. + * 4. Blocks until it re-acquires the GVL. + * + * @param[in] func A function to call without GVL. + * @param[in,out] data1 Passed as-is to `func`. + * @param[in] ubf An UBF to cancel `func`. + * @param[in,out] data2 Passed as-is to `ubf`. + * @return What `func` returned, or 0 in case `func` did not return. + */ void *rb_thread_call_without_gvl2(void *(*func)(void *), void *data1, - rb_unblock_function_t *ubf, void *data2); + rb_unblock_function_t *ubf, void *data2); /* * XXX: unstable/unapproved - out-of-tree code should NOT not depend * on this until it hits Ruby 2.6.1 */ + +RBIMPL_ATTR_NONNULL((1)) +/** + * Identical to rb_thread_call_without_gvl(), except it additionally takes + * "flags" that change the behaviour. + * + * @param[in] func A function to call without GVL. + * @param[in,out] data1 Passed as-is to `func`. + * @param[in] ubf An UBF to cancel `func`. + * @param[in,out] data2 Passed as-is to `ubf`. + * @param[in] flags Flags. + * @return What `func` returned, or 0 in case `func` did not return. + */ void *rb_nogvl(void *(*func)(void *), void *data1, rb_unblock_function_t *ubf, void *data2, int flags); +/** + * @private + * + * @deprecated This macro once was a thing in the old days, but makes no sense + * any longer today. 
Exists here for backwards compatibility + * only. You can safely forget about it. + */ #define RUBY_CALL_WO_GVL_FLAG_SKIP_CHECK_INTS_AFTER 0x01 + +/** + * @private + * @deprecated It seems even in the old days it made no sense...? + */ #define RUBY_CALL_WO_GVL_FLAG_SKIP_CHECK_INTS_ +/** + * Declares that the current Ruby thread should acquire a dedicated + * native thread on M:N thread scheduler. + * + * If a C extension (or a library which the extension relies on) should + * keep running on a native thread (e.g. using thread-local-storage), + * this function allocates a dedicated native thread for the thread. + * + * @return `false` if the thread is already running on a dedicated native + * thread. Otherwise `true`. + */ +bool rb_thread_lock_native_thread(void); + +/** + * Triggered when a new thread is started. + * + * @note The callback will be called *without* the GVL held. + */ +#define RUBY_INTERNAL_THREAD_EVENT_STARTED 1 << 0 + +/** +* Triggered when a thread attempts to acquire the GVL. +* +* @note The callback will be called *without* the GVL held. +*/ +#define RUBY_INTERNAL_THREAD_EVENT_READY 1 << 1 /** acquiring GVL */ + +/** + * Triggered when a thread successfully acquired the GVL. + * + * @note The callback will be called *with* the GVL held. + */ +#define RUBY_INTERNAL_THREAD_EVENT_RESUMED 1 << 2 /** acquired GVL */ + +/** + * Triggered when a thread released the GVL. + * + * @note The callback will be called *without* the GVL held. + */ +#define RUBY_INTERNAL_THREAD_EVENT_SUSPENDED 1 << 3 /** released GVL */ + +/** + * Triggered when a thread exits. + * + * @note The callback will be called *without* the GVL held.
+ */ +#define RUBY_INTERNAL_THREAD_EVENT_EXITED 1 << 4 /** thread terminated */ + +#define RUBY_INTERNAL_THREAD_EVENT_MASK 0xff /** All Thread events */ + +typedef struct rb_internal_thread_event_data { + VALUE thread; +} rb_internal_thread_event_data_t; + +typedef void (*rb_internal_thread_event_callback)(rb_event_flag_t event, + const rb_internal_thread_event_data_t *event_data, + void *user_data); +typedef struct rb_internal_thread_event_hook rb_internal_thread_event_hook_t; + +/** + * Registers a thread event hook function. + * + * @param[in] func A callback. + * @param[in] events A set of events that `func` should run. + * @param[in] data Passed as-is to `func`. + * @return An opaque pointer to the hook, to unregister it later. + * @note This functionality is a noop on Windows and WebAssembly. + * @note The callback will be called without the GVL held, except for the + * RESUMED event. + * @note Callbacks are not guaranteed to be executed on the native threads + * that corresponds to the Ruby thread. To identify which Ruby thread + * the event refers to, you must use `event_data->thread`. + * @warning This function MUST not be called from a thread event callback. + */ +rb_internal_thread_event_hook_t *rb_internal_thread_add_event_hook( + rb_internal_thread_event_callback func, rb_event_flag_t events, + void *data); + + +/** + * Unregister the passed hook. + * + * @param[in] hook. The hook to unregister. + * @return Whether the hook was found and unregistered. + * @note This functionality is a noop on Windows and WebAssembly. + * @warning This function MUST not be called from a thread event callback. +*/ +bool rb_internal_thread_remove_event_hook( + rb_internal_thread_event_hook_t * hook); + + +typedef int rb_internal_thread_specific_key_t; +#define RB_INTERNAL_THREAD_SPECIFIC_KEY_MAX 8 +/** + * Create a key to store thread specific data. + * + * These APIs are designed for tools using + * rb_internal_thread_event_hook APIs. 
+ * + * Note that only `RB_INTERNAL_THREAD_SPECIFIC_KEY_MAX` keys + * can be created. raises `ThreadError` if exceeded. + * + * Usage: + * // at initialize time: + * int tool_key; // gvar + * Init_tool() { + * tool_key = rb_internal_thread_specific_key_create(); + * } + * + * // at any timing: + * rb_internal_thread_specific_set(thread, tool_key, per_thread_data); + * ... + * per_thread_data = rb_internal_thread_specific_get(thread, tool_key); + */ +rb_internal_thread_specific_key_t rb_internal_thread_specific_key_create(void); + +/** + * Get thread and tool specific data. + * + * This function is async signal safe and thread safe. + */ +void *rb_internal_thread_specific_get(VALUE thread_val, rb_internal_thread_specific_key_t key); + +/** + * Set thread and tool specific data. + * + * This function is async signal safe and thread safe. + */ +void rb_internal_thread_specific_set(VALUE thread_val, rb_internal_thread_specific_key_t key, void *data); + RBIMPL_SYMBOL_EXPORT_END() #endif /* RUBY_THREAD_H */ diff --git a/include/ruby/thread_native.h b/include/ruby/thread_native.h index 343c02c30d..8217a67514 100644 --- a/include/ruby/thread_native.h +++ b/include/ruby/thread_native.h @@ -9,9 +9,7 @@ * Permission is hereby granted, to either redistribute and/or * modify this file, provided that the conditions mentioned in the * file COPYING are met. Consult the file for details. - */ - -/* + * * This file contains wrapper APIs for native thread primitives * which Ruby interpreter uses. * @@ -21,7 +19,6 @@ * please use Mutex directly. 
*/ - #if defined(_WIN32) #include <windows.h> typedef HANDLE rb_nativethread_id_t; @@ -31,6 +28,11 @@ typedef union rb_thread_lock_union { CRITICAL_SECTION crit; } rb_nativethread_lock_t; +struct rb_thread_cond_struct { + struct cond_event_entry *next; + struct cond_event_entry *prev; +}; + typedef struct rb_thread_cond_struct rb_nativethread_cond_t; #elif defined(HAVE_PTHREAD_H) @@ -40,33 +42,169 @@ typedef pthread_t rb_nativethread_id_t; typedef pthread_mutex_t rb_nativethread_lock_t; typedef pthread_cond_t rb_nativethread_cond_t; +#elif defined(__wasi__) // no-thread platforms + +typedef struct rb_nativethread_id_t *rb_nativethread_id_t; +typedef struct rb_nativethread_lock_t *rb_nativethread_lock_t; +typedef struct rb_nativethread_cond_t *rb_nativethread_cond_t; + +#elif defined(__DOXYGEN__) + +/** Opaque type that holds an ID of a native thread. */ +struct rb_nativethread_id_t; + +/** Opaque type that holds a lock. */ +struct rb_nativethread_lock_t; + +/** Opaque type that holds a condition variable. */ +struct rb_nativethread_cond_t; + #else #error "unsupported thread type" #endif -RUBY_SYMBOL_EXPORT_BEGIN +RBIMPL_SYMBOL_EXPORT_BEGIN() +/** + * Queries the ID of the native thread that is calling this function. + * + * @return The caller thread's native ID. + */ rb_nativethread_id_t rb_nativethread_self(void); +/** + * Fills the passed lock with an initial value. + * + * @param[out] lock A mutex to initialise. + * @post `lock` is updated to its initial state. + * + * @internal + * + * There is no data structure that analogous to pthread_once_t in ruby. It is + * pretty much tricky (if not impossible) to properly initialise a mutex + * exactly once. + */ void rb_nativethread_lock_initialize(rb_nativethread_lock_t *lock); + +/** + * Destroys the passed mutex. + * + * @param[out] lock A mutex to kill. + * @post `lock` is no longer eligible for other functions. 
+ * + * @internal + * + * It is an undefined behaviour (see `pthread_mutex_destroy(3posix)`) to + * destroy a locked mutex. So it has to be unlocked. But an unlocked mutex + * can of course be locked by another thread. That's the ultimate reason why + * we do mutex. There is an inevitable race condition here. 2017 edition of + * IEEE 1003.1 issue 7 says in its rationale that "care must be taken". Care? + * How? + * + * @shyouhei thinks that POSIX is broken by design. + */ void rb_nativethread_lock_destroy(rb_nativethread_lock_t *lock); + +/** + * Blocks until the current thread obtains a lock. + * + * @param[out] lock A mutex to lock. + * @post `lock` is owned by the current native thread. + */ void rb_nativethread_lock_lock(rb_nativethread_lock_t *lock); + +/** + * Releases a lock. + * + * @param[out] lock A mutex to unlock. + * @pre `lock` is owned by the current native thread. + * @post `lock` is not owned by the current native thread. + */ void rb_nativethread_lock_unlock(rb_nativethread_lock_t *lock); +/** @alias{rb_nativethread_lock_lock} */ void rb_native_mutex_lock(rb_nativethread_lock_t *lock); + +/** + * Identical to rb_native_mutex_lock(), except it doesn't block in case + * rb_native_mutex_lock() would. + * + * @param[out] lock A mutex to lock. + * @retval 0 `lock` is successfully owned by the current thread. + * @retval EBUSY `lock` is owned by someone else. + */ int rb_native_mutex_trylock(rb_nativethread_lock_t *lock); + +/** @alias{rb_nativethread_lock_unlock} */ void rb_native_mutex_unlock(rb_nativethread_lock_t *lock); + +/** @alias{rb_nativethread_lock_initialize} */ void rb_native_mutex_initialize(rb_nativethread_lock_t *lock); + +/** @alias{rb_nativethread_lock_destroy} */ void rb_native_mutex_destroy(rb_nativethread_lock_t *lock); +/** + * Signals a condition variable. + * + * @param[out] cond A condition variable to ping. + * @post At least one of the threads waiting for `cond` gets signalled.
+ * @note This function can spuriously wake multiple threads up. + * `pthread_cond_signal(3posix)` says it can even be "impossible + * to avoid the unblocking of more than one thread blocked on a + * condition variable". Just brace spurious wakeups. + */ void rb_native_cond_signal(rb_nativethread_cond_t *cond); + +/** + * Signals a condition variable. + * + * @param[out] cond A condition variable to ping. + * @post All threads waiting for `cond` gets signalled. + */ void rb_native_cond_broadcast(rb_nativethread_cond_t *cond); + +/** + * Waits for the passed condition variable to be signalled. + * + * @param[out] cond A condition variable to wait. + * @param[out] mutex A mutex. + * @pre `mutex` is owned by the current thread. + * @post `mutex` is owned by the current thread. + * @note This can wake up spuriously. + */ void rb_native_cond_wait(rb_nativethread_cond_t *cond, rb_nativethread_lock_t *mutex); + +/** + * Identical to rb_native_cond_wait(), except it additionally takes timeout in + * msec resolution. Timeouts can be detected by catching exceptions. + * + * @param[out] cond A condition variable to wait. + * @param[out] mutex A mutex. + * @param[in] msec Timeout. + * @exception rb_eSystemCallError `Errno::ETIMEDOUT` for timeout. + * @pre `mutex` is owned by the current thread. + * @post `mutex` is owned by the current thread. + * @note This can wake up spuriously. + */ void rb_native_cond_timedwait(rb_nativethread_cond_t *cond, rb_nativethread_lock_t *mutex, unsigned long msec); + +/** + * Fills the passed condition variable with an initial value. + * + * @param[out] cond A condition variable to initialise. + * @post `cond` is updated to its initial state. + */ void rb_native_cond_initialize(rb_nativethread_cond_t *cond); -void rb_native_cond_destroy(rb_nativethread_cond_t *cond); -RUBY_SYMBOL_EXPORT_END +/** + * Destroys the passed condition variable. + * + * @param[out] cond A condition variable to kill. 
+ * @post `cond` is no longer eligible for other functions. + */ +void rb_native_cond_destroy(rb_nativethread_cond_t *cond); +RBIMPL_SYMBOL_EXPORT_END() #endif diff --git a/include/ruby/util.h b/include/ruby/util.h index af022dacbd..12e69c4b80 100644 --- a/include/ruby/util.h +++ b/include/ruby/util.h @@ -9,42 +9,230 @@ * Permission is hereby granted, to either redistribute and/or * modify this file, provided that the conditions mentioned in the * file COPYING are met. Consult the file for details. + * @warning DO NOT ADD RANDOM GARBAGES IN THIS FILE! Contents of this file + * reside here for historical reasons. Find a right place for your + * API! */ #include "ruby/internal/config.h" + +#ifdef STDC_HEADERS +# include <stddef.h> /* size_t */ +#endif + +#ifdef HAVE_SYS_TYPES_H +# include <sys/types.h> /* ssize_t */ +#endif + +#include "ruby/internal/attr/noalias.h" +#include "ruby/internal/attr/nodiscard.h" +#include "ruby/internal/attr/nonnull.h" +#include "ruby/internal/attr/restrict.h" +#include "ruby/internal/attr/returns_nonnull.h" #include "ruby/internal/dllexport.h" #include "ruby/defines.h" RBIMPL_SYMBOL_EXPORT_BEGIN() +/** an approximation of ceil(n * log10(2)), up to 1,048,576 (1<<20) + * without overflow within 32-bit calculation + */ #define DECIMAL_SIZE_OF_BITS(n) (((n) * 3010 + 9998) / 9999) -/* an approximation of ceil(n * log10(2)), up to 65536 at least */ +/** an approximation of decimal representation size for n-bytes */ +#define DECIMAL_SIZE_OF_BYTES(n) DECIMAL_SIZE_OF_BITS((n) * CHAR_BIT) + +/** + * An approximation of decimal representation size. `expr` may be a + * type name + */ +#define DECIMAL_SIZE_OF(expr) DECIMAL_SIZE_OF_BYTES(sizeof(expr)) + +/** + * Character to number mapping like `'a'` -> `10`, `'b'` -> `11` etc. For + * punctuation etc., the value is -1. "36" terminology comes from the fact + * that this is the table behind `str.to_i(36)`. 
+ */ +RUBY_EXTERN const signed char ruby_digit36_to_number_table[]; + +/** + * Characters that Ruby accepts as hexadecimal digits. This is `/\h/` expanded + * into an array. + */ +RUBY_EXTERN const char ruby_hexdigits[]; + +/** + * Scans the passed string, assuming the string is a textual representation of + * an integer. Stops when encountering something non-digit for the passed + * base. + * + * @note This does not understand minus sign. + * @note This does not understand e.g. `0x` prefix. + * @note It is a failure to pass `0` to `base`, unlike ruby_strtoul(). + * @param[in] str Target string of digits to interpret. + * @param[in] len Number of bytes of `str`, or -1 to detect `NUL`. + * @param[in] base Base, `2` to `36` inclusive. + * @param[out] retlen Return value buffer. + * @param[out] overflow Return value buffer. + * @return Interpreted numeric representation of `str`. + * @post `retlen` is the number of bytes scanned so far. + * @post `overflow` is set to true if the string represents something + * bigger than `ULONG_MAX`. Something meaningful still returns; + * which is the designed behaviour of C's unsigned arithmetic. + */ +unsigned long ruby_scan_digits(const char *str, ssize_t len, int base, size_t *retlen, int *overflow); + +/** @old{ruby_scan_oct} */ #define scan_oct(s,l,e) ((int)ruby_scan_oct((s),(l),(e))) -unsigned long ruby_scan_oct(const char *, size_t, size_t *); + +RBIMPL_ATTR_NOALIAS() +RBIMPL_ATTR_NONNULL(()) +/** + * Interprets the passed string as an octal unsigned integer. Stops when + * it encounters something not understood. + * + * @param[in] str C string to scan. + * @param[in] len Length of `str`. + * @param[out] consumed Return value buffer. + * @return Parsed integer. + * @post `consumed` is the number of characters read. + * + * @internal + * + * No consideration is made for integer overflows. As the return value is + * unsigned this function has fully defined behaviour, but you cannot know if + * there was an integer wrap-around or not.
+ */ +unsigned long ruby_scan_oct(const char *str, size_t len, size_t *consumed); + +/** @old{ruby_scan_hex} */ #define scan_hex(s,l,e) ((int)ruby_scan_hex((s),(l),(e))) -unsigned long ruby_scan_hex(const char *, size_t, size_t *); +RBIMPL_ATTR_NONNULL(()) +/** + * Interprets the passed string as a hexadecimal unsigned integer. Stops when it + * encounters something not understood. + * + * @param[in] str C string to scan. + * @param[in] len Length of `str`. + * @param[out] ret Return value buffer. + * @return Parsed integer. + * @post `ret` is the number of characters read. + * + * @internal + * + * No consideration is made for integer overflows. As the return value is + * unsigned this function has fully defined behaviour, but you cannot know if + * there was an integer wrap-around or not. + */ +unsigned long ruby_scan_hex(const char *str, size_t len, size_t *ret); + +/** + * Reentrant implementation of quick sort. If your system provides something + * (like C11 qsort_s), this is a thin wrapper of that routine. Otherwise + * resorts to our own version. + */ #ifdef HAVE_GNU_QSORT_R # define ruby_qsort qsort_r #else void ruby_qsort(void *, const size_t, const size_t, - int (*)(const void *, const void *, void *), void *); + int (*)(const void *, const void *, void *), void *); #endif -void ruby_setenv(const char *, const char *); -void ruby_unsetenv(const char *); +RBIMPL_ATTR_NONNULL((1)) +/** + * Sets an environment variable. In case of POSIX this is a wrapper of + * `setenv(3)`. But there are systems which lack one. We try hard to emulate it. + * + * @param[in] key An environment variable. + * @param[in] val A value to be associated with `key`, or 0. + * @exception rb_eSystemCallError `setenv(3)` failed for some reason. + * @post Environment variable `key` is created if necessary. Its value + * is updated to be `val`. + */ +void ruby_setenv(const char *key, const char *val); + +RBIMPL_ATTR_NONNULL(()) +/** + * Deletes the passed environment variable, if any.
+ * + * @param[in] key An environment variable. + * @exception rb_eSystemCallError `unsetenv(3)` failed for some reason. + * @post Environment variable `key` does not exist. + */ +void ruby_unsetenv(const char *key); + +RBIMPL_ATTR_NODISCARD() +RBIMPL_ATTR_RESTRICT() +RBIMPL_ATTR_RETURNS_NONNULL() +RBIMPL_ATTR_NONNULL(()) +/** + * This is our own version of `strdup(3)` that uses ruby_xmalloc() instead of + * system malloc (benefits our GC). + * + * @param[in] str Target C string to duplicate. + * @return An allocated C string holding the identical contents. + * @note Return value must be discarded using ruby_xfree(). + */ +char *ruby_strdup(const char *str); -char *ruby_strdup(const char *); #undef strdup +/** + * @alias{ruby_strdup} + * + * @internal + * + * @shyouhei doesn't think it is a wise idea. ruby_strdup()'s return value + * must be passed to ruby_xfree(), but this macro makes it almost impossible. + */ #define strdup(s) ruby_strdup(s) +RBIMPL_ATTR_NODISCARD() +RBIMPL_ATTR_RESTRICT() +RBIMPL_ATTR_RETURNS_NONNULL() +/** + * This is our own version of `getcwd(3)` that uses ruby_xmalloc() instead of + * system malloc (benefits our GC). + * + * @return An allocated C string holding the process working directory. + * @note Return value must be discarded using ruby_xfree(). + */ char *ruby_getcwd(void); -double ruby_strtod(const char *, char **); +RBIMPL_ATTR_NONNULL((1)) +/** + * Our own locale-insensitive version of `strtod(3)`. The conversion is done + * as if the current locale is set to the "C" locale, no matter actual runtime + * locale settings. + * + * @param[in] str Decimal or hexadecimal representation of a floating + * point number. + * @param[out] endptr NULL, or an arbitrary pointer (overwritten on return). + * @return Converted number. + * @post If `endptr` is not NULL, it is updated to point the first such + * byte where conversion failed. + * @note This function sets `errno` on failure. 
+ * - `ERANGE`: Converted number is out of range of `double`. + * @see William D. Clinger, "How to Read Floating Point Numbers + * Accurately" in Proc. ACM SIGPLAN '90, pp. 92-101. + * https://doi.org/10.1145/93542.93557 + */ +double ruby_strtod(const char *str, char **endptr); + #undef strtod +/** @alias{ruby_strtod} */ #define strtod(s,e) ruby_strtod((s),(e)) -void ruby_each_words(const char *, void (*)(const char*, int, void*), void *); +RBIMPL_ATTR_NONNULL((2)) +/** + * Scans the passed string, calling the callback function every time it + * encounters a "word". A word here is a series of characters separated by + * either a space (of IEEE 1003.1 section 7.3.1.1), or a `','`. + * + * @param[in] str Target string to split into words. + * @param[in] func Callback function. + * @param[in,out] argv Passed as-is to `func`. + */ +void ruby_each_words(const char *str, void (*func)(const char *word, int len, void *argv), void *argv); RBIMPL_SYMBOL_EXPORT_END() diff --git a/include/ruby/version.h b/include/ruby/version.h index d25008fad9..e9113177de 100644 --- a/include/ruby/version.h +++ b/include/ruby/version.h @@ -20,40 +20,139 @@ * check the features with mkmf.rb instead. */ -/* The origin. */ +/** + * @name The origin. + * + * This information never changes. Just written here to remember. + * + * @{ + */ + +/** Author of this project. */ #define RUBY_AUTHOR "Yukihiro Matsumoto" + +/** Ruby's birth year. */ #define RUBY_BIRTH_YEAR 1993 + +/** Ruby's birth month. */ #define RUBY_BIRTH_MONTH 2 + +/** Ruby's birth day. */ #define RUBY_BIRTH_DAY 24 -/* API version */ +/** @} */ + +/** + * @name The API version. + * + * API version is different from binary version. These numbers are for API + * stability. When you have distinct API versions x and y, you cannot expect + * code targeted to x also works for y. + * + * However let us repeat here that it's a BAD idea to check + * #RUBY_API_VERSION_CODE from extension libraries.
Different API versions are + * just different. There is no such thing like upper compatibility. + * + * @{ + */ + +/** + * Major version. This digit changes sometimes for various reasons, but that + * doesn't mean a total rewrite. Practically when it comes to API versioning, + * major and minor version changes are equally catastrophic. + */ #define RUBY_API_VERSION_MAJOR 3 -#define RUBY_API_VERSION_MINOR 1 + +/** + * Minor version. As of writing this version changes annually. Greater + * version doesn't mean "better"; they just mean years passed. + */ +#define RUBY_API_VERSION_MINOR 4 + +/** + * Teeny version. This digit is kind of reserved these days. Kept 0 for the + * entire 2.x era. Waiting for future uses. + */ #define RUBY_API_VERSION_TEENY 0 + +/** + * This macro is API versions encoded into a C integer. + * + * @note Use mkmf. + * @note Don't rely on it. + */ #define RUBY_API_VERSION_CODE (RUBY_API_VERSION_MAJOR*10000+RUBY_API_VERSION_MINOR*100+RUBY_API_VERSION_TEENY) +/** @} */ + #ifdef RUBY_EXTERN /* Internal note: this file could be included from verconf.mk _before_ * generating config.h, on Windows. The #ifdef above is to trick such * situation. */ RBIMPL_SYMBOL_EXPORT_BEGIN() -/* - * Interfaces from extension libraries. +/** + * @name Interfaces from extension libraries. * * Before using these infos, think thrice whether they are really * necessary or not, and if the answer was yes, think twice a week * later again. + * + * @{ */ + +/** API versions, in { major, minor, teeny } order. */ RUBY_EXTERN const int ruby_api_version[3]; + +/** + * Stringised version. + * + * @note This is the runtime version, not the API version. For instance it + * was `"2.5.9"` when ::ruby_api_version was `{ 2, 5, 0 }`. + */ RUBY_EXTERN const char ruby_version[]; + +/** Date of release, in a C string. */ RUBY_EXTERN const char ruby_release_date[]; + +/** + * Target platform identifier, in a C string. 
+ * + * @note Seasoned UNIX programmers should beware that this "platform + * identifier" is our invention; not always identical to so-called + * target triplets that GNU systems use. For instance on @shyouhei's + * machine, ::ruby_platform is `"x86_64-linux"` while its target triplet + * is `x86_64-pc-linux-gnu`. + * @note Note also that we support Windows. + */ RUBY_EXTERN const char ruby_platform[]; + +/** + * This is a monotonically increasing integer that describes a specific "patch" + * level. You can know the exact changeset your binary is running by this info + * (and ::ruby_version), unless this is -1. -1 means there is no release yet + * for the version; ruby is actively developed. 0 means the initial GA version. + */ RUBY_EXTERN const int ruby_patchlevel; + +/** + * This is what `ruby -v` prints to the standard error. Something like: + * `"ruby 2.5.9p229 (2021-04-05 revision 67829) [x86_64-linux]"`. This doesn't + * include runtime options like a JIT being enabled. + */ RUBY_EXTERN const char ruby_description[]; + +/** Copyright notice. */ RUBY_EXTERN const char ruby_copyright[]; + +/** + * This is just `"ruby"` for us. But different implementations can have + * different strings here. + */ RUBY_EXTERN const char ruby_engine[]; +/** @} */ + RBIMPL_SYMBOL_EXPORT_END() #endif diff --git a/include/ruby/vm.h b/include/ruby/vm.h index 7bdd567453..8779780952 100644 --- a/include/ruby/vm.h +++ b/include/ruby/vm.h @@ -9,21 +9,26 @@ * Permission is hereby granted, to either redistribute and/or * modify this file, provided that the conditions mentioned in the * file COPYING are met. Consult the file for details. + * + * We planned to have multiple VMs run side-by-side. The API here was a + * preparation for that feature. The topic branch was eventually abandoned, and + * we now have Ractor. This file is kind of obsolescent. */ #include "ruby/internal/dllexport.h" RBIMPL_SYMBOL_EXPORT_BEGIN() -/* Place holder.
- * - * We will prepare VM creation/control APIs on 1.9.2 or later. - * +/** + * The opaque struct to hold VM internals. Its fields are intentionally hidden + * from extension libraries because it changes drastically from time to time. */ - -/* VM type declaration */ typedef struct rb_vm_struct ruby_vm_t; -/* core API */ +/** + * Destructs the passed VM. You don't have to call this API directly now, + * because there is no way to create one. There is only one VM at one time. + * ruby_stop() should just suffice. + */ int ruby_vm_destruct(ruby_vm_t *vm); /** @@ -44,6 +49,13 @@ int ruby_vm_destruct(ruby_vm_t *vm); */ void ruby_vm_at_exit(void(*func)(ruby_vm_t *)); +/** + * Returns whether the Ruby VM will free all memory at shutdown. + * + * @return true if free-at-exit is enabled, false otherwise. + */ +bool ruby_free_at_exit_p(void); + RBIMPL_SYMBOL_EXPORT_END() #endif /* RUBY_VM_H */ diff --git a/include/ruby/win32.h b/include/ruby/win32.h index 4978655e3c..27a3467606 100644 --- a/include/ruby/win32.h +++ b/include/ruby/win32.h @@ -19,11 +19,6 @@ RUBY_SYMBOL_EXPORT_BEGIN */ /* - * Definitions for NT port of Perl - */ - - -/* * Ok now we can include the normal include files.
*/ @@ -40,6 +35,7 @@ extern "C++" { /* template without extern "C++" */ #endif #include <winsock2.h> #include <ws2tcpip.h> +#include <mswsock.h> #if !defined(_MSC_VER) || _MSC_VER >= 1400 #include <iphlpapi.h> #endif @@ -152,13 +148,19 @@ typedef int clockid_t; #define open rb_w32_uopen #define close(h) rb_w32_close(h) #define fclose(f) rb_w32_fclose(f) -#define read(f, b, s) rb_w32_read(f, b, s) -#define write(f, b, s) rb_w32_write(f, b, s) +#define read(f, b, s) rb_w32_read(f, b, s) +#define write(f, b, s) rb_w32_write(f, b, s) +#define pread(f, b, s, o) rb_w32_pread(f, b, s, o) +#define pwrite(f, b, s, o) rb_w32_pwrite(f, b, s, o) #define getpid() rb_w32_getpid() +#undef HAVE_GETPPID +#define HAVE_GETPPID 1 #define getppid() rb_w32_getppid() #define sleep(x) rb_w32_Sleep((x)*1000) #define Sleep(msec) (void)rb_w32_Sleep(msec) +#undef HAVE_EXECV +#define HAVE_EXECV 1 #undef execv #define execv(path,argv) rb_w32_uaspawn(P_OVERLAY,path,argv) #undef isatty @@ -191,7 +193,6 @@ struct stati128 { long st_ctimensec; }; -#define off_t __int64 #define stat stati128 #undef SIZEOF_STRUCT_STAT_ST_INO #define SIZEOF_STRUCT_STAT_ST_INO sizeof(unsigned __int64) @@ -299,7 +300,6 @@ extern DWORD rb_w32_osver(void); extern int rb_w32_uchown(const char *, int, int); extern int rb_w32_ulink(const char *, const char *); extern ssize_t rb_w32_ureadlink(const char *, char *, size_t); -extern ssize_t rb_w32_wreadlink(const WCHAR *, WCHAR *, size_t); extern int rb_w32_usymlink(const char *src, const char *link); extern int gettimeofday(struct timeval *, struct timezone *); extern int clock_gettime(clockid_t, struct timespec *); @@ -309,7 +309,9 @@ extern rb_pid_t wait(int *); extern rb_pid_t rb_w32_uspawn(int, const char *, const char*); extern rb_pid_t rb_w32_uaspawn(int, const char *, char *const *); extern rb_pid_t rb_w32_uaspawn_flags(int, const char *, char *const *, DWORD); -extern int kill(int, int); +#undef HAVE_KILL +#define HAVE_KILL 1 +extern int kill(rb_pid_t, int); extern int 
fcntl(int, int, ...); extern int rb_w32_set_nonblock(int); extern rb_pid_t rb_w32_getpid(void); @@ -343,14 +345,6 @@ rb_infinity_float(void) #endif #if !defined __MINGW32__ || defined __NO_ISOCEXT -#ifndef isnan -#define isnan(x) _isnan(x) -#endif -static inline int -finite(double x) -{ - return _finite(x); -} #ifndef copysign #define copysign(a, b) _copysign(a, b) #endif @@ -359,8 +353,6 @@ scalb(double a, long b) { return _scalb(a, b); } -#else -__declspec(dllimport) extern int finite(double); #endif #if !defined S_IFIFO && defined _S_IFIFO @@ -398,6 +390,7 @@ __declspec(dllimport) extern int finite(double); #endif #define S_IFLNK 0xa000 +#define S_IFSOCK 0xc000 /* * define this so we can do inplace editing @@ -405,9 +398,9 @@ __declspec(dllimport) extern int finite(double); #define SUFFIX -extern int rb_w32_ftruncate(int fd, off_t length); -extern int rb_w32_truncate(const char *path, off_t length); -extern int rb_w32_utruncate(const char *path, off_t length); +extern int rb_w32_ftruncate(int fd, rb_off_t length); +extern int rb_w32_truncate(const char *path, rb_off_t length); +extern int rb_w32_utruncate(const char *path, rb_off_t length); #undef HAVE_FTRUNCATE #define HAVE_FTRUNCATE 1 @@ -657,6 +650,8 @@ extern char *rb_w32_strerror(int); #undef setsockopt #define setsockopt(s, v, n, o, l) rb_w32_setsockopt(s, v, n, o, l) +#undef HAVE_SHUTDOWN +#define HAVE_SHUTDOWN 1 #undef shutdown #define shutdown(s, h) rb_w32_shutdown(s, h) @@ -704,10 +699,10 @@ extern char *rb_w32_strerror(int); #endif struct tms { - long tms_utime; - long tms_stime; - long tms_cutime; - long tms_cstime; + long tms_utime; + long tms_stime; + long tms_cutime; + long tms_cstime; }; int rb_w32_times(struct tms *); @@ -724,7 +719,9 @@ int rb_w32_fclose(FILE*); int rb_w32_pipe(int[2]); ssize_t rb_w32_read(int, void *, size_t); ssize_t rb_w32_write(int, const void *, size_t); -off_t rb_w32_lseek(int, off_t, int); +ssize_t rb_w32_pread(int, void *, size_t, rb_off_t offset); +ssize_t 
rb_w32_pwrite(int, const void *, size_t, rb_off_t offset); +rb_off_t rb_w32_lseek(int, rb_off_t, int); int rb_w32_uutime(const char *, const struct utimbuf *); int rb_w32_uutimes(const char *, const struct timeval *); int rb_w32_uutimensat(int /* must be AT_FDCWD */, const char *, const struct timespec *, int /* must be 0 */); @@ -806,6 +803,25 @@ double rb_w32_pow(double x, double y); #define pow rb_w32_pow #endif +// mmap tiny emulation +#define MAP_FAILED ((void *)-1) + +#define PROT_READ 0x01 +#define PROT_WRITE 0x02 +#define PROT_EXEC 0x04 + +#define MAP_PRIVATE 0x0002 +#define MAP_ANON 0x1000 +#define MAP_ANONYMOUS MAP_ANON + +extern void *rb_w32_mmap(void *, size_t, int, int, int, rb_off_t); +extern int rb_w32_munmap(void *, size_t); +extern int rb_w32_mprotect(void *, size_t, int); + +#define mmap(a, l, p, f, d, o) rb_w32_mmap(a, l, p, f, d, o) +#define munmap(a, l) rb_w32_munmap(a, l) +#define mprotect(a, l, prot) rb_w32_mprotect(a, l, prot) + #if defined(__cplusplus) #if 0 { /* satisfy cc-mode */ |