diff options
Diffstat (limited to 'include')
94 files changed, 5790 insertions, 4295 deletions
diff --git a/include/ruby/assert.h b/include/ruby/assert.h index c9f2c3fbef..e9edd9e640 100644 --- a/include/ruby/assert.h +++ b/include/ruby/assert.h @@ -22,6 +22,7 @@ */ #include "ruby/internal/assume.h" #include "ruby/internal/attr/cold.h" +#include "ruby/internal/attr/format.h" #include "ruby/internal/attr/noreturn.h" #include "ruby/internal/cast.h" #include "ruby/internal/dllexport.h" @@ -103,7 +104,7 @@ # /* keep NDEBUG undefined */ #elif (RBIMPL_NDEBUG == 0) && (RBIMPL_RUBY_DEBUG == 0) -# /* The (*1) situation in avobe diagram. */ +# /* The (*1) situation in above diagram. */ # define RUBY_DEBUG 0 # define RUBY_NDEBUG 1 # define NDEBUG @@ -132,6 +133,11 @@ RBIMPL_SYMBOL_EXPORT_BEGIN() RBIMPL_ATTR_NORETURN() RBIMPL_ATTR_COLD() void rb_assert_failure(const char *file, int line, const char *name, const char *expr); + +RBIMPL_ATTR_NORETURN() +RBIMPL_ATTR_COLD() +RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 5, 6) +void rb_assert_failure_detail(const char *file, int line, const char *name, const char *expr, const char *fmt, ...); RBIMPL_SYMBOL_EXPORT_END() #ifdef RUBY_FUNCTION_NAME_STRING @@ -147,8 +153,28 @@ RBIMPL_SYMBOL_EXPORT_END() * * @param mesg The message to display. */ -#define RUBY_ASSERT_FAIL(mesg) \ +#if defined(HAVE___VA_OPT__) +# if RBIMPL_HAS_WARNING("-Wgnu-zero-variadic-macro-arguments") +/* __VA_OPT__ is to be used for the zero variadic macro arguments + * cases. */ +RBIMPL_WARNING_IGNORED(-Wgnu-zero-variadic-macro-arguments) +# endif +# define RBIMPL_VA_OPT_ARGS(...) __VA_OPT__(,) __VA_ARGS__ + +# define RUBY_ASSERT_FAIL(mesg, ...) \ + rb_assert_failure##__VA_OPT__(_detail)( \ + __FILE__, __LINE__, RBIMPL_ASSERT_FUNC, mesg RBIMPL_VA_OPT_ARGS(__VA_ARGS__)) +#elif !defined(__cplusplus) +# define RBIMPL_VA_OPT_ARGS(...) + +# define RUBY_ASSERT_FAIL(mesg, ...) 
\ + rb_assert_failure(__FILE__, __LINE__, RBIMPL_ASSERT_FUNC, mesg) +#else +# undef RBIMPL_VA_OPT_ARGS + +# define RUBY_ASSERT_FAIL(mesg) \ rb_assert_failure(__FILE__, __LINE__, RBIMPL_ASSERT_FUNC, mesg) +#endif /** * Asserts that the expression is truthy. If not aborts with the message. @@ -156,15 +182,25 @@ RBIMPL_SYMBOL_EXPORT_END() * @param expr What supposedly evaluates to true. * @param mesg The message to display on failure. */ -#define RUBY_ASSERT_MESG(expr, mesg) \ +#if defined(RBIMPL_VA_OPT_ARGS) +# define RUBY_ASSERT_MESG(expr, ...) \ + (RB_LIKELY(expr) ? RBIMPL_ASSERT_NOTHING : RUBY_ASSERT_FAIL(__VA_ARGS__)) +#else +# define RUBY_ASSERT_MESG(expr, mesg) \ (RB_LIKELY(expr) ? RBIMPL_ASSERT_NOTHING : RUBY_ASSERT_FAIL(mesg)) +#endif /** * A variant of #RUBY_ASSERT that does not interface with #RUBY_DEBUG. * * @copydetails #RUBY_ASSERT */ -#define RUBY_ASSERT_ALWAYS(expr) RUBY_ASSERT_MESG((expr), #expr) +#if defined(RBIMPL_VA_OPT_ARGS) +# define RUBY_ASSERT_ALWAYS(expr, ...) \ + RUBY_ASSERT_MESG(expr, #expr RBIMPL_VA_OPT_ARGS(__VA_ARGS__)) +#else +# define RUBY_ASSERT_ALWAYS(expr) RUBY_ASSERT_MESG((expr), #expr) +#endif /** * Asserts that the given expression is truthy if and only if #RUBY_DEBUG is truthy. @@ -172,9 +208,18 @@ RBIMPL_SYMBOL_EXPORT_END() * @param expr What supposedly evaluates to true. */ #if RUBY_DEBUG -# define RUBY_ASSERT(expr) RUBY_ASSERT_MESG((expr), #expr) +# if defined(RBIMPL_VA_OPT_ARGS) +# define RUBY_ASSERT(expr, ...) \ + RUBY_ASSERT_MESG((expr), #expr RBIMPL_VA_OPT_ARGS(__VA_ARGS__)) +# else +# define RUBY_ASSERT(expr) RUBY_ASSERT_MESG((expr), #expr) +# endif #else -# define RUBY_ASSERT(expr) RBIMPL_ASSERT_NOTHING +# if defined(RBIMPL_VA_OPT_ARGS) +# define RUBY_ASSERT(/* expr, */...) RBIMPL_ASSERT_NOTHING +# else +# define RUBY_ASSERT(expr) RBIMPL_ASSERT_NOTHING +# endif #endif /** @@ -187,9 +232,18 @@ RBIMPL_SYMBOL_EXPORT_END() /* Currently `RUBY_DEBUG == ! defined(NDEBUG)` is always true. 
There is no * difference any longer between this one and `RUBY_ASSERT`. */ #if defined(NDEBUG) -# define RUBY_ASSERT_NDEBUG(expr) RBIMPL_ASSERT_NOTHING +# if defined(RBIMPL_VA_OPT_ARGS) +# define RUBY_ASSERT_NDEBUG(/* expr, */...) RBIMPL_ASSERT_NOTHING +# else +# define RUBY_ASSERT_NDEBUG(expr) RBIMPL_ASSERT_NOTHING +# endif #else -# define RUBY_ASSERT_NDEBUG(expr) RUBY_ASSERT_MESG((expr), #expr) +# if defined(RBIMPL_VA_OPT_ARGS) +# define RUBY_ASSERT_NDEBUG(expr, ...) \ + RUBY_ASSERT_MESG((expr), #expr RBIMPL_VA_OPT_ARGS(__VA_ARGS__)) +# else +# define RUBY_ASSERT_NDEBUG(expr) RUBY_ASSERT_MESG((expr), #expr) +# endif #endif /** @@ -197,10 +251,20 @@ RBIMPL_SYMBOL_EXPORT_END() * @param mesg The message to display on failure. */ #if RUBY_DEBUG -# define RUBY_ASSERT_MESG_WHEN(cond, expr, mesg) RUBY_ASSERT_MESG((expr), (mesg)) +# if defined(RBIMPL_VA_OPT_ARGS) +# define RUBY_ASSERT_MESG_WHEN(cond, /* expr, */...) \ + RUBY_ASSERT_MESG(__VA_ARGS__) +# else +# define RUBY_ASSERT_MESG_WHEN(cond, expr, mesg) RUBY_ASSERT_MESG((expr), (mesg)) +# endif #else -# define RUBY_ASSERT_MESG_WHEN(cond, expr, mesg) \ +# if defined(RBIMPL_VA_OPT_ARGS) +# define RUBY_ASSERT_MESG_WHEN(cond, expr, ...) \ + ((cond) ? RUBY_ASSERT_MESG((expr), __VA_ARGS__) : RBIMPL_ASSERT_NOTHING) +# else +# define RUBY_ASSERT_MESG_WHEN(cond, expr, mesg) \ ((cond) ? RUBY_ASSERT_MESG((expr), (mesg)) : RBIMPL_ASSERT_NOTHING) +# endif #endif /** @@ -210,7 +274,23 @@ RBIMPL_SYMBOL_EXPORT_END() * @param cond Extra condition that shall hold for assertion to take effect. * @param expr What supposedly evaluates to true. */ -#define RUBY_ASSERT_WHEN(cond, expr) RUBY_ASSERT_MESG_WHEN((cond), (expr), #expr) +#if defined(RBIMPL_VA_OPT_ARGS) +# define RUBY_ASSERT_WHEN(cond, expr, ...) 
\ + RUBY_ASSERT_MESG_WHEN(cond, expr, #expr RBIMPL_VA_OPT_ARGS(__VA_ARGS__)) +#else +# define RUBY_ASSERT_WHEN(cond, expr) RUBY_ASSERT_MESG_WHEN((cond), (expr), #expr) +#endif + +/** + * A variant of #RUBY_ASSERT that asserts when either #RUBY_DEBUG or built-in + * type of `obj` is `type`. + * + * @param obj Object to check its built-in type. + * @param type Built-in type constant, T_ARRAY, T_STRING, etc. + */ +#define RUBY_ASSERT_BUILTIN_TYPE(obj, type) \ + RUBY_ASSERT(RB_TYPE_P(obj, type), \ + "Actual type is %s", rb_builtin_type_name(BUILTIN_TYPE(obj))) /** * This is either #RUBY_ASSERT or #RBIMPL_ASSUME, depending on #RUBY_DEBUG. diff --git a/include/ruby/atomic.h b/include/ruby/atomic.h index 3eb80fbf7d..043a6a9945 100644 --- a/include/ruby/atomic.h +++ b/include/ruby/atomic.h @@ -139,6 +139,15 @@ typedef unsigned int rb_atomic_t; rbimpl_atomic_cas(&(var), (oldval), (newval)) /** + * Atomic load. This loads `var` with an atomic intrinsic and returns + * its value. + * + * @param var A variable of ::rb_atomic_t + * @return What was stored in `var`. + */ +#define RUBY_ATOMIC_LOAD(var) rbimpl_atomic_load(&(var)) + +/** * Identical to #RUBY_ATOMIC_EXCHANGE, except for the return type. * * @param var A variable of ::rb_atomic_t. @@ -280,6 +289,17 @@ typedef unsigned int rb_atomic_t; RBIMPL_CAST(rbimpl_atomic_ptr_exchange((void **)&(var), (void *)val)) /** + * Identical to #RUBY_ATOMIC_LOAD, except it expects its arguments are `void*`. + * There are cases where ::rb_atomic_t is 32bit while `void*` is 64bit. This + * should be used for size related operations to support such platforms. + * + * @param var A variable of `void*` + * @return The value of `var` (without tearing) + */ +#define RUBY_ATOMIC_PTR_LOAD(var) \ + RBIMPL_CAST(rbimpl_atomic_ptr_load((void **)&var)) + +/** * Identical to #RUBY_ATOMIC_CAS, except it expects its arguments are `void*`. * There are cases where ::rb_atomic_t is 32bit while `void*` is 64bit. 
This * should be used for size related operations to support such platforms. @@ -404,7 +424,7 @@ rbimpl_atomic_size_add(volatile size_t *ptr, size_t val) #elif defined(HAVE_GCC_SYNC_BUILTINS) __sync_add_and_fetch(ptr, val); -#elif defined(_WIN32) && defined(_M_AMD64) +#elif defined(_WIN64) /* Ditto for `InterlockeExchangedAdd`. */ InterlockedExchangeAdd64(ptr, val); @@ -456,13 +476,15 @@ rbimpl_atomic_size_inc(volatile size_t *ptr) #elif defined(HAVE_GCC_ATOMIC_BUILTINS) || defined(HAVE_GCC_SYNC_BUILTINS) rbimpl_atomic_size_add(ptr, 1); -#elif defined(_WIN32) && defined(_M_AMD64) +#elif defined(_WIN64) InterlockedIncrement64(ptr); #elif defined(__sun) && defined(HAVE_ATOMIC_H) && (defined(_LP64) || defined(_I32LPx)) atomic_inc_ulong(ptr); #else + RBIMPL_STATIC_ASSERT(size_of_size_t, sizeof *ptr == sizeof(rb_atomic_t)); + rbimpl_atomic_size_add(ptr, 1); #endif @@ -538,7 +560,7 @@ rbimpl_atomic_size_sub(volatile size_t *ptr, size_t val) #elif defined(HAVE_GCC_SYNC_BUILTINS) __sync_sub_and_fetch(ptr, val); -#elif defined(_WIN32) && defined(_M_AMD64) +#elif defined(_WIN64) const ssize_t neg = -1; InterlockedExchangeAdd64(ptr, neg * val); @@ -590,13 +612,15 @@ rbimpl_atomic_size_dec(volatile size_t *ptr) #elif defined(HAVE_GCC_ATOMIC_BUILTINS) || defined(HAVE_GCC_SYNC_BUILTINS) rbimpl_atomic_size_sub(ptr, 1); -#elif defined(_WIN32) && defined(_M_AMD64) +#elif defined(_WIN64) InterlockedDecrement64(ptr); #elif defined(__sun) && defined(HAVE_ATOMIC_H) && (defined(_LP64) || defined(_I32LPx)) atomic_dec_ulong(ptr); #else + RBIMPL_STATIC_ASSERT(size_of_size_t, sizeof *ptr == sizeof(rb_atomic_t)); + rbimpl_atomic_size_sub(ptr, 1); #endif @@ -688,7 +712,7 @@ rbimpl_atomic_size_exchange(volatile size_t *ptr, size_t val) #elif defined(HAVE_GCC_SYNC_BUILTINS) return __sync_lock_test_and_set(ptr, val); -#elif defined(_WIN32) && defined(_M_AMD64) +#elif defined(_WIN64) return InterlockedExchange64(ptr, val); #elif defined(__sun) && defined(HAVE_ATOMIC_H) && (defined(_LP64) || 
defined(_I32LPx)) @@ -749,6 +773,21 @@ rbimpl_atomic_value_exchange(volatile VALUE *ptr, VALUE val) RBIMPL_ATTR_ARTIFICIAL() RBIMPL_ATTR_NOALIAS() RBIMPL_ATTR_NONNULL((1)) +static inline rb_atomic_t +rbimpl_atomic_load(volatile rb_atomic_t *ptr) +{ +#if 0 + +#elif defined(HAVE_GCC_ATOMIC_BUILTINS) + return __atomic_load_n(ptr, __ATOMIC_SEQ_CST); +#else + return rbimpl_atomic_fetch_add(ptr, 0); +#endif +} + +RBIMPL_ATTR_ARTIFICIAL() +RBIMPL_ATTR_NOALIAS() +RBIMPL_ATTR_NONNULL((1)) static inline void rbimpl_atomic_set(volatile rb_atomic_t *ptr, rb_atomic_t val) { @@ -823,7 +862,7 @@ rbimpl_atomic_size_cas(volatile size_t *ptr, size_t oldval, size_t newval) #elif defined(HAVE_GCC_SYNC_BUILTINS) return __sync_val_compare_and_swap(ptr, oldval, newval); -#elif defined(_WIN32) && defined(_M_AMD64) +#elif defined(_WIN64) return InterlockedCompareExchange64(ptr, newval, oldval); #elif defined(__sun) && defined(HAVE_ATOMIC_H) && (defined(_LP64) || defined(_I32LPx)) @@ -875,6 +914,22 @@ rbimpl_atomic_ptr_cas(void **ptr, const void *oldval, const void *newval) RBIMPL_ATTR_ARTIFICIAL() RBIMPL_ATTR_NOALIAS() RBIMPL_ATTR_NONNULL((1)) +static inline void * +rbimpl_atomic_ptr_load(void **ptr) +{ +#if 0 + +#elif defined(HAVE_GCC_ATOMIC_BUILTINS) + return __atomic_load_n(ptr, __ATOMIC_SEQ_CST); +#else + void *val = *ptr; + return rbimpl_atomic_ptr_cas(ptr, val, val); +#endif +} + +RBIMPL_ATTR_ARTIFICIAL() +RBIMPL_ATTR_NOALIAS() +RBIMPL_ATTR_NONNULL((1)) static inline VALUE rbimpl_atomic_value_cas(volatile VALUE *ptr, VALUE oldval, VALUE newval) { diff --git a/include/ruby/backward.h b/include/ruby/backward.h index e12f98ac47..f804c2c36e 100644 --- a/include/ruby/backward.h +++ b/include/ruby/backward.h @@ -13,59 +13,13 @@ #define RBIMPL_ATTR_DEPRECATED_SINCE(ver) RBIMPL_ATTR_DEPRECATED(("since " #ver)) #define RBIMPL_ATTR_DEPRECATED_INTERNAL(ver) RBIMPL_ATTR_DEPRECATED(("since "#ver", also internal")) +#define RBIMPL_ATTR_DEPRECATED_INTERNAL_ONLY() RBIMPL_ATTR_DEPRECATED(("only for 
internal use")) -/* eval.c */ -RBIMPL_ATTR_DEPRECATED_SINCE(2.2) void rb_disable_super(); -RBIMPL_ATTR_DEPRECATED_SINCE(2.2) void rb_enable_super(); - -/* hash.c */ -RBIMPL_ATTR_DEPRECATED_SINCE(2.2) void rb_hash_iter_lev(); -RBIMPL_ATTR_DEPRECATED_SINCE(2.2) void rb_hash_ifnone(); - -/* string.c */ -RBIMPL_ATTR_DEPRECATED_SINCE(2.2) void rb_str_associate(); -RBIMPL_ATTR_DEPRECATED_SINCE(2.2) void rb_str_associated(); - -/* variable.c */ -RBIMPL_ATTR_DEPRECATED_SINCE(2.5) void rb_autoload(); - -/* eval.c */ -RBIMPL_ATTR_DEPRECATED_INTERNAL(2.6) void rb_frozen_class_p(); -RBIMPL_ATTR_DEPRECATED_INTERNAL(2.7) void rb_exec_end_proc(); - -/* error.c */ -RBIMPL_ATTR_DEPRECATED_INTERNAL(2.3) void rb_compile_error(); -RBIMPL_ATTR_DEPRECATED_INTERNAL(2.3) void rb_compile_error_with_enc(); -RBIMPL_ATTR_DEPRECATED_INTERNAL(2.3) void rb_compile_error_append(); - -/* gc.c */ -RBIMPL_ATTR_DEPRECATED_INTERNAL(2.7) void rb_gc_call_finalizer_at_exit(); - -/* signal.c */ -RBIMPL_ATTR_DEPRECATED_INTERNAL(2.7) void rb_trap_exit(); - -/* struct.c */ -RBIMPL_ATTR_DEPRECATED_INTERNAL(2.4) void rb_struct_ptr(); - -/* thread.c */ -RBIMPL_ATTR_DEPRECATED_INTERNAL(2.7) void rb_clear_trace_func(); - -/* variable.c */ -RBIMPL_ATTR_DEPRECATED_INTERNAL(2.7) void rb_generic_ivar_table(); -RBIMPL_ATTR_DEPRECATED_INTERNAL(2.6) NORETURN(VALUE rb_mod_const_missing(VALUE, VALUE)); +RBIMPL_ATTR_DEPRECATED_INTERNAL_ONLY() void rb_clear_constant_cache(void); /* from version.c */ #if defined(RUBY_SHOW_COPYRIGHT_TO_DIE) && !!(RUBY_SHOW_COPYRIGHT_TO_DIE+0) -/* for source code backward compatibility */ -RBIMPL_ATTR_DEPRECATED_SINCE(2.4) -static inline int -ruby_show_copyright_to_die(int exitcode) -{ - ruby_show_copyright(); - return exitcode; -} -#define ruby_show_copyright() /* defer EXIT_SUCCESS */ \ - (exit(ruby_show_copyright_to_die(EXIT_SUCCESS))) +# error RUBY_SHOW_COPYRIGHT_TO_DIE is deprecated #endif #endif /* RUBY_RUBY_BACKWARD_H */ diff --git a/include/ruby/backward/2/assume.h 
b/include/ruby/backward/2/assume.h index 265421df79..d148710127 100644 --- a/include/ruby/backward/2/assume.h +++ b/include/ruby/backward/2/assume.h @@ -24,8 +24,6 @@ #include "ruby/internal/assume.h" #include "ruby/internal/has/builtin.h" -#undef ASSUME /* Kill config.h definition */ -#undef UNREACHABLE /* Kill config.h definition */ #define ASSUME RBIMPL_ASSUME /**< @old{RBIMPL_ASSUME} */ #define UNREACHABLE RBIMPL_UNREACHABLE() /**< @old{RBIMPL_UNREACHABLE} */ #define UNREACHABLE_RETURN RBIMPL_UNREACHABLE_RETURN /**< @old{RBIMPL_UNREACHABLE_RETURN} */ diff --git a/include/ruby/backward/2/attributes.h b/include/ruby/backward/2/attributes.h index 73acfc9dc0..916d9e9d5b 100644 --- a/include/ruby/backward/2/attributes.h +++ b/include/ruby/backward/2/attributes.h @@ -39,6 +39,7 @@ #include "ruby/internal/attr/noinline.h" #include "ruby/internal/attr/nonnull.h" #include "ruby/internal/attr/noreturn.h" +#include "ruby/internal/attr/packed_struct.h" #include "ruby/internal/attr/pure.h" #include "ruby/internal/attr/restrict.h" #include "ruby/internal/attr/returns_nonnull.h" @@ -80,10 +81,8 @@ #undef NOINLINE #define NOINLINE(x) RBIMPL_ATTR_NOINLINE() x -#ifndef MJIT_HEADER -# undef ALWAYS_INLINE -# define ALWAYS_INLINE(x) RBIMPL_ATTR_FORCEINLINE() x -#endif +#undef ALWAYS_INLINE +#define ALWAYS_INLINE(x) RBIMPL_ATTR_FORCEINLINE() x #undef ERRORFUNC #define ERRORFUNC(mesg, x) RBIMPL_ATTR_ERROR(mesg) x @@ -147,17 +146,14 @@ #define NORETURN(x) RBIMPL_ATTR_NORETURN() x #define NORETURN_STYLE_NEW -#ifndef PACKED_STRUCT -# define PACKED_STRUCT(x) x -#endif +#undef PACKED_STRUCT +#define PACKED_STRUCT(x) \ + RBIMPL_ATTR_PACKED_STRUCT_BEGIN() x RBIMPL_ATTR_PACKED_STRUCT_END() -#ifndef PACKED_STRUCT_UNALIGNED -# if UNALIGNED_WORD_ACCESS -# define PACKED_STRUCT_UNALIGNED(x) PACKED_STRUCT(x) -# else -# define PACKED_STRUCT_UNALIGNED(x) x -# endif -#endif +#undef PACKED_STRUCT_UNALIGNED +#define PACKED_STRUCT_UNALIGNED(x) \ + RBIMPL_ATTR_PACKED_STRUCT_UNALIGNED_BEGIN() x \ + 
RBIMPL_ATTR_PACKED_STRUCT_UNALIGNED_END() #undef RB_UNUSED_VAR #define RB_UNUSED_VAR(x) x RBIMPL_ATTR_MAYBE_UNUSED() diff --git a/include/ruby/debug.h b/include/ruby/debug.h index c88da9c43d..f7c8e6ca8d 100644 --- a/include/ruby/debug.h +++ b/include/ruby/debug.h @@ -10,6 +10,7 @@ * modify this file, provided that the conditions mentioned in the * file COPYING are met. Consult the file for details. */ +#include "ruby/internal/attr/deprecated.h" #include "ruby/internal/attr/nonnull.h" #include "ruby/internal/attr/returns_nonnull.h" #include "ruby/internal/dllexport.h" @@ -51,6 +52,25 @@ RBIMPL_ATTR_NONNULL((3)) int rb_profile_frames(int start, int limit, VALUE *buff, int *lines); /** + * Queries mysterious "frame"s of the given range. + * + * A per-thread version of rb_profile_frames(). + * Arguments and return values are the same with rb_profile_frames() with the + * exception of the first argument _thread_, which accepts the Thread to be + * profiled/queried. + * + * @param[in] thread The Ruby Thread to be profiled. + * @param[in] start Start position (0 means the topmost). + * @param[in] limit Number objects of `buff`. + * @param[out] buff Return buffer. + * @param[out] lines Return buffer. + * @return Number of objects filled into `buff`. + * @post `buff` is filled with backtrace pointers. + * @post `lines` is filled with `__LINE__` of each backtraces. + */ +int rb_profile_thread_frames(VALUE thread, int start, int limit, VALUE *buff, int *lines); + +/** * Queries the path of the passed backtrace. * * @param[in] frame What rb_profile_frames() returned. @@ -207,6 +227,17 @@ typedef VALUE (*rb_debug_inspector_func_t)(const rb_debug_inspector_t *dc, void VALUE rb_debug_inspector_open(rb_debug_inspector_func_t func, void *data); /** + * Queries the backtrace object of the context. This is as if you call + * `caller_locations` at the point of debugger. + * + * @param[in] dc A debug context. 
+ * @return An array of `Thread::Backtrace::Location` which represents the + * current point of execution at `dc`. + + */ +VALUE rb_debug_inspector_backtrace_locations(const rb_debug_inspector_t *dc); + +/** * Queries the current receiver of the passed context's upper frame. * * @param[in] dc A debug context. @@ -250,15 +281,27 @@ VALUE rb_debug_inspector_frame_binding_get(const rb_debug_inspector_t *dc, long VALUE rb_debug_inspector_frame_iseq_get(const rb_debug_inspector_t *dc, long index); /** - * Queries the backtrace object of the context. This is as if you call - * `caller_locations` at the point of debugger. + * Queries the depth of the passed context's upper frame. * - * @param[in] dc A debug context. - * @return An array of `Thread::Backtrace::Location` which represents the - * current point of execution at `dc`. + * Note that the depth is not the same as the frame index because debug_inspector + * skips some special frames but the depth counts all frames. + * + * @param[in] dc A debug context. + * @param[in] index Index of the frame from top to bottom. + * @exception rb_eArgError `index` out of range. + * @return The depth at `index`-th frame in Integer. + */ +VALUE rb_debug_inspector_frame_depth(const rb_debug_inspector_t *dc, long index); + +// A macro to recognize `rb_debug_inspector_frame_depth()` is available or not +#define RB_DEBUG_INSPECTOR_FRAME_DEPTH(dc, index) rb_debug_inspector_frame_depth(dc, index) +/** + * Return current frame depth. + * + * @return The depth of the current frame in Integer. 
*/ -VALUE rb_debug_inspector_backtrace_locations(const rb_debug_inspector_t *dc); +VALUE rb_debug_inspector_current_depth(void); /** @} */ @@ -573,48 +616,157 @@ VALUE rb_tracearg_object(rb_trace_arg_t *trace_arg); /* * Postponed Job API - * rb_postponed_job_register and rb_postponed_job_register_one are - * async-signal-safe and used via SIGPROF by the "stackprof" RubyGem + * + * This API is designed to be called from contexts where it is not safe to run Ruby + * code (e.g. because they do not hold the GVL or because GC is in progress), and + * defer a callback to run in a context where it _is_ safe. The primary intended + * users of this API is for sampling profilers like the "stackprof" gem; these work + * by scheduling the periodic delivery of a SIGPROF signal, and inside the C-level + * signal handler, deferring a job to collect a Ruby backtrace when it is next safe + * to do so. + * + * Ruby maintains a small, fixed-size postponed job table. An extension using this + * API should first call `rb_postponed_job_preregister` to register a callback + * function in this table and obtain a handle of type `rb_postponed_job_handle_t` + * to it. Subsequently, the callback can be triggered by calling + * `rb_postponed_job_trigger` with that handle, or the `data` associated with the + * callback function can be changed by calling `rb_postponed_job_preregister` again. + * + * Because the postponed job table is quite small (it only has 32 entries on most + * common systems), extensions should generally only preregister one or two `func` + * values. + * + * Historically, this API provided two functions `rb_postponed_job_register` and + * `rb_postponed_job_register_one`, which claimed to be fully async-signal-safe and + * would call back the provided `func` and `data` at an appropriate time. However, + * these functions were subject to race conditions which could cause crashes when + * racing with Ruby's internal use of them. 
These two functions are still present, + * but are marked as deprecated and have slightly changed semantics: + * + * * rb_postponed_job_register now works like rb_postponed_job_register_one i.e. + * `func` will only be executed at most one time each time Ruby checks for + * interrupts, no matter how many times it is registered + * * They are also called with the last `data` to be registered, not the first + * (which is how rb_postponed_job_register_one previously worked) */ + /** * Type of postponed jobs. * - * @param[in,out] arg What was passed to rb_postponed_job_register(). + * @param[in,out] arg What was passed to `rb_postponed_job_preregister` */ typedef void (*rb_postponed_job_func_t)(void *arg); /** - * Registers a postponed job. + * The type of a handle returned from `rb_postponed_job_preregister` and + * passed to `rb_postponed_job_trigger` + */ +typedef unsigned int rb_postponed_job_handle_t; +#define POSTPONED_JOB_HANDLE_INVALID ((rb_postponed_job_handle_t)UINT_MAX) + +/** + * Pre-registers a func in Ruby's postponed job preregistration table, + * returning an opaque handle which can be used to trigger the job later. Generally, + * this function will be called during the initialization routine of an extension. + * + * The returned handle can be used later to call `rb_postponed_job_trigger`. This will + * cause Ruby to call back into the registered `func` with `data` at a later time, in + * a context where the GVL is held and it is safe to perform Ruby allocations. + * + * If the given `func` was already pre-registered, this function will overwrite the + * stored data with the newly passed data, and return the same handle instance as + * was previously returned. + * + * If this function is called concurrently with the same `func`, then the stored data + * could be the value from either call (but will definitely be one of them). 
+ * + * If this function is called to update the data concurrently with a call to + * `rb_postponed_job_trigger` on the same handle, it's undefined whether `func` will + * be called with the old data or the new data. + * + * Although the current implementation of this function is in fact async-signal-safe and + * has defined semantics when called concurrently on the same `func`, a future Ruby + * version might require that this method be called under the GVL; thus, programs which + * aim to be forward-compatible should call this method whilst holding the GVL. + * + * @param[in] flags Unused and ignored + * @param[in] func The function to be pre-registered + * @param[in] data The data to be pre-registered + * @retval POSTPONED_JOB_HANDLE_INVALID The job table is full; this registration + * did not succeed and no further registration will do so for + * the lifetime of the program. + * @retval otherwise A handle which can be passed to `rb_postponed_job_trigger` + */ +rb_postponed_job_handle_t rb_postponed_job_preregister(unsigned int flags, rb_postponed_job_func_t func, void *data); + +/** + * Triggers a pre-registered job registered with rb_postponed_job_preregister, + * scheduling it for execution the next time the Ruby VM checks for interrupts. + * The context in which the job is called in holds the GVL and is safe to perform + * Ruby allocations within (i.e. it is not during GC). + * + * This method is async-signal-safe and can be called from any thread, at any + * time, including in signal handlers. * - * There are situations when running a ruby program is not possible. For - * instance when a program is in a signal handler; for another instance when - * the GC is busy. On such situations however, there might be needs to do - * something. We cannot but defer such operations until we are 100% sure it is - * safe to execute them. This mechanism is called postponed jobs. This - * function registers a new one. The registered job would eventually gets - * executed. 
+ * If this method is called multiple times, Ruby will coalesce this into only + * one call to the job the next time it checks for interrupts. + * + * @param[in] h A handle returned from rb_postponed_job_preregister + */ +void rb_postponed_job_trigger(rb_postponed_job_handle_t h); + +/** + * Schedules the given `func` to be called with `data` when Ruby next checks for + * interrupts. If this function is called multiple times in between Ruby checking + * for interrupts, then `func` will be called only once with the `data` value from + * the most recent call to this function. * - * @param[in] flags (Unused) reserved for future extensions. + * Like `rb_postponed_job_trigger`, the context in which the job is called + * holds the GVL and can allocate Ruby objects. + * + * This method essentially has the same semantics as: + * + * ``` + * rb_postponed_job_trigger(rb_postponed_job_preregister(flags, func, data)); + * ``` + * + * @note Previous versions of Ruby promised that the (`func`, `data`) pairs would + * be executed as many times as they were registered with this function; in + * reality this was always subject to race conditions and this function no + * longer provides this guarantee. Instead, multiple calls to this function + * can be coalesced into a single execution of the passed `func`, with the + * most recent `data` registered at that time passed in. + * + * @deprecated This interface implies that arbitrarily many `func`'s can be enqueued + * over the lifetime of the program, whilst in reality the registration + * slots for postponed jobs are a finite resource. This is made clearer + * by the `rb_postponed_job_preregister` and `rb_postponed_job_trigger` + * functions, and a future version of Ruby might delete this function. + * + * @param[in] flags Unused and ignored. * @param[in] func Job body. * @param[in,out] data Passed as-is to `func`. - * @retval 0 Postponed job buffer is full. Failed. - * @retval otherwise Opaque return value. 
- * @post The passed job is postponed. + * @retval 0 Postponed job registration table is full. Failed. + * @retval 1 Registration succeeded. + * @post The passed job will run on the next interrupt check. */ + RBIMPL_ATTR_DEPRECATED(("use rb_postponed_job_preregister and rb_postponed_job_trigger")) int rb_postponed_job_register(unsigned int flags, rb_postponed_job_func_t func, void *data); /** - * Identical to rb_postponed_job_register_one(), except it additionally checks - * for duplicated registration. In case the passed job is already in the - * postponed job buffer this function does nothing. + * Identical to `rb_postponed_job_register` + * + * @deprecated This is deprecated for the same reason as `rb_postponed_job_register` * - * @param[in] flags (Unused) reserved for future extensions. + * @param[in] flags Unused and ignored. * @param[in] func Job body. * @param[in,out] data Passed as-is to `func`. - * @retval 0 Postponed job buffer is full. Failed. - * @retval otherwise Opaque return value. + * @retval 0 Postponed job registration table is full. Failed. + * @retval 1 Registration succeeded. + * @post The passed job will run on the next interrupt check. */ + RBIMPL_ATTR_DEPRECATED(("use rb_postponed_job_preregister and rb_postponed_job_trigger")) int rb_postponed_job_register_one(unsigned int flags, rb_postponed_job_func_t func, void *data); /** @} */ diff --git a/include/ruby/encoding.h b/include/ruby/encoding.h index 95cf74a3b8..1256393701 100644 --- a/include/ruby/encoding.h +++ b/include/ruby/encoding.h @@ -16,2276 +16,16 @@ * relatively less rooms for things in this header file be useful when writing * an extension library. 
*/ -#include "ruby/internal/config.h" -#include <stdarg.h> #include "ruby/ruby.h" -#include "ruby/oniguruma.h" -#include "ruby/internal/attr/const.h" -#include "ruby/internal/attr/deprecated.h" -#include "ruby/internal/attr/format.h" -#include "ruby/internal/attr/noalias.h" -#include "ruby/internal/attr/nonnull.h" -#include "ruby/internal/attr/noreturn.h" -#include "ruby/internal/attr/returns_nonnull.h" -#include "ruby/internal/attr/pure.h" -#include "ruby/internal/core/rbasic.h" -#include "ruby/internal/dllexport.h" -#include "ruby/internal/fl_type.h" -RBIMPL_SYMBOL_EXPORT_BEGIN() - -/** - * @private - * - * Bit constants used when embedding encodings into ::RBasic::flags. Extension - * libraries must not bother such things. - */ -enum ruby_encoding_consts { - - /** Max possible number of embeddable encodings. */ - RUBY_ENCODING_INLINE_MAX = 127, - - /** Where inline encodings reside. */ - RUBY_ENCODING_SHIFT = (RUBY_FL_USHIFT+10), - - /** Bits we use to store inline encodings. */ - RUBY_ENCODING_MASK = (RUBY_ENCODING_INLINE_MAX<<RUBY_ENCODING_SHIFT - /* RUBY_FL_USER10..RUBY_FL_USER16 */), - - /** Max possible length of an encoding name. */ - RUBY_ENCODING_MAXNAMELEN = 42 -}; - -#define ENCODING_INLINE_MAX RUBY_ENCODING_INLINE_MAX /**< @old{RUBY_ENCODING_INLINE_MAX} */ -#define ENCODING_SHIFT RUBY_ENCODING_SHIFT /**< @old{RUBY_ENCODING_SHIFT} */ -#define ENCODING_MASK RUBY_ENCODING_MASK /**< @old{RUBY_ENCODING_SHIFT} */ - -/** - * Destructively assigns the passed encoding to the passed object. The object - * must be capable of having inline encoding. Using this macro needs deep - * understanding of bit level object binary layout. - * - * @param[out] obj Target object to modify. - * @param[in] i Encoding in encindex format. - * @post `obj`'s encoding is `i`. 
- */ -#define RB_ENCODING_SET_INLINED(obj,i) do {\ - RBASIC(obj)->flags &= ~RUBY_ENCODING_MASK;\ - RBASIC(obj)->flags |= (VALUE)(i) << RUBY_ENCODING_SHIFT;\ -} while (0) - -/** @alias{rb_enc_set_index} */ -#define RB_ENCODING_SET(obj,i) rb_enc_set_index((obj), (i)) - -/** - * Queries the encoding of the passed object. The encoding must be smaller - * than ::RUBY_ENCODING_INLINE_MAX, which means you have some assumption on the - * return value. This means the API is for internal use only. - * - * @param[in] obj Target object. - * @return `obj`'s encoding index. - */ -#define RB_ENCODING_GET_INLINED(obj) \ - (int)((RBASIC(obj)->flags & RUBY_ENCODING_MASK)>>RUBY_ENCODING_SHIFT) - -/** - * @alias{rb_enc_get_index} - * - * @internal - * - * Implementation wise this is not a verbatim alias of rb_enc_get_index(). But - * the API is consistent. Don't bother. - */ -#define RB_ENCODING_GET(obj) \ - (RB_ENCODING_GET_INLINED(obj) != RUBY_ENCODING_INLINE_MAX ? \ - RB_ENCODING_GET_INLINED(obj) : \ - rb_enc_get_index(obj)) - -/** - * Queries if the passed object is in ascii 8bit (== binary) encoding. The - * object must be capable of having inline encoding. Using this macro needs - * deep understanding of bit level object binary layout. - * - * @param[in] obj An object to check. - * @retval 1 It is. - * @retval 0 It isn't. 
- */ -#define RB_ENCODING_IS_ASCII8BIT(obj) (RB_ENCODING_GET_INLINED(obj) == 0) - -#define ENCODING_SET_INLINED(obj,i) RB_ENCODING_SET_INLINED(obj,i) /**< @old{RB_ENCODING_SET_INLINED} */ -#define ENCODING_SET(obj,i) RB_ENCODING_SET(obj,i) /**< @old{RB_ENCODING_SET} */ -#define ENCODING_GET_INLINED(obj) RB_ENCODING_GET_INLINED(obj) /**< @old{RB_ENCODING_GET_INLINED} */ -#define ENCODING_GET(obj) RB_ENCODING_GET(obj) /**< @old{RB_ENCODING_GET} */ -#define ENCODING_IS_ASCII8BIT(obj) RB_ENCODING_IS_ASCII8BIT(obj) /**< @old{RB_ENCODING_IS_ASCII8BIT} */ -#define ENCODING_MAXNAMELEN RUBY_ENCODING_MAXNAMELEN /**< @old{RUBY_ENCODING_MAXNAMELEN} */ - -/** What rb_enc_str_coderange() returns. */ -enum ruby_coderange_type { - - /** The object's coderange is unclear yet. */ - RUBY_ENC_CODERANGE_UNKNOWN = 0, - - /** The object holds 0 to 127 inclusive and nothing else. */ - RUBY_ENC_CODERANGE_7BIT = ((int)RUBY_FL_USER8), - - /** The object's encoding and contents are consistent each other */ - RUBY_ENC_CODERANGE_VALID = ((int)RUBY_FL_USER9), - - /** The object holds invalid/malformed/broken character(s). */ - RUBY_ENC_CODERANGE_BROKEN = ((int)(RUBY_FL_USER8|RUBY_FL_USER9)), - - /** Where the coderange resides. */ - RUBY_ENC_CODERANGE_MASK = (RUBY_ENC_CODERANGE_7BIT| - RUBY_ENC_CODERANGE_VALID| - RUBY_ENC_CODERANGE_BROKEN) -}; - -RBIMPL_ATTR_CONST() -/** - * @private - * - * This is an implementation detail of #RB_ENC_CODERANGE_CLEAN_P. People don't - * use it directly. - * - * @param[in] cr An enum ::ruby_coderange_type. - * @retval 1 It is. - * @retval 0 It isn't. - */ -static inline int -rb_enc_coderange_clean_p(int cr) -{ - return (cr ^ (cr >> 1)) & RUBY_ENC_CODERANGE_7BIT; -} - -/** - * Queries if a code range is "clean". "Clean" in this context means it is - * known and valid. - * - * @param[in] cr An enum ::ruby_coderange_type. - * @retval 1 It is. - * @retval 0 It isn't. 
- */ -#define RB_ENC_CODERANGE_CLEAN_P(cr) rb_enc_coderange_clean_p(cr) - -/** - * Queries the (inline) code range of the passed object. The object must be - * capable of having inline encoding. Using this macro needs deep - * understanding of bit level object binary layout. - * - * @param[in] obj Target object. - * @return An enum ::ruby_coderange_type. - */ -#define RB_ENC_CODERANGE(obj) ((int)RBASIC(obj)->flags & RUBY_ENC_CODERANGE_MASK) - -/** - * Queries the (inline) code range of the passed object is - * ::RUBY_ENC_CODERANGE_7BIT. The object must be capable of having inline - * encoding. Using this macro needs deep understanding of bit level object - * binary layout. - * - * @param[in] obj Target object. - * @retval 1 It is ascii only. - * @retval 0 Otherwise (including cases when the range is not known). - */ -#define RB_ENC_CODERANGE_ASCIIONLY(obj) (RB_ENC_CODERANGE(obj) == RUBY_ENC_CODERANGE_7BIT) - -/** - * Destructively modifies the passed object so that its (inline) code range is - * the passed one. The object must be capable of having inline encoding. - * Using this macro needs deep understanding of bit level object binary layout. - * - * @param[out] obj Target object. - * @param[out] cr An enum ::ruby_coderange_type. - * @post `obj`'s code range is `cr`. - */ -#define RB_ENC_CODERANGE_SET(obj,cr) (\ - RBASIC(obj)->flags = \ - (RBASIC(obj)->flags & ~RUBY_ENC_CODERANGE_MASK) | (cr)) - -/** - * Destructively clears the passed object's (inline) code range. The object - * must be capable of having inline encoding. Using this macro needs deep - * understanding of bit level object binary layout. - * - * @param[out] obj Target object. - * @post `obj`'s code range is ::RUBY_ENC_CODERANGE_UNKNOWN. - */ -#define RB_ENC_CODERANGE_CLEAR(obj) RB_ENC_CODERANGE_SET((obj),0) - -/* assumed ASCII compatibility */ -/** - * "Mix" two code ranges into one. This is handy for instance when you - * concatenate two strings into one. 
Consider one of then is valid but the - * other isn't. The result must be invalid. This macro computes that kind of - * mixture. - * - * @param[in] a An enum ::ruby_coderange_type. - * @param[in] b Another enum ::ruby_coderange_type. - * @return The `a` "and" `b`. - */ -#define RB_ENC_CODERANGE_AND(a, b) \ - ((a) == RUBY_ENC_CODERANGE_7BIT ? (b) : \ - (a) != RUBY_ENC_CODERANGE_VALID ? RUBY_ENC_CODERANGE_UNKNOWN : \ - (b) == RUBY_ENC_CODERANGE_7BIT ? RUBY_ENC_CODERANGE_VALID : (b)) - -/** - * This is #RB_ENCODING_SET + RB_ENC_CODERANGE_SET combo. The object must be - * capable of having inline encoding. Using this macro needs deep - * understanding of bit level object binary layout. - * - * @param[out] obj Target object. - * @param[in] encindex Encoding in encindex format. - * @param[in] cr An enum ::ruby_coderange_type. - * @post `obj`'s encoding is `encindex`. - * @post `obj`'s code range is `cr`. - */ -#define RB_ENCODING_CODERANGE_SET(obj, encindex, cr) \ - do { \ - VALUE rb_encoding_coderange_obj = (obj); \ - RB_ENCODING_SET(rb_encoding_coderange_obj, (encindex)); \ - RB_ENC_CODERANGE_SET(rb_encoding_coderange_obj, (cr)); \ - } while (0) - -#define ENC_CODERANGE_MASK RUBY_ENC_CODERANGE_MASK /**< @old{RUBY_ENC_CODERANGE_MASK} */ -#define ENC_CODERANGE_UNKNOWN RUBY_ENC_CODERANGE_UNKNOWN /**< @old{RUBY_ENC_CODERANGE_UNKNOWN} */ -#define ENC_CODERANGE_7BIT RUBY_ENC_CODERANGE_7BIT /**< @old{RUBY_ENC_CODERANGE_7BIT} */ -#define ENC_CODERANGE_VALID RUBY_ENC_CODERANGE_VALID /**< @old{RUBY_ENC_CODERANGE_VALID} */ -#define ENC_CODERANGE_BROKEN RUBY_ENC_CODERANGE_BROKEN /**< @old{RUBY_ENC_CODERANGE_BROKEN} */ -#define ENC_CODERANGE_CLEAN_P(cr) RB_ENC_CODERANGE_CLEAN_P(cr) /**< @old{RB_ENC_CODERANGE_CLEAN_P} */ -#define ENC_CODERANGE(obj) RB_ENC_CODERANGE(obj) /**< @old{RB_ENC_CODERANGE} */ -#define ENC_CODERANGE_ASCIIONLY(obj) RB_ENC_CODERANGE_ASCIIONLY(obj) /**< @old{RB_ENC_CODERANGE_ASCIIONLY} */ -#define ENC_CODERANGE_SET(obj,cr) RB_ENC_CODERANGE_SET(obj,cr) /**< 
@old{RB_ENC_CODERANGE_SET} */ -#define ENC_CODERANGE_CLEAR(obj) RB_ENC_CODERANGE_CLEAR(obj) /**< @old{RB_ENC_CODERANGE_CLEAR} */ -#define ENC_CODERANGE_AND(a, b) RB_ENC_CODERANGE_AND(a, b) /**< @old{RB_ENC_CODERANGE_AND} */ -#define ENCODING_CODERANGE_SET(obj, encindex, cr) RB_ENCODING_CODERANGE_SET(obj, encindex, cr) /**< @old{RB_ENCODING_CODERANGE_SET} */ - -/** - * The type of encoding. Our design here is we take Oniguruma/Onigmo's - * multilingualisation schema as our base data structure. - */ -typedef const OnigEncodingType rb_encoding; - -RBIMPL_ATTR_NOALIAS() -/** - * Converts a character option to its encoding. It only supports a very - * limited set of Japanese encodings due to its Japanese origin. Ruby still - * has this in-core for backwards compatibility. But new codes must not bother - * such concept like one-character encoding option. Consider deprecated in - * practice. - * - * @param[in] c One of `['n', 'e', 's', 'u', 'i', 'x', 'm']`. - * @param[out] option Return buffer. - * @param[out] kcode Return buffer. - * @retval 1 `c` understood properly. - * @retval 0 `c` is not understood. - * @post `option` is a ::OnigOptionType. - * @post `kcode` is an enum `ruby_preserved_encindex`. - * - * @internal - * - * `kcode` is opaque because `ruby_preserved_encindex` is not visible from - * extension libraries. But who cares? - */ -int rb_char_to_option_kcode(int c, int *option, int *kcode); - -/** - * Creates a new encoding, using the passed one as a template. - * - * @param[in] name Name of the creating encoding. - * @param[in] src Template. - * @exception rb_eArgError Duplicated or malformed `name`. - * @return Replicated new encoding's index. - * @post Encoding named `name` is created as a copy of `src`, whose index - * is the return value. - * - * @internal - * - * `name` can be `NULL`, but that just raises an exception. OTOH it seems no - * sanity check is done against `src`...? 
- */ -int rb_enc_replicate(const char *name, rb_encoding *src); - -/** - * Creates a new "dummy" encoding. Roughly speaking, an encoding is dummy when - * it is stateful. Notable example of dummy encoding are those defined in - * ISO/IEC 2022 - * - * @param[in] name Name of the creating encoding. - * @exception rb_eArgError Duplicated or malformed `name`. - * @return New dummy encoding's index. - * @post Encoding named `name` is created, whose index is the return - * value. - */ -int rb_define_dummy_encoding(const char *name); - -RBIMPL_ATTR_PURE() -/** - * Queries if the passed encoding is dummy. - * - * @param[in] enc Encoding in question. - * @retval 1 It is. - * @retval 0 It isn't. - */ -int rb_enc_dummy_p(rb_encoding *enc); - -RBIMPL_ATTR_PURE() -/** - * Queries the index of the encoding. An encoding's index is a Ruby-local - * concept. It is a (sequential) number assigned to each encoding. - * - * @param[in] enc Encoding in question. - * @return Its index. - * @note You can pass null pointers to this function. It is equivalent - * to rb_usascii_encindex() then. - */ -int rb_enc_to_index(rb_encoding *enc); - -/** - * Queries the index of the encoding of the passed object, if any. - * - * @param[in] obj Object in question. - * @retval -1 `obj` is incapable of having an encoding. - * @retval otherwise `obj`'s encoding's index. - */ -int rb_enc_get_index(VALUE obj); - -/** - * Destructively assigns an encoding (via its index) to an object. - * - * @param[out] obj Object in question. - * @param[in] encindex An encoding index. - * @exception rb_eFrozenError `obj` is frozen. - * @exception rb_eArgError `obj` is incapable of having an encoding. - * @exception rb_eEncodingError `encindex` is out of bounds. - * @exception rb_eLoadError Failed to load the encoding. - */ -void rb_enc_set_index(VALUE obj, int encindex); - -RBIMPL_ATTR_PURE() -/** - * Queries if the passed object can have its encoding. - * - * @param[in] obj Object in question. - * @retval 1 It can. 
- * @retval 0 It cannot. - */ -int rb_enc_capable(VALUE obj); - -/** - * Queries the index of the encoding. - * - * @param[in] name Name of the encoding to find. - * @exception rb_eArgError No such encoding named `name`. - * @retval -1 `name` exists, but unable to load. - * @retval otherwise Index of encoding named `name`. - */ -int rb_enc_find_index(const char *name); - -/** - * Registers an "alias" name. In the wild, an encoding can be called using - * multiple names. For instance an encoding known as `"CP932"` is also called - * `"SJIS"` on occasions. This API registers such relationships. - * - * @param[in] alias New name. - * @param[in] orig Old name. - * @exception rb_eArgError `alias` is duplicated or malformed. - * @retval -1 Failed to load `orig`. - * @retval otherwise The index of `orig` and `alias`. - * @post `alias` is a synonym of `orig`. They refer to the identical - * encoding. - */ -int rb_enc_alias(const char *alias, const char *orig); - -/** - * Obtains a encoding index from a wider range of objects (than - * rb_enc_find_index()). - * - * @param[in] obj An ::rb_cEncoding, or its name in ::rb_cString. - * @retval -1 `obj` is unexpected type/contents. - * @retval otherwise Index corresponding to `obj`. - */ -int rb_to_encoding_index(VALUE obj); - -/** - * Identical to rb_find_encoding(), except it raises an exception instead of - * returning NULL. - * - * @param[in] obj An ::rb_cEncoding, or its name in ::rb_cString. - * @exception rb_eTypeError `obj` is neither ::rb_cEncoding nor ::rb_cString. - * @exception rb_eArgError `obj` is an unknown encoding name. - * @return Encoding of `obj`. - */ -rb_encoding *rb_to_encoding(VALUE obj); - -/** - * Identical to rb_to_encoding_index(), except the return type. - * - * @param[in] obj An ::rb_cEncoding, or its name in ::rb_cString. - * @exception rb_eTypeError `obj` is neither ::rb_cEncoding nor ::rb_cString. - * @retval NULL No such encoding. - * @return otherwise Encoding of `obj`. 
- */ -rb_encoding *rb_find_encoding(VALUE obj); - -/** - * Identical to rb_enc_get_index(), except the return type. - * - * @param[in] obj Object in question. - * @retval NULL Obj is incapable of having an encoding. - * @retval otherwise `obj`'s encoding. - */ -rb_encoding *rb_enc_get(VALUE obj); - -/** - * Look for the "common" encoding between the two. One character can or cannot - * be expressed depending on an encoding. This function finds the super-set of - * encodings that satisfy contents of both arguments. If that is impossible - * returns NULL. - * - * @param[in] str1 An object. - * @param[in] str2 Another object. - * @retval NULL No encoding can satisfy both at once. - * @retval otherwise Common encoding between the two. - * @note Arguments can be non-string, e.g. Regexp. - */ -rb_encoding *rb_enc_compatible(VALUE str1, VALUE str2); - -/** - * Identical to rb_enc_compatible(), except it raises an exception instead of - * returning NULL. - * - * @param[in] str1 An object. - * @param[in] str2 Another object. - * @exception rb_eEncCompatError No encoding can satisfy both. - * @return Common encoding between the two. - * @note Arguments can be non-string, e.g. Regexp. - */ -rb_encoding *rb_enc_check(VALUE str1,VALUE str2); - -/** - * Identical to rb_enc_set_index(), except it additionally does contents fix-up - * depending on the passed object. It for instance changes the byte length of - * terminating `U+0000` according to the passed encoding. - * - * @param[out] obj Object in question. - * @param[in] encindex An encoding index. - * @exception rb_eFrozenError `obj` is frozen. - * @exception rb_eArgError `obj` is incapable of having an encoding. - * @exception rb_eEncodingError `encindex` is out of bounds. - * @exception rb_eLoadError Failed to load the encoding. - * @return The passed `obj`. - * @post `obj`'s contents might be fixed according to `encindex`. 
- */ -VALUE rb_enc_associate_index(VALUE obj, int encindex); - -/** - * Identical to rb_enc_associate(), except it takes an encoding itself instead - * of its index. - * - * @param[out] obj Object in question. - * @param[in] enc An encoding. - * @exception rb_eFrozenError `obj` is frozen. - * @exception rb_eArgError `obj` is incapable of having an encoding. - * @return The passed `obj`. - * @post `obj`'s contents might be fixed according to `enc`. - */ -VALUE rb_enc_associate(VALUE obj, rb_encoding *enc); - -/** - * Destructively copies the encoding of the latter object to that of former - * one. It can also be seen as a routine identical to - * rb_enc_associate_index(), except it takes an object's encoding instead of an - * encoding's index. - * - * @param[out] dst Object to modify. - * @param[in] src Object to reference. - * @exception rb_eFrozenError `dst` is frozen. - * @exception rb_eArgError `dst` is incapable of having an encoding. - * @exception rb_eEncodingError `src` is incapable of having an encoding. - * @post `dst`'s encoding is that of `src`'s. - */ -void rb_enc_copy(VALUE dst, VALUE src); - -/** - * Identical to rb_enc_str_new(), except it additionally takes an encoding. - * - * @param[in] ptr A memory region of `len` bytes length. - * @param[in] len Length of `ptr`, in bytes, not including the - * terminating NUL character. - * @param[in] enc Encoding of `ptr`. - * @exception rb_eNoMemError Failed to allocate `len+1` bytes. - * @exception rb_eArgError `len` is negative. - * @return An instance of ::rb_cString, of `len` bytes length, of `enc` - * encoding, whose contents are verbatim copy of `ptr`. - * @pre At least `len` bytes of continuous memory region shall be - * accessible via `ptr`. - * @note `enc` can be a null pointer. It can also be seen as a routine - * identical to rb_usascii_str_new() then. 
- */ -VALUE rb_enc_str_new(const char *ptr, long len, rb_encoding *enc); - -RBIMPL_ATTR_NONNULL((1)) -/** - * Identical to rb_enc_str_new(), except it assumes the passed pointer is a - * pointer to a C string. It can also be seen as a routine identical to - * rb_str_new_cstr(), except it additionally takes an encoding. - * - * @param[in] ptr A C string. - * @param[in] enc Encoding of `ptr`. - * @exception rb_eNoMemError Failed to allocate memory. - * @return An instance of ::rb_cString, of `enc` encoding, whose contents - * are verbatim copy of `ptr`. - * @pre `ptr` must not be a null pointer. - * @pre Because `ptr` is a C string it makes no sense for `enc` to be - * something like UTF-32. - * @note `enc` can be a null pointer. It can also be seen as a routine - * identical to rb_usascii_str_new_cstr() then. - */ -VALUE rb_enc_str_new_cstr(const char *ptr, rb_encoding *enc); - -/** - * Identical to rb_enc_str_new(), except it takes a C string literal. It can - * also be seen as a routine identical to rb_str_new_static(), except it - * additionally takes an encoding. - * - * @param[in] ptr A C string literal. - * @param[in] len `strlen(ptr)`. - * @param[in] enc Encoding of `ptr`. - * @exception rb_eArgError `len` out of range of `size_t`. - * @pre `ptr` must be a C string constant. - * @return An instance of ::rb_cString, of `enc` encoding, whose backend - * storage is the passed C string literal. - * @warning It is a very bad idea to write to a C string literal (often - * immediate SEGV shall occur). Consider return values of this - * function be read-only. - * @note `enc` can be a null pointer. It can also be seen as a routine - * identical to rb_usascii_str_new_static() then. - */ -VALUE rb_enc_str_new_static(const char *ptr, long len, rb_encoding *enc); - -/** - * Identical to rb_enc_str_new(), except it returns a "f"string. It can also - * be seen as a routine identical to rb_interned_str(), except it additionally - * takes an encoding. 
- * - * @param[in] ptr A memory region of `len` bytes length. - * @param[in] len Length of `ptr`, in bytes, not including the - * terminating NUL character. - * @param[in] enc Encoding of `ptr`. - * @exception rb_eArgError `len` is negative. - * @return A found or created instance of ::rb_cString, of `len` bytes - * length, of `enc` encoding, whose contents are identical to that - * of `ptr`. - * @pre At least `len` bytes of continuous memory region shall be - * accessible via `ptr`. - * @note `enc` can be a null pointer. - */ -VALUE rb_enc_interned_str(const char *ptr, long len, rb_encoding *enc); - -RBIMPL_ATTR_NONNULL((1)) -/** - * Identical to rb_enc_str_new_cstr(), except it returns a "f"string. It can - * also be seen as a routine identical to rb_interned_str_cstr(), except it - * additionally takes an encoding. - * - * @param[in] ptr A memory region of `len` bytes length. - * @param[in] enc Encoding of `ptr`. - * @return A found or created instance of ::rb_cString of `enc` encoding, - * whose contents are identical to that of `ptr`. - * @pre At least `len` bytes of continuous memory region shall be - * accessible via `ptr`. - * @note `enc` can be a null pointer. - */ -VALUE rb_enc_interned_str_cstr(const char *ptr, rb_encoding *enc); - -/** - * Identical to rb_reg_new(), except it additionally takes an encoding. - * - * @param[in] ptr A memory region of `len` bytes length. - * @param[in] len Length of `ptr`, in bytes, not including the - * terminating NUL character. - * @param[in] enc Encoding of `ptr`. - * @param[in] opts Options e.g. ONIG_OPTION_MULTILINE. - * @exception rb_eRegexpError Failed to compile `ptr`. - * @return An allocated new instance of ::rb_cRegexp, of `enc` encoding, - * whose expression is compiled according to `ptr`. 
- */ -VALUE rb_enc_reg_new(const char *ptr, long len, rb_encoding *enc, int opts); - -RBIMPL_ATTR_NONNULL((2)) -RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 2, 3) -/** - * Identical to rb_sprintf(), except it additionally takes an encoding. The - * passed encoding rules both the incoming format specifier and the resulting - * string. - * - * @param[in] enc Encoding of `fmt`. - * @param[in] fmt A `printf`-like format specifier. - * @param[in] ... Variadic number of contents to format. - * @return A rendered new instance of ::rb_cString, of `enc` encoding. - */ -VALUE rb_enc_sprintf(rb_encoding *enc, const char *fmt, ...); - -RBIMPL_ATTR_NONNULL((2)) -RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 2, 0) -/** - * Identical to rb_enc_sprintf(), except it takes a `va_list` instead of - * variadic arguments. It can also be seen as a routine identical to - * rb_vsprintf(), except it additionally takes an encoding. - * - * @param[in] enc Encoding of `fmt`. - * @param[in] fmt A `printf`-like format specifier. - * @param[in] ap Contents to format. - * @return A rendered new instance of ::rb_cString, of `enc` encoding. - */ -VALUE rb_enc_vsprintf(rb_encoding *enc, const char *fmt, va_list ap); - -/** - * Counts the number of characters of the passed string, according to the - * passed encoding. This has to be complicated. The passed string could be - * invalid and/or broken. This routine would scan from the beginning til the - * end, byte by byte, to seek out character boundaries. Could be super slow. - * - * @param[in] head Leftmost pointer to the string. - * @param[in] tail Rightmost pointer to the string. - * @param[in] enc Encoding of the string. - * @return Number of characters exist in `head` .. `tail`. The definition - * of "character" depends on the passed `enc`. - */ -long rb_enc_strlen(const char *head, const char *tail, rb_encoding *enc); - -/** - * Queries the n-th character. Like rb_enc_strlen() this function can be fast - * or slow depending on the contents. 
Don't expect characters to be uniformly - * distributed across the entire string. - * - * @param[in] head Leftmost pointer to the string. - * @param[in] tail Rightmost pointer to the string. - * @param[in] nth Requested index of characters. - * @param[in] enc Encoding of the string. - * @return Pointer to the first byte of the character that is `nth` - * character ahead of `head`, or `tail` if there is no such - * character (OOB etc). The definition of "character" depends on - * the passed `enc`. - */ -char *rb_enc_nth(const char *head, const char *tail, long nth, rb_encoding *enc); - -/** - * Identical to rb_enc_get_index(), except the return type. - * - * @param[in] obj Object in question. - * @exception rb_eTypeError `obj` is incapable of having an encoding. - * @return `obj`'s encoding. - */ -VALUE rb_obj_encoding(VALUE obj); - -/** - * Identical to rb_str_cat(), except it additionally takes an encoding. - * - * @param[out] str Destination object. - * @param[in] ptr Contents to append. - * @param[in] len Length of `src`, in bytes. - * @param[in] enc Encoding of `ptr`. - * @exception rb_eArgError `len` is negative. - * @exception rb_eEncCompatError `enc` is not compatible with `str`. - * @return The passed `dst`. - * @post The contents of `ptr` is copied, transcoded into `dst`'s - * encoding, then pasted into `dst`'s end. - */ -VALUE rb_enc_str_buf_cat(VALUE str, const char *ptr, long len, rb_encoding *enc); - -/** - * Encodes the passed code point into a series of bytes. - * - * @param[in] code Code point. - * @param[in] enc Target encoding scheme. - * @exception rb_eRangeError `enc` does not glean `code`. - * @return An instance of ::rb_cString, of `enc` encoding, whose sole - * contents is `code` represented in `enc`. - * @note No way to encode code points bigger than UINT_MAX. - * - * @internal - * - * In other languages, APIs like this one could be seen as the primitive - * routines where encodings' "encode" feature are implemented. 
However in case - * of Ruby this is not the primitive one. We directly manipulate encoded - * strings. Encoding conversion routines transocde an encoded string directly - * to another one; not via a code point array. - */ -VALUE rb_enc_uint_chr(unsigned int code, rb_encoding *enc); - -/** - * Identical to rb_external_str_new(), except it additionally takes an - * encoding. However the whole point of rb_external_str_new() is to encode a - * string into default external encoding. Being able to specify arbitrary - * encoding just ruins the designed purpose the function meseems. - * - * @param[in] ptr A memory region of `len` bytes length. - * @param[in] len Length of `ptr`, in bytes, not including the - * terminating NUL character. - * @param[in] enc Target encoding scheme. - * @exception rb_eArgError `len` is negative. - * @return An instance of ::rb_cString. In case encoding conversion from - * "default internal" to `enc` is fully defined over the given - * contents, then the return value is a string of `enc` encoding, - * whose contents are the converted ones. Otherwise the string is - * a junk. - * @warning It doesn't raise on a conversion failure and silently ends up in - * a corrupted output. You can know the failure by querying - * `valid_encoding?` of the result object. - * - * @internal - * - * @shyouhei has no idea why this one does not follow the naming convention - * that others obey. It seems to him that this should have been called - * `rb_enc_external_str_new`. - */ -VALUE rb_external_str_new_with_enc(const char *ptr, long len, rb_encoding *enc); - -/** - * Identical to rb_str_export(), except it additionally takes an encoding. - * - * @param[in] obj Target object. - * @param[in] enc Target encoding. - * @exception rb_eTypeError No implicit conversion to String. - * @return Converted ruby string of `enc` encoding. - */ -VALUE rb_str_export_to_enc(VALUE obj, rb_encoding *enc); - -/** - * Encoding conversion main routine. 
- * - * @param[in] str String to convert. - * @param[in] from Source encoding. - * @param[in] to Destination encoding. - * @return A copy of `str`, with conversion from `from` to `to` applied. - * @note `from` can be a null pointer. `str`'s encoding is taken then. - * @note `to` can be a null pointer. No-op then. - */ -VALUE rb_str_conv_enc(VALUE str, rb_encoding *from, rb_encoding *to); - -/** - * Identical to rb_str_conv_enc(), except it additionally takes IO encoder - * options. The extra arguments can be constructed using io_extract_modeenc() - * etc. - * - * @param[in] str String to convert. - * @param[in] from Source encoding. - * @param[in] to Destination encoding. - * @param[in] ecflags A set of enum ::ruby_econv_flag_type. - * @param[in] ecopts Optional hash. - * @return A copy of `str`, with conversion from `from` to `to` applied. - * @note `from` can be a null pointer. `str`'s encoding is taken then. - * @note `to` can be a null pointer. No-op then. - * @note `ecopts` can be ::RUBY_Qnil, which is equivalent to passing an - * empty hash. - */ -VALUE rb_str_conv_enc_opts(VALUE str, rb_encoding *from, rb_encoding *to, int ecflags, VALUE ecopts); - -/** @cond INTERNAL_MACRO */ -#ifdef HAVE_BUILTIN___BUILTIN_CONSTANT_P -#define rb_enc_str_new(str, len, enc) RB_GNUC_EXTENSION_BLOCK( \ - (__builtin_constant_p(str) && __builtin_constant_p(len)) ? \ - rb_enc_str_new_static((str), (len), (enc)) : \ - rb_enc_str_new((str), (len), (enc)) \ -) -#define rb_enc_str_new_cstr(str, enc) RB_GNUC_EXTENSION_BLOCK( \ - (__builtin_constant_p(str)) ? \ - rb_enc_str_new_static((str), (long)strlen(str), (enc)) : \ - rb_enc_str_new_cstr((str), (enc)) \ -) -#endif -/** @endcond */ - -RBIMPL_ATTR_NORETURN() -RBIMPL_ATTR_NONNULL((3)) -RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 3, 4) -/** - * Identical to rb_raise(), except it additionally takes an encoding. - * - * @param[in] enc Encoding of the generating exception. - * @param[in] exc A subclass of ::rb_eException. 
- * @param[in] fmt Format specifier string compatible with rb_sprintf(). - * @param[in] ... Contents of the message. - * @exception exc The specified exception. - * @note It never returns. - */ -void rb_enc_raise(rb_encoding *enc, VALUE exc, const char *fmt, ...); - -/** - * Identical to rb_find_encoding(), except it takes an encoding index instead - * of a Ruby object. - * - * @param[in] idx An encoding index. - * @retval NULL No such encoding. - * @retval otherwise An encoding whose index is `idx`. - */ -rb_encoding *rb_enc_from_index(int idx); - -/** - * Identical to rb_find_encoding(), except it takes a C's string instead of - * Ruby's. - * - * @param[in] name Name of the encoding to query. - * @retval NULL No such encoding. - * @retval otherwise An encoding whose index is `idx`. - */ -rb_encoding *rb_enc_find(const char *name); - -/** - * Queries the (canonical) name of the passed encoding. - * - * @param[in] enc An encoding. - * @return Its name. - */ -#define rb_enc_name(enc) (enc)->name - -/** - * Queries the minimum number of bytes that the passed encoding needs to - * represent a character. For ASCII and compatible encodings this is typically - * 1. There are however encodings whose minimum is not 1; they are - * historically called wide characters. - * - * @param[in] enc An encoding. - * @return Its least possible number of bytes except 0. - */ -#define rb_enc_mbminlen(enc) (enc)->min_enc_len - -/** - * Queries the maximum number of bytes that the passed encoding needs to - * represent a character. Fixed-width encodings have the same value for this - * one and #rb_enc_mbminlen. However there are variable-width encodings. - * UTF-8, for instance, takes from 1 up to 6 bytes. - * - * @param[in] enc An encoding. - * @return Its maximum possible number of bytes of a character. - */ -#define rb_enc_mbmaxlen(enc) (enc)->max_enc_len - -/** - * Queries the number of bytes of the character at the passed pointer. 
- * - * @param[in] p Pointer to a character's first byte. - * @param[in] e End of the string that has `p`. - * @param[in] enc Encoding of the string. - * @return If the character at `p` does not end until `e`, number of bytes - * between `p` and `e`. Otherwise the number of bytes that the - * character at `p` is encoded. - * - * @internal - * - * Strictly speaking there are chances when `p` points to a middle byte of a - * wide character. This function returns "the number of bytes from `p` to - * nearest of either `e` or the next character boundary", if you go strict. - */ -int rb_enc_mbclen(const char *p, const char *e, rb_encoding *enc); - -/** - * Identical to rb_enc_mbclen() unless the character at `p` overruns `e`. That - * can happen for instance when you read from a socket and its partial read - * cuts a wide character in-between. In those situations this function - * "estimates" theoretical length of the character in question. Typically it - * tends to be possible to know how many bytes a character needs before - * actually reaching its end; for instance UTF-8 encodes a character's length - * in the first byte of it. This function returns that info. - * - * @note This implies that the string is not broken. - * - * @param[in] p Pointer to the character's first byte. - * @param[in] e End of the string that has `p`. - * @param[in] enc Encoding of the string. - * @return Number of bytes of character at `p`, measured or estimated. - */ -int rb_enc_fast_mbclen(const char *p, const char *e, rb_encoding *enc); - -/** - * Queries the number of bytes of the character at the passed pointer. This - * function returns 3 different types of information: - * - * ```CXX - * auto n = rb_enc_precise_mbclen(p, q, r); - * - * if (ONIGENC_MBCLEN_CHARFOUND_P(n)) { - * // Character found. Normal return. - * auto found_length = ONIGENC_MBCLEN_CHARFOUND_LEN(n); - * } - * else if (ONIGENC_MBCLEN_NEEDMORE_P(n)) { - * // Character overruns past `q`; needs more. 
- * auto requested_length = ONIGENC_MBCLEN_NEEDMORE_LEN(n); - * } - * else { - * // `p` is broken. - * assert(ONIGENC_MBCLEN_INVALID_P(n)); - * } - * ``` - * - * @param[in] p Pointer to the character's first byte. - * @param[in] e End of the string that has `p`. - * @param[in] enc Encoding of the string. - * @return Encoded read/needed number of bytes (see above). - */ -int rb_enc_precise_mbclen(const char *p, const char *e, rb_encoding *enc); - -#define MBCLEN_CHARFOUND_P(ret) ONIGENC_MBCLEN_CHARFOUND_P(ret) /**< @old{ONIGENC_MBCLEN_CHARFOUND_P} */ -#define MBCLEN_CHARFOUND_LEN(ret) ONIGENC_MBCLEN_CHARFOUND_LEN(ret) /**< @old{ONIGENC_MBCLEN_CHARFOUND_LEN} */ -#define MBCLEN_INVALID_P(ret) ONIGENC_MBCLEN_INVALID_P(ret) /**< @old{ONIGENC_MBCLEN_INVALID_P} */ -#define MBCLEN_NEEDMORE_P(ret) ONIGENC_MBCLEN_NEEDMORE_P(ret) /**< @old{ONIGENC_MBCLEN_NEEDMORE_P} */ -#define MBCLEN_NEEDMORE_LEN(ret) ONIGENC_MBCLEN_NEEDMORE_LEN(ret) /**< @old{ONIGENC_MBCLEN_NEEDMORE_LEN} */ - -/** - * Queries the code point of character pointed by the passed pointer. If that - * code point is included in ASCII that code point is returned. Otherwise -1. - * This can be different from just looking at the first byte. For instance it - * reads 2 bytes in case of UTF-16BE. - * - * @param[in] p Pointer to the character's first byte. - * @param[in] e End of the string that has `p`. - * @param[in] len Return buffer. - * @param[in] enc Encoding of the string. - * @retval -1 The character at `p` is not i ASCII. - * @retval otherwise A code point of the character at `p`. - * @post `len` (if set) is the number of bytes of `p`. - */ -int rb_enc_ascget(const char *p, const char *e, int *len, rb_encoding *enc); - -/** - * Queries the code point of character pointed by the passed pointer. - * Exceptions happen in case of broken input. - * - * @param[in] p Pointer to the character's first byte. - * @param[in] e End of the string that has `p`. - * @param[in] len Return buffer. 
- * @param[in] enc Encoding of the string. - * @exception rb_eArgError `p` is broken. - * @return Code point of the character pointed by `p`. - * @post `len` (if set) is the number of bytes of `p`. - */ -unsigned int rb_enc_codepoint_len(const char *p, const char *e, int *len, rb_encoding *enc); - -RBIMPL_ATTR_DEPRECATED(("use rb_enc_codepoint_len instead.")) -/** - * Queries the code point of character pointed by the passed pointer. - * Exceptions happen in case of broken input. - * - * @deprecated Use rb_enc_codepoint_len() instead. - * @param[in] p Pointer to the character's first byte. - * @param[in] e End of the string that has `p`. - * @param[in] enc Encoding of the string. - * @exception rb_eArgError `p` is broken. - * @return Code point of the character pointed by `p`. - */ -unsigned int rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc); - -/** @cond INTERNAL_MACRO */ -#define rb_enc_codepoint(p,e,enc) rb_enc_codepoint_len((p),(e),0,(enc)) -/** @endcond */ - -/** - * Identical to rb_enc_codepoint(), except it assumes the passed character is - * not broken. - * - * @param[in] p Pointer to the character's first byte. - * @param[in] e End of the string that has `p`. - * @param[in] enc Encoding of the string. - * @return Code point of the character pointed by `p`. - */ -#define rb_enc_mbc_to_codepoint(p, e, enc) ONIGENC_MBC_TO_CODE((enc),(UChar*)(p),(UChar*)(e)) - -/** - * Queries the number of bytes requested to represent the passed code point - * using the passed encoding. - * - * @param[in] code Code point in question. - * @param[in] enc Encoding to convert the code into a byte sequence. - * @exception rb_eArgError `enc` does not glean `code`. - * @return Number of bytes requested to represent `code` using `enc`. - */ -int rb_enc_codelen(int code, rb_encoding *enc); - -/** - * Identical to rb_enc_codelen(), except it returns 0 for invalid code points. - * - * @param[in] code Code point in question. 
- * @param[in] enc Encoding to convert the code into a byte sequence. - * @retval 0 `code` is invalid. - * @retval otherwise Number of bytes used for `enc` to encode `code`. - */ -int rb_enc_code_to_mbclen(int code, rb_encoding *enc); - -/** @cond INTERNAL_MACRO */ -#define rb_enc_code_to_mbclen(c, enc) ONIGENC_CODE_TO_MBCLEN((enc), (c)); -/** @endcond */ - -/** - * Identical to rb_enc_uint_chr(), except it writes back to the passed buffer - * instead of allocating one. - * - * @param[in] c Code point. - * @param[out] buf Return buffer. - * @param[in] enc Target encoding scheme. - * @post `c` is encoded according to `enc`, then written to `buf`. - */ -#define rb_enc_mbcput(c,buf,enc) ONIGENC_CODE_TO_MBC((enc),(c),(UChar*)(buf)) - -/** - * Queries the previous (left) character. - * - * @param[in] s Start of the string. - * @param[in] p Pointer to a character. - * @param[in] e End of the string. - * @param[in] enc Encoding. - * @retval NULL No previous character. - * @retval otherwise Pointer to the head of the previous character. - */ -#define rb_enc_prev_char(s,p,e,enc) ((char *)onigenc_get_prev_char_head((enc),(UChar*)(s),(UChar*)(p),(UChar*)(e))) - -/** - * Queries the left boundary of a character. This function takes a pointer - * that is not necessarily a head of a character, and searches for its head. - * - * @param[in] s Start of the string. - * @param[in] p Pointer to a possibly-middle of a character. - * @param[in] e End of the string. - * @param[in] enc Encoding. - * @return Pointer to the head of the character that contains `p`. - */ -#define rb_enc_left_char_head(s,p,e,enc) ((char *)onigenc_get_left_adjust_char_head((enc),(UChar*)(s),(UChar*)(p),(UChar*)(e))) - -/** - * Queries the right boundary of a character. This function takes a pointer - * that is not necessarily a head of a character, and searches for its tail. - * - * @param[in] s Start of the string. - * @param[in] p Pointer to a possibly-middle of a character.
- * @param[in] e End of the string. - * @param[in] enc Encoding. - * @return Pointer to the end of the character that contains `p`. - */ -#define rb_enc_right_char_head(s,p,e,enc) ((char *)onigenc_get_right_adjust_char_head((enc),(UChar*)(s),(UChar*)(p),(UChar*)(e))) - -/** - * Scans the string backwards for n characters. - * - * @param[in] s Start of the string. - * @param[in] p Pointer to a character. - * @param[in] e End of the string. - * @param[in] n Steps. - * @param[in] enc Encoding. - * @retval NULL There are no `n` characters left. - * @retval otherwise Pointer to `n` character before `p`. - */ -#define rb_enc_step_back(s,p,e,n,enc) ((char *)onigenc_step_back((enc),(UChar*)(s),(UChar*)(p),(UChar*)(e),(int)(n))) - -/** - * Queries if the passed pointer points to a newline character. What is a - * newline and what is not depends on the passed encoding. - * - * @param[in] p Pointer to a possibly-middle of a character. - * @param[in] end End of the string. - * @param[in] enc Encoding. - * @retval 0 It isn't. - * @retval otherwise It is. - */ -#define rb_enc_is_newline(p,end,enc) ONIGENC_IS_MBC_NEWLINE((enc),(UChar*)(p),(UChar*)(end)) - -/** - * Queries if the passed code point is of passed character type in the passed - * encoding. The "character type" here is a set of macros defined in onigmo.h, - * like `ONIGENC_CTYPE_PUNCT`. - * - * @param[in] c A code point. - * @param[in] t Type (see above). - * @param[in] enc Encoding. - * @retval 1 `c` is of `t` in `enc`. - * @retval 0 Otherwise. - */ -#define rb_enc_isctype(c,t,enc) ONIGENC_IS_CODE_CTYPE((enc),(c),(t)) - -/** - * Identical to rb_isascii(), except it additionally takes an encoding. - * - * @param[in] c A code point. - * @param[in] enc An encoding. - * @retval 0 `c` is out of range of ASCII character set in `enc`. - * @retval 1 Otherwise. - * - * @internal - * - * `enc` is ignored. This is at least an intentional implementation detail - * (not a bug). But there could be rooms for future extensions. 
- */ -#define rb_enc_isascii(c,enc) ONIGENC_IS_CODE_ASCII(c) - -/** - * Identical to rb_isalpha(), except it additionally takes an encoding. - * - * @param[in] c A code point. - * @param[in] enc An encoding. - * @retval 1 `enc` classifies `c` as "ALPHA". - * @retval 0 Otherwise. - */ -#define rb_enc_isalpha(c,enc) ONIGENC_IS_CODE_ALPHA((enc),(c)) - -/** - * Identical to rb_islower(), except it additionally takes an encoding. - * - * @param[in] c A code point. - * @param[in] enc An encoding. - * @retval 1 `enc` classifies `c` as "LOWER". - * @retval 0 Otherwise. - */ -#define rb_enc_islower(c,enc) ONIGENC_IS_CODE_LOWER((enc),(c)) - -/** - * Identical to rb_isupper(), except it additionally takes an encoding. - * - * @param[in] c A code point. - * @param[in] enc An encoding. - * @retval 1 `enc` classifies `c` as "UPPER". - * @retval 0 Otherwise. - */ -#define rb_enc_isupper(c,enc) ONIGENC_IS_CODE_UPPER((enc),(c)) - -/** - * Identical to rb_ispunct(), except it additionally takes an encoding. - * - * @param[in] c A code point. - * @param[in] enc An encoding. - * @retval 1 `enc` classifies `c` as "PUNCT". - * @retval 0 Otherwise. - */ -#define rb_enc_ispunct(c,enc) ONIGENC_IS_CODE_PUNCT((enc),(c)) - -/** - * Identical to rb_isalnum(), except it additionally takes an encoding. - * - * @param[in] c A code point. - * @param[in] enc An encoding. - * @retval 1 `enc` classifies `c` as "ALNUM". - * @retval 0 Otherwise. - */ -#define rb_enc_isalnum(c,enc) ONIGENC_IS_CODE_ALNUM((enc),(c)) - -/** - * Identical to rb_isprint(), except it additionally takes an encoding. - * - * @param[in] c A code point. - * @param[in] enc An encoding. - * @retval 1 `enc` classifies `c` as "PRINT". - * @retval 0 Otherwise. - */ -#define rb_enc_isprint(c,enc) ONIGENC_IS_CODE_PRINT((enc),(c)) - -/** - * Identical to rb_isspace(), except it additionally takes an encoding. - * - * @param[in] c A code point. - * @param[in] enc An encoding. - * @retval 1 `enc` classifies `c` as "SPACE".
- * @retval 0 Otherwise. - */ -#define rb_enc_isspace(c,enc) ONIGENC_IS_CODE_SPACE((enc),(c)) - -/** - * Identical to rb_isdigit(), except it additionally takes an encoding. - * - * @param[in] c A code point. - * @param[in] enc An encoding. - * @retval 1 `enc` classifies `c` as "DIGIT". - * @retval 0 Otherwise. - */ -#define rb_enc_isdigit(c,enc) ONIGENC_IS_CODE_DIGIT((enc),(c)) - -/** - * @private - * - * This is an implementation detail of rb_enc_asciicompat(). People don't use - * it directly. Just always use rb_enc_asciicompat(). - * - * @param[in] enc Encoding in question. - * @retval 1 It is ASCII compatible. - * @retval 0 It isn't. - */ -static inline int -rb_enc_asciicompat_inline(rb_encoding *enc) -{ - return rb_enc_mbminlen(enc)==1 && !rb_enc_dummy_p(enc); -} - -/** - * Queries if the passed encoding is _in some sense_ compatible with ASCII. - * The concept of ASCII compatibility is nuanced, and private to our - * implementation. For instance SJIS is ASCII compatible to us, despite their - * having different characters at code point `0x5C`. This is based on some - * practical consideration that Japanese people confuses SJIS to be "upper - * compatible" with ASCII (which is in fact a wrong idea, but we just don't go - * strict here). An example of ASCII incompatible encoding is UTF-16. UTF-16 - * shares code points with ASCII, but employs a completely different encoding - * scheme. - * - * @param[in] enc Encoding in question. - * @retval 0 It is incompatible. - * @retval 1 It is compatible. - */ -#define rb_enc_asciicompat(enc) rb_enc_asciicompat_inline(enc) - -RBIMPL_ATTR_CONST() -/** - * Identical to rb_toupper(), except it additionally takes an encoding. - * - * @param[in] c A code point. - * @param[in] enc An encoding. - * @return `c`'s (Ruby's definition of) upper case counterpart. - * - * @internal - * - * As `RBIMPL_ATTR_CONST` implies this function ignores `enc`. 
- */ -int rb_enc_toupper(int c, rb_encoding *enc); - -RBIMPL_ATTR_CONST() -/** - * Identical to rb_tolower(), except it additionally takes an encoding. - * - * @param[in] c A code point. - * @param[in] enc An encoding. - * @return `c`'s (Ruby's definition of) lower case counterpart. - * - * @internal - * - * As `RBIMPL_ATTR_CONST` implies this function ignores `enc`. - */ -int rb_enc_tolower(int c, rb_encoding *enc); - -/** - * Identical to rb_intern2(), except it additionally takes an encoding. - * - * @param[in] name The name of the id. - * @param[in] len Length of `name`. - * @param[in] enc `name`'s encoding. - * @exception rb_eRuntimeError Too many symbols. - * @return A (possibly new) id whose value is the given name. - * @note These days Ruby internally has two kinds of symbols - * (static/dynamic). Symbols created using this function would - * become static ones; i.e. would never be garbage collected. It - * is up to you to avoid memory leaks. Think twice before using - * it. - */ -ID rb_intern3(const char *name, long len, rb_encoding *enc); - -RBIMPL_ATTR_NONNULL(()) -/** - * Identical to rb_symname_p(), except it additionally takes an encoding. - * - * @param[in] str A C string to check. - * @param[in] enc `str`'s encoding. - * @retval 1 It is a valid symbol name. - * @retval 0 It is invalid as a symbol name. - */ -int rb_enc_symname_p(const char *str, rb_encoding *enc); - -/** - * Identical to rb_enc_symname_p(), except it additionally takes the passed - * string's length. This is needed for strings containing NUL bytes, like in - * case of UTF-32. - * - * @param[in] name A C string to check. - * @param[in] len Number of bytes of `name`. - * @param[in] enc `name`'s encoding. - * @retval 1 It is a valid symbol name. - * @retval 0 It is invalid as a symbol name. - */ -int rb_enc_symname2_p(const char *name, long len, rb_encoding *enc); - -/** - * Scans the passed string to collect its code range.
Because a Ruby's string - * is mutable, its contents change from time to time; so does its code range. - * A long-lived string tends to fall back to ::RUBY_ENC_CODERANGE_UNKNOWN. - * This API scans it and re-assigns a fine-grained code range constant. - * - * @param[out] str A string. - * @return An enum ::ruby_coderange_type. - */ -int rb_enc_str_coderange(VALUE str); - -/** - * Scans the passed string until it finds something odd. Returns the number of - * bytes scanned. As the name implies this is suitable for repeated call. One - * of its application is `IO#readlines`. The method reads from its receiver's - * read buffer, maybe more than once, looking for newlines. But "newline" can - * be different among encodings. This API is used to detect broken contents to - * properly mark them as such. - * - * @param[in] str String to scan. - * @param[in] end End of `str`. - * @param[in] enc `str`'s encoding. - * @param[out] cr Return buffer. - * @return Distance between `str` and first such byte where broken. - * @post `cr` has the code range type. - */ -long rb_str_coderange_scan_restartable(const char *str, const char *end, rb_encoding *enc, int *cr); - -/** - * Queries if the passed string is "ASCII only". An ASCII only string is a - * string who doesn't have any non-ASCII characters at all. This doesn't - * necessarily mean the string is in ASCII encoding. For instance a String of - * CP932 encoding can quite much be ASCII only, depending on its contents. - * - * @param[in] str String in question. - * @retval 1 It doesn't have non-ASCII characters. - * @retval 0 It has characters that are out of ASCII. - */ -int rb_enc_str_asciionly_p(VALUE str); - -/** - * Queries if the passed string is in an ASCII-compatible encoding. - * - * @param[in] str A Ruby's string to query. - * @retval 0 `str` is not a String, or an ASCII-incompatible string. - * @retval 1 Otherwise. 
- */ -#define rb_enc_str_asciicompat_p(str) rb_enc_asciicompat(rb_enc_get(str)) - -/** - * Queries the Ruby-level counterpart instance of ::rb_cEncoding that - * corresponds to the passed encoding. - * - * @param[in] enc An encoding - * @retval RUBY_Qnil `enc` is a null pointer. - * @retval otherwise An instance of ::rb_cEncoding. - */ -VALUE rb_enc_from_encoding(rb_encoding *enc); - -RBIMPL_ATTR_PURE() -/** - * Queries if the passed encoding is either one of UTF-8/16/32. - * - * @note It does not take UTF-7, which we actually support, into account. - * - * @param[in] enc Encoding in question. - * @retval 0 It is not a Unicode variant. - * @retval otherwise It is. - * - * @internal - * - * In reality it returns 1/0, but the value is abstracted as - * `ONIGENC_FLAG_UNICODE`. - */ -int rb_enc_unicode_p(rb_encoding *enc); - -RBIMPL_ATTR_RETURNS_NONNULL() -/** - * Queries the encoding that represents ASCII-8BIT a.k.a. binary. - * - * @return The encoding that represents ASCII-8BIT. - * - * @internal - * - * This can not return NULL once the process properly boots up. - */ -rb_encoding *rb_ascii8bit_encoding(void); - -RBIMPL_ATTR_RETURNS_NONNULL() -/** - * Queries the encoding that represents UTF-8. - * - * @return The encoding that represents UTF-8. - * - * @internal - * - * This can not return NULL once the process properly boots up. - */ -rb_encoding *rb_utf8_encoding(void); - -RBIMPL_ATTR_RETURNS_NONNULL() -/** - * Queries the encoding that represents US-ASCII. - * - * @return The encoding that represents US-ASCII. - * - * @internal - * - * This can not return NULL once the process properly boots up. - */ -rb_encoding *rb_usascii_encoding(void); - -/** - * Queries the encoding that represents the current locale. - * - * @return The encoding that represents the process' locale. - * - * @internal - * - * This is dynamic. If you change the process' locale by e.g. calling - * `setlocale(3)`, that should also change the return value of this function. 
- * - * There is no official way for Ruby scripts to manipulate locales, though. - */ -rb_encoding *rb_locale_encoding(void); - -/** - * Queries the "filesystem" encoding. This is the encoding that ruby expects - * info from the OS' file system to be in. This affects for instance return - * value of rb_dir_getwd(). Most notably on Windows it can be an alias of OS - * codepage. Most notably on Linux users can set this via default external - * encoding. - * - * @return The "filesystem" encoding. - */ -rb_encoding *rb_filesystem_encoding(void); - -/** - * Queries the "default external" encoding. This is used to interact with - * outer-process things such as File. Though not recommended, you can set this - * using rb_enc_set_default_external(). - * - * @return The "default external" encoding. - */ -rb_encoding *rb_default_external_encoding(void); - -/** - * Queries the "default internal" encoding. This could be a null pointer. - * Otherwise, outer-process info is transcoded from default external encoding - * to this one during reading from an IO. - * - * @return The "default internal" encoding (if any). - */ -rb_encoding *rb_default_internal_encoding(void); - -#ifndef rb_ascii8bit_encindex -RBIMPL_ATTR_CONST() -/** - * Identical to rb_ascii8bit_encoding(), except it returns the encoding's index - * instead of the encoding itself. - * - * @return The index of encoding of ASCII-8BIT. - * - * @internal - * - * This happens to be 0. - */ -int rb_ascii8bit_encindex(void); -#endif - -#ifndef rb_utf8_encindex -RBIMPL_ATTR_CONST() -/** - * Identical to rb_utf8_encoding(), except it returns the encoding's index - * instead of the encoding itself. - * - * @return The index of encoding of UTF-8. - */ -int rb_utf8_encindex(void); -#endif - -#ifndef rb_usascii_encindex -RBIMPL_ATTR_CONST() -/** - * Identical to rb_usascii_encoding(), except it returns the encoding's index - * instead of the encoding itself. - * - * @return The index of encoding of US-ASCII.
- */ -int rb_usascii_encindex(void); -#endif - -/** - * Identical to rb_locale_encoding(), except it returns the encoding's index - * instead of the encoding itself. - * - * @return The index of the locale encoding. - */ -int rb_locale_encindex(void); - -/** - * Identical to rb_filesystem_encoding(), except it returns the encoding's - * index instead of the encoding itself. - * - * @return The index of the filesystem encoding. - */ -int rb_filesystem_encindex(void); - -/** - * Identical to rb_default_external_encoding(), except it returns the - * Ruby-level counterpart instance of ::rb_cEncoding that corresponds to the - * default external encoding. - * - * @return An instance of ::rb_cEncoding of default external. - */ -VALUE rb_enc_default_external(void); - -/** - * Identical to rb_default_internal_encoding(), except it returns the - * Ruby-level counterpart instance of ::rb_cEncoding that corresponds to the - * default internal encoding. - * - * @return An instance of ::rb_cEncoding of default internal. - */ -VALUE rb_enc_default_internal(void); - -/** - * Destructively assigns the passed encoding as the default external encoding. - * You should not use this API. It has process-global side effects. Also it - * doesn't change encodings of strings that have already been read. - * - * @param[in] encoding Ruby level encoding. - * @exception rb_eArgError `encoding` is ::RUBY_Qnil. - * @post The default external encoding is `encoding`. - */ -void rb_enc_set_default_external(VALUE encoding); - -/** - * Destructively assigns the passed encoding as the default internal encoding. - * You should not use this API. It has process-global side effects. Also it - * doesn't change encodings of strings that have already been read. - * - * @param[in] encoding Ruby level encoding. - * @post The default internal encoding is `encoding`. - * @note Unlike rb_enc_set_default_external() you can pass ::RUBY_Qnil. 
- */ -void rb_enc_set_default_internal(VALUE encoding); - -/** - * Returns a platform-dependent "charmap" of the current locale. This - * information is called a "Codeset name" in IEEE 1003.1 section 13 - * (`<langinfo.h>`). This is a very low-level API. The return value can have - * no corresponding encoding when passed to rb_find_encoding(). - * - * @param[in] klass Ignored for no reason (why...) - * @return The low-level locale charmap, in Ruby's String. - */ -VALUE rb_locale_charmap(VALUE klass); - -RBIMPL_ATTR_NONNULL(()) -/** - * Looks for the passed string in the passed buffer. - * - * @param[in] x Buffer that potentially includes `y`. - * @param[in] m Number of bytes of `x`. - * @param[in] y Query string. - * @param[in] n Number of bytes of `y`. - * @param[in] enc Encoding of both `x` and `y`. - * @retval -1 Not found. - * @retval otherwise Found index in `x`. - * @note This API can match at a non-character-boundary. - */ -long rb_memsearch(const void *x, long m, const void *y, long n, rb_encoding *enc); - -RBIMPL_ATTR_NONNULL(()) -/** - * Returns a path component directly adjacent to the passed pointer. - * - * ``` - * "/multi/byte/encoded/pathname.txt" - * ^ ^ ^ - * | | +--- end - * | +--- @return - * +--- path - * ``` - * - * @param[in] path Where to start scanning. - * @param[in] end End of the path string. - * @param[in] enc Encoding of the string. - * @return A pointer in the passed string where the next path component - * resides, or `end` if there is no next path component. - */ -char *rb_enc_path_next(const char *path, const char *end, rb_encoding *enc); - -RBIMPL_ATTR_NONNULL(()) -/** - * Seeks for non-prefix part of a pathname. This can be a no-op when the OS - * has no such concept like a path prefix. But there are OSes where path - * prefixes do exist. - * - * ``` - * "C:\multi\byte\encoded\pathname.txt" - * ^ ^ ^ - * | | +--- end - * | +--- @return - * +--- path - * ``` - * - * @param[in] path Where to start scanning.
- * @param[in] end End of the path string. - * @param[in] enc Encoding of the string. - * @return A pointer in the passed string where non-prefix part starts, or - * `path` if the OS does not have path prefix. - */ -char *rb_enc_path_skip_prefix(const char *path, const char *end, rb_encoding *enc); - -RBIMPL_ATTR_NONNULL(()) -/** - * Returns the last path component. - * - * ``` - * "/multi/byte/encoded/pathname.txt" - * ^ ^ ^ - * | | +--- end - * | +--- @return - * +--- path - * ``` - * - * @param[in] path Where to start scanning. - * @param[in] end End of the path string. - * @param[in] enc Encoding of the string. - * @return A pointer in the passed string where the last path component - * resides, or `end` if there is no more path component. - */ -char *rb_enc_path_last_separator(const char *path, const char *end, rb_encoding *enc); - -RBIMPL_ATTR_NONNULL(()) -/** - * This just returns the passed end basically. It makes difference in case the - * passed string ends with tons of path separators like the following: - * - * ``` - * "/path/that/ends/with/lots/of/slashes//////////////" - * ^ ^ ^ - * | | +--- end - * | +--- @return - * +--- path - * ``` - * - * @param[in] path Where to start scanning. - * @param[in] end End of the path string. - * @param[in] enc Encoding of the string. - * @return A pointer in the passed string where the trailing path - * separators start, or `end` if there is no trailing path - * separators. - * - * @internal - * - * It seems this function was introduced to mimic what POSIX says about - * `basename(3)`. - */ -char *rb_enc_path_end(const char *path, const char *end, rb_encoding *enc); - -RBIMPL_ATTR_NONNULL((1, 4)) -/** - * Our own encoding-aware version of `basename(3)`. Normally, this function - * returns the last path component of the given name. However in case the - * passed name ends with a path separator, it returns the name of the - * directory, not the last (empty) component. 
Also if the passed name is a - * root directory, it returns that root directory. Note however that Windows - * filesystems have drive letters, which this function does not return. - * - * @param[in] name Target path. - * @param[out] baselen Return buffer. - * @param[in,out] alllen Number of bytes of `name`. - * @param[in] enc Encoding of `name`. - * @return The rightmost component of `name`. - * @post `baselen`, if passed, is updated to be the number of bytes - * of the returned basename. - * @post `alllen`, if passed, is updated to be the number of bytes of - * strings not considered as the basename. - */ -const char *ruby_enc_find_basename(const char *name, long *baselen, long *alllen, rb_encoding *enc); - -RBIMPL_ATTR_NONNULL((1, 3)) -/** - * Our own encoding-aware version of `extname`. This function first applies - * rb_enc_path_last_separator() to the passed name and only concerns its return - * value (ignores any parent directories). This function returns complicated - * results: - * - * ```CXX - * auto path = "..."; - * auto len = strlen(path); - * auto ret = ruby_enc_find_extname(path, &len, rb_ascii8bit_encoding()); - * - * switch(len) { - * case 0: - * if (ret == 0) { - * // `path` is a file without extensions. - * } - * else { - * // `path` is a dotfile. - * // `ret` is the file's name. - * } - * break; - * - * case 1: - * // `path` _ends_ with a dot. - * // `ret` is that dot. - * break; - * - * default: - * // `path` has an extension. - * // `ret` is that extension. - * } - * ``` - * - * @param[in] name Target path. - * @param[in,out] len Number of bytes of `name`. - * @param[in] enc Encoding of `name`. - * @return See above. - * @post `len`, if passed, is updated (see above). - */ -const char *ruby_enc_find_extname(const char *name, long *len, rb_encoding *enc); - -/** - * Identical to rb_check_id(), except it takes a pointer to a memory region - * instead of Ruby's string. - * - * @param[in] ptr A pointer to a memory region.
- * @param[in] len Number of bytes of `ptr`. - * @param[in] enc Encoding of `ptr`. - * @exception rb_eEncodingError `ptr` contains non-ASCII according to `enc`. - * @retval 0 No such id ever existed in the history. - * @retval otherwise The id that represents the given name. - */ -ID rb_check_id_cstr(const char *ptr, long len, rb_encoding *enc); - -/** - * Identical to rb_check_id_cstr(), except for the return type. It can also be - * seen as a routine identical to rb_check_symbol(), except it takes a pointer - * to a memory region instead of Ruby's string. - * - * @param[in] ptr A pointer to a memory region. - * @param[in] len Number of bytes of `ptr`. - * @param[in] enc Encoding of `ptr`. - * @exception rb_eEncodingError `ptr` contains non-ASCII according to `enc`. - * @retval RUBY_Qnil No such id ever existed in the history. - * @retval otherwise The id that represents the given name. - */ -VALUE rb_check_symbol_cstr(const char *ptr, long len, rb_encoding *enc); - -/** - * `Encoding` class. - * - * @ingroup object - */ -RUBY_EXTERN VALUE rb_cEncoding; - -/* econv stuff */ - -/** return value of rb_econv_convert() */ -typedef enum { - - /** - * The conversion stopped when it found an invalid sequence. - */ - econv_invalid_byte_sequence, - - /** - * The conversion stopped when it found a character in the input which - * cannot be representable in the output. - */ - econv_undefined_conversion, - - /** - * The conversion stopped because there is no destination. - */ - econv_destination_buffer_full, - - /** - * The conversion stopped because there is no input. - */ - econv_source_buffer_empty, - - /** - * The conversion stopped after converting everything. This is arguably - * the expected normal end of conversion. - */ - econv_finished, - - /** - * The conversion stopped after writing something to somewhere, before - * reading everything. 
- */ - econv_after_output, - - /** - * The conversion stopped in middle of reading a character, possibly due to - * a partial read of a socket etc. - */ - econv_incomplete_input -} rb_econv_result_t; - -/** An opaque struct that represents a lowest level of encoding conversion. */ -typedef struct rb_econv_t rb_econv_t; - -/** - * Converts the contents of the passed string from its encoding to the passed - * one. - * - * @param[in] str Target string. - * @param[in] to Destination encoding. - * @param[in] ecflags A set of enum - * ::ruby_econv_flag_type. - * @param[in] ecopts A keyword hash, like - * ::rb_io_t::rb_io_enc_t::ecopts. - * @exception rb_eArgError Not fully converted. - * @exception rb_eInvalidByteSequenceError `str` is malformed. - * @exception rb_eUndefinedConversionError `str` has a character not - * representable using `to`. - * @exception rb_eConversionNotFoundError There is no known conversion from - * `str`'s encoding to `to`. - * @return A string whose encoding is `to`, and whose contents is converted - * contents of `str`. - * @note Use rb_econv_prepare_options() to generate `ecopts`. - */ -VALUE rb_str_encode(VALUE str, VALUE to, int ecflags, VALUE ecopts); - -/** - * Queries if there is more than one way to convert between the passed two - * encodings. Encoding conversion are has_and_belongs_to_many relationships. - * There could be no direct conversion defined for the passed pair. Ruby tries - * to find an indirect way to do so then. For instance ISO-8859-1 has no - * direct conversion to ISO-2022-JP. But there is ISO-8859-1 to UTF-8 - * conversion; then there is UTF-8 to EUC-JP conversion; finally there also is - * EUC-JP to ISO-2022-JP conversion. So in short ISO-8859-1 can be converted - * to ISO-2022-JP using that path. This function returns true. Obviously not - * everything that can be represented using UTF-8 can also be represented using - * EUC-JP. 
Conversions in practice can fail depending on the actual input, and - * that renders exceptions in case of rb_str_encode(). - * - * @param[in] from_encoding One encoding. - * @param[in] to_encoding Another encoding. - * @retval 0 No way to convert the two. - * @retval 1 At least one way to convert the two. - * - * @internal - * - * Practically @shyouhei knows no way for this function to return 0. It seems - * everything can eventually be converted to/from UTF-8, which connects - * everything. - */ -int rb_econv_has_convpath_p(const char* from_encoding, const char* to_encoding); - -/** - * Identical to rb_econv_prepare_opts(), except it additionally takes the - * initial value of flags. The extra bits are bitwise-ORed to the return - * value. - * - * @param[in] opthash Keyword arguments. - * @param[out] ecopts Return buffer. - * @param[in] ecflags Default set of enum ::ruby_econv_flag_type. - * @exception rb_eArgError Unknown/Broken values passed. - * @return Calculated set of enum ::ruby_econv_flag_type. - * @post `ecopts` holds a hash object suitable for - * ::rb_io_t::rb_io_enc_t::ecopts. - */ -int rb_econv_prepare_options(VALUE opthash, VALUE *ecopts, int ecflags); - -/** - * Splits a keyword arguments hash (that for instance `String#encode` took) - * into a set of enum ::ruby_econv_flag_type and a hash storing replacement - * characters etc. - * - * @param[in] opthash Keyword arguments. - * @param[out] ecopts Return buffer. - * @exception rb_eArgError Unknown/Broken values passed. - * @return Calculated set of enum ::ruby_econv_flag_type. - * @post `ecopts` holds a hash object suitable for - * ::rb_io_t::rb_io_enc_t::ecopts. - */ -int rb_econv_prepare_opts(VALUE opthash, VALUE *ecopts); - -/** - * Creates a new instance of struct ::rb_econv_t. - * - * @param[in] source_encoding Name of an encoding. - * @param[in] destination_encoding Name of another encoding. - * @param[in] ecflags A set of enum ::ruby_econv_flag_type. 
- * @exception rb_eArgError No such encoding. - * @retval NULL Failed to create a struct ::rb_econv_t. - * @retval otherwise Allocated struct ::rb_econv_t. - * @warning Return value must be passed to rb_econv_close() exactly once. - */ -rb_econv_t *rb_econv_open(const char *source_encoding, const char *destination_encoding, int ecflags); - -/** - * Identical to rb_econv_open(), except it additionally takes a hash of - * optional strings. - * - * - * @param[in] source_encoding Name of an encoding. - * @param[in] destination_encoding Name of another encoding. - * @param[in] ecflags A set of enum ::ruby_econv_flag_type. - * @param[in] ecopts Optional set of strings. - * @exception rb_eArgError No such encoding. - * @retval NULL Failed to create a struct ::rb_econv_t. - * @retval otherwise Allocated struct ::rb_econv_t. - * @warning Return value must be passed to rb_econv_close() exactly once. - */ -rb_econv_t *rb_econv_open_opts(const char *source_encoding, const char *destination_encoding, int ecflags, VALUE ecopts); - -/** - * Converts a string from an encoding to another. - * - * Possible flags are either ::RUBY_ECONV_PARTIAL_INPUT (means the source - * buffer is a part of much larger one), ::RUBY_ECONV_AFTER_OUTPUT (instructs - * the converter to stop after output before input), or both of them. - * - * @param[in,out] ec Conversion specification/state etc. - * @param[in] source_buffer_ptr Target string. - * @param[in] source_buffer_end End of target string. - * @param[out] destination_buffer_ptr Return buffer. - * @param[out] destination_buffer_end End of return buffer. - * @param[in] flags Flags (see above). - * @return The status of the conversion. - * @post `destination_buffer_ptr` holds conversion results. 
- */ -rb_econv_result_t rb_econv_convert(rb_econv_t *ec, - const unsigned char **source_buffer_ptr, const unsigned char *source_buffer_end, - unsigned char **destination_buffer_ptr, unsigned char *destination_buffer_end, - int flags); - -/** - * Destructs a converter. Note that a converter can have a buffer, and can be - * non-empty. Calling this would lose your data then. - * - * @param[out] ec The converter to destroy. - * @post `ec` is no longer a valid pointer. - */ -void rb_econv_close(rb_econv_t *ec); - -/** - * Assigns the replacement string. The string passed here would appear in - * converted string when it cannot represent its source counterpart. This can - * happen for instance when you convert an emoji to ISO-8859-1. - * - * @param[out] ec Target converter. - * @param[in] str Replacement string. - * @param[in] len Number of bytes of `str`. - * @param[in] encname Name of encoding of `str`. - * @retval 0 Success. - * @retval -1 Failure (ENOMEM etc.). - * @post `ec`'s replacement string is set to `str`. - */ -int rb_econv_set_replacement(rb_econv_t *ec, const unsigned char *str, size_t len, const char *encname); - -/** - * "Decorate"s a converter. There are special kinds of converters that - * transform the contents, like replacing CR into CRLF. You can add such - * decorators to a converter using this API. By using this function a - * decorator is prepended at the beginning of a conversion sequence: in case of - * CRLF conversion, newlines are converted before encodings are converted. - * - * @param[out] ec Target converter to decorate. - * @param[in] decorator_name Name of decorator to prepend. - * @retval 0 Success. - * @retval -1 Failure (no such decorator etc.). - * @post Decorator works before encoding conversion happens. - * - * @internal - * - * The possible values of `decorator_name` are not public. You have - * to read through `transcode.c` carefully.
- */ -int rb_econv_decorate_at_first(rb_econv_t *ec, const char *decorator_name); - -/** - * Identical to rb_econv_decorate_at_first(), except it adds to the opposite - * direction. For instance CRLF conversion would run _after_ encodings are - * converted. - * - * @param[out] ec Target converter to decorate. - * @param[in] decorator_name Name of decorator to prepend. - * @retval 0 Success. - * @retval -1 Failure (no such decorator etc.). - * @post Decorator works after encoding conversion happens. - */ -int rb_econv_decorate_at_last(rb_econv_t *ec, const char *decorator_name); - -/** - * Creates a `rb_eConverterNotFoundError` exception object (but does not - * raise). - * - * @param[in] senc Name of source encoding. - * @param[in] denc Name of destination encoding. - * @param[in] ecflags A set of enum ::ruby_econv_flag_type. - * @return An instance of `rb_eConverterNotFoundError`. - */ -VALUE rb_econv_open_exc(const char *senc, const char *denc, int ecflags); - -/** - * Appends the passed string to the passed converter's output buffer. This can - * be handy when an encoding needs bytes out of thin air; for instance - * ISO-2022-JP has "shift function" which does not correspond to any - * characters. - * - * @param[out] ec Target converter. - * @param[in] str String to insert. - * @param[in] len Number of bytes of `str`. - * @param[in] str_encoding Encoding of `str`. - * @retval 0 Success. - * @retval -1 Failure (conversion error etc.). - * @note `str_encoding` can be anything, and `str` itself is converted - * when necessary. - */ -int rb_econv_insert_output(rb_econv_t *ec, - const unsigned char *str, size_t len, const char *str_encoding); - -/** - * Queries an encoding name which best suits for rb_econv_insert_output()'s - * last parameter. Strings in this encoding need no conversion when inserted; - * can be both time/space efficient. - * - * @param[in] ec Target converter. - * @return Its encoding for insertion. 
- */ -const char *rb_econv_encoding_to_insert_output(rb_econv_t *ec); - -/** - * This is a rb_econv_make_exception() + rb_exc_raise() combo. - * - * @param[in] ec (Possibly failed) conversion. - * @exception rb_eInvalidByteSequenceError Invalid byte sequence. - * @exception rb_eUndefinedConversionError Conversion undefined. - * @note This function can return when no error. - */ -void rb_econv_check_error(rb_econv_t *ec); - -/** - * This function makes sense right after rb_econv_convert() returns. As listed - * in ::rb_econv_result_t, rb_econv_convert() can bail out for various reasons. - * This function checks the passed converter's internal state and convert it to - * an appropriate exception object. - * - * @param[in] ec Target converter. - * @retval RUBY_Qnil The converter has no error. - * @retval otherwise Conversion error turned into an exception. - */ -VALUE rb_econv_make_exception(rb_econv_t *ec); - -/** - * Queries if rb_econv_putback() makes sense, i.e. there are invalid byte - * sequences remain in the buffer. - * - * @param[in] ec Target converter. - * @return Number of bytes that can be pushed back. - */ -int rb_econv_putbackable(rb_econv_t *ec); - -/** - * Puts back the bytes. In case of ::econv_invalid_byte_sequence, some of - * those invalid bytes are discarded and the others are buffered to be - * converted later. The latter bytes can be put back using this API. - * - * @param[out] ec Target converter (invalid byte sequence). - * @param[out] p Return buffer. - * @param[in] n Max number of bytes to put back. - * @post At most `n` bytes of what was put back is written to `p`. - */ -void rb_econv_putback(rb_econv_t *ec, unsigned char *p, int n); - -/** - * Queries the passed encoding's corresponding ASCII compatible encoding. "The - * corresponding ASCII compatible encoding" in this context is an ASCII - * compatible encoding which can represent exactly the same character sets as - * the given ASCII incompatible encoding. 
For instance that of UTF-16LE is - * UTF-8. - * - * @param[in] encname Name of an ASCII incompatible encoding. - * @retval NULL `encname` is already ASCII compatible. - * @retval otherwise The corresponding ASCII compatible encoding. - */ -const char *rb_econv_asciicompat_encoding(const char *encname); - -/** - * Identical to rb_econv_convert(), except it takes Ruby's string instead of - * C's pointer. - * - * @param[in,out] ec Target converter. - * @param[in] src Source string. - * @param[in] flags Flags (see rb_econv_convert). - * @exception rb_eArgError Converted string is too long. - * @exception rb_eInvalidByteSequenceError Invalid byte sequence. - * @exception rb_eUndefinedConversionError Conversion undefined. - * @return The conversion result. - */ -VALUE rb_econv_str_convert(rb_econv_t *ec, VALUE src, int flags); - -/** - * Identical to rb_econv_str_convert(), except it converts only a part of the - * passed string. Can be handy when you for instance want to do line-buffered - * conversion. - * - * @param[in,out] ec Target converter. - * @param[in] src Source string. - * @param[in] byteoff Number of bytes to seek. - * @param[in] bytesize Number of bytes to read. - * @param[in] flags Flags (see rb_econv_convert). - * @exception rb_eArgError Converted string is too long. - * @exception rb_eInvalidByteSequenceError Invalid byte sequence. - * @exception rb_eUndefinedConversionError Conversion undefined. - * @return The conversion result. - */ -VALUE rb_econv_substr_convert(rb_econv_t *ec, VALUE src, long byteoff, long bytesize, int flags); - -/** - * Identical to rb_econv_str_convert(), except it appends the conversion result - * to the additionally passed string instead of creating a new string. It can - * also be seen as a routine identical to rb_econv_append(), except it takes a - * Ruby's string instead of C's pointer. - * - * @param[in,out] ec Target converter. - * @param[in] src Source string. - * @param[in] dst Return buffer. 
- * @param[in] flags Flags (see rb_econv_convert). - * @exception rb_eArgError Converted string is too long. - * @exception rb_eInvalidByteSequenceError Invalid byte sequence. - * @exception rb_eUndefinedConversionError Conversion undefined. - * @return The conversion result. - */ -VALUE rb_econv_str_append(rb_econv_t *ec, VALUE src, VALUE dst, int flags); - -/** - * Identical to rb_econv_str_append(), except it appends only a part of the - * passed string with conversion. It can also be seen as a routine identical - * to rb_econv_substr_convert(), except it appends the conversion result to the - * additionally passed string instead of creating a new string. - * - * @param[in,out] ec Target converter. - * @param[in] src Source string. - * @param[in] byteoff Number of bytes to seek. - * @param[in] bytesize Number of bytes to read. - * @param[in] dst Return buffer. - * @param[in] flags Flags (see rb_econv_convert). - * @exception rb_eArgError Converted string is too long. - * @exception rb_eInvalidByteSequenceError Invalid byte sequence. - * @exception rb_eUndefinedConversionError Conversion undefined. - * @return The conversion result. - */ -VALUE rb_econv_substr_append(rb_econv_t *ec, VALUE src, long byteoff, long bytesize, VALUE dst, int flags); - -/** - * Converts the passed C's pointer according to the passed converter, then - * append the conversion result to the passed Ruby's string. This way buffer - * overflow is properly avoided to resize the destination properly. - * - * @param[in,out] ec Target converter. - * @param[in] bytesrc Target string. - * @param[in] bytesize Number of bytes of `bytesrc`. - * @param[in] dst Return buffer. - * @param[in] flags Flags (see rb_econv_convert). - * @exception rb_eArgError Converted string is too long. - * @exception rb_eInvalidByteSequenceError Invalid byte sequence. - * @exception rb_eUndefinedConversionError Conversion undefined. - * @return The conversion result. 
- */ -VALUE rb_econv_append(rb_econv_t *ec, const char *bytesrc, long bytesize, VALUE dst, int flags); - -/** - * This badly named function does not set the destination encoding to binary, - * but instead just nullifies newline conversion decorators if any. Other - * ordinal character conversions still happen after this; something non-binary - * would still be generated. - * - * @param[out] ec Target converter to modify. - * @post Any newline conversions, if any, would be killed. - */ -void rb_econv_binmode(rb_econv_t *ec); - -/** - * This enum is kind of omnibus. Gathers various constants. - */ -enum ruby_econv_flag_type { - - /** - * @name Flags for rb_econv_open() - * - * @{ - */ - - /** Mask for error handling related bits. */ - RUBY_ECONV_ERROR_HANDLER_MASK = 0x000000ff, - - /** Special handling of invalid sequences are there. */ - RUBY_ECONV_INVALID_MASK = 0x0000000f, - - /** Invalid sequences shall be replaced. */ - RUBY_ECONV_INVALID_REPLACE = 0x00000002, - - /** Special handling of undefined conversion are there. */ - RUBY_ECONV_UNDEF_MASK = 0x000000f0, - - /** Undefined characters shall be replaced. */ - RUBY_ECONV_UNDEF_REPLACE = 0x00000020, - - /** Undefined characters shall be escaped. */ - RUBY_ECONV_UNDEF_HEX_CHARREF = 0x00000030, - - /** Decorators are there. */ - RUBY_ECONV_DECORATOR_MASK = 0x0000ff00, - - /** Newline converters are there. */ - RUBY_ECONV_NEWLINE_DECORATOR_MASK = 0x00003f00, - - /** (Unclear; seems unused). */ - RUBY_ECONV_NEWLINE_DECORATOR_READ_MASK = 0x00000f00, - - /** (Unclear; seems unused). */ - RUBY_ECONV_NEWLINE_DECORATOR_WRITE_MASK = 0x00003000, - - /** Universal newline mode. */ - RUBY_ECONV_UNIVERSAL_NEWLINE_DECORATOR = 0x00000100, - - /** CR to CRLF conversion shall happen. */ - RUBY_ECONV_CRLF_NEWLINE_DECORATOR = 0x00001000, - - /** CRLF to CR conversion shall happen. */ - RUBY_ECONV_CR_NEWLINE_DECORATOR = 0x00002000, - - /** Texts shall be XML-escaped. 
*/ - RUBY_ECONV_XML_TEXT_DECORATOR = 0x00004000, - - /** Texts shall be AttrValue escaped */ - RUBY_ECONV_XML_ATTR_CONTENT_DECORATOR = 0x00008000, - - /** (Unclear; seems unused). */ - RUBY_ECONV_STATEFUL_DECORATOR_MASK = 0x00f00000, - - /** Texts shall be AttrValue escaped. */ - RUBY_ECONV_XML_ATTR_QUOTE_DECORATOR = 0x00100000, - - /** Newline decorator's default. */ - RUBY_ECONV_DEFAULT_NEWLINE_DECORATOR = -#if defined(RUBY_TEST_CRLF_ENVIRONMENT) || defined(_WIN32) - RUBY_ECONV_CRLF_NEWLINE_DECORATOR, -#else - 0, -#endif - -#define ECONV_ERROR_HANDLER_MASK RUBY_ECONV_ERROR_HANDLER_MASK /**< @old{RUBY_ECONV_ERROR_HANDLER_MASK} */ -#define ECONV_INVALID_MASK RUBY_ECONV_INVALID_MASK /**< @old{RUBY_ECONV_INVALID_MASK} */ -#define ECONV_INVALID_REPLACE RUBY_ECONV_INVALID_REPLACE /**< @old{RUBY_ECONV_INVALID_REPLACE} */ -#define ECONV_UNDEF_MASK RUBY_ECONV_UNDEF_MASK /**< @old{RUBY_ECONV_UNDEF_MASK} */ -#define ECONV_UNDEF_REPLACE RUBY_ECONV_UNDEF_REPLACE /**< @old{RUBY_ECONV_UNDEF_REPLACE} */ -#define ECONV_UNDEF_HEX_CHARREF RUBY_ECONV_UNDEF_HEX_CHARREF /**< @old{RUBY_ECONV_UNDEF_HEX_CHARREF} */ -#define ECONV_DECORATOR_MASK RUBY_ECONV_DECORATOR_MASK /**< @old{RUBY_ECONV_DECORATOR_MASK} */ -#define ECONV_NEWLINE_DECORATOR_MASK RUBY_ECONV_NEWLINE_DECORATOR_MASK /**< @old{RUBY_ECONV_NEWLINE_DECORATOR_MASK} */ -#define ECONV_NEWLINE_DECORATOR_READ_MASK RUBY_ECONV_NEWLINE_DECORATOR_READ_MASK /**< @old{RUBY_ECONV_NEWLINE_DECORATOR_READ_MASK} */ -#define ECONV_NEWLINE_DECORATOR_WRITE_MASK RUBY_ECONV_NEWLINE_DECORATOR_WRITE_MASK /**< @old{RUBY_ECONV_NEWLINE_DECORATOR_WRITE_MASK} */ -#define ECONV_UNIVERSAL_NEWLINE_DECORATOR RUBY_ECONV_UNIVERSAL_NEWLINE_DECORATOR /**< @old{RUBY_ECONV_UNIVERSAL_NEWLINE_DECORATOR} */ -#define ECONV_CRLF_NEWLINE_DECORATOR RUBY_ECONV_CRLF_NEWLINE_DECORATOR /**< @old{RUBY_ECONV_CRLF_NEWLINE_DECORATOR} */ -#define ECONV_CR_NEWLINE_DECORATOR RUBY_ECONV_CR_NEWLINE_DECORATOR /**< @old{RUBY_ECONV_CR_NEWLINE_DECORATOR} */ -#define 
ECONV_XML_TEXT_DECORATOR RUBY_ECONV_XML_TEXT_DECORATOR /**< @old{RUBY_ECONV_XML_TEXT_DECORATOR} */ -#define ECONV_XML_ATTR_CONTENT_DECORATOR RUBY_ECONV_XML_ATTR_CONTENT_DECORATOR /**< @old{RUBY_ECONV_XML_ATTR_CONTENT_DECORATOR} */ -#define ECONV_STATEFUL_DECORATOR_MASK RUBY_ECONV_STATEFUL_DECORATOR_MASK /**< @old{RUBY_ECONV_STATEFUL_DECORATOR_MASK} */ -#define ECONV_XML_ATTR_QUOTE_DECORATOR RUBY_ECONV_XML_ATTR_QUOTE_DECORATOR /**< @old{RUBY_ECONV_XML_ATTR_QUOTE_DECORATOR} */ -#define ECONV_DEFAULT_NEWLINE_DECORATOR RUBY_ECONV_DEFAULT_NEWLINE_DECORATOR /**< @old{RUBY_ECONV_DEFAULT_NEWLINE_DECORATOR} */ - /** @} */ - - /** - * @name Flags for rb_econv_convert() - * - * @{ - */ - - /** Indicates the input is a part of much larger one. */ - RUBY_ECONV_PARTIAL_INPUT = 0x00010000, - - /** Instructs the converter to stop after output. */ - RUBY_ECONV_AFTER_OUTPUT = 0x00020000, -#define ECONV_PARTIAL_INPUT RUBY_ECONV_PARTIAL_INPUT /**< @old{RUBY_ECONV_PARTIAL_INPUT} */ -#define ECONV_AFTER_OUTPUT RUBY_ECONV_AFTER_OUTPUT /**< @old{RUBY_ECONV_AFTER_OUTPUT} */ - - RUBY_ECONV_FLAGS_PLACEHOLDER /**< Placeholder (not used) */ -}; - -RBIMPL_SYMBOL_EXPORT_END() +#include "ruby/internal/encoding/coderange.h" +#include "ruby/internal/encoding/ctype.h" +#include "ruby/internal/encoding/encoding.h" +#include "ruby/internal/encoding/pathname.h" +#include "ruby/internal/encoding/re.h" +#include "ruby/internal/encoding/sprintf.h" +#include "ruby/internal/encoding/string.h" +#include "ruby/internal/encoding/symbol.h" +#include "ruby/internal/encoding/transcode.h" #endif /* RUBY_ENCODING_H */ diff --git a/include/ruby/fiber/scheduler.h b/include/ruby/fiber/scheduler.h index 093b936475..8f3d383330 100644 --- a/include/ruby/fiber/scheduler.h +++ b/include/ruby/fiber/scheduler.h @@ -11,18 +11,74 @@ */ #include "ruby/internal/config.h" +#include <errno.h> + #ifdef STDC_HEADERS #include <stddef.h> /* size_t */ #endif #include "ruby/ruby.h" #include "ruby/internal/dllexport.h" +#include 
"ruby/internal/arithmetic.h" RBIMPL_SYMBOL_EXPORT_BEGIN() +#define RUBY_FIBER_SCHEDULER_VERSION 2 + struct timeval; /** + * Wrap a `ssize_t` and `int errno` into a single `VALUE`. This interface should + * be used to safely capture results from system calls like `read` and `write`. + * + * You should use `rb_fiber_scheduler_io_result_apply` to unpack the result of + * this value and update `int errno`. + * + * You should not directly try to interpret the result value as it is considered + * an opaque representation. However, the general representation is an integer + * in the range of `[-int errno, size_t size]`. Linux generally restricts the + * result of system calls like `read` and `write` to `<= 2^31` which means this + * will typically fit within a single FIXNUM. + * + * @param[in] result The result of the system call. + * @param[in] error The value of `errno`. + * @return A `VALUE` which contains the result and/or errno. + */ +static inline VALUE +rb_fiber_scheduler_io_result(ssize_t result, int error) +{ + if (result == -1) { + return RB_INT2NUM(-error); + } + else { + return RB_SIZE2NUM(result); + } +} + +/** + * Apply an io result to the local thread, returning the value of the original + * system call that created it and updating `int errno`. + * + * You should not directly try to interpret the result value as it is considered + * an opaque representation. + * + * @param[in] result The `VALUE` which contains an errno and/or result size. + * @post Updates `int errno` with the value if negative. + * @return The original result of the system call. + */ +static inline ssize_t +rb_fiber_scheduler_io_result_apply(VALUE result) +{ + if (RB_FIXNUM_P(result) && RB_NUM2INT(result) < 0) { + errno = -RB_NUM2INT(result); + return -1; + } + else { + return RB_NUM2SIZE(result); + } +} + +/** * Queries the current scheduler of the current thread that is calling this * function. 
* @@ -41,7 +97,7 @@ VALUE rb_fiber_scheduler_get(void); * current thread will call scheduler's `#close` method on finalisation * (allowing the scheduler to properly manage all non-finished fibers). * `scheduler` can be an object of any class corresponding to - * `Fiber::SchedulerInterface`. Its implementation is up to the user. + * `Fiber::Scheduler` interface. Its implementation is up to the user. * * @param[in] scheduler The scheduler to set. * @exception rb_eArgError `scheduler` does not conform the interface. @@ -90,7 +146,7 @@ VALUE rb_fiber_scheduler_make_timeout(struct timeval *timeout); VALUE rb_fiber_scheduler_close(VALUE scheduler); /** - * Nonblocking `sleep`. Depending on scheduler implementation, this for + * Non-blocking `sleep`. Depending on scheduler implementation, this for * instance switches to another fiber etc. * * @param[in] scheduler Target scheduler. @@ -118,7 +174,7 @@ int rb_fiber_scheduler_supports_process_wait(VALUE scheduler); #endif /** - * Nonblocking `waitpid`. Depending on scheduler implementation, this for + * Non-blocking `waitpid`. Depending on scheduler implementation, this for * instance switches to another fiber etc. * * @param[in] scheduler Target scheduler. @@ -129,7 +185,7 @@ int rb_fiber_scheduler_supports_process_wait(VALUE scheduler); VALUE rb_fiber_scheduler_process_wait(VALUE scheduler, rb_pid_t pid, int flags); /** - * Nonblocking wait for the passed "blocker", which is for instance + * Non-blocking wait for the passed "blocker", which is for instance * `Thread.join` or `Mutex.lock`. Depending on scheduler implementation, this * for instance switches to another fiber etc. * @@ -151,11 +207,11 @@ VALUE rb_fiber_scheduler_block(VALUE scheduler, VALUE blocker, VALUE timeout); VALUE rb_fiber_scheduler_unblock(VALUE scheduler, VALUE blocker, VALUE fiber); /** - * Nonblocking version of rb_io_wait(). Depending on scheduler implementation, - * this for instance switches to another fiber etc. 
+ * Non-blocking version of rb_io_wait(). Depending on scheduler + * implementation, this for instance switches to another fiber etc. * * The "events" here is a Ruby level integer, which is an OR-ed value of - * `IO::READABLE`, `IO::WRITable`, and `IO::PRIORITY`. + * `IO::READABLE`, `IO::WRITABLE`, and `IO::PRIORITY`. * * @param[in] scheduler Target scheduler. * @param[in] io An io object to wait. @@ -166,7 +222,7 @@ VALUE rb_fiber_scheduler_unblock(VALUE scheduler, VALUE blocker, VALUE fiber); VALUE rb_fiber_scheduler_io_wait(VALUE scheduler, VALUE io, VALUE events, VALUE timeout); /** - * Nonblocking wait until the passed IO is ready for reading. This is a + * Non-blocking wait until the passed IO is ready for reading. This is a * special case of rb_fiber_scheduler_io_wait(), where the interest is * `IO::READABLE` and timeout is never. * @@ -177,7 +233,7 @@ VALUE rb_fiber_scheduler_io_wait(VALUE scheduler, VALUE io, VALUE events, VALUE VALUE rb_fiber_scheduler_io_wait_readable(VALUE scheduler, VALUE io); /** - * Nonblocking wait until the passed IO is ready for writing. This is a + * Non-blocking wait until the passed IO is ready for writing. This is a * special case of rb_fiber_scheduler_io_wait(), where the interest is * `IO::WRITABLE` and timeout is never. * @@ -188,33 +244,145 @@ VALUE rb_fiber_scheduler_io_wait_readable(VALUE scheduler, VALUE io); VALUE rb_fiber_scheduler_io_wait_writable(VALUE scheduler, VALUE io); /** - * Nonblocking read from the passed IO. + * Non-blocking version of `IO.select`. + * + * It's possible that this will be emulated using a thread, so you should not + * rely on it for high performance. + * + * @param[in] scheduler Target scheduler. + * @param[in] readables An array of readable objects. + * @param[in] writables An array of writable objects. + * @param[in] exceptables An array of objects that might encounter exceptional conditions. + * @param[in] timeout Numeric timeout or nil. 
+ * @return What `scheduler.io_select` returns, normally a 3-tuple of arrays of ready objects. + */ +VALUE rb_fiber_scheduler_io_select(VALUE scheduler, VALUE readables, VALUE writables, VALUE exceptables, VALUE timeout); + +/** + * Non-blocking version of `IO.select`, `argv` variant. + */ +VALUE rb_fiber_scheduler_io_selectv(VALUE scheduler, int argc, VALUE *argv); + +/** + * Non-blocking read from the passed IO. + * + * @param[in] scheduler Target scheduler. + * @param[in] io An io object to read from. + * @param[in] buffer The buffer to read to. + * @param[in] length The minimum number of bytes to read. + * @param[in] offset The offset in the buffer to read from. + * @retval RUBY_Qundef `scheduler` doesn't have `#io_read`. + * @return otherwise What `scheduler.io_read` returns `[-errno, size]`. + */ +VALUE rb_fiber_scheduler_io_read(VALUE scheduler, VALUE io, VALUE buffer, size_t length, size_t offset); + +/** + * Non-blocking write to the passed IO. * * @param[in] scheduler Target scheduler. - * @param[out] io An io object to read from. - * @param[out] buffer Return buffer. - * @param[in] offset Offset inside of `buffer`. - * @param[in] length Requested number of bytes to read. + * @param[in] io An io object to write to. + * @param[in] buffer The buffer to write from. + * @param[in] length The minimum number of bytes to write. + * @param[in] offset The offset in the buffer to write from. + * @retval RUBY_Qundef `scheduler` doesn't have `#io_write`. + * @return otherwise What `scheduler.io_write` returns `[-errno, size]`. + */ +VALUE rb_fiber_scheduler_io_write(VALUE scheduler, VALUE io, VALUE buffer, size_t length, size_t offset); + +/** + * Non-blocking read from the passed IO at the specified offset. + * + * @param[in] scheduler Target scheduler. + * @param[in] io An io object to read from. + * @param[in] from The offset to read from. + * @param[in] buffer The buffer to read to. + * @param[in] length The minimum number of bytes to read. 
+ * @param[in] offset The offset in the buffer to read to. * @retval RUBY_Qundef `scheduler` doesn't have `#io_read`. * @return otherwise What `scheduler.io_read` returns. */ -VALUE rb_fiber_scheduler_io_read(VALUE scheduler, VALUE io, VALUE buffer, size_t offset, size_t length); +VALUE rb_fiber_scheduler_io_pread(VALUE scheduler, VALUE io, rb_off_t from, VALUE buffer, size_t length, size_t offset); /** - * Nonblocking write to the passed IO. + * Non-blocking write to the passed IO at the specified offset. * * @param[in] scheduler Target scheduler. - * @param[out] io An io object to write to. - * @param[in] buffer What to write. - * @param[in] offset Offset inside of `buffer`. - * @param[in] length Number of bytes to write. + * @param[in] io An io object to write to. + * @param[in] from The offset to write to. + * @param[in] buffer The buffer to write from. + * @param[in] length The minimum number of bytes to write. + * @param[in] offset The offset in the buffer to write from. * @retval RUBY_Qundef `scheduler` doesn't have `#io_write`. * @return otherwise What `scheduler.io_write` returns. */ -VALUE rb_fiber_scheduler_io_write(VALUE scheduler, VALUE io, VALUE buffer, size_t offset, size_t length); +VALUE rb_fiber_scheduler_io_pwrite(VALUE scheduler, VALUE io, rb_off_t from, VALUE buffer, size_t length, size_t offset); /** - * Nonblocking DNS lookup. + * Non-blocking read from the passed IO using a native buffer. + * + * @param[in] scheduler Target scheduler. + * @param[in] io An io object to read from. + * @param[in] base The memory to read to. + * @param[in] size Size of the memory. + * @param[in] length The minimum number of bytes to read. + * @retval RUBY_Qundef `scheduler` doesn't have `#io_read`. + * @return otherwise What `scheduler.io_read` returns. + */ +VALUE rb_fiber_scheduler_io_read_memory(VALUE scheduler, VALUE io, void *base, size_t size, size_t length); + +/** + * Non-blocking write to the passed IO using a native buffer. 
+ * + * @param[in] scheduler Target scheduler. + * @param[in] io An io object to write to. + * @param[in] base The memory to write from. + * @param[in] size Size of the memory. + * @param[in] length The minimum number of bytes to write. + * @retval RUBY_Qundef `scheduler` doesn't have `#io_write`. + * @return otherwise What `scheduler.io_write` returns. + */ +VALUE rb_fiber_scheduler_io_write_memory(VALUE scheduler, VALUE io, const void *base, size_t size, size_t length); + +/** + * Non-blocking pread from the passed IO using a native buffer. + * + * @param[in] scheduler Target scheduler. + * @param[in] io An io object to read from. + * @param[in] from The offset to read from. + * @param[in] base The memory to read to. + * @param[in] size Size of the memory. + * @param[in] length The minimum number of bytes to read. + * @retval RUBY_Qundef `scheduler` doesn't have `#io_read`. + * @return otherwise What `scheduler.io_read` returns. + */ +VALUE rb_fiber_scheduler_io_pread_memory(VALUE scheduler, VALUE io, rb_off_t from, void *base, size_t size, size_t length); + +/** + * Non-blocking pwrite to the passed IO using a native buffer. + * + * @param[in] scheduler Target scheduler. + * @param[in] io An io object to write to. + * @param[in] from The offset to write from. + * @param[in] base The memory to write from. + * @param[in] size Size of the memory. + * @param[in] length The minimum number of bytes to write. + * @retval RUBY_Qundef `scheduler` doesn't have `#io_write`. + * @return otherwise What `scheduler.io_write` returns. + */ +VALUE rb_fiber_scheduler_io_pwrite_memory(VALUE scheduler, VALUE io, rb_off_t from, const void *base, size_t size, size_t length); + +/** + * Non-blocking close the given IO. + * + * @param[in] scheduler Target scheduler. + * @param[in] io An io object to close. + * @retval RUBY_Qundef `scheduler` doesn't have `#io_close`. + * @return otherwise What `scheduler.io_close` returns. 
+ */ +VALUE rb_fiber_scheduler_io_close(VALUE scheduler, VALUE io); + +/** + * Non-blocking DNS lookup. * * @param[in] scheduler Target scheduler. * @param[in] hostname A host name to query. @@ -223,6 +391,12 @@ VALUE rb_fiber_scheduler_io_write(VALUE scheduler, VALUE io, VALUE buffer, size_ */ VALUE rb_fiber_scheduler_address_resolve(VALUE scheduler, VALUE hostname); +/** + * Create and schedule a non-blocking fiber. + * + */ +VALUE rb_fiber_scheduler_fiber(VALUE scheduler, int argc, VALUE *argv, int kw_splat); + RBIMPL_SYMBOL_EXPORT_END() #endif /* RUBY_FIBER_SCHEDULER_H */ diff --git a/include/ruby/intern.h b/include/ruby/intern.h index 2480e2e703..48e4cd546e 100644 --- a/include/ruby/intern.h +++ b/include/ruby/intern.h @@ -36,7 +36,6 @@ #include "ruby/internal/intern/error.h" #include "ruby/internal/intern/eval.h" #include "ruby/internal/intern/file.h" -#include "ruby/internal/intern/gc.h" #include "ruby/internal/intern/hash.h" #include "ruby/internal/intern/io.h" #include "ruby/internal/intern/load.h" diff --git a/include/ruby/internal/abi.h b/include/ruby/internal/abi.h new file mode 100644 index 0000000000..e735a67564 --- /dev/null +++ b/include/ruby/internal/abi.h @@ -0,0 +1,58 @@ +#ifndef RUBY_ABI_H +#define RUBY_ABI_H + +#ifdef RUBY_ABI_VERSION /* should match the definition in config.h */ + +/* This number represents Ruby's ABI version. + * + * In development Ruby, it should be bumped every time an ABI incompatible + * change is introduced. This will force other developers to rebuild extension + * gems. + * + * The following cases are considered as ABI incompatible changes: + * - Changing any data structures. + * - Changing macros or inline functions causing a change in behavior. + * - Deprecating or removing function declarations. + * + * The following cases are NOT considered as ABI incompatible changes: + * - Any changes that does not involve the header files in the `include` + * directory. 
+ * - Adding macros, inline functions, or function declarations. + * - Backwards compatible refactors. + * - Editing comments. + * + * In released versions of Ruby, this number is not defined since teeny + * versions of Ruby should guarantee ABI compatibility. + */ +#define RUBY_ABI_VERSION 0 + +/* Windows does not support weak symbols so ruby_abi_version will not exist + * in the shared library. */ +#if defined(HAVE_FUNC_WEAK) && !defined(_WIN32) && !defined(__MINGW32__) && !defined(__CYGWIN__) +# define RUBY_DLN_CHECK_ABI +#endif +#endif /* RUBY_ABI_VERSION */ + +#if defined(RUBY_DLN_CHECK_ABI) && !defined(RUBY_EXPORT) + +# ifdef __cplusplus +extern "C" { +# endif + +RUBY_FUNC_EXPORTED unsigned long long __attribute__((weak)) +ruby_abi_version(void) +{ +# ifdef RUBY_ABI_VERSION + return RUBY_ABI_VERSION; +# else + return 0; +# endif +} + +# ifdef __cplusplus +} +# endif + +#endif + +#endif diff --git a/include/ruby/internal/anyargs.h b/include/ruby/internal/anyargs.h index 9d8d16fdab..e3e1b6166d 100644 --- a/include/ruby/internal/anyargs.h +++ b/include/ruby/internal/anyargs.h @@ -239,15 +239,16 @@ # define RBIMPL_ANYARGS_DISPATCH_rb_define_method_13(n) RBIMPL_ANYARGS_DISPATCH((n) == 13, rb_define_method_13, RBIMPL_ANYARGS_DISPATCH_rb_define_method_12(n)) # define RBIMPL_ANYARGS_DISPATCH_rb_define_method_14(n) RBIMPL_ANYARGS_DISPATCH((n) == 14, rb_define_method_14, RBIMPL_ANYARGS_DISPATCH_rb_define_method_13(n)) # define RBIMPL_ANYARGS_DISPATCH_rb_define_method_15(n) RBIMPL_ANYARGS_DISPATCH((n) == 15, rb_define_method_15, RBIMPL_ANYARGS_DISPATCH_rb_define_method_14(n)) -# define RBIMPL_ANYARGS_DISPATCH_rb_define_singleton_method(n, f) RBIMPL_ANYARGS_DISPATCH(RBIMPL_CFUNC_IS_rb_f_notimplement(f), rb_define_singleton_method_m3, RBIMPL_ANYARGS_DISPATCH_rb_define_singleton_method_15(n)) -# define RBIMPL_ANYARGS_DISPATCH_rb_define_protected_method(n, f) RBIMPL_ANYARGS_DISPATCH(RBIMPL_CFUNC_IS_rb_f_notimplement(f), rb_define_protected_method_m3, 
RBIMPL_ANYARGS_DISPATCH_rb_define_protected_method_15(n)) -# define RBIMPL_ANYARGS_DISPATCH_rb_define_private_method(n, f) RBIMPL_ANYARGS_DISPATCH(RBIMPL_CFUNC_IS_rb_f_notimplement(f), rb_define_private_method_m3, RBIMPL_ANYARGS_DISPATCH_rb_define_private_method_15(n)) -# define RBIMPL_ANYARGS_DISPATCH_rb_define_module_function(n, f) RBIMPL_ANYARGS_DISPATCH(RBIMPL_CFUNC_IS_rb_f_notimplement(f), rb_define_module_function_m3, RBIMPL_ANYARGS_DISPATCH_rb_define_module_function_15(n)) -# define RBIMPL_ANYARGS_DISPATCH_rb_define_global_function(n, f) RBIMPL_ANYARGS_DISPATCH(RBIMPL_CFUNC_IS_rb_f_notimplement(f), rb_define_global_function_m3, RBIMPL_ANYARGS_DISPATCH_rb_define_global_function_15(n)) -# define RBIMPL_ANYARGS_DISPATCH_rb_define_method_id(n, f) RBIMPL_ANYARGS_DISPATCH(RBIMPL_CFUNC_IS_rb_f_notimplement(f), rb_define_method_id_m3, RBIMPL_ANYARGS_DISPATCH_rb_define_method_id_15(n)) -# define RBIMPL_ANYARGS_DISPATCH_rb_define_method(n, f) RBIMPL_ANYARGS_DISPATCH(RBIMPL_CFUNC_IS_rb_f_notimplement(f), rb_define_method_m3, RBIMPL_ANYARGS_DISPATCH_rb_define_method_15(n)) +# define RBIMPL_ANYARGS_DISPATCH_rb_define_singleton_method(n, f) RBIMPL_ANYARGS_DISPATCH(RBIMPL_CFUNC_IS_rb_f_notimplement(f), rb_define_singleton_method_notimpl, RBIMPL_ANYARGS_DISPATCH_rb_define_singleton_method_15(n)) +# define RBIMPL_ANYARGS_DISPATCH_rb_define_protected_method(n, f) RBIMPL_ANYARGS_DISPATCH(RBIMPL_CFUNC_IS_rb_f_notimplement(f), rb_define_protected_method_notimpl, RBIMPL_ANYARGS_DISPATCH_rb_define_protected_method_15(n)) +# define RBIMPL_ANYARGS_DISPATCH_rb_define_private_method(n, f) RBIMPL_ANYARGS_DISPATCH(RBIMPL_CFUNC_IS_rb_f_notimplement(f), rb_define_private_method_notimpl, RBIMPL_ANYARGS_DISPATCH_rb_define_private_method_15(n)) +# define RBIMPL_ANYARGS_DISPATCH_rb_define_module_function(n, f) RBIMPL_ANYARGS_DISPATCH(RBIMPL_CFUNC_IS_rb_f_notimplement(f), rb_define_module_function_notimpl, RBIMPL_ANYARGS_DISPATCH_rb_define_module_function_15(n)) +# define 
RBIMPL_ANYARGS_DISPATCH_rb_define_global_function(n, f) RBIMPL_ANYARGS_DISPATCH(RBIMPL_CFUNC_IS_rb_f_notimplement(f), rb_define_global_function_notimpl, RBIMPL_ANYARGS_DISPATCH_rb_define_global_function_15(n)) +# define RBIMPL_ANYARGS_DISPATCH_rb_define_method_id(n, f) RBIMPL_ANYARGS_DISPATCH(RBIMPL_CFUNC_IS_rb_f_notimplement(f), rb_define_method_id_notimpl, RBIMPL_ANYARGS_DISPATCH_rb_define_method_id_15(n)) +# define RBIMPL_ANYARGS_DISPATCH_rb_define_method(n, f) RBIMPL_ANYARGS_DISPATCH(RBIMPL_CFUNC_IS_rb_f_notimplement(f), rb_define_method_notimpl, RBIMPL_ANYARGS_DISPATCH_rb_define_method_15(n)) # define RBIMPL_ANYARGS_ATTRSET(sym) RBIMPL_ATTR_MAYBE_UNUSED() RBIMPL_ATTR_NONNULL(()) RBIMPL_ATTR_WEAKREF(sym) # define RBIMPL_ANYARGS_DECL(sym, ...) \ +RBIMPL_ANYARGS_ATTRSET(sym) static void sym ## _notimpl(__VA_ARGS__, VALUE(*)(int, const VALUE *, VALUE, VALUE), int); \ RBIMPL_ANYARGS_ATTRSET(sym) static void sym ## _m3(__VA_ARGS__, VALUE(*)(ANYARGS), int); \ RBIMPL_ANYARGS_ATTRSET(sym) static void sym ## _m2(__VA_ARGS__, VALUE(*)(VALUE, VALUE), int); \ RBIMPL_ANYARGS_ATTRSET(sym) static void sym ## _m1(__VA_ARGS__, VALUE(*)(int, union { VALUE *x; const VALUE *y; } __attribute__((__transparent_union__)), VALUE), int); \ diff --git a/include/ruby/internal/arithmetic.h b/include/ruby/internal/arithmetic.h index 3f7840c384..7ebb4a86f1 100644 --- a/include/ruby/internal/arithmetic.h +++ b/include/ruby/internal/arithmetic.h @@ -18,7 +18,8 @@ * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of * extension libraries. They could be written in C++98. - * @brief Conversion between C's arithmtic types and Ruby's numeric types. + * @brief Conversion between C's arithmetic types and Ruby's numeric + * types. 
*/ #include "ruby/internal/arithmetic/char.h" #include "ruby/internal/arithmetic/double.h" diff --git a/include/ruby/internal/arithmetic/long.h b/include/ruby/internal/arithmetic/long.h index 792f7be179..6b8fd8ffc3 100644 --- a/include/ruby/internal/arithmetic/long.h +++ b/include/ruby/internal/arithmetic/long.h @@ -115,7 +115,7 @@ RB_INT2FIX(long i) /* :NOTE: VALUE can be wider than long. As j being unsigned, 2j+1 is fully * defined. Also it can be compiled into a single LEA instruction. */ const unsigned long j = i; - const unsigned long k = 2 * j + RUBY_FIXNUM_FLAG; + const unsigned long k = (j << 1) + RUBY_FIXNUM_FLAG; const long l = k; const SIGNED_VALUE m = l; /* Sign extend */ const VALUE n = m; diff --git a/include/ruby/internal/assume.h b/include/ruby/internal/assume.h index 65d34d4ac8..4c183e8af9 100644 --- a/include/ruby/internal/assume.h +++ b/include/ruby/internal/assume.h @@ -32,10 +32,7 @@ #include "ruby/internal/warning_push.h" /** @cond INTERNAL_MACRO */ -#if RBIMPL_COMPILER_SINCE(MSVC, 13, 10, 0) -# define RBIMPL_HAVE___ASSUME - -#elif RBIMPL_COMPILER_SINCE(Intel, 13, 0, 0) +#if defined(HAVE___ASSUME) # define RBIMPL_HAVE___ASSUME #endif /** @endcond */ diff --git a/include/ruby/internal/attr/nodiscard.h b/include/ruby/internal/attr/nodiscard.h index 087192a7a8..c3ae118942 100644 --- a/include/ruby/internal/attr/nodiscard.h +++ b/include/ruby/internal/attr/nodiscard.h @@ -26,7 +26,7 @@ /** * Wraps (or simulates) `[[nodiscard]]`. In C++ (at least since C++20) a - * nodiscard attribute can have a message why the result shall not be ignoed. + * nodiscard attribute can have a message why the result shall not be ignored. * However GCC attribute and SAL annotation cannot take them. 
*/ #if RBIMPL_HAS_CPP_ATTRIBUTE(nodiscard) diff --git a/include/ruby/internal/attr/noexcept.h b/include/ruby/internal/attr/noexcept.h index ea3001df2a..7c3f92f1e7 100644 --- a/include/ruby/internal/attr/noexcept.h +++ b/include/ruby/internal/attr/noexcept.h @@ -54,7 +54,7 @@ * get smarter and smarter. Today they can infer if it actually throws * or not without any annotations by humans (correct me if I'm wrong). * - * - When an inline function attributed `noexcepr` actually _does_ throw an + * - When an inline function attributed `noexcept` actually _does_ throw an * exception: they have to call `std::terminate` then (C++ standard * mandates so). This means exception handling routines are actually * enforced, not omitted. This doesn't impact runtime performance (The diff --git a/include/ruby/internal/attr/nonnull.h b/include/ruby/internal/attr/nonnull.h index 874f4236c0..778d5be208 100644 --- a/include/ruby/internal/attr/nonnull.h +++ b/include/ruby/internal/attr/nonnull.h @@ -25,8 +25,10 @@ /** Wraps (or simulates) `__attribute__((nonnull))` */ #if RBIMPL_HAS_ATTRIBUTE(nonnull) # define RBIMPL_ATTR_NONNULL(list) __attribute__((__nonnull__ list)) +# define RBIMPL_NONNULL_ARG(arg) RBIMPL_ASSERT_NOTHING #else # define RBIMPL_ATTR_NONNULL(list) /* void */ +# define RBIMPL_NONNULL_ARG(arg) RUBY_ASSERT(arg) #endif #endif /* RBIMPL_ATTR_NONNULL_H */ diff --git a/include/ruby/internal/attr/packed_struct.h b/include/ruby/internal/attr/packed_struct.h new file mode 100644 index 0000000000..0678b9acc8 --- /dev/null +++ b/include/ruby/internal/attr/packed_struct.h @@ -0,0 +1,43 @@ +#ifndef RBIMPL_ATTR_PACKED_STRUCT_H /*-*-C++-*-vi:se ft=cpp:*/ +#define RBIMPL_ATTR_PACKED_STRUCT_H +/** + * @file + * @author Ruby developers <ruby-core@ruby-lang.org> + * @copyright This file is a part of the programming language Ruby. + * Permission is hereby granted, to either redistribute and/or + * modify this file, provided that the conditions mentioned in the + * file COPYING are met. 
Consult the file for details. + * @warning Symbols prefixed with either `RBIMPL` or `rbimpl` are + * implementation details. Don't take them as canon. They could + * rapidly appear then vanish. The name (path) of this header file + * is also an implementation detail. Do not expect it to persist + * at the place it is now. Developers are free to move it anywhere + * anytime at will. + * @note To ruby-core: remember that this header can be possibly + * recursively included from extension libraries written in C++. + * Do not expect for instance `__VA_ARGS__` is always available. + * We assume C99 for ruby itself but we don't assume languages of + * extension libraries. They could be written in C++98. + * @brief Defines #RBIMPL_ATTR_PACKED_STRUCT_BEGIN, + * #RBIMPL_ATTR_PACKED_STRUCT_END, + * #RBIMPL_ATTR_PACKED_STRUCT_UNALIGNED_BEGIN, and + * #RBIMPL_ATTR_PACKED_STRUCT_UNALIGNED_END. + */ +#include "ruby/internal/config.h" + +#ifndef RBIMPL_ATTR_PACKED_STRUCT_BEGIN +# define RBIMPL_ATTR_PACKED_STRUCT_BEGIN() /* void */ +#endif +#ifndef RBIMPL_ATTR_PACKED_STRUCT_END +# define RBIMPL_ATTR_PACKED_STRUCT_END() /* void */ +#endif + +#if UNALIGNED_WORD_ACCESS +# define RBIMPL_ATTR_PACKED_STRUCT_UNALIGNED_BEGIN() RBIMPL_ATTR_PACKED_STRUCT_BEGIN() +# define RBIMPL_ATTR_PACKED_STRUCT_UNALIGNED_END() RBIMPL_ATTR_PACKED_STRUCT_END() +#else +# define RBIMPL_ATTR_PACKED_STRUCT_UNALIGNED_BEGIN() /* void */ +# define RBIMPL_ATTR_PACKED_STRUCT_UNALIGNED_END() /* void */ +#endif + +#endif diff --git a/include/ruby/internal/config.h b/include/ruby/internal/config.h index b6134c6165..da070f0979 100644 --- a/include/ruby/internal/config.h +++ b/include/ruby/internal/config.h @@ -113,6 +113,8 @@ # define UNALIGNED_WORD_ACCESS 1 #elif defined(__powerpc64__) # define UNALIGNED_WORD_ACCESS 1 +#elif defined(__POWERPC__) // __POWERPC__ is defined for ppc and ppc64 on Darwin +# define UNALIGNED_WORD_ACCESS 1 #elif defined(__aarch64__) # define UNALIGNED_WORD_ACCESS 1 #elif defined(__mc68020__) 
diff --git a/include/ruby/internal/core/rarray.h b/include/ruby/internal/core/rarray.h index 9f1d0509ea..90690fe794 100644 --- a/include/ruby/internal/core/rarray.h +++ b/include/ruby/internal/core/rarray.h @@ -29,25 +29,13 @@ #include "ruby/internal/core/rbasic.h" #include "ruby/internal/dllexport.h" #include "ruby/internal/fl_type.h" -#include "ruby/internal/rgengc.h" +#include "ruby/internal/gc.h" #include "ruby/internal/stdbool.h" #include "ruby/internal/value.h" #include "ruby/internal/value_type.h" #include "ruby/assert.h" /** - * @private - * @warning Do not touch this macro. - * @warning It is an implementation detail. - * @warning The value of this macro must match for ruby itself and all - * extension libraries, otherwise serious memory corruption shall - * occur. - */ -#ifndef USE_TRANSIENT_HEAP -# define USE_TRANSIENT_HEAP 1 -#endif - -/** * Convenient casting macro. * * @param obj An object, which is in fact an ::RArray. @@ -59,15 +47,9 @@ #define RARRAY_EMBED_LEN_MASK RARRAY_EMBED_LEN_MASK #define RARRAY_EMBED_LEN_MAX RARRAY_EMBED_LEN_MAX #define RARRAY_EMBED_LEN_SHIFT RARRAY_EMBED_LEN_SHIFT -#if USE_TRANSIENT_HEAP -# define RARRAY_TRANSIENT_FLAG RARRAY_TRANSIENT_FLAG -#else -# define RARRAY_TRANSIENT_FLAG 0 -#endif /** @endcond */ #define RARRAY_LEN rb_array_len /**< @alias{rb_array_len} */ #define RARRAY_CONST_PTR rb_array_const_ptr /**< @alias{rb_array_const_ptr} */ -#define RARRAY_CONST_PTR_TRANSIENT rb_array_const_ptr_transient /**< @alias{rb_array_const_ptr_transient} */ /** @cond INTERNAL_MACRO */ #if defined(__fcc__) || defined(__fcc_version) || \ @@ -80,7 +62,6 @@ #define RARRAY_EMBED_LEN RARRAY_EMBED_LEN #define RARRAY_LENINT RARRAY_LENINT -#define RARRAY_TRANSIENT_P RARRAY_TRANSIENT_P #define RARRAY_ASET RARRAY_ASET #define RARRAY_PTR RARRAY_PTR /** @endcond */ @@ -130,24 +111,8 @@ enum ruby_rarray_flags { * 3rd parties must not be aware that there even is more than one way to * store array elements. 
It was a bad idea to expose this to them. */ - RARRAY_EMBED_LEN_MASK = RUBY_FL_USER4 | RUBY_FL_USER3 -#if USE_TRANSIENT_HEAP - , - - /** - * This flag has something to do with an array's "transiency". A transient - * array is an array of young generation (of generational GC), who stores - * its elements inside of dedicated memory pages called a transient heap. - * Not every young generation share that storage scheme, but elder - * generations must no join. - * - * @internal - * - * 3rd parties must not be aware that there even is more than one way to - * store array elements. It was a bad idea to expose this to them. - */ - RARRAY_TRANSIENT_FLAG = RUBY_FL_USER13 -#endif + RARRAY_EMBED_LEN_MASK = RUBY_FL_USER9 | RUBY_FL_USER8 | RUBY_FL_USER7 | RUBY_FL_USER6 | + RUBY_FL_USER5 | RUBY_FL_USER4 | RUBY_FL_USER3 }; /** @@ -156,10 +121,7 @@ enum ruby_rarray_flags { */ enum ruby_rarray_consts { /** Where ::RARRAY_EMBED_LEN_MASK resides. */ - RARRAY_EMBED_LEN_SHIFT = RUBY_FL_USHIFT + 3, - - /** Max possible number elements that can be embedded. */ - RARRAY_EMBED_LEN_MAX = RBIMPL_EMBED_LEN_MAX_OF(VALUE) + RARRAY_EMBED_LEN_SHIFT = RUBY_FL_USHIFT + 3 }; /** Ruby's array. */ @@ -218,7 +180,12 @@ struct RArray { * to store its elements. In this case the length is encoded into the * flags. */ - const VALUE ary[RARRAY_EMBED_LEN_MAX]; + /* This is a length 1 array because: + * 1. GCC has a bug that does not optimize C flexible array members + * (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102452) + * 2. Zero length arrays are not supported by all compilers + */ + const VALUE ary[1]; } as; }; @@ -245,16 +212,6 @@ VALUE *rb_ary_ptr_use_start(VALUE ary); */ void rb_ary_ptr_use_end(VALUE a); -#if USE_TRANSIENT_HEAP -/** - * Destructively converts an array of transient backend into ordinal one. - * - * @param[out] a An object of ::RArray. - * @pre `a` must be a transient array. - * @post `a` gets out of transient heap, destructively. 
- */ -void rb_ary_detransient(VALUE a); -#endif RBIMPL_SYMBOL_EXPORT_END() RBIMPL_ATTR_PURE_UNLESS_DEBUG() @@ -327,33 +284,6 @@ RARRAY_LENINT(VALUE ary) } RBIMPL_ATTR_PURE_UNLESS_DEBUG() -RBIMPL_ATTR_ARTIFICIAL() -/** - * Queries if the array is a transient array. - * - * @param[in] ary Array in question. - * @retval true Yes it is. - * @retval false No it isn't. - * @pre `ary` must be an instance of ::RArray. - * - * @internal - * - * @shyouhei doesn't understand the benefit of this function called from - * extension libraries. - */ -static inline bool -RARRAY_TRANSIENT_P(VALUE ary) -{ - RBIMPL_ASSERT_TYPE(ary, RUBY_T_ARRAY); - -#if USE_TRANSIENT_HEAP - return RB_FL_ANY_RAW(ary, RARRAY_TRANSIENT_FLAG); -#else - return false; -#endif -} - -RBIMPL_ATTR_PURE_UNLESS_DEBUG() /** * @private * @@ -364,7 +294,7 @@ RBIMPL_ATTR_PURE_UNLESS_DEBUG() * @return Its backend storage. */ static inline const VALUE * -rb_array_const_ptr_transient(VALUE a) +rb_array_const_ptr(VALUE a) { RBIMPL_ASSERT_TYPE(a, RUBY_T_ARRAY); @@ -376,110 +306,21 @@ rb_array_const_ptr_transient(VALUE a) } } -#if ! USE_TRANSIENT_HEAP -RBIMPL_ATTR_PURE_UNLESS_DEBUG() -#endif -/** - * @private - * - * This is an implementation detail of RARRAY_PTR(). People do not use it - * directly. - * - * @param[in] a An object of ::RArray. - * @return Its backend storage. - * @post `a` is not a transient array. - */ -static inline const VALUE * -rb_array_const_ptr(VALUE a) -{ - RBIMPL_ASSERT_TYPE(a, RUBY_T_ARRAY); - -#if USE_TRANSIENT_HEAP - if (RARRAY_TRANSIENT_P(a)) { - rb_ary_detransient(a); - } -#endif - return rb_array_const_ptr_transient(a); -} - /** * @private * * This is an implementation detail of #RARRAY_PTR_USE. People do not use it * directly. - * - * @param[in] a An object of ::RArray. - * @param[in] allow_transient Whether `a` can be transient or not. - * @return Its backend storage. - * @post `a` is not a transient array unless `allow_transient`. 
- */ -static inline VALUE * -rb_array_ptr_use_start(VALUE a, - RBIMPL_ATTR_MAYBE_UNUSED() - int allow_transient) -{ - RBIMPL_ASSERT_TYPE(a, RUBY_T_ARRAY); - -#if USE_TRANSIENT_HEAP - if (!allow_transient) { - if (RARRAY_TRANSIENT_P(a)) { - rb_ary_detransient(a); - } - } -#endif - - return rb_ary_ptr_use_start(a); -} - -/** - * @private - * - * This is an implementation detail of #RARRAY_PTR_USE. People do not use it - * directly. - * - * @param[in] a An object of ::RArray. - * @param[in] allow_transient Whether `a` can be transient or not. */ -static inline void -rb_array_ptr_use_end(VALUE a, - RBIMPL_ATTR_MAYBE_UNUSED() - int allow_transient) -{ - RBIMPL_ASSERT_TYPE(a, RUBY_T_ARRAY); - rb_ary_ptr_use_end(a); -} - -/** - * @private - * - * This is an implementation detail of #RARRAY_PTR_USE. People do not use it - * directly. - */ -#define RBIMPL_RARRAY_STMT(flag, ary, var, expr) do { \ +#define RBIMPL_RARRAY_STMT(ary, var, expr) do { \ RBIMPL_ASSERT_TYPE((ary), RUBY_T_ARRAY); \ const VALUE rbimpl_ary = (ary); \ - VALUE *var = rb_array_ptr_use_start(rbimpl_ary, (flag)); \ + VALUE *var = rb_ary_ptr_use_start(rbimpl_ary); \ expr; \ - rb_array_ptr_use_end(rbimpl_ary, (flag)); \ + rb_ary_ptr_use_end(rbimpl_ary); \ } while (0) /** - * @private - * - * This is an implementation detail of #RARRAY_PTR_USE. People do not use it - * directly. - */ -#define RARRAY_PTR_USE_START(a) rb_array_ptr_use_start(a, 0) - -/** - * @private - * - * This is an implementation detail of #RARRAY_PTR_USE. People do not use it - * directly. - */ -#define RARRAY_PTR_USE_END(a) rb_array_ptr_use_end(a, 0) - -/** * Declares a section of code where raw pointers are used. In case you need to * touch the raw C array instead of polite CAPIs, then that operation shall be * wrapped using this macro. @@ -505,37 +346,11 @@ rb_array_ptr_use_end(VALUE a, * them use it... Maybe some transition path can be implemented later. 
*/ #define RARRAY_PTR_USE(ary, ptr_name, expr) \ - RBIMPL_RARRAY_STMT(0, ary, ptr_name, expr) - -/** - * @private - * - * This is an implementation detail of #RARRAY_PTR_USE_TRANSIENT. People do - * not use it directly. - */ -#define RARRAY_PTR_USE_START_TRANSIENT(a) rb_array_ptr_use_start(a, 1) - -/** - * @private - * - * This is an implementation detail of #RARRAY_PTR_USE_TRANSIENT. People do - * not use it directly. - */ -#define RARRAY_PTR_USE_END_TRANSIENT(a) rb_array_ptr_use_end(a, 1) - -/** - * Identical to #RARRAY_PTR_USE, except the pointer can be a transient one. - * - * @param ary An object of ::RArray. - * @param ptr_name A variable name which points the C array in `expr`. - * @param expr The expression that touches `ptr_name`. - */ -#define RARRAY_PTR_USE_TRANSIENT(ary, ptr_name, expr) \ - RBIMPL_RARRAY_STMT(1, ary, ptr_name, expr) + RBIMPL_RARRAY_STMT(ary, ptr_name, expr) /** * Wild use of a C pointer. This function accesses the backend storage - * directly. This is slower than #RARRAY_PTR_USE_TRANSIENT. It exercises + * directly. This is slower than #RARRAY_PTR_USE. It exercises * extra manoeuvres to protect our generational GC. Use of this function is * considered archaic. Use a modern way instead. * @@ -570,7 +385,7 @@ RARRAY_PTR(VALUE ary) static inline void RARRAY_ASET(VALUE ary, long i, VALUE v) { - RARRAY_PTR_USE_TRANSIENT(ary, ptr, + RARRAY_PTR_USE(ary, ptr, RB_OBJ_WRITE(ary, &ptr[i], v)); } @@ -585,6 +400,6 @@ RARRAY_ASET(VALUE ary, long i, VALUE v) * remains as it is due to that. If we could warn such usages we can set a * transition path, but currently no way is found to do so. 
*/ -#define RARRAY_AREF(a, i) RARRAY_CONST_PTR_TRANSIENT(a)[i] +#define RARRAY_AREF(a, i) RARRAY_CONST_PTR(a)[i] #endif /* RBIMPL_RARRAY_H */ diff --git a/include/ruby/internal/core/rbasic.h b/include/ruby/internal/core/rbasic.h index 4617f743a7..a1477e2600 100644 --- a/include/ruby/internal/core/rbasic.h +++ b/include/ruby/internal/core/rbasic.h @@ -56,22 +56,20 @@ enum ruby_rvalue_flags { }; /** - * Ruby's object's, base components. Every single ruby objects have them in - * common. + * Ruby object's base components. All Ruby objects have them in common. */ struct RUBY_ALIGNAS(SIZEOF_VALUE) RBasic { /** - * Per-object flags. Each ruby objects have their own characteristics - * apart from their classes. For instance whether an object is frozen or - * not is not controlled by its class. This is where such properties are - * stored. + * Per-object flags. Each Ruby object has its own characteristics apart + * from its class. For instance, whether an object is frozen or not is not + * controlled by its class. This is where such properties are stored. * * @see enum ::ruby_fl_type * - * @note This is ::VALUE rather than an enum for alignment purpose. Back + * @note This is ::VALUE rather than an enum for alignment purposes. Back * in the 1990s there were no such thing like `_Alignas` in C. */ VALUE flags; @@ -79,10 +77,10 @@ RBasic { /** * Class of an object. Every object has its class. Also, everything is an * object in Ruby. This means classes are also objects. Classes have - * their own classes, classes of classes have their classes, too ... and - * it recursively continues forever. + * their own classes, classes of classes have their classes too, and it + * recursively continues forever. * - * Also note the `const` qualifier. In ruby an object cannot "change" its + * Also note the `const` qualifier. In Ruby, an object cannot "change" its * class. 
*/ const VALUE klass; diff --git a/include/ruby/internal/core/rclass.h b/include/ruby/internal/core/rclass.h index 13a33a28bd..b0b6bfc80c 100644 --- a/include/ruby/internal/core/rclass.h +++ b/include/ruby/internal/core/rclass.h @@ -26,9 +26,7 @@ #include "ruby/internal/cast.h" /** @cond INTERNAL_MACRO */ -#define RMODULE_IS_OVERLAID RMODULE_IS_OVERLAID #define RMODULE_IS_REFINEMENT RMODULE_IS_REFINEMENT -#define RMODULE_INCLUDED_INTO_REFINEMENT RMODULE_INCLUDED_INTO_REFINEMENT /** @endcond */ /** @@ -55,57 +53,12 @@ * Why is it here, given RClass itself is not? */ enum ruby_rmodule_flags { - - /** - * This flag has something to do with refinements... I guess? It is set on - * occasions for modules that are refined by refinements, but it seems - * ... nobody cares about such things? Not sure but this flag could - * perhaps be a write-only information. - */ - RMODULE_IS_OVERLAID = RUBY_FL_USER2, - /** * This flag has something to do with refinements. A module created using * rb_mod_refine() has this flag set. This is the bit which controls * difference between normal inclusion versus refinements. */ - RMODULE_IS_REFINEMENT = RUBY_FL_USER3, - - /** - * This flag has something to do with refinements. This is set when a - * (non-refinement) module is included into another module, which is a - * refinement. This amends the way `super` searches for a super method. - * - * ```ruby - * class Foo - * def foo - * "Foo" - * end - * end - * - * module Bar - * def foo - * "[#{super}]" # this - * end - * end - * - * module Baz - * refine Foo do - * include Bar - * def foo - * "<#{super}>" - * end - * end - * end - * - * using Baz - * Foo.new.foo # => "[<Foo>]" - * ``` - * - * The `super` marked with "this" comment shall look for overlaid - * `Foo#foo`, which is not the ordinal method lookup direction. - */ - RMODULE_INCLUDED_INTO_REFINEMENT = RUBY_FL_USER4 + RMODULE_IS_REFINEMENT = RUBY_FL_USER3 }; struct RClass; /* Opaque, declared here for RCLASS() macro. 
*/ diff --git a/include/ruby/internal/core/rdata.h b/include/ruby/internal/core/rdata.h index f6656b6546..43ab3c01e7 100644 --- a/include/ruby/internal/core/rdata.h +++ b/include/ruby/internal/core/rdata.h @@ -369,30 +369,6 @@ rb_data_object_alloc(VALUE klass, void *data, RUBY_DATA_FUNC dmark, RUBY_DATA_FU return rb_data_object_wrap(klass, data, dmark, dfree); } -RBIMPL_ATTR_DEPRECATED(("by: rb_cObject. Will be removed in 3.1.")) -RBIMPL_ATTR_PURE() -/** - * @private - * - * @deprecated There once was a variable called rb_cData, which no longer - * exists today. This function is a function because we want - * warnings for the usages. - */ -static inline VALUE -rb_cData(void) -{ - return rb_cObject; -} - -/** - * @private - * - * @deprecated This macro once was a thing in the old days, but makes no sense - * any longer today. Exists here for backwards compatibility - * only. You can safely forget about it. - */ -#define rb_cData rb_cData() - /** @cond INTERNAL_MACRO */ #define rb_data_object_wrap_0 rb_data_object_wrap #define rb_data_object_wrap_1 rb_data_object_wrap_warning diff --git a/include/ruby/internal/core/rfile.h b/include/ruby/internal/core/rfile.h index f8dddde9e5..a0eb8cb833 100644 --- a/include/ruby/internal/core/rfile.h +++ b/include/ruby/internal/core/rfile.h @@ -25,7 +25,7 @@ /* rb_io_t is in ruby/io.h. The header file has historically not been included * into ruby/ruby.h. We follow that tradition. */ -struct rb_io_t; +struct rb_io; /** * Ruby's File and IO. Ruby's IO are not just file descriptors. They have @@ -38,7 +38,7 @@ struct RFile { struct RBasic basic; /** IO's specific fields. */ - struct rb_io_t *fptr; + struct rb_io *fptr; }; /** diff --git a/include/ruby/internal/core/rhash.h b/include/ruby/internal/core/rhash.h index 61d2c15d87..897c570794 100644 --- a/include/ruby/internal/core/rhash.h +++ b/include/ruby/internal/core/rhash.h @@ -54,19 +54,6 @@ * * @internal * - * Declaration of rb_hash_iter_lev() is at include/ruby/backward.h. 
- */ -#define RHASH_ITER_LEV(h) rb_hash_iter_lev(h) - -/** - * @private - * - * @deprecated This macro once was a thing in the old days, but makes no sense - * any longer today. Exists here for backwards compatibility - * only. You can safely forget about it. - * - * @internal - * * Declaration of rb_hash_ifnone() is at include/ruby/backward.h. */ #define RHASH_IFNONE(h) rb_hash_ifnone(h) diff --git a/include/ruby/internal/core/rmatch.h b/include/ruby/internal/core/rmatch.h index 2d2fd897f5..a528c2999e 100644 --- a/include/ruby/internal/core/rmatch.h +++ b/include/ruby/internal/core/rmatch.h @@ -68,7 +68,7 @@ struct rmatch_offset { }; /** Represents a match. */ -struct rmatch { +struct rb_matchext_struct { /** * "Registers" of a match. This is a quasi-opaque struct that holds * execution result of a match. Roughly resembles `&~`. @@ -82,6 +82,8 @@ struct rmatch { int char_offset_num_allocated; }; +typedef struct rb_matchext_struct rb_matchext_t; + /** * Regular expression execution context. When a regular expression "matches" * to a string, it generates capture groups etc. This struct holds that info. @@ -102,16 +104,13 @@ struct RMatch { VALUE str; /** - * The result of this match. - */ - struct rmatch *rmatch; - - /** * The expression of this match. */ VALUE regexp; /* RRegexp */ }; +#define RMATCH_EXT(m) ((rb_matchext_t *)((char *)(m) + sizeof(struct RMatch))) + RBIMPL_ATTR_PURE_UNLESS_DEBUG() RBIMPL_ATTR_ARTIFICIAL() /** @@ -139,8 +138,7 @@ static inline struct re_registers * RMATCH_REGS(VALUE match) { RBIMPL_ASSERT_TYPE(match, RUBY_T_MATCH); - RBIMPL_ASSERT_OR_ASSUME(RMATCH(match)->rmatch != NULL); - return &RMATCH(match)->rmatch->regs; + return &RMATCH_EXT(match)->regs; } #endif /* RBIMPL_RMATCH_H */ diff --git a/include/ruby/internal/core/robject.h b/include/ruby/internal/core/robject.h index f2028063a6..c2bcae6306 100644 --- a/include/ruby/internal/core/robject.h +++ b/include/ruby/internal/core/robject.h @@ -37,16 +37,15 @@ /** * Convenient casting macro. 
* - * @param obj An object, which is in fact an ::RRegexp. - * @return The passed object casted to ::RRegexp. + * @param obj An object, which is in fact an ::RObject. + * @return The passed object casted to ::RObject. */ #define ROBJECT(obj) RBIMPL_CAST((struct RObject *)(obj)) /** @cond INTERNAL_MACRO */ #define ROBJECT_EMBED_LEN_MAX ROBJECT_EMBED_LEN_MAX #define ROBJECT_EMBED ROBJECT_EMBED -#define ROBJECT_NUMIV ROBJECT_NUMIV +#define ROBJECT_IV_CAPACITY ROBJECT_IV_CAPACITY #define ROBJECT_IVPTR ROBJECT_IVPTR -#define ROBJECT_IV_INDEX_TBL ROBJECT_IV_INDEX_TBL /** @endcond */ /** @@ -75,15 +74,6 @@ enum ruby_robject_flags { ROBJECT_EMBED = RUBY_FL_USER1 }; -/** - * This is an enum because GDB wants it (rather than a macro). People need not - * bother. - */ -enum ruby_robject_consts { - /** Max possible number of instance variables that can be embedded. */ - ROBJECT_EMBED_LEN_MAX = RBIMPL_EMBED_LEN_MAX_OF(VALUE) -}; - struct st_table; /** @@ -103,13 +93,6 @@ struct RObject { * this pattern. */ struct { - - /** - * Number of instance variables. This is per object; objects might - * differ in this field even if they have the identical classes. - */ - uint32_t numiv; - /** Pointer to a C array that holds instance variables. */ VALUE *ivptr; @@ -121,42 +104,24 @@ struct RObject { * * This is a shortcut for `RCLASS_IV_INDEX_TBL(rb_obj_class(obj))`. */ - struct st_table *iv_index_tbl; + struct rb_id_table *iv_index_tbl; } heap; - /** - * Embedded instance variables. When an object is small enough, it + /* Embedded instance variables. When an object is small enough, it * uses this area to store the instance variables. + * + * This is a length 1 array because: + * 1. GCC has a bug that does not optimize C flexible array members + * (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102452) + * 2. 
Zero length arrays are not supported by all compilers */ - VALUE ary[ROBJECT_EMBED_LEN_MAX]; + VALUE ary[1]; } as; }; RBIMPL_ATTR_PURE_UNLESS_DEBUG() RBIMPL_ATTR_ARTIFICIAL() /** - * Queries the number of instance variables. - * - * @param[in] obj Object in question. - * @return Its number of instance variables. - * @pre `obj` must be an instance of ::RObject. - */ -static inline uint32_t -ROBJECT_NUMIV(VALUE obj) -{ - RBIMPL_ASSERT_TYPE(obj, RUBY_T_OBJECT); - - if (RB_FL_ANY_RAW(obj, ROBJECT_EMBED)) { - return ROBJECT_EMBED_LEN_MAX; - } - else { - return ROBJECT(obj)->as.heap.numiv; - } -} - -RBIMPL_ATTR_PURE_UNLESS_DEBUG() -RBIMPL_ATTR_ARTIFICIAL() -/** * Queries the instance variables. * * @param[in] obj Object in question. diff --git a/include/ruby/internal/core/rstring.h b/include/ruby/internal/core/rstring.h index d16a57b1c4..0bca74e688 100644 --- a/include/ruby/internal/core/rstring.h +++ b/include/ruby/internal/core/rstring.h @@ -42,11 +42,7 @@ /** @cond INTERNAL_MACRO */ #define RSTRING_NOEMBED RSTRING_NOEMBED -#define RSTRING_EMBED_LEN_MASK RSTRING_EMBED_LEN_MASK -#define RSTRING_EMBED_LEN_SHIFT RSTRING_EMBED_LEN_SHIFT -#define RSTRING_EMBED_LEN_MAX RSTRING_EMBED_LEN_MAX #define RSTRING_FSTR RSTRING_FSTR -#define RSTRING_EMBED_LEN RSTRING_EMBED_LEN #define RSTRING_LEN RSTRING_LEN #define RSTRING_LENINT RSTRING_LENINT #define RSTRING_PTR RSTRING_PTR @@ -160,19 +156,6 @@ enum ruby_rstring_flags { */ RSTRING_NOEMBED = RUBY_FL_USER1, - /** - * When a string employs embedded strategy (see ::RSTRING_NOEMBED), these - * bits are used to store the number of bytes actually filled into - * ::RString::ary. - * - * @internal - * - * 3rd parties must not be aware that there even is more than one way to - * store a string. Might better be hidden. 
- */ - RSTRING_EMBED_LEN_MASK = RUBY_FL_USER2 | RUBY_FL_USER3 | RUBY_FL_USER4 | - RUBY_FL_USER5 | RUBY_FL_USER6, - /* Actually, string encodings are also encoded into the flags, using * remaining bits.*/ @@ -199,18 +182,6 @@ enum ruby_rstring_flags { }; /** - * This is an enum because GDB wants it (rather than a macro). People need not - * bother. - */ -enum ruby_rstring_consts { - /** Where ::RSTRING_EMBED_LEN_MASK resides. */ - RSTRING_EMBED_LEN_SHIFT = RUBY_FL_USHIFT + 2, - - /** Max possible number of characters that can be embedded. */ - RSTRING_EMBED_LEN_MAX = RBIMPL_EMBED_LEN_MAX_OF(char) - 1 -}; - -/** * Ruby's String. A string in ruby conceptually has these information: * * - Encoding of the string. @@ -227,6 +198,13 @@ struct RString { /** Basic part, including flags and class. */ struct RBasic basic; + /** + * Length of the string, not including terminating NUL character. + * + * @note This is in bytes. + */ + long len; + /** String's specific fields. */ union { @@ -235,14 +213,6 @@ struct RString { * pattern. */ struct { - - /** - * Length of the string, not including terminating NUL character. - * - * @note This is in bytes. - */ - long len; - /** * Pointer to the contents of the string. In the old days each * string had dedicated memory regions. That is no longer true @@ -271,14 +241,15 @@ struct RString { } aux; } heap; - /** - * Embedded contents. When a string is short enough, it uses this area - * to store the contents themselves. This was impractical in the 20th - * century, but these days 64 bit machines can typically hold 48 bytes - * here. Could be sufficiently large. In this case the length is - * encoded into the flags. - */ - char ary[RSTRING_EMBED_LEN_MAX + 1]; + /** Embedded contents. */ + struct { + /* This is a length 1 array because: + * 1. GCC has a bug that does not optimize C flexible array members + * (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102452) + * 2. 
Zero length arrays are not supported by all compilers + */ + char ary[1]; + } embed; } as; }; @@ -390,26 +361,12 @@ RBIMPL_ATTR_ARTIFICIAL() * * @param[in] str String in question. * @return Its length, in bytes. - * @pre `str` must be an instance of ::RString, and must has its - * ::RSTRING_NOEMBED flag off. - * - * @internal - * - * This was a macro before. It was inevitable to be public, since macros are - * global constructs. But should it be forever? Now that it is a function, - * @shyouhei thinks it could just be eliminated, hidden into implementation - * details. + * @pre `str` must be an instance of ::RString. */ static inline long -RSTRING_EMBED_LEN(VALUE str) +RSTRING_LEN(VALUE str) { - RBIMPL_ASSERT_TYPE(str, RUBY_T_STRING); - RBIMPL_ASSERT_OR_ASSUME(! RB_FL_ANY_RAW(str, RSTRING_NOEMBED)); - - VALUE f = RBASIC(str)->flags; - f &= RSTRING_EMBED_LEN_MASK; - f >>= RSTRING_EMBED_LEN_SHIFT; - return RBIMPL_CAST((long)f); + return RSTRING(str)->len; } RBIMPL_WARNING_PUSH() @@ -439,29 +396,14 @@ rbimpl_rstring_getmem(VALUE str) else { /* Expecting compilers to optimize this on-stack struct away. */ struct RString retval; - retval.as.heap.len = RSTRING_EMBED_LEN(str); - retval.as.heap.ptr = RSTRING(str)->as.ary; + retval.len = RSTRING_LEN(str); + retval.as.heap.ptr = RSTRING(str)->as.embed.ary; return retval; } } RBIMPL_WARNING_POP() -RBIMPL_ATTR_PURE_UNLESS_DEBUG() -RBIMPL_ATTR_ARTIFICIAL() -/** - * Queries the length of the string. - * - * @param[in] str String in question. - * @return Its length, in bytes. - * @pre `str` must be an instance of ::RString. - */ -static inline long -RSTRING_LEN(VALUE str) -{ - return rbimpl_rstring_getmem(str).as.heap.len; -} - RBIMPL_ATTR_ARTIFICIAL() /** * Queries the contents pointer of the string. @@ -475,13 +417,9 @@ RSTRING_PTR(VALUE str) { char *ptr = rbimpl_rstring_getmem(str).as.heap.ptr; - if (RB_UNLIKELY(! ptr)) { + if (RUBY_DEBUG && RB_UNLIKELY(! 
ptr)) { /* :BEWARE: @shyouhei thinks that currently, there are rooms for this - * function to return NULL. In the 20th century that was a pointless - * concern. However struct RString can hold fake strings nowadays. It - * seems no check against NULL are exercised around handling of them - * (one of such usages is located in marshal.c, which scares - * @shyouhei). Better check here for maximum safety. + * function to return NULL. Better check here for maximum safety. * * Also, this is not rb_warn() because RSTRING_PTR() can be called * during GC (see what obj_info() does). rb_warn() needs to allocate @@ -505,12 +443,12 @@ RSTRING_END(VALUE str) { struct RString buf = rbimpl_rstring_getmem(str); - if (RB_UNLIKELY(! buf.as.heap.ptr)) { + if (RUBY_DEBUG && RB_UNLIKELY(! buf.as.heap.ptr)) { /* Ditto. */ rb_debug_rstring_null_ptr("RSTRING_END"); } - return &buf.as.heap.ptr[buf.as.heap.len]; + return &buf.as.heap.ptr[buf.len]; } RBIMPL_ATTR_ARTIFICIAL() @@ -544,7 +482,7 @@ RSTRING_LENINT(VALUE str) __extension__ ({ \ struct RString rbimpl_str = rbimpl_rstring_getmem(str); \ (ptrvar) = rbimpl_str.as.heap.ptr; \ - (lenvar) = rbimpl_str.as.heap.len; \ + (lenvar) = rbimpl_str.len; \ }) #else # define RSTRING_GETMEM(str, ptrvar, lenvar) \ diff --git a/include/ruby/internal/core/rtypeddata.h b/include/ruby/internal/core/rtypeddata.h index bbf208867d..6c19576c20 100644 --- a/include/ruby/internal/core/rtypeddata.h +++ b/include/ruby/internal/core/rtypeddata.h @@ -114,6 +114,8 @@ #define RUBY_TYPED_PROMOTED1 RUBY_TYPED_PROMOTED1 /** @endcond */ +#define TYPED_DATA_EMBEDDED 2 + /** * @private * @@ -137,6 +139,8 @@ rbimpl_typeddata_flags { */ RUBY_TYPED_FREE_IMMEDIATELY = 1, + RUBY_TYPED_EMBEDDABLE = 2, + /** * This flag has something to do with Ractor. Multiple Ractors run without * protecting each other. 
Sharing an object among Ractors is basically @@ -173,10 +177,16 @@ rbimpl_typeddata_flags { RUBY_TYPED_WB_PROTECTED = RUBY_FL_WB_PROTECTED, /* THIS FLAG DEPENDS ON Ruby version */ /** - * This flag is mysterious. It seems nobody is currently using it. The - * intention of this flag is also unclear. We need further investigations. + * This flag is no longer in use */ - RUBY_TYPED_PROMOTED1 = RUBY_FL_PROMOTED1 /* THIS FLAG DEPENDS ON Ruby version */ + RUBY_TYPED_UNUSED = RUBY_FL_UNUSED6, + + /** + * This flag determines whether marking and compaction should be carried out + * using the dmark/dcompact callback functions or whether we should mark + * declaratively using a list of references defined inside the data struct we're wrapping + */ + RUBY_TYPED_DECL_MARKING = RUBY_FL_USER2 }; /** @@ -347,16 +357,14 @@ struct RTypedData { * data. This roughly resembles a Ruby level class (apart from method * definition etc.) */ - const rb_data_type_t *type; + const rb_data_type_t *const type; /** * This has to be always 1. * * @internal - * - * Why, then, this is not a const ::VALUE? */ - VALUE typed_flag; + const VALUE typed_flag; /** Pointer to the actual C level struct that you want to wrap. 
*/ void *data; @@ -456,7 +464,7 @@ RBIMPL_SYMBOL_EXPORT_END() */ #define TypedData_Make_Struct0(result, klass, type, size, data_type, sval) \ VALUE result = rb_data_typed_object_zalloc(klass, size, data_type); \ - (sval) = RBIMPL_CAST((type *)RTYPEDDATA_DATA(result)); \ + (sval) = (type *)RTYPEDDATA_GET_DATA(result); \ RBIMPL_CAST(/*suppress unused variable warnings*/(void)(sval)) /** @@ -507,6 +515,36 @@ RBIMPL_SYMBOL_EXPORT_END() #define TypedData_Get_Struct(obj,type,data_type,sval) \ ((sval) = RBIMPL_CAST((type *)rb_check_typeddata((obj), (data_type)))) +static inline bool +RTYPEDDATA_EMBEDDED_P(VALUE obj) +{ +#if RUBY_DEBUG + if (RB_UNLIKELY(!RB_TYPE_P(obj, RUBY_T_DATA))) { + Check_Type(obj, RUBY_T_DATA); + RBIMPL_UNREACHABLE_RETURN(false); + } +#endif + + return RTYPEDDATA(obj)->typed_flag & TYPED_DATA_EMBEDDED; +} + +static inline void * +RTYPEDDATA_GET_DATA(VALUE obj) +{ +#if RUBY_DEBUG + if (RB_UNLIKELY(!RB_TYPE_P(obj, RUBY_T_DATA))) { + Check_Type(obj, RUBY_T_DATA); + RBIMPL_UNREACHABLE_RETURN(false); + } +#endif + + /* We reuse the data pointer in embedded TypedData. We can't use offsetof + * since RTypedData is a non-POD type in C++. */ + const size_t embedded_typed_data_size = sizeof(struct RTypedData) - sizeof(void *); + + return RTYPEDDATA_EMBEDDED_P(obj) ? 
(char *)obj + embedded_typed_data_size : RTYPEDDATA(obj)->data; +} + RBIMPL_ATTR_PURE() RBIMPL_ATTR_ARTIFICIAL() /** @@ -523,7 +561,8 @@ RBIMPL_ATTR_ARTIFICIAL() static inline bool rbimpl_rtypeddata_p(VALUE obj) { - return RTYPEDDATA(obj)->typed_flag == 1; + VALUE typed_flag = RTYPEDDATA(obj)->typed_flag; + return typed_flag != 0 && typed_flag <= 3; } RBIMPL_ATTR_PURE_UNLESS_DEBUG() diff --git a/include/ruby/internal/dllexport.h b/include/ruby/internal/dllexport.h index 08a262209d..71026e7100 100644 --- a/include/ruby/internal/dllexport.h +++ b/include/ruby/internal/dllexport.h @@ -37,9 +37,7 @@ * ``` */ #undef RUBY_EXTERN -#if defined(MJIT_HEADER) && defined(_WIN32) -# define RUBY_EXTERN extern __declspec(dllimport) -#elif defined(RUBY_EXPORT) +#if defined(RUBY_EXPORT) # define RUBY_EXTERN extern #elif defined(_WIN32) # define RUBY_EXTERN extern __declspec(dllimport) @@ -59,36 +57,6 @@ # define RUBY_FUNC_EXPORTED /* void */ #endif -/** - * @cond INTERNAL_MACRO - * - * These MJIT related macros are placed here because translate_mjit_header can - * need them. Extension libraries should not touch. - */ - -/* These macros are used for functions which are exported only for MJIT - and NOT ensured to be exported in future versions. */ - -#if ! defined(MJIT_HEADER) -# define MJIT_FUNC_EXPORTED RUBY_FUNC_EXPORTED -#elif ! RBIMPL_COMPILER_IS(MSVC) -# define MJIT_FUNC_EXPORTED RUBY_FUNC_EXPORTED -#else -# define MJIT_FUNC_EXPORTED static -#endif - -#define MJIT_SYMBOL_EXPORT_BEGIN RUBY_SYMBOL_EXPORT_BEGIN -#define MJIT_SYMBOL_EXPORT_END RUBY_SYMBOL_EXPORT_END - -/* On mswin, MJIT header transformation can't be used since cl.exe can't output - preprocessed output preserving macros. So this `MJIT_STATIC` is needed - to force non-static function to static on MJIT header to avoid symbol conflict. */ -#ifdef MJIT_HEADER -# define MJIT_STATIC static -#else -# define MJIT_STATIC -#endif - /** @endcond */ /** Shortcut macro equivalent to `RUBY_SYMBOL_EXPORT_BEGIN extern "C" {`. 
diff --git a/include/ruby/internal/encoding/coderange.h b/include/ruby/internal/encoding/coderange.h new file mode 100644 index 0000000000..7a81208c9e --- /dev/null +++ b/include/ruby/internal/encoding/coderange.h @@ -0,0 +1,202 @@ +#ifndef RUBY_INTERNAL_ENCODING_CODERANGE_H /*-*-C++-*-vi:se ft=cpp:*/ +#define RUBY_INTERNAL_ENCODING_CODERANGE_H +/** + * @file + * @author Ruby developers <ruby-core@ruby-lang.org> + * @copyright This file is a part of the programming language Ruby. + * Permission is hereby granted, to either redistribute and/or + * modify this file, provided that the conditions mentioned in the + * file COPYING are met. Consult the file for details. + * @warning Symbols prefixed with either `RBIMPL` or `rbimpl` are + * implementation details. Don't take them as canon. They could + * rapidly appear then vanish. The name (path) of this header file + * is also an implementation detail. Do not expect it to persist + * at the place it is now. Developers are free to move it anywhere + * anytime at will. + * @note To ruby-core: remember that this header can be possibly + * recursively included from extension libraries written in C++. + * Do not expect for instance `__VA_ARGS__` is always available. + * We assume C99 for ruby itself but we don't assume languages of + * extension libraries. They could be written in C++98. + * @brief Routines for code ranges. + */ + +#include "ruby/internal/attr/const.h" +#include "ruby/internal/attr/pure.h" +#include "ruby/internal/dllexport.h" +#include "ruby/internal/fl_type.h" +#include "ruby/internal/value.h" + +RBIMPL_SYMBOL_EXPORT_BEGIN() + +/** What rb_enc_str_coderange() returns. */ +enum ruby_coderange_type { + + /** The object's coderange is unclear yet. */ + RUBY_ENC_CODERANGE_UNKNOWN = 0, + + /** The object holds 0 to 127 inclusive and nothing else. 
*/ + RUBY_ENC_CODERANGE_7BIT = ((int)RUBY_FL_USER8), + + /** The object's encoding and contents are consistent each other */ + RUBY_ENC_CODERANGE_VALID = ((int)RUBY_FL_USER9), + + /** The object holds invalid/malformed/broken character(s). */ + RUBY_ENC_CODERANGE_BROKEN = ((int)(RUBY_FL_USER8|RUBY_FL_USER9)), + + /** Where the coderange resides. */ + RUBY_ENC_CODERANGE_MASK = (RUBY_ENC_CODERANGE_7BIT| + RUBY_ENC_CODERANGE_VALID| + RUBY_ENC_CODERANGE_BROKEN) +}; + +RBIMPL_ATTR_CONST() +/** + * @private + * + * This is an implementation detail of #RB_ENC_CODERANGE_CLEAN_P. People don't + * use it directly. + * + * @param[in] cr An enum ::ruby_coderange_type. + * @retval 1 It is. + * @retval 0 It isn't. + */ +static inline int +rb_enc_coderange_clean_p(int cr) +{ + return (cr ^ (cr >> 1)) & RUBY_ENC_CODERANGE_7BIT; +} + +RBIMPL_ATTR_CONST() +/** + * Queries if a code range is "clean". "Clean" in this context means it is + * known and valid. + * + * @param[in] cr An enum ::ruby_coderange_type. + * @retval 1 It is. + * @retval 0 It isn't. + */ +static inline bool +RB_ENC_CODERANGE_CLEAN_P(enum ruby_coderange_type cr) +{ + return rb_enc_coderange_clean_p(cr); +} + +RBIMPL_ATTR_PURE_UNLESS_DEBUG() +/** + * Queries the (inline) code range of the passed object. The object must be + * capable of having inline encoding. Using this macro needs deep + * understanding of bit level object binary layout. + * + * @param[in] obj Target object. + * @return An enum ::ruby_coderange_type. + */ +static inline enum ruby_coderange_type +RB_ENC_CODERANGE(VALUE obj) +{ + VALUE ret = RB_FL_TEST_RAW(obj, RUBY_ENC_CODERANGE_MASK); + + return RBIMPL_CAST((enum ruby_coderange_type)ret); +} + +RBIMPL_ATTR_PURE_UNLESS_DEBUG() +/** + * Queries the (inline) code range of the passed object is + * ::RUBY_ENC_CODERANGE_7BIT. The object must be capable of having inline + * encoding. Using this macro needs deep understanding of bit level object + * binary layout. + * + * @param[in] obj Target object. 
+ * @retval 1 It is ascii only. + * @retval 0 Otherwise (including cases when the range is not known). + */ +static inline bool +RB_ENC_CODERANGE_ASCIIONLY(VALUE obj) +{ + return RB_ENC_CODERANGE(obj) == RUBY_ENC_CODERANGE_7BIT; +} + +/** + * Destructively modifies the passed object so that its (inline) code range is + * the passed one. The object must be capable of having inline encoding. + * Using this macro needs deep understanding of bit level object binary layout. + * + * @param[out] obj Target object. + * @param[in] cr An enum ::ruby_coderange_type. + * @post `obj`'s code range is `cr`. + */ +static inline void +RB_ENC_CODERANGE_SET(VALUE obj, enum ruby_coderange_type cr) +{ + RB_FL_UNSET_RAW(obj, RUBY_ENC_CODERANGE_MASK); + RB_FL_SET_RAW(obj, cr); +} + +/** + * Destructively clears the passed object's (inline) code range. The object + * must be capable of having inline encoding. Using this macro needs deep + * understanding of bit level object binary layout. + * + * @param[out] obj Target object. + * @post `obj`'s code range is ::RUBY_ENC_CODERANGE_UNKNOWN. + */ +static inline void +RB_ENC_CODERANGE_CLEAR(VALUE obj) +{ + RB_FL_UNSET_RAW(obj, RUBY_ENC_CODERANGE_MASK); +} + +RBIMPL_ATTR_CONST() +/* assumed ASCII compatibility */ +/** + * "Mix" two code ranges into one. This is handy for instance when you + * concatenate two strings into one. Consider one of them is valid but the + * other isn't. The result must be invalid. This macro computes that kind of + * mixture. + * + * @param[in] a An enum ::ruby_coderange_type. + * @param[in] b Another enum ::ruby_coderange_type. + * @return The `a` "and" `b`. 
+ */ +static inline enum ruby_coderange_type +RB_ENC_CODERANGE_AND(enum ruby_coderange_type a, enum ruby_coderange_type b) +{ + if (a == RUBY_ENC_CODERANGE_7BIT) { + return b; + } + else if (a != RUBY_ENC_CODERANGE_VALID) { + return RUBY_ENC_CODERANGE_UNKNOWN; + } + else if (b == RUBY_ENC_CODERANGE_7BIT) { + return RUBY_ENC_CODERANGE_VALID; + } + else { + return b; + } +} + +#define ENC_CODERANGE_MASK RUBY_ENC_CODERANGE_MASK /**< @old{RUBY_ENC_CODERANGE_MASK} */ +#define ENC_CODERANGE_UNKNOWN RUBY_ENC_CODERANGE_UNKNOWN /**< @old{RUBY_ENC_CODERANGE_UNKNOWN} */ +#define ENC_CODERANGE_7BIT RUBY_ENC_CODERANGE_7BIT /**< @old{RUBY_ENC_CODERANGE_7BIT} */ +#define ENC_CODERANGE_VALID RUBY_ENC_CODERANGE_VALID /**< @old{RUBY_ENC_CODERANGE_VALID} */ +#define ENC_CODERANGE_BROKEN RUBY_ENC_CODERANGE_BROKEN /**< @old{RUBY_ENC_CODERANGE_BROKEN} */ +#define ENC_CODERANGE_CLEAN_P(cr) RB_ENC_CODERANGE_CLEAN_P(cr) /**< @old{RB_ENC_CODERANGE_CLEAN_P} */ +#define ENC_CODERANGE(obj) RB_ENC_CODERANGE(obj) /**< @old{RB_ENC_CODERANGE} */ +#define ENC_CODERANGE_ASCIIONLY(obj) RB_ENC_CODERANGE_ASCIIONLY(obj) /**< @old{RB_ENC_CODERANGE_ASCIIONLY} */ +#define ENC_CODERANGE_SET(obj,cr) RB_ENC_CODERANGE_SET(obj,cr) /**< @old{RB_ENC_CODERANGE_SET} */ +#define ENC_CODERANGE_CLEAR(obj) RB_ENC_CODERANGE_CLEAR(obj) /**< @old{RB_ENC_CODERANGE_CLEAR} */ +#define ENC_CODERANGE_AND(a, b) RB_ENC_CODERANGE_AND(a, b) /**< @old{RB_ENC_CODERANGE_AND} */ +#define ENCODING_CODERANGE_SET(obj, encindex, cr) RB_ENCODING_CODERANGE_SET(obj, encindex, cr) /**< @old{RB_ENCODING_CODERANGE_SET} */ + +/** @cond INTERNAL_MACRO */ +#define RB_ENC_CODERANGE RB_ENC_CODERANGE +#define RB_ENC_CODERANGE_AND RB_ENC_CODERANGE_AND +#define RB_ENC_CODERANGE_ASCIIONLY RB_ENC_CODERANGE_ASCIIONLY +#define RB_ENC_CODERANGE_CLEAN_P RB_ENC_CODERANGE_CLEAN_P +#define RB_ENC_CODERANGE_CLEAR RB_ENC_CODERANGE_CLEAR +#define RB_ENC_CODERANGE_SET RB_ENC_CODERANGE_SET +/** @endcond */ + +RBIMPL_SYMBOL_EXPORT_END() + +#endif /* 
RUBY_INTERNAL_ENCODING_CODERANGE_H */ diff --git a/include/ruby/internal/encoding/ctype.h b/include/ruby/internal/encoding/ctype.h new file mode 100644 index 0000000000..05c314aeb3 --- /dev/null +++ b/include/ruby/internal/encoding/ctype.h @@ -0,0 +1,258 @@ +#ifndef RUBY_INTERNAL_ENCODING_CTYPE_H /*-*-C++-*-vi:se ft=cpp:*/ +#define RUBY_INTERNAL_ENCODING_CTYPE_H +/** + * @file + * @author Ruby developers <ruby-core@ruby-lang.org> + * @copyright This file is a part of the programming language Ruby. + * Permission is hereby granted, to either redistribute and/or + * modify this file, provided that the conditions mentioned in the + * file COPYING are met. Consult the file for details. + * @warning Symbols prefixed with either `RBIMPL` or `rbimpl` are + * implementation details. Don't take them as canon. They could + * rapidly appear then vanish. The name (path) of this header file + * is also an implementation detail. Do not expect it to persist + * at the place it is now. Developers are free to move it anywhere + * anytime at will. + * @note To ruby-core: remember that this header can be possibly + * recursively included from extension libraries written in C++. + * Do not expect for instance `__VA_ARGS__` is always available. + * We assume C99 for ruby itself but we don't assume languages of + * extension libraries. They could be written in C++98. + * @brief Routines to query character types. + */ + +#include "ruby/onigmo.h" +#include "ruby/internal/attr/const.h" +#include "ruby/internal/dllexport.h" +#include "ruby/internal/encoding/encoding.h" +#include "ruby/internal/value.h" + +RBIMPL_SYMBOL_EXPORT_BEGIN() + +/** + * Queries if the passed pointer points to a newline character. What is a + * newline and what is not depends on the passed encoding. + * + * @param[in] p Pointer to a possibly-middle of a character. + * @param[in] e End of the string. + * @param[in] enc Encoding. + * @retval false It isn't. + * @retval true It is. 
+ */ +static inline bool +rb_enc_is_newline(const char *p, const char *e, rb_encoding *enc) +{ + OnigUChar *up = RBIMPL_CAST((OnigUChar *)p); + OnigUChar *ue = RBIMPL_CAST((OnigUChar *)e); + + return ONIGENC_IS_MBC_NEWLINE(enc, up, ue); +} + +/** + * Queries if the passed code point is of passed character type in the passed + * encoding. The "character type" here is a set of macros defined in onigmo.h, + * like `ONIGENC_CTYPE_PUNCT`. + * + * @param[in] c An `OnigCodePoint` value. + * @param[in] t An `OnigCtype` value. + * @param[in] enc A `rb_encoding*` value. + * @retval true `c` is of `t` in `enc`. + * @retval false Otherwise. + */ +static inline bool +rb_enc_isctype(OnigCodePoint c, OnigCtype t, rb_encoding *enc) +{ + return ONIGENC_IS_CODE_CTYPE(enc, c, t); +} + +/** + * Identical to rb_isascii(), except it additionally takes an encoding. + * + * @param[in] c A code point. + * @param[in] enc An encoding. + * @retval false `c` is out of range of ASCII character set in `enc`. + * @retval true Otherwise. + * + * @internal + * + * `enc` is ignored. This is at least an intentional implementation detail + * (not a bug). But there could be rooms for future extensions. + */ +static inline bool +rb_enc_isascii(OnigCodePoint c, rb_encoding *enc) +{ + return ONIGENC_IS_CODE_ASCII(c); +} + +/** + * Identical to rb_isalpha(), except it additionally takes an encoding. + * + * @param[in] c A code point. + * @param[in] enc An encoding. + * @retval true `enc` classifies `c` as "ALPHA". + * @retval false Otherwise. + */ +static inline bool +rb_enc_isalpha(OnigCodePoint c, rb_encoding *enc) +{ + return ONIGENC_IS_CODE_ALPHA(enc, c); +} + +/** + * Identical to rb_islower(), except it additionally takes an encoding. + * + * @param[in] c A code point. + * @param[in] enc An encoding. + * @retval true `enc` classifies `c` as "LOWER". + * @retval false Otherwise. 
+ */ +static inline bool +rb_enc_islower(OnigCodePoint c, rb_encoding *enc) +{ + return ONIGENC_IS_CODE_LOWER(enc, c); +} + +/** + * Identical to rb_isupper(), except it additionally takes an encoding. + * + * @param[in] c A code point. + * @param[in] enc An encoding. + * @retval true `enc` classifies `c` as "UPPER". + * @retval false Otherwise. + */ +static inline bool +rb_enc_isupper(OnigCodePoint c, rb_encoding *enc) +{ + return ONIGENC_IS_CODE_UPPER(enc, c); +} + +/** + * Identical to rb_iscntrl(), except it additionally takes an encoding. + * + * @param[in] c A code point. + * @param[in] enc An encoding. + * @retval true `enc` classifies `c` as "CNTRL". + * @retval false Otherwise. + */ +static inline bool +rb_enc_iscntrl(OnigCodePoint c, rb_encoding *enc) +{ + return ONIGENC_IS_CODE_CNTRL(enc, c); +} + +/** + * Identical to rb_ispunct(), except it additionally takes an encoding. + * + * @param[in] c A code point. + * @param[in] enc An encoding. + * @retval true `enc` classifies `c` as "PUNCT". + * @retval false Otherwise. + */ +static inline bool +rb_enc_ispunct(OnigCodePoint c, rb_encoding *enc) +{ + return ONIGENC_IS_CODE_PUNCT(enc, c); +} + +/** + * Identical to rb_isalnum(), except it additionally takes an encoding. + * + * @param[in] c A code point. + * @param[in] enc An encoding. + * @retval true `enc` classifies `c` as "ALNUM". + * @retval false Otherwise. + */ +static inline bool +rb_enc_isalnum(OnigCodePoint c, rb_encoding *enc) +{ + return ONIGENC_IS_CODE_ALNUM(enc, c); +} + +/** + * Identical to rb_isprint(), except it additionally takes an encoding. + * + * @param[in] c A code point. + * @param[in] enc An encoding. + * @retval true `enc` classifies `c` as "PRINT". + * @retval false Otherwise. + */ +static inline bool +rb_enc_isprint(OnigCodePoint c, rb_encoding *enc) +{ + return ONIGENC_IS_CODE_PRINT(enc, c); +} + +/** + * Identical to rb_isspace(), except it additionally takes an encoding. + * + * @param[in] c A code point. 
+ * @param[in] enc An encoding. + * @retval true `enc` classifies `c` as "SPACE". + * @retval false Otherwise. + */ +static inline bool +rb_enc_isspace(OnigCodePoint c, rb_encoding *enc) +{ + return ONIGENC_IS_CODE_SPACE(enc, c); +} + +/** + * Identical to rb_isdigit(), except it additionally takes an encoding. + * + * @param[in] c A code point. + * @param[in] enc An encoding. + * @retval true `enc` classifies `c` as "DIGIT". + * @retval false Otherwise. + */ +static inline bool +rb_enc_isdigit(OnigCodePoint c, rb_encoding *enc) +{ + return ONIGENC_IS_CODE_DIGIT(enc, c); +} + +RBIMPL_ATTR_CONST() +/** + * Identical to rb_toupper(), except it additionally takes an encoding. + * + * @param[in] c A code point. + * @param[in] enc An encoding. + * @return `c`'s (Ruby's definition of) upper case counterpart. + * + * @internal + * + * As `RBIMPL_ATTR_CONST` implies this function ignores `enc`. + */ +int rb_enc_toupper(int c, rb_encoding *enc); + +RBIMPL_ATTR_CONST() +/** + * Identical to rb_tolower(), except it additionally takes an encoding. + * + * @param[in] c A code point. + * @param[in] enc An encoding. + * @return `c`'s (Ruby's definition of) lower case counterpart. + * + * @internal + * + * As `RBIMPL_ATTR_CONST` implies this function ignores `enc`. 
+ */ +int rb_enc_tolower(int c, rb_encoding *enc); + +RBIMPL_SYMBOL_EXPORT_END() + +/** @cond INTERNAL_MACRO */ +#define rb_enc_is_newline rb_enc_is_newline +#define rb_enc_isalnum rb_enc_isalnum +#define rb_enc_isalpha rb_enc_isalpha +#define rb_enc_isascii rb_enc_isascii +#define rb_enc_isctype rb_enc_isctype +#define rb_enc_isdigit rb_enc_isdigit +#define rb_enc_islower rb_enc_islower +#define rb_enc_isprint rb_enc_isprint +#define rb_enc_iscntrl rb_enc_iscntrl +#define rb_enc_ispunct rb_enc_ispunct +#define rb_enc_isspace rb_enc_isspace +#define rb_enc_isupper rb_enc_isupper +/** @endcond */ + +#endif /* RUBY_INTERNAL_ENCODING_CTYPE_H */ diff --git a/include/ruby/internal/encoding/encoding.h b/include/ruby/internal/encoding/encoding.h new file mode 100644 index 0000000000..a680651a81 --- /dev/null +++ b/include/ruby/internal/encoding/encoding.h @@ -0,0 +1,1044 @@ +#ifndef RUBY_INTERNAL_ENCODING_ENCODING_H /*-*-C++-*-vi:se ft=cpp:*/ +#define RUBY_INTERNAL_ENCODING_ENCODING_H +/** + * @file + * @author Ruby developers <ruby-core@ruby-lang.org> + * @copyright This file is a part of the programming language Ruby. + * Permission is hereby granted, to either redistribute and/or + * modify this file, provided that the conditions mentioned in the + * file COPYING are met. Consult the file for details. + * @warning Symbols prefixed with either `RBIMPL` or `rbimpl` are + * implementation details. Don't take them as canon. They could + * rapidly appear then vanish. The name (path) of this header file + * is also an implementation detail. Do not expect it to persist + * at the place it is now. Developers are free to move it anywhere + * anytime at will. + * @note To ruby-core: remember that this header can be possibly + * recursively included from extension libraries written in C++. + * Do not expect for instance `__VA_ARGS__` is always available. + * We assume C99 for ruby itself but we don't assume languages of + * extension libraries. They could be written in C++98. 
+ * @brief Defines ::rb_encoding + */ + +#include "ruby/oniguruma.h" +#include "ruby/internal/attr/const.h" +#include "ruby/internal/attr/deprecated.h" +#include "ruby/internal/attr/noalias.h" +#include "ruby/internal/attr/pure.h" +#include "ruby/internal/attr/returns_nonnull.h" +#include "ruby/internal/dllexport.h" +#include "ruby/internal/encoding/coderange.h" +#include "ruby/internal/value.h" +#include "ruby/internal/core/rbasic.h" +#include "ruby/internal/fl_type.h" + +RBIMPL_SYMBOL_EXPORT_BEGIN() + +/** + * `Encoding` class. + * + * @ingroup object + */ +RUBY_EXTERN VALUE rb_cEncoding; + +/** + * @private + * + * Bit constants used when embedding encodings into ::RBasic::flags. Extension + * libraries must not bother such things. + */ +enum ruby_encoding_consts { + + /** Max possible number of embeddable encodings. */ + RUBY_ENCODING_INLINE_MAX = 127, + + /** Where inline encodings reside. */ + RUBY_ENCODING_SHIFT = (RUBY_FL_USHIFT+10), + + /** Bits we use to store inline encodings. */ + RUBY_ENCODING_MASK = (RUBY_ENCODING_INLINE_MAX<<RUBY_ENCODING_SHIFT + /* RUBY_FL_USER10..RUBY_FL_USER16 */), + + /** Max possible length of an encoding name. */ + RUBY_ENCODING_MAXNAMELEN = 42 +}; + +#define ENCODING_INLINE_MAX RUBY_ENCODING_INLINE_MAX /**< @old{RUBY_ENCODING_INLINE_MAX} */ +#define ENCODING_SHIFT RUBY_ENCODING_SHIFT /**< @old{RUBY_ENCODING_SHIFT} */ +#define ENCODING_MASK RUBY_ENCODING_MASK /**< @old{RUBY_ENCODING_MASK} */ + +/** + * Destructively assigns the passed encoding to the passed object. The object + * must be capable of having inline encoding. Using this macro needs deep + * understanding of bit level object binary layout. + * + * @param[out] obj Target object to modify. + * @param[in] encindex Encoding in encindex format. + * @post `obj`'s encoding is `encindex`. 
+ */ +static inline void +RB_ENCODING_SET_INLINED(VALUE obj, int encindex) +{ + VALUE f = /* upcast */ encindex; + + f <<= RUBY_ENCODING_SHIFT; + RB_FL_UNSET_RAW(obj, RUBY_ENCODING_MASK); + RB_FL_SET_RAW(obj, f); +} + +/** + * Queries the encoding of the passed object. The encoding must be smaller + * than ::RUBY_ENCODING_INLINE_MAX, which means you have some assumption on the + * return value. This means the API is for internal use only. + * + * @param[in] obj Target object. + * @return `obj`'s encoding index. + */ +static inline int +RB_ENCODING_GET_INLINED(VALUE obj) +{ + VALUE ret = RB_FL_TEST_RAW(obj, RUBY_ENCODING_MASK) >> RUBY_ENCODING_SHIFT; + + return RBIMPL_CAST((int)ret); +} + +#define ENCODING_SET_INLINED(obj,i) RB_ENCODING_SET_INLINED(obj,i) /**< @old{RB_ENCODING_SET_INLINED} */ +#define ENCODING_SET(obj,i) RB_ENCODING_SET(obj,i) /**< @old{RB_ENCODING_SET} */ +#define ENCODING_GET_INLINED(obj) RB_ENCODING_GET_INLINED(obj) /**< @old{RB_ENCODING_GET_INLINED} */ +#define ENCODING_GET(obj) RB_ENCODING_GET(obj) /**< @old{RB_ENCODING_GET} */ +#define ENCODING_IS_ASCII8BIT(obj) RB_ENCODING_IS_ASCII8BIT(obj) /**< @old{RB_ENCODING_IS_ASCII8BIT} */ +#define ENCODING_MAXNAMELEN RUBY_ENCODING_MAXNAMELEN /**< @old{RUBY_ENCODING_MAXNAMELEN} */ + +/** + * The type of encoding. Our design here is we take Oniguruma/Onigmo's + * multilingualisation schema as our base data structure. + */ +typedef const OnigEncodingType rb_encoding; + +RBIMPL_ATTR_NOALIAS() +/** + * Converts a character option to its encoding. It only supports a very + * limited set of Japanese encodings due to its Japanese origin. Ruby still + * has this in-core for backwards compatibility. But new codes must not bother + * such concept like one-character encoding option. Consider deprecated in + * practice. + * + * @param[in] c One of `['n', 'e', 's', 'u', 'i', 'x', 'm']`. + * @param[out] option Return buffer. + * @param[out] kcode Return buffer. + * @retval 1 `c` understood properly. 
+ * @retval 0 `c` is not understood. + * @post `option` is a ::OnigOptionType. + * @post `kcode` is an enum `ruby_preserved_encindex`. + * + * @internal + * + * `kcode` is opaque because `ruby_preserved_encindex` is not visible from + * extension libraries. But who cares? + */ +int rb_char_to_option_kcode(int c, int *option, int *kcode); + +/** + * Creates a new "dummy" encoding. Roughly speaking, an encoding is dummy when + * it is stateful. Notable example of dummy encoding are those defined in + * ISO/IEC 2022 + * + * @param[in] name Name of the creating encoding. + * @exception rb_eArgError Duplicated or malformed `name`. + * @return New dummy encoding's index. + * @post Encoding named `name` is created, whose index is the return + * value. + */ +int rb_define_dummy_encoding(const char *name); + +RBIMPL_ATTR_PURE() +/** + * Queries if the passed encoding is dummy. + * + * @param[in] enc Encoding in question. + * @retval 1 It is. + * @retval 0 It isn't. + */ +int rb_enc_dummy_p(rb_encoding *enc); + +RBIMPL_ATTR_PURE() +/** + * Queries the index of the encoding. An encoding's index is a Ruby-local + * concept. It is a (sequential) number assigned to each encoding. + * + * @param[in] enc Encoding in question. + * @return Its index. + * @note You can pass null pointers to this function. It is equivalent + * to rb_usascii_encindex() then. + */ +int rb_enc_to_index(rb_encoding *enc); + +/** + * Queries the index of the encoding of the passed object, if any. + * + * @param[in] obj Object in question. + * @retval -1 `obj` is incapable of having an encoding. + * @retval otherwise `obj`'s encoding's index. + */ +int rb_enc_get_index(VALUE obj); + +/** + * @alias{rb_enc_get_index} + * + * @internal + * + * Implementation wise this is not a verbatim alias of rb_enc_get_index(). But + * the API is consistent. Don't bother. 
+ */ +static inline int +RB_ENCODING_GET(VALUE obj) +{ + int encindex = RB_ENCODING_GET_INLINED(obj); + + if (encindex == RUBY_ENCODING_INLINE_MAX) { + return rb_enc_get_index(obj); + } + else { + return encindex; + } +} + +/** + * Destructively assigns an encoding (via its index) to an object. + * + * @param[out] obj Object in question. + * @param[in] encindex An encoding index. + * @exception rb_eFrozenError `obj` is frozen. + * @exception rb_eArgError `obj` is incapable of having an encoding. + * @exception rb_eEncodingError `encindex` is out of bounds. + * @exception rb_eLoadError Failed to load the encoding. + */ +void rb_enc_set_index(VALUE obj, int encindex); + +/** @alias{rb_enc_set_index} */ +static inline void +RB_ENCODING_SET(VALUE obj, int encindex) +{ + rb_enc_set_index(obj, encindex); +} + +/** + * This is #RB_ENCODING_SET + RB_ENC_CODERANGE_SET combo. The object must be + * capable of having inline encoding. Using this macro needs deep + * understanding of bit level object binary layout. + * + * @param[out] obj Target object. + * @param[in] encindex Encoding in encindex format. + * @param[in] cr An enum ::ruby_coderange_type. + * @post `obj`'s encoding is `encindex`. + * @post `obj`'s code range is `cr`. + */ +static inline void +RB_ENCODING_CODERANGE_SET(VALUE obj, int encindex, enum ruby_coderange_type cr) +{ + RB_ENCODING_SET(obj, encindex); + RB_ENC_CODERANGE_SET(obj, cr); +} + +RBIMPL_ATTR_PURE() +/** + * Queries if the passed object can have its encoding. + * + * @param[in] obj Object in question. + * @retval 1 It can. + * @retval 0 It cannot. + */ +int rb_enc_capable(VALUE obj); + +/** + * Queries the index of the encoding. + * + * @param[in] name Name of the encoding to find. + * @exception rb_eArgError No such encoding named `name`. + * @retval -1 `name` exists, but unable to load. + * @retval otherwise Index of encoding named `name`. + */ +int rb_enc_find_index(const char *name); + +/** + * Registers an "alias" name. 
In the wild, an encoding can be called using + * multiple names. For instance an encoding known as `"CP932"` is also called + * `"SJIS"` on occasions. This API registers such relationships. + * + * @param[in] alias New name. + * @param[in] orig Old name. + * @exception rb_eArgError `alias` is duplicated or malformed. + * @retval -1 Failed to load `orig`. + * @retval otherwise The index of `orig` and `alias`. + * @post `alias` is a synonym of `orig`. They refer to the identical + * encoding. + */ +int rb_enc_alias(const char *alias, const char *orig); + +/** + * Obtains an encoding index from a wider range of objects (than + * rb_enc_find_index()). + * + * @param[in] obj An ::rb_cEncoding, or its name in ::rb_cString. + * @retval -1 `obj` is unexpected type/contents. + * @retval otherwise Index corresponding to `obj`. + */ +int rb_to_encoding_index(VALUE obj); + +/** + * Identical to rb_find_encoding(), except it raises an exception instead of + * returning NULL. + * + * @param[in] obj An ::rb_cEncoding, or its name in ::rb_cString. + * @exception rb_eTypeError `obj` is neither ::rb_cEncoding nor ::rb_cString. + * @exception rb_eArgError `obj` is an unknown encoding name. + * @return Encoding of `obj`. + */ +rb_encoding *rb_to_encoding(VALUE obj); + +/** + * Identical to rb_to_encoding_index(), except the return type. + * + * @param[in] obj An ::rb_cEncoding, or its name in ::rb_cString. + * @exception rb_eTypeError `obj` is neither ::rb_cEncoding nor ::rb_cString. + * @retval NULL No such encoding. + * @retval otherwise Encoding of `obj`. + */ +rb_encoding *rb_find_encoding(VALUE obj); + +/** + * Identical to rb_enc_get_index(), except the return type. + * + * @param[in] obj Object in question. + * @retval NULL Obj is incapable of having an encoding. + * @retval otherwise `obj`'s encoding. + */ +rb_encoding *rb_enc_get(VALUE obj); + +/** + * Look for the "common" encoding between the two. One character can or cannot + * be expressed depending on an encoding. 
This function finds the super-set of + * encodings that satisfy contents of both arguments. If that is impossible + * returns NULL. + * + * @param[in] str1 An object. + * @param[in] str2 Another object. + * @retval NULL No encoding can satisfy both at once. + * @retval otherwise Common encoding between the two. + * @note Arguments can be non-string, e.g. Regexp. + */ +rb_encoding *rb_enc_compatible(VALUE str1, VALUE str2); + +/** + * Identical to rb_enc_compatible(), except it raises an exception instead of + * returning NULL. + * + * @param[in] str1 An object. + * @param[in] str2 Another object. + * @exception rb_eEncCompatError No encoding can satisfy both. + * @return Common encoding between the two. + * @note Arguments can be non-string, e.g. Regexp. + */ +rb_encoding *rb_enc_check(VALUE str1,VALUE str2); + +/** + * Identical to rb_enc_set_index(), except it additionally does contents fix-up + * depending on the passed object. It for instance changes the byte length of + * terminating `U+0000` according to the passed encoding. + * + * @param[out] obj Object in question. + * @param[in] encindex An encoding index. + * @exception rb_eFrozenError `obj` is frozen. + * @exception rb_eArgError `obj` is incapable of having an encoding. + * @exception rb_eEncodingError `encindex` is out of bounds. + * @exception rb_eLoadError Failed to load the encoding. + * @return The passed `obj`. + * @post `obj`'s contents might be fixed according to `encindex`. + */ +VALUE rb_enc_associate_index(VALUE obj, int encindex); + +/** + * Identical to rb_enc_associate_index(), except it takes an encoding itself + * instead of its index. + * + * @param[out] obj Object in question. + * @param[in] enc An encoding. + * @exception rb_eFrozenError `obj` is frozen. + * @exception rb_eArgError `obj` is incapable of having an encoding. + * @return The passed `obj`. + * @post `obj`'s contents might be fixed according to `enc`. 
+ */ +VALUE rb_enc_associate(VALUE obj, rb_encoding *enc); + +/** + * Destructively copies the encoding of the latter object to that of former + * one. It can also be seen as a routine identical to + * rb_enc_associate_index(), except it takes an object's encoding instead of an + * encoding's index. + * + * @param[out] dst Object to modify. + * @param[in] src Object to reference. + * @exception rb_eFrozenError `dst` is frozen. + * @exception rb_eArgError `dst` is incapable of having an encoding. + * @exception rb_eEncodingError `src` is incapable of having an encoding. + * @post `dst`'s encoding is that of `src`'s. + */ +void rb_enc_copy(VALUE dst, VALUE src); + + +/** + * Identical to rb_find_encoding(), except it takes an encoding index instead + * of a Ruby object. + * + * @param[in] idx An encoding index. + * @retval NULL No such encoding. + * @retval otherwise An encoding whose index is `idx`. + */ +rb_encoding *rb_enc_from_index(int idx); + +/** + * Identical to rb_find_encoding(), except it takes a C's string instead of + * Ruby's. + * + * @param[in] name Name of the encoding to query. + * @retval NULL No such encoding. + * @retval otherwise An encoding whose name is `name`. + */ +rb_encoding *rb_enc_find(const char *name); + +/** + * Queries the (canonical) name of the passed encoding. + * + * @param[in] enc An encoding. + * @return Its name. + */ +static inline const char * +rb_enc_name(rb_encoding *enc) +{ + return enc->name; +} + +/** + * Queries the minimum number of bytes that the passed encoding needs to + * represent a character. For ASCII and compatible encodings this is typically + * 1. There are however encodings whose minimum is not 1; they are + * historically called wide characters. + * + * @param[in] enc An encoding. + * @return Its least possible number of bytes except 0.
+ */ +static inline int +rb_enc_mbminlen(rb_encoding *enc) +{ + return enc->min_enc_len; +} + +/** + * Queries the maximum number of bytes that the passed encoding needs to + * represent a character. Fixed-width encodings have the same value for this + * one and #rb_enc_mbminlen. However there are variable-width encodings. + * UTF-8, for instance, takes from 1 up to 6 bytes. + * + * @param[in] enc An encoding. + * @return Its maximum possible number of bytes of a character. + */ +static inline int +rb_enc_mbmaxlen(rb_encoding *enc) +{ + return enc->max_enc_len; +} + +/** + * Queries the number of bytes of the character at the passed pointer. + * + * @param[in] p Pointer to a character's first byte. + * @param[in] e End of the string that has `p`. + * @param[in] enc Encoding of the string. + * @return If the character at `p` does not end until `e`, number of bytes + * between `p` and `e`. Otherwise the number of bytes that the + * character at `p` is encoded. + * + * @internal + * + * Strictly speaking there are chances when `p` points to a middle byte of a + * wide character. This function returns "the number of bytes from `p` to + * nearest of either `e` or the next character boundary", if you go strict. + */ +int rb_enc_mbclen(const char *p, const char *e, rb_encoding *enc); + +/** + * Identical to rb_enc_mbclen() unless the character at `p` overruns `e`. That + * can happen for instance when you read from a socket and its partial read + * cuts a wide character in-between. In those situations this function + * "estimates" theoretical length of the character in question. Typically it + * tends to be possible to know how many bytes a character needs before + * actually reaching its end; for instance UTF-8 encodes a character's length + * in the first byte of it. This function returns that info. + * + * @note This implies that the string is not broken. + * + * @param[in] p Pointer to the character's first byte. + * @param[in] e End of the string that has `p`. 
+ * @param[in] enc Encoding of the string. + * @return Number of bytes of character at `p`, measured or estimated. + */ +int rb_enc_fast_mbclen(const char *p, const char *e, rb_encoding *enc); + +/** + * Queries the number of bytes of the character at the passed pointer. This + * function returns 3 different types of information: + * + * ```CXX + * auto n = rb_enc_precise_mbclen(p, q, r); + * + * if (ONIGENC_MBCLEN_CHARFOUND_P(n)) { + * // Character found. Normal return. + * auto found_length = ONIGENC_MBCLEN_CHARFOUND_LEN(n); + * } + * else if (ONIGENC_MBCLEN_NEEDMORE_P(n)) { + * // Character overruns past `q`; needs more. + * auto requested_length = ONIGENC_MBCLEN_NEEDMORE_LEN(n); + * } + * else { + * // `p` is broken. + * assert(ONIGENC_MBCLEN_INVALID_P(n)); + * } + * ``` + * + * @param[in] p Pointer to the character's first byte. + * @param[in] e End of the string that has `p`. + * @param[in] enc Encoding of the string. + * @return Encoded read/needed number of bytes (see above). + */ +int rb_enc_precise_mbclen(const char *p, const char *e, rb_encoding *enc); + +#define MBCLEN_CHARFOUND_P(ret) ONIGENC_MBCLEN_CHARFOUND_P(ret) /**< @old{ONIGENC_MBCLEN_CHARFOUND_P} */ +#define MBCLEN_CHARFOUND_LEN(ret) ONIGENC_MBCLEN_CHARFOUND_LEN(ret) /**< @old{ONIGENC_MBCLEN_CHARFOUND_LEN} */ +#define MBCLEN_INVALID_P(ret) ONIGENC_MBCLEN_INVALID_P(ret) /**< @old{ONIGENC_MBCLEN_INVALID_P} */ +#define MBCLEN_NEEDMORE_P(ret) ONIGENC_MBCLEN_NEEDMORE_P(ret) /**< @old{ONIGENC_MBCLEN_NEEDMORE_P} */ +#define MBCLEN_NEEDMORE_LEN(ret) ONIGENC_MBCLEN_NEEDMORE_LEN(ret) /**< @old{ONIGENC_MBCLEN_NEEDMORE_LEN} */ + +/** + * Queries the code point of character pointed by the passed pointer. If that + * code point is included in ASCII that code point is returned. Otherwise -1. + * This can be different from just looking at the first byte. For instance it + * reads 2 bytes in case of UTF-16BE. + * + * @param[in] p Pointer to the character's first byte. 
+ * @param[in] e End of the string that has `p`. + * @param[out] len Return buffer. + * @param[in] enc Encoding of the string. + * @retval -1 The character at `p` is not in ASCII. + * @retval otherwise A code point of the character at `p`. + * @post `len` (if set) is the number of bytes of `p`. + */ +int rb_enc_ascget(const char *p, const char *e, int *len, rb_encoding *enc); + +/** + * Queries the code point of character pointed by the passed pointer. + * Exceptions happen in case of broken input. + * + * @param[in] p Pointer to the character's first byte. + * @param[in] e End of the string that has `p`. + * @param[out] len Return buffer. + * @param[in] enc Encoding of the string. + * @exception rb_eArgError `p` is broken. + * @return Code point of the character pointed by `p`. + * @post `len` (if set) is the number of bytes of `p`. + */ +unsigned int rb_enc_codepoint_len(const char *p, const char *e, int *len, rb_encoding *enc); + +/** + * Queries the code point of character pointed by the passed pointer. + * Exceptions happen in case of broken input. + * + * @deprecated Use rb_enc_codepoint_len() instead. + * @param[in] p Pointer to the character's first byte. + * @param[in] e End of the string that has `p`. + * @param[in] enc Encoding of the string. + * @exception rb_eArgError `p` is broken. + * @return Code point of the character pointed by `p`. + * + * @internal + * + * @matz says in commit 91e5ba1cb865a2385d3e1cbfacd824496898e098 that the line + * below is a "prototype for obsolete function". However even today there + * still are some use cases of it throughout our repository. It seems it has + * its own niche. + */ +static inline unsigned int +rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc) +{ + return rb_enc_codepoint_len(p, e, 0, enc); + /* ^^^ + * This can be `NULL` in C, `nullptr` in C++, and `0` for both. + * We choose the most portable one here.
+ */ +} + + +/** + * Identical to rb_enc_codepoint(), except it assumes the passed character is + * not broken. + * + * @param[in] p Pointer to the character's first byte. + * @param[in] e End of the string that has `p`. + * @param[in] enc Encoding of the string. + * @return Code point of the character pointed by `p`. + */ +static inline OnigCodePoint +rb_enc_mbc_to_codepoint(const char *p, const char *e, rb_encoding *enc) +{ + const OnigUChar *up = RBIMPL_CAST((const OnigUChar *)p); + const OnigUChar *ue = RBIMPL_CAST((const OnigUChar *)e); + + return ONIGENC_MBC_TO_CODE(enc, up, ue); +} + +/** + * Queries the number of bytes requested to represent the passed code point + * using the passed encoding. + * + * @param[in] code Code point in question. + * @param[in] enc Encoding to convert the code into a byte sequence. + * @exception rb_eArgError `enc` does not glean `code`. + * @return Number of bytes requested to represent `code` using `enc`. + */ +int rb_enc_codelen(int code, rb_encoding *enc); + +/** + * Identical to rb_enc_codelen(), except it returns 0 for invalid code points. + * + * @param[in] c Code point in question. + * @param[in] enc Encoding to convert `c` into a byte sequence. + * @retval 0 `c` is invalid. + * @retval otherwise Number of bytes needed for `enc` to encode `c`. + */ +static inline int +rb_enc_code_to_mbclen(int c, rb_encoding *enc) +{ + OnigCodePoint uc = RBIMPL_CAST((OnigCodePoint)c); + + return ONIGENC_CODE_TO_MBCLEN(enc, uc); +} + +/** + * Identical to rb_enc_uint_chr(), except it writes back to the passed buffer + * instead of allocating one. + * + * @param[in] c Code point. + * @param[out] buf Return buffer. + * @param[in] enc Target encoding scheme. + * @retval <= 0 `c` is invalid in `enc`. + * @retval otherwise Number of bytes written to `buf`. + * @post `c` is encoded according to `enc`, then written to `buf`. + * + * @internal + * + * The second argument must be typed.
But its current usages prevent us from + * being any stricter than this. :FIXME: + */ +static inline int +rb_enc_mbcput(unsigned int c, void *buf, rb_encoding *enc) +{ + OnigCodePoint uc = RBIMPL_CAST((OnigCodePoint)c); + OnigUChar *ubuf = RBIMPL_CAST((OnigUChar *)buf); + + return ONIGENC_CODE_TO_MBC(enc, uc, ubuf); +} + +/** + * Queries the previous (left) character. + * + * @param[in] s Start of the string. + * @param[in] p Pointer to a character. + * @param[in] e End of the string. + * @param[in] enc Encoding. + * @retval NULL No previous character. + * @retval otherwise Pointer to the head of the previous character. + */ +static inline char * +rb_enc_prev_char(const char *s, const char *p, const char *e, rb_encoding *enc) +{ + const OnigUChar *us = RBIMPL_CAST((const OnigUChar *)s); + const OnigUChar *up = RBIMPL_CAST((const OnigUChar *)p); + const OnigUChar *ue = RBIMPL_CAST((const OnigUChar *)e); + OnigUChar *ur = onigenc_get_prev_char_head(enc, us, up, ue); + + return RBIMPL_CAST((char *)ur); +} + +/** + * Queries the left boundary of a character. This function takes a pointer + * that is not necessarily a head of a character, and searches for its head. + * + * @param[in] s Start of the string. + * @param[in] p Pointer to a possibly-middle of a character. + * @param[in] e End of the string. + * @param[in] enc Encoding. + * @return Pointer to the head of the character that contains `p`. + */ +static inline char * +rb_enc_left_char_head(const char *s, const char *p, const char *e, rb_encoding *enc) +{ + const OnigUChar *us = RBIMPL_CAST((const OnigUChar *)s); + const OnigUChar *up = RBIMPL_CAST((const OnigUChar *)p); + const OnigUChar *ue = RBIMPL_CAST((const OnigUChar *)e); + OnigUChar *ur = onigenc_get_left_adjust_char_head(enc, us, up, ue); + + return RBIMPL_CAST((char *)ur); +} + +/** + * Queries the right boundary of a character. This function takes a pointer + * that is not necessarily a head of a character, and searches for its tail. 
+ * + * @param[in] s Start of the string. + * @param[in] p Pointer to a possibly-middle of a character. + * @param[in] e End of the string. + * @param[in] enc Encoding. + * @return Pointer to the end of the character that contains `p`. + */ +static inline char * +rb_enc_right_char_head(const char *s, const char *p, const char *e, rb_encoding *enc) +{ + const OnigUChar *us = RBIMPL_CAST((const OnigUChar *)s); + const OnigUChar *up = RBIMPL_CAST((const OnigUChar *)p); + const OnigUChar *ue = RBIMPL_CAST((const OnigUChar *)e); + OnigUChar *ur = onigenc_get_right_adjust_char_head(enc, us, up, ue); + + return RBIMPL_CAST((char *)ur); +} + +/** + * Scans the string backwards for n characters. + * + * @param[in] s Start of the string. + * @param[in] p Pointer to a character. + * @param[in] e End of the string. + * @param[in] n Steps. + * @param[in] enc Encoding. + * @retval NULL There are no `n` characters left. + * @retval otherwise Pointer to `n` character before `p`. + */ +static inline char * +rb_enc_step_back(const char *s, const char *p, const char *e, int n, rb_encoding *enc) +{ + const OnigUChar *us = RBIMPL_CAST((const OnigUChar *)s); + const OnigUChar *up = RBIMPL_CAST((const OnigUChar *)p); + const OnigUChar *ue = RBIMPL_CAST((const OnigUChar *)e); + const OnigUChar *ur = onigenc_step_back(enc, us, up, ue, n); + + return RBIMPL_CAST((char *)ur); +} + +/** + * @private + * + * This is an implementation detail of rb_enc_asciicompat(). People don't use + * it directly. Just always use rb_enc_asciicompat(). + * + * @param[in] enc Encoding in question. + * @retval 1 It is ASCII compatible. + * @retval 0 It isn't. + */ +static inline int +rb_enc_asciicompat_inline(rb_encoding *enc) +{ + return rb_enc_mbminlen(enc)==1 && !rb_enc_dummy_p(enc); +} + +/** + * Queries if the passed encoding is _in some sense_ compatible with ASCII. + * The concept of ASCII compatibility is nuanced, and private to our + * implementation. 
For instance SJIS is ASCII compatible to us, despite their + * having different characters at code point `0x5C`. This is based on some + * practical consideration that Japanese people confuses SJIS to be "upper + * compatible" with ASCII (which is in fact a wrong idea, but we just don't go + * strict here). An example of ASCII incompatible encoding is UTF-16. UTF-16 + * shares code points with ASCII, but employs a completely different encoding + * scheme. + * + * @param[in] enc Encoding in question. + * @retval 0 It is incompatible. + * @retval 1 It is compatible. + */ +static inline bool +rb_enc_asciicompat(rb_encoding *enc) +{ + if (rb_enc_mbminlen(enc) != 1) { + return false; + } + else if (rb_enc_dummy_p(enc)) { + return false; + } + else { + return true; + } +} + +/** + * Queries if the passed string is in an ASCII-compatible encoding. + * + * @param[in] str A Ruby's string to query. + * @retval 0 `str` is not a String, or an ASCII-incompatible string. + * @retval 1 Otherwise. + */ +static inline bool +rb_enc_str_asciicompat_p(VALUE str) +{ + rb_encoding *enc = rb_enc_get(str); + + return rb_enc_asciicompat(enc); +} + +/** + * Queries the Ruby-level counterpart instance of ::rb_cEncoding that + * corresponds to the passed encoding. + * + * @param[in] enc An encoding + * @retval RUBY_Qnil `enc` is a null pointer. + * @retval otherwise An instance of ::rb_cEncoding. + */ +VALUE rb_enc_from_encoding(rb_encoding *enc); + +RBIMPL_ATTR_PURE() +/** + * Queries if the passed encoding is either one of UTF-8/16/32. + * + * @note It does not take UTF-7, which we actually support, into account. + * + * @param[in] enc Encoding in question. + * @retval 0 It is not a Unicode variant. + * @retval otherwise It is. + * + * @internal + * + * In reality it returns 1/0, but the value is abstracted as + * `ONIGENC_FLAG_UNICODE`. + */ +int rb_enc_unicode_p(rb_encoding *enc); + +RBIMPL_ATTR_RETURNS_NONNULL() +/** + * Queries the encoding that represents ASCII-8BIT a.k.a. binary. 
+ * + * @return The encoding that represents ASCII-8BIT. + * + * @internal + * + * This can not return NULL once the process properly boots up. + */ +rb_encoding *rb_ascii8bit_encoding(void); + +RBIMPL_ATTR_RETURNS_NONNULL() +/** + * Queries the encoding that represents UTF-8. + * + * @return The encoding that represents UTF-8. + * + * @internal + * + * This can not return NULL once the process properly boots up. + */ +rb_encoding *rb_utf8_encoding(void); + +RBIMPL_ATTR_RETURNS_NONNULL() +/** + * Queries the encoding that represents US-ASCII. + * + * @return The encoding that represents US-ASCII. + * + * @internal + * + * This can not return NULL once the process properly boots up. + */ +rb_encoding *rb_usascii_encoding(void); + +/** + * Queries the encoding that represents the current locale. + * + * @return The encoding that represents the process' locale. + * + * @internal + * + * This is dynamic. If you change the process' locale by e.g. calling + * `setlocale(3)`, that should also change the return value of this function. + * + * There is no official way for Ruby scripts to manipulate locales, though. + */ +rb_encoding *rb_locale_encoding(void); + +/** + * Queries the "filesystem" encoding. This is the encoding that ruby expects + * info from the OS' file system are in. This affects for instance return + * value of rb_dir_getwd(). Most notably on Windows it can be an alias of OS + * codepage. Most notably on Linux users can set this via default external + * encoding. + * + * @return The "filesystem" encoding. + */ +rb_encoding *rb_filesystem_encoding(void); + +/** + * Queries the "default external" encoding. This is used to interact with + * outer-process things such as File. Though not recommended, you can set this + * using rb_enc_set_default_external(). + * + * @return The "default external" encoding. + */ +rb_encoding *rb_default_external_encoding(void); + +/** + * Queries the "default internal" encoding. This could be a null pointer. 
+ * Otherwise, outer-process info is transcoded from default external encoding + * to this one during reading from an IO. + * + * @return The "default internal" encoding (if any). + */ +rb_encoding *rb_default_internal_encoding(void); + +#ifndef rb_ascii8bit_encindex +RBIMPL_ATTR_CONST() +/** + * Identical to rb_ascii8bit_encoding(), except it returns the encoding's index + * instead of the encoding itself. + * + * @return The index of encoding of ASCII-8BIT. + * + * @internal + * + * This happens to be 0. + */ +int rb_ascii8bit_encindex(void); +#endif + +/** + * Queries if the passed object is in ascii 8bit (== binary) encoding. The + * object must be capable of having inline encoding. Using this macro needs + * deep understanding of bit level object binary layout. + * + * @param[in] obj An object to check. + * @retval 1 It is. + * @retval 0 It isn't. + */ +static inline bool +RB_ENCODING_IS_ASCII8BIT(VALUE obj) +{ + return RB_ENCODING_GET_INLINED(obj) == rb_ascii8bit_encindex(); +} + +#ifndef rb_utf8_encindex +RBIMPL_ATTR_CONST() +/** + * Identical to rb_utf8_encoding(), except it returns the encoding's index + * instead of the encoding itself. + * + * @return The index of encoding of UTF-8. + */ +int rb_utf8_encindex(void); +#endif + +#ifndef rb_usascii_encindex +RBIMPL_ATTR_CONST() +/** + * Identical to rb_usascii_encoding(), except it returns the encoding's index + * instead of the encoding itself. + * + * @return The index of encoding of US-ASCII. + */ +int rb_usascii_encindex(void); +#endif + +/** + * Identical to rb_locale_encoding(), except it returns the encoding's index + * instead of the encoding itself. + * + * @return The index of the locale encoding. + */ +int rb_locale_encindex(void); + +/** + * Identical to rb_filesystem_encoding(), except it returns the encoding's + * index instead of the encoding itself. + * + * @return The index of the filesystem encoding.
+ */ +int rb_filesystem_encindex(void); + +/** + * Identical to rb_default_external_encoding(), except it returns the + * Ruby-level counterpart instance of ::rb_cEncoding that corresponds to the + * default external encoding. + * + * @return An instance of ::rb_cEncoding of default external. + */ +VALUE rb_enc_default_external(void); + +/** + * Identical to rb_default_internal_encoding(), except it returns the + * Ruby-level counterpart instance of ::rb_cEncoding that corresponds to the + * default internal encoding. + * + * @return An instance of ::rb_cEncoding of default internal. + */ +VALUE rb_enc_default_internal(void); + +/** + * Destructively assigns the passed encoding as the default external encoding. + * You should not use this API. It has process-global side effects. Also it + * doesn't change encodings of strings that have already been read. + * + * @param[in] encoding Ruby level encoding. + * @exception rb_eArgError `encoding` is ::RUBY_Qnil. + * @post The default external encoding is `encoding`. + */ +void rb_enc_set_default_external(VALUE encoding); + +/** + * Destructively assigns the passed encoding as the default internal encoding. + * You should not use this API. It has process-global side effects. Also it + * doesn't change encodings of strings that have already been read. + * + * @param[in] encoding Ruby level encoding. + * @post The default internal encoding is `encoding`. + * @note Unlike rb_enc_set_default_external() you can pass ::RUBY_Qnil. + */ +void rb_enc_set_default_internal(VALUE encoding); + +/** + * Returns a platform-dependent "charmap" of the current locale. This + * information is called a "Codeset name" in IEEE 1003.1 section 13 + * (`<langinfo.h>`). This is a very low-level API. The return value can have + * no corresponding encoding when passed to rb_find_encoding(). + * + * @param[in] klass Ignored for no reason (why...) + * @return The low-level locale charmap, in Ruby's String.
+ */ +VALUE rb_locale_charmap(VALUE klass); + +RBIMPL_SYMBOL_EXPORT_END() + +/** @cond INTERNAL_MACRO */ +#define RB_ENCODING_GET RB_ENCODING_GET +#define RB_ENCODING_GET_INLINED RB_ENCODING_GET_INLINED +#define RB_ENCODING_IS_ASCII8BIT RB_ENCODING_IS_ASCII8BIT +#define RB_ENCODING_SET RB_ENCODING_SET +#define RB_ENCODING_SET_INLINED RB_ENCODING_SET_INLINED +#define rb_enc_asciicompat rb_enc_asciicompat +#define rb_enc_code_to_mbclen rb_enc_code_to_mbclen +#define rb_enc_codepoint rb_enc_codepoint +#define rb_enc_left_char_head rb_enc_left_char_head +#define rb_enc_mbc_to_codepoint rb_enc_mbc_to_codepoint +#define rb_enc_mbcput rb_enc_mbcput +#define rb_enc_mbmaxlen rb_enc_mbmaxlen +#define rb_enc_mbminlen rb_enc_mbminlen +#define rb_enc_name rb_enc_name +#define rb_enc_prev_char rb_enc_prev_char +#define rb_enc_right_char_head rb_enc_right_char_head +#define rb_enc_step_back rb_enc_step_back +#define rb_enc_str_asciicompat_p rb_enc_str_asciicompat_p +/** @endcond */ + +#endif /* RUBY_INTERNAL_ENCODING_ENCODING_H */ diff --git a/include/ruby/internal/encoding/pathname.h b/include/ruby/internal/encoding/pathname.h new file mode 100644 index 0000000000..0b5e85a524 --- /dev/null +++ b/include/ruby/internal/encoding/pathname.h @@ -0,0 +1,184 @@ +#ifndef RUBY_INTERNAL_ENCODING_PATHNAME_H /*-*-C++-*-vi:se ft=cpp:*/ +#define RUBY_INTERNAL_ENCODING_PATHNAME_H +/** + * @file + * @author Ruby developers <ruby-core@ruby-lang.org> + * @copyright This file is a part of the programming language Ruby. + * Permission is hereby granted, to either redistribute and/or + * modify this file, provided that the conditions mentioned in the + * file COPYING are met. Consult the file for details. + * @warning Symbols prefixed with either `RBIMPL` or `rbimpl` are + * implementation details. Don't take them as canon. They could + * rapidly appear then vanish. The name (path) of this header file + * is also an implementation detail. Do not expect it to persist + * at the place it is now. 
Developers are free to move it anywhere + * anytime at will. + * @note To ruby-core: remember that this header can be possibly + * recursively included from extension libraries written in C++. + * Do not expect for instance `__VA_ARGS__` is always available. + * We assume C99 for ruby itself but we don't assume languages of + * extension libraries. They could be written in C++98. + * @brief Routines to manipulate encodings of pathnames. + */ + +#include "ruby/internal/attr/nonnull.h" +#include "ruby/internal/dllexport.h" +#include "ruby/internal/encoding/encoding.h" +#include "ruby/internal/value.h" + +RBIMPL_SYMBOL_EXPORT_BEGIN() +RBIMPL_ATTR_NONNULL(()) +/** + * Returns a path component directly adjacent to the passed pointer. + * + * ``` + * "/multi/byte/encoded/pathname.txt" + * ^ ^ ^ + * | | +--- end + * | +--- @return + * +--- path + * ``` + * + * @param[in] path Where to start scanning. + * @param[in] end End of the path string. + * @param[in] enc Encoding of the string. + * @return A pointer in the passed string where the next path component + * resides, or `end` if there is no next path component. + */ +char *rb_enc_path_next(const char *path, const char *end, rb_encoding *enc); + +RBIMPL_ATTR_NONNULL(()) +/** + * Seeks for non-prefix part of a pathname. This can be a no-op when the OS + * has no such concept like a path prefix. But there are OSes where path + * prefixes do exist. + * + * ``` + * "C:\multi\byte\encoded\pathname.txt" + * ^ ^ ^ + * | | +--- end + * | +--- @return + * +--- path + * ``` + * + * @param[in] path Where to start scanning. + * @param[in] end End of the path string. + * @param[in] enc Encoding of the string. + * @return A pointer in the passed string where non-prefix part starts, or + * `path` if the OS does not have path prefix. + */ +char *rb_enc_path_skip_prefix(const char *path, const char *end, rb_encoding *enc); + +RBIMPL_ATTR_NONNULL(()) +/** + * Returns the last path component. 
+ * + * ``` + * "/multi/byte/encoded/pathname.txt" + * ^ ^ ^ + * | | +--- end + * | +--- @return + * +--- path + * ``` + * + * @param[in] path Where to start scanning. + * @param[in] end End of the path string. + * @param[in] enc Encoding of the string. + * @return A pointer in the passed string where the last path component + * resides, or `end` if there is no more path component. + */ +char *rb_enc_path_last_separator(const char *path, const char *end, rb_encoding *enc); + +RBIMPL_ATTR_NONNULL(()) +/** + * This just returns the passed end basically. It makes a difference in case the + * passed string ends with tons of path separators like the following: + * + * ``` + * "/path/that/ends/with/lots/of/slashes//////////////" + * ^ ^ ^ + * | | +--- end + * | +--- @return + * +--- path + * ``` + * + * @param[in] path Where to start scanning. + * @param[in] end End of the path string. + * @param[in] enc Encoding of the string. + * @return A pointer in the passed string where the trailing path + * separators start, or `end` if there is no trailing path + * separators. + * + * @internal + * + * It seems this function was introduced to mimic what POSIX says about + * `basename(3)`. + */ +char *rb_enc_path_end(const char *path, const char *end, rb_encoding *enc); + +RBIMPL_ATTR_NONNULL((1, 4)) +/** + * Our own encoding-aware version of `basename(3)`. Normally, this function + * returns the last path component of the given name. However in case the + * passed name ends with a path separator, it returns the name of the + * directory, not the last (empty) component. Also if the passed name is a + * root directory, it returns that root directory. Note however that Windows + * filesystems have drive letters, which this function does not return. + * + * @param[in] name Target path. + * @param[out] baselen Return buffer. + * @param[in,out] alllen Number of bytes of `name`. + * @param[in] enc Encoding of `name`. + * @return The rightmost component of `name`.
+ * @post `baselen`, if passed, is updated to be the number of bytes + * of the returned basename. + * @post `alllen`, if passed, is updated to be the number of bytes of + * strings not considered as the basename. + */ +const char *ruby_enc_find_basename(const char *name, long *baselen, long *alllen, rb_encoding *enc); + +RBIMPL_ATTR_NONNULL((1, 3)) +/** + * Our own encoding-aware version of `extname`. This function first applies + * rb_enc_path_last_separator() to the passed name and only concerns its return + * value (ignores any parent directories). This function returns complicated + * results: + * + * ```CXX + * auto path = "..."; + * auto len = strlen(path); + * auto ret = ruby_enc_find_extname(path, &len, rb_ascii8bit_encoding()); + * + * switch(len) { + * case 0: + * if (ret == 0) { + * // `path` is a file without extensions. + * } + * else { + * // `path` is a dotfile. + * // `ret` is the file's name. + * } + * break; + * + * case 1: + * // `path` _ends_ with a dot. + * // `ret` is that dot. + * break; + * + * default: + * // `path` has an extension. + * // `ret` is that extension. + * } + * ``` + * + * @param[in] name Target path. + * @param[in,out] len Number of bytes of `name`. + * @param[in] enc Encoding of `name`. + * @return See above. + * @post `len`, if passed, is updated (see above). + */ +const char *ruby_enc_find_extname(const char *name, long *len, rb_encoding *enc); + +RBIMPL_SYMBOL_EXPORT_END() + +#endif /* RUBY_INTERNAL_ENCODING_PATHNAME_H */ diff --git a/include/ruby/internal/encoding/re.h b/include/ruby/internal/encoding/re.h new file mode 100644 index 0000000000..d0de23bc83 --- /dev/null +++ b/include/ruby/internal/encoding/re.h @@ -0,0 +1,46 @@ +#ifndef RUBY_INTERNAL_ENCODING_RE_H /*-*-C++-*-vi:se ft=cpp:*/ +#define RUBY_INTERNAL_ENCODING_RE_H +/** + * @file + * @author Ruby developers <ruby-core@ruby-lang.org> + * @copyright This file is a part of the programming language Ruby. 
+ * Permission is hereby granted, to either redistribute and/or + * modify this file, provided that the conditions mentioned in the + * file COPYING are met. Consult the file for details. + * @warning Symbols prefixed with either `RBIMPL` or `rbimpl` are + * implementation details. Don't take them as canon. They could + * rapidly appear then vanish. The name (path) of this header file + * is also an implementation detail. Do not expect it to persist + * at the place it is now. Developers are free to move it anywhere + * anytime at will. + * @note To ruby-core: remember that this header can be possibly + * recursively included from extension libraries written in C++. + * Do not expect for instance `__VA_ARGS__` is always available. + * We assume C99 for ruby itself but we don't assume languages of + * extension libraries. They could be written in C++98. + * @brief Routines to manipulate encodings of regular expressions. + */ + +#include "ruby/internal/dllexport.h" +#include "ruby/internal/encoding/encoding.h" +#include "ruby/internal/value.h" + +RBIMPL_SYMBOL_EXPORT_BEGIN() + +/** + * Identical to rb_reg_new(), except it additionally takes an encoding. + * + * @param[in] ptr A memory region of `len` bytes length. + * @param[in] len Length of `ptr`, in bytes, not including the + * terminating NUL character. + * @param[in] enc Encoding of `ptr`. + * @param[in] opts Options e.g. ONIG_OPTION_MULTILINE. + * @exception rb_eRegexpError Failed to compile `ptr`. + * @return An allocated new instance of ::rb_cRegexp, of `enc` encoding, + * whose expression is compiled according to `ptr`.
+ */ +VALUE rb_enc_reg_new(const char *ptr, long len, rb_encoding *enc, int opts); + +RBIMPL_SYMBOL_EXPORT_END() + +#endif /* RUBY_INTERNAL_ENCODING_RE_H */ diff --git a/include/ruby/internal/encoding/sprintf.h b/include/ruby/internal/encoding/sprintf.h new file mode 100644 index 0000000000..cb8737b414 --- /dev/null +++ b/include/ruby/internal/encoding/sprintf.h @@ -0,0 +1,78 @@ +#ifndef RUBY_INTERNAL_ENCODING_SPRINTF_H /*-*-C++-*-vi:se ft=cpp:*/ +#define RUBY_INTERNAL_ENCODING_SPRINTF_H +/** + * @file + * @author Ruby developers <ruby-core@ruby-lang.org> + * @copyright This file is a part of the programming language Ruby. + * Permission is hereby granted, to either redistribute and/or + * modify this file, provided that the conditions mentioned in the + * file COPYING are met. Consult the file for details. + * @warning Symbols prefixed with either `RBIMPL` or `rbimpl` are + * implementation details. Don't take them as canon. They could + * rapidly appear then vanish. The name (path) of this header file + * is also an implementation detail. Do not expect it to persist + * at the place it is now. Developers are free to move it anywhere + * anytime at will. + * @note To ruby-core: remember that this header can be possibly + * recursively included from extension libraries written in C++. + * Do not expect for instance `__VA_ARGS__` is always available. + * We assume C99 for ruby itself but we don't assume languages of + * extension libraries. They could be written in C++98. + * @brief Encoding-aware variants of `sprintf`-like routines.
+ */ +#include "ruby/internal/config.h" +#include <stdarg.h> +#include "ruby/internal/attr/format.h" +#include "ruby/internal/attr/nonnull.h" +#include "ruby/internal/attr/noreturn.h" +#include "ruby/internal/dllexport.h" +#include "ruby/internal/encoding/encoding.h" +#include "ruby/internal/value.h" + +RBIMPL_SYMBOL_EXPORT_BEGIN() +RBIMPL_ATTR_NONNULL((2)) +RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 2, 3) +/** + * Identical to rb_sprintf(), except it additionally takes an encoding. The + * passed encoding rules both the incoming format specifier and the resulting + * string. + * + * @param[in] enc Encoding of `fmt`. + * @param[in] fmt A `printf`-like format specifier. + * @param[in] ... Variadic number of contents to format. + * @return A rendered new instance of ::rb_cString, of `enc` encoding. + */ +VALUE rb_enc_sprintf(rb_encoding *enc, const char *fmt, ...); + +RBIMPL_ATTR_NONNULL((2)) +RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 2, 0) +/** + * Identical to rb_enc_sprintf(), except it takes a `va_list` instead of + * variadic arguments. It can also be seen as a routine identical to + * rb_vsprintf(), except it additionally takes an encoding. + * + * @param[in] enc Encoding of `fmt`. + * @param[in] fmt A `printf`-like format specifier. + * @param[in] ap Contents to format. + * @return A rendered new instance of ::rb_cString, of `enc` encoding. + */ +VALUE rb_enc_vsprintf(rb_encoding *enc, const char *fmt, va_list ap); + +RBIMPL_ATTR_NORETURN() +RBIMPL_ATTR_NONNULL((3)) +RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 3, 4) +/** + * Identical to rb_raise(), except it additionally takes an encoding. + * + * @param[in] enc Encoding of the generating exception. + * @param[in] exc A subclass of ::rb_eException. + * @param[in] fmt Format specifier string compatible with rb_sprintf(). + * @param[in] ... Contents of the message. + * @exception exc The specified exception. + * @note It never returns. 
+ */ +void rb_enc_raise(rb_encoding *enc, VALUE exc, const char *fmt, ...); + +RBIMPL_SYMBOL_EXPORT_END() + +#endif /* RUBY_INTERNAL_ENCODING_SPRINTF_H */ diff --git a/include/ruby/internal/encoding/string.h b/include/ruby/internal/encoding/string.h new file mode 100644 index 0000000000..2b9dfe4f31 --- /dev/null +++ b/include/ruby/internal/encoding/string.h @@ -0,0 +1,346 @@ +#ifndef RUBY_INTERNAL_ENCODING_STRING_H /*-*-C++-*-vi:se ft=cpp:*/ +#define RUBY_INTERNAL_ENCODING_STRING_H +/** + * @file + * @author Ruby developers <ruby-core@ruby-lang.org> + * @copyright This file is a part of the programming language Ruby. + * Permission is hereby granted, to either redistribute and/or + * modify this file, provided that the conditions mentioned in the + * file COPYING are met. Consult the file for details. + * @warning Symbols prefixed with either `RBIMPL` or `rbimpl` are + * implementation details. Don't take them as canon. They could + * rapidly appear then vanish. The name (path) of this header file + * is also an implementation detail. Do not expect it to persist + * at the place it is now. Developers are free to move it anywhere + * anytime at will. + * @note To ruby-core: remember that this header can be possibly + * recursively included from extension libraries written in C++. + * Do not expect for instance `__VA_ARGS__` is always available. + * We assume C99 for ruby itself but we don't assume languages of + * extension libraries. They could be written in C++98. + * @brief Routines to manipulate encodings of strings. + */ + +#include "ruby/internal/dllexport.h" +#include "ruby/internal/value.h" +#include "ruby/internal/encoding/encoding.h" +#include "ruby/internal/attr/nonnull.h" +#include "ruby/internal/intern/string.h" /* rbimpl_strlen */ + +RBIMPL_SYMBOL_EXPORT_BEGIN() + +/** + * Identical to rb_str_new(), except it additionally takes an encoding. + * + * @param[in] ptr A memory region of `len` bytes length. 
+ * @param[in] len Length of `ptr`, in bytes, not including the + * terminating NUL character. + * @param[in] enc Encoding of `ptr`. + * @exception rb_eNoMemError Failed to allocate `len+1` bytes. + * @exception rb_eArgError `len` is negative. + * @return An instance of ::rb_cString, of `len` bytes length, of `enc` + * encoding, whose contents are verbatim copy of `ptr`. + * @pre At least `len` bytes of continuous memory region shall be + * accessible via `ptr`. + * @note `enc` can be a null pointer. It can also be seen as a routine + * identical to rb_usascii_str_new() then. + */ +VALUE rb_enc_str_new(const char *ptr, long len, rb_encoding *enc); + +RBIMPL_ATTR_NONNULL((1)) +/** + * Identical to rb_enc_str_new(), except it assumes the passed pointer is a + * pointer to a C string. It can also be seen as a routine identical to + * rb_str_new_cstr(), except it additionally takes an encoding. + * + * @param[in] ptr A C string. + * @param[in] enc Encoding of `ptr`. + * @exception rb_eNoMemError Failed to allocate memory. + * @return An instance of ::rb_cString, of `enc` encoding, whose contents + * are verbatim copy of `ptr`. + * @pre `ptr` must not be a null pointer. + * @pre Because `ptr` is a C string it makes no sense for `enc` to be + * something like UTF-32. + * @note `enc` can be a null pointer. It can also be seen as a routine + * identical to rb_usascii_str_new_cstr() then. + */ +VALUE rb_enc_str_new_cstr(const char *ptr, rb_encoding *enc); + +/** + * Identical to rb_enc_str_new(), except it takes a C string literal. It can + * also be seen as a routine identical to rb_str_new_static(), except it + * additionally takes an encoding. + * + * @param[in] ptr A C string literal. + * @param[in] len `strlen(ptr)`. + * @param[in] enc Encoding of `ptr`. + * @exception rb_eArgError `len` out of range of `size_t`. + * @pre `ptr` must be a C string constant. 
+ * @return An instance of ::rb_cString, of `enc` encoding, whose backend + * storage is the passed C string literal. + * @warning It is a very bad idea to write to a C string literal (often + * immediate SEGV shall occur). Consider return values of this + * function be read-only. + * @note `enc` can be a null pointer. It can also be seen as a routine + * identical to rb_usascii_str_new_static() then. + */ +VALUE rb_enc_str_new_static(const char *ptr, long len, rb_encoding *enc); + +/** + * Identical to rb_enc_str_new(), except it returns a "f"string. It can also + * be seen as a routine identical to rb_interned_str(), except it additionally + * takes an encoding. + * + * @param[in] ptr A memory region of `len` bytes length. + * @param[in] len Length of `ptr`, in bytes, not including the + * terminating NUL character. + * @param[in] enc Encoding of `ptr`. + * @exception rb_eArgError `len` is negative. + * @return A found or created instance of ::rb_cString, of `len` bytes + * length, of `enc` encoding, whose contents are identical to that + * of `ptr`. + * @pre At least `len` bytes of contiguous memory region shall be + * accessible via `ptr`. + * @note `enc` can be a null pointer. + */ +VALUE rb_enc_interned_str(const char *ptr, long len, rb_encoding *enc); + +RBIMPL_ATTR_NONNULL((1)) +/** + * Identical to rb_enc_str_new_cstr(), except it returns a "f"string. It can + * also be seen as a routine identical to rb_interned_str_cstr(), except it + * additionally takes an encoding. + * + * @param[in] ptr A C string. + * @param[in] enc Encoding of `ptr`. + * @return A found or created instance of ::rb_cString of `enc` encoding, + * whose contents are identical to that of `ptr`. + * @pre `ptr` must not be a null pointer. It is read as a C string; + * this function takes no `len` parameter. + * @note `enc` can be a null pointer.
+ */ +VALUE rb_enc_interned_str_cstr(const char *ptr, rb_encoding *enc); + +/** + * Counts the number of characters of the passed string, according to the + * passed encoding. This has to be complicated. The passed string could be + * invalid and/or broken. This routine would scan from the beginning til the + * end, byte by byte, to seek out character boundaries. Could be super slow. + * + * @param[in] head Leftmost pointer to the string. + * @param[in] tail Rightmost pointer to the string. + * @param[in] enc Encoding of the string. + * @return Number of characters that exist in `head` .. `tail`. The definition + * of "character" depends on the passed `enc`. + */ +long rb_enc_strlen(const char *head, const char *tail, rb_encoding *enc); + +/** + * Queries the n-th character. Like rb_enc_strlen() this function can be fast + * or slow depending on the contents. Don't expect characters to be uniformly + * distributed across the entire string. + * + * @param[in] head Leftmost pointer to the string. + * @param[in] tail Rightmost pointer to the string. + * @param[in] nth Requested index of characters. + * @param[in] enc Encoding of the string. + * @return Pointer to the first byte of the character that is `nth` + * character ahead of `head`, or `tail` if there is no such + * character (OOB etc). The definition of "character" depends on + * the passed `enc`. + */ +char *rb_enc_nth(const char *head, const char *tail, long nth, rb_encoding *enc); + +/** + * Identical to rb_enc_get_index(), except the return type. + * + * @param[in] obj Object in question. + * @exception rb_eTypeError `obj` is incapable of having an encoding. + * @return `obj`'s encoding. + */ +VALUE rb_obj_encoding(VALUE obj); + +/** + * Identical to rb_str_cat(), except it additionally takes an encoding. + * + * @param[out] str Destination object. + * @param[in] ptr Contents to append. + * @param[in] len Length of `ptr`, in bytes. + * @param[in] enc Encoding of `ptr`.
+ * @exception rb_eArgError `len` is negative. + * @exception rb_eEncCompatError `enc` is not compatible with `str`. + * @return The passed `str`. + * @post The contents of `ptr` are copied, transcoded into `str`'s + * encoding, then pasted into `str`'s end. + */ +VALUE rb_enc_str_buf_cat(VALUE str, const char *ptr, long len, rb_encoding *enc); + +/** + * Encodes the passed code point into a series of bytes. + * + * @param[in] code Code point. + * @param[in] enc Target encoding scheme. + * @exception rb_eRangeError `enc` does not glean `code`. + * @return An instance of ::rb_cString, of `enc` encoding, whose sole + * contents is `code` represented in `enc`. + * @note No way to encode code points bigger than UINT_MAX. + * + * @internal + * + * In other languages, APIs like this one could be seen as the primitive + * routines where encodings' "encode" feature are implemented. However in case + * of Ruby this is not the primitive one. We directly manipulate encoded + * strings. Encoding conversion routines transcode an encoded string directly + * to another one; not via a code point array. + */ +VALUE rb_enc_uint_chr(unsigned int code, rb_encoding *enc); + +/** + * Identical to rb_external_str_new(), except it additionally takes an + * encoding. However the whole point of rb_external_str_new() is to encode a + * string into default external encoding. Being able to specify arbitrary + * encoding just ruins the designed purpose the function meseems. + * + * @param[in] ptr A memory region of `len` bytes length. + * @param[in] len Length of `ptr`, in bytes, not including the + * terminating NUL character. + * @param[in] enc Target encoding scheme. + * @exception rb_eArgError `len` is negative. + * @return An instance of ::rb_cString. In case encoding conversion from + * "default internal" to `enc` is fully defined over the given + * contents, then the return value is a string of `enc` encoding, + * whose contents are the converted ones. Otherwise the string is + * a junk.
+ * @warning It doesn't raise on a conversion failure and silently ends up in + * a corrupted output. You can know the failure by querying + * `valid_encoding?` of the result object. + * + * @internal + * + * @shyouhei has no idea why this one does not follow the naming convention + * that others obey. It seems to him that this should have been called + * `rb_enc_external_str_new`. + */ +VALUE rb_external_str_new_with_enc(const char *ptr, long len, rb_encoding *enc); + +/** + * Identical to rb_str_export(), except it additionally takes an encoding. + * + * @param[in] obj Target object. + * @param[in] enc Target encoding. + * @exception rb_eTypeError No implicit conversion to String. + * @return Converted ruby string of `enc` encoding. + */ +VALUE rb_str_export_to_enc(VALUE obj, rb_encoding *enc); + +/** + * Encoding conversion main routine. + * + * @param[in] str String to convert. + * @param[in] from Source encoding. + * @param[in] to Destination encoding. + * @return A copy of `str`, with conversion from `from` to `to` applied. + * @note `from` can be a null pointer. `str`'s encoding is taken then. + * @note `to` can be a null pointer. No-op then. + */ +VALUE rb_str_conv_enc(VALUE str, rb_encoding *from, rb_encoding *to); + +/** + * Identical to rb_str_conv_enc(), except it additionally takes IO encoder + * options. The extra arguments can be constructed using io_extract_modeenc() + * etc. + * + * @param[in] str String to convert. + * @param[in] from Source encoding. + * @param[in] to Destination encoding. + * @param[in] ecflags A set of enum ::ruby_econv_flag_type. + * @param[in] ecopts Optional hash. + * @return A copy of `str`, with conversion from `from` to `to` applied. + * @note `from` can be a null pointer. `str`'s encoding is taken then. + * @note `to` can be a null pointer. No-op then. + * @note `ecopts` can be ::RUBY_Qnil, which is equivalent to passing an + * empty hash. 
+ */ +VALUE rb_str_conv_enc_opts(VALUE str, rb_encoding *from, rb_encoding *to, int ecflags, VALUE ecopts); + +/** + * Scans the passed string to collect its code range. Because a Ruby's string + * is mutable, its contents change from time to time; so does its code range. + * A long-lived string tends to fall back to ::RUBY_ENC_CODERANGE_UNKNOWN. + * This API scans it and re-assigns a fine-grained code range constant. + * + * @param[out] str A string. + * @return An enum ::ruby_coderange_type. + */ +int rb_enc_str_coderange(VALUE str); + +/** + * Scans the passed string until it finds something odd. Returns the number of + * bytes scanned. As the name implies this is suitable for repeated call. One + * of its application is `IO#readlines`. The method reads from its receiver's + * read buffer, maybe more than once, looking for newlines. But "newline" can + * be different among encodings. This API is used to detect broken contents to + * properly mark them as such. + * + * @param[in] str String to scan. + * @param[in] end End of `str`. + * @param[in] enc `str`'s encoding. + * @param[out] cr Return buffer. + * @return Distance between `str` and first such byte where broken. + * @post `cr` has the code range type. + */ +long rb_str_coderange_scan_restartable(const char *str, const char *end, rb_encoding *enc, int *cr); + +/** + * Queries if the passed string is "ASCII only". An ASCII only string is a + * string who doesn't have any non-ASCII characters at all. This doesn't + * necessarily mean the string is in ASCII encoding. For instance a String of + * CP932 encoding can quite much be ASCII only, depending on its contents. + * + * @param[in] str String in question. + * @retval 1 It doesn't have non-ASCII characters. + * @retval 0 It has characters that are out of ASCII. + */ +int rb_enc_str_asciionly_p(VALUE str); + +RBIMPL_ATTR_NONNULL(()) +/** + * Looks for the passed string in the passed buffer. + * + * @param[in] x Buffer that potentially includes `y`. 
+ * @param[in] m Number of bytes of `x`. + * @param[in] y Query string. + * @param[in] n Number of bytes of `y`. + * @param[in] enc Encoding of both `x` and `y`. + * @retval -1 Not found. + * @retval otherwise Found index in `x`. + * @note This API can match at a non-character-boundary. + */ +long rb_memsearch(const void *x, long m, const void *y, long n, rb_encoding *enc); + +/** @cond INTERNAL_MACRO */ +RBIMPL_ATTR_NONNULL(()) +static inline VALUE +rbimpl_enc_str_new_cstr(const char *str, rb_encoding *enc) +{ + long len = rbimpl_strlen(str); +/* Pass `str` and its rbimpl_strlen() length on to rb_enc_str_new_static(). */ + return rb_enc_str_new_static(str, len, enc); +} +/* Function-like macro named after rb_enc_str_new(): picks rb_enc_str_new_static() when RBIMPL_CONSTANT_P() holds for both `str` and `len`, and rb_enc_str_new() otherwise. */ +#define rb_enc_str_new(str, len, enc) \ + ((RBIMPL_CONSTANT_P(str) && \ + RBIMPL_CONSTANT_P(len) ? \ + rb_enc_str_new_static: \ + rb_enc_str_new) ((str), (len), (enc))) +/* Function-like macro named after rb_enc_str_new_cstr(): picks rbimpl_enc_str_new_cstr() when RBIMPL_CONSTANT_P() holds for `str`, and rb_enc_str_new_cstr() otherwise. */ +#define rb_enc_str_new_cstr(str, enc) \ + ((RBIMPL_CONSTANT_P(str) ? \ + rbimpl_enc_str_new_cstr : \ + rb_enc_str_new_cstr) ((str), (enc))) + +/** @endcond */ + +RBIMPL_SYMBOL_EXPORT_END() + +#endif /* RUBY_INTERNAL_ENCODING_STRING_H */ diff --git a/include/ruby/internal/encoding/symbol.h b/include/ruby/internal/encoding/symbol.h new file mode 100644 index 0000000000..9cd1b0dbf4 --- /dev/null +++ b/include/ruby/internal/encoding/symbol.h @@ -0,0 +1,100 @@ +#ifndef RUBY_INTERNAL_ENCODING_SYMBOL_H /*-*-C++-*-vi:se ft=cpp:*/ +#define RUBY_INTERNAL_ENCODING_SYMBOL_H +/** + * @file + * @author Ruby developers <ruby-core@ruby-lang.org> + * @copyright This file is a part of the programming language Ruby. + * Permission is hereby granted, to either redistribute and/or + * modify this file, provided that the conditions mentioned in the + * file COPYING are met. Consult the file for details. + * @warning Symbols prefixed with either `RBIMPL` or `rbimpl` are + * implementation details. Don't take them as canon. They could + * rapidly appear then vanish. The name (path) of this header file + * is also an implementation detail. Do not expect it to persist + * at the place it is now.
Developers are free to move it anywhere + * anytime at will. + * @note To ruby-core: remember that this header can be possibly + * recursively included from extension libraries written in C++. + * Do not expect for instance `__VA_ARGS__` is always available. + * We assume C99 for ruby itself but we don't assume languages of + * extension libraries. They could be written in C++98. + * @brief Routines to manipulate encodings of symbols. + */ + +#include "ruby/internal/attr/nonnull.h" +#include "ruby/internal/dllexport.h" +#include "ruby/internal/encoding/encoding.h" +#include "ruby/internal/value.h" + +RBIMPL_SYMBOL_EXPORT_BEGIN() + +/** + * Identical to rb_intern2(), except it additionally takes an encoding. + * + * @param[in] name The name of the id. + * @param[in] len Length of `name`. + * @param[in] enc `name`'s encoding. + * @exception rb_eRuntimeError Too many symbols. + * @return A (possibly new) id whose value is the given name. + * @note These days Ruby internally has two kinds of symbols + * (static/dynamic). Symbols created using this function would + * become static ones; i.e. would never be garbage collected. It + * is up to you to avoid memory leaks. Think twice before using + * it. + */ +ID rb_intern3(const char *name, long len, rb_encoding *enc); + +RBIMPL_ATTR_NONNULL(()) +/** + * Identical to rb_symname_p(), except it additionally takes an encoding. + * + * @param[in] str A C string to check. + * @param[in] enc `str`'s encoding. + * @retval 1 It is a valid symbol name. + * @retval 0 It is invalid as a symbol name. + */ +int rb_enc_symname_p(const char *str, rb_encoding *enc); + +/** + * Identical to rb_enc_symname_p(), except it additionally takes the passed + * string's length. This is needed for strings containing NUL bytes, like in + * case of UTF-32. + * + * @param[in] name A string to check; may contain NUL bytes. + * @param[in] len Number of bytes of `name`. + * @param[in] enc `name`'s encoding. + * @retval 1 It is a valid symbol name.
+ * @retval 0 It is invalid as a symbol name. + */ +int rb_enc_symname2_p(const char *name, long len, rb_encoding *enc); + +/** + * Identical to rb_check_id(), except it takes a pointer to a memory region + * instead of Ruby's string. + * + * @param[in] ptr A pointer to a memory region. + * @param[in] len Number of bytes of `ptr`. + * @param[in] enc Encoding of `ptr`. + * @exception rb_eEncodingError `ptr` contains non-ASCII according to `enc`. + * @retval 0 No such id ever existed in the history. + * @retval otherwise The id that represents the given name. + */ +ID rb_check_id_cstr(const char *ptr, long len, rb_encoding *enc); + +/** + * Identical to rb_check_id_cstr(), except for the return type. It can also be + * seen as a routine identical to rb_check_symbol(), except it takes a pointer + * to a memory region instead of Ruby's string. + * + * @param[in] ptr A pointer to a memory region. + * @param[in] len Number of bytes of `ptr`. + * @param[in] enc Encoding of `ptr`. + * @exception rb_eEncodingError `ptr` contains non-ASCII according to `enc`. + * @retval RUBY_Qnil No such id ever existed in the history. + * @retval otherwise The symbol that represents the given name. + */ +VALUE rb_check_symbol_cstr(const char *ptr, long len, rb_encoding *enc); + +RBIMPL_SYMBOL_EXPORT_END() + +#endif /* RUBY_INTERNAL_ENCODING_SYMBOL_H */ diff --git a/include/ruby/internal/encoding/transcode.h b/include/ruby/internal/encoding/transcode.h new file mode 100644 index 0000000000..7f26d2eae9 --- /dev/null +++ b/include/ruby/internal/encoding/transcode.h @@ -0,0 +1,562 @@ +#ifndef RUBY_INTERNAL_ENCODING_TRANSCODE_H /*-*-C++-*-vi:se ft=cpp:*/ +#define RUBY_INTERNAL_ENCODING_TRANSCODE_H +/** + * @file + * @author Ruby developers <ruby-core@ruby-lang.org> + * @copyright This file is a part of the programming language Ruby. + * Permission is hereby granted, to either redistribute and/or + * modify this file, provided that the conditions mentioned in the + * file COPYING are met.
Consult the file for details. + * @warning Symbols prefixed with either `RBIMPL` or `rbimpl` are + * implementation details. Don't take them as canon. They could + * rapidly appear then vanish. The name (path) of this header file + * is also an implementation detail. Do not expect it to persist + * at the place it is now. Developers are free to move it anywhere + * anytime at will. + * @note To ruby-core: remember that this header can be possibly + * recursively included from extension libraries written in C++. + * Do not expect for instance `__VA_ARGS__` is always available. + * We assume C99 for ruby itself but we don't assume languages of + * extension libraries. They could be written in C++98. + * @brief econv stuff + */ + +#include "ruby/internal/dllexport.h" +#include "ruby/internal/value.h" + +RBIMPL_SYMBOL_EXPORT_BEGIN() + +/** return value of rb_econv_convert() */ +typedef enum { + + /** + * The conversion stopped when it found an invalid sequence. + */ + econv_invalid_byte_sequence, + + /** + * The conversion stopped when it found a character in the input which + * cannot be representable in the output. + */ + econv_undefined_conversion, + + /** + * The conversion stopped because there is no destination. + */ + econv_destination_buffer_full, + + /** + * The conversion stopped because there is no input. + */ + econv_source_buffer_empty, + + /** + * The conversion stopped after converting everything. This is arguably + * the expected normal end of conversion. + */ + econv_finished, + + /** + * The conversion stopped after writing something to somewhere, before + * reading everything. + */ + econv_after_output, + + /** + * The conversion stopped in middle of reading a character, possibly due to + * a partial read of a socket etc. + */ + econv_incomplete_input +} rb_econv_result_t; + +/** An opaque struct that represents a lowest level of encoding conversion. 
*/ +typedef struct rb_econv_t rb_econv_t; + +/** + * Converts the contents of the passed string from its encoding to the passed + * one. + * + * @param[in] str Target string. + * @param[in] to Destination encoding. + * @param[in] ecflags A set of enum + * ::ruby_econv_flag_type. + * @param[in] ecopts A keyword hash, like + * ::rb_io_t::rb_io_enc_t::ecopts. + * @exception rb_eArgError Not fully converted. + * @exception rb_eInvalidByteSequenceError `str` is malformed. + * @exception rb_eUndefinedConversionError `str` has a character not + * representable using `to`. + * @exception rb_eConversionNotFoundError There is no known conversion from + * `str`'s encoding to `to`. + * @return A string whose encoding is `to`, and whose contents is converted + * contents of `str`. + * @note Use rb_econv_prepare_options() to generate `ecopts`. + */ +VALUE rb_str_encode(VALUE str, VALUE to, int ecflags, VALUE ecopts); + +/** + * Queries if there is more than one way to convert between the passed two + * encodings. Encoding conversion are has_and_belongs_to_many relationships. + * There could be no direct conversion defined for the passed pair. Ruby tries + * to find an indirect way to do so then. For instance ISO-8859-1 has no + * direct conversion to ISO-2022-JP. But there is ISO-8859-1 to UTF-8 + * conversion; then there is UTF-8 to EUC-JP conversion; finally there also is + * EUC-JP to ISO-2022-JP conversion. So in short ISO-8859-1 can be converted + * to ISO-2022-JP using that path. This function returns true. Obviously not + * everything that can be represented using UTF-8 can also be represented using + * EUC-JP. Conversions in practice can fail depending on the actual input, and + * that renders exceptions in case of rb_str_encode(). + * + * @param[in] from_encoding One encoding. + * @param[in] to_encoding Another encoding. + * @retval 0 No way to convert the two. + * @retval 1 At least one way to convert the two. 
+ * + * @internal + * + * Practically @shyouhei knows no way for this function to return 0. It seems + * everything can eventually be converted to/from UTF-8, which connects + * everything. + */ +int rb_econv_has_convpath_p(const char* from_encoding, const char* to_encoding); + +/** + * Identical to rb_econv_prepare_opts(), except it additionally takes the + * initial value of flags. The extra bits are bitwise-ORed to the return + * value. + * + * @param[in] opthash Keyword arguments. + * @param[out] ecopts Return buffer. + * @param[in] ecflags Default set of enum ::ruby_econv_flag_type. + * @exception rb_eArgError Unknown/Broken values passed. + * @return Calculated set of enum ::ruby_econv_flag_type. + * @post `ecopts` holds a hash object suitable for + * ::rb_io_t::rb_io_enc_t::ecopts. + */ +int rb_econv_prepare_options(VALUE opthash, VALUE *ecopts, int ecflags); + +/** + * Splits a keyword arguments hash (that for instance `String#encode` took) + * into a set of enum ::ruby_econv_flag_type and a hash storing replacement + * characters etc. + * + * @param[in] opthash Keyword arguments. + * @param[out] ecopts Return buffer. + * @exception rb_eArgError Unknown/Broken values passed. + * @return Calculated set of enum ::ruby_econv_flag_type. + * @post `ecopts` holds a hash object suitable for + * ::rb_io_t::rb_io_enc_t::ecopts. + */ +int rb_econv_prepare_opts(VALUE opthash, VALUE *ecopts); + +/** + * Creates a new instance of struct ::rb_econv_t. + * + * @param[in] source_encoding Name of an encoding. + * @param[in] destination_encoding Name of another encoding. + * @param[in] ecflags A set of enum ::ruby_econv_flag_type. + * @exception rb_eArgError No such encoding. + * @retval NULL Failed to create a struct ::rb_econv_t. + * @retval otherwise Allocated struct ::rb_econv_t. + * @warning Return value must be passed to rb_econv_close() exactly once. 
+ */ +rb_econv_t *rb_econv_open(const char *source_encoding, const char *destination_encoding, int ecflags); + +/** + * Identical to rb_econv_open(), except it additionally takes a hash of + * optional strings. + * + * + * @param[in] source_encoding Name of an encoding. + * @param[in] destination_encoding Name of another encoding. + * @param[in] ecflags A set of enum ::ruby_econv_flag_type. + * @param[in] ecopts Optional set of strings. + * @exception rb_eArgError No such encoding. + * @retval NULL Failed to create a struct ::rb_econv_t. + * @retval otherwise Allocated struct ::rb_econv_t. + * @warning Return value must be passed to rb_econv_close() exactly once. + */ +rb_econv_t *rb_econv_open_opts(const char *source_encoding, const char *destination_encoding, int ecflags, VALUE ecopts); + +/** + * Converts a string from an encoding to another. + * + * Possible flags are either ::RUBY_ECONV_PARTIAL_INPUT (means the source + * buffer is a part of much larger one), ::RUBY_ECONV_AFTER_OUTPUT (instructs + * the converter to stop after output before input), or both of them. + * + * @param[in,out] ec Conversion specification/state etc. + * @param[in] source_buffer_ptr Target string. + * @param[in] source_buffer_end End of target string. + * @param[out] destination_buffer_ptr Return buffer. + * @param[out] destination_buffer_end End of return buffer. + * @param[in] flags Flags (see above). + * @return The status of the conversion. + * @post `destination_buffer_ptr` holds conversion results. + */ +rb_econv_result_t rb_econv_convert(rb_econv_t *ec, + const unsigned char **source_buffer_ptr, const unsigned char *source_buffer_end, + unsigned char **destination_buffer_ptr, unsigned char *destination_buffer_end, + int flags); + +/** + * Destructs a converter. Note that a converter can have a buffer, and can be + * non-empty. Calling this would lose your data then. + * + * @param[out] ec The converter to destroy. + * @post `ec` is no longer a valid pointer. 
+ */ +void rb_econv_close(rb_econv_t *ec); + +/** + * Assigns the replacement string. The string passed here would appear in + * converted string when it cannot represent its source counterpart. This can + * happen for instance you convert an emoji to ISO-8859-1. + * + * @param[out] ec Target converter. + * @param[in] str Replacement string. + * @param[in] len Number of bytes of `str`. + * @param[in] encname Name of encoding of `str`. + * @retval 0 Success. + * @retval -1 Failure (ENOMEM etc.). + * @post `ec`'s replacement string is set to `str`. + */ +int rb_econv_set_replacement(rb_econv_t *ec, const unsigned char *str, size_t len, const char *encname); + +/** + * "Decorate"s a converter. There are special kind of converters that + * transforms the contents, like replacing CR into CRLF. You can add such + * decorators to a converter using this API. By using this function a + * decorator is prepended at the beginning of a conversion sequence: in case of + * CRLF conversion, newlines are converted before encodings are converted. + * + * @param[out] ec Target converter to decorate. + * @param[in] decorator_name Name of decorator to prepend. + * @retval 0 Success. + * @retval -1 Failure (no such decorator etc.). + * @post Decorator works before encoding conversion happens. + * + * @internal + * + * What is the possible value of the `decorator_name` is not public. You have + * to read through `transcode.c` carefully. + */ +int rb_econv_decorate_at_first(rb_econv_t *ec, const char *decorator_name); + +/** + * Identical to rb_econv_decorate_at_first(), except it adds to the opposite + * direction. For instance CRLF conversion would run _after_ encodings are + * converted. + * + * @param[out] ec Target converter to decorate. + * @param[in] decorator_name Name of decorator to prepend. + * @retval 0 Success. + * @retval -1 Failure (no such decorator etc.). + * @post Decorator works after encoding conversion happens. 
+ */ +int rb_econv_decorate_at_last(rb_econv_t *ec, const char *decorator_name); + +/** + * Creates a `rb_eConverterNotFoundError` exception object (but does not + * raise). + * + * @param[in] senc Name of source encoding. + * @param[in] denc Name of destination encoding. + * @param[in] ecflags A set of enum ::ruby_econv_flag_type. + * @return An instance of `rb_eConverterNotFoundError`. + */ +VALUE rb_econv_open_exc(const char *senc, const char *denc, int ecflags); + +/** + * Appends the passed string to the passed converter's output buffer. This can + * be handy when an encoding needs bytes out of thin air; for instance + * ISO-2022-JP has "shift function" which does not correspond to any + * characters. + * + * @param[out] ec Target converter. + * @param[in] str String to insert. + * @param[in] len Number of bytes of `str`. + * @param[in] str_encoding Encoding of `str`. + * @retval 0 Success. + * @retval -1 Failure (conversion error etc.). + * @note `str_encoding` can be anything, and `str` itself is converted + * when necessary. + */ +int rb_econv_insert_output(rb_econv_t *ec, + const unsigned char *str, size_t len, const char *str_encoding); + +/** + * Queries an encoding name which best suits for rb_econv_insert_output()'s + * last parameter. Strings in this encoding need no conversion when inserted; + * can be both time/space efficient. + * + * @param[in] ec Target converter. + * @return Its encoding for insertion. + */ +const char *rb_econv_encoding_to_insert_output(rb_econv_t *ec); + +/** + * This is a rb_econv_make_exception() + rb_exc_raise() combo. + * + * @param[in] ec (Possibly failed) conversion. + * @exception rb_eInvalidByteSequenceError Invalid byte sequence. + * @exception rb_eUndefinedConversionError Conversion undefined. + * @note This function can return when no error. + */ +void rb_econv_check_error(rb_econv_t *ec); + +/** + * This function makes sense right after rb_econv_convert() returns. 
As listed + * in ::rb_econv_result_t, rb_econv_convert() can bail out for various reasons. + * This function checks the passed converter's internal state and converts it to + * an appropriate exception object. + * + * @param[in] ec Target converter. + * @retval RUBY_Qnil The converter has no error. + * @retval otherwise Conversion error turned into an exception. + */ +VALUE rb_econv_make_exception(rb_econv_t *ec); + +/** + * Queries if rb_econv_putback() makes sense, i.e. there are invalid byte + * sequences remaining in the buffer. + * + * @param[in] ec Target converter. + * @return Number of bytes that can be pushed back. + */ +int rb_econv_putbackable(rb_econv_t *ec); + +/** + * Puts back the bytes. In case of ::econv_invalid_byte_sequence, some of + * those invalid bytes are discarded and the others are buffered to be + * converted later. The latter bytes can be put back using this API. + * + * @param[out] ec Target converter (invalid byte sequence). + * @param[out] p Return buffer. + * @param[in] n Max number of bytes to put back. + * @post At most `n` bytes of what was put back are written to `p`. + */ +void rb_econv_putback(rb_econv_t *ec, unsigned char *p, int n); + +/** + * Queries the passed encoding's corresponding ASCII compatible encoding. "The + * corresponding ASCII compatible encoding" in this context is an ASCII + * compatible encoding which can represent exactly the same character sets as + * the given ASCII incompatible encoding. For instance that of UTF-16LE is + * UTF-8. + * + * @param[in] encname Name of an ASCII incompatible encoding. + * @retval NULL `encname` is already ASCII compatible. + * @retval otherwise The corresponding ASCII compatible encoding. + */ +const char *rb_econv_asciicompat_encoding(const char *encname); + +/** + * Identical to rb_econv_convert(), except it takes Ruby's string instead of + * C's pointer. + * + * @param[in,out] ec Target converter. + * @param[in] src Source string.
+ * @param[in] flags Flags (see rb_econv_convert). + * @exception rb_eArgError Converted string is too long. + * @exception rb_eInvalidByteSequenceError Invalid byte sequence. + * @exception rb_eUndefinedConversionError Conversion undefined. + * @return The conversion result. + */ +VALUE rb_econv_str_convert(rb_econv_t *ec, VALUE src, int flags); + +/** + * Identical to rb_econv_str_convert(), except it converts only a part of the + * passed string. Can be handy when you for instance want to do line-buffered + * conversion. + * + * @param[in,out] ec Target converter. + * @param[in] src Source string. + * @param[in] byteoff Number of bytes to seek. + * @param[in] bytesize Number of bytes to read. + * @param[in] flags Flags (see rb_econv_convert). + * @exception rb_eArgError Converted string is too long. + * @exception rb_eInvalidByteSequenceError Invalid byte sequence. + * @exception rb_eUndefinedConversionError Conversion undefined. + * @return The conversion result. + */ +VALUE rb_econv_substr_convert(rb_econv_t *ec, VALUE src, long byteoff, long bytesize, int flags); + +/** + * Identical to rb_econv_str_convert(), except it appends the conversion result + * to the additionally passed string instead of creating a new string. It can + * also be seen as a routine identical to rb_econv_append(), except it takes a + * Ruby's string instead of C's pointer. + * + * @param[in,out] ec Target converter. + * @param[in] src Source string. + * @param[in] dst Return buffer. + * @param[in] flags Flags (see rb_econv_convert). + * @exception rb_eArgError Converted string is too long. + * @exception rb_eInvalidByteSequenceError Invalid byte sequence. + * @exception rb_eUndefinedConversionError Conversion undefined. + * @return The conversion result. + */ +VALUE rb_econv_str_append(rb_econv_t *ec, VALUE src, VALUE dst, int flags); + +/** + * Identical to rb_econv_str_append(), except it appends only a part of the + * passed string with conversion. 
It can also be seen as a routine identical + * to rb_econv_substr_convert(), except it appends the conversion result to the + * additionally passed string instead of creating a new string. + * + * @param[in,out] ec Target converter. + * @param[in] src Source string. + * @param[in] byteoff Number of bytes to seek. + * @param[in] bytesize Number of bytes to read. + * @param[in] dst Return buffer. + * @param[in] flags Flags (see rb_econv_convert). + * @exception rb_eArgError Converted string is too long. + * @exception rb_eInvalidByteSequenceError Invalid byte sequence. + * @exception rb_eUndefinedConversionError Conversion undefined. + * @return The conversion result. + */ +VALUE rb_econv_substr_append(rb_econv_t *ec, VALUE src, long byteoff, long bytesize, VALUE dst, int flags); + +/** + * Converts the passed C's pointer according to the passed converter, then + * appends the conversion result to the passed Ruby's string. This way buffer + * overflow is avoided by resizing the destination as needed. + * + * @param[in,out] ec Target converter. + * @param[in] bytesrc Target string. + * @param[in] bytesize Number of bytes of `bytesrc`. + * @param[in] dst Return buffer. + * @param[in] flags Flags (see rb_econv_convert). + * @exception rb_eArgError Converted string is too long. + * @exception rb_eInvalidByteSequenceError Invalid byte sequence. + * @exception rb_eUndefinedConversionError Conversion undefined. + * @return The conversion result. + */ +VALUE rb_econv_append(rb_econv_t *ec, const char *bytesrc, long bytesize, VALUE dst, int flags); + +/** + * This badly named function does not set the destination encoding to binary, + * but instead just nullifies newline conversion decorators if any. Other + * ordinary character conversions still happen after this; something non-binary + * would still be generated. + * + * @param[out] ec Target converter to modify. + * @post Any newline conversions, if any, would be killed.
+ */ +void rb_econv_binmode(rb_econv_t *ec); + +/** + * This enum is kind of omnibus. Gathers various constants. + */ +enum ruby_econv_flag_type { + + /** + * @name Flags for rb_econv_open() + * + * @{ + */ + + /** Mask for error handling related bits. */ + RUBY_ECONV_ERROR_HANDLER_MASK = 0x000000ff, + + /** Special handling of invalid sequences are there. */ + RUBY_ECONV_INVALID_MASK = 0x0000000f, + + /** Invalid sequences shall be replaced. */ + RUBY_ECONV_INVALID_REPLACE = 0x00000002, + + /** Special handling of undefined conversion are there. */ + RUBY_ECONV_UNDEF_MASK = 0x000000f0, + + /** Undefined characters shall be replaced. */ + RUBY_ECONV_UNDEF_REPLACE = 0x00000020, + + /** Undefined characters shall be escaped. */ + RUBY_ECONV_UNDEF_HEX_CHARREF = 0x00000030, + + /** Decorators are there. */ + RUBY_ECONV_DECORATOR_MASK = 0x0001ff00, + + /** Newline converters are there. */ + RUBY_ECONV_NEWLINE_DECORATOR_MASK = 0x00007f00, + + /** (Unclear; seems unused). */ + RUBY_ECONV_NEWLINE_DECORATOR_READ_MASK = 0x00000f00, + + /** (Unclear; seems unused). */ + RUBY_ECONV_NEWLINE_DECORATOR_WRITE_MASK = 0x00007000, + + /** Universal newline mode. */ + RUBY_ECONV_UNIVERSAL_NEWLINE_DECORATOR = 0x00000100, + + /** CR to CRLF conversion shall happen. */ + RUBY_ECONV_CRLF_NEWLINE_DECORATOR = 0x00001000, + + /** CRLF to CR conversion shall happen. */ + RUBY_ECONV_CR_NEWLINE_DECORATOR = 0x00002000, + + /** CRLF to LF conversion shall happen. */ + RUBY_ECONV_LF_NEWLINE_DECORATOR = 0x00004000, + + /** Texts shall be XML-escaped. */ + RUBY_ECONV_XML_TEXT_DECORATOR = 0x00008000, + + /** Texts shall be AttrValue escaped */ + RUBY_ECONV_XML_ATTR_CONTENT_DECORATOR = 0x00010000, + + /** (Unclear; seems unused). */ + RUBY_ECONV_STATEFUL_DECORATOR_MASK = 0x00f00000, + + /** Texts shall be AttrValue escaped. */ + RUBY_ECONV_XML_ATTR_QUOTE_DECORATOR = 0x00100000, + + /** Newline decorator's default. 
*/ + RUBY_ECONV_DEFAULT_NEWLINE_DECORATOR = +#if defined(RUBY_TEST_CRLF_ENVIRONMENT) || defined(_WIN32) + RUBY_ECONV_CRLF_NEWLINE_DECORATOR, +#else + 0, +#endif + +#define ECONV_ERROR_HANDLER_MASK RUBY_ECONV_ERROR_HANDLER_MASK /**< @old{RUBY_ECONV_ERROR_HANDLER_MASK} */ +#define ECONV_INVALID_MASK RUBY_ECONV_INVALID_MASK /**< @old{RUBY_ECONV_INVALID_MASK} */ +#define ECONV_INVALID_REPLACE RUBY_ECONV_INVALID_REPLACE /**< @old{RUBY_ECONV_INVALID_REPLACE} */ +#define ECONV_UNDEF_MASK RUBY_ECONV_UNDEF_MASK /**< @old{RUBY_ECONV_UNDEF_MASK} */ +#define ECONV_UNDEF_REPLACE RUBY_ECONV_UNDEF_REPLACE /**< @old{RUBY_ECONV_UNDEF_REPLACE} */ +#define ECONV_UNDEF_HEX_CHARREF RUBY_ECONV_UNDEF_HEX_CHARREF /**< @old{RUBY_ECONV_UNDEF_HEX_CHARREF} */ +#define ECONV_DECORATOR_MASK RUBY_ECONV_DECORATOR_MASK /**< @old{RUBY_ECONV_DECORATOR_MASK} */ +#define ECONV_NEWLINE_DECORATOR_MASK RUBY_ECONV_NEWLINE_DECORATOR_MASK /**< @old{RUBY_ECONV_NEWLINE_DECORATOR_MASK} */ +#define ECONV_NEWLINE_DECORATOR_READ_MASK RUBY_ECONV_NEWLINE_DECORATOR_READ_MASK /**< @old{RUBY_ECONV_NEWLINE_DECORATOR_READ_MASK} */ +#define ECONV_NEWLINE_DECORATOR_WRITE_MASK RUBY_ECONV_NEWLINE_DECORATOR_WRITE_MASK /**< @old{RUBY_ECONV_NEWLINE_DECORATOR_WRITE_MASK} */ +#define ECONV_UNIVERSAL_NEWLINE_DECORATOR RUBY_ECONV_UNIVERSAL_NEWLINE_DECORATOR /**< @old{RUBY_ECONV_UNIVERSAL_NEWLINE_DECORATOR} */ +#define ECONV_CRLF_NEWLINE_DECORATOR RUBY_ECONV_CRLF_NEWLINE_DECORATOR /**< @old{RUBY_ECONV_CRLF_NEWLINE_DECORATOR} */ +#define ECONV_CR_NEWLINE_DECORATOR RUBY_ECONV_CR_NEWLINE_DECORATOR /**< @old{RUBY_ECONV_CR_NEWLINE_DECORATOR} */ +#define ECONV_LF_NEWLINE_DECORATOR RUBY_ECONV_LF_NEWLINE_DECORATOR /**< @old{RUBY_ECONV_LF_NEWLINE_DECORATOR} */ +#define ECONV_XML_TEXT_DECORATOR RUBY_ECONV_XML_TEXT_DECORATOR /**< @old{RUBY_ECONV_XML_TEXT_DECORATOR} */ +#define ECONV_XML_ATTR_CONTENT_DECORATOR RUBY_ECONV_XML_ATTR_CONTENT_DECORATOR /**< @old{RUBY_ECONV_XML_ATTR_CONTENT_DECORATOR} */ +#define ECONV_STATEFUL_DECORATOR_MASK 
RUBY_ECONV_STATEFUL_DECORATOR_MASK /**< @old{RUBY_ECONV_STATEFUL_DECORATOR_MASK} */ +#define ECONV_XML_ATTR_QUOTE_DECORATOR RUBY_ECONV_XML_ATTR_QUOTE_DECORATOR /**< @old{RUBY_ECONV_XML_ATTR_QUOTE_DECORATOR} */ +#define ECONV_DEFAULT_NEWLINE_DECORATOR RUBY_ECONV_DEFAULT_NEWLINE_DECORATOR /**< @old{RUBY_ECONV_DEFAULT_NEWLINE_DECORATOR} */ + /** @} */ + + /** + * @name Flags for rb_econv_convert() + * + * @{ + */ + + /** Indicates the input is a part of much larger one. */ + RUBY_ECONV_PARTIAL_INPUT = 0x00020000, + + /** Instructs the converter to stop after output. */ + RUBY_ECONV_AFTER_OUTPUT = 0x00040000, +#define ECONV_PARTIAL_INPUT RUBY_ECONV_PARTIAL_INPUT /**< @old{RUBY_ECONV_PARTIAL_INPUT} */ +#define ECONV_AFTER_OUTPUT RUBY_ECONV_AFTER_OUTPUT /**< @old{RUBY_ECONV_AFTER_OUTPUT} */ + + RUBY_ECONV_FLAGS_PLACEHOLDER /**< Placeholder (not used) */ +}; + +RBIMPL_SYMBOL_EXPORT_END() + +#endif /* RUBY_INTERNAL_ENCODING_TRANSCODE_H */ diff --git a/include/ruby/internal/error.h b/include/ruby/internal/error.h index 49e2276cb9..cd37f4461a 100644 --- a/include/ruby/internal/error.h +++ b/include/ruby/internal/error.h @@ -50,7 +50,19 @@ typedef enum { /** Warning is for experimental features. */ RB_WARN_CATEGORY_EXPERIMENTAL, - RB_WARN_CATEGORY_ALL_BITS = 0x6 /* no RB_WARN_CATEGORY_NONE bit */ + /** Warning is for performance issues (not enabled by -w). 
*/ + RB_WARN_CATEGORY_PERFORMANCE, + + RB_WARN_CATEGORY_DEFAULT_BITS = ( + (1U << RB_WARN_CATEGORY_DEPRECATED) | + (1U << RB_WARN_CATEGORY_EXPERIMENTAL) | + 0), + + RB_WARN_CATEGORY_ALL_BITS = ( + (1U << RB_WARN_CATEGORY_DEPRECATED) | + (1U << RB_WARN_CATEGORY_EXPERIMENTAL) | + (1U << RB_WARN_CATEGORY_PERFORMANCE) | + 0) } rb_warning_category_t; /** for rb_readwrite_sys_fail first argument */ @@ -469,7 +481,7 @@ VALUE *rb_ruby_debug_ptr(void); */ #define ruby_debug (*rb_ruby_debug_ptr()) -/* reports if `-W' specified */ +/* reports if $VERBOSE is true */ RBIMPL_ATTR_NONNULL((1)) RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 1, 2) /** @@ -484,7 +496,8 @@ RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 1, 2) * default, the method just emits its passed contents to ::rb_stderr using * rb_io_write(). * - * @note This function is affected by the `-W` flag. + * @note This function is affected by the value of $VERBOSE, it does + * nothing unless $VERBOSE is true. * @param[in] fmt Format specifier string compatible with rb_sprintf(). * * @internal @@ -509,7 +522,7 @@ RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 3, 4) * Issues a compile-time warning that happens at `__file__:__line__`. Purpose * of this function being exposed to CAPI is unclear. * - * @note This function is affected by the `-W` flag. + * @note This function is affected by the value of $VERBOSE. * @param[in] file The path corresponding to Ruby level `__FILE__`. * @param[in] line The number corresponding to Ruby level `__LINE__`. * @param[in] fmt Format specifier string compatible with rb_sprintf(). @@ -522,19 +535,20 @@ RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 1, 2) * Identical to rb_sys_fail(), except it does not raise an exception to render * a warning instead. * - * @note This function is affected by the `-W` flag. + * @note This function is affected by the value of $VERBOSE. * @param[in] fmt Format specifier string compatible with rb_sprintf(). 
*/ void rb_sys_warning(const char *fmt, ...); -/* reports always */ +/* reports if $VERBOSE is not nil (so if it is true or false) */ RBIMPL_ATTR_COLD() RBIMPL_ATTR_NONNULL((1)) RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 1, 2) /** - * Identical to rb_warning(), except it reports always regardless of runtime - * `-W` flag. + * Identical to rb_warning(), except it reports unless $VERBOSE is nil. * + * @note This function is affected by the value of $VERBOSE, it does + * nothing if $VERBOSE is nil. * @param[in] fmt Format specifier string compatible with rb_sprintf(). */ void rb_warn(const char *fmt, ...); @@ -543,8 +557,7 @@ RBIMPL_ATTR_COLD() RBIMPL_ATTR_NONNULL((2)) RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 2, 3) /** - * Identical to rb_category_warning(), except it reports always regardless of - * runtime `-W` flag. + * Identical to rb_category_warning(), except it reports unless $VERBOSE is nil. * * @param[in] cat Category e.g. deprecated. * @param[in] fmt Format specifier string compatible with rb_sprintf(). @@ -554,8 +567,7 @@ void rb_category_warn(rb_warning_category_t cat, const char *fmt, ...); RBIMPL_ATTR_NONNULL((1, 3)) RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 3, 4) /** - * Identical to rb_compile_warning(), except it reports always regardless of - * runtime `-W` flag. + * Identical to rb_compile_warning(), except it reports unless $VERBOSE is nil. * * @param[in] file The path corresponding to Ruby level `__FILE__`. * @param[in] line The number corresponding to Ruby level `__LINE__`. diff --git a/include/ruby/internal/eval.h b/include/ruby/internal/eval.h index 34a53849da..5bcbb97746 100644 --- a/include/ruby/internal/eval.h +++ b/include/ruby/internal/eval.h @@ -28,10 +28,12 @@ RBIMPL_SYMBOL_EXPORT_BEGIN() RBIMPL_ATTR_NONNULL(()) /** - * Evaluates the given string in an isolated binding. + * Evaluates the given string. * - * Here "isolated" means that the binding does not inherit any other - * bindings. 
This behaves same as the binding for required libraries. + * In case it is called from within a C-backended method, the evaluation is + * done under the current binding. However there can be no method. On such + * situation this function evaluates in an isolated binding, like `require` + * runs in a separate one. * * `__FILE__` will be `"(eval)"`, and `__LINE__` starts from 1 in the * evaluation. @@ -39,6 +41,31 @@ RBIMPL_ATTR_NONNULL(()) * @param[in] str Ruby code to evaluate. * @exception rb_eException Raises an exception on error. * @return The evaluated result. + * + * @internal + * + * @shyouhei's old tale about the birth and growth of this function: + * + * At the beginning, there was no rb_eval_string(). @shyouhei heard that + * @shugo, author of Apache httpd's mod_ruby module, requested @matz for this + * API. He wanted a way so that mod_ruby can evaluate ruby scripts one by one, + * separately, in each different contexts. So this function was made. It was + * designed to be a global interpreter entry point like ruby_run_node(). + * + * The way it is implemented however allows extension libraries (not just + * programs like Apache httpd) to call this function. Because its name says + * nothing about the initial design, people started to think of it as an + * orthodox way to call ruby level `eval` method from their extension + * libraries. Even our `extension.rdoc` has had a description of this function + * basically according to this understanding. + * + * The old (mod_ruby like) usage still works. But over time, usages of this + * function from extension libraries got popular, while mod_ruby faded out; is + * no longer maintained now. Devs decided to actively support both. This + * function now auto-detects how it is called, and switches how it works + * depending on it. 
+ * + * @see https://bugs.ruby-lang.org/issues/18780 */ VALUE rb_eval_string(const char *str); diff --git a/include/ruby/internal/event.h b/include/ruby/internal/event.h index 04b137a193..1d194ed618 100644 --- a/include/ruby/internal/event.h +++ b/include/ruby/internal/event.h @@ -23,6 +23,10 @@ #include "ruby/internal/dllexport.h" #include "ruby/internal/value.h" +#ifdef HAVE_STDINT_H +#include <stdint.h> +#endif + /* These macros are not enums because they are wider than int.*/ /** @@ -54,6 +58,7 @@ #define RUBY_EVENT_THREAD_END 0x0800 /**< Encountered an end of a thread. */ #define RUBY_EVENT_FIBER_SWITCH 0x1000 /**< Encountered a `Fiber#yield`. */ #define RUBY_EVENT_SCRIPT_COMPILED 0x2000 /**< Encountered an `eval`. */ +#define RUBY_EVENT_RESCUE 0x4000 /**< Encountered a `rescue` statement. */ #define RUBY_EVENT_TRACEPOINT_ALL 0xffff /**< Bitmask of extended events. */ /** @} */ diff --git a/include/ruby/internal/fl_type.h b/include/ruby/internal/fl_type.h index 47f054256b..0a05166784 100644 --- a/include/ruby/internal/fl_type.h +++ b/include/ruby/internal/fl_type.h @@ -57,8 +57,7 @@ #define FL_SINGLETON RBIMPL_CAST((VALUE)RUBY_FL_SINGLETON) /**< @old{RUBY_FL_SINGLETON} */ #define FL_WB_PROTECTED RBIMPL_CAST((VALUE)RUBY_FL_WB_PROTECTED) /**< @old{RUBY_FL_WB_PROTECTED} */ -#define FL_PROMOTED0 RBIMPL_CAST((VALUE)RUBY_FL_PROMOTED0) /**< @old{RUBY_FL_PROMOTED0} */ -#define FL_PROMOTED1 RBIMPL_CAST((VALUE)RUBY_FL_PROMOTED1) /**< @old{RUBY_FL_PROMOTED1} */ +#define FL_PROMOTED RBIMPL_CAST((VALUE)RUBY_FL_PROMOTED) /**< @old{RUBY_FL_PROMOTED} */ #define FL_FINALIZE RBIMPL_CAST((VALUE)RUBY_FL_FINALIZE) /**< @old{RUBY_FL_FINALIZE} */ #define FL_TAINT RBIMPL_CAST((VALUE)RUBY_FL_TAINT) /**< @old{RUBY_FL_TAINT} */ #define FL_SHAREABLE RBIMPL_CAST((VALUE)RUBY_FL_SHAREABLE) /**< @old{RUBY_FL_SHAREABLE} */ @@ -111,13 +110,6 @@ #define RB_OBJ_FREEZE_RAW RB_OBJ_FREEZE_RAW #define RB_OBJ_FROZEN RB_OBJ_FROZEN #define RB_OBJ_FROZEN_RAW RB_OBJ_FROZEN_RAW -#define RB_OBJ_INFECT 
RB_OBJ_INFECT -#define RB_OBJ_INFECT_RAW RB_OBJ_INFECT_RAW -#define RB_OBJ_TAINT RB_OBJ_TAINT -#define RB_OBJ_TAINTABLE RB_OBJ_TAINTABLE -#define RB_OBJ_TAINTED RB_OBJ_TAINTED -#define RB_OBJ_TAINTED_RAW RB_OBJ_TAINTED_RAW -#define RB_OBJ_TAINT_RAW RB_OBJ_TAINT_RAW #define RB_OBJ_UNTRUST RB_OBJ_TAINT #define RB_OBJ_UNTRUSTED RB_OBJ_TAINTED /** @endcond */ @@ -183,7 +175,7 @@ RB_GNUC_EXTENSION * @note About the `FL_USER` terminology: the "user" here does not necessarily * mean only you. For instance struct ::RString instances use these * bits to cache their encodings etc. Devs discussed about this topic, - * reached their concensus that ::RUBY_T_DATA is the only valid data + * reached their consensus that ::RUBY_T_DATA is the only valid data * structure that can use these bits; other data structures including * ::RUBY_T_OBJECT use these bits for their own purpose. See also * https://bugs.ruby-lang.org/issues/18059 @@ -207,12 +199,15 @@ ruby_fl_type { RUBY_FL_WB_PROTECTED = (1<<5), /** - * This flag has something to do with our garbage collector. These days - * ruby objects are "generational". There are those who are young and - * those who are old. Young objects are prone to die; monitored relatively - * extensively by the garbage collector. OTOH old objects tend to live - * longer. They are relatively rarely considered. This flag is set when a - * object experienced promotion i.e. survived a garbage collection. + * Ruby objects are "generational". There are young objects & old objects. + * Young objects are prone to die & monitored relatively extensively by the + * garbage collector. Old objects tend to live longer & are monitored less + * frequently. When an object survives a GC, its age is incremented. When + * age is equal to RVALUE_OLD_AGE, the object becomes Old. 
This flag is set + * when an object becomes old, and is used by the write barrier to check if + * an old object should be considered for marking more frequently - as old + * objects that have references added between major GCs need to be remarked + * to prevent the referred object being mistakenly swept. * * @internal * @@ -220,41 +215,14 @@ ruby_fl_type { * 3rd parties. It must be an implementation detail that they should never * know. Might better be hidden. */ - RUBY_FL_PROMOTED0 = (1<<5), + RUBY_FL_PROMOTED = (1<<5), /** - * This flag has something to do with our garbage collector. These days - * ruby objects are "generational". There are those who are young and - * those who are old. Young objects are prone to die; monitored relatively - * extensively by the garbage collector. OTOH old objects tend to live - * longer. They are relatively rarely considered. This flag is set when a - * object experienced two promotions i.e. survived garbage collections - * twice. + * This flag is no longer in use * * @internal - * - * But honestly, @shyouhei doesn't think this flag should be visible from - * 3rd parties. It must be an implementation detail that they should never - * know. Might better be hidden. - */ - RUBY_FL_PROMOTED1 = (1<<6), - - /** - * This flag has something to do with our garbage collector. These days - * ruby objects are "generational". There are those who are young and - * those who are old. Young objects are prone to die; monitored relatively - * extensively by the garbage collector. OTOH old objects tend to live - * longer. They are relatively rarely considered. This flag is set when a - * object experienced promotions i.e. survived more than one garbage - * collections. - * - * @internal - * - * But honestly, @shyouhei doesn't think this flag should be visible from - * 3rd parties. It must be an implementation detail that they should never - * know. Might better be hidden. 
*/ - RUBY_FL_PROMOTED = RUBY_FL_PROMOTED0 | RUBY_FL_PROMOTED1, + RUBY_FL_UNUSED6 = (1<<6), /** * This flag has something to do with finalisers. A ruby object can have @@ -283,7 +251,7 @@ ruby_fl_type { # pragma deprecated(RUBY_FL_TAINT) #endif - = (1<<8), + = 0, /** * This flag has something to do with Ractor. Multiple Ractors run without @@ -310,7 +278,7 @@ ruby_fl_type { # pragma deprecated(RUBY_FL_UNTRUSTED) #endif - = (1<<8), + = 0, /** * This flag has something to do with object IDs. Unlike in the old days, @@ -427,7 +395,7 @@ ruby_fl_type { * 3rd parties. It must be an implementation detail that they should never * know. Might better be hidden. */ - RUBY_FL_SINGLETON = RUBY_FL_USER0, + RUBY_FL_SINGLETON = RUBY_FL_USER1, }; enum { @@ -451,12 +419,6 @@ enum { RBIMPL_SYMBOL_EXPORT_BEGIN() /** - * @deprecated Does nothing. This method is deprecated and will be removed in - * Ruby 3.2. - */ -void rb_obj_infect(VALUE victim, VALUE carrier); - -/** * This is an implementation detail of #RB_OBJ_FREEZE(). People don't use it * directly. * @@ -495,7 +457,7 @@ RB_FL_ABLE(VALUE obj) RBIMPL_ATTR_PURE_UNLESS_DEBUG() RBIMPL_ATTR_ARTIFICIAL() /** - * This is an implenentation detail of RB_FL_TEST(). 3rd parties need not use + * This is an implementation detail of RB_FL_TEST(). 3rd parties need not use * this. Just always use RB_FL_TEST(). * * @param[in] obj Object in question. @@ -543,7 +505,7 @@ RB_FL_TEST(VALUE obj, VALUE flags) RBIMPL_ATTR_PURE_UNLESS_DEBUG() RBIMPL_ATTR_ARTIFICIAL() /** - * This is an implenentation detail of RB_FL_ANY(). 3rd parties need not use + * This is an implementation detail of RB_FL_ANY(). 3rd parties need not use * this. Just always use RB_FL_ANY(). * * @param[in] obj Object in question. @@ -577,7 +539,7 @@ RB_FL_ANY(VALUE obj, VALUE flags) RBIMPL_ATTR_PURE_UNLESS_DEBUG() RBIMPL_ATTR_ARTIFICIAL() /** - * This is an implenentation detail of RB_FL_ALL(). 3rd parties need not use + * This is an implementation detail of RB_FL_ALL(). 
3rd parties need not use * this. Just always use RB_FL_ALL(). * * @param[in] obj Object in question. @@ -613,7 +575,7 @@ RBIMPL_ATTR_ARTIFICIAL() /** * @private * - * This is an implenentation detail of RB_FL_SET(). 3rd parties need not use + * This is an implementation detail of RB_FL_SET(). 3rd parties need not use * this. Just always use RB_FL_SET(). * * @param[out] obj Object in question. @@ -633,7 +595,7 @@ rbimpl_fl_set_raw_raw(struct RBasic *obj, VALUE flags) RBIMPL_ATTR_ARTIFICIAL() /** - * This is an implenentation detail of RB_FL_SET(). 3rd parties need not use + * This is an implementation detail of RB_FL_SET(). 3rd parties need not use * this. Just always use RB_FL_SET(). * * @param[out] obj Object in question. @@ -673,7 +635,7 @@ RBIMPL_ATTR_ARTIFICIAL() /** * @private * - * This is an implenentation detail of RB_FL_UNSET(). 3rd parties need not use + * This is an implementation detail of RB_FL_UNSET(). 3rd parties need not use * this. Just always use RB_FL_UNSET(). * * @param[out] obj Object in question. @@ -693,7 +655,7 @@ rbimpl_fl_unset_raw_raw(struct RBasic *obj, VALUE flags) RBIMPL_ATTR_ARTIFICIAL() /** - * This is an implenentation detail of RB_FL_UNSET(). 3rd parties need not use + * This is an implementation detail of RB_FL_UNSET(). 3rd parties need not use * this. Just always use RB_FL_UNSET(). * * @param[out] obj Object in question. @@ -728,7 +690,7 @@ RBIMPL_ATTR_ARTIFICIAL() /** * @private * - * This is an implenentation detail of RB_FL_REVERSE(). 3rd parties need not + * This is an implementation detail of RB_FL_REVERSE(). 3rd parties need not * use this. Just always use RB_FL_REVERSE(). * * @param[out] obj Object in question. @@ -748,7 +710,7 @@ rbimpl_fl_reverse_raw_raw(struct RBasic *obj, VALUE flags) RBIMPL_ATTR_ARTIFICIAL() /** - * This is an implenentation detail of RB_FL_REVERSE(). 3rd parties need not + * This is an implementation detail of RB_FL_REVERSE(). 3rd parties need not * use this. Just always use RB_FL_REVERSE(). 
* * @param[out] obj Object in question. @@ -793,6 +755,7 @@ RBIMPL_ATTR_DEPRECATED(("taintedness turned out to be a wrong idea.")) static inline bool RB_OBJ_TAINTABLE(VALUE obj) { + (void)obj; return false; } @@ -810,6 +773,7 @@ RBIMPL_ATTR_DEPRECATED(("taintedness turned out to be a wrong idea.")) static inline VALUE RB_OBJ_TAINTED_RAW(VALUE obj) { + (void)obj; return false; } @@ -827,6 +791,7 @@ RBIMPL_ATTR_DEPRECATED(("taintedness turned out to be a wrong idea.")) static inline bool RB_OBJ_TAINTED(VALUE obj) { + (void)obj; return false; } @@ -842,6 +807,7 @@ RBIMPL_ATTR_DEPRECATED(("taintedness turned out to be a wrong idea.")) static inline void RB_OBJ_TAINT_RAW(VALUE obj) { + (void)obj; return; } @@ -857,6 +823,7 @@ RBIMPL_ATTR_DEPRECATED(("taintedness turned out to be a wrong idea.")) static inline void RB_OBJ_TAINT(VALUE obj) { + (void)obj; return; } @@ -873,6 +840,8 @@ RBIMPL_ATTR_DEPRECATED(("taintedness turned out to be a wrong idea.")) static inline void RB_OBJ_INFECT_RAW(VALUE dst, VALUE src) { + (void)dst; + (void)src; return; } @@ -889,13 +858,15 @@ RBIMPL_ATTR_DEPRECATED(("taintedness turned out to be a wrong idea.")) static inline void RB_OBJ_INFECT(VALUE dst, VALUE src) { + (void)dst; + (void)src; return; } RBIMPL_ATTR_PURE_UNLESS_DEBUG() RBIMPL_ATTR_ARTIFICIAL() /** - * This is an implenentation detail of RB_OBJ_FROZEN(). 3rd parties need not + * This is an implementation detail of RB_OBJ_FROZEN(). 3rd parties need not * use this. Just always use RB_OBJ_FROZEN(). * * @param[in] obj Object in question. @@ -934,9 +905,13 @@ RB_OBJ_FROZEN(VALUE obj) } } +RUBY_SYMBOL_EXPORT_BEGIN +void rb_obj_freeze_inline(VALUE obj); +RUBY_SYMBOL_EXPORT_END + RBIMPL_ATTR_ARTIFICIAL() /** - * This is an implenentation detail of RB_OBJ_FREEZE(). 3rd parties need not + * This is an implementation detail of RB_OBJ_FREEZE(). 3rd parties need not * use this. Just always use RB_OBJ_FREEZE(). * * @param[out] obj Object in question. 
@@ -944,24 +919,7 @@ RBIMPL_ATTR_ARTIFICIAL() static inline void RB_OBJ_FREEZE_RAW(VALUE obj) { - RB_FL_SET_RAW(obj, RUBY_FL_FREEZE); -} - -/** - * Prevents further modifications to the given object. ::rb_eFrozenError shall - * be raised if modification is attempted. - * - * @param[out] x Object in question. - */ -static inline void -rb_obj_freeze_inline(VALUE x) -{ - if (RB_FL_ABLE(x)) { - RB_OBJ_FREEZE_RAW(x); - if (RBASIC_CLASS(x) && !(RBASIC(x)->flags & RUBY_FL_SINGLETON)) { - rb_freeze_singleton_class(x); - } - } + rb_obj_freeze_inline(obj); } #endif /* RBIMPL_FL_TYPE_H */ diff --git a/include/ruby/internal/gc.h b/include/ruby/internal/gc.h index 66fc14e511..462f416af2 100644 --- a/include/ruby/internal/gc.h +++ b/include/ruby/internal/gc.h @@ -20,16 +20,379 @@ * extension libraries. They could be written in C++98. * @brief Registering values to the GC. */ +#include "ruby/internal/config.h" + +#ifdef STDC_HEADERS +# include <stddef.h> /* size_t */ +#endif + +#ifdef HAVE_SYS_TYPES_H +# include <sys/types.h> /* ssize_t */ +#endif + +#include "ruby/assert.h" +#include "ruby/internal/attr/cold.h" +#include "ruby/internal/attr/nonnull.h" +#include "ruby/internal/attr/noreturn.h" +#include "ruby/internal/attr/artificial.h" +#include "ruby/internal/attr/maybe_unused.h" +#include "ruby/internal/attr/pure.h" #include "ruby/internal/dllexport.h" +#include "ruby/internal/special_consts.h" +#include "ruby/internal/stdbool.h" #include "ruby/internal/value.h" RBIMPL_SYMBOL_EXPORT_BEGIN() +#define RUBY_REF_EDGE(s, p) offsetof(s, p) +#define RUBY_REFS_LIST_PTR(l) (RUBY_DATA_FUNC)(l) +#define RUBY_REF_END SIZE_MAX +#define RUBY_REFERENCES(t) static const size_t t[] +#define RUBY_REFERENCES_START(t) RUBY_REFERENCES(t) = { +#define RUBY_REFERENCES_END RUBY_REF_END, }; + +/* gc.c */ + +RBIMPL_ATTR_COLD() +RBIMPL_ATTR_NORETURN() +/** + * Triggers out-of-memory error. If possible it raises ::rb_eNoMemError. But + * because we are running out of memory that is not always doable. 
This + * function tries hard to show something, but ultimately can die silently. + * + * @exception rb_eNoMemError Raises it if possible. + */ +void rb_memerror(void); + +RBIMPL_ATTR_PURE() +/** + * Queries if the GC is busy. + * + * @retval 0 It isn't. + * @retval 1 It is. + */ +int rb_during_gc(void); + +RBIMPL_ATTR_NONNULL((1)) +/** + * Marks objects between the two pointers. This is one of the GC utility + * functions that you can call when you design your own + * ::rb_data_type_struct::dmark. + * + * @pre Continuous memory region from `start` to `end` shall be fully + * addressable. + * @param[out] start Pointer to an array of objects. + * @param[out] end Pointer that terminates the array of objects. + * @post Objects from `start` (included) to `end` (excluded) are marked. + * + * @internal + * + * `end` can be NULL... But that just results in no-op. + */ +void rb_gc_mark_locations(const VALUE *start, const VALUE *end); + +/** + * Identical to rb_mark_hash(), except it marks only values of the table and + * leave their associated keys unmarked. This is one of the GC utility + * functions that you can call when you design your own + * ::rb_data_type_struct::dmark. + * + * @warning Of course it can break GC. Leave it unused if unsure. + * @param[in] tbl A table to mark. + * @post Values stored in `tbl` are marked. + */ +void rb_mark_tbl(struct st_table *tbl); + +/** + * Identical to rb_mark_tbl(), except it marks objects using + * rb_gc_mark_movable(). This is one of the GC utility functions that you can + * call when you design your own ::rb_data_type_struct::dmark. + * + * @warning Of course it can break GC. Leave it unused if unsure. + * @param[in] tbl A table to mark. + * @post Values stored in `tbl` are marked. + */ +void rb_mark_tbl_no_pin(struct st_table *tbl); + +/** + * Identical to rb_mark_hash(), except it marks only keys of the table and + * leave their associated values unmarked. 
This is one of the GC utility + * functions that you can call when you design your own + * ::rb_data_type_struct::dmark. + * + * @warning Of course it can break GC. Leave it unused if unsure. + * @param[in] tbl A table to mark. + * @post Keys stored in `tbl` are marked. + */ +void rb_mark_set(struct st_table *tbl); + /** - * Inform the garbage collector that `valptr` points to a live Ruby object that - * should not be moved. Note that extensions should use this API on global - * constants instead of assuming constants defined in Ruby are always alive. - * Ruby code can remove global constants. + * Marks keys and values associated inside of the given table. This is one of + * the GC utility functions that you can call when you design your own + * ::rb_data_type_struct::dmark. + * + * @param[in] tbl A table to mark. + * @post Objects stored in `tbl` are marked. + */ +void rb_mark_hash(struct st_table *tbl); + +/** + * Updates references inside of tables. After you marked values using + * rb_mark_tbl_no_pin(), the objects inside of the table could of course be + * moved. This function is to fixup those references. You can call this from + * your ::rb_data_type_struct::dcompact. + * + * @param[out] ptr A table that potentially includes moved references. + * @post Moved references, if any, are corrected. + */ +void rb_gc_update_tbl_refs(st_table *ptr); + +/** + * Identical to rb_gc_mark(), except it allows the passed value be a + * non-object. For instance pointers to different type of memory regions are + * allowed here. Such values are silently ignored. This is one of the GC + * utility functions that you can call when you design your own + * ::rb_data_type_struct::dmark. + * + * @param[out] obj A possible object. + * @post `obj` is marked, if possible. + */ +void rb_gc_mark_maybe(VALUE obj); + +/** + * Marks an object. This is one of the GC utility functions that you can call + * when you design your own ::rb_data_type_struct::dmark. 
+ * + * @param[out] obj Arbitrary Ruby object. + * @post `obj` is marked. + */ +void rb_gc_mark(VALUE obj); + +/** + * Maybe this is the only function provided for C extensions to control the + * pinning of objects, so let us describe it in detail. These days Ruby's GC + * is copying. As far as an object's physical address is guaranteed unused, it + * can move around the object space. Our GC engine rearranges these objects + * after it reclaims unreachable objects from our object space, so that the + * space is compact (improves memory locality). This is called the + * "compaction" phase, and works well most of the time... as far as there are + * no C extensions. C extensions complicate the scenario because Ruby core + * cannot detect any use of the physical address of an object inside of C + * functions. In order to prevent memory corruptions, objects observable from + * C extensions are "pinned"; they stick to where they are born until they die, + * just in case any C extensions touch their raw pointers. This variant of + * scheme is called "Mostly-Copying" garbage collector. Authors of C + * extensions, however, can extremely carefully write them to become + * compaction-aware. To do so avoid referring to a Ruby object from inside of + * your struct in the first place. But if that is not possible, use this + * function from your ::rb_data_type_struct::dmark then. This way objects + * marked using it are considered movable. If you chose this way you have to + * manually fix up locations of such moved pointers using rb_gc_location(). + * + * @see Bartlett, Joel F., "Compacting Garbage Collection with Ambiguous + * Roots", ACM SIGPLAN Lisp Pointers Volume 1 Issue 6 pp. 3-12, + * April-May-June, 1988. https://doi.org/10.1145/1317224.1317225 + * + * @param[in] obj Object that is movable. + * @post `obj` is marked, and is considered movable. + */ +void rb_gc_mark_movable(VALUE obj); + +/** + * Finds a new "location" of an object. An object can be moved on compaction.
+ * This function projects its new abode, or just returns the passed object if + * not moved. This is one of the GC utility functions that you can call when + * you design your own ::rb_data_type_struct::dcompact. + * + * @param[in] obj An object, possibly already moved to somewhere else. + * @return An object, which holds the current contents of former `obj`. + */ +VALUE rb_gc_location(VALUE obj); + +/** + * Triggers a GC process. This was the only GC entry point that we had at the + * beginning. Over time our GC evolved. Now what this function does is just a + * very simplified variation of the entire GC algorithms. A series of + * procedures kicked by this API is called a "full" GC. + * + * - It immediately scans the entire object space to sort the dead. + * - It immediately reclaims any single dead bodies to reuse later. + * + * It is worth noting that the procedures above do not include evaluations of + * finalisers. They run later. + * + * @internal + * + * Finalisers are deferred until we can handle interrupts. See + * `rb_postponed_job_flush` in vm_trace.c. + * + * Of course there are GC that are not "full". For instance this one and the + * GC which runs when we are running out of memory are different. See + * `gc_profile_record_flag` defined in gc.c for the kinds of GC. + * + * In spite of the name this is not what everything that a GC can trigger. As + * of writing it seems this function does not trigger compaction. But this + * might change in future. + */ +void rb_gc(void); + +/** + * Copy&paste an object's finaliser to another. This is one of the GC utility + * functions that you can call when you design your own `initialize_copy`, + * `initialize_dup`, `initialize_clone`. + * + * @param[out] dst Destination object. + * @param[in] src Source object. + * @post `dst` and `src` share the same finaliser. + * + * @internal + * + * But isn't it easier for you to call super, and let `Object#initialize_copy` + * call this function instead? 
+ */ +void rb_gc_copy_finalizer(VALUE dst, VALUE src); + +/** + * (Re-) enables GC. This makes sense only after you called rb_gc_disable(). + * + * @retval RUBY_Qtrue GC was disabled before. + * @retval RUBY_Qfalse GC was enabled before. + * @post GC is enabled. + * + * @internal + * + * This is one of such exceptional functions that does not raise both Ruby + * exceptions and C++ exceptions. + */ +VALUE rb_gc_enable(void); + +/** + * Disables GC. This prevents automatic GC runs when the process is running + * out of memory. Such situations shall result in rb_memerror(). However this + * does not prevent users from manually invoking rb_gc(). That should work. + * People abused this by disabling GC at the beginning of an event loop, + * process events without GC overheads, then manually force reclaiming garbage + * at the bottom of the loop. However because our GC is now much smarter than + * just calling rb_gc(), this technique is proven to be sub-optimal these days. + * It is believed that there is currently practically no needs of this + * function. + * + * @retval RUBY_Qtrue GC was disabled before. + * @retval RUBY_Qfalse GC was enabled before. + * @post GC is disabled. + */ +VALUE rb_gc_disable(void); + +/** + * Identical to rb_gc(), except the return value. + * + * @return Always returns ::RUBY_Qnil. + */ +VALUE rb_gc_start(void); + +/** + * Assigns a finaliser for an object. Each objects can have objects (typically + * blocks) that run immediately after that object dies. They are called + * finalisers of an object. This function associates a finaliser object with a + * target object. + * + * @note Note that finalisers run _after_ the object they finalise dies. You + * cannot for instance call its methods. + * @note If your finaliser references the object it finalises that object + * loses any chance to become a garbage; effectively leaks memory until + * the end of the process. + * + * @param[in] obj Target to finalise. 
+ * @param[in] block Something `call`able. + * @exception rb_eRuntimeError Somehow `obj` cannot have finalisers. + * @exception rb_eFrozenError `obj` is frozen. + * @exception rb_eArgError `block` doesn't respond to `call`. + * @return The passed `block`. + * @post `block` runs after `obj` dies. + */ +VALUE rb_define_finalizer(VALUE obj, VALUE block); + +/** + * Modifies the object so that it has no finalisers at all. This function is + * mainly provided for symmetry. No practical usages can be thought of. + * + * @param[out] obj Object to clear its finalisers. + * @exception rb_eFrozenError `obj` is frozen. + * @return The passed `obj`. + * @post `obj` has no finalisers. + * @note There is no way to undefine a specific part of many finalisers + * that `obj` could have. All you can do is to clear them all. + */ +VALUE rb_undefine_finalizer(VALUE obj); + +/** + * Identical to rb_gc_stat(), with "count" parameter. + * + * @return Lifetime total number of runs of GC. + */ +size_t rb_gc_count(void); + +/** + * Obtains various GC related profiles. The parameter can be either a Symbol + * or a Hash. If a Hash is passed, it is filled with everything currently + * available. If a Symbol is passed just that portion is returned. + * + * Possible variations of keys you can pass here change from version to + * version. You can get the list of known keys by passing an empty hash and + * let it be filled. + * + * @param[in,out] key_or_buf A Symbol, or a Hash. + * @exception rb_eTypeError Neither Symbol nor Hash. + * @exception rb_eFrozenError Frozen hash is passed. + * @return In case a Hash is passed it returns 0. Otherwise the + * profile value associated with the given key is returned. + * @post In case a Hash is passed it is filled with values. + */ +size_t rb_gc_stat(VALUE key_or_buf); + +/** + * Obtains various info regarding the most recent GC run. This includes for + * instance the reason of the GC. The parameter can be either a Symbol or a + * Hash. 
If a Hash is passed, it is filled with everything currently + * available. If a Symbol is passed just that portion is returned. + * + * Possible variations of keys you can pass here change from version to + * version. You can get the list of known keys by passing an empty hash and + * let it be filled. + * + * @param[in,out] key_or_buf A Symbol, or a Hash. + * @exception rb_eTypeError Neither Symbol nor Hash. + * @exception rb_eFrozenError Frozen hash is passed. + * @return In case a Hash is passed it returns that hash. Otherwise + * the profile value associated with the given key is returned. + * @post In case a Hash is passed it is filled with values. + */ +VALUE rb_gc_latest_gc_info(VALUE key_or_buf); + +/** + * Informs that there are external memory usages. Our GC runs when we are + * running out of memory. The amount of memory, however, can increase/decrease + * behind-the-scene. For instance DLLs can allocate memories using `mmap(2)` + * etc, which are opaque to us. Registering such external allocations using + * this function enables proper detection of how much memories an object used + * as a whole. That will trigger GCs more often than it would otherwise. You + * can also pass negative numbers here, to indicate that such external + * allocations are gone. + * + * @param[in] diff Amount of memory increased(+)/decreased(-). + */ +void rb_gc_adjust_memory_usage(ssize_t diff); + +/** + * Inform the garbage collector that the global or static variable pointed by + * `valptr` stores a live Ruby object that should not be moved. Note that + * extensions should use this API on global constants instead of assuming + * constants defined in Ruby are always alive. Ruby code can remove global + * constants. + * + * Because this registration itself has a possibility to trigger a GC, this + * function must be called before any GC-able objects is assigned to the + * address pointed by `valptr`. 
*/ void rb_gc_register_address(VALUE *valptr); @@ -54,4 +417,413 @@ void rb_gc_register_mark_object(VALUE object); RBIMPL_SYMBOL_EXPORT_END() +/** + * @private + * + * @deprecated This macro once was a thing in the old days, but makes no sense + * any longer today. Exists here for backwards compatibility + * only. You can safely forget about it. + */ +#undef USE_RGENGC +#define USE_RGENGC 1 + +/** + * @deprecated This macro seems broken. Setting this to anything other than + * zero just doesn't compile. We need to KonMari. + */ +#ifndef USE_RGENGC_LOGGING_WB_UNPROTECT +# define USE_RGENGC_LOGGING_WB_UNPROTECT 0 +#endif + +/** + * @private + * + * This is a compile-time flag to enable/disable write barrier for + * struct ::RArray. It has to be set at the time ruby itself compiles. Makes + * no sense for 3rd parties. + */ +#ifndef RGENGC_WB_PROTECTED_ARRAY +# define RGENGC_WB_PROTECTED_ARRAY 1 +#endif + +/** + * @private + * + * This is a compile-time flag to enable/disable write barrier for + * struct ::RHash. It has to be set at the time ruby itself compiles. Makes + * no sense for 3rd parties. + */ +#ifndef RGENGC_WB_PROTECTED_HASH +# define RGENGC_WB_PROTECTED_HASH 1 +#endif + +/** + * @private + * + * This is a compile-time flag to enable/disable write barrier for + * struct ::RStruct. It has to be set at the time ruby itself compiles. Makes + * no sense for 3rd parties. + */ +#ifndef RGENGC_WB_PROTECTED_STRUCT +# define RGENGC_WB_PROTECTED_STRUCT 1 +#endif + +/** + * @private + * + * This is a compile-time flag to enable/disable write barrier for + * struct ::RString. It has to be set at the time ruby itself compiles. Makes + * no sense for 3rd parties. + */ +#ifndef RGENGC_WB_PROTECTED_STRING +# define RGENGC_WB_PROTECTED_STRING 1 +#endif + +/** + * @private + * + * This is a compile-time flag to enable/disable write barrier for + * struct ::RObject. It has to be set at the time ruby itself compiles. Makes + * no sense for 3rd parties. 
+ */ +#ifndef RGENGC_WB_PROTECTED_OBJECT +# define RGENGC_WB_PROTECTED_OBJECT 1 +#endif + +/** + * @private + * + * This is a compile-time flag to enable/disable write barrier for + * struct ::RRegexp. It has to be set at the time ruby itself compiles. Makes + * no sense for 3rd parties. + */ +#ifndef RGENGC_WB_PROTECTED_REGEXP +# define RGENGC_WB_PROTECTED_REGEXP 1 +#endif + +/** + * @private + * + * This is a compile-time flag to enable/disable write barrier for + * struct ::RMatch. It has to be set at the time ruby itself compiles. Makes + * no sense for 3rd parties. + */ +#ifndef RGENGC_WB_PROTECTED_MATCH +# define RGENGC_WB_PROTECTED_MATCH 1 +#endif + +/** + * @private + * + * This is a compile-time flag to enable/disable write barrier for + * struct ::RClass. It has to be set at the time ruby itself compiles. Makes + * no sense for 3rd parties. + */ +#ifndef RGENGC_WB_PROTECTED_CLASS +# define RGENGC_WB_PROTECTED_CLASS 1 +#endif + +/** + * @private + * + * This is a compile-time flag to enable/disable write barrier for + * struct ::RFloat. It has to be set at the time ruby itself compiles. Makes + * no sense for 3rd parties. + */ +#ifndef RGENGC_WB_PROTECTED_FLOAT +# define RGENGC_WB_PROTECTED_FLOAT 1 +#endif + +/** + * @private + * + * This is a compile-time flag to enable/disable write barrier for + * struct ::RComplex. It has to be set at the time ruby itself compiles. + * Makes no sense for 3rd parties. + */ +#ifndef RGENGC_WB_PROTECTED_COMPLEX +# define RGENGC_WB_PROTECTED_COMPLEX 1 +#endif + +/** + * @private + * + * This is a compile-time flag to enable/disable write barrier for + * struct ::RRational. It has to be set at the time ruby itself compiles. + * Makes no sense for 3rd parties. + */ +#ifndef RGENGC_WB_PROTECTED_RATIONAL +# define RGENGC_WB_PROTECTED_RATIONAL 1 +#endif + +/** + * @private + * + * This is a compile-time flag to enable/disable write barrier for + * struct ::RBignum. It has to be set at the time ruby itself compiles. 
Makes + * no sense for 3rd parties. + */ +#ifndef RGENGC_WB_PROTECTED_BIGNUM +# define RGENGC_WB_PROTECTED_BIGNUM 1 +#endif + +/** + * @private + * + * @deprecated This macro once was a thing in the old days, but makes no sense + * any longer today. Exists here for backwards compatibility + * only. You can safely forget about it. + * + * @internal + * + * @shyouhei doesn't think anybody uses this right now. + */ +#ifndef RGENGC_WB_PROTECTED_NODE_CREF +# define RGENGC_WB_PROTECTED_NODE_CREF 1 +#endif + +/** + * @defgroup rgengc Write barrier (WB) interfaces: + * + * @note The following core interfaces can be changed in the future. Please + * catch up if you want to insert WB into C-extensions correctly. + * + * @{ + */ + +/** + * Declaration of a "back" pointer. This is a write barrier for new reference + * from "old" generation to "young" generation. It writes `young` into + * `*slot`, which is a pointer inside of `old`. + * + * @param[in] old An old object. + * @param[in] slot A pointer inside of `old`. + * @param[out] young A young object. + */ +#define RB_OBJ_WRITE(old, slot, young) \ + RBIMPL_CAST(rb_obj_write((VALUE)(old), (VALUE *)(slot), (VALUE)(young), __FILE__, __LINE__)) + +/** + * Identical to #RB_OBJ_WRITE(), except it doesn't write any values, but only a + * WB declaration. `oldv` is the value replaced by `young` (not used in current + * Ruby). + * + * @param[in] old An old object. + * @param[in] oldv An object previously stored inside of `old`. + * @param[out] young A young object. + */ +#define RB_OBJ_WRITTEN(old, oldv, young) \ + RBIMPL_CAST(rb_obj_written((VALUE)(old), (VALUE)(oldv), (VALUE)(young), __FILE__, __LINE__)) +/** @} */ + +#define OBJ_PROMOTED_RAW RB_OBJ_PROMOTED_RAW /**< @old{RB_OBJ_PROMOTED_RAW} */ +#define OBJ_PROMOTED RB_OBJ_PROMOTED /**< @old{RB_OBJ_PROMOTED} */ +#define OBJ_WB_UNPROTECT RB_OBJ_WB_UNPROTECT /**< @old{RB_OBJ_WB_UNPROTECT} */ + +/** + * Asserts that the passed object is not fenced by write barriers.
Objects of + * such property do not contribute to generational GCs. They are scanned + * always. + * + * @param[out] x An object that would not be protected by the barrier. + */ +#define RB_OBJ_WB_UNPROTECT(x) rb_obj_wb_unprotect(x, __FILE__, __LINE__) + +/** + * Identical to #RB_OBJ_WB_UNPROTECT(), except it can also assert that the + * given object is of given type. + * + * @param[in] type One of `ARRAY`, `STRING`, etc. + * @param[out] obj An object of `type` that would not be protected. + * + * @internal + * + * @shyouhei doesn't understand why this has to be visible from extensions. + */ +#define RB_OBJ_WB_UNPROTECT_FOR(type, obj) \ + (RGENGC_WB_PROTECTED_##type ? OBJ_WB_UNPROTECT(obj) : obj) + +/** + * @private + * + * This is an implementation detail of rb_obj_wb_unprotect(). People don't use + * it directly. + */ +#define RGENGC_LOGGING_WB_UNPROTECT rb_gc_unprotect_logging + +/** @cond INTERNAL_MACRO */ +#define RB_OBJ_PROMOTED_RAW RB_OBJ_PROMOTED_RAW +#define RB_OBJ_PROMOTED RB_OBJ_PROMOTED +/** @endcond */ + +RBIMPL_SYMBOL_EXPORT_BEGIN() +/** + * This is the implementation of #RB_OBJ_WRITE(). People don't use it + * directly. + * + * @param[in] old An object that points to `young`. + * @param[out] young An object that is referenced from `old`. + */ +void rb_gc_writebarrier(VALUE old, VALUE young); + +/** + * This is the implementation of #RB_OBJ_WB_UNPROTECT(). People don't use it + * directly. + * + * @param[out] obj An object that does not participate in WB. + */ +void rb_gc_writebarrier_unprotect(VALUE obj); + +#if USE_RGENGC_LOGGING_WB_UNPROTECT +/** + * @private + * + * This is the implementation of #RGENGC_LOGGING_WB_UNPROTECT(). People + * don't use it directly. + * + * @param[in] objptr Don't know why this is a pointer to void but in + * reality this is a pointer to an object that is about + * to be un-protected. + * @param[in] filename Pass C's `__FILE__` here. + * @param[in] line Pass C's `__LINE__` here. 
+ */ +void rb_gc_unprotect_logging(void *objptr, const char *filename, int line); +#endif + +RBIMPL_SYMBOL_EXPORT_END() + +RBIMPL_ATTR_PURE_UNLESS_DEBUG() +RBIMPL_ATTR_ARTIFICIAL() +/** + * This is the implementation of #RB_OBJ_PROMOTED(). People don't use it + * directly. + * + * @param[in] obj An object to query. + * @retval true The object is "promoted". + * @retval false The object is young. Have not experienced GC at all. + */ +static inline bool +RB_OBJ_PROMOTED_RAW(VALUE obj) +{ + RBIMPL_ASSERT_OR_ASSUME(RB_FL_ABLE(obj)); + return RB_FL_ANY_RAW(obj, RUBY_FL_PROMOTED); +} + +RBIMPL_ATTR_PURE_UNLESS_DEBUG() +RBIMPL_ATTR_ARTIFICIAL() +/** + * Tests if the object is "promoted" -- that is, whether the object experienced + * one or more GC marks. + * + * @param[in] obj An object to query. + * @retval true The object is "promoted". + * @retval false The object is young. Have not experienced GC at all. + * @note Hello, is anyone actively calling this function? @shyouhei have + * never seen any actual usages outside of the GC implementation + * itself. + */ +static inline bool +RB_OBJ_PROMOTED(VALUE obj) +{ + if (! RB_FL_ABLE(obj)) { + return false; + } + else { + return RB_OBJ_PROMOTED_RAW(obj); + } +} + +/** + * This is the implementation of #RB_OBJ_WB_UNPROTECT(). People don't use it + * directly. + * + * @param[out] x An object that does not participate in WB. + * @param[in] filename C's `__FILE__` of the caller function. + * @param[in] line C's `__LINE__` of the caller function. + * @return x + */ +static inline VALUE +rb_obj_wb_unprotect( + VALUE x, + RBIMPL_ATTR_MAYBE_UNUSED() + const char *filename, + RBIMPL_ATTR_MAYBE_UNUSED() + int line) +{ +#if USE_RGENGC_LOGGING_WB_UNPROTECT + RGENGC_LOGGING_WB_UNPROTECT(RBIMPL_CAST((void *)x), filename, line); +#endif + rb_gc_writebarrier_unprotect(x); + return x; +} + +/** + * @private + * + * This is the implementation of #RB_OBJ_WRITTEN(). People don't use it + * directly. + * + * @param[in] a An old object. 
+ * @param[in] oldv An object previously stored inside of `old`. + * @param[out] b A young object. + * @param[in] filename C's `__FILE__` of the caller function. + * @param[in] line C's `__LINE__` of the caller function. + * @return a + */ +static inline VALUE +rb_obj_written( + VALUE a, + RBIMPL_ATTR_MAYBE_UNUSED() + VALUE oldv, + VALUE b, + RBIMPL_ATTR_MAYBE_UNUSED() + const char *filename, + RBIMPL_ATTR_MAYBE_UNUSED() + int line) +{ +#if USE_RGENGC_LOGGING_WB_UNPROTECT + RGENGC_LOGGING_OBJ_WRITTEN(a, oldv, b, filename, line); +#endif + + if (!RB_SPECIAL_CONST_P(b)) { + rb_gc_writebarrier(a, b); + } + + return a; +} + +/** + * @private + * + * This is the implementation of #RB_OBJ_WRITE(). People don't use it + * directly. + * + * @param[in] a An old object. + * @param[in] slot A pointer inside of `old`. + * @param[out] b A young object. + * @param[in] filename C's `__FILE__` of the caller function. + * @param[in] line C's `__LINE__` of the caller function. + * @return a + */ +static inline VALUE +rb_obj_write( + VALUE a, VALUE *slot, VALUE b, + RBIMPL_ATTR_MAYBE_UNUSED() + const char *filename, + RBIMPL_ATTR_MAYBE_UNUSED() + int line) +{ +#ifdef RGENGC_LOGGING_WRITE + RGENGC_LOGGING_WRITE(a, slot, b, filename, line); +#endif + + *slot = b; + + rb_obj_written(a, RUBY_Qundef /* ignore `oldv' now */, b, filename, line); + return a; +} + +RBIMPL_ATTR_DEPRECATED(("Will be removed soon")) +static inline void rb_gc_force_recycle(VALUE obj){} + #endif /* RBIMPL_GC_H */ diff --git a/include/ruby/internal/globals.h b/include/ruby/internal/globals.h index b478e30b04..60d8e5309a 100644 --- a/include/ruby/internal/globals.h +++ b/include/ruby/internal/globals.h @@ -82,6 +82,7 @@ RUBY_EXTERN VALUE rb_cInteger; /**< `Module` class. */ RUBY_EXTERN VALUE rb_cMatch; /**< `MatchData` class. */ RUBY_EXTERN VALUE rb_cMethod; /**< `Method` class. */ RUBY_EXTERN VALUE rb_cModule; /**< `Module` class. */ +RUBY_EXTERN VALUE rb_cRefinement; /**< `Refinement` class. 
*/ RUBY_EXTERN VALUE rb_cNameErrorMesg; /**< `NameError::Message` class. */ RUBY_EXTERN VALUE rb_cNilClass; /**< `NilClass` class. */ RUBY_EXTERN VALUE rb_cNumeric; /**< `Numeric` class. */ @@ -93,7 +94,7 @@ RUBY_EXTERN VALUE rb_cRegexp; /**< `Regexp` class. */ RUBY_EXTERN VALUE rb_cStat; /**< `File::Stat` class. */ RUBY_EXTERN VALUE rb_cString; /**< `String` class. */ RUBY_EXTERN VALUE rb_cStruct; /**< `Struct` class. */ -RUBY_EXTERN VALUE rb_cSymbol; /**< `Sumbol` class. */ +RUBY_EXTERN VALUE rb_cSymbol; /**< `Symbol` class. */ RUBY_EXTERN VALUE rb_cThread; /**< `Thread` class. */ RUBY_EXTERN VALUE rb_cTime; /**< `Time` class. */ RUBY_EXTERN VALUE rb_cTrueClass; /**< `TrueClass` class. */ diff --git a/include/ruby/internal/has/builtin.h b/include/ruby/internal/has/builtin.h index 957aff8375..243ba2a34c 100644 --- a/include/ruby/internal/has/builtin.h +++ b/include/ruby/internal/has/builtin.h @@ -53,8 +53,10 @@ # define RBIMPL_HAS_BUILTIN___builtin_assume 0 # /* See http://gcc.gnu.org/bugzilla/show_bug.cgi?id=52624 for bswap16. */ # define RBIMPL_HAS_BUILTIN___builtin_bswap16 RBIMPL_COMPILER_SINCE(GCC, 4, 8, 0) +#ifndef __OpenBSD__ # define RBIMPL_HAS_BUILTIN___builtin_bswap32 RBIMPL_COMPILER_SINCE(GCC, 3, 6, 0) # define RBIMPL_HAS_BUILTIN___builtin_bswap64 RBIMPL_COMPILER_SINCE(GCC, 3, 6, 0) +#endif # define RBIMPL_HAS_BUILTIN___builtin_clz RBIMPL_COMPILER_SINCE(GCC, 3, 6, 0) # define RBIMPL_HAS_BUILTIN___builtin_clzl RBIMPL_COMPILER_SINCE(GCC, 3, 6, 0) # define RBIMPL_HAS_BUILTIN___builtin_clzll RBIMPL_COMPILER_SINCE(GCC, 3, 6, 0) @@ -76,10 +78,6 @@ # define RBIMPL_HAS_BUILTIN___builtin_unreachable RBIMPL_COMPILER_SINCE(GCC, 4, 5, 0) # /* Note that "0, 0, 0" might be inaccurate. */ -#elif RBIMPL_COMPILER_IS(MSVC) -# /* MSVC has UNREACHABLE, but that is not __builtin_unreachable. 
*/ -# define RBIMPL_HAS_BUILTIN(_) 0 - #else # /* Take config.h definition when available */ # define RBIMPL_HAS_BUILTIN(_) ((RBIMPL_HAS_BUILTIN_ ## _)+0) @@ -109,7 +107,7 @@ # define RBIMPL_HAS_BUILTIN___builtin_rotateright64 0 # define RBIMPL_HAS_BUILTIN___builtin_popcountll HAVE_BUILTIN___BUILTIN_POPCOUNTLL # define RBIMPL_HAS_BUILTIN___builtin_sub_overflow HAVE_BUILTIN___BUILTIN_SUB_OVERFLOW -# if defined(UNREACHABLE) +# if defined(HAVE___BUILTIN_UNREACHABLE) # define RBIMPL_HAS_BUILTIN___builtin_unreachable 1 # else # define RBIMPL_HAS_BUILTIN___builtin_unreachable 0 diff --git a/include/ruby/internal/has/c_attribute.h b/include/ruby/internal/has/c_attribute.h index c5c48867bf..69b0f402cd 100644 --- a/include/ruby/internal/has/c_attribute.h +++ b/include/ruby/internal/has/c_attribute.h @@ -21,11 +21,23 @@ * @brief Defines #RBIMPL_HAS_C_ATTRIBUTE. */ +#include "ruby/internal/has/extension.h" +#include "ruby/internal/has/warning.h" + /** Wraps (or simulates) `__has_c_attribute`. */ #if defined(__cplusplus) # /* Makes no sense. */ # define RBIMPL_HAS_C_ATTRIBUTE(_) 0 +#elif RBIMPL_HAS_EXTENSION(c_attributes) +# /* Hmm. It seems Clang 17 has this macro defined even when -std=c99 mode, +# * _and_ fails to compile complaining that attributes are C2X feature. We +# * need to work around this nonsense. */ +# define RBIMPL_HAS_C_ATTRIBUTE(_) __has_c_attribute(_) + +#elif RBIMPL_HAS_WARNING("-Wc2x-extensions") +# define RBIMPL_HAS_C_ATTRIBUTE(_) 0 + #elif defined(__has_c_attribute) # define RBIMPL_HAS_C_ATTRIBUTE(_) __has_c_attribute(_) diff --git a/include/ruby/internal/intern/array.h b/include/ruby/internal/intern/array.h index 17964bf810..1909fdf17b 100644 --- a/include/ruby/internal/intern/array.h +++ b/include/ruby/internal/intern/array.h @@ -107,14 +107,14 @@ VALUE rb_ary_new_from_args(long n, ...); VALUE rb_ary_new_from_values(long n, const VALUE *elts); /** - * Allocates a "temporary" array. This is a hidden empty array. Handy on - * occasions. 
+ * Allocates a hidden (no class) empty array. * * @param[in] capa Designed capacity of the array. * @return A hidden, empty array. * @see rb_obj_hide() */ -VALUE rb_ary_tmp_new(long capa); +VALUE rb_ary_hidden_new(long capa); +#define rb_ary_tmp_new rb_ary_hidden_new /** * Destroys the given array for no reason. @@ -187,7 +187,7 @@ VALUE rb_ary_shared_with_p(VALUE lhs, VALUE rhs); * : (int i) -> T? * | (int beg, int len) -> ::Array[T]? * | (Range[int] r) -> ::Array[T]? - * | (ArithmeticSequence as) -> ::Array[T]? # This also raises RagneError. + * | (ArithmeticSequence as) -> ::Array[T]? # This also raises RangeError. * end * ``` */ diff --git a/include/ruby/internal/intern/bignum.h b/include/ruby/internal/intern/bignum.h index 43d68018de..c27f77a1fb 100644 --- a/include/ruby/internal/intern/bignum.h +++ b/include/ruby/internal/intern/bignum.h @@ -51,7 +51,7 @@ RBIMPL_SYMBOL_EXPORT_BEGIN() VALUE rb_big_new(size_t len, int sign); /** - * Queries if the passed bignum instance is a "bigzro". What is a bigzero? + * Queries if the passed bignum instance is a "bigzero". What is a bigzero? * Well, bignums are for very big integers, but can also represent tiny ones * like -1, 0, 1. Bigzero are instances of bignums whose values are zero. * Knowing if a bignum is bigzero can be handy on occasions, like for instance @@ -793,7 +793,7 @@ size_t rb_absint_size(VALUE val, int *nlz_bits_ret); * @exception rb_eTypeError `val` doesn't respond to `#to_int`. * @retval (size_t)-1 Overflowed. * @retval otherwise - `((val_numbits * CHAR_BIT + word_numbits - 1) / word_numbits)`, + * `((val_numbits * CHAR_BIT + word_numbits - 1) / word_numbits)`, * where val_numbits is the number of bits of `abs(val)`. 
* @post If `nlz_bits_ret` is not `NULL` and there is no overflow, * `(return_value * word_numbits - val_numbits)` is stored in diff --git a/include/ruby/internal/intern/class.h b/include/ruby/internal/intern/class.h index af0c0768b8..357af5d176 100644 --- a/include/ruby/internal/intern/class.h +++ b/include/ruby/internal/intern/class.h @@ -88,8 +88,8 @@ VALUE rb_define_class_id(ID id, VALUE super); * @post `outer::id` refers the returned class. * @note If a class named `id` is already defined and its superclass is * `super`, the function just returns the defined class. - * @note The compaction GC does not move classes returned by this - * function. + * @note The GC does not collect nor move classes returned by this + * function. They are immortal. */ VALUE rb_define_class_id_under(VALUE outer, ID id, VALUE super); @@ -100,6 +100,14 @@ VALUE rb_define_class_id_under(VALUE outer, ID id, VALUE super); */ VALUE rb_module_new(void); + +/** + * Creates a new, anonymous refinement. + * + * @return An anonymous refinement. + */ +VALUE rb_refinement_new(void); + /** * This is a very badly designed API that creates an anonymous module. * @@ -119,8 +127,8 @@ VALUE rb_define_module_id(ID id); * constant is not a module. * @return The created module. * @post `outer::id` refers the returned module. - * @note The compaction GC does not move classes returned by this - * function. + * @note The GC does not collect nor move classes returned by this + * function. They are immortal. */ VALUE rb_define_module_id_under(VALUE outer, ID id); @@ -150,7 +158,7 @@ VALUE rb_mod_included_modules(VALUE mod); VALUE rb_mod_include_p(VALUE child, VALUE parent); /** - * Queries the module's ancestors. This routine gathers classes and modules + * Queries the module's ancestors. This routine gathers classes and modules * that the passed module either inherits, includes, or prepends, then * recursively applies that routine again and again to the collected entries * until the list doesn't grow up. 
@@ -167,6 +175,44 @@ VALUE rb_mod_include_p(VALUE child, VALUE parent); VALUE rb_mod_ancestors(VALUE mod); /** + * Queries the class's descendants. This routine gathers classes that are + * subclasses of the given class (or subclasses of those subclasses, etc.), + * returning an array of classes that have the given class as an ancestor. + * The returned array does not include the given class or singleton classes. + * + * @param[in] klass A class. + * @return An array of classes where `klass` is an ancestor. + * + * @internal + */ +VALUE rb_class_descendants(VALUE klass); + +/** + * Queries the class's direct descendants. This routine gathers classes that are + * direct subclasses of the given class, + * returning an array of classes that have the given class as a superclass. + * The returned array does not include singleton classes. + * + * @param[in] klass A class. + * @return An array of classes where `klass` is the `superclass`. + * + * @internal + */ +VALUE rb_class_subclasses(VALUE klass); + + +/** + * Returns the attached object for a singleton class. + * If the given class is not a singleton class, raises a TypeError. + * + * @param[in] klass A class. + * @return The object which has the singleton class `klass`. + * + * @internal + */ +VALUE rb_class_attached_object(VALUE klass); + +/** * Generates an array of symbols, which are the list of method names defined in * the passed class. * diff --git a/include/ruby/internal/intern/cont.h b/include/ruby/internal/intern/cont.h index b0d9137dd9..32647f48aa 100644 --- a/include/ruby/internal/intern/cont.h +++ b/include/ruby/internal/intern/cont.h @@ -39,6 +39,28 @@ RBIMPL_SYMBOL_EXPORT_BEGIN() VALUE rb_fiber_new(rb_block_call_func_t func, VALUE callback_obj); /** + * Creates a Fiber instance from a C-backended block with the specified + * storage. + * + * If the given storage is Qundef or Qtrue, this function is equivalent to + * rb_fiber_new() which inherits storage from the current fiber. 
+ * + * Specifying Qtrue is experimental and may be changed in the future. + * + * If the given storage is Qnil, this function will lazily initialize the + * internal storage which starts off empty (without any inheritance). + * + * Otherwise, the given storage is used as the internal storage. + * + * @param[in] func A function, to become the fiber's body. + * @param[in] callback_obj Passed as-is to `func`. + * @param[in] storage The way to set up the storage for the fiber. + * @return An allocated new instance of rb_cFiber, which is ready to be + * "resume"d. + */ +VALUE rb_fiber_new_storage(rb_block_call_func_t func, VALUE callback_obj, VALUE storage); + +/** * Queries the fiber which is calling this function. Any ruby execution * context has its fiber, either explicitly or implicitly. * @@ -139,8 +161,7 @@ VALUE rb_fiber_resume_kw(VALUE fiber, int argc, const VALUE *argv, int kw_splat) * fiber then suspends its execution until next time it is resumed. * * This function can also raise arbitrary exceptions injected from outside of - * the fiber, using `Fiber#raise` Ruby level API. There is no way to do that - * from C though. + * the fiber using rb_fiber_raise(). * * ```ruby * exc = Class.new Exception @@ -159,12 +180,6 @@ VALUE rb_fiber_resume_kw(VALUE fiber, int argc, const VALUE *argv, int kw_splat) * @param[in] argv Passed to rb_fiber_resume(). * @exception rb_eException (See above) * @return (See rb_fiber_resume() for details) - * - * @internal - * - * "There is no way to do that from C" is a lie. But @shyouhei doesn't think - * this very intentionally obfuscated way to raise arbitrary exceptions from C - * is an official C API. Extension libraries must not know this fact.
*/ VALUE rb_fiber_yield(int argc, const VALUE *argv); @@ -239,7 +254,28 @@ VALUE rb_fiber_transfer(VALUE fiber, int argc, const VALUE *argv); */ VALUE rb_fiber_transfer_kw(VALUE fiber, int argc, const VALUE *argv, int kw_splat); -VALUE rb_fiber_raise(VALUE fiber, int argc, VALUE *argv); +/** + * Identical to rb_fiber_resume() but instead of resuming normal execution of + * the passed fiber, it raises the given exception in it. From inside of the + * fiber this would be seen as if rb_fiber_yield() raised. + * + * This function does return in case the passed fiber gracefully handled the + * passed exception. But if it does not, the raised exception propagates out + * of the passed fiber; this function then does not return. + * + * Parameters are passed to rb_make_exception() to create an exception object. + * See its documentation for what is allowed here. + * + * It is a failure to call this function against a fiber which is resuming, + * has never run yet, or has already finished running. + * + * @param[out] fiber Where exception is raised. + * @param[in] argc Passed as-is to rb_make_exception(). + * @param[in] argv Passed as-is to rb_make_exception(). + * @exception rb_eFiberError `fiber` is terminated etc. 
+ * @return (See rb_fiber_resume() for details) + */ +VALUE rb_fiber_raise(VALUE fiber, int argc, const VALUE *argv); RBIMPL_SYMBOL_EXPORT_END() diff --git a/include/ruby/internal/intern/error.h b/include/ruby/internal/intern/error.h index 37d3b8592b..11e147a121 100644 --- a/include/ruby/internal/intern/error.h +++ b/include/ruby/internal/intern/error.h @@ -38,8 +38,6 @@ #define rb_exc_new3 rb_exc_new_str /**< @old{rb_exc_new_str} */ /** @cond INTERNAL_MACRO */ -#define rb_check_trusted rb_check_trusted -#define rb_check_trusted_inline rb_check_trusted #define rb_check_arity rb_check_arity /** @endcond */ @@ -192,7 +190,6 @@ RBIMPL_ATTR_NONNULL(()) */ void rb_error_frozen(const char *what); -RBIMPL_ATTR_NORETURN() /** * Identical to rb_error_frozen(), except it takes arbitrary Ruby object * instead of C's string. @@ -204,12 +201,6 @@ RBIMPL_ATTR_NORETURN() void rb_error_frozen_object(VALUE what); /** - * @deprecated Does nothing. This method is deprecated and will be removed in - * Ruby 3.2. - */ -void rb_error_untrusted(VALUE); - -/** * Queries if the passed object is frozen. * * @param[in] obj Target object to test frozen-ness. @@ -219,12 +210,6 @@ void rb_error_untrusted(VALUE); void rb_check_frozen(VALUE obj); /** - * @deprecated Does nothing. This method is deprecated and will be removed in - * Ruby 3.2. - */ -void rb_check_trusted(VALUE); - -/** * Ensures that the passed object can be `initialize_copy` relationship. When * you implement your own one you would better call this at the right beginning * of your implementation. @@ -249,7 +234,7 @@ RBIMPL_ATTR_NORETURN() * @param[in] max Maximum allowed `argc`. * @exception rb_eArgError Always. 
*/ -MJIT_STATIC void rb_error_arity(int argc, int min, int max); +void rb_error_arity(int argc, int min, int max); RBIMPL_SYMBOL_EXPORT_END() diff --git a/include/ruby/internal/intern/file.h b/include/ruby/internal/intern/file.h index 8e98ba08f8..79820fdc61 100644 --- a/include/ruby/internal/intern/file.h +++ b/include/ruby/internal/intern/file.h @@ -187,6 +187,27 @@ RBIMPL_ATTR_PURE() */ int rb_is_absolute_path(const char *path); +/** + * Queries the file size of the given file. Because this function calls + * `fstat(2)` internally, it is a failure to pass a closed file to this + * function. + * + * This function flushes the passed file's buffer if any. Can take time. + * + * @param[in] file A file object. + * @exception rb_eFrozenError `file` is frozen. + * @exception rb_eIOError `file` is closed. + * @exception rb_eSystemCallError Permission denied etc. + * @exception rb_eNoMethodError The given non-file object doesn't respond + * to `#size`. + * @return The size of the passed file. + * @note Passing a non-regular file such as a UNIX domain socket to this + * function is not a failure. But the return value is + * unpredictable. POSIX's `<sys/stat.h>` states that "the use of + * this field is unspecified" then. + */ +rb_off_t rb_file_size(VALUE file); + RBIMPL_SYMBOL_EXPORT_END() #endif /* RBIMPL_INTERN_FILE_H */ diff --git a/include/ruby/internal/intern/gc.h b/include/ruby/internal/intern/gc.h deleted file mode 100644 index 1617a7cef6..0000000000 --- a/include/ruby/internal/intern/gc.h +++ /dev/null @@ -1,390 +0,0 @@ -#ifndef RBIMPL_INTERN_GC_H /*-*-C++-*-vi:se ft=cpp:*/ -#define RBIMPL_INTERN_GC_H -/** - * @file - * @author Ruby developers <ruby-core@ruby-lang.org> - * @copyright This file is a part of the programming language Ruby. - * Permission is hereby granted, to either redistribute and/or - * modify this file, provided that the conditions mentioned in the - * file COPYING are met. Consult the file for details. 
- * @warning Symbols prefixed with either `RBIMPL` or `rbimpl` are - * implementation details. Don't take them as canon. They could - * rapidly appear then vanish. The name (path) of this header file - * is also an implementation detail. Do not expect it to persist - * at the place it is now. Developers are free to move it anywhere - * anytime at will. - * @note To ruby-core: remember that this header can be possibly - * recursively included from extension libraries written in C++. - * Do not expect for instance `__VA_ARGS__` is always available. - * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. - * @brief Public APIs related to ::rb_mGC. - */ -#include "ruby/internal/config.h" - -#ifdef STDC_HEADERS -# include <stddef.h> /* size_t */ -#endif - -#if HAVE_SYS_TYPES_H -# include <sys/types.h> /* ssize_t */ -#endif - -#include "ruby/internal/attr/cold.h" -#include "ruby/internal/attr/noreturn.h" -#include "ruby/internal/attr/nonnull.h" -#include "ruby/internal/attr/pure.h" -#include "ruby/internal/dllexport.h" -#include "ruby/internal/value.h" - -RBIMPL_SYMBOL_EXPORT_BEGIN() - -/* gc.c */ - -RBIMPL_ATTR_COLD() -RBIMPL_ATTR_NORETURN() -/** - * Triggers out-of-memory error. If possible it raises ::rb_eNoMemError. But - * because we are running out of memory that is not always doable. This - * function tries hard to show something, but ultimately can die silently. - * - * @exception rb_eNoMemError Raises it if possible. - */ -void rb_memerror(void); - -RBIMPL_ATTR_PURE() -/** - * Queries if the GC is busy. - * - * @retval 0 It isn't. - * @retval 1 It is. - */ -int rb_during_gc(void); - -RBIMPL_ATTR_NONNULL((1)) -/** - * Marks objects between the two pointers. This is one of the GC utility - * functions that you can call when you design your own - * ::rb_data_type_struct::dmark. - * - * @pre Continuous memory region from `start` to `end` shall be fully - * addressable. 
- * @param[out] start Pointer to an array of objects. - * @param[out] end Pointer that terminates the array of objects. - * @post Objects from `start` to `end`, both inclusive, are marked. - * - * @internal - * - * `end` can be NULL... But that just results in no-op. - */ -void rb_gc_mark_locations(const VALUE *start, const VALUE *end); - -/** - * Identical to rb_mark_hash(), except it marks only values of the table and - * leave their associated keys unmarked. This is one of the GC utility - * functions that you can call when you design your own - * ::rb_data_type_struct::dmark. - * - * @warning Of course it can break GC. Leave it unused if unsure. - * @param[in] tbl A table to mark. - * @post Values stored in `tbl` are marked. - */ -void rb_mark_tbl(struct st_table *tbl); - -/** - * Identical to rb_mark_tbl(), except it marks objects using - * rb_gc_mark_movable(). This is one of the GC utility functions that you can - * call when you design your own ::rb_data_type_struct::dmark. - * - * @warning Of course it can break GC. Leave it unused if unsure. - * @param[in] tbl A table to mark. - * @post Values stored in `tbl` are marked. - */ -void rb_mark_tbl_no_pin(struct st_table *tbl); - -/** - * Identical to rb_mark_hash(), except it marks only keys of the table and - * leave their associated values unmarked. This is one of the GC utility - * functions that you can call when you design your own - * ::rb_data_type_struct::dmark. - * - * @warning Of course it can break GC. Leave it unused if unsure. - * @param[in] tbl A table to mark. - * @post Keys stored in `tbl` are marked. - */ -void rb_mark_set(struct st_table *tbl); - -/** - * Marks keys and values associated inside of the given table. This is one of - * the GC utility functions that you can call when you design your own - * ::rb_data_type_struct::dmark. - * - * @param[in] tbl A table to mark. - * @post Objects stored in `tbl` are marked. 
- */ -void rb_mark_hash(struct st_table *tbl); - -/** - * Updates references inside of tables. After you marked values using - * rb_mark_tbl_no_pin(), the objects inside of the table could of course be - * moved. This function is to fixup those references. You can call this from - * your ::rb_data_type_struct::dcompact. - * - * @param[out] ptr A table that potentially includes moved references. - * @post Moved references, if any, are corrected. - */ -void rb_gc_update_tbl_refs(st_table *ptr); - -/** - * Identical to rb_gc_mark(), except it allows the passed value be a - * non-object. For instance pointers to different type of memory regions are - * allowed here. Such values are silently ignored. This is one of the GC - * utility functions that you can call when you design your own - * ::rb_data_type_struct::dmark. - * - * @param[out] obj A possible object. - * @post `obj` is marked, if possible. - */ -void rb_gc_mark_maybe(VALUE obj); - -/** - * Marks an object. This is one of the GC utility functions that you can call - * when you design your own ::rb_data_type_struct::dmark. - * - * @param[out] obj Arbitrary Ruby object. - * @post `obj` is marked. - */ -void rb_gc_mark(VALUE obj); - -/** - * Maybe this is the only function provided for C extensions to control the - * pinning of objects, so let us describe it in detail. These days Ruby's GC - * is copying. As far as an object's physical address is guaranteed unused, it - * can move around the object space. Our GC engine rearranges these objects - * after it reclaims unreachable objects from our object space, so that the - * space is compact (improves memory locality). This is called the - * "compaction" phase, and works well most of the time... as far as there are - * no C extensions. C extensions complicate the scenario because Ruby core - * cannot detect any use of the physical address of an object inside of C - * functions. 
In order to prevent memory corruptions, objects observable from - * C extensions are "pinned"; they stick to where they are born until they die, - * just in case any C extensions touch their raw pointers. This variant of - * scheme is called "Mostly-Copying" garbage collector. Authors of C - * extensions, however, can extremely carefully write them to become - * compaction-aware. To do so avoid referring to a Ruby object from inside of - * your struct in the first place. But if that is not possible, use this - * function from your ::rb_data_type_struct::dmark then. This way objects - * marked using it are considered movable. If you chose this way you have to - * manually fix up locations of such moved pointers using rb_gc_location(). - * - * @see Bartlett, Joel F., "Compacting Garbage Collection with Ambiguous - * Roots", ACM SIGPLAN Lisp Pointers Volume 1 Issue 6 pp. 3-12, - * April-May-June, 1988. https://doi.org/10.1145/1317224.1317225 - * - * @param[in] obj Object that is movable. - * @post Values stored in `tbl` are marked. - */ -void rb_gc_mark_movable(VALUE obj); - -/** - * Finds a new "location" of an object. An object can be moved on compaction. - * This function projects its new abode, or just returns the passed object if - * not moved. This is one of the GC utility functions that you can call when - * you design your own ::rb_data_type_struct::dcompact. - * - * @param[in] obj An object, possibly already moved to somewhere else. - * @return An object, which holds the current contents of former `obj`. - */ -VALUE rb_gc_location(VALUE obj); - -/** - * Asserts that the passed object is no longer needed. Such objects are - * reclaimed sooner or later so this function is not mandatory. But sometimes - * you can know from your application knowledge that an object is surely dead - * at some point. Calling this as a hint can be a polite way. - * - * @param[out] obj Object, dead. - * @pre `obj` have never been passed to this function before. 
- * @post `obj` could be invalidated. - * @warning It is a failure to pass an object multiple times to this - * function. - */ -void rb_gc_force_recycle(VALUE obj); - -/** - * Triggers a GC process. This was the only GC entry point that we had at the - * beginning. Over time our GC evolved. Now what this function does is just a - * very simplified variation of the entire GC algorithms. A series of - * procedures kicked by this API is called a "full" GC. - * - * - It immediately scans the entire object space to sort the dead. - * - It immediately reclaims any single dead bodies to reuse later. - * - * It is worth noting that the procedures above do not include evaluations of - * finalisers. They run later. - * - * @internal - * - * Finalisers are deferred until we can handle interrupts. See - * `rb_postponed_job_flush` in vm_trace.c. - * - * Of course there are GC that are not "full". For instance this one and the - * GC which runs when we are running out of memory are different. See - * `gc_profile_record_flag` defined in gc.c for the kinds of GC. - * - * In spite of the name this is not what everything that a GC can trigger. As - * of writing it seems this function does not trigger compaction. But this - * might change in future. - */ -void rb_gc(void); - -/** - * Copy&paste an object's finaliser to another. This is one of the GC utility - * functions that you can call when you design your own `initialize_copy`, - * `initialize_dup`, `initialize_clone`. - * - * @param[out] dst Destination object. - * @param[in] src Source object. - * @post `dst` and `src` share the same finaliser. - * - * @internal - * - * But isn't it easier for you to call super, and let `Object#intialize_copy` - * call this function instead? - */ -void rb_gc_copy_finalizer(VALUE dst, VALUE src); - -/** - * (Re-) enables GC. This makes sense only after you called rb_gc_disable(). - * - * @retval RUBY_Qtrue GC was disabled before. - * @retval RUBY_Qfalse GC was enabled before. 
- * @post GC is enabled. - * - * @internal - * - * This is one of such exceptional functions that does not raise both Ruby - * exceptions and C++ exceptions. - */ -VALUE rb_gc_enable(void); - -/** - * Disables GC. This prevents automatic GC runs when the process is running - * out of memory. Such situations shall result in rb_memerror(). However this - * does not prevent users from manually invoking rb_gc(). That should work. - * People abused this by disabling GC at the beginning of an event loop, - * process events without GC overheads, then manually force reclaiming garbage - * at the bottom of the loop. However because our GC is now much smarter than - * just calling rb_gc(), this technique is proven to be sub-optimal these days. - * It is believed that there is currently practically no needs of this - * function. - * - * @retval RUBY_Qtrue GC was disabled before. - * @retval RUBY_Qfalse GC was enabled before. - * @post GC is disabled. - */ -VALUE rb_gc_disable(void); - -/** - * Identical to rb_gc(), except the return value. - * - * @return Always returns ::RUBY_Qnil. - */ -VALUE rb_gc_start(void); - -/** - * Assigns a finaliser for an object. Each objects can have objects (typically - * blocks) that run immediately after that object dies. They are called - * finalisers of an object. This function associates a finaliser object with a - * target object. - * - * @note Note that finalisers run _after_ the object they finalise dies. You - * cannot for instance call its methods. - * @note If your finaliser references the object it finalises that object - * loses any chance to become a garbage; effectively leaks memory until - * the end of the process. - * - * @param[in] obj Target to finalise. - * @param[in] block Something `call`able. - * @exception rb_eRuntimeError Somehow `obj` cannot have finalisers. - * @exception rb_eFrozenError `obj` is frozen. - * @exception rb_eArgError `block` doesn't respond to `call`. - * @return The passed `block`. 
- * @post `block` runs after `obj` dies. - */ -VALUE rb_define_finalizer(VALUE obj, VALUE block); - -/** - * Modifies the object so that it has no finalisers at all. This function is - * mainly provided for symmetry. No practical usages can be thought of. - * - * @param[out] obj Object to clear its finalisers. - * @exception rb_eFrozenError `obj` is frozen. - * @return The passed `obj`. - * @post `obj` has no finalisers. - * @note There is no way to undefine a specific part of many finalisers - * that `obj` could have. All you can do is to clear them all. - */ -VALUE rb_undefine_finalizer(VALUE obj); - -/** - * Identical to rb_gc_stat(), with "count" parameter. - * - * @return Lifetime total number of runs of GC. - */ -size_t rb_gc_count(void); - -/** - * Obtains various GC related profiles. The parameter can be either a Symbol - * or a Hash. If a Hash is passed, it is filled with everything currently - * available. If a Symbol is passed just that portion is returned. - * - * Possible variations of keys you can pass here change from version to - * version. You can get the list of known keys by passing an empty hash and - * let it be filled. - * - * @param[in,out] key_or_buf A Symbol, or a Hash. - * @exception rb_eTypeError Neither Symbol nor Hash. - * @exception rb_eFrozenError Frozen hash is passed. - * @return In case a Hash is passed it returns 0. Otherwise the - * profile value associated with the given key is returned. - * @post In case a Hash is passed it is filled with values. - */ -size_t rb_gc_stat(VALUE key_or_buf); - -/** - * Obtains various info regarding the most recent GC run. This includes for - * instance the reason of the GC. The parameter can be either a Symbol or a - * Hash. If a Hash is passed, it is filled with everything currently - * available. If a Symbol is passed just that portion is returned. - * - * Possible variations of keys you can pass here change from version to - * version. 
You can get the list of known keys by passing an empty hash and - * let it be filled. - * - * @param[in,out] key_or_buf A Symbol, or a Hash. - * @exception rb_eTypeError Neither Symbol nor Hash. - * @exception rb_eFrozenError Frozen hash is passed. - * @return In case a Hash is passed it returns that hash. Otherwise - * the profile value associated with the given key is returned. - * @post In case a Hash is passed it is filled with values. - */ -VALUE rb_gc_latest_gc_info(VALUE key_or_buf); - -/** - * Informs that there are external memory usages. Our GC runs when we are - * running out of memory. The amount of memory, however, can increase/decrease - * behind-the-scene. For instance DLLs can allocate memories using `mmap(2)` - * etc, which are opaque to us. Registering such external allocations using - * this function enables proper detection of how much memories an object used - * as a whole. That will trigger GCs more often than it would otherwise. You - * can also pass negative numbers here, to indicate that such external - * allocations are gone. - * - * @param[in] diff Amount of memory increased(+)/decreased(-). - */ -void rb_gc_adjust_memory_usage(ssize_t diff); - -RBIMPL_SYMBOL_EXPORT_END() - -#endif /* RBIMPL_INTERN_GC_H */ diff --git a/include/ruby/internal/intern/hash.h b/include/ruby/internal/intern/hash.h index 9d2ce8279a..af8dfd5d8f 100644 --- a/include/ruby/internal/intern/hash.h +++ b/include/ruby/internal/intern/hash.h @@ -107,6 +107,17 @@ VALUE rb_hash(VALUE obj); VALUE rb_hash_new(void); /** + * Identical to rb_hash_new(), except it additionally specifies how many keys + * it is expected to contain. This way you can create a hash that is large enough + * for your need. For large hashes it means it won't need to be reallocated and + * rehashed as much, improving performance. + * + * @param[in] capa Designed capacity of the hash. + * @return An empty Hash, whose capacity is `capa`. 
+ */ +VALUE rb_hash_new_capa(long capa); + +/** * Duplicates a hash. * * @param[in] hash An instance of ::rb_cHash. @@ -288,15 +299,6 @@ int rb_path_check(const char *path); /* hash.c */ /** - * @deprecated This function once was a thing in the old days, but makes no - * sense any longer today. Exists here for backwards - * compatibility only. You can safely forget about it. - * - * @return 0 always. - */ -int rb_env_path_tainted(void); - -/** * Destructively removes every environment variables of the running process. * * @return The `ENV` object. diff --git a/include/ruby/internal/intern/load.h b/include/ruby/internal/intern/load.h index 288a16c2ec..9ceb98c2e4 100644 --- a/include/ruby/internal/intern/load.h +++ b/include/ruby/internal/intern/load.h @@ -177,6 +177,43 @@ VALUE rb_f_require(VALUE self, VALUE feature); VALUE rb_require_string(VALUE feature); /** + * Resolves and returns a symbol of a function in the native extension + * specified by the feature and symbol names. Extensions will use this function + * to access the symbols provided by other native extensions. + * + * @param[in] feature Name of a feature, e.g. `"json"`. + * @param[in] symbol Name of a symbol defined by the feature. + * @return The resolved symbol of a function, defined and externed by the + * specified feature. It may be NULL if the feature is not loaded, + * the feature is not an extension, or the symbol is not found. + */ +void *rb_ext_resolve_symbol(const char *feature, const char *symbol); + +/** + * This macro is to provide backwards compatibility. It provides a way to + * define function prototypes and resolve function symbols in a safe way.
+ * + * ```CXX + * // prototypes + * #ifdef HAVE_RB_EXT_RESOLVE_SYMBOL + * VALUE (*other_extension_func)(VALUE,VALUE); + * #else + * VALUE other_extension_func(VALUE,VALUE); + * #endif + * + * // in Init_xxx() + * #ifdef HAVE_RB_EXT_RESOLVE_SYMBOL + * other_extension_func = \ + * (VALUE(*)(VALUE,VALUE))rb_ext_resolve_symbol(fname, sym_name); + * if (other_extension_func == NULL) { + * // raise your own error + * } + * #endif + * ``` + */ +#define HAVE_RB_EXT_RESOLVE_SYMBOL 1 + +/** * @name extension configuration * @{ */ diff --git a/include/ruby/internal/intern/object.h b/include/ruby/internal/intern/object.h index 6bb4ccb2fe..9daad7d046 100644 --- a/include/ruby/internal/intern/object.h +++ b/include/ruby/internal/intern/object.h @@ -92,8 +92,8 @@ VALUE rb_class_new_instance_kw(int argc, const VALUE *argv, VALUE klass, int kw_ * * @param[in] lhs Comparison left hand side. * @param[in] rhs Comparison right hand side. - * @retval RUBY_Qtrue They are equal. - * @retval RUBY_Qfalse Otherwise. + * @retval non-zero They are equal. + * @retval 0 Otherwise. * @note This function actually calls `lhs.eql?(rhs)` so you cannot * implement your class' `#eql?` method using it. */ @@ -151,13 +151,12 @@ VALUE rb_obj_is_kind_of(VALUE obj, VALUE klass); * @return An allocated, not yet initialised instance of `klass`. * @note It calls the allocator defined by rb_define_alloc_func(). You * cannot use this function to define an allocator. Use - * rb_newobj_of(), #TypedData_Make_Struct or others, instead. + * TypedData_Make_Struct or others, instead. * @note Usually prefer rb_class_new_instance() to rb_obj_alloc() and * rb_obj_call_init().
* @see rb_class_new_instance() * @see rb_obj_call_init() * @see rb_define_alloc_func() - * @see rb_newobj_of() * @see #TypedData_Make_Struct */ VALUE rb_obj_alloc(VALUE klass); @@ -202,74 +201,6 @@ VALUE rb_obj_dup(VALUE obj); */ VALUE rb_obj_init_copy(VALUE src, VALUE dst); -RBIMPL_ATTR_DEPRECATED_EXT(("taintedness turned out to be a wrong idea.")) -/** - * @deprecated This function once was a thing in the old days, but makes no - * sense any longer today. Exists here for backwards - * compatibility only. You can safely forget about it. - * - * @param[in] obj Object in question. - * @return Verbatim `obj`. - */ -VALUE rb_obj_taint(VALUE obj); - -RBIMPL_ATTR_PURE() -RBIMPL_ATTR_DEPRECATED_EXT(("taintedness turned out to be a wrong idea.")) -/** - * @deprecated This function once was a thing in the old days, but makes no - * sense any longer today. Exists here for backwards - * compatibility only. You can safely forget about it. - * - * @param[in] obj Object in question. - * @return Always returns ::RUBY_Qfalse. - */ -VALUE rb_obj_tainted(VALUE obj); - -RBIMPL_ATTR_DEPRECATED_EXT(("taintedness turned out to be a wrong idea.")) -/** - * @deprecated This function once was a thing in the old days, but makes no - * sense any longer today. Exists here for backwards - * compatibility only. You can safely forget about it. - * - * @param[in] obj Object in question. - * @return Verbatim `obj`. - */ -VALUE rb_obj_untaint(VALUE obj); - -RBIMPL_ATTR_DEPRECATED_EXT(("trustedness turned out to be a wrong idea.")) -/** - * @deprecated This function once was a thing in the old days, but makes no - * sense any longer today. Exists here for backwards - * compatibility only. You can safely forget about it. - * - * @param[in] obj Object in question. - * @return Verbatim `obj`. 
- */ -VALUE rb_obj_untrust(VALUE obj); - -RBIMPL_ATTR_PURE() -RBIMPL_ATTR_DEPRECATED_EXT(("trustedness turned out to be a wrong idea.")) -/** - * @deprecated This function once was a thing in the old days, but makes no - * sense any longer today. Exists here for backwards - * compatibility only. You can safely forget about it. - * - * @param[in] obj Object in question. - * @return Always returns ::RUBY_Qfalse. - */ -VALUE rb_obj_untrusted(VALUE obj); - -RBIMPL_ATTR_DEPRECATED_EXT(("trustedness turned out to be a wrong idea.")) -/** - * @deprecated This function once was a thing in the old days, but makes no - * sense any longer today. Exists here for backwards - * compatibility only. You can safely forget about it. - * - * @param[in] obj Object in question. - * @return Verbatim `obj`. - */ -VALUE rb_obj_trust(VALUE obj); - /** * Just calls rb_obj_freeze_inline() inside. Does this make any sens to * extension libraries? diff --git a/include/ruby/internal/intern/process.h b/include/ruby/internal/intern/process.h index 7a7b24ed4b..cfa5e13162 100644 --- a/include/ruby/internal/intern/process.h +++ b/include/ruby/internal/intern/process.h @@ -31,6 +31,15 @@ RBIMPL_SYMBOL_EXPORT_BEGIN() /* process.c */ /** + * Wait for the specified process to terminate, reap it, and return its status. + * + * @param[in] pid The process ID to wait for. + * @param[in] flags The flags to pass to waitpid(2). + * @return VALUE An instance of Process::Status. + */ +VALUE rb_process_status_wait(rb_pid_t pid, int flags); + +/** * Sets the "last status", or the `$?`. * * @param[in] status The termination status, as defined in `waitpid(3posix)`. @@ -247,7 +256,7 @@ rb_pid_t rb_spawn_err(int argc, const VALUE *argv, char *errbuf, size_t buflen); * * @internal * - * This function might or might not exist depending on `./confiugre` result. + * This function might or might not exist depending on `./configure` result. * It must be a portability hell. Better not use. 
*/ VALUE rb_proc_times(VALUE _); diff --git a/include/ruby/internal/intern/re.h b/include/ruby/internal/intern/re.h index 31f5593275..4dd58b469b 100644 --- a/include/ruby/internal/intern/re.h +++ b/include/ruby/internal/intern/re.h @@ -87,11 +87,6 @@ void rb_match_busy(VALUE md); * @retval RUBY_Qfalse There is a `n`-th capture and is empty. * @retval RUBY_Qtrue There is a `n`-th capture that has something. * - * @internal - * - * @shyouhei wonders: why there are both rb_reg_match_defined() and - * rb_match_nth_defined, which are largely the same things, but do not share - * their implementations at all? */ VALUE rb_reg_nth_defined(int n, VALUE md); diff --git a/include/ruby/internal/intern/select.h b/include/ruby/internal/intern/select.h index fabc287cd1..6ba84c6e63 100644 --- a/include/ruby/internal/intern/select.h +++ b/include/ruby/internal/intern/select.h @@ -76,7 +76,7 @@ struct timeval; * * Although any file descriptors are possible here, it makes completely no * sense to pass a descriptor that is not `O_NONBLOCK`. If you want to know - * the reason for this limitatuon in detail, you might find this thread super + * the reason for this limitation in detail, you might find this thread super * interesting: https://lkml.org/lkml/2004/10/6/117 */ int rb_thread_fd_select(int nfds, rb_fdset_t *rfds, rb_fdset_t *wfds, rb_fdset_t *efds, struct timeval *timeout); diff --git a/include/ruby/internal/intern/select/largesize.h b/include/ruby/internal/intern/select/largesize.h index d156f62034..d65f088c06 100644 --- a/include/ruby/internal/intern/select/largesize.h +++ b/include/ruby/internal/intern/select/largesize.h @@ -35,9 +35,6 @@ * `select(2)` documents how to allocate fd_set dynamically. * http://www.openbsd.org/cgi-bin/man.cgi?query=select&manpath=OpenBSD+4.4 * - * - HP-UX documents how to allocate fd_set dynamically. 
- * http://docs.hp.com/en/B2355-60105/select.2.html - * * - Solaris 8 has `select_large_fdset` * * - Mac OS X 10.7 (Lion) diff --git a/include/ruby/internal/intern/select/posix.h b/include/ruby/internal/intern/select/posix.h index bfde159890..0a9b0b2e51 100644 --- a/include/ruby/internal/intern/select/posix.h +++ b/include/ruby/internal/intern/select/posix.h @@ -95,11 +95,10 @@ RBIMPL_ATTR_NOALIAS() * * @param[out] dst Target fdset. * @param[in] src Source fdset. - * @param[in] n Unused parameter. * @post `dst` is a copy of `src`. */ static inline void -rb_fd_dup(rb_fdset_t *dst, const fd_set *src, int n) +rb_fd_dup(rb_fdset_t *dst, const fd_set *src) { *dst = *src; } @@ -137,7 +136,7 @@ rb_fd_max(const rb_fdset_t *f) } /** @cond INTERNAL_MACRO */ -/* :FIXME: What are these? They don't exist for shibling implementations. */ +/* :FIXME: What are these? They don't exist for sibling implementations. */ #define rb_fd_init_copy(d, s) (*(d) = *(s)) #define rb_fd_term(f) ((void)(f)) /** @endcond */ diff --git a/include/ruby/internal/intern/signal.h b/include/ruby/internal/intern/signal.h index 84f7558404..4773788651 100644 --- a/include/ruby/internal/intern/signal.h +++ b/include/ruby/internal/intern/signal.h @@ -97,7 +97,7 @@ RBIMPL_ATTR_NONNULL(()) * - Case #11: When signo and PID are both negative, the behaviour of this * function depends on how `killpg(3)` works. On Linux, it seems such * attempt is strictly prohibited and `Errno::EINVAL` is raised. But on - * macOS, it seems it tries to to send the signal actually to the process + * macOS, it seems it tries to send the signal actually to the process * group. * * @note Above description is in fact different from how `kill(2)` works. @@ -113,12 +113,6 @@ RBIMPL_ATTR_NONNULL(()) */ VALUE rb_f_kill(int argc, const VALUE *argv); -/* This must be private, @shyouhei guesses. 
*/ -#ifdef POSIX_SIGNAL -#define posix_signal ruby_posix_signal -void (*posix_signal(int, void (*)(int)))(int); -#endif - RBIMPL_ATTR_PURE() /** * Queries the name of the signal. It returns for instance `"KILL"` for diff --git a/include/ruby/internal/intern/string.h b/include/ruby/internal/intern/string.h index 0e2e6d6af7..6827563e8d 100644 --- a/include/ruby/internal/intern/string.h +++ b/include/ruby/internal/intern/string.h @@ -62,13 +62,13 @@ RBIMPL_SYMBOL_EXPORT_BEGIN() */ VALUE rb_str_new(const char *ptr, long len); -RBIMPL_ATTR_NONNULL(()) /** * Identical to rb_str_new(), except it assumes the passed pointer is a pointer * to a C string. * * @param[in] ptr A C string. * @exception rb_eNoMemError Failed to allocate memory. + * @exception rb_eArgError `ptr` is a null pointer. * @return An instance of ::rb_cString, of "binary" encoding, whose * contents are verbatim copy of `ptr`. * @pre `ptr` must not be a null pointer. @@ -122,37 +122,6 @@ VALUE rb_str_new_frozen(VALUE str); */ VALUE rb_str_new_with_class(VALUE obj, const char *ptr, long len); -RBIMPL_ATTR_NONNULL(()) -/** - * @deprecated This function once was a thing in the old days, but makes no - * sense any longer today. Exists here for backwards - * compatibility only. You can safely forget about it. - * - * @param[in] ptr A C string. - * @exception rb_eNoMemError Failed to allocate memory. - * @return An instance of ::rb_cString, of "binary" encoding, whose - * contents are verbatim copy of `ptr`. - * @pre `ptr` must not be a null pointer. - */ -VALUE rb_tainted_str_new_cstr(const char *ptr); - -/** - * @deprecated This function once was a thing in the old days, but makes no - * sense any longer today. Exists here for backwards - * compatibility only. You can safely forget about it. - * - * @param[in] ptr A memory region of `len` bytes length. - * @param[in] len Length of `ptr`, in bytes, not including the - * terminating NUL character. - * @exception rb_eNoMemError Failed to allocate `len+1` bytes. 
- * @exception rb_eArgError `len` is negative. - * @return An instance of ::rb_cString, of `len` bytes length, of - * "binary" encoding, whose contents are verbatim copy of `ptr`. - * @pre At least `len` bytes of continuous memory region shall be - * accessible via `ptr`. - */ -VALUE rb_tainted_str_new(const char *ptr, long len); - /** * Identical to rb_str_new(), except it generates a string of "default * external" encoding. @@ -333,7 +302,6 @@ VALUE rb_str_tmp_new(long len); */ VALUE rb_usascii_str_new(const char *ptr, long len); -RBIMPL_ATTR_NONNULL(()) /** * Identical to rb_str_new_cstr(), except it generates a string of "US ASCII" * encoding. It can also be seen as a routine Identical to @@ -342,6 +310,7 @@ RBIMPL_ATTR_NONNULL(()) * * @param[in] ptr A C string. * @exception rb_eNoMemError Failed to allocate memory. + * @exception rb_eArgError `ptr` is a null pointer. * @return An instance of ::rb_cString, of "US ASCII" encoding, whose * contents are verbatim copy of `ptr`. * @pre `ptr` must not be a null pointer. @@ -361,7 +330,6 @@ VALUE rb_usascii_str_new_cstr(const char *ptr); */ VALUE rb_utf8_str_new(const char *ptr, long len); -RBIMPL_ATTR_NONNULL(()) /** * Identical to rb_str_new_cstr(), except it generates a string of "UTF-8" * encoding. It can also be seen as a routine Identical to @@ -370,6 +338,7 @@ RBIMPL_ATTR_NONNULL(()) * * @param[in] ptr A C string. * @exception rb_eNoMemError Failed to allocate memory. + * @exception rb_eArgError `ptr` is a null pointer. * @return An instance of ::rb_cString, of "UTF-8" encoding, whose contents * are verbatim copy of `ptr`. * @pre `ptr` must not be a null pointer. @@ -443,7 +412,7 @@ VALUE rb_utf8_str_new_static(const char *ptr, long len); /** * Identical to rb_interned_str(), except it takes a Ruby's string instead of - * C's. It can also be seen as a routine identical to to rb_str_new_shared(), + * C's. 
It can also be seen as a routine identical to rb_str_new_shared(), * except it returns an infamous "f"string. * * @param[in] str An object of ::RString. @@ -485,7 +454,7 @@ VALUE rb_interned_str(const char *ptr, long len); RBIMPL_ATTR_NONNULL(()) /** * Identical to rb_interned_str(), except it assumes the passed pointer is a - * pointer to a C's string. It can also be seen as a routine identical to to + * pointer to a C's string. It can also be seen as a routine identical to * rb_str_to_interned_str(), except it takes a C's string instead of Ruby's. * Or it can also be seen as a routine identical to rb_str_new_cstr(), except * it returns an infamous "f"string. @@ -553,7 +522,6 @@ VALUE rb_str_buf_append(VALUE dst, VALUE src); /** @alias{rb_str_cat} */ VALUE rb_str_buf_cat(VALUE, const char*, long); -RBIMPL_ATTR_NONNULL(()) /** @alias{rb_str_cat_cstr} */ VALUE rb_str_buf_cat2(VALUE, const char*); @@ -634,6 +602,21 @@ VALUE rb_str_dup(VALUE str); VALUE rb_str_resurrect(VALUE str); /** + * Returns whether a string is chilled or not. + * + * This function is temporary and users must check for its presence using + * #ifdef HAVE_RB_STR_CHILLED_P. If HAVE_RB_STR_CHILLED_P is not defined, then + * strings can't be chilled. + * + * @param[in] str A string. + * @retval 1 The string is chilled. + * @retval 0 Otherwise. + */ +bool rb_str_chilled_p(VALUE str); + +#define HAVE_RB_STR_CHILLED_P 1 + +/** * Obtains a "temporary lock" of the string. This advisory locking mechanism * prevents other cooperating threads from tampering the receiver. The same * thing could be done via freeze mechanism, but this one can also be unlocked @@ -874,7 +857,6 @@ VALUE rb_str_resize(VALUE str, long len); */ VALUE rb_str_cat(VALUE dst, const char *src, long srclen); -RBIMPL_ATTR_NONNULL(()) /** * Identical to rb_str_cat(), except it assumes the passed pointer is a pointer * to a C string. @@ -882,6 +864,7 @@ RBIMPL_ATTR_NONNULL(()) * @param[out] dst Destination object. 
* @param[in] src Contents to append. * @exception rb_eArgError Result string too big. + * @exception rb_eArgError `src` is a null pointer. * @return The passed `dst`. * @pre `dst` must not be any arbitrary objects except ::RString. * @pre `src` must not be a null pointer. @@ -889,7 +872,6 @@ RBIMPL_ATTR_NONNULL(()) */ VALUE rb_str_cat_cstr(VALUE dst, const char *src); -RBIMPL_ATTR_NONNULL(()) /** @alias{rb_str_cat_cstr} */ VALUE rb_str_cat2(VALUE, const char*); @@ -1153,7 +1135,6 @@ VALUE rb_str_inspect(VALUE str); */ VALUE rb_str_dump(VALUE str); -RBIMPL_ATTR_NONNULL(()) /** * Divides the given string based on the given delimiter. This is the * 1-argument 0-block version of `String#split`. @@ -1161,6 +1142,7 @@ RBIMPL_ATTR_NONNULL(()) * @param[in] str Object in question to split. * @param[in] delim Delimiter, in C string. * @exception rb_eTypeError `str` has no implicit conversion to String. + * @exception rb_eArgError `delim` is a null pointer. * @return An array of strings, which are substrings of the passed `str`. * If `delim` is an empty C string (i.e. `""`), `str` is split into * each characters. If `delim` is a C string whose sole content is @@ -1400,22 +1382,6 @@ rbimpl_str_new_cstr(const char *str) return rb_str_new_static(str, len); } -RBIMPL_ATTR_DEPRECATED(("taintedness turned out to be a wrong idea.")) -/** - * @private - * - * This is an implementation detail. Don't bother. - * - * @param[in] str A C string literal. - * @return Corresponding Ruby string. - */ -static inline VALUE -rbimpl_tainted_str_new_cstr(const char *str) -{ - long len = rbimpl_strlen(str); - return rb_tainted_str_new(str, len); -} - RBIMPL_ATTR_NONNULL(()) /** * @private @@ -1602,22 +1568,6 @@ rbimpl_exc_new_cstr(VALUE exc, const char *str) rb_utf8_str_new) ((str), (len))) /** - * @deprecated This macro once was a thing in the old days, but makes no sense - * any longer today. Exists here for backwards compatibility - * only. You can safely forget about it. 
- * - * @param[in] str A C string. - * @exception rb_eNoMemError Failed to allocate memory. - * @return An instance of ::rb_cString, of "binary" encoding, whose - * contents are verbatim copy of `str`. - * @pre `str` must not be a null pointer. - */ -#define rb_tainted_str_new_cstr(str) \ - ((RBIMPL_CONSTANT_P(str) ? \ - rbimpl_tainted_str_new_cstr : \ - rb_tainted_str_new_cstr) (str)) - -/** * Identical to #rb_str_new_cstr, except it generates a string of "US ASCII" * encoding. It can also be seen as a routine Identical to * #rb_usascii_str_new, except it assumes the passed pointer is a pointer to a @@ -1741,7 +1691,6 @@ rbimpl_exc_new_cstr(VALUE exc, const char *str) #define rb_str_new3 rb_str_new_shared /**< @old{rb_str_new_shared} */ #define rb_str_new4 rb_str_new_frozen /**< @old{rb_str_new_frozen} */ #define rb_str_new5 rb_str_new_with_class /**< @old{rb_str_new_with_class} */ -#define rb_tainted_str_new2 rb_tainted_str_new_cstr /**< @old{rb_tainted_str_new_cstr} */ #define rb_str_buf_new2 rb_str_buf_new_cstr /**< @old{rb_str_buf_new_cstr} */ #define rb_usascii_str_new2 rb_usascii_str_new_cstr /**< @old{rb_usascii_str_new_cstr} */ #define rb_str_buf_cat rb_str_cat /**< @alias{rb_str_cat} */ diff --git a/include/ruby/internal/intern/struct.h b/include/ruby/internal/intern/struct.h index 312cf444e2..16b3fad4e0 100644 --- a/include/ruby/internal/intern/struct.h +++ b/include/ruby/internal/intern/struct.h @@ -46,14 +46,16 @@ VALUE rb_struct_new(VALUE klass, ...); * * @param[in] name Name of the class. * @param[in] ... Arbitrary number of `const char*`, terminated by - * zero. Each of which are the name of fields. + * NULL. Each of which are the name of fields. * @exception rb_eNameError `name` is not a constant name. * @exception rb_eTypeError `name` is already taken. - * @exception rb_eArgError Duplicated field name. + * @exception rb_eArgError Duplicated field name. * @return The defined class. * @post Global toplevel constant `name` is defined. 
* @note `name` is allowed to be a null pointer. This function creates * an anonymous struct class then. + * @note The GC does not collect nor move classes returned by this + * function. They are immortal. * * @internal * @@ -70,14 +72,16 @@ RBIMPL_ATTR_NONNULL((2)) * @param[out] space Namespace that the defining class shall reside. * @param[in] name Name of the class. * @param[in] ... Arbitrary number of `const char*`, terminated by - * zero. Each of which are the name of fields. + * NULL. Each of which are the name of fields. * @exception rb_eNameError `name` is not a constant name. * @exception rb_eTypeError `name` is already taken. - * @exception rb_eArgError Duplicated field name. + * @exception rb_eArgError Duplicated field name. * @return The defined class. * @post `name` is a constant under `space`. * @note In contrast to rb_struct_define(), it doesn't make any sense to * pass a null pointer to this function. + * @note The GC does not collect nor move classes returned by this + * function. They are immortal. */ VALUE rb_struct_define_under(VALUE space, const char *name, ...); @@ -164,10 +168,10 @@ VALUE rb_struct_alloc_noinit(VALUE klass); * @param[in] super Superclass of the defining class. * @param[in] func Must be 0 for extension libraries. * @param[in] ... Arbitrary number of `const char*`, terminated by - * zero. Each of which are the name of fields. + * NULL. Each of which are the name of fields. * @exception rb_eNameError `name` is not a constant name. * @exception rb_eTypeError `name` is already taken. - * @exception rb_eArgError Duplicated field name. + * @exception rb_eArgError Duplicated field name. * @return The defined class. * @post Global toplevel constant `name` is defined. * @note `name` is allowed to be a null pointer. This function creates @@ -187,17 +191,35 @@ RBIMPL_ATTR_NONNULL((2)) * @param[in] super Superclass of the defining class. * @param[in] alloc Must be 0 for extension libraries. * @param[in] ... 
Arbitrary number of `const char*`, terminated by - * zero. Each of which are the name of fields. + * NULL. Each of which are the name of fields. * @exception rb_eNameError `class_name` is not a constant name. * @exception rb_eTypeError `class_name` is already taken. - * @exception rb_eArgError Duplicated field name. + * @exception rb_eArgError Duplicated field name. * @return The defined class. * @post `class_name` is a constant under `outer`. * @note In contrast to rb_struct_define_without_accessor(), it doesn't * make any sense to pass a null name. + * @note The GC does not collect nor move classes returned by this + * function. They are immortal. */ VALUE rb_struct_define_without_accessor_under(VALUE outer, const char *class_name, VALUE super, rb_alloc_func_t alloc, ...); +/** + * Defines an anonymous data class. + * + * @endinternal + * + * @param[in] super Superclass of the defining class. Must be a + * descendant of ::rb_cData, or 0 as ::rb_cData. + * @param[in] ... Arbitrary number of `const char*`, terminated by + * NULL. Each of which are the name of fields. + * @exception rb_eArgError Duplicated field name. + * @return The defined class. + * @note The GC does not collect nor move classes returned by this + * function. They are immortal. + */ +VALUE rb_data_define(VALUE super, ...); + RBIMPL_SYMBOL_EXPORT_END() #endif /* RBIMPL_INTERN_STRUCT_H */ diff --git a/include/ruby/internal/intern/thread.h b/include/ruby/internal/intern/thread.h index 294e552fe9..716375acd7 100644 --- a/include/ruby/internal/intern/thread.h +++ b/include/ruby/internal/intern/thread.h @@ -46,7 +46,7 @@ void rb_thread_schedule(void); * * @param[in] fd A file descriptor. * @exception rb_eIOError Closed stream. - * @exception rb_eSystemCalleError Situations like EBADF. + * @exception rb_eSystemCallError Situations like EBADF. */ int rb_thread_wait_fd(int fd); @@ -56,7 +56,7 @@ int rb_thread_wait_fd(int fd); * * @param[in] fd A file descriptor. * @exception rb_eIOError Closed stream. 
- * @exception rb_eSystemCalleError Situations like EBADF. + * @exception rb_eSystemCallError Situations like EBADF. */ int rb_thread_fd_writable(int fd); diff --git a/include/ruby/internal/intern/vm.h b/include/ruby/internal/intern/vm.h index 562d30a6fe..29e0c7f534 100644 --- a/include/ruby/internal/intern/vm.h +++ b/include/ruby/internal/intern/vm.h @@ -229,8 +229,7 @@ void rb_define_alloc_func(VALUE klass, rb_alloc_func_t func); * restrict creation of an instance of a class. For example it rarely makes * sense for a DB adaptor class to allow programmers creating DB row objects * without querying the DB itself. You can kill sporadic creation of such - * objects then, by nullifying the allocator function using this API. Your - * object shall be allocated using #RB_NEWOBJ_OF() directly. + * objects then, by nullifying the allocator function using this API. * * @param[out] klass The class to modify. * @pre `klass` must be an instance of Class. @@ -247,21 +246,17 @@ void rb_undef_alloc_func(VALUE klass); * * @internal * - * Who cares? @shyouhei fins no practical usage of the return value. Maybe we + * Who cares? @shyouhei finds no practical usage of the return value. Maybe we * need KonMari. */ rb_alloc_func_t rb_get_alloc_func(VALUE klass); /** - * Clears the constant cache. Extension libraries should not bother such - * things. Just forget about this API (or even, the presence of constant - * cache). - * - * @internal - * - * Completely no idea why this function is defined in vm_method.c. + * Clears the inline constant caches associated with a particular ID. Extension + * libraries should not bother with such things. Just forget about this API (or + * even, the presence of constant caches). */ -void rb_clear_constant_cache(void); +void rb_clear_constant_cache_for_id(ID id); /** * Resembles `alias`. 
diff --git a/include/ruby/internal/interpreter.h b/include/ruby/internal/interpreter.h index 662d39c0ec..a10e7ad2d8 100644 --- a/include/ruby/internal/interpreter.h +++ b/include/ruby/internal/interpreter.h @@ -141,7 +141,7 @@ void ruby_show_copyright(void); * * @param[in] addr A pointer somewhere on the stack, near its bottom. */ -void ruby_init_stack(volatile VALUE *addr); +void ruby_init_stack(void *addr); /** * Initializes the VM and builtin libraries. diff --git a/include/ruby/internal/memory.h b/include/ruby/internal/memory.h index aa3464465d..270cc1ac8b 100644 --- a/include/ruby/internal/memory.h +++ b/include/ruby/internal/memory.h @@ -38,7 +38,7 @@ # include <alloca.h> #endif -#if defined(_MSC_VER) && defined(_WIN64) +#if defined(_MSC_VER) && defined(_M_AMD64) # include <intrin.h> # pragma intrinsic(_umul128) #endif @@ -56,13 +56,14 @@ #include "ruby/internal/has/builtin.h" #include "ruby/internal/stdalign.h" #include "ruby/internal/stdbool.h" +#include "ruby/internal/stdckdint.h" #include "ruby/internal/xmalloc.h" #include "ruby/backward/2/limits.h" #include "ruby/backward/2/long_long.h" #include "ruby/backward/2/assume.h" #include "ruby/defines.h" -/** @cond INTENAL_MACRO */ +/** @cond INTERNAL_MACRO */ /* Make alloca work the best possible way. */ #if defined(alloca) @@ -287,12 +288,12 @@ typedef uint128_t DSIZE_T; RBIMPL_CAST((type *)alloca(rbimpl_size_mul_or_raise(sizeof(type), (n)))) /** - * Identical to #RB_ALLOCV_N(), except it implicitly assumes the type of array - * is ::VALUE. + * Identical to #RB_ALLOCV_N(), except that it allocates a number of bytes and + * returns a void* . * * @param v A variable to hold the just-in-case opaque Ruby object. * @param n Size of allocation, in bytes. - * @return An array of `n` bytes of ::VALUE. + * @return A void pointer to `n` bytes storage. * @note `n` may be evaluated twice. */ #define RB_ALLOCV(v, n) \ @@ -363,7 +364,7 @@ typedef uint128_t DSIZE_T; * @return `p1`. 
* @post First `n` elements of `p2` are copied into `p1`. */ -#define MEMCPY(p1,p2,type,n) memcpy((p1), (p2), rbimpl_size_mul_or_raise(sizeof(type), (n))) +#define MEMCPY(p1,p2,type,n) ruby_nonempty_memcpy((p1), (p2), rbimpl_size_mul_or_raise(sizeof(type), (n))) /** * Handy macro to call memmove. @@ -567,7 +568,10 @@ rbimpl_size_mul_overflow(size_t x, size_t y) { struct rbimpl_size_mul_overflow_tag ret = { false, 0, }; -#if RBIMPL_HAS_BUILTIN(__builtin_mul_overflow) +#if defined(ckd_mul) + ret.left = ckd_mul(&ret.right, x, y); + +#elif RBIMPL_HAS_BUILTIN(__builtin_mul_overflow) ret.left = __builtin_mul_overflow(x, y, &ret.right); #elif defined(DSIZE_T) @@ -644,7 +648,6 @@ rb_alloc_tmp_buffer2(volatile VALUE *store, long count, size_t elsize) return rb_alloc_tmp_buffer_with_count(store, total_size, cnt); } -#if ! defined(__MINGW32__) && ! defined(__DOXYGEN__) RBIMPL_SYMBOL_EXPORT_BEGIN() RBIMPL_ATTR_NOALIAS() RBIMPL_ATTR_NONNULL((1)) @@ -663,8 +666,5 @@ ruby_nonempty_memcpy(void *dest, const void *src, size_t n) } } RBIMPL_SYMBOL_EXPORT_END() -#undef memcpy -#define memcpy ruby_nonempty_memcpy -#endif #endif /* RBIMPL_MEMORY_H */ diff --git a/include/ruby/internal/module.h b/include/ruby/internal/module.h index d678dd2102..97b0b2b8b0 100644 --- a/include/ruby/internal/module.h +++ b/include/ruby/internal/module.h @@ -56,8 +56,8 @@ RBIMPL_ATTR_NONNULL(()) * @post Top-level constant named `name` refers the returned class. * @note If a class named `name` is already defined and its superclass is * `super`, the function just returns the defined class. - * @note The compaction GC does not move classes returned by this - * function. + * @note The GC does not collect nor move classes returned by this + * function. They are immortal. * * @internal * @@ -75,8 +75,8 @@ RBIMPL_ATTR_NONNULL(()) * constant is not a module. * @return The created module. * @post Top-level constant named `name` refers the returned module. 
- * @note The compaction GC does not move classes returned by this - * function. + * @note The GC does not collect nor move modules returned by this + * function. They are immortal. * * @internal * @@ -103,8 +103,8 @@ RBIMPL_ATTR_NONNULL(()) * @post `outer::name` refers the returned class. * @note If a class named `name` is already defined and its superclass * is `super`, the function just returns the defined class. - * @note The compaction GC does not move classes returned by this - * function. + * @note The GC does not collect nor move classes returned by this + * function. They are immortal. */ VALUE rb_define_class_under(VALUE outer, const char *name, VALUE super); @@ -118,8 +118,8 @@ RBIMPL_ATTR_NONNULL(()) * the constant is not a class. * @return The created module. * @post `outer::name` refers the returned module. - * @note The compaction GC does not move classes returned by this - * function. + * @note The GC does not collect nor move modules returned by this + * function. They are immortal. */ VALUE rb_define_module_under(VALUE outer, const char *name); diff --git a/include/ruby/internal/newobj.h b/include/ruby/internal/newobj.h index a8a5557a25..6eee2fa5fa 100644 --- a/include/ruby/internal/newobj.h +++ b/include/ruby/internal/newobj.h @@ -29,63 +29,14 @@ #include "ruby/internal/value.h" #include "ruby/assert.h" -/** - * Declares, allocates, then assigns a new object to the given variable. - * - * @param obj Variable name. - * @param type Variable type. - * @exception rb_eNoMemError No space left. - * @return An allocated object, not initialised. - * @note Modern programs tend to use #NEWOBJ_OF instead. - * - * @internal - * - * :FIXME: Should we deprecate it? - */ -#define RB_NEWOBJ(obj,type) type *(obj) = RBIMPL_CAST((type *)rb_newobj()) - -/** - * Identical to #RB_NEWOBJ, except it also accepts the allocating object's - * class and flags. - * - * @param obj Variable name. - * @param type Variable type. - * @param klass Object's class. 
- * @param flags Object's flags. - * @exception rb_eNoMemError No space left. - * @return An allocated object, filled with the arguments. - */ -#define RB_NEWOBJ_OF(obj,type,klass,flags) type *(obj) = RBIMPL_CAST((type *)rb_newobj_of(klass, flags)) - -#define NEWOBJ RB_NEWOBJ /**< @old{RB_NEWOBJ} */ -#define NEWOBJ_OF RB_NEWOBJ_OF /**< @old{RB_NEWOBJ_OF} */ #define OBJSETUP rb_obj_setup /**< @old{rb_obj_setup} */ #define CLONESETUP rb_clone_setup /**< @old{rb_clone_setup} */ #define DUPSETUP rb_dup_setup /**< @old{rb_dup_setup} */ RBIMPL_SYMBOL_EXPORT_BEGIN() /** - * This is the implementation detail of #RB_NEWOBJ. - * - * @exception rb_eNoMemError No space left. - * @return An allocated object, not initialised. - */ -VALUE rb_newobj(void); - -/** - * This is the implementation detail of #RB_NEWOBJ_OF. - * - * @param klass Object's class. - * @param flags Object's flags. - * @exception rb_eNoMemError No space left. - * @return An allocated object, filled with the arguments. - */ -VALUE rb_newobj_of(VALUE klass, VALUE flags); - -/** * Fills common fields in the object. * - * @note Prefer rb_newobj_of() to this function. * @param[in,out] obj A Ruby object to be set up. * @param[in] klass `obj` will belong to this class. * @param[in] type One of ::ruby_value_type. 
@@ -172,6 +123,8 @@ RBIMPL_ATTR_DEPRECATED(("This is no longer how Object#clone works.")) static inline void rb_clone_setup(VALUE clone, VALUE obj) { + (void)clone; + (void)obj; return; } @@ -189,6 +142,8 @@ RBIMPL_ATTR_DEPRECATED(("This is no longer how Object#dup works.")) static inline void rb_dup_setup(VALUE dup, VALUE obj) { + (void)dup; + (void)obj; return; } diff --git a/include/ruby/internal/rgengc.h b/include/ruby/internal/rgengc.h deleted file mode 100644 index 7ea04442f6..0000000000 --- a/include/ruby/internal/rgengc.h +++ /dev/null @@ -1,443 +0,0 @@ -#ifndef RBIMPL_RGENGC_H /*-*-C++-*-vi:se ft=cpp:*/ -#define RBIMPL_RGENGC_H -/** - * @file - * @author Ruby developers <ruby-core@ruby-lang.org> - * @copyright This file is a part of the programming language Ruby. - * Permission is hereby granted, to either redistribute and/or - * modify this file, provided that the conditions mentioned in the - * file COPYING are met. Consult the file for details. - * @warning Symbols prefixed with either `RBIMPL` or `rbimpl` are - * implementation details. Don't take them as canon. They could - * rapidly appear then vanish. The name (path) of this header file - * is also an implementation detail. Do not expect it to persist - * at the place it is now. Developers are free to move it anywhere - * anytime at will. - * @note To ruby-core: remember that this header can be possibly - * recursively included from extension libraries written in C++. - * Do not expect for instance `__VA_ARGS__` is always available. - * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. - * @brief RGENGC write-barrier APIs. - * @see Sasada, K., "Gradual write-barrier insertion into a Ruby - * interpreter", in proceedings of the 2019 ACM SIGPLAN - * International Symposium on Memory Management (ISMM 2019), pp - * 115-121, 2019. 
https://doi.org/10.1145/3315573.3329986 - */ -#include "ruby/internal/attr/artificial.h" -#include "ruby/internal/attr/maybe_unused.h" -#include "ruby/internal/attr/pure.h" -#include "ruby/internal/dllexport.h" -#include "ruby/internal/special_consts.h" -#include "ruby/internal/stdbool.h" -#include "ruby/internal/value.h" -#include "ruby/assert.h" - -/** - * @private - * - * @deprecated This macro once was a thing in the old days, but makes no sense - * any longer today. Exists here for backwards compatibility - * only. You can safely forget about it. - */ -#undef USE_RGENGC -#define USE_RGENGC 1 - -/** - * @private - * - * This is a compile-time flag to enable/disable incremental GC feature. It - * has to be set at the time ruby itself compiles. Makes no sense for 3rd - * parties. It is safe for them to set this though; that just doesn't change - * anything. - */ -#ifndef USE_RINCGC -# define USE_RINCGC 1 -#endif - -/** - * @deprecated This macro seems broken. Setting this to anything other than - * zero just doesn't compile. We need to KonMari. - */ -#ifndef USE_RGENGC_LOGGING_WB_UNPROTECT -# define USE_RGENGC_LOGGING_WB_UNPROTECT 0 -#endif - -/** - * @private - * - * This is a compile-time flag to enable/disable write barrier for - * struct ::RArray. It has to be set at the time ruby itself compiles. Makes - * no sense for 3rd parties. - */ -#ifndef RGENGC_WB_PROTECTED_ARRAY -# define RGENGC_WB_PROTECTED_ARRAY 1 -#endif - -/** - * @private - * - * This is a compile-time flag to enable/disable write barrier for - * struct ::RHash. It has to be set at the time ruby itself compiles. Makes - * no sense for 3rd parties. - */ -#ifndef RGENGC_WB_PROTECTED_HASH -# define RGENGC_WB_PROTECTED_HASH 1 -#endif - -/** - * @private - * - * This is a compile-time flag to enable/disable write barrier for - * struct ::RStruct. It has to be set at the time ruby itself compiles. Makes - * no sense for 3rd parties. 
- */ -#ifndef RGENGC_WB_PROTECTED_STRUCT -# define RGENGC_WB_PROTECTED_STRUCT 1 -#endif - -/** - * @private - * - * This is a compile-time flag to enable/disable write barrier for - * struct ::RString. It has to be set at the time ruby itself compiles. Makes - * no sense for 3rd parties. - */ -#ifndef RGENGC_WB_PROTECTED_STRING -# define RGENGC_WB_PROTECTED_STRING 1 -#endif - -/** - * @private - * - * This is a compile-time flag to enable/disable write barrier for - * struct ::RObject. It has to be set at the time ruby itself compiles. Makes - * no sense for 3rd parties. - */ -#ifndef RGENGC_WB_PROTECTED_OBJECT -# define RGENGC_WB_PROTECTED_OBJECT 1 -#endif - -/** - * @private - * - * This is a compile-time flag to enable/disable write barrier for - * struct ::RRegexp. It has to be set at the time ruby itself compiles. Makes - * no sense for 3rd parties. - */ -#ifndef RGENGC_WB_PROTECTED_REGEXP -# define RGENGC_WB_PROTECTED_REGEXP 1 -#endif - -/** - * @private - * - * This is a compile-time flag to enable/disable write barrier for - * struct ::RClass. It has to be set at the time ruby itself compiles. Makes - * no sense for 3rd parties. - */ -#ifndef RGENGC_WB_PROTECTED_CLASS -# define RGENGC_WB_PROTECTED_CLASS 1 -#endif - -/** - * @private - * - * This is a compile-time flag to enable/disable write barrier for - * struct ::RFloat. It has to be set at the time ruby itself compiles. Makes - * no sense for 3rd parties. - */ -#ifndef RGENGC_WB_PROTECTED_FLOAT -# define RGENGC_WB_PROTECTED_FLOAT 1 -#endif - -/** - * @private - * - * This is a compile-time flag to enable/disable write barrier for - * struct ::RComplex. It has to be set at the time ruby itself compiles. - * Makes no sense for 3rd parties. - */ -#ifndef RGENGC_WB_PROTECTED_COMPLEX -# define RGENGC_WB_PROTECTED_COMPLEX 1 -#endif - -/** - * @private - * - * This is a compile-time flag to enable/disable write barrier for - * struct ::RRational. It has to be set at the time ruby itself compiles. 
- * Makes no sense for 3rd parties. - */ -#ifndef RGENGC_WB_PROTECTED_RATIONAL -# define RGENGC_WB_PROTECTED_RATIONAL 1 -#endif - -/** - * @private - * - * This is a compile-time flag to enable/disable write barrier for - * struct ::RBignum. It has to be set at the time ruby itself compiles. Makes - * no sense for 3rd parties. - */ -#ifndef RGENGC_WB_PROTECTED_BIGNUM -# define RGENGC_WB_PROTECTED_BIGNUM 1 -#endif - -/** - * @private - * - * @deprecated This macro once was a thing in the old days, but makes no sense - * any longer today. Exists here for backwards compatibility - * only. You can safely forget about it. - * - * @internal - * - * @shyouhei doesn't think anybody uses this right now. - */ -#ifndef RGENGC_WB_PROTECTED_NODE_CREF -# define RGENGC_WB_PROTECTED_NODE_CREF 1 -#endif - -/** - * @defgroup rgengc Write barrier (WB) interfaces: - * - * @note The following core interfaces can be changed in the future. Please - * catch up if you want to insert WB into C-extensions correctly. - * - * @{ - */ - -/** - * Declaration of a "back" pointer. This is a write barrier for new reference - * from "old" generation to "young" generation. It writes `young` into - * `*slot`, which is a pointer inside of `old`. - * - * @param[in] old An old object. - * @param[in] slot A pointer inside of `old`. - * @param[out] young A young object. - */ -#define RB_OBJ_WRITE(old, slot, young) \ - RBIMPL_CAST(rb_obj_write((VALUE)(old), (VALUE *)(slot), (VALUE)(young), __FILE__, __LINE__)) - -/** - * Identical to #RB_OBJ_WRITE(), except it doesn't write any values, but only a - * WB declaration. `oldv` is replaced value with `b` (not used in current - * Ruby). - * - * @param[in] old An old object. - * @param[in] oldv An object previously stored inside of `old`. - * @param[out] young A young object. 
- */ -#define RB_OBJ_WRITTEN(old, oldv, young) \ - RBIMPL_CAST(rb_obj_written((VALUE)(old), (VALUE)(oldv), (VALUE)(young), __FILE__, __LINE__)) -/** @} */ - -#define OBJ_PROMOTED_RAW RB_OBJ_PROMOTED_RAW /**< @old{RB_OBJ_PROMOTED_RAW} */ -#define OBJ_PROMOTED RB_OBJ_PROMOTED /**< @old{RB_OBJ_PROMOTED} */ -#define OBJ_WB_UNPROTECT RB_OBJ_WB_UNPROTECT /**< @old{RB_OBJ_WB_UNPROTECT} */ - -/** - * Asserts that the passed object is not fenced by write barriers. Objects of - * such property do not contribute to generational GCs. They are scanned - * always. - * - * @param[out] x An object that would not be protected by the barrier. - */ -#define RB_OBJ_WB_UNPROTECT(x) rb_obj_wb_unprotect(x, __FILE__, __LINE__) - -/** - * Identical to #RB_OBJ_WB_UNPROTECT(), except it can also assert that the - * given object is of given type. - * - * @param[in] type One of `ARRAY`, `STRING`, etc. - * @param[out] obj An object of `type` that would not be protected. - * - * @internal - * - * @shyouhei doesn't understand why this has to be visible from extensions. - */ -#define RB_OBJ_WB_UNPROTECT_FOR(type, obj) \ - (RGENGC_WB_PROTECTED_##type ? OBJ_WB_UNPROTECT(obj) : obj) - -/** - * @private - * - * This is an implementation detail of rb_obj_wb_unprotect(). People don't use - * it directly. - */ -#define RGENGC_LOGGING_WB_UNPROTECT rb_gc_unprotect_logging - -/** @cond INTERNAL_MACRO */ -#define RB_OBJ_PROMOTED_RAW RB_OBJ_PROMOTED_RAW -#define RB_OBJ_PROMOTED RB_OBJ_PROMOTED -/** @endcond */ - -RBIMPL_SYMBOL_EXPORT_BEGIN() -/** - * This is the implementation of #RB_OBJ_WRITE(). People don't use it - * directly. - * - * @param[in] old An object that points to `young`. - * @param[out] young An object that is referenced from `old`. - */ -void rb_gc_writebarrier(VALUE old, VALUE young); - -/** - * This is the implementation of #RB_OBJ_WB_UNPROTECT(). People don't use it - * directly. - * - * @param[out] obj An object that does not participate in WB. 
- */ -void rb_gc_writebarrier_unprotect(VALUE obj); - -#if USE_RGENGC_LOGGING_WB_UNPROTECT -/** - * @private - * - * This is the implementation of #RGENGC_LOGGING_WB_UNPROTECT(). People - * don't use it directly. - * - * @param[in] objptr Don't know why this is a pointer to void but in - * reality this is a pointer to an object that is about - * to be un-protected. - * @param[in] filename Pass C's `__FILE__` here. - * @param[in] line Pass C's `__LINE__` here. - */ -void rb_gc_unprotect_logging(void *objptr, const char *filename, int line); -#endif - -RBIMPL_SYMBOL_EXPORT_END() - -RBIMPL_ATTR_PURE_UNLESS_DEBUG() -RBIMPL_ATTR_ARTIFICIAL() -/** - * This is the implementation of #RB_OBJ_PROMOTED(). People don't use it - * directly. - * - * @param[in] obj An object to query. - * @retval true The object is "promoted". - * @retval false The object is young. Have not experienced GC at all. - */ -static inline bool -RB_OBJ_PROMOTED_RAW(VALUE obj) -{ - RBIMPL_ASSERT_OR_ASSUME(RB_FL_ABLE(obj)); - return RB_FL_ANY_RAW(obj, RUBY_FL_PROMOTED); -} - -RBIMPL_ATTR_PURE_UNLESS_DEBUG() -RBIMPL_ATTR_ARTIFICIAL() -/** - * Tests if the object is "promoted" -- that is, whether the object experienced - * one or more GC marks. - * - * @param[in] obj An object to query. - * @retval true The object is "promoted". - * @retval false The object is young. Have not experienced GC at all. - * @note Hello, is anyone actively calling this function? @shyouhei have - * never seen any actual usages outside of the GC implementation - * itself. - */ -static inline bool -RB_OBJ_PROMOTED(VALUE obj) -{ - if (! RB_FL_ABLE(obj)) { - return false; - } - else { - return RB_OBJ_PROMOTED_RAW(obj); - } -} - -/** - * This is the implementation of #RB_OBJ_WB_UNPROTECT(). People don't use it - * directly. - * - * @param[out] x An object that does not participate in WB. - * @param[in] filename C's `__FILE__` of the caller function. - * @param[in] line C's `__LINE__` of the caller function. 
- * @return x - */ -static inline VALUE -rb_obj_wb_unprotect( - VALUE x, - RBIMPL_ATTR_MAYBE_UNUSED() - const char *filename, - RBIMPL_ATTR_MAYBE_UNUSED() - int line) -{ -#if USE_RGENGC_LOGGING_WB_UNPROTECT - RGENGC_LOGGING_WB_UNPROTECT(RBIMPL_CAST((void *)x), filename, line); -#endif - rb_gc_writebarrier_unprotect(x); - return x; -} - -/** - * @private - * - * This is the implementation of #RB_OBJ_WRITTEN(). People don't use it - * directly. - * - * @param[in] a An old object. - * @param[in] oldv An object previously stored inside of `old`. - * @param[out] b A young object. - * @param[in] filename C's `__FILE__` of the caller function. - * @param[in] line C's `__LINE__` of the caller function. - * @return a - */ -static inline VALUE -rb_obj_written( - VALUE a, - RBIMPL_ATTR_MAYBE_UNUSED() - VALUE oldv, - VALUE b, - RBIMPL_ATTR_MAYBE_UNUSED() - const char *filename, - RBIMPL_ATTR_MAYBE_UNUSED() - int line) -{ -#if USE_RGENGC_LOGGING_WB_UNPROTECT - RGENGC_LOGGING_OBJ_WRITTEN(a, oldv, b, filename, line); -#endif - - if (!RB_SPECIAL_CONST_P(b)) { - rb_gc_writebarrier(a, b); - } - - return a; -} - -/** - * @private - * - * This is the implementation of #RB_OBJ_WRITE(). People don't use it - * directly. - * - * @param[in] a An old object. - * @param[in] slot A pointer inside of `old`. - * @param[out] b A young object. - * @param[in] filename C's `__FILE__` of the caller function. - * @param[in] line C's `__LINE__` of the caller function. 
- * @return a - */ -static inline VALUE -rb_obj_write( - VALUE a, VALUE *slot, VALUE b, - RBIMPL_ATTR_MAYBE_UNUSED() - const char *filename, - RBIMPL_ATTR_MAYBE_UNUSED() - int line) -{ -#ifdef RGENGC_LOGGING_WRITE - RGENGC_LOGGING_WRITE(a, slot, b, filename, line); -#endif - - *slot = b; - - rb_obj_written(a, RUBY_Qundef /* ignore `oldv' now */, b, filename, line); - return a; -} - -#endif /* RBIMPL_RGENGC_H */ diff --git a/include/ruby/internal/scan_args.h b/include/ruby/internal/scan_args.h index cf5b18f77d..1ed2bf6368 100644 --- a/include/ruby/internal/scan_args.h +++ b/include/ruby/internal/scan_args.h @@ -100,7 +100,7 @@ RBIMPL_ATTR_NONNULL((2, 3)) * param-arg-spec := pre-arg-spec [post-arg-spec] / post-arg-spec / * pre-opt-post-arg-spec * pre-arg-spec := num-of-leading-mandatory-args - [num-of-optional-args] + * [num-of-optional-args] * post-arg-spec := sym-for-variable-length-args * [num-of-trailing-mandatory-args] * pre-opt-post-arg-spec := num-of-leading-mandatory-args num-of-optional-args diff --git a/include/ruby/internal/special_consts.h b/include/ruby/internal/special_consts.h index 38934e4da3..dc0a6b41d6 100644 --- a/include/ruby/internal/special_consts.h +++ b/include/ruby/internal/special_consts.h @@ -76,6 +76,8 @@ #define RB_SPECIAL_CONST_P RB_SPECIAL_CONST_P #define RB_STATIC_SYM_P RB_STATIC_SYM_P #define RB_TEST RB_TEST +#define RB_UNDEF_P RB_UNDEF_P +#define RB_NIL_OR_UNDEF_P RB_NIL_OR_UNDEF_P /** @endcond */ /** special constants - i.e. non-zero and non-fixnum constants */ @@ -94,9 +96,9 @@ ruby_special_consts { RUBY_SYMBOL_FLAG, /**< Flag to denote a static symbol. 
*/ #elif USE_FLONUM RUBY_Qfalse = 0x00, /* ...0000 0000 */ + RUBY_Qnil = 0x04, /* ...0000 0100 */ RUBY_Qtrue = 0x14, /* ...0001 0100 */ - RUBY_Qnil = 0x08, /* ...0000 1000 */ - RUBY_Qundef = 0x34, /* ...0011 0100 */ + RUBY_Qundef = 0x24, /* ...0010 0100 */ RUBY_IMMEDIATE_MASK = 0x07, /* ...0000 0111 */ RUBY_FIXNUM_FLAG = 0x01, /* ...xxxx xxx1 */ RUBY_FLONUM_MASK = 0x03, /* ...0000 0011 */ @@ -104,14 +106,14 @@ ruby_special_consts { RUBY_SYMBOL_FLAG = 0x0c, /* ...xxxx 1100 */ #else RUBY_Qfalse = 0x00, /* ...0000 0000 */ - RUBY_Qtrue = 0x02, /* ...0000 0010 */ - RUBY_Qnil = 0x04, /* ...0000 0100 */ - RUBY_Qundef = 0x06, /* ...0000 0110 */ + RUBY_Qnil = 0x02, /* ...0000 0010 */ + RUBY_Qtrue = 0x06, /* ...0000 0110 */ + RUBY_Qundef = 0x0a, /* ...0000 1010 */ RUBY_IMMEDIATE_MASK = 0x03, /* ...0000 0011 */ RUBY_FIXNUM_FLAG = 0x01, /* ...xxxx xxx1 */ RUBY_FLONUM_MASK = 0x00, /* any values ANDed with FLONUM_MASK cannot be FLONUM_FLAG */ RUBY_FLONUM_FLAG = 0x02, /* ...0000 0010 */ - RUBY_SYMBOL_FLAG = 0x0e, /* ...0000 1110 */ + RUBY_SYMBOL_FLAG = 0x0e, /* ...xxxx 1110 */ #endif RUBY_SPECIAL_SHIFT = 8 /**< Least significant 8 bits are reserved. */ @@ -136,12 +138,21 @@ static inline bool RB_TEST(VALUE obj) { /* + * if USE_FLONUM * Qfalse: ....0000 0000 - * Qnil: ....0000 1000 - * ~Qnil: ....1111 0111 + * Qnil: ....0000 0100 + * ~Qnil: ....1111 1011 * v ....xxxx xxxx * ---------------------------- - * RTEST(v) ....xxxx 0xxx + * RTEST(v) ....xxxx x0xx + * + * if ! USE_FLONUM + * Qfalse: ....0000 0000 + * Qnil: ....0000 0010 + * ~Qnil: ....1111 1101 + * v ....xxxx xxxx + * ---------------------------- + * RTEST(v) ....xxxx xx0x * * RTEST(v) can be 0 if and only if (v == Qfalse || v == Qnil). */ @@ -168,6 +179,62 @@ RBIMPL_ATTR_CONST() RBIMPL_ATTR_CONSTEXPR(CXX11) RBIMPL_ATTR_ARTIFICIAL() /** + * Checks if the given object is undef. + * + * @param[in] obj An arbitrary ruby object. + * @retval true `obj` is ::RUBY_Qundef. + * @retval false Anything else. 
+ */ +static inline bool +RB_UNDEF_P(VALUE obj) +{ + return obj == RUBY_Qundef; +} + +RBIMPL_ATTR_CONST() +RBIMPL_ATTR_CONSTEXPR(CXX14) +RBIMPL_ATTR_ARTIFICIAL() +/** + * Checks if the given object is nil or undef. Can be used to see if + * a keyword argument is not given or given `nil`. + * + * @param[in] obj An arbitrary ruby object. + * @retval true `obj` is ::RUBY_Qnil or ::RUBY_Qundef. + * @retval false Anything else. + */ +static inline bool +RB_NIL_OR_UNDEF_P(VALUE obj) +{ + /* + * if USE_FLONUM + * Qundef: ....0010 0100 + * Qnil: ....0000 0100 + * mask: ....1101 1111 + * common_bits: ....0000 0100 + * --------------------------------- + * Qnil & mask ....0000 0100 + * Qundef & mask ....0000 0100 + * + * if ! USE_FLONUM + * Qundef: ....0000 1010 + * Qnil: ....0000 0010 + * mask: ....1111 0111 + * common_bits: ....0000 0010 + * ---------------------------- + * Qnil & mask ....0000 0010 + * Qundef & mask ....0000 0010 + * + * NIL_OR_UNDEF_P(v) can be true only when v is Qundef or Qnil. + */ + const VALUE mask = ~(RUBY_Qundef ^ RUBY_Qnil); + const VALUE common_bits = RUBY_Qundef & RUBY_Qnil; + return (obj & mask) == common_bits; +} + +RBIMPL_ATTR_CONST() +RBIMPL_ATTR_CONSTEXPR(CXX11) +RBIMPL_ATTR_ARTIFICIAL() +/** * Checks if the given object is a so-called Fixnum. * * @param[in] obj An arbitrary ruby object. @@ -259,7 +326,7 @@ RBIMPL_ATTR_ARTIFICIAL() static inline bool RB_SPECIAL_CONST_P(VALUE obj) { - return RB_IMMEDIATE_P(obj) || ! 
RB_TEST(obj); + return RB_IMMEDIATE_P(obj) || obj == RUBY_Qfalse; } RBIMPL_ATTR_CONST() diff --git a/include/ruby/internal/static_assert.h b/include/ruby/internal/static_assert.h index 594c2b2917..b9ff6646e7 100644 --- a/include/ruby/internal/static_assert.h +++ b/include/ruby/internal/static_assert.h @@ -71,7 +71,7 @@ #else # define RBIMPL_STATIC_ASSERT(name, expr) \ - typedef int static_assert_ ## name ## _check[1 - 2 * !(expr)] + MAYBE_UNUSED(typedef int static_assert_ ## name ## _check[1 - 2 * !(expr)]) #endif #endif /* RBIMPL_STATIC_ASSERT_H */ diff --git a/include/ruby/internal/stdbool.h b/include/ruby/internal/stdbool.h index b15321cb00..1ca61136ba 100644 --- a/include/ruby/internal/stdbool.h +++ b/include/ruby/internal/stdbool.h @@ -39,7 +39,7 @@ # /* Take stdbool.h definition. */ # include <stdbool.h> -#else +#elif !defined(HAVE__BOOL) typedef unsigned char _Bool; # /* See also http://www.open-std.org/jtc1/sc22/wg14/www/docs/n2229.htm */ # define bool _Bool diff --git a/include/ruby/internal/stdckdint.h b/include/ruby/internal/stdckdint.h new file mode 100644 index 0000000000..d02530136e --- /dev/null +++ b/include/ruby/internal/stdckdint.h @@ -0,0 +1,60 @@ +#ifndef RBIMPL_STDCKDINT_H /*-*-C++-*-vi:se ft=cpp:*/ +#define RBIMPL_STDCKDINT_H +/** + * @author Ruby developers <ruby-core@ruby-lang.org> + * @copyright This file is a part of the programming language Ruby. + * Permission is hereby granted, to either redistribute and/or + * modify this file, provided that the conditions mentioned in the + * file COPYING are met. Consult the file for details. + * @warning Symbols prefixed with either `RBIMPL` or `rbimpl` are + * implementation details. Don't take them as canon. They could + * rapidly appear then vanish. The name (path) of this header file + * is also an implementation detail. Do not expect it to persist + * at the place it is now. Developers are free to move it anywhere + * anytime at will. 
+ * @note To ruby-core: remember that this header can be possibly + * recursively included from extension libraries written in C++. + * Do not expect for instance `__VA_ARGS__` is always available. + * We assume C99 for ruby itself but we don't assume languages of + * extension libraries. They could be written in C++98. + * @brief C23 shim for <stdckdint.h> + */ +#include "ruby/internal/config.h" +#include "ruby/internal/has/builtin.h" +#include "ruby/internal/stdbool.h" + +#ifdef __has_include +# if __has_include(<stdckdint.h>) +# /* Conforming C23 situation; e.g. recent clang */ +# define RBIMPL_HAVE_STDCKDINT_H +# endif +#endif + +#ifdef HAVE_STDCKDINT_H +# /* Some OSes (most notably FreeBSD) have this file. */ +# define RBIMPL_HAVE_STDCKDINT_H +#endif + +#ifdef RBIMPL_HAVE_STDCKDINT_H +# /* Take that. */ +# include <stdckdint.h> + +#elif RBIMPL_HAS_BUILTIN(__builtin_add_overflow) +# define ckd_add(x, y, z) ((bool)__builtin_add_overflow((y), (z), (x))) +# define ckd_sub(x, y, z) ((bool)__builtin_sub_overflow((y), (z), (x))) +# define ckd_mul(x, y, z) ((bool)__builtin_mul_overflow((y), (z), (x))) +# define __STDC_VERSION_STDCKDINT_H__ 202311L + +#/* elif defined(__cplusplus) */ +#/* :TODO: if we assume C++11 we can use `<type_traits>` to implement them. */ + +#else +# /* intentionally leave them undefined */ +# /* to make `#ifdef ckd_add` etc. work as intended. 
*/ +# undef ckd_add +# undef ckd_sub +# undef ckd_mul +# undef __STDC_VERSION_STDCKDINT_H__ +#endif + +#endif /* RBIMPL_STDCKDINT_H */ diff --git a/include/ruby/internal/variable.h b/include/ruby/internal/variable.h index 1f84b92db0..c017ffe3f7 100644 --- a/include/ruby/internal/variable.h +++ b/include/ruby/internal/variable.h @@ -147,7 +147,7 @@ RBIMPL_ATTR_NONNULL(()) * init_Foo(void) * { * foo = rb_eval_string("..."); - * rb_define_global_variable("$foo", &foo); + * rb_define_variable("$foo", &foo); * } * ``` * diff --git a/include/ruby/io.h b/include/ruby/io.h index c117087d6a..e9dfeda5b1 100644 --- a/include/ruby/io.h +++ b/include/ruby/io.h @@ -35,7 +35,11 @@ # undef revents # endif # define RB_WAITFD_IN POLLIN -# define RB_WAITFD_PRI POLLPRI +# if defined(POLLPRI) +# define RB_WAITFD_PRI POLLPRI +# else +# define RB_WAITFD_PRI 0 +# endif # define RB_WAITFD_OUT POLLOUT #else # define RB_WAITFD_IN 0x001 @@ -45,11 +49,17 @@ /** @endcond */ #include "ruby/internal/attr/const.h" +#include "ruby/internal/attr/packed_struct.h" #include "ruby/internal/attr/pure.h" #include "ruby/internal/attr/noreturn.h" #include "ruby/internal/dllexport.h" #include "ruby/internal/value.h" -#include "ruby/backward/2/attributes.h" /* PACKED_STRUCT_UNALIGNED */ + +// IO#wait, IO#wait_readable, IO#wait_writable, IO#wait_priority are defined by this implementation. +#define RUBY_IO_WAIT_METHODS + +// Used as the default timeout argument to `rb_io_wait` to use the `IO#timeout` value. +#define RUBY_IO_TIMEOUT_DEFAULT Qnil RBIMPL_SYMBOL_EXPORT_BEGIN() @@ -57,23 +67,31 @@ struct stat; struct timeval; /** + * Indicates that a timeout has occurred while performing an IO operation. + */ +RUBY_EXTERN VALUE rb_eIOTimeoutError; + +/** * Type of events that an IO can wait. * * @internal * * This is visible from extension libraries because `io/wait` wants it. 
*/ -typedef enum { +enum rb_io_event { RUBY_IO_READABLE = RB_WAITFD_IN, /**< `IO::READABLE` */ RUBY_IO_WRITABLE = RB_WAITFD_OUT, /**< `IO::WRITABLE` */ RUBY_IO_PRIORITY = RB_WAITFD_PRI, /**< `IO::PRIORITY` */ -} rb_io_event_t; +}; + +typedef enum rb_io_event rb_io_event_t; /** * IO buffers. This is an implementation detail of ::rb_io_t::wbuf and * ::rb_io_t::rbuf. People don't manipulate it directly. */ -PACKED_STRUCT_UNALIGNED(struct rb_io_buffer_t { +RBIMPL_ATTR_PACKED_STRUCT_UNALIGNED_BEGIN() +struct rb_io_internal_buffer { /** Pointer to the underlying memory region, of at least `capa` bytes. */ char *ptr; /* off + len <= capa */ @@ -86,45 +104,84 @@ PACKED_STRUCT_UNALIGNED(struct rb_io_buffer_t { /** Designed capacity of the buffer. */ int capa; -}); +} RBIMPL_ATTR_PACKED_STRUCT_UNALIGNED_END(); /** @alias{rb_io_buffer_t} */ -typedef struct rb_io_buffer_t rb_io_buffer_t; +typedef struct rb_io_internal_buffer rb_io_buffer_t; + +/** Decomposed encoding flags (e.g. `"enc:enc2""`). */ +/* + * enc enc2 read action write action + * NULL NULL force_encoding(default_external) write the byte sequence of str + * e1 NULL force_encoding(e1) convert str.encoding to e1 + * e1 e2 convert from e2 to e1 convert str.encoding to e2 + */ +struct rb_io_encoding { + /** Internal encoding. */ + rb_encoding *enc; + /** External encoding. */ + rb_encoding *enc2; + /** + * Flags. + * + * @see enum ::ruby_econv_flag_type + */ + int ecflags; + /** + * Flags as Ruby hash. + * + * @internal + * + * This is set. But used from nowhere maybe? + */ + VALUE ecopts; +}; +#ifndef HAVE_RB_IO_T +#define HAVE_RB_IO_T 1 /** Ruby's IO, metadata and buffers. */ -typedef struct rb_io_t { - +struct rb_io { /** The IO's Ruby level counterpart. */ + RBIMPL_ATTR_DEPRECATED(("with no replacement")) VALUE self; /** stdio ptr for read/write, if available. */ + RBIMPL_ATTR_DEPRECATED(("with no replacement")) FILE *stdio_file; /** file descriptor. 
*/ + RBIMPL_ATTR_DEPRECATED(("rb_io_descriptor")) int fd; /** mode flags: FMODE_XXXs */ + RBIMPL_ATTR_DEPRECATED(("rb_io_mode")) int mode; /** child's pid (for pipes) */ + RBIMPL_ATTR_DEPRECATED(("with no replacement")) rb_pid_t pid; /** number of lines read */ + RBIMPL_ATTR_DEPRECATED(("with no replacement")) int lineno; /** pathname for file */ + RBIMPL_ATTR_DEPRECATED(("rb_io_path")) VALUE pathv; /** finalize proc */ - void (*finalize)(struct rb_io_t*,int); + RBIMPL_ATTR_DEPRECATED(("with no replacement")) + void (*finalize)(struct rb_io*,int); /** Write buffer. */ + RBIMPL_ATTR_DEPRECATED(("with no replacement")) rb_io_buffer_t wbuf; /** * (Byte) read buffer. Note also that there is a field called * ::rb_io_t::cbuf, which also concerns read IO. */ + RBIMPL_ATTR_DEPRECATED(("with no replacement")) rb_io_buffer_t rbuf; /** @@ -132,49 +189,25 @@ typedef struct rb_io_t { * * @see rb_io_set_write_io() */ + RBIMPL_ATTR_DEPRECATED(("rb_io_get_write_io")) VALUE tied_io_for_writing; - /** Decomposed encoding flags (e.g. `"enc:enc2""`). */ - /* - * enc enc2 read action write action - * NULL NULL force_encoding(default_external) write the byte sequence of str - * e1 NULL force_encoding(e1) convert str.encoding to e1 - * e1 e2 convert from e2 to e1 convert str.encoding to e2 - */ - struct rb_io_enc_t { - /** Internal encoding. */ - rb_encoding *enc; - - /** External encoding. */ - rb_encoding *enc2; - - /** - * Flags. - * - * @see enum ::ruby_econv_flag_type - */ - int ecflags; - - /** - * Flags as Ruby hash. - * - * @internal - * - * This is set. But used from nowhere maybe? - */ - VALUE ecopts; - } encs; /**< Decomposed encoding flags. */ + RBIMPL_ATTR_DEPRECATED(("with no replacement")) + struct rb_io_encoding encs; /**< Decomposed encoding flags. */ /** Encoding converter used when reading from this IO. */ + RBIMPL_ATTR_DEPRECATED(("with no replacement")) rb_econv_t *readconv; /** * rb_io_ungetc() destination. 
This buffer is read before checking * ::rb_io_t::rbuf */ + RBIMPL_ATTR_DEPRECATED(("with no replacement")) rb_io_buffer_t cbuf; /** Encoding converter used when writing to this IO. */ + RBIMPL_ATTR_DEPRECATED(("with no replacement")) rb_econv_t *writeconv; /** @@ -183,21 +216,25 @@ typedef struct rb_io_t { * conversion from encoding X to encoding Y does not exist, Ruby finds an * encoding Z that bridges the two, so that X to Z to Y conversion happens. */ + RBIMPL_ATTR_DEPRECATED(("with no replacement")) VALUE writeconv_asciicompat; /** Whether ::rb_io_t::writeconv is already set up. */ + RBIMPL_ATTR_DEPRECATED(("with no replacement")) int writeconv_initialized; /** * Value of ::rb_io_t::rb_io_enc_t::ecflags stored right before * initialising ::rb_io_t::writeconv. */ + RBIMPL_ATTR_DEPRECATED(("with no replacement")) int writeconv_pre_ecflags; /** * Value of ::rb_io_t::rb_io_enc_t::ecopts stored right before initialising * ::rb_io_t::writeconv. */ + RBIMPL_ATTR_DEPRECATED(("with no replacement")) VALUE writeconv_pre_ecopts; /** @@ -207,20 +244,21 @@ typedef struct rb_io_t { * * This of course doesn't help inter-process IO interleaves, though. */ + RBIMPL_ATTR_DEPRECATED(("with no replacement")) VALUE write_lock; -} rb_io_t; -/** @alias{rb_io_enc_t} */ -typedef struct rb_io_enc_t rb_io_enc_t; + /** + * The timeout associated with this IO when performing blocking operations. + */ + RBIMPL_ATTR_DEPRECATED(("rb_io_timeout/rb_io_set_timeout")) + VALUE timeout; +}; +#endif -/** - * @private - * - * @deprecated This macro once was a thing in the old days, but makes no sense - * any longer today. Exists here for backwards compatibility - * only. You can safely forget about it. 
- */ -#define HAVE_RB_IO_T 1 +typedef struct rb_io rb_io_t; + +/** @alias{rb_io_enc_t} */ +typedef struct rb_io_encoding rb_io_enc_t; /** * @name Possible flags for ::rb_io_t::mode @@ -311,7 +349,16 @@ typedef struct rb_io_enc_t rb_io_enc_t; * Setting this one and #FMODE_BINMODE at the same time is a contradiction. */ #define FMODE_TEXTMODE 0x00001000 -/* #define FMODE_PREP 0x00010000 */ +/** + * This flag means that an IO object is wrapping an "external" file descriptor, + * which is owned by something outside the Ruby interpreter (usually a C extension). + * Ruby will not close this file when the IO object is garbage collected. + * If this flag is set, then IO#autoclose? is false, and vice-versa. + * + * This flag was previously called FMODE_PREP internally. + */ +#define FMODE_EXTERNAL 0x00010000 + /* #define FMODE_SIGNAL_ON_EPIPE 0x00020000 */ /** @@ -326,6 +373,18 @@ typedef struct rb_io_enc_t rb_io_enc_t; /** @} */ /** + * Allocate a new IO object, with the given file descriptor. + */ +VALUE rb_io_open_descriptor(VALUE klass, int descriptor, int mode, VALUE path, VALUE timeout, struct rb_io_encoding *encoding); + +/** + * Returns whether or not the underlying IO is closed. + * + * @return Whether the underlying IO is closed. + */ +VALUE rb_io_closed_p(VALUE io); + +/** * Queries the underlying IO pointer. * * @param[in] obj An IO object. 
@@ -395,14 +454,14 @@ rb_io_t *rb_io_make_open_file(VALUE obj); * like this: * * ```CXX - * typedef struct rb_io_t { + * typedef struct rb_io { * FILE *f; // stdio ptr for read/write * FILE *f2; // additional ptr for rw pipes * int mode; // mode flags * int pid; // child's pid (for pipes) * int lineno; // number of lines read * char *path; // pathname for file - * void (*finalize) _((struct rb_io_t*,int)); // finalize proc + * void (*finalize) _((struct rb_io*,int)); // finalize proc * } rb_io_t; *``` * @@ -470,7 +529,7 @@ int rb_io_modestr_fmode(const char *modestr); /** * Identical to rb_io_modestr_fmode(), except it returns a mixture of `O_` - * flags. This for instnce returns `O_WRONLY | O_TRUNC | O_CREAT | O_EXCL` for + * flags. This for instance returns `O_WRONLY | O_TRUNC | O_CREAT | O_EXCL` for * `"wx"`. * * @param[in] modestr File mode, in C's string. @@ -648,10 +707,23 @@ VALUE rb_io_get_write_io(VALUE io); VALUE rb_io_set_write_io(VALUE io, VALUE w); /** - * Sets an IO to a "nonblock mode". This amends the way an IO operates so that - * instead of waiting for rooms for read/write, it returns errors. In case of - * multiplexed IO situations it can be vital for IO operations not to block. - * This is the key API to achieve that property. + * Instructs the OS to put its internal file structure into "nonblocking mode". + * This is an in-Kernel concept. Reading from/writing to that file using C + * function calls would return -1 with errno set. However when it comes to a + * ruby program, we hide that error behind our `IO#read` method. Ruby level + * `IO#read` blocks regardless of this flag. If you want to avoid blocking, + * you should consider using methods like `IO#readpartial`. + * + * ```ruby + * require 'io/nonblock' + * STDIN.nonblock = true + * STDIN.gets # blocks. + * ``` + * + * As of writing there is a room of this API in Fiber schedulers. A Fiber + * scheduler could be written in a way its behaviour depends on this property. 
+ * You need an in-depth understanding of how schedulers work to properly + * leverage this, though. * * @note Note however that nonblocking-ness propagates across process * boundaries. You must really carefully watch your step when turning @@ -671,6 +743,27 @@ VALUE rb_io_set_write_io(VALUE io, VALUE w); void rb_io_set_nonblock(rb_io_t *fptr); /** + * Returns the path for the given IO. + * + */ +VALUE rb_io_path(VALUE io); + +/** + * Returns an integer representing the numeric file descriptor for + * <em>io</em>. + * + * @param[in] io An IO. + * @retval int A file descriptor. + */ +int rb_io_descriptor(VALUE io); + +/** + * Get the mode of the IO. + * + */ +int rb_io_mode(VALUE io); + +/** * This function breaks down the option hash that `IO#initialize` takes into * components. This is an implementation detail of rb_io_extract_modeenc() * today. People prefer that API instead. @@ -735,8 +828,8 @@ int rb_io_extract_encoding_option(VALUE opt, rb_encoding **enc_p, rb_encoding ** * class File * def initialize: ( * (String | int) path, - * ?(Strig | int) fmode, - * ?(Strig | int) perm, + * ?(String | int) fmode, + * ?(String | int) perm, * ?mode: (String | int), * ?flags: int, * ?external_encoding: (Encoding | String), @@ -817,13 +910,37 @@ int rb_io_wait_writable(int fd); int rb_wait_for_single_fd(int fd, int events, struct timeval *tv); /** + * Get the timeout associated with the specified io object. + * + * @param[in] io An IO object. + * @retval RUBY_Qnil There is no associated timeout. + * @retval Otherwise The timeout value. + */ +VALUE rb_io_timeout(VALUE io); + +/** + * Set the timeout associated with the specified io object. This timeout is + * used as a best effort timeout to prevent operations from blocking forever. + * + * @param[in] io An IO object. + * @param[in] timeout A timeout value. Must respond to #to_f. + * @ + */ +VALUE rb_io_set_timeout(VALUE io, VALUE timeout); + +/** * Blocks until the passed IO is ready for the passed events. 
The "events" * here is a Ruby level integer, which is an OR-ed value of `IO::READABLE`, * `IO::WRITable`, and `IO::PRIORITY`. * + * If timeout is `Qnil`, it will use the default timeout as given by + * `rb_io_timeout(io)`. + * * @param[in] io An IO object to wait. * @param[in] events See above. * @param[in] timeout Time, or numeric seconds since UNIX epoch. + * If Qnil, use the default timeout. If Qfalse + * or Qundef, wait forever. * @exception rb_eIOError `io` is not open. * @exception rb_eRangeError `timeout` is out of range. * @exception rb_eSystemCallError `select(2)` failed for some reason. @@ -875,13 +992,8 @@ VALUE rb_io_maybe_wait(int error, VALUE io, VALUE events, VALUE timeout); * @exception rb_eIOError `io` is not open. * @exception rb_eRangeError `timeout` is out of range. * @exception rb_eSystemCallError `select(2)` failed for some reason. - * @exception rb_eTypeError Operation timed out. - * @return Always returns ::RUBY_IO_READABLE. - * - * @internal - * - * Because rb_io_maybe_wait() returns ::RUBY_Qfalse on timeout, this function - * fails to convert that value to `int`, and raises ::rb_eTypeError. + * @retval 0 Operation timed out. + * @retval Otherwise Always returns ::RUBY_IO_READABLE. */ int rb_io_maybe_wait_readable(int error, VALUE io, VALUE timeout); @@ -896,13 +1008,8 @@ int rb_io_maybe_wait_readable(int error, VALUE io, VALUE timeout); * @exception rb_eIOError `io` is not open. * @exception rb_eRangeError `timeout` is out of range. * @exception rb_eSystemCallError `select(2)` failed for some reason. - * @exception rb_eTypeError Operation timed out. - * @return Always returns ::RUBY_IO_WRITABLE. - * - * @internal - * - * Because rb_io_maybe_wait() returns ::RUBY_Qfalse on timeout, this function - * fails to convert that value to `int`, and raises ::rb_eTypeError. + * @retval 0 Operation timed out. + * @retval Otherwise Always returns ::RUBY_IO_WRITABLE. 
*/ int rb_io_maybe_wait_writable(int error, VALUE io, VALUE timeout); diff --git a/include/ruby/io/buffer.h b/include/ruby/io/buffer.h new file mode 100644 index 0000000000..e4d98bf051 --- /dev/null +++ b/include/ruby/io/buffer.h @@ -0,0 +1,110 @@ +#ifndef RUBY_IO_BUFFER_H +#define RUBY_IO_BUFFER_H +/** + * @file + * @author Samuel Williams + * @date Fri 2 Jul 2021 16:29:01 NZST + * @copyright Copyright (C) 2021 Samuel Williams + * @copyright This file is a part of the programming language Ruby. + * Permission is hereby granted, to either redistribute and/or + * modify this file, provided that the conditions mentioned in the + * file COPYING are met. Consult the file for details. + */ + +#pragma once + +#include "ruby/ruby.h" +#include "ruby/internal/config.h" + +RBIMPL_SYMBOL_EXPORT_BEGIN() + +// WARNING: This entire interface is experimental and may change in the future! +#define RB_IO_BUFFER_EXPERIMENTAL 1 + +#define RUBY_IO_BUFFER_VERSION 2 + +// The `IO::Buffer` class. +RUBY_EXTERN VALUE rb_cIOBuffer; + +// The operating system page size. +RUBY_EXTERN size_t RUBY_IO_BUFFER_PAGE_SIZE; + +// The default buffer size, usually a (small) multiple of the page size. +// Can be overridden by the RUBY_IO_BUFFER_DEFAULT_SIZE environment variable. +RUBY_EXTERN size_t RUBY_IO_BUFFER_DEFAULT_SIZE; + +// Represents the internal state of the buffer. +// More than one flag can be set at a time. +enum rb_io_buffer_flags { + // The memory in the buffer is owned by someone else. + // More specifically, it means that someone else owns the buffer and we shouldn't try to resize it. + RB_IO_BUFFER_EXTERNAL = 1, + // The memory in the buffer is allocated internally. + RB_IO_BUFFER_INTERNAL = 2, + // The memory in the buffer is mapped. + // A non-private mapping is marked as external. + RB_IO_BUFFER_MAPPED = 4, + + // A mapped buffer that is also shared. + RB_IO_BUFFER_SHARED = 8, + + // The buffer is locked and cannot be resized. 
+ // More specifically, it means we can't change the base address or size. + // A buffer is typically locked before a system call that uses the data. + RB_IO_BUFFER_LOCKED = 32, + + // The buffer mapping is private and will not impact other processes or the underlying file. + RB_IO_BUFFER_PRIVATE = 64, + + // The buffer is read-only and cannot be modified. + RB_IO_BUFFER_READONLY = 128, + + // The buffer is backed by a file. + RB_IO_BUFFER_FILE = 256, +}; + +// Represents the endian of the data types. +enum rb_io_buffer_endian { + // The least significant units are put first. + RB_IO_BUFFER_LITTLE_ENDIAN = 4, + RB_IO_BUFFER_BIG_ENDIAN = 8, + +#if defined(WORDS_BIGENDIAN) + RB_IO_BUFFER_HOST_ENDIAN = RB_IO_BUFFER_BIG_ENDIAN, +#else + RB_IO_BUFFER_HOST_ENDIAN = RB_IO_BUFFER_LITTLE_ENDIAN, +#endif + + RB_IO_BUFFER_NETWORK_ENDIAN = RB_IO_BUFFER_BIG_ENDIAN +}; + +VALUE rb_io_buffer_new(void *base, size_t size, enum rb_io_buffer_flags flags); +VALUE rb_io_buffer_map(VALUE io, size_t size, rb_off_t offset, enum rb_io_buffer_flags flags); + +VALUE rb_io_buffer_lock(VALUE self); +VALUE rb_io_buffer_unlock(VALUE self); +int rb_io_buffer_try_unlock(VALUE self); + +VALUE rb_io_buffer_free(VALUE self); +VALUE rb_io_buffer_free_locked(VALUE self); + +// Access the internal buffer and flags. Validates the pointers. +// The points may not remain valid if the source buffer is manipulated. +// Consider using rb_io_buffer_lock if needed. +enum rb_io_buffer_flags rb_io_buffer_get_bytes(VALUE self, void **base, size_t *size); +void rb_io_buffer_get_bytes_for_reading(VALUE self, const void **base, size_t *size); +void rb_io_buffer_get_bytes_for_writing(VALUE self, void **base, size_t *size); + +VALUE rb_io_buffer_transfer(VALUE self); +void rb_io_buffer_resize(VALUE self, size_t size); +void rb_io_buffer_clear(VALUE self, uint8_t value, size_t offset, size_t length); + +// The length is the minimum required length. 
+VALUE rb_io_buffer_read(VALUE self, VALUE io, size_t length, size_t offset); +VALUE rb_io_buffer_pread(VALUE self, VALUE io, rb_off_t from, size_t length, size_t offset); +VALUE rb_io_buffer_write(VALUE self, VALUE io, size_t length, size_t offset); +VALUE rb_io_buffer_pwrite(VALUE self, VALUE io, rb_off_t from, size_t length, size_t offset); + +RBIMPL_SYMBOL_EXPORT_END() + +#endif /* RUBY_IO_BUFFER_H */ diff --git a/include/ruby/memory_view.h b/include/ruby/memory_view.h index bac49e363e..42309d5afc 100644 --- a/include/ruby/memory_view.h +++ b/include/ruby/memory_view.h @@ -16,7 +16,7 @@ # include <stddef.h> /* size_t */ #endif -#if HAVE_SYS_TYPES_H +#ifdef HAVE_SYS_TYPES_H # include <sys/types.h> /* ssize_t */ #endif @@ -47,10 +47,10 @@ typedef struct { char format; /** :FIXME: what is a "native" size is unclear. */ - unsigned native_size_p: 1; + bool native_size_p; /** Endian of the component */ - unsigned little_endian_p: 1; + bool little_endian_p; /** The component's offset. */ size_t offset; @@ -146,8 +146,11 @@ typedef struct { * Or, NULL when this memory view exposes a flat array. */ const ssize_t *sub_offsets; - /** the private data for managing this exported memory */ + /** The private data for managing this exported memory */ void *private_data; + + /** DO NOT TOUCH THIS: The memory view entry for the internal use */ + const struct rb_memory_view_entry *_memory_view_entry; } rb_memory_view_t; /** Type of function of ::rb_memory_view_entry_t::get_func. */ @@ -160,9 +163,10 @@ typedef bool (* rb_memory_view_release_func_t)(VALUE obj, rb_memory_view_t *view typedef bool (* rb_memory_view_available_p_func_t)(VALUE obj); /** Operations applied to a specific kind of a memory view. */ -typedef struct { - - /** Exports a memory view from a Ruby object. */ +typedef struct rb_memory_view_entry { + /** + * Exports a memory view from a Ruby object. 
+ */ rb_memory_view_get_func_t get_func; /** diff --git a/include/ruby/missing.h b/include/ruby/missing.h index 1e97e294f1..aea6c9088d 100644 --- a/include/ruby/missing.h +++ b/include/ruby/missing.h @@ -33,6 +33,18 @@ # include <sys/time.h> #endif +#ifdef HAVE_SYS_STAT_H +# include <sys/stat.h> +#endif + +#ifdef HAVE_UNISTD_H +# include <unistd.h> +#endif + +#ifdef HAVE_STDIO_H +# include <stdio.h> +#endif + #ifdef HAVE_IEEEFP_H # include <ieeefp.h> #endif @@ -224,6 +236,107 @@ RUBY_EXTERN void setproctitle(const char *fmt, ...); RUBY_EXTERN void explicit_bzero(void *b, size_t len); #endif +#ifndef HAVE_TZSET +RUBY_EXTERN void tzset(void); +#endif + +#ifndef HAVE_POSIX_MADVISE +RUBY_EXTERN int posix_madvise(void *, size_t, int); +#endif + +#ifndef HAVE_GETEUID +RUBY_EXTERN rb_uid_t geteuid(void); +#endif + +#ifndef HAVE_GETUID +RUBY_EXTERN rb_uid_t getuid(void); +#endif + +#ifndef HAVE_GETEGID +RUBY_EXTERN rb_gid_t getegid(void); +#endif + +#ifndef HAVE_GETGID +RUBY_EXTERN rb_gid_t getgid(void); +#endif + +#ifndef HAVE_GETLOGIN +RUBY_EXTERN char *getlogin(void); +#endif + +#ifndef HAVE_GETPPID +RUBY_EXTERN rb_pid_t getppid(void); +#endif + +#ifndef HAVE_UMASK +RUBY_EXTERN rb_mode_t umask(rb_mode_t); +#endif + +#ifndef HAVE_CHMOD +RUBY_EXTERN int chmod(const char *, rb_mode_t); +#endif + +#ifndef HAVE_CHOWN +RUBY_EXTERN int chown(const char *, rb_uid_t, rb_gid_t); +#endif + +#ifndef HAVE_PCLOSE +RUBY_EXTERN int pclose(FILE *); +#endif + +#ifndef HAVE_POPEN +RUBY_EXTERN FILE *popen(const char *, const char *); +#endif + +#ifndef HAVE_PIPE +RUBY_EXTERN int pipe(int [2]); +#endif + +#ifndef HAVE_DUP +RUBY_EXTERN int dup(int); +#endif + +#ifndef HAVE_DUP2 +RUBY_EXTERN int dup2(int, int); +#endif + +#ifndef HAVE_KILL +RUBY_EXTERN int kill(rb_pid_t, int); +#endif + +#ifndef HAVE_EXECL +RUBY_EXTERN int execl(const char *, const char *, ...); +#endif + +#ifndef HAVE_EXECLE +RUBY_EXTERN int execle(const char *, const char *, ...); +#endif + +#ifndef HAVE_EXECV +RUBY_EXTERN 
int execv(const char *, char *const []); +#endif + +#ifndef HAVE_EXECVE +RUBY_EXTERN int execve(const char *, char *const [], char *const []); +#endif + +#ifndef HAVE_SHUTDOWN +RUBY_EXTERN int shutdown(int, int); +#endif + +#ifndef HAVE_SYSTEM +RUBY_EXTERN int system(const char *); +#endif + +#ifndef WNOHANG +# define WNOHANG 0 +#endif + +#ifndef HAVE_WAITPID +# define HAVE_WAITPID 1 +RUBY_EXTERN rb_pid_t waitpid(rb_pid_t, int *, int); +#endif + RBIMPL_SYMBOL_EXPORT_END() #endif /* RUBY_MISSING_H */ diff --git a/include/ruby/onigmo.h b/include/ruby/onigmo.h index 6187b37dc3..d233336316 100644 --- a/include/ruby/onigmo.h +++ b/include/ruby/onigmo.h @@ -356,9 +356,9 @@ int onigenc_ascii_only_case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, c #define ONIGENC_PRECISE_MBC_ENC_LEN(enc,p,e) (enc)->precise_mbc_enc_len(p,e,enc) ONIG_EXTERN -int onigenc_mbclen_approximate(const OnigUChar* p,const OnigUChar* e, const struct OnigEncodingTypeST* enc); +int onigenc_mbclen(const OnigUChar* p,const OnigUChar* e, const struct OnigEncodingTypeST* enc); -#define ONIGENC_MBC_ENC_LEN(enc,p,e) onigenc_mbclen_approximate(p,e,enc) +#define ONIGENC_MBC_ENC_LEN(enc,p,e) onigenc_mbclen(p,e,enc) #define ONIGENC_MBC_MAXLEN(enc) ((enc)->max_enc_len) #define ONIGENC_MBC_MAXLEN_DIST(enc) ONIGENC_MBC_MAXLEN(enc) #define ONIGENC_MBC_MINLEN(enc) ((enc)->min_enc_len) @@ -793,6 +793,13 @@ typedef struct re_pattern_buffer { OnigDistance dmin; /* min-distance of exact or map */ OnigDistance dmax; /* max-distance of exact or map */ + /* rb_hrtime_t from hrtime.h */ +#ifdef MY_RUBY_BUILD_MAY_TIME_TRAVEL + int128_t timelimit; +#else + uint64_t timelimit; +#endif + /* regex_t link chain */ struct re_pattern_buffer* chain; /* escape compile-conflict */ } OnigRegexType; @@ -837,6 +844,8 @@ void onig_free(OnigRegex); ONIG_EXTERN void onig_free_body(OnigRegex); ONIG_EXTERN +int onig_reg_copy(OnigRegex* reg, OnigRegex orig_reg); +ONIG_EXTERN OnigPosition onig_scan(OnigRegex reg, const OnigUChar* str, 
const OnigUChar* end, OnigRegion* region, OnigOptionType option, int (*scan_callback)(OnigPosition, OnigPosition, OnigRegion*, void*), void* callback_arg); ONIG_EXTERN OnigPosition onig_search(OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* start, const OnigUChar* range, OnigRegion* region, OnigOptionType option); @@ -845,6 +854,8 @@ OnigPosition onig_search_gpos(OnigRegex, const OnigUChar* str, const OnigUChar* ONIG_EXTERN OnigPosition onig_match(OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* at, OnigRegion* region, OnigOptionType option); ONIG_EXTERN +int onig_check_linear_time(OnigRegex reg); +ONIG_EXTERN OnigRegion* onig_region_new(void); ONIG_EXTERN void onig_region_init(OnigRegion* region); diff --git a/include/ruby/ractor.h b/include/ruby/ractor.h index 47eac038c9..7811616f6d 100644 --- a/include/ruby/ractor.h +++ b/include/ruby/ractor.h @@ -145,7 +145,7 @@ bool rb_ractor_local_storage_value_lookup(rb_ractor_local_key_t key, VALUE *val) * Associates the passed value to the passed key. * * @param[in] key A ractor-local storage key. - * @param[in] val Arbitary ruby object. + * @param[in] val Arbitrary ruby object. * @post `val` corresponds to `key` in the current Ractor. */ void rb_ractor_local_storage_value_set(rb_ractor_local_key_t key, VALUE val); @@ -241,8 +241,9 @@ RBIMPL_SYMBOL_EXPORT_END() * extremely carefully implemented to be Ractor-safe; for instance integers * have such property. This function can classify that. * - * @param[in] obj Arbitrary ruby object. - * @ + * @param[in] obj Arbitrary ruby object. + * @retval true `obj` is capable of shared across ractors. + * @retval false `obj` cannot travel across ractor boundaries. 
*/ static inline bool rb_ractor_shareable_p(VALUE obj) diff --git a/include/ruby/random.h b/include/ruby/random.h index 657b37f034..f3df0d96fb 100644 --- a/include/ruby/random.h +++ b/include/ruby/random.h @@ -11,11 +11,31 @@ * * This is a set of APIs to roll your own subclass of ::rb_cRandom. An * illustrative example of such PRNG can be found at - * `ext/-test-/ramdom/loop.c`. + * `ext/-test-/random/loop.c`. */ #include "ruby/ruby.h" +/* + * version + * 0: before versioning; deprecated + * 1: added version, flags and init_32bit function + */ +#define RUBY_RANDOM_INTERFACE_VERSION_MAJOR 1 +#define RUBY_RANDOM_INTERFACE_VERSION_MINOR 0 + +#define RUBY_RANDOM_PASTE_VERSION_SUFFIX(x, y, z) x##_##y##_##z +#define RUBY_RANDOM_WITH_VERSION_SUFFIX(name, major, minor) \ + RUBY_RANDOM_PASTE_VERSION_SUFFIX(name, major, minor) +#define rb_random_data_type \ + RUBY_RANDOM_WITH_VERSION_SUFFIX(rb_random_data_type, \ + RUBY_RANDOM_INTERFACE_VERSION_MAJOR, \ + RUBY_RANDOM_INTERFACE_VERSION_MINOR) +#define RUBY_RANDOM_INTERFACE_VERSION_INITIALIZER \ + {RUBY_RANDOM_INTERFACE_VERSION_MAJOR, RUBY_RANDOM_INTERFACE_VERSION_MINOR} +#define RUBY_RANDOM_INTERFACE_VERSION_MAJOR_MAX 0xff +#define RUBY_RANDOM_INTERFACE_VERSION_MINOR_MAX 0xff + RBIMPL_SYMBOL_EXPORT_BEGIN() /** @@ -48,6 +68,17 @@ typedef void rb_random_init_func(rb_random_t *rng, const uint32_t *buf, size_t l RBIMPL_ATTR_NONNULL(()) /** + * This is the type of functions called when your random object is initialised. + * Passed data is the seed integer. + * + * @param[out] rng Your random struct to fill in. + * @param[in] data Seed, single word. + * @post `rng` is initialised using the passed seeds. + */ +typedef void rb_random_init_int32_func(rb_random_t *rng, uint32_t data); + +RBIMPL_ATTR_NONNULL(()) +/** * This is the type of functions called from your object's `#rand` method. * * @param[out] rng Your random struct to extract an integer from. @@ -84,9 +115,24 @@ typedef struct { /** Number of bits of seed numbers. 
*/ size_t default_seed_bits; - /** Initialiser function. */ + /** + * Major/minor versions of this interface + */ + struct { + uint8_t major, minor; + } version; + + /** + * Reserved flags + */ + uint16_t flags; + + /** Function to initialize from uint32_t array. */ rb_random_init_func *init; + /** Function to initialize from single uint32_t. */ + rb_random_init_int32_func *init_int32; + /** Function to obtain a random integer. */ rb_random_get_int32_func *get_int32; @@ -130,11 +176,12 @@ typedef struct { } rb_random_interface_t; /** - * This utility macro defines 3 functions named prefix_init, prefix_get_int32, - * prefix_get_bytes. + * This utility macro defines 4 functions named prefix_init, prefix_init_int32, + * prefix_get_int32, prefix_get_bytes. */ #define RB_RANDOM_INTERFACE_DECLARE(prefix) \ static void prefix##_init(rb_random_t *, const uint32_t *, size_t); \ + static void prefix##_init_int32(rb_random_t *, uint32_t); \ static unsigned int prefix##_get_int32(rb_random_t *); \ static void prefix##_get_bytes(rb_random_t *, void *, size_t) @@ -161,7 +208,9 @@ typedef struct { * ``` */ #define RB_RANDOM_INTERFACE_DEFINE(prefix) \ + RUBY_RANDOM_INTERFACE_VERSION_INITIALIZER, 0, \ prefix##_init, \ + prefix##_init_int32, \ prefix##_get_int32, \ prefix##_get_bytes @@ -173,6 +222,12 @@ typedef struct { RB_RANDOM_INTERFACE_DEFINE(prefix), \ prefix##_get_real +#define RB_RANDOM_DEFINE_INIT_INT32_FUNC(prefix) \ + static void prefix##_init_int32(rb_random_t *rnd, uint32_t data) \ + { \ + prefix##_init(rnd, &data, 1); \ + } + #if defined _WIN32 && !defined __CYGWIN__ typedef rb_data_type_t rb_random_data_type_t; # define RB_RANDOM_PARENT 0 @@ -189,7 +244,7 @@ typedef const rb_data_type_t rb_random_data_type_t; * 0, RB_RANDOM_INTERFACE_DEFINE(your), * }; * - * static inline constexpr your_prng = { + * static inline constexpr rb_random_data_type_t your_prng_type = { * "your PRNG", * { rb_random_mark, }, * RB_RANDOM_PARENT, // <<-- HERE diff --git a/include/ruby/re.h 
b/include/ruby/re.h index 3892d6e7f2..f86d6f26cf 100644 --- a/include/ruby/re.h +++ b/include/ruby/re.h @@ -18,6 +18,7 @@ #include <stdio.h> +#include "ruby/onigmo.h" #include "ruby/regex.h" #include "ruby/internal/core/rmatch.h" #include "ruby/internal/dllexport.h" @@ -126,6 +127,30 @@ VALUE rb_reg_quote(VALUE str); regex_t *rb_reg_prepare_re(VALUE re, VALUE str); /** + * Runs a regular expression match using function `match`. Performs preparation, + * error handling, and memory cleanup. + * + * @param[in] re Target regular expression. + * @param[in] str What `re` is about to run on. + * @param[in] match The function to run to match `str` against `re`. + * @param[in] args Pointer to arguments to pass into `match`. + * @param[out] regs Registers on a successful match. + * @exception rb_eArgError `re` does not fit for `str`. + * @exception rb_eEncCompatError `re` and `str` are incompatible. + * @exception rb_eRegexpError `re` is malformed. + * @return Match position on a successful match, `ONIG_MISMATCH` otherwise. + * + * @internal + * + * The type `regex_t *` is defined in `<ruby/onigmo.h>`, _and_ + * _conflicts_ with POSIX's `<regex.h>`. We can no longer save the situation + * at this point. Just don't mix the two. + */ +OnigPosition rb_reg_onig_match(VALUE re, VALUE str, + OnigPosition (*match)(regex_t *reg, VALUE str, struct re_registers *regs, void *args), + void *args, struct re_registers *regs); + +/** * Duplicates a match data. This is roughly the same as `onig_region_copy()`, * except it tries to GC when there is not enough memory. 
* diff --git a/include/ruby/ruby.h b/include/ruby/ruby.h index f35d13685c..035f02c70b 100644 --- a/include/ruby/ruby.h +++ b/include/ruby/ruby.h @@ -23,6 +23,7 @@ #include <stdarg.h> #include "defines.h" +#include "ruby/internal/abi.h" #include "ruby/internal/anyargs.h" #include "ruby/internal/arithmetic.h" #include "ruby/internal/core.h" @@ -42,7 +43,6 @@ #include "ruby/internal/method.h" #include "ruby/internal/module.h" #include "ruby/internal/newobj.h" -#include "ruby/internal/rgengc.h" #include "ruby/internal/scan_args.h" #include "ruby/internal/special_consts.h" #include "ruby/internal/symbol.h" @@ -97,8 +97,10 @@ VALUE rb_get_path(VALUE obj); VALUE rb_get_path_no_checksafe(VALUE); /** - * @deprecated This macro is an alias of #FilePathValue now. The part that did - * "String" was deleted. It remains here because of no harm. + * This macro actually does the same thing as #FilePathValue now. The "String" + * part indicates that this is for when a string is treated like a pathname, + * rather than the actual pathname on the file systems. For examples: + * `Dir.fnmatch?`, `File.join`, `File.basename`, etc. */ #define FilePathStringValue(v) ((v) = rb_get_path(v)) @@ -107,7 +109,7 @@ VALUE rb_get_path_no_checksafe(VALUE); # define rb_varargs_argc_check_runtime(argc, vargc) \ (((argc) <= (vargc)) ? (argc) : \ (rb_fatal("argc(%d) exceeds actual arguments(%d)", \ - argc, vargc), 0)) + argc, vargc), 0)) # define rb_varargs_argc_valid_p(argc, vargc) \ ((argc) == 0 ? 
(vargc) <= 1 : /* [ruby-core:85266] [Bug #14425] */ \ (argc) == (vargc)) @@ -116,16 +118,16 @@ VALUE rb_get_path_no_checksafe(VALUE); ERRORFUNC((" argument length doesn't match"), int rb_varargs_bad_length(int,int)); # else # define rb_varargs_bad_length(argc, vargc) \ - ((argc)/rb_varargs_argc_valid_p(argc, vargc)) + ((argc)/rb_varargs_argc_valid_p(argc, vargc)) # endif # define rb_varargs_argc_check(argc, vargc) \ __builtin_choose_expr(__builtin_constant_p(argc), \ - (rb_varargs_argc_valid_p(argc, vargc) ? (argc) : \ - rb_varargs_bad_length(argc, vargc)), \ - rb_varargs_argc_check_runtime(argc, vargc)) + (rb_varargs_argc_valid_p(argc, vargc) ? (argc) : \ + rb_varargs_bad_length(argc, vargc)), \ + rb_varargs_argc_check_runtime(argc, vargc)) # else # define rb_varargs_argc_check(argc, vargc) \ - rb_varargs_argc_check_runtime(argc, vargc) + rb_varargs_argc_check_runtime(argc, vargc) # endif #endif /** @endcond */ @@ -270,30 +272,148 @@ RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 3, 0) */ int ruby_vsnprintf(char *str, size_t n, char const *fmt, va_list ap); +#include <errno.h> + +/** + * @name Errno handling routines for userland threads + * @note POSIX chapter 2 section 3 states that for each thread of a process, + * the value of `errno` shall not be affected by function calls or + * assignments to `errno` by other threads. + * + * Soooo this `#define errno` below seems like a noob mistake at first sight. + * If you look at its actual implementation, the functions are just adding one + * level of indirection. It doesn't make any sense sorry? But yes! @ko1 told + * @shyouhei that this is inevitable. + * + * The ultimate reason is because Ruby now has N:M threads implemented. + * Threads of that sort change their context in user land. A function can be + * "transferred" between threads in middle of their executions. 
Let us for + * instance consider: + * + * ```cxx + * void foo() + * { + * auto i = errno; + * close(0); + * errno = i; + * } + * ``` + * + * This function (if ran under our Ractor) could change its running thread at + * the `close` function. But the two `errno` invocations are different! Look + * how the source code above is compiled by clang 17 with `-O3` flag @ Linux: + * + * ``` + * foo(int): # @foo(int) + * push rbp + * push r14 + * push rbx + * mov ebx, edi + * call __errno_location@PLT + * mov r14, rax + * mov ebp, dword ptr [rax] + * mov edi, ebx + * call close@PLT + * mov dword ptr [r14], ebp + * pop rbx + * pop r14 + * pop rbp + * ret + * ``` + * + * Notice how `__errno_location@PLT` is `call`-ed only once. The compiler + * assumes that the location of `errno` does not change during a function call. + * Sadly this is no longer true for us. The `close@PLT` now changes threads, + * which should also change where `errno` is stored. + * + * With the `#define errno` below the compilation result changes to this: + * + * ``` + * foo(int): # @foo(int) + * push rbp + * push rbx + * push rax + * mov ebx, edi + * call rb_errno_ptr()@PLT + * mov ebp, dword ptr [rax] + * mov edi, ebx + * call close@PLT + * call rb_errno_ptr()@PLT + * mov dword ptr [rax], ebp + * add rsp, 8 + * pop rbx + * pop rbp + * ret + * ``` + * + * Which fixes the problem. + */ + +/** + * Identical to system `errno`. + * + * @return The last set `errno` number. + */ +int rb_errno(void); + +/** + * Set the errno. + * + * @param err New `errno`. + * @post `errno` is now set to `err`. + */ +void rb_errno_set(int err); + +/** + * The location of `errno` + * + * @return The (thread-specific) location of `errno`. + */ +int *rb_errno_ptr(void); + +/** + * Not sure if it is necessary for extension libraries but this is where the + * "bare" errno is located. + * + * @return The location of `errno`. 
+ */ +static inline int * +rb_orig_errno_ptr(void) +{ + return &errno; +} + +#define rb_orig_errno errno /**< System-provided original `errno`. */ +#undef errno +#define errno (*rb_errno_ptr()) /**< Ractor-aware version of `errno`. */ + +/** @} */ + + /** @cond INTERNAL_MACRO */ #if RBIMPL_HAS_WARNING("-Wgnu-zero-variadic-macro-arguments") # /* Skip it; clang -pedantic doesn't like the following */ #elif defined(__GNUC__) && defined(HAVE_VA_ARGS_MACRO) && defined(__OPTIMIZE__) # define rb_yield_values(argc, ...) \ __extension__({ \ - const int rb_yield_values_argc = (argc); \ - const VALUE rb_yield_values_args[] = {__VA_ARGS__}; \ - const int rb_yield_values_nargs = \ - (int)(sizeof(rb_yield_values_args) / sizeof(VALUE)); \ - rb_yield_values2( \ - rb_varargs_argc_check(rb_yield_values_argc, rb_yield_values_nargs), \ - rb_yield_values_nargs ? rb_yield_values_args : NULL); \ + const int rb_yield_values_argc = (argc); \ + const VALUE rb_yield_values_args[] = {__VA_ARGS__}; \ + const int rb_yield_values_nargs = \ + (int)(sizeof(rb_yield_values_args) / sizeof(VALUE)); \ + rb_yield_values2( \ + rb_varargs_argc_check(rb_yield_values_argc, rb_yield_values_nargs), \ + rb_yield_values_nargs ? rb_yield_values_args : NULL); \ }) # define rb_funcall(recv, mid, argc, ...) \ __extension__({ \ - const int rb_funcall_argc = (argc); \ - const VALUE rb_funcall_args[] = {__VA_ARGS__}; \ - const int rb_funcall_nargs = \ - (int)(sizeof(rb_funcall_args) / sizeof(VALUE)); \ + const int rb_funcall_argc = (argc); \ + const VALUE rb_funcall_args[] = {__VA_ARGS__}; \ + const int rb_funcall_nargs = \ + (int)(sizeof(rb_funcall_args) / sizeof(VALUE)); \ rb_funcallv(recv, mid, \ - rb_varargs_argc_check(rb_funcall_argc, rb_funcall_nargs), \ - rb_funcall_nargs ? rb_funcall_args : NULL); \ + rb_varargs_argc_check(rb_funcall_argc, rb_funcall_nargs), \ + rb_funcall_nargs ? 
rb_funcall_args : NULL); \ }) #endif /** @endcond */ diff --git a/include/ruby/st.h b/include/ruby/st.h index 1e4bb80686..f35ab43603 100644 --- a/include/ruby/st.h +++ b/include/ruby/st.h @@ -98,6 +98,8 @@ struct st_table { enum st_retval {ST_CONTINUE, ST_STOP, ST_DELETE, ST_CHECK, ST_REPLACE}; +size_t rb_st_table_size(const struct st_table *tbl); +#define st_table_size rb_st_table_size st_table *rb_st_init_table(const struct st_hash_type *); #define st_init_table rb_st_init_table st_table *rb_st_init_table_with_size(const struct st_hash_type *, st_index_t); diff --git a/include/ruby/thread.h b/include/ruby/thread.h index 18c792b386..337f477fd0 100644 --- a/include/ruby/thread.h +++ b/include/ruby/thread.h @@ -128,7 +128,7 @@ RBIMPL_ATTR_NONNULL((1)) * your code to see if it is actually worth releasing the GVL. */ void *rb_thread_call_without_gvl(void *(*func)(void *), void *data1, - rb_unblock_function_t *ubf, void *data2); + rb_unblock_function_t *ubf, void *data2); RBIMPL_ATTR_NONNULL((1)) /** @@ -152,7 +152,7 @@ RBIMPL_ATTR_NONNULL((1)) * @return What `func` returned, or 0 in case `func` did not return. */ void *rb_thread_call_without_gvl2(void *(*func)(void *), void *data1, - rb_unblock_function_t *ubf, void *data2); + rb_unblock_function_t *ubf, void *data2); /* * XXX: unstable/unapproved - out-of-tree code should NOT not depend @@ -190,6 +190,136 @@ void *rb_nogvl(void *(*func)(void *), void *data1, */ #define RUBY_CALL_WO_GVL_FLAG_SKIP_CHECK_INTS_ +/** + * Declare the current Ruby thread should acquire a dedicated + * native thread on M:N thread scheduler. + * + * If a C extension (or a library which the extension relies on) should + * keep to run on a native thread (e.g. using thread-local-storage), + * this function allocates a dedicated native thread for the thread. + * + * @return `false` if the thread already running on a dedicated native + * thread. Otherwise `true`. 
+ */ +bool rb_thread_lock_native_thread(void); + +/** + * Triggered when a new thread is started. + * + * @note The callback will be called *without* the GVL held. + */ +#define RUBY_INTERNAL_THREAD_EVENT_STARTED 1 << 0 + +/** +* Triggered when a thread attempt to acquire the GVL. +* +* @note The callback will be called *without* the GVL held. +*/ +#define RUBY_INTERNAL_THREAD_EVENT_READY 1 << 1 /** acquiring GVL */ + +/** + * Triggered when a thread successfully acquired the GVL. + * + * @note The callback will be called *with* the GVL held. + */ +#define RUBY_INTERNAL_THREAD_EVENT_RESUMED 1 << 2 /** acquired GVL */ + +/** + * Triggered when a thread released the GVL. + * + * @note The callback will be called *without* the GVL held. + */ +#define RUBY_INTERNAL_THREAD_EVENT_SUSPENDED 1 << 3 /** released GVL */ + +/** + * Triggered when a thread exits. + * + * @note The callback will be called *without* the GVL held. + */ +#define RUBY_INTERNAL_THREAD_EVENT_EXITED 1 << 4 /** thread terminated */ + +#define RUBY_INTERNAL_THREAD_EVENT_MASK 0xff /** All Thread events */ + +typedef struct rb_internal_thread_event_data { + VALUE thread; +} rb_internal_thread_event_data_t; + +typedef void (*rb_internal_thread_event_callback)(rb_event_flag_t event, + const rb_internal_thread_event_data_t *event_data, + void *user_data); +typedef struct rb_internal_thread_event_hook rb_internal_thread_event_hook_t; + +/** + * Registers a thread event hook function. + * + * @param[in] func A callback. + * @param[in] events A set of events that `func` should run. + * @param[in] data Passed as-is to `func`. + * @return An opaque pointer to the hook, to unregister it later. + * @note This functionality is a noop on Windows and WebAssembly. + * @note The callback will be called without the GVL held, except for the + * RESUMED event. + * @note Callbacks are not guaranteed to be executed on the native threads + * that corresponds to the Ruby thread. 
To identify which Ruby thread + * the event refers to, you must use `event_data->thread`. + * @warning This function MUST not be called from a thread event callback. + */ +rb_internal_thread_event_hook_t *rb_internal_thread_add_event_hook( + rb_internal_thread_event_callback func, rb_event_flag_t events, + void *data); + + +/** + * Unregister the passed hook. + * + * @param[in] hook. The hook to unregister. + * @return Whether the hook was found and unregistered. + * @note This functionality is a noop on Windows and WebAssembly. + * @warning This function MUST not be called from a thread event callback. +*/ +bool rb_internal_thread_remove_event_hook( + rb_internal_thread_event_hook_t * hook); + + +typedef int rb_internal_thread_specific_key_t; +#define RB_INTERNAL_THREAD_SPECIFIC_KEY_MAX 8 +/** + * Create a key to store thread specific data. + * + * These APIs are designed for tools using + * rb_internal_thread_event_hook APIs. + * + * Note that only `RB_INTERNAL_THREAD_SPECIFIC_KEY_MAX` keys + * can be created. raises `ThreadError` if exceeded. + * + * Usage: + * // at initialize time: + * int tool_key; // gvar + * Init_tool() { + * tool_key = rb_internal_thread_specific_key_create(); + * } + * + * // at any timing: + * rb_internal_thread_specific_set(thread, tool_key, per_thread_data); + * ... + * per_thread_data = rb_internal_thread_specific_get(thread, tool_key); + */ +rb_internal_thread_specific_key_t rb_internal_thread_specific_key_create(void); + +/** + * Get thread and tool specific data. + * + * This function is async signal safe and thread safe. + */ +void *rb_internal_thread_specific_get(VALUE thread_val, rb_internal_thread_specific_key_t key); + +/** + * Set thread and tool specific data. + * + * This function is async signal safe and thread safe. 
+ */ +void rb_internal_thread_specific_set(VALUE thread_val, rb_internal_thread_specific_key_t key, void *data); + RBIMPL_SYMBOL_EXPORT_END() #endif /* RUBY_THREAD_H */ diff --git a/include/ruby/thread_native.h b/include/ruby/thread_native.h index 2945ff1e4d..8217a67514 100644 --- a/include/ruby/thread_native.h +++ b/include/ruby/thread_native.h @@ -28,6 +28,11 @@ typedef union rb_thread_lock_union { CRITICAL_SECTION crit; } rb_nativethread_lock_t; +struct rb_thread_cond_struct { + struct cond_event_entry *next; + struct cond_event_entry *prev; +}; + typedef struct rb_thread_cond_struct rb_nativethread_cond_t; #elif defined(HAVE_PTHREAD_H) @@ -37,6 +42,12 @@ typedef pthread_t rb_nativethread_id_t; typedef pthread_mutex_t rb_nativethread_lock_t; typedef pthread_cond_t rb_nativethread_cond_t; +#elif defined(__wasi__) // no-thread platforms + +typedef struct rb_nativethread_id_t *rb_nativethread_id_t; +typedef struct rb_nativethread_lock_t *rb_nativethread_lock_t; +typedef struct rb_nativethread_cond_t *rb_nativethread_cond_t; + #elif defined(__DOXYGEN__) /** Opaque type that holds an ID of a native thread. */ diff --git a/include/ruby/util.h b/include/ruby/util.h index b2bc1a09f6..12e69c4b80 100644 --- a/include/ruby/util.h +++ b/include/ruby/util.h @@ -19,7 +19,7 @@ # include <stddef.h> /* size_t */ #endif -#if HAVE_SYS_TYPES_H +#ifdef HAVE_SYS_TYPES_H # include <sys/types.h> /* ssize_t */ #endif @@ -33,9 +33,20 @@ RBIMPL_SYMBOL_EXPORT_BEGIN() -/** an approximation of ceil(n * log10(2)), up to 65536 at least */ +/** an approximation of ceil(n * log10(2)), up to 1,048,576 (1<<20) + * without overflow within 32-bit calculation + */ #define DECIMAL_SIZE_OF_BITS(n) (((n) * 3010 + 9998) / 9999) +/** an approximation of decimal representation size for n-bytes */ +#define DECIMAL_SIZE_OF_BYTES(n) DECIMAL_SIZE_OF_BITS((n) * CHAR_BIT) + +/** + * An approximation of decimal representation size. 
`expr` may be a + * type name + */ +#define DECIMAL_SIZE_OF(expr) DECIMAL_SIZE_OF_BYTES(sizeof(expr)) + /** * Character to number mapping like `'a'` -> `10`, `'b'` -> `11` etc. For * punctuation etc., the value is -1. "36" terminology comes from the fact @@ -124,7 +135,7 @@ unsigned long ruby_scan_hex(const char *str, size_t len, size_t *ret); # define ruby_qsort qsort_r #else void ruby_qsort(void *, const size_t, const size_t, - int (*)(const void *, const void *, void *), void *); + int (*)(const void *, const void *, void *), void *); #endif RBIMPL_ATTR_NONNULL((1)) diff --git a/include/ruby/version.h b/include/ruby/version.h index 104f78a0c6..e9113177de 100644 --- a/include/ruby/version.h +++ b/include/ruby/version.h @@ -67,7 +67,7 @@ * Minor version. As of writing this version changes annually. Greater * version doesn't mean "better"; they just mean years passed. */ -#define RUBY_API_VERSION_MINOR 1 +#define RUBY_API_VERSION_MINOR 4 /** * Teeny version. This digit is kind of reserved these days. Kept 0 for the @@ -137,7 +137,8 @@ RUBY_EXTERN const int ruby_patchlevel; /** * This is what `ruby -v` prints to the standard error. Something like: - * `"ruby 2.5.9p229 (2021-04-05 revision 67829) [x86_64-linux]"` + * `"ruby 2.5.9p229 (2021-04-05 revision 67829) [x86_64-linux]"`. This doesn't + * include runtime options like a JIT being enabled. */ RUBY_EXTERN const char ruby_description[]; diff --git a/include/ruby/vm.h b/include/ruby/vm.h index 3458c28be7..8779780952 100644 --- a/include/ruby/vm.h +++ b/include/ruby/vm.h @@ -49,6 +49,13 @@ int ruby_vm_destruct(ruby_vm_t *vm); */ void ruby_vm_at_exit(void(*func)(ruby_vm_t *)); +/** + * Returns whether the Ruby VM will free all memory at shutdown. + * + * @return true if free-at-exit is enabled, false otherwise. 
+ */ +bool ruby_free_at_exit_p(void); + RBIMPL_SYMBOL_EXPORT_END() #endif /* RUBY_VM_H */ diff --git a/include/ruby/win32.h b/include/ruby/win32.h index c8ae599f2f..27a3467606 100644 --- a/include/ruby/win32.h +++ b/include/ruby/win32.h @@ -19,11 +19,6 @@ RUBY_SYMBOL_EXPORT_BEGIN */ /* - * Definitions for NT port of Perl - */ - - -/* * Ok now we can include the normal include files. */ @@ -40,6 +35,7 @@ extern "C++" { /* template without extern "C++" */ #endif #include <winsock2.h> #include <ws2tcpip.h> +#include <mswsock.h> #if !defined(_MSC_VER) || _MSC_VER >= 1400 #include <iphlpapi.h> #endif @@ -152,13 +148,19 @@ typedef int clockid_t; #define open rb_w32_uopen #define close(h) rb_w32_close(h) #define fclose(f) rb_w32_fclose(f) -#define read(f, b, s) rb_w32_read(f, b, s) -#define write(f, b, s) rb_w32_write(f, b, s) +#define read(f, b, s) rb_w32_read(f, b, s) +#define write(f, b, s) rb_w32_write(f, b, s) +#define pread(f, b, s, o) rb_w32_pread(f, b, s, o) +#define pwrite(f, b, s, o) rb_w32_pwrite(f, b, s, o) #define getpid() rb_w32_getpid() +#undef HAVE_GETPPID +#define HAVE_GETPPID 1 #define getppid() rb_w32_getppid() #define sleep(x) rb_w32_Sleep((x)*1000) #define Sleep(msec) (void)rb_w32_Sleep(msec) +#undef HAVE_EXECV +#define HAVE_EXECV 1 #undef execv #define execv(path,argv) rb_w32_uaspawn(P_OVERLAY,path,argv) #undef isatty @@ -191,7 +193,6 @@ struct stati128 { long st_ctimensec; }; -#define off_t __int64 #define stat stati128 #undef SIZEOF_STRUCT_STAT_ST_INO #define SIZEOF_STRUCT_STAT_ST_INO sizeof(unsigned __int64) @@ -299,7 +300,6 @@ extern DWORD rb_w32_osver(void); extern int rb_w32_uchown(const char *, int, int); extern int rb_w32_ulink(const char *, const char *); extern ssize_t rb_w32_ureadlink(const char *, char *, size_t); -extern ssize_t rb_w32_wreadlink(const WCHAR *, WCHAR *, size_t); extern int rb_w32_usymlink(const char *src, const char *link); extern int gettimeofday(struct timeval *, struct timezone *); extern int clock_gettime(clockid_t, 
struct timespec *); @@ -309,7 +309,9 @@ extern rb_pid_t wait(int *); extern rb_pid_t rb_w32_uspawn(int, const char *, const char*); extern rb_pid_t rb_w32_uaspawn(int, const char *, char *const *); extern rb_pid_t rb_w32_uaspawn_flags(int, const char *, char *const *, DWORD); -extern int kill(int, int); +#undef HAVE_KILL +#define HAVE_KILL 1 +extern int kill(rb_pid_t, int); extern int fcntl(int, int, ...); extern int rb_w32_set_nonblock(int); extern rb_pid_t rb_w32_getpid(void); @@ -388,6 +390,7 @@ scalb(double a, long b) #endif #define S_IFLNK 0xa000 +#define S_IFSOCK 0xc000 /* * define this so we can do inplace editing @@ -395,9 +398,9 @@ scalb(double a, long b) #define SUFFIX -extern int rb_w32_ftruncate(int fd, off_t length); -extern int rb_w32_truncate(const char *path, off_t length); -extern int rb_w32_utruncate(const char *path, off_t length); +extern int rb_w32_ftruncate(int fd, rb_off_t length); +extern int rb_w32_truncate(const char *path, rb_off_t length); +extern int rb_w32_utruncate(const char *path, rb_off_t length); #undef HAVE_FTRUNCATE #define HAVE_FTRUNCATE 1 @@ -647,6 +650,8 @@ extern char *rb_w32_strerror(int); #undef setsockopt #define setsockopt(s, v, n, o, l) rb_w32_setsockopt(s, v, n, o, l) +#undef HAVE_SHUTDOWN +#define HAVE_SHUTDOWN 1 #undef shutdown #define shutdown(s, h) rb_w32_shutdown(s, h) @@ -694,10 +699,10 @@ extern char *rb_w32_strerror(int); #endif struct tms { - long tms_utime; - long tms_stime; - long tms_cutime; - long tms_cstime; + long tms_utime; + long tms_stime; + long tms_cutime; + long tms_cstime; }; int rb_w32_times(struct tms *); @@ -714,7 +719,9 @@ int rb_w32_fclose(FILE*); int rb_w32_pipe(int[2]); ssize_t rb_w32_read(int, void *, size_t); ssize_t rb_w32_write(int, const void *, size_t); -off_t rb_w32_lseek(int, off_t, int); +ssize_t rb_w32_pread(int, void *, size_t, rb_off_t offset); +ssize_t rb_w32_pwrite(int, const void *, size_t, rb_off_t offset); +rb_off_t rb_w32_lseek(int, rb_off_t, int); int rb_w32_uutime(const 
char *, const struct utimbuf *); int rb_w32_uutimes(const char *, const struct timeval *); int rb_w32_uutimensat(int /* must be AT_FDCWD */, const char *, const struct timespec *, int /* must be 0 */); @@ -796,6 +803,25 @@ double rb_w32_pow(double x, double y); #define pow rb_w32_pow #endif +// mmap tiny emulation +#define MAP_FAILED ((void *)-1) + +#define PROT_READ 0x01 +#define PROT_WRITE 0x02 +#define PROT_EXEC 0x04 + +#define MAP_PRIVATE 0x0002 +#define MAP_ANON 0x1000 +#define MAP_ANONYMOUS MAP_ANON + +extern void *rb_w32_mmap(void *, size_t, int, int, int, rb_off_t); +extern int rb_w32_munmap(void *, size_t); +extern int rb_w32_mprotect(void *, size_t, int); + +#define mmap(a, l, p, f, d, o) rb_w32_mmap(a, l, p, f, d, o) +#define munmap(a, l) rb_w32_munmap(a, l) +#define mprotect(a, l, prot) rb_w32_mprotect(a, l, prot) + #if defined(__cplusplus) #if 0 { /* satisfy cc-mode */ |