diff options
Diffstat (limited to 'include/ruby/internal/intern')
39 files changed, 9983 insertions, 649 deletions
diff --git a/include/ruby/internal/intern/array.h b/include/ruby/internal/intern/array.h index aafe0d1350..1909fdf17b 100644 --- a/include/ruby/internal/intern/array.h +++ b/include/ruby/internal/intern/array.h @@ -17,61 +17,640 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Public APIs related to ::rb_cArray. */ +#include "ruby/internal/attr/noalias.h" +#include "ruby/internal/attr/noexcept.h" +#include "ruby/internal/attr/nonnull.h" +#include "ruby/internal/attr/pure.h" #include "ruby/internal/dllexport.h" #include "ruby/internal/value.h" RBIMPL_SYMBOL_EXPORT_BEGIN() /* array.c */ -void rb_mem_clear(VALUE*, long); -VALUE rb_assoc_new(VALUE, VALUE); -VALUE rb_check_array_type(VALUE); + +RBIMPL_ATTR_NONNULL(()) +RBIMPL_ATTR_NOALIAS() +/** + * Fills the memory region with a series of ::RUBY_Qnil. + * + * @param[out] buf Buffer to squash. + * @param[in] len Number of objects of `buf`. + * @post `buf` is filled with ::RUBY_Qnil. + */ +void rb_mem_clear(VALUE *buf, long len) + RBIMPL_ATTR_NOEXCEPT(true) + ; + +/** + * Identical to rb_ary_new_from_values(), except it expects exactly two + * parameters. + * + * @param[in] car Arbitrary ruby object. + * @param[in] cdr Arbitrary ruby object. + * @return An allocated new array, of length 2, whose contents are the + * passed objects. + */ +VALUE rb_assoc_new(VALUE car, VALUE cdr); + +/** + * Try converting an object to its array representation using its `to_ary` + * method, if any. If there is no such thing, returns ::RUBY_Qnil. + * + * @param[in] obj Arbitrary ruby object to convert. + * @exception rb_eTypeError `obj.to_ary` returned something non-Array. + * @retval RUBY_Qnil No conversion from `obj` to array defined. 
+ * @retval otherwise Converted array representation of `obj`. + * @see rb_io_check_io + * @see rb_check_string_type + * @see rb_check_hash_type + */ +VALUE rb_check_array_type(VALUE obj); + +/** + * Allocates a new, empty array. + * + * @return An allocated new array, whose length is 0. + */ VALUE rb_ary_new(void); + +/** + * Identical to rb_ary_new(), except it additionally specifies how many rooms + * of objects it should allocate. This way you can create an array whose + * capacity is bigger than the length of it. If you can say that an array + * grows to a specific amount, this could be effective than resizing an array + * over and over again and again. + * + * @param[in] capa Designed capacity of the generating array. + * @return An empty array, whose capacity is `capa`. + */ VALUE rb_ary_new_capa(long capa); + +/** + * Constructs an array from the passed objects. + * + * @param[in] n Number of passed objects. + * @param[in] ... Arbitrary ruby objects, filled into the returning array. + * @return An array of size `n`, whose contents are the passed objects. + */ VALUE rb_ary_new_from_args(long n, ...); + +/** + * Identical to rb_ary_new_from_args(), except how objects are passed. + * + * @param[in] n Number of objects of `elts`. + * @param[in] elts Arbitrary ruby objects, filled into the returning array. + * @return An array of size `n`, whose contents are the passed objects. + */ VALUE rb_ary_new_from_values(long n, const VALUE *elts); -VALUE rb_ary_tmp_new(long); -void rb_ary_free(VALUE); -void rb_ary_modify(VALUE); -VALUE rb_ary_freeze(VALUE); -VALUE rb_ary_shared_with_p(VALUE, VALUE); -VALUE rb_ary_aref(int, const VALUE*, VALUE); -VALUE rb_ary_subseq(VALUE, long, long); -void rb_ary_store(VALUE, long, VALUE); -VALUE rb_ary_dup(VALUE); + +/** + * Allocates a hidden (no class) empty array. + * + * @param[in] capa Designed capacity of the array. + * @return A hidden, empty array. 
+ * @see rb_obj_hide() + */ +VALUE rb_ary_hidden_new(long capa); +#define rb_ary_tmp_new rb_ary_hidden_new + +/** + * Destroys the given array for no reason. + * + * @warning DO NOT USE IT. + * @warning Leave this task to our GC. + * @warning It was a wrong idea in the first place to let you know about it. + * + * @param[out] ary The array to be executed. + * @post The given array no longer exists. + * @note Maybe `Array#clear` could be what you want. + * + * @internal + * + * Should have moved this to `internal/array.h`. + */ +void rb_ary_free(VALUE ary); + +/** + * Declares that the array is about to be modified. This for instance lets the + * array have a dedicated backend storage. + * + * @param[out] ary Array about to be modified. + * @exception rb_eFrozenError `ary` is frozen. + * @post Upon successful return the passed array is eligible to be + * modified. + */ +void rb_ary_modify(VALUE ary); + +/** @alias{rb_obj_freeze} */ +VALUE rb_ary_freeze(VALUE obj); + +RBIMPL_ATTR_PURE() +/** + * Queries if the passed two arrays share the same backend storage. A use-case + * for knowing such property is to take a snapshot of an array (using + * e.g. rb_ary_replace()), then check later if that snapshot still shares the + * storage with the original. Taking a snapshot is ultra-cheap. If nothing + * happens the impact shall be minimal. But if someone modifies the original, + * that entity shall pay the cost of copy-on-write. You can detect that using + * this API. + * + * @param[in] lhs Comparison LHS. + * @param[in] rhs Comparison RHS. + * @retval RUBY_Qtrue They share the same backend storage. + * @retval RUBY_Qfalse They are distinct. + * @pre Both arguments must be of ::RUBY_T_ARRAY. + */ +VALUE rb_ary_shared_with_p(VALUE lhs, VALUE rhs); + +/** + * Queries element(s) of an array. This is complicated! Refer to the `Array#slice` + * documentation for the complete description of how it behaves. + * + * @param[in] argc Number of objects of `argv`.
+ * @param[in] argv Up to 2 objects. + * @param[in] ary Target array. + * @exception rb_eTypeError `argv` (or its part) includes non-Integer. + * @exception rb_eRangeError rb_cArithSeq is passed, and is OOB. + * @return An element (if requested), or an array of elements (if + * requested), or ::RUBY_Qnil (if index OOB). + * + * @internal + * + * ```rbs + * # "int" is ::Integer or `#to_int`, defined in builtin.rbs + * + * class ::Array[unchecked out T] + * def slice + * : (int i) -> T? + * | (int beg, int len) -> ::Array[T]? + * | (Range[int] r) -> ::Array[T]? + * | (ArithmeticSequence as) -> ::Array[T]? # This also raises RangeError. + * end + * ``` + */ +VALUE rb_ary_aref(int argc, const VALUE *argv, VALUE ary); + +/** + * Obtains a part of the passed array. + * + * @param[in] ary Target array. + * @param[in] beg Subpart index. + * @param[in] len Requested length of returning array. + * @retval RUBY_Qnil Requested range out of bounds of `ary`. + * @retval otherwise An allocated new array whose contents are `ary`'s + * `beg` to `len`. + * @note Return array can be shorter than `len` when for instance + * `[0, 1, 2, 3]`'s 4th to 1,000,000,000th is requested. + */ +VALUE rb_ary_subseq(VALUE ary, long beg, long len); + +/** + * Destructively stores the passed value to the passed array's passed index. + * It also resizes the array's backend storage so that the requested index is + * not out of bounds. + * + * @param[out] ary Target array to modify. + * @param[in] key Where to store `val`. + * @param[in] val What to store at `key`. + * @exception rb_eFrozenError `ary` is frozen. + * @exception rb_eIndexError `key` is negative. + * @post `ary`'s `key`th position is occupied with `val`. + * @post Depending on `key` and previous length of `ary` this operation + * can also create a series of "hole" positions inside of the + * backend storage. They are filled with ::RUBY_Qnil. + */ +void rb_ary_store(VALUE ary, long key, VALUE val); + +/** + * Duplicates an array. 
+ * + * @param[in] ary Target to duplicate. + * @return An allocated new array whose contents are identical to `ary`. + * + * @internal + * + * Not sure why this has to be something different from `ary_make_shared_copy`, + * which seems much efficient. + */ +VALUE rb_ary_dup(VALUE ary); + +/** + * I guess there is no use case of this function in extension libraries, but + * this is a routine identical to rb_ary_dup(). This makes the most sense when + * the passed array is formerly hidden by rb_obj_hide(). + * + * @param[in] ary An array, possibly hidden. + * @return A duplicated new instance of ::rb_cArray. + */ VALUE rb_ary_resurrect(VALUE ary); -VALUE rb_ary_to_ary(VALUE); -VALUE rb_ary_to_s(VALUE); -VALUE rb_ary_cat(VALUE, const VALUE *, long); -VALUE rb_ary_push(VALUE, VALUE); -VALUE rb_ary_pop(VALUE); -VALUE rb_ary_shift(VALUE); -VALUE rb_ary_unshift(VALUE, VALUE); -VALUE rb_ary_entry(VALUE, long); -VALUE rb_ary_each(VALUE); -VALUE rb_ary_join(VALUE, VALUE); -VALUE rb_ary_reverse(VALUE); -VALUE rb_ary_rotate(VALUE, long); -VALUE rb_ary_sort(VALUE); -VALUE rb_ary_sort_bang(VALUE); -VALUE rb_ary_delete(VALUE, VALUE); -VALUE rb_ary_delete_at(VALUE, long); -VALUE rb_ary_clear(VALUE); -VALUE rb_ary_plus(VALUE, VALUE); -VALUE rb_ary_concat(VALUE, VALUE); -VALUE rb_ary_assoc(VALUE, VALUE); -VALUE rb_ary_rassoc(VALUE, VALUE); -VALUE rb_ary_includes(VALUE, VALUE); -VALUE rb_ary_cmp(VALUE, VALUE); + +/** + * Force converts an object to an array. It first tries its `#to_ary` method. + * Takes the result if any. Otherwise creates an array of size 1 whose sole + * element is the passed object. + * + * @param[in] obj Arbitrary ruby object. + * @return An array representation of `obj`. + * @note Unlike rb_str_to_str() which is a variant of + * rb_check_string_type(), rb_ary_to_ary() is not a variant of + * rb_check_array_type(). + */ +VALUE rb_ary_to_ary(VALUE obj); + +/** + * Converts an array into a human-readable string. Historically its behaviour + * changed over time. 
Currently it is identical to calling `inspect` method. + * This behaviour is from that of python (!!) circa 2006. + * + * @param[in] ary Array to inspect. + * @return Recursively inspected representation of `ary`. + * @see `[ruby-dev:29520]` + */ +VALUE rb_ary_to_s(VALUE ary); + +/** + * Destructively appends multiple elements at the end of the array. + * + * @param[out] ary Where to push `train`. + * @param[in] train Arbitrary ruby objects to push to `ary`. + * @param[in] len Number of objects of `train`. + * @exception rb_eIndexError `len` too large. + * @exception rb_eFrozenError `ary` is frozen. + * @return The passed `ary`. + * @post `ary` has contents from `train` appended at its end. + */ +VALUE rb_ary_cat(VALUE ary, const VALUE *train, long len); + +/** + * Special case of rb_ary_cat() that it adds only one element. + * + * @param[out] ary Where to push `elem`. + * @param[in] elem Arbitrary ruby object to push. + * @exception rb_eFrozenError `ary` is frozen. + * @return The passed `ary`. + * @post `ary` has `elem` appended at its end. + */ +VALUE rb_ary_push(VALUE ary, VALUE elem); + +/** + * Destructively deletes an element from the end of the passed array and + * returns what was deleted. + * + * @param[out] ary Target array to modify. + * @exception rb_eFrozenError `ary` is frozen. + * @return What was at the end of `ary`, or ::RUBY_Qnil if there is + * nothing to remove. + * @post `ary`'s last element, if any, is removed. + * @note There is no way to distinguish whether `ary` was an 1-element + * array whose content was ::RUBY_Qnil, or was empty. + */ +VALUE rb_ary_pop(VALUE ary); + +/** + * Destructively deletes an element from the beginning of the passed array and + * returns what was deleted. It can also be seen as a routine identical to + * rb_ary_pop(), except which side of the array to scrub. + * + * @param[out] ary Target array to modify. + * @exception rb_eFrozenError `ary` is frozen. 
+ * @return What was at the beginning of `ary`, or ::RUBY_Qnil if there is + * nothing to remove. + * @post `ary`'s first element, if any, is removed. As the name implies + * everything else remaining in `ary` gets moved towards `ary`'s + * beginning. + * @note There is no way to distinguish whether `ary` was an 1-element + * array whose content was ::RUBY_Qnil, or was empty. + */ +VALUE rb_ary_shift(VALUE ary); + +/** + * Destructively prepends the passed item at the beginning of the passed array. + * It can also be seen as a routine identical to rb_ary_push(), except which + * side of the array to modify. + * + * @param[out] ary Target array to modify. + * @param[in] elem Arbitrary ruby object to unshift. + * @exception rb_eFrozenError `ary` is frozen. + * @return The passed `ary`. + * @post `ary` has `elem` prepended at this beginning. + */ +VALUE rb_ary_unshift(VALUE ary, VALUE elem); + +RBIMPL_ATTR_PURE() +/** + * Queries an element of an array. When passed offset is negative it counts + * backwards. + * + * @param[in] ary An array to look into. + * @param[in] off Offset (can be negative). + * @return ::RUBY_Qnil when `off` is out of bounds of `ary`. Otherwise + * what is stored at `off`-th position of `ary`. + * @note `ary`'s `off`-th element can happen to be ::RUBY_Qnil. + */ +VALUE rb_ary_entry(VALUE ary, long off); + +/** + * Iteratively yields each element of the passed array to the implicitly passed + * block if any. In case there is no block given, an enumerator that does the + * thing is generated instead. + * + * @param[in] ary Array to iterate over. + * @retval ary Passed block was evaluated. + * @retval otherwise An instance of ::rb_cEnumerator for `Array#each`. + */ +VALUE rb_ary_each(VALUE ary); + +/** + * Recursively stringises the elements of the passed array, flattens that + * result, then joins the sequence using the passed separator. + * + * @param[in] ary Target array to convert. + * @param[in] sep Separator. 
Either a string, or ::RUBY_Qnil + * if you want no separator. + * @exception rb_eArgError Infinite recursion in `ary`. + * @exception rb_eTypeError `sep` is not a string. + * @exception rb_eEncCompatError Strings do not agree with their encodings. + * @return An instance of ::rb_cString which concatenates stringised + * contents of `ary`, using `sep` as separator. + */ +VALUE rb_ary_join(VALUE ary, VALUE sep); + +/** + * _Destructively_ reverses the passed array in-place. + * + * @warning This is `Array#reverse!`, not `Array#reverse`. + * @param[out] ary Target array to modify. + * @exception rb_eFrozenError `ary` is frozen. + * @return Passed `ary`. + * @post `ary` is reversed. + */ +VALUE rb_ary_reverse(VALUE ary); + +/** + * _Destructively_ rotates the passed array in-place to towards its end. The + * amount can be negative. Would rotate to the opposite direction then. + * + * @warning This is `Array#rotate!`, not `Array#rotate`. + * @param[out] ary Target array to modify. + * @param[in] rot Amount of rotation. + * @exception rb_eFrozenError `ary` is frozen. + * @retval RUBY_Qnil Not rotated. + * @retval ary Rotated. + * @post `ary` is rotated. + */ +VALUE rb_ary_rotate(VALUE ary, long rot); + +/** + * Creates a copy of the passed array, whose elements are sorted according to + * their `<=>` result. + * + * @param[in] ary Array to sort. + * @exception rb_eArgError Comparison not defined among elements. + * @exception rb_eRuntimeError Infinite recursion in `<=>`. + * @return A copy of `ary`, sorted. + * @note As of writing this function uses `qsort` as backend algorithm, + * which means the result is unstable (in terms of sort stability). + */ +VALUE rb_ary_sort(VALUE ary); + +/** + * Destructively sorts the passed array in-place, according to each elements' + * `<=>` result. + * + * @param[in] ary Target array to modify. + * @exception rb_eArgError Comparison not defined among elements. + * @exception rb_eRuntimeError Infinite recursion in `<=>`. 
+ * @return Passed `ary`. + * @post `ary` is sorted. + * @note As of writing this function uses `qsort` as backend algorithm, + * which means the result is unstable (in terms of sort stability). + */ +VALUE rb_ary_sort_bang(VALUE ary); + +/** + * Destructively removes elements from the passed array, so that there would be + * no elements inside that satisfy `==` relationship with the passed object. + * Returns the last deleted element if any. But in case there was nothing to + * delete it gets complicated. It checks for the implicitly passed block. If + * there is a block the return value would be what the block evaluates to. + * Otherwise it resorts to ::RUBY_Qnil. + * + * @param[out] ary Target array to modify. + * @param[in] elem Template object to match against each element. + * @exception rb_eFrozenError `ary` is frozen. + * @return What was deleted, or what the block returned, or + * ::RUBY_Qnil (see above). + * @post All elements that have `==` relationship with `elem` are purged + * from `ary`. Elements shift their positions so that `ary` gets + * compact. + * + * @internal + * + * Internally there also is `rb_ary_delete_same`, which compares by identity. + */ +VALUE rb_ary_delete(VALUE ary, VALUE elem); + +/** + * Destructively removes an element which resides at the specific index of the + * passed array. Unlike rb_ary_store() the index can be negative, which means + * the index counts backwards from the array's tail. + * + * @param[out] ary Target array to modify. + * @param[in] pos Position (can be negative). + * @exception rb_eFrozenError `ary` is frozen. + * @return What was deleted, or ::RUBY_Qnil in case of OOB. + * @post `ary`'s `pos`-th element is deleted if any. + * @note There is no way to distinguish whether `pos` is out of bounds, + * or `pos` did exist but stored ::RUBY_Qnil as an ordinary value. + */ +VALUE rb_ary_delete_at(VALUE ary, long pos); + +/** + * Destructively removes everything from an array.
+ * + * @param[out] ary Target array to modify. + * @exception rb_eFrozenError `ary` is frozen. + * @return The passed `ary`. + * @post `ary` is an empty array. + */ +VALUE rb_ary_clear(VALUE ary); + +/** + * Creates a new array, concatenating the former to the latter. + * + * @param[in] lhs Source array #1. + * @param[in] rhs Source array #2. + * @exception rb_eIndexError Result array too big. + * @return A new array containing `rhs` concatenated to `lhs`. + * @note This operation doesn't commute. Don't get confused by the + * "plus" terminology. For historical reasons there are some + * noncommutative `+`s in Ruby. This is one of such things. There + * has been a long discussion around `+`s in programming languages. + * + * @internal + * + * rb_ary_concat() is not a destructive version of rb_ary_plus(). They raise + * different exceptions. Don't know why though. + */ +VALUE rb_ary_plus(VALUE lhs, VALUE rhs); + +/** + * Destructively appends the contents of latter into the end of former. + * + * @param[out] lhs Destination array. + * @param[in] rhs Source array. + * @exception rb_eFrozenError `lhs` is frozen. + * @exception rb_eIndexError Result array too big. + * @exception rb_eTypeError `rhs` doesn't respond to `#to_ary`. + * @return The passed `lhs`. + * @post `lhs` has contents of `rhs` appended to its end. + */ +VALUE rb_ary_concat(VALUE lhs, VALUE rhs); + +/** + * Looks up the passed key, assuming the passed array is an alist. An "alist" + * here is a list of "association"s, much like that of Emacs. Emacs has + * `assoc` function that behaves exactly the same as this one. + * + * ```ruby + * # This is an example of alist.
+ * auto_mode_alist = [ + * [ /\.[ch]\z/, :"c-mode" ], + * [ /\.[ch]pp\z/, :"c++-mode" ], + * [ /\.awk\z/, :"awk-mode" ], + * [ /\.cs\z/, :"csharp-mode" ], + * [ /\.go\z/, :"go-mode" ], + * [ /\.java\z/, :"java-mode" ], + * [ /\.pas\z/, :"pascal-mode" ], + * [ /\.rs\z/, :"rust-mode" ], + * [ /\.txt\z/, :"text-mode" ], + * ] + * ``` + * + * This function scans the passed array looking for an element, which itself is + * an array, whose first element is the passed key. If no such element is + * found, returns ::RUBY_Qnil. + * + * Although this function expects the passed array be an array of arrays, it + * can happily accept non-array elements; it just ignores such things. + * + * @param[in] alist An array of arrays. + * @param[in] key Needle. + * @retval RUBY_Qnil Nothing was found. + * @retval otherwise An element in `alist` whose first element is in `==` + * relationship with `key`. + */ +VALUE rb_ary_assoc(VALUE alist, VALUE key); + +/** + * Identical to rb_ary_assoc(), except it matches `key` against the second + * element of each sub-array instead of the first. + * + * @param[in] alist An array of arrays. + * @param[in] key Needle. + * @retval RUBY_Qnil Nothing was found. + * @retval otherwise An element in `alist` whose second element is in `==` + * relationship with `key`. + */ +VALUE rb_ary_rassoc(VALUE alist, VALUE key); + +/** + * Queries if the passed array has the passed entry. + * + * @param[in] ary Target array to scan. + * @param[in] elem Target object to find. + * @retval RUBY_Qfalse No element in `ary` is in `==` relationship with + * `elem`. + * @retval RUBY_Qtrue There is at least one element in `ary` which is in + * `==` relationship with `elem`. + * + * @internal + * + * This is the only function in the entire C API that is named using third + * person singular form of a verb (except #ISASCII etc., which are not our + * naming). The counterpart Ruby API of this function is `Array#include?`.
+ */ +VALUE rb_ary_includes(VALUE ary, VALUE elem); + +/** + * Recursively compares each element of the two arrays one-by-one using `<=>`. + * + * @param[in] lhs Comparison LHS. + * @param[in] rhs Comparison RHS. + * @retval RUBY_Qnil `lhs` and `rhs` are not comparable. + * @retval -1 `lhs` is less than `rhs`. + * @retval 0 They are equal. + * @retval 1 `rhs` is less than `lhs`. + */ +VALUE rb_ary_cmp(VALUE lhs, VALUE rhs); + +/** + * Replaces the contents of the former object with the contents of the latter. + * + * @param[out] copy Destination object. + * @param[in] orig Source object. + * @exception rb_eTypeError `orig` has no implicit conversion to Array. + * @exception rb_eFrozenError `copy` is frozen. + * @return The passed `copy`. + * @post `copy`'s former components are abandoned. It now has the + * identical length and contents to `orig`. + */ VALUE rb_ary_replace(VALUE copy, VALUE orig); -VALUE rb_get_values_at(VALUE, long, int, const VALUE*, VALUE(*)(VALUE,long)); + +/** + * This _was_ a generalisation of `Array#values_at`, `Struct#values_at`, and + * `MatchData#values_at`. It began its life as a refactoring effort. However + * as Ruby evolves over time, as of writing none of aforementioned methods + * share their implementations at all. This function is not deprecated; still + * works as it has been. But it is now kind of like a rudimentum. + * + * This function takes an object, which is a receiver, and a series of + * "indices", which are either integers, or ranges of integers. Calls the + * passed callback for each of those indices, along with the receiver. This + * callback is expected to do something like rb_ary_aref(), rb_struct_aref(), + * etc. In case of a range index rb_range_beg_len() expands the range. + * Finally return values of the callback are gathered as an array, then + * returned. + * + * @param[in] obj Arbitrary ruby object. + * @param[in] olen "Length" of `obj`. + * @param[in] argc Number of objects of `argv`.
+ * @param[in] argv List of "indices", described above. + * @param[in] func Callback function. + * @return A new instance of ::rb_cArray gathering `func` outputs. + * + * @internal + * + * `Array#values_at` no longer uses this function. There is no reason apart + * from historical ones to list this function here. + */ +VALUE rb_get_values_at(VALUE obj, long olen, int argc, const VALUE *argv, VALUE (*func)(VALUE obj, long oidx)); + +/** + * Expands or shrinks the passed array to the passed length. + * + * @param[out] ary An array to modify. + * @param[in] len Desired length of `ary`. + * @exception rb_eFrozenError `ary` is frozen. + * @exception rb_eIndexError `len` too long. + * @return The passed `ary`. + * @post `ary`'s length is `len`. + * @post Depending on `len` and previous length of `ary` this operation + * can also create a series of "hole" positions inside of the + * backend storage. They are filled with ::RUBY_Qnil. + * + * @internal + * + * `len` is signed. Intentional or...? + */ VALUE rb_ary_resize(VALUE ary, long len); -#define rb_ary_new2 rb_ary_new_capa -#define rb_ary_new3 rb_ary_new_from_args -#define rb_ary_new4 rb_ary_new_from_values + +#define rb_ary_new2 rb_ary_new_capa /**< @old{rb_ary_new_capa} */ +#define rb_ary_new3 rb_ary_new_from_args /**< @old{rb_ary_new_from_args} */ +#define rb_ary_new4 rb_ary_new_from_values /**< @old{rb_ary_new_from_values} */ RBIMPL_SYMBOL_EXPORT_END() diff --git a/include/ruby/internal/intern/bignum.h b/include/ruby/internal/intern/bignum.h index 1ac92e9c90..c27f77a1fb 100644 --- a/include/ruby/internal/intern/bignum.h +++ b/include/ruby/internal/intern/bignum.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98.
* @brief Public APIs related to so-called rb_cBignum. */ #include "ruby/internal/config.h" @@ -26,6 +26,7 @@ # include <stddef.h> #endif +#include "ruby/internal/attr/nonnull.h" #include "ruby/internal/dllexport.h" #include "ruby/internal/value.h" #include "ruby/backward/2/long_long.h" @@ -33,71 +34,811 @@ RBIMPL_SYMBOL_EXPORT_BEGIN() /* bignum.c */ -VALUE rb_big_new(size_t, int); + +/** + * Allocates a bignum object. + * + * @param[in] len Length of the bignum's backend storage, in words. + * @param[in] sign Sign of the bignum. + * @return An allocated new bignum instance. + * @note This only allocates an object, doesn't fill its value in. + * + * @internal + * + * @shyouhei finds it hard to use from extension libraries. `len` is per + * `BDIGIT` but its definition is hidden. + */ +VALUE rb_big_new(size_t len, int sign); + +/** + * Queries if the passed bignum instance is a "bigzero". What is a bigzero? + * Well, bignums are for very big integers, but can also represent tiny ones + * like -1, 0, 1. Bigzero are instances of bignums whose values are zero. + * Knowing if a bignum is bigzero can be handy on occasions, like for instance + * detecting division by zero situation. + * + * @param[in] x A bignum. + * @retval 1 It is a bigzero. + * @retval 0 Otherwise. + */ int rb_bigzero_p(VALUE x); -VALUE rb_big_clone(VALUE); -void rb_big_2comp(VALUE); -VALUE rb_big_norm(VALUE); + +/** + * Duplicates the given bignum. + * + * @param[in] num A bignum. + * @return An allocated bignum, who is equivalent to `num`. + */ +VALUE rb_big_clone(VALUE num); + +/** + * Destructively modify the passed bignum into 2's complement representation. + * + * @note By default bignums are in signed magnitude system. + * + * @param[out] num A bignum to modify. + */ +void rb_big_2comp(VALUE num); + +/** + * Normalises the passed bignum. It for instance returns a fixnum of the same + * value if fixnum can represent that number. + * + * @param[out] x Target bignum (can be destructively modified). 
+ * @return An integer of the identical value (can be `x` itself). + */ +VALUE rb_big_norm(VALUE x); + +/** + * Destructively resizes the backend storage of the passed bignum. + * + * @param[out] big A bignum. + * @param[in] len New length of `big`'s backend, in words. + */ void rb_big_resize(VALUE big, size_t len); -VALUE rb_cstr_to_inum(const char*, int, int); -VALUE rb_str_to_inum(VALUE, int, int); -VALUE rb_cstr2inum(const char*, int); -VALUE rb_str2inum(VALUE, int); -VALUE rb_big2str(VALUE, int); -long rb_big2long(VALUE); + +RBIMPL_ATTR_NONNULL(()) +/** + * Parses C's string to convert into a Ruby's integer. It understands prefixes + * (e.g. `0x`) and underscores. + * + * @param[in] str Stringised representation of the return value. + * @param[in] base Base of conversion. Must be `-36..36` inclusive, + * except `1`. `2..36` means the conversion is done + * according to it, with unmatched prefix understood + * as a part of the result. `-36..-2` means the + * conversion honours prefix when present, or use + * `-base` when absent. `0` is equivalent to `-10`. + * `-1` mandates a prefix. `1` is an error. + * @param[in] badcheck Whether to raise ::rb_eArgError on failure. If + * `0` is passed here this function can return + * `INT2FIX(0)` for parse errors. + * @exception rb_eArgError Failed to parse (and `badcheck` is truthy). + * @return An instance of ::rb_cInteger, which is a numeric interpretation + * of what is written in `str`. + * + * @internal + * + * Not sure if it intentionally accepts `base == -1` or is just buggy. Nobody + * practically uses negative bases these days. + */ +VALUE rb_cstr_to_inum(const char *str, int base, int badcheck); + +/** + * Identical to rb_cstr2inum(), except it takes Ruby's strings instead of C's. + * + * @param[in] str Stringised representation of the return + * value. + * @param[in] base Base of conversion. Must be `-36..36` + * inclusive, except `1`. 
`2..36` means the + * conversion is done according to it, with + * unmatched prefix understood as a part of the + * result. `-36..-2` means the conversion + * honours prefix when present, or use `-base` + * when absent. `0` is equivalent to `-10`. + * `-1` mandates a prefix. `1` is an error. + * @param[in] badcheck Whether to raise ::rb_eArgError on failure. + * If `0` is passed here this function can + * return `INT2FIX(0)` for parse errors. + * @exception rb_eArgError Failed to parse (and `badcheck` is truthy). + * @exception rb_eTypeError `str` is not a string. + * @exception rb_eEncCompatError `str` is not ASCII compatible. + * @return An instance of ::rb_cInteger, which is a numeric interpretation + * of what is written in `str`. + */ +VALUE rb_str_to_inum(VALUE str, int base, int badcheck); + +RBIMPL_ATTR_NONNULL(()) +/** + * Identical to rb_cstr_to_inum(), except the second argument controls the base + * and badcheck at once. It basically doesn't raise for parse errors, unless + * the base is zero. + * + * This is an older API. New codes might prefer rb_cstr_to_inum(). + * + * @param[in] str Stringised representation of the return value. + * @param[in] base Base of conversion. Must be `-36..36` inclusive, + * except `1`. `2..36` means the conversion is done + * according to it, with unmatched prefix understood + * as a part of the result. `-36..-2` means the + * conversion honours prefix when present, or use + * `-base` when absent. `0` is equivalent to `-10`. + * `-1` mandates a prefix. `1` is an error. + * @exception rb_eArgError Failed to parse (and `base` is zero). + * @return An instance of ::rb_cInteger, which is a numeric interpretation + * of what is written in `str`. + */ +VALUE rb_cstr2inum(const char *str, int base); + +/** + * Identical to rb_str_to_inum(), except the second argument controls the base + * and badcheck at once. It can also be seen as a routine identical to + * rb_cstr2inum(), except it takes Ruby's strings instead of C's. 
+ * + * This is an older API. New code might prefer rb_str_to_inum(). + * + * @param[in] str Stringised representation of the return + * value. + * @param[in] base Base of conversion. Must be `-36..36` + * inclusive, except `1`. `2..36` means the + * conversion is done according to it, with + * unmatched prefix understood as a part of the + * result. `-36..-2` means the conversion + * honours prefix when present, or use `-base` + * when absent. `0` is equivalent to `-10`. + * `-1` mandates a prefix. `1` is an error. + * @exception rb_eArgError Failed to parse (and `base` is zero). + * @exception rb_eTypeError `str` is not a string. + * @exception rb_eEncCompatError `str` is not ASCII compatible. + * @return An instance of ::rb_cInteger, which is a numeric interpretation + * of what is written in `str`. + */ +VALUE rb_str2inum(VALUE str, int base); + +/** + * Generates a place-value representation of the passed integer. + * + * @param[in] x An integer to stringify. + * @param[in] base `2` to `36` inclusive for each radix. + * @exception rb_eArgError `base` is out of range. + * @exception rb_eRangeError `x` is too big, cannot represent in string. + * @return An instance of ::rb_cString which represents `x`. + */ +VALUE rb_big2str(VALUE x, int base); + +/** + * Converts a bignum into C's `long`. + * + * @param[in] x A bignum. + * @exception rb_eRangeError `x` is out of range of `long`. + * @return The passed value converted into C's `long`. + */ +long rb_big2long(VALUE x); + +/** @alias{rb_big2long} */ #define rb_big2int(x) rb_big2long(x) -unsigned long rb_big2ulong(VALUE); + +/** + * Converts a bignum into C's `unsigned long`. + * + * @param[in] x A bignum. + * @exception rb_eRangeError `x` is out of range of `unsigned long`. + * @return The passed value converted into C's `unsigned long`. + * + * @internal + * + * This function can generate a very large positive integer for a negative + * input.
For instance applying Ruby's -4,611,686,018,427,387,905 to this + * function yields C's 13,835,058,055,282,163,711 on my machine. This is how + * it has been. Cannot change any longer. + */ +unsigned long rb_big2ulong(VALUE x); + +/** @alias{rb_big2ulong} */ #define rb_big2uint(x) rb_big2ulong(x) + #if HAVE_LONG_LONG +/** + * Converts a bignum into C's `long long`. + * + * @param[in] x A bignum. + * @exception rb_eRangeError `x` is out of range of `long long`. + * @return The passed value converted into C's `long long`. + */ LONG_LONG rb_big2ll(VALUE); + +/** + * Converts a bignum into C's `unsigned long long`. + * + * @param[in] x A bignum. + * @exception rb_eRangeError `x` is out of range of `unsigned long long`. + * @return The passed value converted into C's `unsigned long long`. + * + * @internal + * + * This function can generate a very large positive integer for a negative + * input. For instance applying Ruby's -4,611,686,018,427,387,905 to this + * function yields C's 13,835,058,055,282,163,711 on my machine. This is how + * it has been. Cannot change any longer. + */ unsigned LONG_LONG rb_big2ull(VALUE); + #endif /* HAVE_LONG_LONG */ + +RBIMPL_ATTR_NONNULL(()) +/** + * Converts a bignum into a series of its parts. + * + * @param[in] val An integer. + * @param[out] buf Return buffer. + * @param[in] num_longs Number of words of `buf`. + * @exception rb_eTypeError `val` doesn't respond to `#to_int`. + * @post `buf` is filled with `val`'s 2's complement representation, in + * the host CPU's native byte order, from least significant word + * towards the most significant one, for `num_longs` words. + * @note The "pack" terminology comes from `Array#pack`. + */ void rb_big_pack(VALUE val, unsigned long *buf, long num_longs); + +RBIMPL_ATTR_NONNULL(()) +/** + * Constructs a (possibly very big) bignum from a series of integers. `buf[0]` + * would be the return value's least significant word; `buf[num_longs-1]` would + * be that of most significant.
+ * + * @param[in] buf A series of integers. + * @param[in] num_longs Number of words of `buf`. + * @exception rb_eArgError Result would be too big. + * @return An instance of ::rb_cInteger which is an "unpack"-ed value of + * the parameters. + * @note The "unpack" terminology comes from `String#unpack`. + */ VALUE rb_big_unpack(unsigned long *buf, long num_longs); -int rb_uv_to_utf8(char[6],unsigned long); -VALUE rb_dbl2big(double); -double rb_big2dbl(VALUE); -VALUE rb_big_cmp(VALUE, VALUE); -VALUE rb_big_eq(VALUE, VALUE); -VALUE rb_big_eql(VALUE, VALUE); -VALUE rb_big_plus(VALUE, VALUE); -VALUE rb_big_minus(VALUE, VALUE); -VALUE rb_big_mul(VALUE, VALUE); -VALUE rb_big_div(VALUE, VALUE); -VALUE rb_big_idiv(VALUE, VALUE); -VALUE rb_big_modulo(VALUE, VALUE); -VALUE rb_big_divmod(VALUE, VALUE); -VALUE rb_big_pow(VALUE, VALUE); -VALUE rb_big_and(VALUE, VALUE); -VALUE rb_big_or(VALUE, VALUE); -VALUE rb_big_xor(VALUE, VALUE); -VALUE rb_big_lshift(VALUE, VALUE); -VALUE rb_big_rshift(VALUE, VALUE); - -/* For rb_integer_pack and rb_integer_unpack: */ -/* "MS" in MSWORD and MSBYTE means "most significant" */ -/* "LS" in LSWORD and LSBYTE means "least significant" */ + +/* pack.c */ + +RBIMPL_ATTR_NONNULL(()) +/** + * Encodes a Unicode codepoint into its UTF-8 representation. + * + * @param[out] buf Return buffer, must be at least 6 bytes wide. + * @param[in] uv A Unicode codepoint. + * @exception rb_eRangeError `uv` is out of Unicode. + * @return Number of bytes written to `buf`. + * @post `buf` holds a UTF-8 representation of `uv`. + */ +int rb_uv_to_utf8(char buf[6], unsigned long uv); + +/* bignum.c */ + +/** + * Converts a C's `double` into a bignum. + * + * @param[in] d A value to convert. + * @exception rb_eFloatDomainError `d` is Inf/NaN. + * @return An instance of ::rb_cInteger whose value is approximately `d`. + * + * @internal + * + * @shyouhei is not sure if the result is guaranteed to be the nearest integer + * of `d`.
+ */ +VALUE rb_dbl2big(double d); + +/** + * Converts a bignum into C's `double`. + * + * @param[in] x A bignum. + * @return The passed value converted into C's `double`. + * + * @internal + * + * @shyouhei is not sure if the result is guaranteed to be `x`'s nearest value + * that a `double` can represent. + */ +double rb_big2dbl(VALUE x); + +/** + * Compares the passed two bignums. + * + * @param[in] lhs Comparison LHS. + * @param[in] rhs Comparison RHS. + * @retval -1 `rhs` is bigger than `lhs`. + * @retval 0 They are identical. + * @retval 1 `lhs` is bigger than `rhs`. + * @see rb_num_coerce_cmp() + */ +VALUE rb_big_cmp(VALUE lhs, VALUE rhs); + +/** + * Equality, in terms of `==`. This checks if the _value_ is the same, not the + * identity. For instance `1 == 1.0` must hold. + * + * @param[in] lhs Comparison LHS. + * @param[in] rhs Comparison RHS. + * @retval RUBY_Qtrue They are the same. + * @retval RUBY_Qfalse They are different. + */ +VALUE rb_big_eq(VALUE lhs, VALUE rhs); + +/** + * Equality, in terms of `eql?`. Unlike rb_big_eq() it does not convert + * ::rb_cFloat etc. This function returns ::RUBY_Qtrue if and only if both + * parameters are bignums, which represent the identical numerical value. + * + * @param[in] lhs Comparison LHS. + * @param[in] rhs Comparison RHS. + * @retval RUBY_Qtrue They are identical. + * @retval RUBY_Qfalse They are distinct. + */ +VALUE rb_big_eql(VALUE lhs, VALUE rhs); + +/** + * Performs addition of the passed two objects. + * + * @param[in] x A bignum. + * @param[in] y Arbitrary ruby object. + * @return What `x + y` evaluates to. + * @see rb_num_coerce_bin() + */ +VALUE rb_big_plus(VALUE x, VALUE y); + +/** + * Performs subtraction of the passed two objects. + * + * @param[in] x A bignum. + * @param[in] y Arbitrary ruby object. + * @return What `x - y` evaluates to. + * @see rb_num_coerce_bin() + */ +VALUE rb_big_minus(VALUE x, VALUE y); + +/** + * Performs multiplication of the passed two objects. 
+ * + * @param[in] x A bignum. + * @param[in] y Arbitrary ruby object. + * @return What `x * y` evaluates to. + * @see rb_num_coerce_bin() + */ +VALUE rb_big_mul(VALUE x, VALUE y); + +/** + * Performs division of the passed two objects. + * + * @param[in] x A bignum. + * @param[in] y Arbitrary ruby object. + * @return What `x / y` evaluates to. + * @see rb_num_coerce_bin() + */ +VALUE rb_big_div(VALUE x, VALUE y); + +/** + * Performs "integer division". This is different from rb_big_div(). + * + * @param[in] x A bignum. + * @param[in] y Arbitrary ruby object. + * @return What `x.div y` evaluates to. + * @see rb_num_coerce_bin() + */ +VALUE rb_big_idiv(VALUE x, VALUE y); + +/** + * Performs modulo of the passed two objects. + * + * @param[in] x A bignum. + * @param[in] y Arbitrary ruby object. + * @return What `x % y` evaluates to. + * @see rb_num_coerce_bin() + * + * @internal + * + * There also is `rb_big_remainder()` internally, which is different from this + * one. + */ +VALUE rb_big_modulo(VALUE x, VALUE y); + +/** + * Performs "divmod" operation. The operation in bignum's context is that it + * calculates rb_big_idiv() and rb_big_modulo() at once. + * + * @param[in] x A bignum. + * @param[in] y Arbitrary ruby object. + * @return What `x.divmod y` evaluates to. + * @see rb_num_coerce_bin() + */ +VALUE rb_big_divmod(VALUE x, VALUE y); + +/** + * Raises `x` to the power of `y`. + * + * @param[in] x A bignum. + * @param[in] y Arbitrary ruby object. + * @return What `x ** y` evaluates to. + * @see rb_num_coerce_bin() + * @note This can return an instance of ::rb_cFloat, even when both `x` + * and `y` are bignums. Or an instance of ::rb_cRational, when for + * instance `y` is negative. + */ +VALUE rb_big_pow(VALUE x, VALUE y); + +/** + * Performs bitwise and of the passed two objects. + * + * @param[in] x A bignum. + * @param[in] y Arbitrary ruby object. + * @return What `x & y` evaluates to.
+ * @see rb_num_coerce_bit() + */ +VALUE rb_big_and(VALUE x, VALUE y); + +/** + * Performs bitwise or of the passed two objects. + * + * @param[in] x A bignum. + * @param[in] y Arbitrary ruby object. + * @return What `x | y` evaluates to. + * @see rb_num_coerce_bit() + */ +VALUE rb_big_or(VALUE x, VALUE y); + +/** + * Performs exclusive or of the passed two objects. + * + * @param[in] x A bignum. + * @param[in] y Arbitrary ruby object. + * @return What `x ^ y` evaluates to. + * @see rb_num_coerce_bit() + */ +VALUE rb_big_xor(VALUE x, VALUE y); + +/** + * Performs shift left. + * + * @param[in] x A bignum. + * @param[in] y Shift amount. + * @exception rb_eTypeError `y` is not an integer. + * @exception rb_eArgError `y` is too big. + * @return `x` shifted left to `y` bits. + * @note `y` can be negative. Shifts right then. + */ +VALUE rb_big_lshift(VALUE x, VALUE y); + +/** + * Performs shift right. + * + * @param[in] x A bignum. + * @param[in] y Shift amount. + * @exception rb_eTypeError `y` is not an integer. + * @return `x` shifted right to `y` bits. + * @note This is arithmetic. Because bignums are not bitfields there is + * no shift right logical operator. + */ +VALUE rb_big_rshift(VALUE x, VALUE y); + +/** + * @name Flags for rb_integer_pack()/rb_integer_unpack() + * @{ + */ + +/** Stores/interprets the most significant word as the first word. */ #define INTEGER_PACK_MSWORD_FIRST 0x01 + +/** Stores/interprets the least significant word as the first word. */ #define INTEGER_PACK_LSWORD_FIRST 0x02 + +/** + * Stores/interprets the most significant byte in a word as the first byte in + * the word. + */ #define INTEGER_PACK_MSBYTE_FIRST 0x10 + +/** + * Stores/interprets the least significant byte in a word as the first byte in + * the word. + */ #define INTEGER_PACK_LSBYTE_FIRST 0x20 + +/** + * Means either #INTEGER_PACK_MSBYTE_FIRST or #INTEGER_PACK_LSBYTE_FIRST, + * depending on the host processor's endian. 
+ */ #define INTEGER_PACK_NATIVE_BYTE_ORDER 0x40 + +/** Uses 2's complement representation. */ #define INTEGER_PACK_2COMP 0x80 + +/** Uses "generic" implementation (handy on test). */ #define INTEGER_PACK_FORCE_GENERIC_IMPLEMENTATION 0x400 -/* For rb_integer_unpack: */ + +/** + * Always generates a bignum object even if the integer can be representable + * using fixnum scheme (unpack only) + */ #define INTEGER_PACK_FORCE_BIGNUM 0x100 + +/** + * Interprets the input as a signed negative number (unpack only). If not + * specified returns a positive number. + */ #define INTEGER_PACK_NEGATIVE 0x200 -/* Combinations: */ + +/** Little endian combination. */ #define INTEGER_PACK_LITTLE_ENDIAN \ (INTEGER_PACK_LSWORD_FIRST | \ INTEGER_PACK_LSBYTE_FIRST) + +/** Big endian combination */ #define INTEGER_PACK_BIG_ENDIAN \ (INTEGER_PACK_MSWORD_FIRST | \ INTEGER_PACK_MSBYTE_FIRST) + +/** @} */ + +RBIMPL_ATTR_NONNULL(()) +/** + * Exports an integer into a buffer. This function fills the buffer specified + * by `words` and `numwords` as `val` in the format specified by `wordsize`, + * `nails` and `flags`. + * + * @param[in] val Integer or integer-like object which has + * `#to_int` method. + * @param[out] words Return buffer. + * @param[in] numwords Number of words of `words`. + * @param[in] wordsize Number of bytes per word. + * @param[in] nails Number of padding bits in a word. Most + * significant nails bits of each word are filled + * by zero. + * @param[in] flags Bitwise or of constants whose name starts + * "INTEGER_PACK_". + * @exception rb_eTypeError `val` doesn't respond to `#to_int`. + * + * Possible flags are: + * + * - #INTEGER_PACK_MSWORD_FIRST: + * Stores the most significant word as the first word. + * + * - #INTEGER_PACK_LSWORD_FIRST: + * Stores the least significant word as the first word. + * + * - #INTEGER_PACK_MSBYTE_FIRST: + * Stores the most significant byte in a word as the first byte in the + * word. 
+ * + * - #INTEGER_PACK_LSBYTE_FIRST: + * Stores the least significant byte in a word as the first byte in the + * word. + * + * - #INTEGER_PACK_NATIVE_BYTE_ORDER: + * Either #INTEGER_PACK_MSBYTE_FIRST or #INTEGER_PACK_LSBYTE_FIRST + * corresponding to the host's endian. + * + * - #INTEGER_PACK_2COMP: + * Uses 2's complement representation. + * + * - #INTEGER_PACK_LITTLE_ENDIAN: Shorthand of + * `INTEGER_PACK_LSWORD_FIRST|INTEGER_PACK_LSBYTE_FIRST`. + * + * - #INTEGER_PACK_BIG_ENDIAN: Shorthand of + * `INTEGER_PACK_MSWORD_FIRST|INTEGER_PACK_MSBYTE_FIRST`. + * + * - #INTEGER_PACK_FORCE_GENERIC_IMPLEMENTATION: + * Uses generic implementation (for test and debug). + * + * This function fills the buffer specified by `words` as `val`'s 2's + * complement representation if #INTEGER_PACK_2COMP is specified in `flags`. + * Otherwise it fills `words` as `abs(val)` and signedness is returned via the + * return value. + * + * @return The signedness and overflow condition. The overflow condition + * depends on #INTEGER_PACK_2COMP. + * + * When #INTEGER_PACK_2COMP is not specified: + * + * - `-2` : + * Negative overflow. `val <= -2**(numwords*(wordsize*CHAR_BIT-nails))` + * + * - `-1` : + * Negative without overflow. + * `-2**(numwords*(wordsize*CHAR_BIT-nails)) < val < 0` + * + * - `0` : zero. `val == 0` + * + * - `1` : + * Positive without overflow. + * `0 < val < 2**(numwords*(wordsize*CHAR_BIT-nails))` + * + * - `2` : + * Positive overflow. `2**(numwords*(wordsize*CHAR_BIT-nails)) <= val` + * + * When #INTEGER_PACK_2COMP is specified: + * + * - `-2` : + * Negative overflow. `val < -2**(numwords*(wordsize*CHAR_BIT-nails))` + * + * - `-1` : + * Negative without overflow. + * `-2**(numwords*(wordsize*CHAR_BIT-nails)) <= val < 0` + * + * - `0` : zero. `val == 0` + * + * - `1` : + * Positive without overflow. + * `0 < val < 2**(numwords*(wordsize*CHAR_BIT-nails))` + * + * - `2` : + * Positive overflow. 
`2**(numwords*(wordsize*CHAR_BIT-nails)) <= val` + * + * The value, `-2**(numwords*(wordsize*CHAR_BIT-nails))`, is representable in + * 2's complement representation but not representable in absolute value. So + * `-1` is returned for the value if #INTEGER_PACK_2COMP is specified but + * returns `-2` if #INTEGER_PACK_2COMP is not specified. + * + * The least significant words are filled in the buffer when overflow occurs. + */ int rb_integer_pack(VALUE val, void *words, size_t numwords, size_t wordsize, size_t nails, int flags); + +RBIMPL_ATTR_NONNULL(()) +/** + * Import an integer from a buffer. + * + * @param[in] words Buffer to import. + * @param[in] numwords Number of words of `words`. + * @param[in] wordsize Number of bytes per word. + * @param[in] nails Number of padding bits in a word. Most + * significant nails bits of each word are ignored. + * @param[in] flags Bitwise or of constants whose name starts + * "INTEGER_PACK_". + * @exception rb_eArgError `numwords * wordsize` too big. + * + * Possible flags are: + * + * - #INTEGER_PACK_MSWORD_FIRST: + * Interpret the first word as the most significant word. + * + * - #INTEGER_PACK_LSWORD_FIRST: + * Interpret the first word as the least significant word. + * + * - #INTEGER_PACK_MSBYTE_FIRST: + * Interpret the first byte in a word as the most significant byte in the + * word. + * + * - #INTEGER_PACK_LSBYTE_FIRST: + * Interpret the first byte in a word as the least significant byte in + * the word. + * + * - #INTEGER_PACK_NATIVE_BYTE_ORDER: + * Either #INTEGER_PACK_MSBYTE_FIRST or #INTEGER_PACK_LSBYTE_FIRST + * corresponding to the host's endian. + * + * - #INTEGER_PACK_2COMP: + * Uses 2's complement representation.
+ * + * - #INTEGER_PACK_LITTLE_ENDIAN: Shorthand of + * `INTEGER_PACK_LSWORD_FIRST|INTEGER_PACK_LSBYTE_FIRST` + * + * - #INTEGER_PACK_BIG_ENDIAN: Shorthand of + * `INTEGER_PACK_MSWORD_FIRST|INTEGER_PACK_MSBYTE_FIRST` + * + * - #INTEGER_PACK_FORCE_BIGNUM: + * Returns a bignum even if its value is representable as a fixnum. + * + * - #INTEGER_PACK_NEGATIVE: + * Returns a non-positive value. (Returns a non-negative value if not + * specified.) + * + * - #INTEGER_PACK_FORCE_GENERIC_IMPLEMENTATION: + * Uses generic implementation (for test and debug). + * + * @return An instance of ::rb_cInteger whose value is the interpreted + * `words`. The range of the result value depends on + * #INTEGER_PACK_2COMP and #INTEGER_PACK_NEGATIVE. + * + * When #INTEGER_PACK_2COMP is not set: + * + * - `0 <= val < 2**(numwords*(wordsize*CHAR_BIT-nails))` if + * `!INTEGER_PACK_NEGATIVE` + * + * - `-2**(numwords*(wordsize*CHAR_BIT-nails)) < val <= 0` if + * `INTEGER_PACK_NEGATIVE` + * + * When #INTEGER_PACK_2COMP is set: + * + * - `-2**(numwords*(wordsize*CHAR_BIT-nails)-1)` `<= val <=` + * `2**(numwords*(wordsize*CHAR_BIT-nails)-1)-1` if + * `!INTEGER_PACK_NEGATIVE` + * + * - `-2**(numwords*(wordsize*CHAR_BIT-nails)) <= val <= -1` if + * `INTEGER_PACK_NEGATIVE` + * + * Passing #INTEGER_PACK_2COMP without #INTEGER_PACK_NEGATIVE means sign + * extension. #INTEGER_PACK_2COMP with #INTEGER_PACK_NEGATIVE means assuming + * the higher bits are `1`. + * + * Note that this function returns 0 when `numwords` is zero and + * #INTEGER_PACK_2COMP is set but #INTEGER_PACK_NEGATIVE is not set. + */ VALUE rb_integer_unpack(const void *words, size_t numwords, size_t wordsize, size_t nails, int flags); + +/** + * Calculates the number of bytes needed to represent the absolute value of the + * passed integer. + * + * @param[in] val Integer or integer-like object which has + * `#to_int` method. 
+ * @param[out] nlz_bits_ret Number of leading zero bits in the most + * significant byte is returned if not `NULL`. + * @exception rb_eTypeError `val` doesn't respond to `#to_int`. + * @return `((val_numbits * CHAR_BIT + CHAR_BIT - 1) / CHAR_BIT)`, where + * val_numbits is the number of bits of `abs(val)`. + * @post If `nlz_bits_ret` is not `NULL`, + * `(return_value * CHAR_BIT - val_numbits)` is stored in + * `*nlz_bits_ret`. In this case, + * `0 <= *nlz_bits_ret < CHAR_BIT`. + * + * This function should not overflow. + */ size_t rb_absint_size(VALUE val, int *nlz_bits_ret); + +/** + * Calculates the number of words needed to represent the absolute value of the + * passed integer. Unlike rb_absint_size() this function can overflow. It + * returns `(size_t)-1` then. + * + * @param[in] val Integer or integer-like object which has + * `#to_int` method. + * @param[in] word_numbits Number of bits per word. + * @param[out] nlz_bits_ret Number of leading zero bits in the most + * significant word is returned if not `NULL`. + * @exception rb_eTypeError `val` doesn't respond to `#to_int`. + * @retval (size_t)-1 Overflowed. + * @retval otherwise + * `((val_numbits * CHAR_BIT + word_numbits - 1) / word_numbits)`, + * where val_numbits is the number of bits of `abs(val)`. + * @post If `nlz_bits_ret` is not `NULL` and there is no overflow, + * `(return_value * word_numbits - val_numbits)` is stored in + * `*nlz_bits_ret`. In this case, + * `0 <= *nlz_bits_ret < word_numbits`. + * + */ size_t rb_absint_numwords(VALUE val, size_t word_numbits, size_t *nlz_bits_ret); + +/** + * Tests whether `abs(val)` consists of only a single set bit or not. + * + * @param[in] val Integer or integer-like object which has + * `#to_int` method. + * @exception rb_eTypeError `val` doesn't respond to `#to_int`. + * @retval 1 `abs(val) == 1 << n` for some `n >= 0`. + * @retval 0 Otherwise.
+ * + * rb_absint_singlebit_p() can be used to determine required buffer size for + * rb_integer_pack() used with #INTEGER_PACK_2COMP (two's complement). + * + * The following example calculates number of bits required to represent val in + * two's complement number, without sign bit. + * + * ```CXX + * size_t size; + * int neg = FIXNUM_P(val) ? FIX2LONG(val) < 0 : BIGNUM_NEGATIVE_P(val); + * size = rb_absint_numwords(val, 1, NULL); + * if (size == (size_t)-1) ...overflow... + * if (neg && rb_absint_singlebit_p(val)) + * size--; + * ``` + * + * The following example calculates number of bytes required to represent val in + * two's complement number, with sign bit. + * + * ```CXX + * size_t size; + * int neg = FIXNUM_P(val) ? FIX2LONG(val) < 0 : BIGNUM_NEGATIVE_P(val); + * int nlz_bits; + * size = rb_absint_size(val, &nlz_bits); + * if (nlz_bits == 0 && !(neg && rb_absint_singlebit_p(val))) + * size++; + * ``` + */ int rb_absint_singlebit_p(VALUE val); RBIMPL_SYMBOL_EXPORT_END() diff --git a/include/ruby/internal/intern/class.h b/include/ruby/internal/intern/class.h index d3be80d283..357af5d176 100644 --- a/include/ruby/internal/intern/class.h +++ b/include/ruby/internal/intern/class.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Public APIs related to ::rb_cClass/::rb_cModule.
*/ #include "ruby/internal/dllexport.h" @@ -27,30 +27,367 @@ RBIMPL_SYMBOL_EXPORT_BEGIN() /* class.c */ -VALUE rb_class_new(VALUE); -VALUE rb_mod_init_copy(VALUE, VALUE); -VALUE rb_singleton_class_clone(VALUE); -void rb_singleton_class_attached(VALUE,VALUE); -void rb_check_inheritable(VALUE); -VALUE rb_define_class_id(ID, VALUE); -VALUE rb_define_class_id_under(VALUE, ID, VALUE); + +/** + * Creates a new, anonymous class. + * + * @param[in] super What would become a parent class. + * @exception rb_eTypeError `super` is not something inheritable. + * @return An anonymous class that inherits `super`. + */ +VALUE rb_class_new(VALUE super); + +/** + * The comment that comes with this function says `:nodoc:`. Not sure what + * that means though. + * + * @param[out] clone Destination object. + * @param[in] orig Source object. + * @exception rb_eTypeError Cannot copy `orig`. + * @return The passed `clone`. + */ +VALUE rb_mod_init_copy(VALUE clone, VALUE orig); + +/** + * Asserts that the given class can derive a child class. A class might or + * might not be able to do so; for instance a singleton class cannot. + * + * @param[in] super Possible super class. + * @exception rb_eTypeError No it cannot. + * @post Upon successful return `super` can derive. + */ +void rb_check_inheritable(VALUE super); + +/** + * This is a very badly designed API that creates an anonymous class. + * + * @param[in] id Discarded for no reason (why...). + * @param[in] super What would become a parent class. 0 means + * ::rb_cObject. + * @exception rb_eTypeError `super` is not something inheritable. + * @return An anonymous class that inherits `super`. + * @warning You must explicitly name the return value. + */ +VALUE rb_define_class_id(ID id, VALUE super); + +/** + * Identical to rb_define_class_under(), except it takes the name in ::ID + * instead of C's string. + * + * @param[out] outer A class which contains the new class. 
+ * @param[in] id Name of the new class + * @param[in] super A class from which the new class will derive. + * 0 means ::rb_cObject. + * @exception rb_eTypeError The constant name `id` is already taken but the + * constant is not a class. + * @exception rb_eTypeError The class is already defined but the class can + * not be reopened because its superclass is not + * `super`. + * @exception rb_eArgError `super` is NULL. + * @return The created class. + * @post `outer::id` refers the returned class. + * @note If a class named `id` is already defined and its superclass is + * `super`, the function just returns the defined class. + * @note The GC does not collect nor move classes returned by this + * function. They are immortal. + */ +VALUE rb_define_class_id_under(VALUE outer, ID id, VALUE super); + +/** + * Creates a new, anonymous module. + * + * @return An anonymous module. + */ VALUE rb_module_new(void); -VALUE rb_define_module_id(ID); -VALUE rb_define_module_id_under(VALUE, ID); -VALUE rb_mod_included_modules(VALUE); -VALUE rb_mod_include_p(VALUE, VALUE); -VALUE rb_mod_ancestors(VALUE); -VALUE rb_class_instance_methods(int, const VALUE*, VALUE); -VALUE rb_class_public_instance_methods(int, const VALUE*, VALUE); -VALUE rb_class_protected_instance_methods(int, const VALUE*, VALUE); -VALUE rb_class_private_instance_methods(int, const VALUE*, VALUE); -VALUE rb_obj_singleton_methods(int, const VALUE*, VALUE); -void rb_define_method_id(VALUE, ID, VALUE (*)(ANYARGS), int); -void rb_undef(VALUE, ID); -void rb_define_protected_method(VALUE, const char*, VALUE (*)(ANYARGS), int); -void rb_define_private_method(VALUE, const char*, VALUE (*)(ANYARGS), int); -void rb_define_singleton_method(VALUE, const char*, VALUE(*)(ANYARGS), int); -VALUE rb_singleton_class(VALUE); + + +/** + * Creates a new, anonymous refinement. + * + * @return An anonymous refinement. + */ +VALUE rb_refinement_new(void); + +/** + * This is a very badly designed API that creates an anonymous module. 
+ * + * @param[in] id Discarded for no reason (why...). + * @return An anonymous module. + * @warning You must explicitly name the return value. + */ +VALUE rb_define_module_id(ID id); + +/** + * Identical to rb_define_module_under(), except it takes the name in ::ID + * instead of C's string. + * + * @param[out] outer A class which contains the new module. + * @param[in] id Name of the new module. + * @exception rb_eTypeError The constant name `id` is already taken but the + * constant is not a module. + * @return The created module. + * @post `outer::id` refers to the returned module. + * @note The GC does not collect nor move classes returned by this + * function. They are immortal. + */ +VALUE rb_define_module_id_under(VALUE outer, ID id); + +/** + * Queries the list of included modules. It can also be seen as a routine to + * first call rb_mod_ancestors(), then rejects non-modules from the return + * value. + * + * @param[in] mod Class or Module. + * @return An array of modules that are either included or prepended in any + * of `mod`'s ancestry tree (including itself). + */ +VALUE rb_mod_included_modules(VALUE mod); + +/** + * Queries if the passed module is included by the module. It can also be seen + * as a routine to first call rb_mod_included_modules(), then see if the return + * value contains the passed module. + * + * @param[in] child A Module. + * @param[in] parent Another Module. + * @exception rb_eTypeError `child` is not an instance of ::rb_cModule. + * @retval RUBY_Qtrue `parent` is either included or prepended in any + * of `child`'s ancestry tree (including itself). + * @retval RUBY_Qfalse Otherwise. + */ +VALUE rb_mod_include_p(VALUE child, VALUE parent); + +/** + * Queries the module's ancestors. This routine gathers classes and modules + * that the passed module either inherits, includes, or prepends, then + * recursively applies that routine again and again to the collected entries + * until the list doesn't grow up.
+ * + * @param[in] mod A module or a class. + * @return An array of classes or modules that `mod` possibly recursively + * inherits, includes, or prepends. + * + * @internal + * + * Above description is written in a recursive language but in practice it + * computes the return value iteratively. + */ +VALUE rb_mod_ancestors(VALUE mod); + +/** + * Queries the class's descendants. This routine gathers classes that are + * subclasses of the given class (or subclasses of those subclasses, etc.), + * returning an array of classes that have the given class as an ancestor. + * The returned array does not include the given class or singleton classes. + * + * @param[in] klass A class. + * @return An array of classes where `klass` is an ancestor. + * + * @internal + */ +VALUE rb_class_descendants(VALUE klass); + +/** + * Queries the class's direct descendants. This routine gathers classes that are + * direct subclasses of the given class, + * returning an array of classes that have the given class as a superclass. + * The returned array does not include singleton classes. + * + * @param[in] klass A class. + * @return An array of classes where `klass` is the `superclass`. + * + * @internal + */ +VALUE rb_class_subclasses(VALUE klass); + + +/** + * Returns the attached object for a singleton class. + * If the given class is not a singleton class, raises a TypeError. + * + * @param[in] klass A class. + * @return The object which has the singleton class `klass`. + * + * @internal + */ +VALUE rb_class_attached_object(VALUE klass); + +/** + * Generates an array of symbols, which are the list of method names defined in + * the passed class. + * + * @param[in] argc Number of objects of `argv`. + * @param[in] argv Array of at most one object, which controls (if + * any) whether the return array includes the names + * of methods defined in ancestors or not. + * @param[in] mod A module or a class. + * @exception rb_eArgError `argc` out of range. 
+ * @return An array of symbols collecting names of instance methods that + * are not private, defined at `mod`. + */ +VALUE rb_class_instance_methods(int argc, const VALUE *argv, VALUE mod); + +/** + * Identical to rb_class_instance_methods(), except it returns names of methods + * that are public only. + * + * @param[in] argc Number of objects of `argv`. + * @param[in] argv Array of at most one object, which controls (if + * any) whether the return array includes the names + * of methods defined in ancestors or not. + * @param[in] mod A module or a class. + * @exception rb_eArgError `argc` out of range. + * @return An array of symbols collecting names of instance methods that + * are public, defined at `mod`. + */ +VALUE rb_class_public_instance_methods(int argc, const VALUE *argv, VALUE mod); + +/** + * Identical to rb_class_instance_methods(), except it returns names of methods + * that are protected only. + * + * @param[in] argc Number of objects of `argv`. + * @param[in] argv Array of at most one object, which controls (if + * any) whether the return array includes the names + * of methods defined in ancestors or not. + * @param[in] mod A module or a class. + * @exception rb_eArgError `argc` out of range. + * @return An array of symbols collecting names of instance methods that + * are protected, defined at `mod`. + */ +VALUE rb_class_protected_instance_methods(int argc, const VALUE *argv, VALUE mod); + +/** + * Identical to rb_class_instance_methods(), except it returns names of methods + * that are private only. + * + * @param[in] argc Number of objects of `argv`. + * @param[in] argv Array of at most one object, which controls (if + * any) whether the return array includes the names + * of methods defined in ancestors or not. + * @param[in] mod A module or a class. + * @exception rb_eArgError `argc` out of range. + * @return An array of symbols collecting names of instance methods that + * are private, defined at `mod`.
+ */ +VALUE rb_class_private_instance_methods(int argc, const VALUE *argv, VALUE mod); + +/** + * Identical to rb_class_instance_methods(), except it returns names of + * singleton methods instead of instance methods. + * + * @param[in] argc Number of objects of `argv`. + * @param[in] argv Array of at most one object, which controls (if + * any) whether the return array includes the names + * of methods defined in ancestors or not. + * @param[in] obj Arbitrary ruby object. + * @exception rb_eArgError `argc` out of range. + * @return An array of symbols collecting names of instance methods that + * are not private, defined at the singleton class of `obj`. + */ +VALUE rb_obj_singleton_methods(int argc, const VALUE *argv, VALUE obj); + +/** + * Identical to rb_define_method(), except it takes the name of the method in + * ::ID instead of C's string. + * + * @param[out] klass A module or a class. + * @param[in] mid Name of the function. + * @param[in] func The method body. + * @param[in] arity The number of parameters. See @ref defmethod. + * @note There are in fact 18 different prototypes for func. + * @see ::ruby::backward::cxxanyargs::define_method::rb_define_method_id + */ +void rb_define_method_id(VALUE klass, ID mid, VALUE (*func)(ANYARGS), int arity); + +/* vm_method.c */ + +/** + * Inserts a method entry that hides previous method definition of the given + * name. This is not a deletion of a method. Method of the same name defined + * in a parent class is kept invisible in this way. + * + * @param[out] mod The module to insert an undef. + * @param[in] mid Name of the undef. + * @exception rb_eTypeError `mod` is a non-module. + * @exception rb_eFrozenError `mod` is frozen. + * @exception rb_eNameError No such method named `mod#mid`. + * @post `mod#mid` is undefined.
+ * @see rb_undef_method + * + * @internal + * + * @shyouhei doesn't understand why this is not the ::ID -taking variant of + * rb_undef_method(), given rb_remove_method() has its ::ID -taking counterpart + * named rb_remove_method_id(). + */ +void rb_undef(VALUE mod, ID mid); + +/* class.c */ + +RBIMPL_ATTR_NONNULL(()) +/** + * Identical to rb_define_method(), except it defines a protected method. + * + * @param[out] klass A module or a class. + * @param[in] mid Name of the function. + * @param[in] func The method body. + * @param[in] arity The number of parameters. See @ref defmethod. + * @note There are in fact 18 different prototypes for func. + * @see ::ruby::backward::cxxanyargs::define_method::rb_define_protected_method + */ +void rb_define_protected_method(VALUE klass, const char *mid, VALUE (*func)(ANYARGS), int arity); + +RBIMPL_ATTR_NONNULL(()) +/** + * Identical to rb_define_method(), except it defines a private method. + * + * @param[out] klass A module or a class. + * @param[in] mid Name of the function. + * @param[in] func The method body. + * @param[in] arity The number of parameters. See @ref defmethod. + * @note There are in fact 18 different prototypes for func. + * @see ::ruby::backward::cxxanyargs::define_method::rb_define_private_method + */ +void rb_define_private_method(VALUE klass, const char *mid, VALUE (*func)(ANYARGS), int arity); + +RBIMPL_ATTR_NONNULL(()) +/** + * Identical to rb_define_method(), except it defines a singleton method. + * + * @param[out] obj Arbitrary ruby object. + * @param[in] mid Name of the function. + * @param[in] func The method body. + * @param[in] arity The number of parameters. See @ref defmethod. + * @note There are in fact 18 different prototypes for func. + * @see ::ruby::backward::cxxanyargs::define_method::rb_define_singleton_method + */ +void rb_define_singleton_method(VALUE obj, const char *mid, VALUE(*func)(ANYARGS), int arity); + +/** + * Finds or creates the singleton class of the passed object.
+ * + * @param[out] obj Arbitrary ruby object. + * @exception rb_eTypeError `obj` cannot have its singleton class. + * @return A (possibly newly allocated) instance of ::rb_cClass. + * @post `obj` has its singleton class, which is the return value. + * @post In case `obj` is a class, the returned singleton class also has + * its own singleton class in order to keep consistency of the + * inheritance structure of metaclasses. + * @note A new singleton class will be created if `obj` did not have + * one. + * @note The singleton classes for ::RUBY_Qnil, ::RUBY_Qtrue, and + * ::RUBY_Qfalse are ::rb_cNilClass, ::rb_cTrueClass, and + * ::rb_cFalseClass respectively. + * + * @internal + * + * You can _create_ a singleton class of a frozen object. Intentional or ...? + * + * Nowadays there are wider range of objects who cannot have singleton classes + * than before. For instance some string instances cannot for some reason. + */ +VALUE rb_singleton_class(VALUE obj); RBIMPL_SYMBOL_EXPORT_END() diff --git a/include/ruby/internal/intern/compar.h b/include/ruby/internal/intern/compar.h index d226ca37b1..dc3b377b01 100644 --- a/include/ruby/internal/intern/compar.h +++ b/include/ruby/internal/intern/compar.h @@ -17,17 +17,45 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Public APIs related to ::rb_mComparable. */ +#include "ruby/internal/attr/cold.h" +#include "ruby/internal/attr/noreturn.h" #include "ruby/internal/dllexport.h" #include "ruby/internal/value.h" RBIMPL_SYMBOL_EXPORT_BEGIN() +/* bignum.c */ + +/** + * Canonicalises the passed `val`, which is the return value of `a <=> b`, into + * C's `{-1, 0, 1}`. This can be handy when you implement a callback function + * to pass to `qsort(3)` etc. 
+ * + * @param[in] val Return value of a space ship operator. + * @param[in] a Comparison LHS. + * @param[in] b Comparison RHS. + * @exception rb_eArgError `a` and `b` are not comparable each other. + * @retval -1 `val` is less than zero. + * @retval 0 `val` is equal to zero. + * @retval 1 `val` is greater than zero. + */ +int rb_cmpint(VALUE val, VALUE a, VALUE b); + /* compar.c */ -int rb_cmpint(VALUE, VALUE, VALUE); -NORETURN(void rb_cmperr(VALUE, VALUE)); + +RBIMPL_ATTR_COLD() +RBIMPL_ATTR_NORETURN() +/** + * Raises "comparison failed" error. + * + * @param[in] a Comparison LHS. + * @param[in] b Comparison RHS. + * @exception rb_eArgError `a` and `b` are not comparable each other. + */ +void rb_cmperr(VALUE a, VALUE b); RBIMPL_SYMBOL_EXPORT_END() diff --git a/include/ruby/internal/intern/complex.h b/include/ruby/internal/intern/complex.h index 70343221f6..e111bd8ced 100644 --- a/include/ruby/internal/intern/complex.h +++ b/include/ruby/internal/intern/complex.h @@ -17,9 +17,11 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Public APIs related to ::rb_cComplex. */ +#include "ruby/internal/attr/deprecated.h" +#include "ruby/internal/attr/pure.h" #include "ruby/internal/dllexport.h" #include "ruby/internal/value.h" #include "ruby/internal/arithmetic/long.h" /* INT2FIX is here. */ @@ -27,32 +29,223 @@ RBIMPL_SYMBOL_EXPORT_BEGIN() /* complex.c */ -VALUE rb_complex_raw(VALUE, VALUE); + +/** + * Identical to rb_complex_new(), except it assumes both arguments are not + * instances of ::rb_cComplex. It is thus dangerous for extension libraries. + * + * @param[in] real Real part, in any numeric except Complex. + * @param[in] imag Imaginary part, in any numeric except Complex. 
+ * @return An instance of ::rb_cComplex whose value is `real + (imag)i`. + */ +VALUE rb_complex_raw(VALUE real, VALUE imag); + +/** + * Shorthand of `x+0i`. It practically converts `x` into a Complex of the + * identical value. + * + * @param[in] x Any numeric except Complex. + * @return An instance of ::rb_cComplex, whose value is `x + 0i`. + */ #define rb_complex_raw1(x) rb_complex_raw((x), INT2FIX(0)) + +/** @alias{rb_complex_raw} */ #define rb_complex_raw2(x,y) rb_complex_raw((x), (y)) -VALUE rb_complex_new(VALUE, VALUE); + +/** + * Constructs a Complex, by first multiplying the imaginary part with `1i` then + * adds it to the real part. This definition doesn't need both arguments be + * real numbers. It can happily combine two instances of ::rb_cComplex (with + * rotating the latter one). + * + * @param[in] real An instance of ::rb_cNumeric. + * @param[in] imag Another instance of ::rb_cNumeric. + * @return An instance of ::rb_cComplex whose value is `imag * 1i + real`. + */ +VALUE rb_complex_new(VALUE real, VALUE imag); + +/** + * Shorthand of `x+0i`. It practically converts `x` into a Complex of the + * identical value. + * + * @param[in] x Any numeric value. + * @return An instance of ::rb_cComplex, whose value is `x + 0i`. + */ #define rb_complex_new1(x) rb_complex_new((x), INT2FIX(0)) + +/** @alias{rb_complex_new} */ #define rb_complex_new2(x,y) rb_complex_new((x), (y)) + +/** + * Constructs a Complex using polar representations. Unlike rb_complex_new() + * it makes no sense to pass non-real instances to this function. + * + * @param[in] abs Magnitude, in any numeric except Complex. + * @param[in] arg Angle, in radians, in any numeric except Complex. + * @return An instance of ::rb_cComplex which denotes the given polar + * coordinates. 
+ */ VALUE rb_complex_new_polar(VALUE abs, VALUE arg); -DEPRECATED_BY(rb_complex_new_polar, VALUE rb_complex_polar(VALUE abs, VALUE arg)); + +RBIMPL_ATTR_DEPRECATED(("by: rb_complex_new_polar")) +/** @old{rb_complex_new_polar} */ +VALUE rb_complex_polar(VALUE abs, VALUE arg); + +RBIMPL_ATTR_PURE() +/** + * Queries the real part of the passed Complex. + * + * @param[in] z An instance of ::rb_cComplex. + * @return Its real part, which is an instance of ::rb_cNumeric. + */ VALUE rb_complex_real(VALUE z); + +RBIMPL_ATTR_PURE() +/** + * Queries the imaginary part of the passed Complex. + * + * @param[in] z An instance of ::rb_cComplex. + * @return Its imaginary part, which is an instance of ::rb_cNumeric. + */ VALUE rb_complex_imag(VALUE z); + +/** + * Performs addition of the passed two objects. + * + * @param[in] x An instance of ::rb_cComplex. + * @param[in] y Arbitrary ruby object. + * @return What `x + y` evaluates to. + * @see rb_num_coerce_bin() + */ VALUE rb_complex_plus(VALUE x, VALUE y); + +/** + * Performs subtraction of the passed two objects. + * + * @param[in] x An instance of ::rb_cComplex. + * @param[in] y Arbitrary ruby object. + * @return What `x - y` evaluates to. + * @see rb_num_coerce_bin() + */ VALUE rb_complex_minus(VALUE x, VALUE y); + +/** + * Performs multiplication of the passed two objects. + * + * @param[in] x An instance of ::rb_cComplex. + * @param[in] y Arbitrary ruby object. + * @return What `x * y` evaluates to. + * @see rb_num_coerce_bin() + */ VALUE rb_complex_mul(VALUE x, VALUE y); + +/** + * Performs division of the passed two objects. + * + * @param[in] x An instance of ::rb_cComplex. + * @param[in] y Arbitrary ruby object. + * @return What `x / y` evaluates to. + * @see rb_num_coerce_bin() + */ VALUE rb_complex_div(VALUE x, VALUE y); + +/** + * Performs negation of the passed object. + * + * @param[in] z An instance of ::rb_cComplex. + * @return What `-z` evaluates to. 
+ */ VALUE rb_complex_uminus(VALUE z); + +/** + * Performs complex conjugation of the passed object. + * + * @param[in] z An instance of ::rb_cComplex. + * @return Its complex conjugate, in ::rb_cComplex. + */ VALUE rb_complex_conjugate(VALUE z); + +/** + * Queries the absolute (or the magnitude) of the passed object. + * + * @param[in] z An instance of ::rb_cComplex. + * @return Its magnitude, in ::rb_cFloat. + */ VALUE rb_complex_abs(VALUE z); + +/** + * Queries the argument (or the angle) of the passed object. + * + * @param[in] z An instance of ::rb_cComplex. + * @return Its argument, in ::rb_cFloat. + */ VALUE rb_complex_arg(VALUE z); + +/** + * Performs exponentiation of the passed two objects. + * + * @param[in] base An instance of ::rb_cComplex. + * @param[in] exp Arbitrary ruby object. + * @return What `base ** exp` evaluates to. + * @see rb_num_coerce_bin() + */ VALUE rb_complex_pow(VALUE base, VALUE exp); + +/** + * Identical to rb_complex_new(), except it takes the arguments as C's double + * instead of Ruby's object. + * + * @param[in] real Real part. + * @param[in] imag Imaginary part. + * @return An instance of ::rb_cComplex whose value is `real + (imag)i`. + */ VALUE rb_dbl_complex_new(double real, double imag); + +/** @alias{rb_complex_plus} */ #define rb_complex_add rb_complex_plus + +/** @alias{rb_complex_minus} */ #define rb_complex_sub rb_complex_minus + +/** @alias{rb_complex_uminus} */ #define rb_complex_nagate rb_complex_uminus -VALUE rb_Complex(VALUE, VALUE); +/** + * Converts various values into a Complex. This function accepts: + * + * - Instances of ::rb_cComplex (taken as-is), + * - Instances of ::rb_cNumeric (adds `0i`), + * - Instances of ::rb_cString (parses), + * - Other objects that respond to `#to_c`. + * + * It (possibly recursively) applies `#to_c` until both sides become a Complex + * value, then computes `imag * 1i + real`. + * + * As a special case, passing ::RUBY_Qundef to `imag` is the same as passing + * `RB_INT2NUM(0)`.
+ * + * @param[in] real Real part (see above). + * @param[in] imag Imaginary part (see above). + * @exception rb_eTypeError Passed something not described above. + * @return An instance of ::rb_cComplex whose value is `1i * imag + real`. + * + * @internal + * + * This was the implementation of `Kernel#Complex` before, but they diverged. + */ +VALUE rb_Complex(VALUE real, VALUE imag); + +/** + * Shorthand of `x+0i`. It practically converts `x` into a Complex of the + * identical value. + * + * @param[in] x ::rb_cNumeric, ::rb_cString, or something that responds to + * `#to_c`. + * @return An instance of ::rb_cComplex, whose value is `x + 0i`. + */ #define rb_Complex1(x) rb_Complex((x), INT2FIX(0)) + +/** @alias{rb_Complex} */ #define rb_Complex2(x,y) rb_Complex((x), (y)) RBIMPL_SYMBOL_EXPORT_END() diff --git a/include/ruby/internal/intern/cont.h b/include/ruby/internal/intern/cont.h index cb651e682e..32647f48aa 100644 --- a/include/ruby/internal/intern/cont.h +++ b/include/ruby/internal/intern/cont.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Public APIs related to rb_cFiber. */ #include "ruby/internal/dllexport.h" @@ -27,20 +27,256 @@ RBIMPL_SYMBOL_EXPORT_BEGIN() /* cont.c */ -VALUE rb_fiber_new(rb_block_call_func_t, VALUE); + +/** + * Creates a Fiber instance from a C-backended block. + * + * @param[in] func A function, to become the fiber's body. + * @param[in] callback_obj Passed as-is to `func`. + * @return An allocated new instance of rb_cFiber, which is ready to be + * "resume"d. + */ +VALUE rb_fiber_new(rb_block_call_func_t func, VALUE callback_obj); + +/** + * Creates a Fiber instance from a C-backended block with the specified + * storage. 
+ * + * If the given storage is Qundef or Qtrue, this function is equivalent to + * rb_fiber_new() which inherits storage from the current fiber. + * + * Specifying Qtrue is experimental and may be changed in the future. + * + * If the given storage is Qnil, this function will lazily initialize the + * internal storage which starts off empty (without any inheritance). + * + * Otherwise, the given storage is used as the internal storage. + * + * @param[in] func A function, to become the fiber's body. + * @param[in] callback_obj Passed as-is to `func`. + * @param[in] storage The way to set up the storage for the fiber. + * @return An allocated new instance of rb_cFiber, which is ready to be + * "resume"d. + */ +VALUE rb_fiber_new_storage(rb_block_call_func_t func, VALUE callback_obj, VALUE storage); + +/** + * Queries the fiber which is calling this function. Any ruby execution + * context has its fiber, either explicitly or implicitly. + * + * @return The current fiber. + */ VALUE rb_fiber_current(void); -VALUE rb_fiber_alive_p(VALUE); -VALUE rb_obj_is_fiber(VALUE); +/** + * Queries the liveness of the passed fiber. "Alive" in this context means + * that the fiber can still be resumed. Once it reaches its end of + * execution, this function returns ::RUBY_Qfalse. + * + * @param[in] fiber A target fiber. + * @retval RUBY_Qtrue It is. + * @retval RUBY_Qfalse It isn't. + */ +VALUE rb_fiber_alive_p(VALUE fiber); + +/** + * Queries if an object is a fiber. + * + * @param[in] obj Arbitrary ruby object. + * @retval RUBY_Qtrue It is. + * @retval RUBY_Qfalse It isn't. + */ +VALUE rb_obj_is_fiber(VALUE obj); + +/** + * Resumes the execution of the passed fiber, either from the point at which + * the last rb_fiber_yield() was called if any, or at the beginning of the + * fiber body if it is the first call to this function.
+ * + * Other arguments are passed into the fiber's body, either as return values of + * rb_fiber_yield() in case it switches to there, or as the block parameter of + * the fiber body if it switches to the beginning of the fiber. + * + * The return value of this function is either the value passed to previous + * rb_fiber_yield() call, or the ultimate evaluated value of the entire fiber + * body if the execution reaches the end of it. + * + * When an exception happens inside of a fiber it propagates to this function. + * + * ```ruby + * f = Fiber.new do |i| + * puts "<x> =>> #{i}" + * puts "<y> <-- #{i + 1}" + * j = Fiber.yield(i + 1) + * puts "<z> =>> #{j}" + * puts "<w> <-- #{j + 1}" + * next j + 1 + * end + * + * puts "[a] <-- 1" + * p = f.resume(1) + * puts "[b] =>> #{p}" + * puts "[c] <-- #{p + 1}" + * q = f.resume(p + 1) + * puts "[d] =>> #{q}" + * ``` + * + * Above program executes in `[a] <x> <y> [b] [c] <z> <w> [d]`. + * + * @param[out] fiber The fiber to resume. + * @param[in] argc Number of objects of `argv`. + * @param[in] argv Passed (somehow) to `fiber`. + * @exception rb_eFiberError `fib` is terminated etc. + * @exception rb_eException Any exceptions happen in `fiber`. + * @return (See above) + * @note This function _does_ return. + * + * @internal + * + * @shyouhei expected this function to raise ::rb_eFrozenError for frozen + * fibers but it doesn't in practice. Intentional or ...? + */ VALUE rb_fiber_resume(VALUE fiber, int argc, const VALUE *argv); + +/** + * Identical to rb_fiber_resume(), except you can specify how to handle the + * last element of the given array. + * + * @param[out] fiber The fiber to resume. + * @param[in] argc Number of objects of `argv`. + * @param[in] argv Passed (somehow) to `fiber`. + * @param[in] kw_splat Handling of keyword parameters: + * - RB_NO_KEYWORDS `argv`'s last is not a keyword argument. + * - RB_PASS_KEYWORDS `argv`'s last is a keyword argument. 
+ * - RB_PASS_CALLED_KEYWORDS it depends if there is a passed block. + * @exception rb_eFiberError `fiber` is terminated etc. + * @exception rb_eException Any exceptions happen in `fiber`. + * @return Either what was yielded or the last value of the fiber body. + */ VALUE rb_fiber_resume_kw(VALUE fiber, int argc, const VALUE *argv, int kw_splat); +/** + * Yields the control back to the point where the current fiber was resumed. + * The passed objects would be the return value of rb_fiber_resume(). This + * fiber then suspends its execution until next time it is resumed. + * + * This function can also raise arbitrary exceptions injected from outside of + * the fiber using rb_fiber_raise(). + * + * ```ruby + * exc = Class.new Exception + * + * f = Fiber.new do + * Fiber.yield + * rescue exc => e + * puts e.message + * end + * + * f.resume + * f.raise exc, "Hi!" + * ``` + * + * @param[in] argc Number of objects of `argv`. + * @param[in] argv Passed to rb_fiber_resume(). + * @exception rb_eException (See above) + * @return (See rb_fiber_resume() for details) + */ VALUE rb_fiber_yield(int argc, const VALUE *argv); + +/** + * Identical to rb_fiber_yield(), except you can specify how to handle the last + * element of the given array. + * + * @param[in] argc Number of objects of `argv`. + * @param[in] argv Passed to rb_fiber_resume(). + * @param[in] kw_splat Handling of keyword parameters: + * - RB_NO_KEYWORDS `argv`'s last is not a keyword argument. + * - RB_PASS_KEYWORDS `argv`'s last is a keyword argument. + * - RB_PASS_CALLED_KEYWORDS it depends if there is a passed block. + * @exception rb_eException What was raised using `Fiber#raise`. + * @return (See rb_fiber_resume() for details) + */ VALUE rb_fiber_yield_kw(int argc, const VALUE *argv, int kw_splat); +/** + * Transfers control to another fiber, resuming it from where it last stopped + * or starting it if it was not resumed before. The calling fiber will be + * suspended much like in a call to rb_fiber_yield(). 
+ * + * The fiber which receives the transfer call treats it much like a resume + * call. Arguments passed to transfer are treated like those passed to resume. + * + * The two style of control passing to and from fiber (one is rb_fiber_resume() + * and rb_fiber_yield(), another is rb_fiber_transfer() to and from fiber) + * can't be freely mixed. + * + * - If the Fiber's lifecycle had started with transfer, it will never be + * able to yield or be resumed control passing, only finish or transfer + * back. (It still can resume other fibers that are allowed to be + * resumed.) + * + * - If the Fiber's lifecycle had started with resume, it can yield or + * transfer to another Fiber, but can receive control back only the way + * compatible with the way it was given away: if it had transferred, it + * only can be transferred back, and if it had yielded, it only can be + * resumed back. After that, it again can transfer or yield. + * + * If those rules are broken, rb_eFiberError is raised. + * + * For an individual Fiber design, yield/resume is easier to use (the Fiber + * just gives away control, it doesn't need to think about who the control is + * given to), while transfer is more flexible for complex cases, allowing to + * build arbitrary graphs of Fibers dependent on each other. + * + * @param[out] fiber Explicit control destination. + * @param[in] argc Number of objects of `argv`. + * @param[in] argv Passed to rb_fiber_resume(). + * @exception rb_eFiberError (See above) + * @exception rb_eException What was raised using `Fiber#raise`. + * @return (See rb_fiber_resume() for details) + */ VALUE rb_fiber_transfer(VALUE fiber, int argc, const VALUE *argv); + +/** + * Identical to rb_fiber_transfer(), except you can specify how to handle the + * last element of the given array. + * + * @param[out] fiber Explicit control destination. + * @param[in] argc Number of objects of `argv`. + * @param[in] argv Passed to rb_fiber_resume(). 
+ * @param[in] kw_splat Handling of keyword parameters: + * - RB_NO_KEYWORDS `argv`'s last is not a keyword argument. + * - RB_PASS_KEYWORDS `argv`'s last is a keyword argument. + * - RB_PASS_CALLED_KEYWORDS it depends if there is a passed block. + * @exception rb_eFiberError (See above) + * @exception rb_eException What was raised using `Fiber#raise`. + * @return (See rb_fiber_resume() for details) + */ VALUE rb_fiber_transfer_kw(VALUE fiber, int argc, const VALUE *argv, int kw_splat); +/** + * Identical to rb_fiber_resume() but instead of resuming normal execution of + * the passed fiber, it raises the given exception in it. From inside of the + * fiber this would be seen as if rb_fiber_yield() raised. + * + * This function does return in case the passed fiber gracefully handled the + * passed exception. But if it does not, the raised exception propagates out + * of the passed fiber; this function then does not return. + * + * Parameters are passed to rb_make_exception() to create an exception object. + * See its documentation for what is allowed here. + * + * It is a failure to call this function against a fiber which is resuming, + * has never run yet, or has already finished running. + * + * @param[out] fiber Where exception is raised. + * @param[in] argc Passed as-is to rb_make_exception(). + * @param[in] argv Passed as-is to rb_make_exception(). + * @exception rb_eFiberError `fiber` is terminated etc. + * @return (See rb_fiber_resume() for details) + */ +VALUE rb_fiber_raise(VALUE fiber, int argc, const VALUE *argv); + RBIMPL_SYMBOL_EXPORT_END() #endif /* RBIMPL_INTERN_CONT_H */ diff --git a/include/ruby/internal/intern/dir.h b/include/ruby/internal/intern/dir.h index 936f4e1f36..da1873e068 100644 --- a/include/ruby/internal/intern/dir.h +++ b/include/ruby/internal/intern/dir.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available.
* We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Public APIs related to ::rb_cDir. */ #include "ruby/internal/dllexport.h" @@ -26,6 +26,15 @@ RBIMPL_SYMBOL_EXPORT_BEGIN() /* dir.c */ + +/** + * Queries the path of the current working directory of the current process. + * + * @return An instance of ::rb_cString that holds the working directory. + * @note The returned string is in "filesystem" encoding. Most notably on + * Linux this is an alias of default external encoding. Most notably + * on Windows it can be an alias of OS codepage. + */ VALUE rb_dir_getwd(void); RBIMPL_SYMBOL_EXPORT_END() diff --git a/include/ruby/internal/intern/enum.h b/include/ruby/internal/intern/enum.h index 17c20c1c0a..215ad82672 100644 --- a/include/ruby/internal/intern/enum.h +++ b/include/ruby/internal/intern/enum.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Public APIs related to ::rb_mEnumerable. */ #include "ruby/internal/dllexport.h" @@ -26,7 +26,47 @@ RBIMPL_SYMBOL_EXPORT_BEGIN() /* enum.c */ -VALUE rb_enum_values_pack(int, const VALUE*); + +/** + * Basically identical to rb_ary_new_from_values(), except it returns something + * different when `argc` < 2. + * + * @param[in] argc Number of objects of `argv`. + * @param[in] argv Arbitrary objects. + * @retval RUBY_Qnil `argc` is zero. + * @retval argv[0] `argc` is one. + * @retval otherwise Otherwise. + * + * @internal + * + * What is this business? Well, this function is about `yield`'s taking + * multiple values.
Consider following user-defined class: + * + * ```ruby + * class Foo + * include Enumerable + * + * def each + * yield :q, :w, :e, :r + * end + * end + * + * Foo.new.each_with_object([]) do |i, j| + * j << i # ^^^ <- What to expect for `i`? + * end + * ``` + * + * Here, `Foo#each_with_object` is in fact `Enumerable#each_with_object`, which + * doesn't know what would be yielded. Yet, it has to take a block of arity 2. + * This function is used here, to "pack" arbitrary number of yielded objects + * into one. + * + * If people want to implement their own `Enumerable#each_with_object` this API + * can be handy. Though @shyouhei suspects it is relatively rare for 3rd party + * extension libraries to have such things. Also `Enumerable#each_entry` is + * basically this function exposed as a Ruby method. + */ +VALUE rb_enum_values_pack(int argc, const VALUE *argv); RBIMPL_SYMBOL_EXPORT_END() diff --git a/include/ruby/internal/intern/enumerator.h b/include/ruby/internal/intern/enumerator.h index c81485155c..20e5d7c6fc 100644 --- a/include/ruby/internal/intern/enumerator.h +++ b/include/ruby/internal/intern/enumerator.h @@ -17,9 +17,10 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Public APIs related to ::rb_cEnumerator. */ +#include "ruby/internal/attr/nonnull.h" #include "ruby/internal/dllexport.h" #include "ruby/internal/intern/eval.h" /* rb_frame_this_func */ #include "ruby/internal/iterator.h" /* rb_block_given_p */ @@ -28,52 +29,230 @@ RBIMPL_SYMBOL_EXPORT_BEGIN() -typedef VALUE rb_enumerator_size_func(VALUE, VALUE, VALUE); +/** + * This is the type of functions that rb_enumeratorize_with_size() expects. 
In + * theory an enumerator can have indefinite number of elements, but in practice + * it often is the case we can compute the size of an enumerator beforehand. + * If your enumerator has such property, supply a function that calculates such + * values. + * + * @param[in] recv The original receiver of the enumerator. + * @param[in] argv Arguments passed to `Object#enum_for` etc. + * @param[in] eobj The enumerator object. + * @return The size of `eobj`, in ::rb_cNumeric, or ::RUBY_Qnil if the size + * is not known until we actually iterate. + */ +typedef VALUE rb_enumerator_size_func(VALUE recv, VALUE argv, VALUE eobj); +/** + * Decomposed `Enumerator::ArithmeticSequence`. This is a subclass of + * ::rb_cEnumerator, which represents a sequence of numbers with common + * difference. Internal data structure of the class is opaque to users, but + * you can obtain a decomposed one using rb_arithmetic_sequence_extract(). + */ typedef struct { - VALUE begin; - VALUE end; - VALUE step; - int exclude_end; + VALUE begin; /**< "Left" or "lowest" endpoint of the sequence. */ + VALUE end; /**< "Right" or "highest" endpoint of the sequence.*/ + VALUE step; /**< Step between a sequence. */ + int exclude_end; /**< Whether the endpoint is open or closed. */ } rb_arithmetic_sequence_components_t; /* enumerator.c */ -VALUE rb_enumeratorize(VALUE, VALUE, int, const VALUE *); -VALUE rb_enumeratorize_with_size(VALUE, VALUE, int, const VALUE *, rb_enumerator_size_func *); -VALUE rb_enumeratorize_with_size_kw(VALUE, VALUE, int, const VALUE *, rb_enumerator_size_func *, int); -int rb_arithmetic_sequence_extract(VALUE, rb_arithmetic_sequence_components_t *); -VALUE rb_arithmetic_sequence_beg_len_step(VALUE, long *begp, long *lenp, long *stepp, long len, int err); + +/** + * Constructs an enumerator. This roughly resembles `Object#enum_for`. + * + * @param[in] recv A receiver of `meth`. + * @param[in] meth Method ID in a symbol object. + * @param[in] argc Number of objects of `argv`.
+ * @param[in] argv Arguments passed to `meth`. + * @exception rb_eTypeError `meth` is not an instance of ::rb_cSymbol. + * @return A new instance of ::rb_cEnumerator which, when yielded, + * enumerates by calling `meth` on `recv` with `argv`. + */ +VALUE rb_enumeratorize(VALUE recv, VALUE meth, int argc, const VALUE *argv); + +/** + * Identical to rb_enumeratorize(), except you can additionally specify the + * size function of return value. + * + * @param[in] recv A receiver of `meth`. + * @param[in] meth Method ID in a symbol object. + * @param[in] argc Number of objects of `argv`. + * @param[in] argv Arguments passed to `meth`. + * @param[in] func Size calculator. + * @exception rb_eTypeError `meth` is not an instance of ::rb_cSymbol. + * @return A new instance of ::rb_cEnumerator which, when yielded, + * enumerates by calling `meth` on `recv` with `argv`. + * @note `func` can be zero, which means the size is unknown. + */ +VALUE rb_enumeratorize_with_size(VALUE recv, VALUE meth, int argc, const VALUE *argv, rb_enumerator_size_func *func); + +/** + * Identical to rb_enumeratorize_with_size(), except you can specify how to + * handle the last element of the given array. + * + * @param[in] recv A receiver of `meth`. + * @param[in] meth Method ID in a symbol object. + * @param[in] argc Number of objects of `argv`. + * @param[in] argv Arguments passed to `meth`. + * @param[in] func Size calculator. + * @param[in] kw_splat Handling of keyword parameters: + * - RB_NO_KEYWORDS `argv`'s last is not a keyword argument. + * - RB_PASS_KEYWORDS `argv`'s last is a keyword argument. + * - RB_PASS_CALLED_KEYWORDS it depends if there is a passed block. + * @exception rb_eTypeError `meth` is not an instance of ::rb_cSymbol. + * @return A new instance of ::rb_cEnumerator which, when yielded, + * enumerates by calling `meth` on `recv` with `argv`. + * @note `func` can be zero, which means the size is unknown.
+ */ +VALUE rb_enumeratorize_with_size_kw(VALUE recv, VALUE meth, int argc, const VALUE *argv, rb_enumerator_size_func *func, int kw_splat); + +RBIMPL_ATTR_NONNULL(()) +/** + * Extracts components of the passed arithmetic sequence. This can be seen as + * an extended version of rb_range_values(). + * + * @param[in] as Target instance of `Enumerator::ArithmeticSequence`. + * @param[out] buf Decomposed results buffer. + * @retval 0 `as` is not `Enumerator::ArithmeticSequence`. + * @retval 1 Success. + * @post `buf` is filled. + */ +int rb_arithmetic_sequence_extract(VALUE as, rb_arithmetic_sequence_components_t *buf); + +RBIMPL_ATTR_NONNULL(()) +/** + * Identical to rb_range_beg_len(), except it takes an instance of + * `Enumerator::ArithmeticSequence`. + * + * @param[in] as An `Enumerator::ArithmeticSequence` instance. + * @param[out] begp Return value buffer. + * @param[out] lenp Return value buffer. + * @param[out] stepp Return value buffer. + * @param[in] len Updated length. + * @param[in] err In case `len` is out of range... + * - `0`: returns ::RUBY_Qnil. + * - `1`: raises ::rb_eRangeError. + * - `2`: `beg` and `len` expanded accordingly. + * @exception rb_eRangeError `as` cannot fit into `long`. + * @retval RUBY_Qfalse `as` is not `Enumerator::ArithmeticSequence`. + * @retval RUBY_Qnil `len` is out of `as` but `err` is zero. + * @retval RUBY_Qtrue Otherwise. + * @post `beg` is the (possibly updated) left endpoint. + * @post `len` is the (possibly updated) length of the range. + * + * @internal + * + * Currently no 3rd party applications of this function are found. But that can + * be because this function is relatively new.
+ */ +VALUE rb_arithmetic_sequence_beg_len_step(VALUE as, long *begp, long *lenp, long *stepp, long len, int err); RBIMPL_SYMBOL_EXPORT_END() +/** @cond INTERNAL_MACRO */ #ifndef RUBY_EXPORT # define rb_enumeratorize_with_size(obj, id, argc, argv, size_fn) \ rb_enumeratorize_with_size(obj, id, argc, argv, (rb_enumerator_size_func *)(size_fn)) # define rb_enumeratorize_with_size_kw(obj, id, argc, argv, size_fn, kw_splat) \ rb_enumeratorize_with_size_kw(obj, id, argc, argv, (rb_enumerator_size_func *)(size_fn), kw_splat) #endif +/** @endcond */ +/** + * This is an implementation detail of #RETURN_SIZED_ENUMERATOR(). You could + * use it directly, but can hardly be handy. + * + * @param[in] obj A receiver. + * @param[in] argc Number of objects of `argv`. + * @param[in] argv Arguments passed to the current method. + * @param[in] size_fn Size calculator. + * @return A new instance of ::rb_cEnumerator which, when yielded, + * enumerates by calling the current method on `obj` with `argv`. + */ #define SIZED_ENUMERATOR(obj, argc, argv, size_fn) \ rb_enumeratorize_with_size((obj), ID2SYM(rb_frame_this_func()), \ (argc), (argv), (size_fn)) +/** + * This is an implementation detail of #RETURN_SIZED_ENUMERATOR_KW(). You + * could use it directly, but can hardly be handy. + * + * @param[in] obj A receiver. + * @param[in] argc Number of objects of `argv`. + * @param[in] argv Arguments passed to the current method. + * @param[in] size_fn Size calculator. + * @param[in] kw_splat Handling of keyword parameters: + * - RB_NO_KEYWORDS `argv`'s last is not a keyword argument. + * - RB_PASS_KEYWORDS `argv`'s last is a keyword argument. + * - RB_PASS_CALLED_KEYWORDS it depends if there is a passed block. + * @return A new instance of ::rb_cEnumerator which, when yielded, + * enumerates by calling the current method on `obj` with `argv`.
+ */ #define SIZED_ENUMERATOR_KW(obj, argc, argv, size_fn, kw_splat) \ rb_enumeratorize_with_size_kw((obj), ID2SYM(rb_frame_this_func()), \ (argc), (argv), (size_fn), (kw_splat)) +/** + * This roughly resembles `return enum_for(__callee__) unless block_given?`. + * + * @param[in] obj A receiver. + * @param[in] argc Number of objects of `argv`. + * @param[in] argv Arguments passed to the current method. + * @param[in] size_fn Size calculator. + * @note This macro may return inside. + */ #define RETURN_SIZED_ENUMERATOR(obj, argc, argv, size_fn) do { \ if (!rb_block_given_p()) \ return SIZED_ENUMERATOR(obj, argc, argv, size_fn); \ } while (0) + +/** + * Identical to #RETURN_SIZED_ENUMERATOR(), except you can specify how to + * handle the last element of the given array. + * + * @param[in] obj A receiver. + * @param[in] argc Number of objects of `argv`. + * @param[in] argv Arguments passed to the current method. + * @param[in] size_fn Size calculator. + * @param[in] kw_splat Handling of keyword parameters: + * - RB_NO_KEYWORDS `argv`'s last is not a keyword argument. + * - RB_PASS_KEYWORDS `argv`'s last is a keyword argument. + * - RB_PASS_CALLED_KEYWORDS it depends if there is a passed block. + * @note This macro may return inside. + */ #define RETURN_SIZED_ENUMERATOR_KW(obj, argc, argv, size_fn, kw_splat) do { \ if (!rb_block_given_p()) \ return SIZED_ENUMERATOR_KW(obj, argc, argv, size_fn, kw_splat); \ } while (0) +/** + * Identical to #RETURN_SIZED_ENUMERATOR(), except its size is unknown. + * + * @param[in] obj A receiver. + * @param[in] argc Number of objects of `argv`. + * @param[in] argv Arguments passed to the current method. + * @note This macro may return inside. + */ #define RETURN_ENUMERATOR(obj, argc, argv) \ RETURN_SIZED_ENUMERATOR(obj, argc, argv, 0) +/** + * Identical to #RETURN_SIZED_ENUMERATOR_KW(), except its size is unknown. 
It + * can also be seen as a routine identical to #RETURN_ENUMERATOR(), except you + * can specify how to handle the last element of the given array. + * + * @param[in] obj A receiver. + * @param[in] argc Number of objects of `argv`. + * @param[in] argv Arguments passed to the current method. + * @param[in] kw_splat Handling of keyword parameters: + * - RB_NO_KEYWORDS `argv`'s last is not a keyword argument. + * - RB_PASS_KEYWORDS `argv`'s last is a keyword argument. + * - RB_PASS_CALLED_KEYWORDS it depends if there is a passed block. + * @note This macro may return inside. + */ #define RETURN_ENUMERATOR_KW(obj, argc, argv, kw_splat) \ RETURN_SIZED_ENUMERATOR_KW(obj, argc, argv, 0, kw_splat) diff --git a/include/ruby/internal/intern/error.h b/include/ruby/internal/intern/error.h index aa9fe2daba..11e147a121 100644 --- a/include/ruby/internal/intern/error.h +++ b/include/ruby/internal/intern/error.h @@ -17,44 +17,232 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Public APIs related to ::rb_eException. */ +#include "ruby/internal/attr/format.h" +#include "ruby/internal/attr/noreturn.h" #include "ruby/internal/dllexport.h" #include "ruby/internal/value.h" #include "ruby/internal/fl_type.h" #include "ruby/backward/2/assume.h" -#include "ruby/backward/2/attributes.h" +/** + * This macro is used in conjunction with rb_check_arity(). If you pass it to + * the function's last (max) argument, that means the function does not check + * upper limit. 
+ */ #define UNLIMITED_ARGUMENTS (-1) -#define rb_exc_new2 rb_exc_new_cstr -#define rb_exc_new3 rb_exc_new_str -#define rb_check_trusted rb_check_trusted -#define rb_check_trusted_inline rb_check_trusted + +#define rb_exc_new2 rb_exc_new_cstr /**< @old{rb_exc_new_cstr} */ +#define rb_exc_new3 rb_exc_new_str /**< @old{rb_exc_new_str} */ + +/** @cond INTERNAL_MACRO */ #define rb_check_arity rb_check_arity +/** @endcond */ RBIMPL_SYMBOL_EXPORT_BEGIN() /* error.c */ -VALUE rb_exc_new(VALUE, const char*, long); -VALUE rb_exc_new_cstr(VALUE, const char*); -VALUE rb_exc_new_str(VALUE, VALUE); -PRINTF_ARGS(NORETURN(void rb_loaderror(const char*, ...)), 1, 2); -PRINTF_ARGS(NORETURN(void rb_loaderror_with_path(VALUE path, const char*, ...)), 2, 3); -PRINTF_ARGS(NORETURN(void rb_name_error(ID, const char*, ...)), 2, 3); -PRINTF_ARGS(NORETURN(void rb_name_error_str(VALUE, const char*, ...)), 2, 3); -PRINTF_ARGS(NORETURN(void rb_frozen_error_raise(VALUE, const char*, ...)), 2, 3); -NORETURN(void rb_invalid_str(const char*, const char*)); -NORETURN(void rb_error_frozen(const char*)); -NORETURN(void rb_error_frozen_object(VALUE)); -void rb_error_untrusted(VALUE); -void rb_check_frozen(VALUE); -void rb_check_trusted(VALUE); + +/** + * Creates an instance of the passed exception class. + * + * @param[in] etype A subclass of ::rb_eException. + * @param[in] ptr Buffer contains error message. + * @param[in] len Length of `ptr`, in bytes, not including the + * terminating NUL character. + * @exception rb_eTypeError `etype` is not a class. + * @exception rb_eArgError `len` is negative. + * @return An instance of `etype`. + * @pre At least `len` bytes of continuous memory region shall be + * accessible via `ptr`. + * + * @internal + * + * This function works for non-exception classes as well, as long as they take + * one string argument. 
+ */ +VALUE rb_exc_new(VALUE etype, const char *ptr, long len); + +RBIMPL_ATTR_NONNULL(()) +/** + * Identical to rb_exc_new(), except it assumes the passed pointer is a pointer + * to a C string. + * + * @param[in] etype A subclass of ::rb_eException. + * @param[in] str A C string (becomes an error message). + * @exception rb_eTypeError `etype` is not a class. + * @return An instance of `etype`. + */ +VALUE rb_exc_new_cstr(VALUE etype, const char *str); + +/** + * Identical to rb_exc_new_cstr(), except it takes a Ruby's string instead of + * C's. + * + * @param[in] etype A subclass of ::rb_eException. + * @param[in] str An instance of ::rb_cString. + * @exception rb_eTypeError `etype` is not a class. + * @return An instance of `etype`. + */ +VALUE rb_exc_new_str(VALUE etype, VALUE str); + +RBIMPL_ATTR_NORETURN() +RBIMPL_ATTR_NONNULL((1)) +RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 1, 2) +/** + * Raises an instance of ::rb_eLoadError. + * + * @param[in] fmt Format specifier string compatible with rb_sprintf(). + * @exception rb_eLoadError Always raises this. + * @note It never returns. + * + * @internal + * + * Who needs this? Except ruby itself? + */ +void rb_loaderror(const char *fmt, ...); + +RBIMPL_ATTR_NORETURN() +RBIMPL_ATTR_NONNULL((2)) +RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 2, 3) +/** + * Identical to rb_loaderror(), except it additionally takes which file is + * unable to load. The path can be obtained later using `LoadError#path` of + * the raising exception. + * + * @param[in] path What failed. + * @param[in] fmt Format specifier string compatible with rb_sprintf(). + * @exception rb_eLoadError Always raises this. + * @note It never returns. + */ +void rb_loaderror_with_path(VALUE path, const char *fmt, ...); + +RBIMPL_ATTR_NORETURN() +RBIMPL_ATTR_NONNULL((2)) +RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 2, 3) +/** + * Raises an instance of ::rb_eNameError. The name can be obtained later using + * `NameError#name` of the raising exception. 
+ * + * @param[in] name What failed. + * @param[in] fmt Format specifier string compatible with rb_sprintf(). + * @exception rb_eNameError Always raises this. + * @note It never returns. + */ +void rb_name_error(ID name, const char *fmt, ...); + +RBIMPL_ATTR_NORETURN() +RBIMPL_ATTR_NONNULL((2)) +RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 2, 3) +/** + * Identical to rb_name_error(), except it takes a ::VALUE instead of ::ID. + * + * @param[in] name What failed. + * @param[in] fmt Format specifier string compatible with rb_sprintf(). + * @exception rb_eNameError Always raises this. + * @note It never returns. + */ +void rb_name_error_str(VALUE name, const char *fmt, ...); + +RBIMPL_ATTR_NORETURN() +RBIMPL_ATTR_NONNULL((2)) +RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 2, 3) +/** + * Raises an instance of ::rb_eFrozenError. The object can be obtained later + * using `FrozenError#receiver` of the raising exception. + * + * @param[in] recv What is frozen. + * @param[in] fmt Format specifier string compatible with rb_sprintf(). + * @exception rb_eFrozenError Always raises this. + * @note It never returns. + * + * @internal + * + * Note however, that it is often not possible to inspect a frozen object, + * because the inspection itself could be forbidden by the frozen-ness. + */ +void rb_frozen_error_raise(VALUE recv, const char *fmt, ...); + +RBIMPL_ATTR_NORETURN() +RBIMPL_ATTR_NONNULL(()) +/** + * Honestly I don't understand the name, but it raises an instance of + * ::rb_eArgError. + * + * @param[in] str A message. + * @param[in] type Another message. + * @exception rb_eArgError Always raises this. + * @note It never returns. + */ +void rb_invalid_str(const char *str, const char *type); + +RBIMPL_ATTR_NORETURN() +RBIMPL_ATTR_NONNULL(()) +/** + * Identical to rb_frozen_error_raise(), except its raising exception has a + * message like "can't modify frozen /what/". + * + * @param[in] what What was frozen. + * @exception rb_eFrozenError Always raises this. 
+ * @note It never returns. + */ +void rb_error_frozen(const char *what); + +/** + * Identical to rb_error_frozen(), except it takes arbitrary Ruby object + * instead of C's string. + * + * @param[in] what What was frozen. + * @exception rb_eFrozenError Always raises this. + * @note It never returns. + */ +void rb_error_frozen_object(VALUE what); + +/** + * Queries if the passed object is frozen. + * + * @param[in] obj Target object to test frozen-ness. + * @exception rb_eFrozenError It is frozen. + * @post Upon successful return it is guaranteed _not_ frozen. + */ +void rb_check_frozen(VALUE obj); + +/** + * Ensures that the passed object can be `initialize_copy` relationship. When + * you implement your own one you would better call this at the right beginning + * of your implementation. + * + * @param[in] obj Destination object. + * @param[in] orig Source object. + * @exception rb_eFrozenError `obj` is frozen. + * @post Upon successful return obj is guaranteed safe to copy orig. + */ void rb_check_copyable(VALUE obj, VALUE orig); -NORETURN(MJIT_STATIC void rb_error_arity(int, int, int)); + +RBIMPL_ATTR_NORETURN() +/** + * @private + * + * This is an implementation detail of rb_scan_args(). You don't have to + * bother. + * + * @pre `argc` is out of range of `min`..`max`, both inclusive. + * @param[in] argc Arbitrary integer. + * @param[in] min Minimum allowed `argc`. + * @param[in] max Maximum allowed `argc`. + * @exception rb_eArgError Always. + */ +void rb_error_arity(int argc, int min, int max); + RBIMPL_SYMBOL_EXPORT_END() -/* Does anyone use this? Remain not deleted for compatibility. */ +/** + * @deprecated + * + * Does anyone use this? Remain not deleted for compatibility. 
+ */ #define rb_check_frozen_internal(obj) do { \ VALUE frozen_obj = (obj); \ if (RB_UNLIKELY(RB_OBJ_FROZEN(frozen_obj))) { \ @@ -62,6 +250,7 @@ RBIMPL_SYMBOL_EXPORT_END() } \ } while (0) +/** @alias{rb_check_frozen} */ static inline void rb_check_frozen_inline(VALUE obj) { @@ -69,8 +258,23 @@ rb_check_frozen_inline(VALUE obj) rb_error_frozen_object(obj); } } + +/** @alias{rb_check_frozen} */ #define rb_check_frozen rb_check_frozen_inline +/** + * Ensures that the passed integer is in the passed range. When you can use + * rb_scan_args() that is preferred over this one (powerful, descriptive). But + * it can have its own application area. + * + * @param[in] argc Arbitrary integer. + * @param[in] min Minimum allowed `argc`. + * @param[in] max Maximum allowed `argc`, or `UNLIMITED_ARGUMENTS`. + * @exception rb_eArgError `argc` out of range. + * @return The passed `argc`. + * @post Upon successful return `argc` is in range of `min`..`max`, both + * inclusive. + */ static inline int rb_check_arity(int argc, int min, int max) { diff --git a/include/ruby/internal/intern/eval.h b/include/ruby/internal/intern/eval.h index 11957053d7..2230f7ab0c 100644 --- a/include/ruby/internal/intern/eval.h +++ b/include/ruby/internal/intern/eval.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Pre-1.9 era evaluator APIs (now considered miscellaneous). */ #include "ruby/internal/attr/noreturn.h" @@ -28,31 +28,194 @@ RBIMPL_SYMBOL_EXPORT_BEGIN() /* eval.c */ RBIMPL_ATTR_NORETURN() -void rb_exc_raise(VALUE); +/** + * Identical to rb_raise(), except it raises the passed exception instance as- + * is instead of creating new one. + * + * @param[in] exc An instance of a subclass of ::rb_eException.
+ * @exception exc What is passed. + * @exception rb_eTypeError `exc` is not an exception. + * @note It never returns. + * + * @internal + * + * Wellll actually, it can take more than what is described above. This + * function tries to call `exception` method of the passed object. If that + * function returns an exception object that is used instead. + */ +void rb_exc_raise(VALUE exc); RBIMPL_ATTR_NORETURN() -void rb_exc_fatal(VALUE); +/** + * Identical to rb_fatal(), except it raises the passed exception instance as- + * is instead of creating new one. + * + * @param[in] exc An instance of a subclass of ::rb_eException. + * @exception exc What is passed. + * @note It never returns. + * + * @internal + * + * You know what...? Using this API you can make arbitrary exceptions, like + * `RuntimeError`, that doesn't interface with `rescue` clause. This is very + * confusing. + */ +void rb_exc_fatal(VALUE exc); + +/* process.c */ RBIMPL_ATTR_NORETURN() -VALUE rb_f_exit(int, const VALUE*); +/** + * Identical to rb_exit(), except how arguments are passed. + * + * @param[in] argc Number of objects of `argv`. + * @param[in] argv Contains at most one of the following: + * - ::RUBY_Qtrue - means `EXIT_SUCCESS`. + * - ::RUBY_Qfalse - means `EXIT_FAILURE`. + * - Numerical value - takes that value. + * @exception rb_eArgError Wrong `argc`. + * @exception rb_eSystemExit Exception representing the exit status. + * @note It never returns. + */ +VALUE rb_f_exit(int argc, const VALUE *argv); RBIMPL_ATTR_NORETURN() -VALUE rb_f_abort(int, const VALUE*); +/** + * This is similar to rb_f_exit(). In fact on some situation it internally + * calls rb_exit(). But can be very esoteric on occasions. + * + * It takes up to one argument. If an argument is passed, it tries to display + * that. Otherwise if there is `$!`, displays that exception instead. It + * finally raise ::rb_eSystemExit in both cases. + * + * @param[in] argc Number of objects of `argv`. 
+ * @param[in] argv Contains at most one string-ish object. + * @exception rb_eArgError Wrong `argc`. + * @exception rb_eTypeError No conversion from `argv[0]` to String. + * @exception rb_eSystemExit Exception representing `EXIT_FAILURE`. + * @note It never returns. + */ +VALUE rb_f_abort(int argc, const VALUE *argv); + +/* eval.c*/ RBIMPL_ATTR_NORETURN() +/** + * Raises an instance of ::rb_eInterrupt. + * + * @exception rb_eInterrupt Always raises this exception. + * @note It never returns. + */ void rb_interrupt(void); + +/** + * Queries the name of the Ruby level method that is calling this function. + * The "name" in this context is the one assigned to the function for the first + * time (note that methods can have multiple names via aliases). + * + * @retval 0 There is no method (e.g. toplevel context). + * @retval otherwise The name of the current method. + */ ID rb_frame_this_func(void); RBIMPL_ATTR_NORETURN() -void rb_jump_tag(int); -void rb_obj_call_init(VALUE, int, const VALUE*); +/** + * This function is to re-throw global escapes. Such global escapes include + * exceptions, `throw`, `break`, for example. + * + * It makes sense only when used in conjunction with "protect" series APIs + * e.g. rb_protect(), rb_load_protect(), rb_eval_string_protect(), etc. In + * case these functions experience global escapes, they fill their opaque + * `state` return buffer. You can ignore such escapes. But if you decide + * otherwise, you have to somehow escape globally again. This function is used + * for that purpose. + * + * @param[in] state Opaque state of execution. + * @note It never returns. + * + * @internal + * + * Though not a part of our public API, `state` is in fact an enum + * ruby_tag_type. You can see the potential values by looking at vm_core.h. + */ +void rb_jump_tag(int state); + +/** + * Calls `initialize` method of the passed object with the passed arguments. + * It also forwards the implicitly passed block to the method. 
+ * + * @param[in] obj Receiver object. + * @param[in] argc Number of objects of `argv`. + * @param[in] argv Passed as-is to `obj.initialize`. + * @exception rb_eException Any exceptions happen inside. + */ +void rb_obj_call_init(VALUE obj, int argc, const VALUE *argv); + +/** + * Identical to rb_obj_call_init(), except you can specify how to handle the + * last element of the given array. + * + * @param[in] obj Receiver object. + * @param[in] argc Number of objects of `argv`. + * @param[in] argv Passed as-is to `obj.initialize`. + * @param[in] kw_splat Handling of keyword parameters: + * - RB_NO_KEYWORDS `argv`'s last is not a keyword argument. + * - RB_PASS_KEYWORDS `argv`'s last is a keyword argument. + * - RB_PASS_CALLED_KEYWORDS it depends if there is a passed block. + * @exception rb_eNoMethodError No such method. + * @exception rb_eException Any exceptions happen inside. + */ void rb_obj_call_init_kw(VALUE, int, const VALUE*, int); -VALUE rb_protect(VALUE (*)(VALUE), VALUE, int*); + +/** + * Identical to rb_frame_this_func(), except it returns the name used to call + * the method. + * + * @retval 0 There is no method (e.g. toplevel context). + * @retval otherwise The name of the current method. + */ ID rb_frame_callee(void); -VALUE rb_make_exception(int, const VALUE*); + +/** + * Constructs an exception object from the list of arguments, in a manner + * similar to Ruby's `raise`. This function can take: + * + * - No arguments at all, i.e. `argc == 0`. This is not a failure. It + * returns ::RUBY_Qnil then. + * + * - An object, which is an instance of ::rb_cString. In this case an + * instance of ::rb_eRuntimeError whose message is the passed string is + * created then returned. + * + * - An object, which responds to `exception` method, and optionally its + * argument, and optionally its backtrace. For example instances of + * subclasses of ::rb_eException have this method. What is returned from + * the method is returned.
+ * + * @param[in] argc Number of objects of `argv`. + * @param[in] argv 0 up to 3 objects. + * @exception rb_eArgError Wrong `argc`. + * @exception rb_eTypeError `argv[0].exception` returned non-exception. + * @return An instance of a subclass of ::rb_eException. + * + * @internal + * + * Historically this was _the_ way `raise` converted its arguments to an + * exception. However they diverged. + */ +VALUE rb_make_exception(int argc, const VALUE *argv); /* eval_jump.c */ -void rb_set_end_proc(void (*)(VALUE), VALUE); + +/** + * Registers a function that shall run on process exit. Registered functions + * run in reverse-chronological order, mixed with syntactic `END` block and + * `Kernel#at_exit`. + * + * @param[in] func Function to run at process exit. + * @param[in] arg Passed as-is to `func`. + */ +void rb_set_end_proc(void (*func)(VALUE arg), VALUE arg); RBIMPL_SYMBOL_EXPORT_END() diff --git a/include/ruby/internal/intern/file.h b/include/ruby/internal/intern/file.h index 9ebefece66..79820fdc61 100644 --- a/include/ruby/internal/intern/file.h +++ b/include/ruby/internal/intern/file.h @@ -17,25 +17,196 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Public APIs related to ::rb_cFile. */ +#include "ruby/internal/attr/nonnull.h" +#include "ruby/internal/attr/pure.h" #include "ruby/internal/dllexport.h" #include "ruby/internal/value.h" RBIMPL_SYMBOL_EXPORT_BEGIN() /* file.c */ -VALUE rb_file_s_expand_path(int, const VALUE *); -VALUE rb_file_expand_path(VALUE, VALUE); -VALUE rb_file_s_absolute_path(int, const VALUE *); -VALUE rb_file_absolute_path(VALUE, VALUE); + +RBIMPL_ATTR_NONNULL(()) +/** + * Identical to rb_file_expand_path(), except how arguments are passed. 
+ * + * @param[in] argc Number of objects of `argv`. + * @param[in] argv Filename, and base directory, in that order. + * @exception rb_eArgError Wrong `argc`. + * @exception rb_eTypeError Non-string passed. + * @exception rb_eEncCompatError No conversion from arguments to a path. + * @return Expanded path. + * + * @internal + * + * It seems nobody actually uses this function right now. Maybe delete it? + */ +VALUE rb_file_s_expand_path(int argc, const VALUE *argv); + +/** + * Identical to rb_file_absolute_path(), except it additionally understands + * `~`. If a given pathname starts with `~someone/`, that part expands to the + * user's home directory (or that of current process' owner's in case of `~/`). + * + * @param[in] fname Relative file name. + * @param[in] dname Lookup base directory name, or in case + * ::RUBY_Qnil is passed the process' current + * working directory is assumed. + * @exception rb_eArgError Home directory is not absolute. + * @exception rb_eTypeError Non-string passed. + * @exception rb_eEncCompatError No conversion from arguments to a path. + * @return Expanded path. + */ +VALUE rb_file_expand_path(VALUE fname, VALUE dname); + +RBIMPL_ATTR_NONNULL(()) +/** + * Identical to rb_file_absolute_path(), except how arguments are passed. + * + * @param[in] argc Number of objects of `argv`. + * @param[in] argv Filename, and base directory, in that order. + * @exception rb_eArgError Wrong `argc`. + * @exception rb_eTypeError Non-string passed. + * @exception rb_eEncCompatError No conversion from arguments to a path. + * @return Expanded path. + * + * @internal + * + * It seems nobody actually uses this function right now. Maybe delete it? + */ +VALUE rb_file_s_absolute_path(int argc, const VALUE *argv); + +/** + * Maps a relative path to its absolute representation. Relative paths are + * referenced from the passed directory name, or from the process' current + * working directory in case ::RUBY_Qnil is passed. 
+ * + * @param[in] fname Relative file name. + * @param[in] dname Lookup base directory name, or in case + * ::RUBY_Qnil is passed the process' current + * working directory is assumed. + * @exception rb_eArgError Strings contain NUL bytes. + * @exception rb_eTypeError Non-string passed. + * @exception rb_eEncCompatError No conversion from arguments to a path. + * @return Expanded path. + */ +VALUE rb_file_absolute_path(VALUE fname, VALUE dname); + +/** + * Strips a file path's last component (and trailing separators if any). This + * function is relatively simple on POSIX environments; just splits the input + * with `/`, strips the last one, if something remains joins them again, + * otherwise the return value is `"."`. However when it comes to Windows this + * function is quite very much complicated. We have to take UNC etc. into + * account. So for instance `"C:foo"`'s dirname is `"C:."`. + * + * @param[in] fname File name to strip. + * @exception rb_eTypeError `fname` is not a String. + * @exception rb_eArgError `fname` contains NUL bytes. + * @exception rb_eEncCompatError `fname`'s encoding is not path-compat. + * @return A dirname of `fname`. + * @note This is a "pure" operation; it computes the return value solely + * from the passed object and never does any file IO. + */ VALUE rb_file_dirname(VALUE fname); -int rb_find_file_ext(VALUE*, const char* const*); -VALUE rb_find_file(VALUE); -VALUE rb_file_directory_p(VALUE,VALUE); -VALUE rb_str_encode_ospath(VALUE); -int rb_is_absolute_path(const char *); + +RBIMPL_ATTR_NONNULL(()) +/** + * Resolves a feature's path. This function takes for instance `"json"` and + * `[".so", ".rb"]`, and iterates over the `$LOAD_PATH` to see if there is + * either `json.so` or `json.rb` in the directory. + * + * This is not what everything `require` does, but at least `require` is built + * on top of it. + * + * @param[in,out] feature File to search, and return buffer. + * @param[in] exts List of file extensions. 
+ * @exception rb_eTypeError `feature` is not a String. + * @exception rb_eArgError `feature` contains NUL bytes. + * @exception rb_eEncCompatError `feature`'s encoding is not path-compat. + * @retval 0 Not found + * @retval otherwise Found index in `exts`, plus one. + * @post `*feature` is a resolved path. + */ +int rb_find_file_ext(VALUE *feature, const char *const *exts); + +/** + * Identical to rb_find_file_ext(), except it takes a feature name and its + * extension at once, e.g. `"json.rb"`. This difference is much like how + * `require` and `load` are different. + * + * @param[in] path A path relative to `$LOAD_PATH`. + * @exception rb_eTypeError `path` is not a String. + * @exception rb_eArgError `path` contains NUL bytes. + * @exception rb_eEncCompatError `path`'s encoding is not path-compat. + * @return Expanded path. + */ +VALUE rb_find_file(VALUE path); + +/** + * Queries if the given path is either a directory, or a symlink that + * (potentially recursively) points to such thing. + * + * @param[in] _ Ignored (why...?) + * @param[in] path String, or IO. In case of IO it issues + * `fstat(2)` instead of `stat(2)`. + * @exception rb_eFrozenError `path` is a frozen IO (why...?) + * @exception rb_eTypeError `path` is neither String nor IO. + * @exception rb_eArgError `path` contains NUL bytes. + * @exception rb_eEncCompatError `path`'s encoding is not path-compat. + * @retval RUBY_Qtrue `path` is a directory. + * @retval RUBY_Qfalse Otherwise. + */ +VALUE rb_file_directory_p(VALUE _, VALUE path); + +/** + * Converts a string into an "OS Path" encoding, if any. In most operating + * systems there are no such things like per-OS default encoding of filename. + * For them this function is no-op. However most notably on MacOS, pathnames + * are UTF-8 encoded. It converts the given string into such encoding. + * + * @param[in] path An instance of ::rb_cString. + * @exception rb_eEncCompatError `path`'s encoding is not path-compat.
+ * @return `path`'s contents converted to the OS' path encoding. + */ +VALUE rb_str_encode_ospath(VALUE path); + +RBIMPL_ATTR_NONNULL(()) +RBIMPL_ATTR_PURE() +/** + * Queries if the given path is an absolute path. On POSIX environments it is + * as easy as `path[0] == '/'`. However on Windows, drive letters and UNC + * paths are also taken into account. + * + * @param[in] path A possibly relative path string. + * @retval 1 `path` is absolute. + * @retval 0 `path` is relative. + */ +int rb_is_absolute_path(const char *path); + +/** + * Queries the file size of the given file. Because this function calls + * `fstat(2)` internally, it is a failure to pass a closed file to this + * function. + * + * This function flushes the passed file's buffer if any. Can take time. + * + * @param[in] file A file object. + * @exception rb_eFrozenError `file` is frozen. + * @exception rb_eIOError `file` is closed. + * @exception rb_eSystemCallError Permission denied etc. + * @exception rb_eNoMethodError The given non-file object doesn't respond + * to `#size`. + * @return The size of the passed file. + * @note Passing a non-regular file such as a UNIX domain socket to this + * function is not a failure. But the return value is + * unpredictable. POSIX's `<sys/stat.h>` states that "the use of + * this field is unspecified" then. + */ +rb_off_t rb_file_size(VALUE file); RBIMPL_SYMBOL_EXPORT_END() diff --git a/include/ruby/internal/intern/gc.h b/include/ruby/internal/intern/gc.h deleted file mode 100644 index 30759e0ded..0000000000 --- a/include/ruby/internal/intern/gc.h +++ /dev/null @@ -1,57 +0,0 @@ -#ifndef RBIMPL_INTERN_GC_H /*-*-C++-*-vi:se ft=cpp:*/ -#define RBIMPL_INTERN_GC_H -/** - * @file - * @author Ruby developers <ruby-core@ruby-lang.org> - * @copyright This file is a part of the programming language Ruby. - * Permission is hereby granted, to either redistribute and/or - * modify this file, provided that the conditions mentioned in the - * file COPYING are met. 
Consult the file for details. - * @warning Symbols prefixed with either `RBIMPL` or `rbimpl` are - * implementation details. Don't take them as canon. They could - * rapidly appear then vanish. The name (path) of this header file - * is also an implementation detail. Do not expect it to persist - * at the place it is now. Developers are free to move it anywhere - * anytime at will. - * @note To ruby-core: remember that this header can be possibly - * recursively included from extension libraries written in C++. - * Do not expect for instance `__VA_ARGS__` is always available. - * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. - * @brief Public APIs related to ::rb_mGC. - */ -#include "ruby/internal/dllexport.h" -#include "ruby/internal/value.h" -#include "ruby/backward/2/attributes.h" - -RBIMPL_SYMBOL_EXPORT_BEGIN() - -/* gc.c */ -COLDFUNC NORETURN(void rb_memerror(void)); -PUREFUNC(int rb_during_gc(void)); -void rb_gc_mark_locations(const VALUE*, const VALUE*); -void rb_mark_tbl(struct st_table*); -void rb_mark_tbl_no_pin(struct st_table*); -void rb_mark_set(struct st_table*); -void rb_mark_hash(struct st_table*); -void rb_gc_update_tbl_refs(st_table *ptr); -void rb_gc_mark_maybe(VALUE); -void rb_gc_mark(VALUE); -void rb_gc_mark_movable(VALUE); -VALUE rb_gc_location(VALUE); -void rb_gc_force_recycle(VALUE); -void rb_gc(void); -void rb_gc_copy_finalizer(VALUE,VALUE); -VALUE rb_gc_enable(void); -VALUE rb_gc_disable(void); -VALUE rb_gc_start(void); -VALUE rb_define_finalizer(VALUE, VALUE); -VALUE rb_undefine_finalizer(VALUE); -size_t rb_gc_count(void); -size_t rb_gc_stat(VALUE); -VALUE rb_gc_latest_gc_info(VALUE); -void rb_gc_adjust_memory_usage(ssize_t); - -RBIMPL_SYMBOL_EXPORT_END() - -#endif /* RBIMPL_INTERN_GC_H */ diff --git a/include/ruby/internal/intern/hash.h b/include/ruby/internal/intern/hash.h index 70c37917f1..af8dfd5d8f 100644 --- a/include/ruby/internal/intern/hash.h +++ 
b/include/ruby/internal/intern/hash.h @@ -17,9 +17,10 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Public APIs related to ::rb_cHash. */ +#include "ruby/internal/attr/nonnull.h" #include "ruby/internal/dllexport.h" #include "ruby/internal/value.h" #include "ruby/st.h" @@ -27,31 +28,292 @@ RBIMPL_SYMBOL_EXPORT_BEGIN() /* hash.c */ -void rb_st_foreach_safe(struct st_table *, int (*)(st_data_t, st_data_t, st_data_t), st_data_t); + +RBIMPL_ATTR_NONNULL(()) +/** + * Identical to rb_st_foreach(), except it raises exceptions when the callback + * function tampers the table during iterating over it. + * + * @param[in] st Table to iterate over. + * @param[in] func Callback function to apply. + * @param[in] arg Passed as-is to `func`. + * @exception rb_eRuntimeError `st` was tampered during iterating. + * + * @internal + * + * This is declared here because exceptions are Ruby level concept. + * + * This is in fact a very thin wrapper of rb_st_foreach_check(). + */ +void rb_st_foreach_safe(struct st_table *st, st_foreach_callback_func *func, st_data_t arg); + +/** @alias{rb_st_foreach_safe} */ #define st_foreach_safe rb_st_foreach_safe -VALUE rb_check_hash_type(VALUE); -void rb_hash_foreach(VALUE, int (*)(VALUE, VALUE, VALUE), VALUE); -VALUE rb_hash(VALUE); + +/** + * Try converting an object to its hash representation using its `to_hash` + * method, if any. If there is no such thing, returns ::RUBY_Qnil. + * + * @param[in] obj Arbitrary ruby object to convert. + * @exception rb_eTypeError `obj.to_hash` returned something non-Hash. + * @retval RUBY_Qnil No conversion from `obj` to hash defined. + * @retval otherwise Converted hash representation of `obj`. 
+ * @see rb_io_check_io + * @see rb_check_array_type + * @see rb_check_string_type + * + * @internal + * + * There is no rb_hash_to_hash() that analogous to rb_str_to_str(). + * Intentional or ...? + */ +VALUE rb_check_hash_type(VALUE obj); + +RBIMPL_ATTR_NONNULL(()) +/** + * Iterates over a hash. This basically does the same thing as + * rb_st_foreach(). But because the passed hash is a Ruby object, its keys and + * values are both Ruby objects. + * + * @param[in] hash An instance of ::rb_cHash to iterate over. + * @param[in] func Callback function to yield. + * @param[in] arg Passed as-is to `func`. + * @exception rb_eRuntimeError `hash` was tampered during iterating. + */ +void rb_hash_foreach(VALUE hash, int (*func)(VALUE key, VALUE val, VALUE arg), VALUE arg); + +/** + * Calculates a message authentication code of the passed object. The return + * value is a very small integer used as an index of a key of a table. In + * order to calculate the value this function calls `#hash` method of the + * passed object. Ruby provides you a default implementation. But if you + * implement your class in C, that default implementation cannot know the + * underlying data structure. You must implement your own `#hash` method then, + * which must return an integer of uniform distribution in a sufficiently + * instant manner. + * + * @param[in] obj Arbitrary Ruby object. + * @exception rb_eTypeError `obj.hash` returned something non-Integer. + * @return A small integer. + * @note `#hash` can return very big integers, but they get truncated. + */ +VALUE rb_hash(VALUE obj); + +/** + * Creates a new, empty hash object. + * + * @return An allocated new instance of ::rb_cHash. 
+ */ VALUE rb_hash_new(void); -VALUE rb_hash_dup(VALUE); -VALUE rb_hash_freeze(VALUE); -VALUE rb_hash_aref(VALUE, VALUE); -VALUE rb_hash_lookup(VALUE, VALUE); -VALUE rb_hash_lookup2(VALUE, VALUE, VALUE); -VALUE rb_hash_fetch(VALUE, VALUE); -VALUE rb_hash_aset(VALUE, VALUE, VALUE); -VALUE rb_hash_clear(VALUE); -VALUE rb_hash_delete_if(VALUE); -VALUE rb_hash_delete(VALUE,VALUE); -VALUE rb_hash_set_ifnone(VALUE hash, VALUE ifnone); -void rb_hash_bulk_insert(long, const VALUE *, VALUE); + +/** + * Identical to rb_hash_new(), except it additionally specifies how many keys + * it is expected to contain. This way you can create a hash that is large enough + * for your need. For large hashes it means it won't need to be reallocated and + * rehashed as much, improving performance. + * + * @param[in] capa Designed capacity of the hash. + * @return An empty Hash, whose capacity is `capa`. + */ +VALUE rb_hash_new_capa(long capa); + +/** + * Duplicates a hash. + * + * @param[in] hash An instance of ::rb_cHash. + * @return An allocated new instance of ::rb_cHash, whose contents are + * a verbatim copy of `hash`. + */ +VALUE rb_hash_dup(VALUE hash); + +/** @alias{rb_obj_freeze} */ +VALUE rb_hash_freeze(VALUE obj); + +/** + * Queries the given key in the given hash table. If there is the key in the + * hash, returns the value associated with the key. Otherwise it returns the + * "default" value (defined per hash table). + * + * @param[in] hash Hash table to look into. + * @param[in] key Hash key to look for. + * @return Either the value associated with the key, or the default one if + * absent. + */ +VALUE rb_hash_aref(VALUE hash, VALUE key); + +/** + * Identical to rb_hash_aref(), except it always returns ::RUBY_Qnil for + * misshits. + * + * @param[in] hash Hash table to look into. + * @param[in] key Hash key to look for. + * @return Either the value associated with the key, or ::RUBY_Qnil if + * absent. + * @note A hash can store ::RUBY_Qnil as an ordinary value.
You cannot + * distinguish whether the key is missing, or just its associated + * value happens to be ::RUBY_Qnil, as far as you use this API. + */ +VALUE rb_hash_lookup(VALUE hash, VALUE key); + +/** + * Identical to rb_hash_lookup(), except you can specify what to return on + * misshits. This is much like 2-arguments version of `Hash#fetch`. + * + * ```CXX + * VALUE hash; + * VALUE key; + * VALUE tmp = rb_obj_alloc(rb_cObject); + * VALUE val = rb_hash_lookup2(hash, key, tmp); + * if (val == tmp) { + * printf("misshit"); + * } + * else { + * printf("hit"); + * } + * ``` + * + * @param[in] hash Hash table to look into. + * @param[in] key Hash key to look for. + * @param[in] def Default value. + * @retval def `hash` does not have `key`. + * @retval otherwise The value associated with `key`. + */ +VALUE rb_hash_lookup2(VALUE hash, VALUE key, VALUE def); + +/** + * Identical to rb_hash_lookup(), except it yields the (implicitly) passed + * block instead of returning ::RUBY_Qnil. + * + * @param[in] hash Hash table to look into. + * @param[in] key Hash key to look for. + * @exception rb_eKeyError No block given. + * @return Either the value associated with the key, or what the block + * evaluates to if absent. + */ +VALUE rb_hash_fetch(VALUE hash, VALUE key); + +/** + * Inserts or replaces ("upsert"s) the objects into the given hash table. This + * basically associates the given value with the given key. On duplicate key + * this function updates its associated value with the given one. Otherwise it + * inserts the association at the end of the table. + * + * @param[out] hash Target hash table to modify. + * @param[in] key Arbitrary Ruby object. + * @param[in] val A value to be associated with `key`. + * @exception rb_eFrozenError `hash` is frozen. + * @return The passed `val` + * @post `val` is associated with `key` in `hash`. + */ +VALUE rb_hash_aset(VALUE hash, VALUE key, VALUE val); + +/** + * Swipes everything out of the passed hash table. 
+ * + * @param[out] hash Target to clear. + * @exception rb_eFrozenError `hash` is frozen. + * @return The passed `hash` + * @post `hash` has no contents. + */ +VALUE rb_hash_clear(VALUE hash); + +/** + * Deletes each entry for which the block returns a truthy value. If there is + * no block given, it returns an enumerator that does the thing. + * + * @param[out] hash Target hash to modify. + * @exception rb_eFrozenError `hash` is frozen. + * @retval hash The hash is modified. + * @retval otherwise An instance of ::rb_cEnumerator that does it. + */ +VALUE rb_hash_delete_if(VALUE hash); + +/** + * Deletes the passed key from the passed hash table, if any. + * + * @param[out] hash Target hash to modify. + * @param[in] key Key to delete. + * @retval RUBY_Qnil `hash` has no such key as `key`. + * @retval otherwise What was associated with `key`. + * @post `hash` has no such key as `key`. + */ +VALUE rb_hash_delete(VALUE hash, VALUE key); + +/** + * Inserts a list of key-value pairs into a hash table at once. It is + * semantically identical to repeatedly calling rb_hash_aset(), but can be + * faster than that. + * + * @param[in] argc Length of `argv`, must be even. + * @param[in] argv A list of key, value, key, value, ... + * @param[out] hash Target hash table to modify. + * @post `hash` has contents from `argv`. + * @note `argv` is allowed to be NULL as long as `argc` is zero. + * + * @internal + * + * What happens for duplicated keys? Well it silently discards older ones to + * accept the newest (rightmost) one. This behaviour also mimics repeated call + * of rb_hash_aset(). + */ +void rb_hash_bulk_insert(long argc, const VALUE *argv, VALUE hash); + +/** + * Type of callback functions to pass to rb_hash_update_by(). + * + * @param[in] newkey A key of the table. + * @param[in] oldkey Value associated with `key` in hash1. + * @param[in] value Value associated with `key` in hash2. + * @return Either one of the passed values to take.
+ */ typedef VALUE rb_hash_update_func(VALUE newkey, VALUE oldkey, VALUE value); + +/** + * Destructively merges two hash tables into one. It resolves key conflicts by + * calling the passed function and take its return value. + * + * @param[out] hash1 Target hash to be modified. + * @param[in] hash2 A hash to merge into `hash1`. + * @param[in] func Conflict reconciler. + * @exception rb_eFrozenError `hash1` is frozen. + * @exception rb_eRuntimeError `hash2` is updated instead. + * @return The passed `hash1`. + * @post Contents of `hash2` is merged into `hash1`. + * @note You can pass zero to `func`. This means values from `hash2` + * are always taken. + */ VALUE rb_hash_update_by(VALUE hash1, VALUE hash2, rb_hash_update_func *func); -struct st_table *rb_hash_tbl(VALUE, const char *file, int line); -int rb_path_check(const char*); -int rb_env_path_tainted(void); + +/* file.c */ + +/** + * This function is mysterious. What it does is not immediately obvious. Also + * what it does seems platform dependent. + * + * @param[in] path A local path. + * @retval 0 The "check" succeeded. + * @retval otherwise The "check" failed. + */ +int rb_path_check(const char *path); + +/* hash.c */ + +/** + * Destructively removes every environment variables of the running process. + * + * @return The `ENV` object. + * @post The process has no environment variables. + */ VALUE rb_env_clear(void); -VALUE rb_hash_size(VALUE); + +/** + * Identical to #RHASH_SIZE(), except it returns the size in Ruby's integer + * instead of C's. + * + * @param[in] hash A hash object. + * @return The size of the hash. + */ +VALUE rb_hash_size(VALUE hash); RBIMPL_SYMBOL_EXPORT_END() diff --git a/include/ruby/internal/intern/io.h b/include/ruby/internal/intern/io.h index d2f2e53486..02c249723e 100644 --- a/include/ruby/internal/intern/io.h +++ b/include/ruby/internal/intern/io.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. 
* Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Public APIs related to ::rb_cIO. */ #include "ruby/internal/dllexport.h" @@ -26,43 +26,634 @@ RBIMPL_SYMBOL_EXPORT_BEGIN() /* io.c */ + +/** + * @private + * + * @deprecated This macro once was a thing in the old days, but makes no sense + * any longer today. Exists here for backwards compatibility + * only. You can safely forget about it. + */ #define rb_defout rb_stdout + +/* string.c */ /* ...why? moved in commit de7161526014b781468cea5d84411e23be */ + +/** + * The field separator character for inputs, or the `$;`. This affects how + * `String#split` works. You can set this via the `-F` command line option. + * You can also assign arbitrary ruby objects programmatically, but it makes + * best sense for you to assign a regular expression here. + * + * @internal + * + * Tidbit: "fs" comes from AWK's `FS` variable. + */ RUBY_EXTERN VALUE rb_fs; + +/* io.c */ /* ...why? given rb_fs is in string.c? */ + +/** + * The field separator character for outputs, or the `$,`. This affects how + * `Array#join` works. + * + * @deprecated Assigning anything other than ::RUBY_Qnil to this variable is + * deprecated. + */ RUBY_EXTERN VALUE rb_output_fs; + +/** + * The record separator character for inputs, or the `$/`. This affects how + * `IO#gets` works. You can set this via the `-0` command line option. + * + * @deprecated Assigning anything other than ::RUBY_Qnil to this variable is + * deprecated. + * + * @internal + * + * Tidbit: "rs" comes from AWK's `RS` variable. + */ RUBY_EXTERN VALUE rb_rs; + +/** + * This is the default value of ::rb_rs, i.e. `"\n"`. It seems it has always + * been just a newline string since the beginning. Not sure why C codes has to + * use this, given there is no way for ruby programs to interface. 
+ * + * Also it has not been deprecated for unknown reasons. + */ RUBY_EXTERN VALUE rb_default_rs; + +/** + * The record separator character for outputs, or the `$\`. This affects how + * `IO#print` works. + * + * @deprecated Assigning anything other than ::RUBY_Qnil to this variable is + * deprecated. + */ RUBY_EXTERN VALUE rb_output_rs; -VALUE rb_io_write(VALUE, VALUE); -VALUE rb_io_gets(VALUE); -VALUE rb_io_getbyte(VALUE); -VALUE rb_io_ungetc(VALUE, VALUE); -VALUE rb_io_ungetbyte(VALUE, VALUE); -VALUE rb_io_close(VALUE); -VALUE rb_io_flush(VALUE); -VALUE rb_io_eof(VALUE); -VALUE rb_io_binmode(VALUE); -VALUE rb_io_ascii8bit_binmode(VALUE); -VALUE rb_io_addstr(VALUE, VALUE); -VALUE rb_io_printf(int, const VALUE*, VALUE); -VALUE rb_io_print(int, const VALUE*, VALUE); -VALUE rb_io_puts(int, const VALUE*, VALUE); -VALUE rb_io_fdopen(int, int, const char*); -VALUE rb_io_get_io(VALUE); -VALUE rb_file_open(const char*, const char*); -VALUE rb_file_open_str(VALUE, const char*); + +/** + * Writes the given string to the given IO. + * + * @param[out] io An IO, opened for writing. + * @param[in] str A String-like object to write to `io`. + * @exception rb_eIOError `io` isn't opened for writing. + * @exception rb_eFrozenError `io` is frozen. + * @exception rb_eTypeError No conversion from `str` to String. + * @exception rb_eSystemCallError `write(2)` failed for some reason. + * @return The number of bytes written to the `io`. + * @post `str` (up to the length of return value) is written to `io`. + * @note This function blocks. + * @note Partial write is a thing. It must be at least questionable not + * to check the return value. + * + * @internal + * + * Above description is in fact inaccurate. This function can take arbitrary + * objects, and calls their `write` method. What is written above in fact + * describes how `IO#write` works. You can pass StringIO etc. here, and would + * work completely differently. 
+ */ +VALUE rb_io_write(VALUE io, VALUE str); + +/** + * Reads a "line" from the given IO. A line here means a chunk of characters + * which is terminated by either `"\n"` or an EOF. + * + * @param[in,out] io An IO, opened for reading. + * @exception rb_eIOError `io` isn't opened for reading. + * @exception rb_eFrozenError `io` is frozen. + * @retval RUBY_Qnil `io` is at EOF. + * @retval otherwise An instance of ::rb_cString. + * @post `io` is read. + * @note Unlike `IO#gets` it doesn't set `$_`. + * @note Unlike `IO#gets` it doesn't consider `$/`. + */ +VALUE rb_io_gets(VALUE io); + +/** + * Reads a byte from the given IO. + * + * @note In Ruby a "byte" always means an 8 bit integer ranging from + * 0 to 255 inclusive. + * @param[in,out] io An IO, opened for reading. + * @exception rb_eIOError `io` is not opened for reading. + * @exception rb_eFrozenError `io` is frozen. + * @retval RUBY_Qnil `io` is at EOF. + * @retval otherwise An instance of ::rb_cInteger. + * @post `io` is read. + * + * @internal + * + * Of course there was a function called `rb_io_getc()`. It was removed in + * commit a25fbe3b3e531bbe479f344af24eaf9d2eeae6ea. + */ +VALUE rb_io_getbyte(VALUE io); + +/** + * "Unget"s a string. This function pushes back the passed string onto the + * passed IO, such that a subsequent buffered read will return it. If the + * passed content is in fact an integer, a single character string of that + * codepoint of the encoding of the IO will be pushed back instead. + * + * It might be counter-intuitive but this function can push back multiple + * characters at once. Also this function can be called multiple times on a + * same IO. Also a "character" can be wider than a byte, depending on the + * encoding of the IO. + * + * @param[out] io An IO, opened for reading. + * @param[in] c Either a String, or an Integer. + * @exception rb_eIOError `io` is not opened for reading. + * @exception rb_eFrozenError `io` is frozen. 
+ * @exception rb_eTypeError No conversion from `c` to ::rb_cString. + * @return Always returns ::RUBY_Qnil. + * + * @internal + * + * Why there is ungetc, given there is no getc? + */ +VALUE rb_io_ungetc(VALUE io, VALUE c); + +/** + * Identical to rb_io_ungetc(), except it doesn't take the encoding of the + * passed IO into account. When an integer is passed, it just casts that value + * to C's `unsigned char`, and pushes that back. + * + * @param[out] io An IO, opened for reading. + * @param[in] b Either a String, or an Integer. + * @exception rb_eIOError `io` is not opened for reading. + * @exception rb_eFrozenError `io` is frozen. + * @exception rb_eTypeError No conversion from `b` to ::rb_cString. + * @return Always returns ::RUBY_Qnil. + */ +VALUE rb_io_ungetbyte(VALUE io, VALUE b); + +/** + * Closes the IO. Any buffered contents are flushed to the operating system. + * Any future operations against the IO would raise ::rb_eIOError. In case the + * io was created using `IO.popen`, it also sets the `$?`. + * + * @param[out] io Target IO to close. + * @return Always returns ::RUBY_Qnil. + * @post `$?` is set in case IO is a pipe. + * @post No operations are possible against `io` any further. + * @note This can block to flush the contents. + * @note This can wake other threads up, especially those who are + * `select()`-ing the passed IO. + * @note Multiple invocations of this function over the same IO again + * and again is not an error, since Ruby 2.3. + * + * @internal + * + * You can close a frozen IO... Is this intentional? + */ +VALUE rb_io_close(VALUE io); + +/** + * Flushes any buffered data within the passed IO to the underlying operating + * system. + * + * @param[out] io Target IO to flush. + * @exception rb_eIOError `io` is closed. + * @exception rb_eFrozenError `io` is frozen. + * @exception rb_eSystemCallError `write(2)` failed for some reason. + * @return The passed `io`. + * @post `io`'s buffers are empty. 
+ * @note This operation also discards the read buffer. Should basically + * be harmless, but in an esoteric situation like when user pushed + * something different from what was read using `ungetc`, this + * operation in fact changes the behaviour of the `io`. + * @note Buffering is difficult. This operation flushes the data from + * our userspace to the kernel, but that doesn't always mean you + * can expect them stored persistently onto your hard drive. + */ +VALUE rb_io_flush(VALUE io); + +/** + * Queries if the passed IO is at the end of file. "The end of file" here means + * that there are no more data to read. This function blocks until the read + * buffer is filled in, and if that operation reached the end of file, it still + * returns ::RUBY_Qfalse (because there are data yet in that buffer). It + * returns ::RUBY_Qtrue once after the buffer is cleared. + * + * @param[in,out] io Target io to query. + * @exception rb_eIOError `io` is not opened for reading. + * @exception rb_eFrozenError `io` is frozen. + * @retval RUBY_Qfalse There are things yet to be read. + * @retval RUBY_Qtrue "The end of file" situation. + */ +VALUE rb_io_eof(VALUE io); + +/** + * Sets the binmode. This operation nullifies the effect of textmode (newline + * conversion from `"\r\n"` to `"\n"` or vice versa). Note that it doesn't + * stop character encodings conversions. For instance an IO created using: + * + * ```ruby + * File.open( + * "/dev/urandom", + * textmode: true, + * external_encoding: Encoding::GB18030, + * internal_encoding: Encoding::Windows_31J) + * ``` + * + * has both newline and character conversions. If you pass such IO to this + * function, only the `textmode:true` part is cancelled. Texts read through + * the IO would still be encoded in Windows-31J; texts written to the IO will + * be encoded in GB18030. + * + * @param[out] io Target IO to modify. + * @exception rb_eFrozenError `io` is frozen. + * @return The passed `io`. + * @post `io` is in binmode.
+ * @note There is no equivalent operation in Ruby. You can do this only + * in C. + */ +VALUE rb_io_binmode(VALUE io); + +/** + * Forces no conversions be applied to the passed IO. Unlike rb_io_binmode(), + * this cancels any newline conversions as well as encoding conversions. Any + * texts read/written through the IO will be the verbatim binary contents. + * + * @param[out] io Target IO to modify. + * @exception rb_eFrozenError `io` is frozen. + * @return The passed `io`. + * @post `io` is in binmode. Both external/internal encoding are set to + * rb_ascii8bit_encoding(). + * @note This is the implementation of `IO#binmode`. + */ +VALUE rb_io_ascii8bit_binmode(VALUE io); + +/** + * Identical to rb_io_write(), except it always returns the passed IO. + * + * @param[out] io An IO, opened for writing. + * @param[in] str A String-like object to write to `io`. + * @exception rb_eIOError `io` isn't opened for writing. + * @exception rb_eFrozenError `io` is frozen. + * @exception rb_eTypeError No conversion from `str` to String. + * @exception rb_eSystemCallError `write(2)` failed. + * @return The passed `io`. + * @post `str` is written to `io`. + * @note This function blocks. + * + * @internal + * + * As rb_io_write(), above description is a fake. + */ +VALUE rb_io_addstr(VALUE io, VALUE str); + +/** + * This is a rb_f_sprintf() + rb_io_write() combo. + * + * @param[in] argc Number of objects of `argv`. + * @param[in] argv A format string followed by its arguments. + * @param[out] io An IO, opened for writing. + * @exception rb_eIOError `io` isn't opened for writing. + * @exception rb_eFrozenError `io` is frozen. + * @exception rb_eTypeError No conversion from `str` to String. + * @exception rb_eSystemCallError `write(2)` failed. + * @return Always returns ::RUBY_Qnil. + * @post `argv` is formatted, then written to `io`. + * @note This function blocks. + * + * @internal + * + * As rb_io_write(), above descriptions include fakes. 
+ */ +VALUE rb_io_printf(int argc, const VALUE *argv, VALUE io); + +/** + * Iterates over the passed array to apply rb_io_write() individually. If + * there is `$,`, this function inserts the string in middle of each + * iterations. If there is `$\`, this function appends the string at the end. + * If the array is empty, this function outputs `$_`. + * + * @param[in] argc Number of objects of `argv`. + * @param[in] argv An array of strings to display. + * @param[out] io An IO, opened for writing. + * @exception rb_eIOError `io` isn't opened for writing. + * @exception rb_eFrozenError `io` is frozen. + * @exception rb_eTypeError No conversion from `str` to String. + * @exception rb_eSystemCallError `write(2)` failed. + * @return Always returns ::RUBY_Qnil. + * @post `argv` is written to `io`. + * @note This function blocks. + * @note This function calls rb_io_write() multiple times. Which means, + * it is not an atomic operation. Outputs from multiple threads + * can interleave. + * + * @internal + * + * As rb_io_write(), above descriptions include fakes. + */ +VALUE rb_io_print(int argc, const VALUE *argv, VALUE io); + +/** + * Iterates over the passed array to apply rb_io_write() individually. Unlike + * rb_io_print(), this function prints a newline per each element. It also + * flattens the passed array (OTOH rb_io_print() just resorts to + * rb_ary_to_s()). + * + * @param[in] argc Number of objects of `argv`. + * @param[in] argv An array of strings to display. + * @param[out] io An IO, opened for writing. + * @exception rb_eIOError `io` isn't opened for writing. + * @exception rb_eFrozenError `io` is frozen. + * @exception rb_eTypeError No conversion from `str` to String. + * @exception rb_eSystemCallError `write(2)` failed. + * @return Always returns ::RUBY_Qnil. + * @post `argv` is written to `io`. + * @note This function blocks. + * @note This function calls rb_io_write() multiple times. Which means, + * it is not an atomic operation. 
Outputs from multiple threads + * can interleave. + * + * @internal + * + * As rb_io_write(), above descriptions include fakes. + */ +VALUE rb_io_puts(int argc, const VALUE *argv, VALUE io); + +/** + * Creates an IO instance whose backend is the given file descriptor. C + * extension libraries sometimes have file descriptors created elsewhere (maybe + * deep inside of another shared library), which they want ruby programs to + * handle. This function is handy for such situations. + * + * @param[in] fd Target file descriptor. + * @param[in] flags Flags, e.g. `O_CREAT|O_EXCL` + * @param[in] path The path of the file that backs `fd`, for diagnostics. + * @return An allocated instance of ::rb_cIO. + * @note Leave `path` NULL if you don't know. + */ +VALUE rb_io_fdopen(int fd, int flags, const char *path); + +RBIMPL_ATTR_NONNULL(()) +/** + * Opens a file located at the given path. + * + * `fmode` is a C string that represents the open mode. It can be one of: + * + * - `r` (means `O_RDONLY`), + * - `w` (means `O_WRONLY | O_TRUNC | O_CREAT`), + * - `a` (means `O_WRONLY | O_APPEND | O_CREAT`), + * + * Followed by zero or more combinations of: + * + * - `b` (means `_O_BINARY`), + * - `t` (means `_O_TEXT`), + * - `+` (means `O_RDWR`), + * - `x` (means `O_EXCL`), or + * - `:[BOM|]enc[:enc]` (see below). + * + * This last one specifies external (and internal if any) encodings, + * respectively. If optional `BOM|` is specified and the specified external + * encoding is capable of expressing BOMs, opening file's contents' byte order + * is auto-detected using the mechanism. + * + * So for instance, fmode of `"rt:BOM|utf-16le:utf-8"` specifies that... + * + * - the physical representation of the contents of the file is in UTF-16; + * - honours its BOM but assumes little endian if absent; + * - opens the file for reading; + * - what is read is converted into UTF-8; + * - with newlines canonicalised to `\n`. + * + * @param[in] fname Path to open.
+ * @param[in] fmode Mode specifier much like `fopen(3)`. + * @exception rb_eArgError `fmode` contradicted (e.g. `"bt"`). + * @exception rb_eSystemCallError `open(2)` failed for some reason. + * @return An instance of ::rb_cIO. + */ +VALUE rb_file_open(const char *fname, const char *fmode); + +RBIMPL_ATTR_NONNULL(()) +/** + * Identical to rb_file_open(), except it takes the pathname as a Ruby's string + * instead of C's. In case the passed Ruby object is a non-String it tries to + * call `#to_path`. + * + * @param[in] fname Path to open. + * @param[in] fmode Mode specifier much like `fopen(3)`. + * @exception rb_eTypeError `fname` is not a String. + * @exception rb_eEncCompatError `fname` is not ASCII-compatible. + * @exception rb_eArgError `fmode` contradicted (e.g. `"bt"`). + * @exception rb_eSystemCallError `open(2)` failed for some reason. + * @return An instance of ::rb_cIO. + */ +VALUE rb_file_open_str(VALUE fname, const char *fmode); + +/** + * Much like rb_io_gets(), but it reads from the mysterious ARGF object. ARGF + * in this context can be seen as a virtual IO which concatenates contents of + * the files passed to the process via the ARGV, or just STDIN if there are no + * such files. + * + * Unlike rb_io_gets() this function sets `$_`. + * + * @exception rb_eFrozenError ARGF resorts to STDIN but it is frozen. + * @retval RUBY_Qnil ARGF is at EOF. + * @retval otherwise An instance of ::rb_cString. + * @post ARGF is read. + * @post `$_` is set. + * + * @internal + * + * In reality, this function can call `ARGF.gets`. Its redefinition can affect + * the behaviour. + * + * Also, you can tamper ARGV on-the-fly in middle of ARGF usages: + * + * ``` + * gets # Reads the first file. + * ARGV << '/proc/self/limits' # Adds a file. + * gets # Can read from /proc/self/limits. 
+ * ``` + */ VALUE rb_gets(void); -void rb_write_error(const char*); -void rb_write_error2(const char*, long); + +RBIMPL_ATTR_NONNULL(()) +/** + * Writes the given error message to somewhere applicable. On Windows it goes + * to the console. On POSIX environments it goes to the standard error. + * + * @warning IT IS A BAD IDEA to use this function from your C extensions. + * It is often annoying when GUI applications write to consoles; + * users don't want to look there. Programmers also want to + * control the cause of the message itself, like by rescuing an + * exception. Just let ruby handle errors. That must be better than + * going your own way. + * + * @param[in] str Error message to display. + * @post `str` is written to somewhere. + * + * @internal + * + * AFAIK this function is listed here without marked deprecated because there + * are usages of this function in the wild. + */ +void rb_write_error(const char *str); + +/** + * Identical to rb_write_error(), except it additionally takes the message's + * length. Necessary when you want to handle wide characters. + * + * @param[in] str Error message to display. + * @param[in] len Length of `str`, in bytes. + * @post `str` is written to somewhere. + */ +void rb_write_error2(const char *str, long len); + +/** + * Closes everything. In case of POSIX environments, a child process inherits + * its parent's opened file descriptors. Which is nowadays considered as one + * of the UNIX mistakes. This function closes such inherited file descriptors. + * When your C extension needs to have a child process, don't forget to call + * this from your child process right before exec. + * + * @param[in] lowfd Lower bound of FDs (you want STDIN to remain, no?). + * @param[in] maxhint Hint of max FDs. + * @param[in] noclose_fds A hash, whose keys are an allowlist. + * + * @internal + * + * As of writing, in spite of the name, this function does not actually close + * anything.
It just sets `FD_CLOEXEC` for everything and let `execve(2)` to + * atomically close them at once. This is because as far as we know there are + * no such platform that has `fork(2)` but lacks `FD_CLOEXEC`. + * + * Because this function is expected to run on a forked process it is entirely + * async-signal-safe. + */ void rb_close_before_exec(int lowfd, int maxhint, VALUE noclose_fds); + +RBIMPL_ATTR_NONNULL(()) +/** + * This is an rb_cloexec_pipe() + rb_update_max_fd() combo. + * + * @param[out] pipes Return buffer. Must at least hold 2 elements. + * @retval 0 Successful creation of a pipe. + * @retval -1 Failure in underlying system call(s). + * @post `pipes` is filled with file descriptors. + * @post `errno` is set on failure. + */ int rb_pipe(int *pipes); + +/** + * Queries if the given FD is reserved or not. Occasionally Ruby interpreter + * opens files for its own purposes. Use this function to prevent touching + * such behind-the-scene descriptors. + * + * @param[in] fd Target file descriptor. + * @retval 1 `fd` is reserved. + * @retval 0 Otherwise. + */ int rb_reserved_fd_p(int fd); + +/** @alias{rb_reserved_fd_p} */ +#define RB_RESERVED_FD_P(fd) rb_reserved_fd_p(fd) + +/** + * Opens a file that closes on exec. In case of POSIX environments, a child + * process inherits its parent's opened file descriptors. Which is nowadays + * considered as one of the UNIX mistakes. This function opens a file + * descriptor as `open(2)` does, but additionally instructs the operating + * system that we don't want it be seen from child processes. + * + * @param[in] pathname File path to open. + * @param[in] flags Open mode, as in `open(2)`. + * @param[in] mode File mode, in case of `O_CREAT`. + * @retval -1 `open(2)` failed for some reason. + * @retval otherwise An allocated new file descriptor. + * @note This function does not raise. 
+ * + * @internal + * + * Whether this function can take NULL or not depends on the underlying open(2) + * system call implementation but @shyouhei doesn't think it's worth trying. + */ int rb_cloexec_open(const char *pathname, int flags, mode_t mode); + +/** + * Identical to rb_cloexec_fcntl_dupfd(), except it implies minfd is 3. + * + * @param[in] oldfd File descriptor to duplicate. + * @retval -1 `dup2(2)` failed for some reason. + * @retval otherwise An allocated new file descriptor. + * @note This function does not raise. + */ int rb_cloexec_dup(int oldfd); + +/** + * Identical to rb_cloexec_dup(), except you can specify the destination file + * descriptor. If the destination is already squatted by another file + * descriptor that gets silently closed without any warnings. (This is a spec + * requested by POSIX.) + * + * @param[in] oldfd File descriptor to duplicate. + * @param[in] newfd Return value destination. + * @retval -1 `dup2(2)` failed for some reason. + * @retval newfd An allocated new file descriptor. + * @post Whatever sat at `newfd` gets closed with no notifications. + * @post In case return value is -1 `newfd` is untouched. + * @note This function does not raise. + */ int rb_cloexec_dup2(int oldfd, int newfd); + +RBIMPL_ATTR_NONNULL(()) +/** + * Opens a pipe with closing on exec. In case of POSIX environments, a child + * process inherits its parent's opened file descriptors. Which is nowadays + * considered as one of the UNIX mistakes. This function opens a pipe as + * `pipe(2)` does, but additionally instructs the operating system that we + * don't want the duplicated FDs be seen from child processes. + * + * @param[out] fildes Return buffer. Must at least hold 2 elements. + * @retval 0 Successful creation of a pipe. + * @retval -1 Failure in underlying system call(s). + * @post `fildes` is filled with file descriptors. + * @post `errno` is set on failure. 
+ */ int rb_cloexec_pipe(int fildes[2]); + +/** + * Duplicates a file descriptor with closing on exec. In case of POSIX + * environments, a child process inherits its parent's opened file descriptors. + * Which is nowadays considered as one of the UNIX mistakes. This function + * duplicates a file descriptor as `dup(2)` does, but additionally instructs + * the operating system that we don't want the duplicated FD be seen from child + * processes. + * + * @param[in] fd File descriptor to duplicate. + * @param[in] minfd Minimum allowed FD to return. + * @retval -1 `dup(2)` failed for some reason. + * @retval otherwise An allocated new file descriptor. + * @note This function does not raise. + * + * `minfd` is handy when for instance STDERR is closed but you don't want to + * use fd 2. + */ int rb_cloexec_fcntl_dupfd(int fd, int minfd); -#define RB_RESERVED_FD_P(fd) rb_reserved_fd_p(fd) + +/** + * Informs the interpreter that the passed fd can be the max. This information + * is used from rb_close_before_exec(). + * + * @param[in] fd An open FD, which can be large. + */ void rb_update_max_fd(int fd); + +/** + * Sets or clears the close-on-exec flag of the passed file descriptor to the + * desired state. STDIN, STDOUT, STDERR are the exceptional file descriptors + * that shall remain open. All others are to be closed on exec. When a C + * extension library opens a file descriptor using anything other than + * rb_cloexec_open() etc., that file descriptor shall experience this function. + * + * @param[in] fd An open file descriptor. + */ void rb_fd_fix_cloexec(int fd); RBIMPL_SYMBOL_EXPORT_END() diff --git a/include/ruby/internal/intern/load.h b/include/ruby/internal/intern/load.h index 2cc5be0ebe..9ceb98c2e4 100644 --- a/include/ruby/internal/intern/load.h +++ b/include/ruby/internal/intern/load.h @@ -17,28 +17,239 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. 
* We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Public APIs related to ::rb_f_require(). */ +#include "ruby/internal/attr/nonnull.h" #include "ruby/internal/dllexport.h" #include "ruby/internal/value.h" RBIMPL_SYMBOL_EXPORT_BEGIN() /* load.c */ -void rb_load(VALUE, int); -void rb_load_protect(VALUE, int, int*); -int rb_provided(const char*); -int rb_feature_provided(const char *, const char **); -void rb_provide(const char*); -VALUE rb_f_require(VALUE, VALUE); -VALUE rb_require_string(VALUE); - -// extension configuration + +/** + * Loads and executes the Ruby program in the given file. + * + * If the path is an absolute path (e.g. starts with `'/'`), the file will be + * loaded directly using the absolute path. If the path is an explicit + * relative path (e.g. starts with `'./'` or `'../'`), the file will be loaded + * using the relative path from the current directory. Otherwise, the file + * will be searched for in the library directories listed in the `$LOAD_PATH`. + * If the file is found in a directory, this function will attempt to load the + * file relative to that directory. If the file is not found in any of the + * directories in the `$LOAD_PATH`, the file will be loaded using the relative + * path from the current directory. + * + * If the file doesn't exist when there is an attempt to load it, a LoadError + * will be raised. + * + * If the `wrap` parameter is true, the loaded script will be executed under an + * anonymous module, protecting the calling program's global namespace. In no + * circumstance will any local variables in the loaded file be propagated to + * the loading environment. + * + * @param[in] path Pathname of a file to load. + * @param[in] wrap Either to load under an anonymous module. + * @exception rb_eTypeError `path` is not a string. + * @exception rb_eArgError `path` is broken as a pathname. 
+ * @exception rb_eEncCompatError `path` is incompatible with pathnames. + * @exception rb_eLoadError `path` not found. + * @exception rb_eException Any exceptions while loading the contents. + * + * @internal + * + * It seems this function is under the rule of bootsnap's regime? + */ +void rb_load(VALUE path, int wrap); + +/** + * Identical to rb_load(), except it avoids potential global escapes. Such + * global escapes include exceptions, `throw`, `break`, for example. + * + * It first evaluates the given file as rb_load() does. If no global escape + * occurred during the evaluation, `*state` is set to zero on return. + * Otherwise, it sets `*state` to nonzero. If state is `NULL`, it is not set + * in either case. + * + * @param[in] path Pathname of a file to load. + * @param[in] wrap Either to load under an anonymous module. + * @param[out] state State of execution. + * @post `*state` is set to zero if succeeded. Nonzero otherwise. + * @warning You have to clear the error info with `rb_set_errinfo(Qnil)` if + * you decide to ignore the caught exception. + * @see rb_load + * @see rb_protect + * + * @internal + * + * Though not a part of our public API, `state` is in fact an + * enum ruby_tag_type. You can see the potential "nonzero" values by looking + * at vm_core.h. + */ +void rb_load_protect(VALUE path, int wrap, int *state); + +RBIMPL_ATTR_NONNULL(()) +/** + * Queries if the given feature has already been loaded into the execution + * context. The "feature" here means things like `"json"` or `"socket"`. + * + * @param[in] feature Name of a library you want to know about. + * @retval 1 Yes there is. + * @retval 0 Not yet. + */ +int rb_provided(const char *feature); + +RBIMPL_ATTR_NONNULL((1)) +/** + * Identical to rb_provided(), except it additionally returns the "canonical" + * name of the loaded feature. This can be handy when for instance you want to + * know the actually loaded library is either `foo.rb` or `foo.so`. 
+ * + * @param[in] feature Name of a library you want to know about. + * @param[out] loading Return buffer. + * @retval 1 Yes there is. + * @retval 0 Not yet. + */ +int rb_feature_provided(const char *feature, const char **loading); + +RBIMPL_ATTR_NONNULL(()) +/** + * Declares that the given feature is already provided by someone else. This + * API can be handy when you have an extension called `foo.so` which, when + * required, also provides functionality of `bar.so`. + * + * @param[in] feature Name of a library which had already been provided. + * @post No further `require` would search `feature`. + */ +void rb_provide(const char *feature); + +/** + * Identical to rb_require_string(), except it ignores the first argument for + * no reason. There seems to be no reason for 3rd party extension libraries to + * use it. + * + * @param[in] self Ignored. Can be anything. + * @param[in] feature Name of a feature, e.g. `"json"`. + * @exception rb_eLoadError No such feature. + * @exception rb_eRuntimeError `$"` is frozen; unable to push. + * @retval RUBY_Qtrue The feature is loaded for the first time. + * @retval RUBY_Qfalse The feature has already been loaded. + * @post `$"` is updated. + */ +VALUE rb_f_require(VALUE self, VALUE feature); + +/** + * Finds and loads the given feature, if absent. + * + * If the feature is an absolute path (e.g. starts with `'/'`), the feature + * will be loaded directly using the absolute path. If the feature is an + * explicit relative path (e.g. starts with `'./'` or `'../'`), the feature + * will be loaded using the relative path from the current directory. + * Otherwise, the feature will be searched for in the library directories + * listed in the `$LOAD_PATH`. + * + * If the feature has the extension `".rb"`, it is loaded as a source file; if + * the extension is `".so"`, `".o"`, or `".dll"`, or the default shared library + * extension on the current platform, Ruby loads the shared library as a Ruby + * extension. 
Otherwise, Ruby tries adding `".rb"`, `".so"`, and so on to the + * name until found. If the file named cannot be found, a LoadError will be + * raised. + * + * For extension libraries the given feature may use any shared library + * extension. For example, on Linux you can require `"socket.dll"` to actually + * load `socket.so`. + * + * The absolute path of the loaded file is added to `$LOADED_FEATURES`. A file + * will not be loaded again if its path already appears in there. + * + * Any constants or globals within the loaded source file will be available in + * the calling program's global namespace. However, local variables will not + * be propagated to the loading environment. + * + * @param[in] feature Name of a feature, e.g. `"json"`. + * @exception rb_eLoadError No such feature. + * @exception rb_eRuntimeError `$"` is frozen; unable to push. + * @retval RUBY_Qtrue The feature is loaded for the first time. + * @retval RUBY_Qfalse The feature has already been loaded. + * @post `$"` is updated. + */ +VALUE rb_require_string(VALUE feature); + +/** + * Resolves and returns a symbol of a function in the native extension + * specified by the feature and symbol names. Extensions will use this function + * to access the symbols provided by other native extensions. + * + * @param[in] feature Name of a feature, e.g. `"json"`. + * @param[in] symbol Name of a symbol defined by the feature. + * @return The resolved symbol of a function, defined and externed by the + * specified feature. It may be NULL if the feature is not loaded, + * the feature is not extension, or the symbol is not found. + */ +void *rb_ext_resolve_symbol(const char *feature, const char *symbol); + +/** + * This macro is to provide backwards compatibility. It provides a way to + * define function prototypes and resolving function symbols in a safe way. 
+ * + * ```CXX + * // prototypes + * #ifdef HAVE_RB_EXT_RESOLVE_SYMBOL + * VALUE (*other_extension_func)(VALUE,VALUE); + * #else + * VALUE other_extension_func(VALUE,VALUE); + * #endif + * + * // in Init_xxx() + * #ifdef HAVE_RB_EXT_RESOLVE_SYMBOL + * other_extension_func = \ + * (VALUE(*)(VALUE,VALUE))rb_ext_resolve_symbol(fname, sym_name); + * if (other_extension_func == NULL) { + * // raise your own error + * } + * #endif + * ``` + */ +#define HAVE_RB_EXT_RESOLVE_SYMBOL 1 + +/** + * @name extension configuration + * @{ + */ + +/** + * Asserts that the extension library that calls this function is aware of + * Ractor. Multiple Ractors run without protecting each other. This doesn't + * interface well with C programs, unless designed with an in-depth + * understanding of how Ractors work. Extension libraries are shut out from + * Ractors by default. This API is to bypass that restriction. Once after it + * was called, successive calls to rb_define_method() etc. become definitions + * of methods that are aware of Ractors. The amendment would be in effect + * until the end of rb_require_string() etc. + * + * @param[in] flag Either the library is aware of Ractors or not. + * @post Methods would be callable from Ractors, if `flag` is true. + */ void rb_ext_ractor_safe(bool flag); + +/** @alias{rb_ext_ractor_safe} */ #define RB_EXT_RACTOR_SAFE(f) rb_ext_ractor_safe(f) + +/** + * This macro is to provide backwards compatibility. It must be safe to do + * something like: + * + * ```CXX + * #ifdef HAVE_RB_EXT_RACTOR_SAFE + * rb_ext_ractor_safe(true); + * #endif + * ``` + */ #define HAVE_RB_EXT_RACTOR_SAFE 1 +/** @} */ + RBIMPL_SYMBOL_EXPORT_END() #endif /* RBIMPL_INTERN_LOAD_H */ diff --git a/include/ruby/internal/intern/marshal.h b/include/ruby/internal/intern/marshal.h index 6b0243244e..118d78a4a0 100644 --- a/include/ruby/internal/intern/marshal.h +++ b/include/ruby/internal/intern/marshal.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. 
* Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Public APIs related to rb_mMarshal. */ #include "ruby/internal/dllexport.h" @@ -26,8 +26,85 @@ RBIMPL_SYMBOL_EXPORT_BEGIN() /* marshal.c */ -VALUE rb_marshal_dump(VALUE, VALUE); -VALUE rb_marshal_load(VALUE); + +/** + * Serialises the given object and all its referring objects, to write them + * down to the passed port. + * + * @param[in] obj Target object to dump. + * @param[out] port IO-like destination buffer. + * @exception rb_eTypeError `obj` cannot be dumped for some reason. + * @exception rb_eRuntimeError `obj` was tampered during dumping. + * @exception rb_eArgError Traversal too deep. + * @return The passed `port` as-is. + * @post Serialised representation of `obj` is written to `port`. + * @note `port` is basically an IO but StringIO is also possible. + */ +VALUE rb_marshal_dump(VALUE obj, VALUE port); + +/** + * Deserialises a previous output of rb_marshal_dump() into a network of + * objects. + * + * @param[in,out] port Either IO or String. + * @exception rb_eTypeError `port` is in unexpected type. + * @exception rb_eArgError Contents of `port` is broken. + * @return Object(s) rebuilt using the info from `port`. + * + * SECURITY CONSIDERATIONS + * ======================== + * + * @warning By design, rb_marshal_load() can deserialise almost any + * class loaded into the Ruby process. In many cases this can + * lead to remote code execution if the Marshal data is loaded + * from an untrusted source. + * @warning As a result, rb_marshal_load() is not suitable as a general + * purpose serialisation format and you should never unmarshal + * user supplied input or other untrusted data. 
+ * @warning If you need to deserialise untrusted data, use JSON or + * another serialisation format that is only able to load + * simple, 'primitive' types such as String, Array, Hash, etc. + * Never allow user input to specify arbitrary types to + * deserialise into. + */ +VALUE rb_marshal_load(VALUE port); + +/** + * Marshal format compatibility layer. Over time, classes evolve, so that + * their internal data structure change drastically. For instance an instance + * of ::rb_cRange was made of ::RUBY_T_OBJECT in 1.x., but in 3.x it is a + * ::RUBY_T_STRUCT now. In order to keep binary compatibility, we "fake" the + * marshalled representation to stick to old types. This is the API to enable + * that manoeuvre. Here is how: + * + * First, because you are going to keep backwards compatibility, you need to + * retain the old implementation of your class. Rename it, and keep the class + * somewhere (for instance rb_register_global_address() could help). Next + * create your new class. Do whatever you want. + * + * Then, this is the key point. Create two new "bridge" functions that convert + * the structs back and forth: + * + * - the "dumper" function that takes an instance of the new class, and + * returns an instance of the old one. This is called from + * rb_marshal_dump(), to keep it possible for old programs to read your new + * data. + * + * - the "loader" function that takes two arguments, new one and old one, in + * that order. rb_marshal_load() calls this function when it finds a + * representation of the retained old class. The old one passed to this + * function is the reconstructed instance of the old class. + * Reverse-engineer that to modify the new one, to have the identical + * contents. + * + * Finally, connect all of them using this function. + * + * @param[in] newclass The class that needs conversion. + * @param[in] oldclass Old implementation of `newclass`. + * @param[in] dumper Function that converts `newclass` to `oldclass`. 
+ * @param[in] loader Function that converts `oldclass` to `newclass`. + * @exception rb_eTypeError `newclass` has no allocator. + */ void rb_marshal_define_compat(VALUE newclass, VALUE oldclass, VALUE (*dumper)(VALUE), VALUE (*loader)(VALUE, VALUE)); RBIMPL_SYMBOL_EXPORT_END() diff --git a/include/ruby/internal/intern/numeric.h b/include/ruby/internal/intern/numeric.h index effc583756..30863fb0c8 100644 --- a/include/ruby/internal/intern/numeric.h +++ b/include/ruby/internal/intern/numeric.h @@ -17,25 +17,191 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Public APIs related to ::rb_cNumeric. */ +#include "ruby/internal/attr/cold.h" +#include "ruby/internal/attr/noreturn.h" #include "ruby/internal/dllexport.h" #include "ruby/internal/value.h" -#include "ruby/backward/2/attributes.h" + +/** + * @private + * + * @deprecated This macro once was a thing in the old days, but makes no sense + * any longer today. Exists here for backwards compatibility + * only. You can safely forget about it. + */ +#define RB_NUM_COERCE_FUNCS_NEED_OPID 1 RBIMPL_SYMBOL_EXPORT_BEGIN() /* numeric.c */ -NORETURN(void rb_num_zerodiv(void)); -#define RB_NUM_COERCE_FUNCS_NEED_OPID 1 -VALUE rb_num_coerce_bin(VALUE, VALUE, ID); -VALUE rb_num_coerce_cmp(VALUE, VALUE, ID); -VALUE rb_num_coerce_relop(VALUE, VALUE, ID); -VALUE rb_num_coerce_bit(VALUE, VALUE, ID); -VALUE rb_num2fix(VALUE); -VALUE rb_fix2str(VALUE, int); -CONSTFUNC(VALUE rb_dbl_cmp(double, double)); + +RBIMPL_ATTR_NORETURN() +RBIMPL_ATTR_COLD() +/** + * Just always raises an exception. + * + * @exception rb_eZeroDivError Division by zero error. + */ +void rb_num_zerodiv(void); + +/** + * @name Coercion operators. + * + * What is a coercion? 
Well Ruby is basically an OOPL but it also has + * arithmetic operators. They are implemented in OO manners. For instance + * `a+b` is a binary operation `+`, whose receiver is `a`, and whose (sole) + * argument is `b`. + * + * The problem is, you often want `a+b == b+a` to hold. That is easy if both + * `a` and `b` belong to the same class... Ensuring `1 + 2 == 2 + 1` is kind + * of intuitive. But if you want `1.0 + 2 == 2 + 1.0`, things start getting + * complicated. `1.0+2` is `Float#+`, while `2+1.0` is `Integer#+`. In order + * to achieve the equality Float's and Integer's methods must agree with their + * behaviours. + * + * Now. Floats versus Integers situation is still controllable because they + * are both built-in. But in Ruby you can define your own numeric classes. + * BigDecimal, which is a rubygems gem distributed along with the interpreter, + * is one of such examples. Rational was another such example before. In + * short you cannot create a list of all possible combinations of the classes that + * could be the operand of `+` operator. Then how do we achieve the + * commutativity? + * + * Here comes the concept of coercion. If a definition of an operator + * encounters an object which is unknown to the author, it just assumes that the + * unknown object knows how to handle the situation. So for instance when + * `1+x` has unknown `x`, it lets the `x` handle this. + * + * ```ruby + * class Foo + * def +(x) + * if we_know_what_is_x? then + * ... # handle here + * else + * y, z = x.coerce self + * return y + z + * end + * end + * end + * ``` + * + * The `x.coerce` method returns a 2-element array which are "casted" versions + * of `x` and `self`. + * + * @{ + */ + +/** + * Coerced binary operation. This function first coerces the two objects, then + * applies the operation. + * + * @param[in] lhs LHS operand. + * @param[in] rhs RHS operand. + * @param[in] op Operator method name. + * @exception rb_eTypeError Coercion failed for some reason. 
+ * @return `lhs op rhs`, in a coerced way. + */ +VALUE rb_num_coerce_bin(VALUE lhs, VALUE rhs, ID op); + +/** + * Identical to rb_num_coerce_bin(), except for return values. This function + * best suits for comparison operators e.g. `<=>`. + * + * @param[in] lhs LHS operand. + * @param[in] rhs RHS operand. + * @param[in] op Operator method name. + * @retval RUBY_Qnil Coercion failed for some reason. + * @retval otherwise `lhs op rhs`, in a coerced way. + */ +VALUE rb_num_coerce_cmp(VALUE lhs, VALUE rhs, ID op); + +/** + * Identical to rb_num_coerce_cmp(), except for return values. This function + * best suits for relationship operators e.g. `<=`. + * + * @param[in] lhs LHS operand. + * @param[in] rhs RHS operand. + * @param[in] op Operator method name. + * @exception rb_eArgError Coercion failed for some reason. + * @return `lhs op rhs`, in a coerced way. + */ +VALUE rb_num_coerce_relop(VALUE lhs, VALUE rhs, ID op); + +/** + * This one is optimised for bitwise operations, but the API is identical to + * rb_num_coerce_bin(). + * + * @param[in] lhs LHS operand. + * @param[in] rhs RHS operand. + * @param[in] op Operator method name. + * @exception rb_eArgError Coercion failed for some reason. + * @return `lhs op rhs`, in a coerced way. + */ +VALUE rb_num_coerce_bit(VALUE lhs, VALUE rhs, ID op); + +/** @} */ + +/** + * Converts a numeric value into a Fixnum. This is not a preserving + * conversion; for instance 1.5 would be converted into 1. + * + * @param[in] val A numeric object. + * @exception rb_eTypeError No conversion from `val` to Integer. + * @exception rb_eRangeError `val` out of range. + * @return A fixnum converted from `val`. + * + * @internal + * + * This seems used from nowhere? + */ +VALUE rb_num2fix(VALUE val); + +/** + * Generates a place-value representation of the given Fixnum, with given + * radix. + * + * @param[in] val A fixnum to stringify. + * @param[in] base `2` to `36` inclusive for each radix. 
+ * @exception rb_eArgError `base` is out of range. + * @return An instance of ::rb_cString representing `val`. + * @pre `val` must be a Fixnum (no checks performed). + */ +VALUE rb_fix2str(VALUE val, int base); + +RBIMPL_ATTR_CONST() +/** + * Compares two `double`s. Handy when implementing a spaceship operator. + * + * @param[in] lhs A value. + * @param[in] rhs Another value. + * @retval RB_INT2FIX(-1) `rhs` is "bigger than" `lhs`. + * @retval RB_INT2FIX(1) `lhs` is "bigger than" `rhs`. + * @retval RB_INT2FIX(0) They are equal. + * @retval RUBY_Qnil Not comparable, e.g. NaN. + */ +VALUE rb_dbl_cmp(double lhs, double rhs); + +/** + * Raises the passed `x` to the power of `y`. + * + * @note The return value can be really big. + * @note Also the return value can be really small, in case `x` is a + * negative number. + * @param[in] x A number. + * @param[in] y Another number. + * @retval Inf Cannot express the result. + * @retval 1 Either `y` is 0 or `x` is 1. + * @retval otherwise An instance of ::rb_cInteger whose value is `x ** y`. + * + * @internal + * + * This function returns Infinity when `y` is big enough not to fit into a + * Fixnum. Warning is issued then. + */ +RUBY_EXTERN VALUE rb_int_positive_pow(long x, unsigned long y); RBIMPL_SYMBOL_EXPORT_END() diff --git a/include/ruby/internal/intern/object.h b/include/ruby/internal/intern/object.h index d55178584b..9daad7d046 100644 --- a/include/ruby/internal/intern/object.h +++ b/include/ruby/internal/intern/object.h @@ -17,73 +17,483 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Public APIs related to ::rb_cObject. 
*/ +#include "ruby/internal/attr/const.h" +#include "ruby/internal/attr/deprecated.h" +#include "ruby/internal/attr/nonnull.h" #include "ruby/internal/attr/pure.h" #include "ruby/internal/dllexport.h" #include "ruby/internal/value.h" RBIMPL_SYMBOL_EXPORT_BEGIN() +/** + * This macro is (used but) mysterious. Why on earth do we need this? + * + * - `obj != orig` check is done anyways inside of rb_obj_init_copy(). + * - rb_obj_init_copy() returns something. No need are there to add `, 1`. + */ #define RB_OBJ_INIT_COPY(obj, orig) \ ((obj) != (orig) && (rb_obj_init_copy((obj), (orig)), 1)) +/** @old{RB_OBJ_INIT_COPY} */ #define OBJ_INIT_COPY(obj, orig) RB_OBJ_INIT_COPY(obj, orig) -VALUE rb_class_new_instance_pass_kw(int, const VALUE *, VALUE); -VALUE rb_class_new_instance(int, const VALUE*, VALUE); -VALUE rb_class_new_instance_kw(int, const VALUE*, VALUE, int); +/* object.c */ + +/** + * Identical to rb_class_new_instance(), except it passes the passed keywords + * if any to the `#initialize` method. + * + * @param[in] argc Number of objects of `argv`. + * @param[in] argv Arbitrary number of method arguments. + * @param[in] klass An instance of ::rb_cClass. + * @exception rb_eTypeError `klass`'s allocator is undefined. + * @exception rb_eException Any exceptions can happen inside. + * @return An allocated new instance of `klass`. + * @note This is _the_ implementation of `Object.new`. + */ +VALUE rb_class_new_instance_pass_kw(int argc, const VALUE *argv, VALUE klass); + +/** + * Allocates, then initialises an instance of the given class. It first calls + * the passed class' allocator to obtain an uninitialised object, then calls + * its initialiser with the remaining arguments. + * + * @param[in] argc Number of objects of `argv`. + * @param[in] argv Arguments passed to `#initialize`. + * @param[in] klass An instance of ::rb_cClass. + * @exception rb_eTypeError `klass`'s allocator is undefined. + * @exception rb_eException Any exceptions can happen inside. 
+ * @return An allocated new instance of `klass`. + */ +VALUE rb_class_new_instance(int argc, const VALUE *argv, VALUE klass); + +/** + * Identical to rb_class_new_instance(), except you can specify how to handle + * the last element of the given array. + * + * @param[in] argc Number of objects of `argv`. + * @param[in] argv Arbitrary number of method arguments. + * @param[in] klass An instance of ::rb_cClass. + * @param[in] kw_splat Handling of keyword parameters: + * - RB_NO_KEYWORDS `argv`'s last is not a keyword argument. + * - RB_PASS_KEYWORDS `argv`'s last is a keyword argument. + * - RB_PASS_CALLED_KEYWORDS it depends if there is a passed block. + * @exception rb_eTypeError `klass`'s allocator is undefined. + * @exception rb_eException Any exceptions can happen inside. + * @return An allocated new instance of `klass`. + */ +VALUE rb_class_new_instance_kw(int argc, const VALUE *argv, VALUE klass, int kw_splat); + +/** + * Checks for equality of the passed objects, in terms of `Object#eql?`. + * + * @param[in] lhs Comparison left hand side. + * @param[in] rhs Comparison right hand side. + * @retval non-zero They are equal. + * @retval 0 Otherwise. + * @note This function actually calls `lhs.eql?(rhs)` so you cannot + * implement your class' `#eql?` method using it. + */ +int rb_eql(VALUE lhs, VALUE rhs); + +/** + * Generates a textual representation of the given object. + * + * @param[in] obj Arbitrary ruby object. + * @return An instance of ::rb_cString that represents `obj`. + * @note This is the default implementation of `Object#to_s` that each + * subclasses want to override. + */ +VALUE rb_any_to_s(VALUE obj); + +/** + * Generates a human-readable textual representation of the given object. This + * is largely similar to Ruby level `Object#inspect` but not the same; it + * additionally escapes the inspection result so that the string be compatible + * with that of default internal (or default external, if absent). 
+ * + * @param[in] obj Arbitrary ruby object. + * @return An instance of ::rb_cString that represents `obj`. + */ +VALUE rb_inspect(VALUE obj); + +/** + * Queries if the given object is a direct instance of the given class. + * + * @param[in] obj Arbitrary ruby object. + * @param[in] klass An instance of ::rb_cModule. + * @exception rb_eTypeError `klass` is neither module nor class. + * @retval RUBY_Qtrue `obj` is an instance of `klass`. + * @retval RUBY_Qfalse Otherwise. + */ +VALUE rb_obj_is_instance_of(VALUE obj, VALUE klass); + +/** + * Queries if the given object is an instance (of possibly descendants) of the + * given class. + * + * @param[in] obj Arbitrary ruby object. + * @param[in] klass An instance of ::rb_cModule. + * @exception rb_eTypeError `klass` is neither module nor class. + * @retval RUBY_Qtrue `obj` is a `klass`. + * @retval RUBY_Qfalse Otherwise. + */ +VALUE rb_obj_is_kind_of(VALUE obj, VALUE klass); + +/** + * Allocates an instance of the given class. + * + * @param[in] klass A class to instantiate. + * @exception rb_eTypeError `klass` is not a class. + * @return An allocated, not yet initialised instance of `klass`. + * @note It calls the allocator defined by rb_define_alloc_func(). You + * cannot use this function to define an allocator. Use + * TypedData_Make_Struct or others, instead. + * @note Usually prefer rb_class_new_instance() to rb_obj_alloc() and + * rb_obj_call_init(). + * @see rb_class_new_instance() + * @see rb_obj_call_init() + * @see rb_define_alloc_func() + * @see #TypedData_Make_Struct + */ +VALUE rb_obj_alloc(VALUE klass); + +/** + * Produces a shallow copy of the given object. Its list of instance variables + * are copied, but not the objects they reference. It also copies the frozen + * value state. + * + * @param[in] obj Arbitrary ruby object. + * @exception rb_eException `#initialize_copy` can raise anything. + * @return A "clone" of `obj`. 
+ * + * @internal + * + * Unlike ruby-level `Object#clone`, there is no way to control the frozen-ness + * of the return value. + */ +VALUE rb_obj_clone(VALUE obj); + +/** + * Duplicates the given object. This does almost the same thing as + * rb_obj_clone() does. However it does not copy the singleton class (if any). + * It also doesn't copy frozen-ness. + * + * @param[in] obj Arbitrary ruby object. + * @exception rb_eException `#initialize_copy` can raise anything. + * @return A shallow copy of `obj`. + */ +VALUE rb_obj_dup(VALUE obj); + +/** + * Default implementation of `#initialize_copy`, `#initialize_dup` and + * `#initialize_clone`. It does almost nothing. Just raises exceptions for + * checks. + * + * @param[in] dst The destination object. + * @param[in] src The source object. + * @exception rb_eFrozenError `dst` is frozen. + * @exception rb_eTypeError `dst` and `src` have different classes. + * @return Always returns `dst`. + */ +VALUE rb_obj_init_copy(VALUE dst, VALUE src); + +/** + * Just calls rb_obj_freeze_inline() inside. Does this make any sense to + * extension libraries? + * + * @param[out] obj Object to freeze. + * @return Verbatim `obj`. + */ +VALUE rb_obj_freeze(VALUE obj); + +RBIMPL_ATTR_PURE() +/** + * Just calls RB_OBJ_FROZEN() inside. Does this make any sense to extension + * libraries? + * + * @param[in] obj Object in question. + * @retval RUBY_Qtrue Yes it is. + * @retval RUBY_Qfalse No it isn't. + */ +VALUE rb_obj_frozen_p(VALUE obj); + +/* gc.c */ + +/** + * Finds or creates an integer primary key of the given object. In the old + * days this function was a purely arithmetic operation that maps the + * underlying memory address where the object resides into a Ruby's integer. + * Some time around 2.x this changed. It no longer relates its return values + * to C level pointers. This function assigns some random number to the given + * object if absent. The same number will be returned on all subsequent + * requests. 
No two active objects share a number. + * + * @param[in] obj Arbitrary ruby object. + * @return An instance of ::rb_cInteger which is an "identifier" of `obj`. + * + * @internal + * + * The "some random number" is in fact a monotonic-increasing process-global + * unique integer, much like an `INTEGER AUTO_INCREMENT PRIMARY KEY` column in + * a MySQL table. + */ +VALUE rb_obj_id(VALUE obj); + +RBIMPL_ATTR_CONST() +/** + * Identical to rb_obj_id(), except it hesitates from allocating a new instance + * of ::rb_cInteger. rb_obj_id() could allocate ::RUBY_T_BIGNUM objects. That + * allocation might perhaps impact negatively. On such situations, this + * function instead returns one-shot temporary small integers that need no + * allocations at all. The values are guaranteed unique at the moment, but no + * future promise is made; could be reused. Use of this API should be very + * instant. It is a failure to store the returned integer to somewhere else. + * + * In short it is difficult to use. + * + * @param[in] obj Arbitrary ruby object. + * @return An instance of ::rb_cInteger unique at the moment. + * + * @internal + * + * This is roughly the old behaviour of rb_obj_id(). + */ +VALUE rb_memory_id(VALUE obj); /* object.c */ -int rb_eql(VALUE, VALUE); -VALUE rb_any_to_s(VALUE); -VALUE rb_inspect(VALUE); -VALUE rb_obj_is_instance_of(VALUE, VALUE); -VALUE rb_obj_is_kind_of(VALUE, VALUE); -VALUE rb_obj_alloc(VALUE); -VALUE rb_obj_clone(VALUE); -VALUE rb_obj_dup(VALUE); -VALUE rb_obj_init_copy(VALUE,VALUE); -VALUE rb_obj_taint(VALUE); RBIMPL_ATTR_PURE() -VALUE rb_obj_tainted(VALUE); -VALUE rb_obj_untaint(VALUE); -VALUE rb_obj_untrust(VALUE); +/** + * Finds a "real" class. As the name implies there are class objects that are + * surreal. This function takes a class, traverses its ancestry tree, and + * returns its nearest ancestor which is neither a module nor a singleton + * class. + * + * @param[in] klass An instance of ::rb_cClass. 
+ * @retval RUBY_Qfalse No real class in `klass`' ancestry tree. + * @retval klass `klass` itself is a real class. + * @retval otherwise Nearest ancestor of `klass` who is real. + */ +VALUE rb_class_real(VALUE klass); RBIMPL_ATTR_PURE() -VALUE rb_obj_untrusted(VALUE); -VALUE rb_obj_trust(VALUE); -VALUE rb_obj_freeze(VALUE); +/** + * Determines if the given two modules are relatives. + * + * @param[in] scion Possible subclass. + * @param[in] ascendant Possible superclass. + * @exception rb_eTypeError `ascendant` is not a module. + * @retval RUBY_Qtrue `scion` inherits, or is equal to `ascendant`. + * @retval RUBY_Qfalse `ascendant` inherits `scion`. + * @retval RUBY_Qnil They are not relatives. + */ +VALUE rb_class_inherited_p(VALUE scion, VALUE ascendant); RBIMPL_ATTR_PURE() -VALUE rb_obj_frozen_p(VALUE); +/** + * Queries the parent of the given class. + * + * @param[in] klass A child class. + * @exception rb_eTypeError `klass` is a `Class.allocate`. + * @retval RUBY_Qfalse `klass` has no superclass. + * @retval otherwise `klass`' superclass. + * + * @internal + * + * Is there any class except ::rb_cBasicObject, that has no superclass? + */ +VALUE rb_class_superclass(VALUE klass); -VALUE rb_obj_id(VALUE); -VALUE rb_memory_id(VALUE); -VALUE rb_obj_class(VALUE); +RBIMPL_ATTR_NONNULL(()) +/** + * Converts an object into another type. Calls the specified conversion method + * if necessary. + * + * @param[in] val An object to convert. + * @param[in] type A value of enum ::ruby_value_type. + * @param[in] name Name to display on error (e.g. "Array"). + * @param[in] mid Conversion method (e.g. "to_ary"). + * @exception rb_eTypeError Failed to convert. + * @return An object of the specified type. 
+ */ +VALUE rb_convert_type(VALUE val, int type, const char *name, const char *mid); -RBIMPL_ATTR_PURE() -VALUE rb_class_real(VALUE); +RBIMPL_ATTR_NONNULL(()) +/** + * Identical to rb_convert_type(), except it returns ::RUBY_Qnil instead of + * raising exceptions, in case of conversion failure. It still raises + * exceptions for various reasons, like when the conversion method itself + * raises, though. + * + * @param[in] val An object to convert. + * @param[in] type A value of enum ::ruby_value_type. + * @param[in] name Name to display on error (e.g. "Array"). + * @param[in] mid Conversion method (e.g. "to_ary"). + * @exception rb_eTypeError The `mid` does not generate `type`. + * @retval RUBY_Qnil No conversion defined. + * @retval otherwise An object of the specified type. + */ +VALUE rb_check_convert_type(VALUE val, int type, const char *name, const char *mid); -RBIMPL_ATTR_PURE() -VALUE rb_class_inherited_p(VALUE, VALUE); -VALUE rb_class_superclass(VALUE); -VALUE rb_class_get_superclass(VALUE); -VALUE rb_convert_type(VALUE,int,const char*,const char*); -VALUE rb_check_convert_type(VALUE,int,const char*,const char*); -VALUE rb_check_to_integer(VALUE, const char *); -VALUE rb_check_to_float(VALUE); -VALUE rb_to_int(VALUE); -VALUE rb_check_to_int(VALUE); -VALUE rb_Integer(VALUE); -VALUE rb_to_float(VALUE); -VALUE rb_Float(VALUE); -VALUE rb_String(VALUE); -VALUE rb_Array(VALUE); -VALUE rb_Hash(VALUE); -double rb_cstr_to_dbl(const char*, int); -double rb_str_to_dbl(VALUE, int); +RBIMPL_ATTR_NONNULL(()) +/** + * Identical to rb_check_convert_type(), except the return value type is fixed + * to ::rb_cInteger. + * + * @param[in] val An object to convert. + * @param[in] mid Conversion method (e.g. "to_ary"). + * @exception rb_eTypeError The `mid` does not generate an integer. + * @retval RUBY_Qnil No conversion defined. + * @retval otherwise An instance of ::rb_cInteger. + */ +VALUE rb_check_to_integer(VALUE val, const char *mid); + +/** + * This is complicated. 
+ * + * - When the passed object is already an instance of ::rb_cFloat, just + * returns it as-is. + * + * - When the passed object is something numeric, the function tries to + * convert it using `#to_f` method. + * + * - If that conversion fails (this happens for instance when the numeric + * is a complex) it returns ::RUBY_Qnil. + * + * - Otherwise returns the conversion result. + * + * - Otherwise it also returns ::RUBY_Qnil. + * + * @param[in] val An object to convert. + * @retval RUBY_Qnil Conversion from `val` to float is undefined. + * @retval otherwise Converted result. + */ +VALUE rb_check_to_float(VALUE val); + +/** + * Identical to rb_check_to_int(), except it raises in case of conversion + * mismatch. + * + * @param[in] val An object to convert. + * @exception rb_eTypeError `#to_int` does not generate an integer. + * @return An instance of ::rb_cInteger. + */ +VALUE rb_to_int(VALUE val); + +/** + * Identical to rb_check_to_integer(), except it uses `#to_int` for conversion. + * + * @param[in] val An object to convert. + * @exception rb_eTypeError `#to_int` does not return an integer. + * @retval RUBY_Qnil No conversion defined. + * @retval otherwise An instance of ::rb_cInteger. + */ +VALUE rb_check_to_int(VALUE val); + +/** + * This is the logic behind `Kernel#Integer`. Numeric types are converted + * directly, with floating point numbers being truncated. Strings are + * interpreted strictly; only leading/trailing whitespaces, plus/minus sign, + * radix indicators such as `0x`, digits, and underscores are allowed. + * Anything else are converted by first trying `#to_int`, then `#to_i`. + * + * This is slightly stricter than `String#to_i`. + * + * @param[in] val An object to convert. + * @exception rb_eArgError Malformed `val` passed. + * @exception rb_eTypeError No conversion defined. + * @return An instance of ::rb_cInteger. + */ +VALUE rb_Integer(VALUE val); + +/** + * Identical to rb_check_to_float(), except it raises on error. 
+ * + * @param[in] val An object to convert. + * @exception rb_eTypeError No conversion defined. + * @return An instance of ::rb_cFloat. + */ +VALUE rb_to_float(VALUE val); + +/** + * This is the logic behind `Kernel#Float`. Numeric types are converted + * directly to the nearest value that a Float can represent. Strings are + * interpreted strictly; only leading/trailing whitespaces are allowed except + * what `strtod` understands. Anything else are converted using `#to_f`. + * + * This is slightly stricter than `String#to_f`. + * + * @param[in] val An object to convert. + * @exception rb_eArgError Malformed `val` passed. + * @exception rb_eTypeError No conversion defined. + * @return An instance of ::rb_cFloat. + */ +VALUE rb_Float(VALUE val); + +/** + * This is the logic behind `Kernel#String`. Arguments are converted by first + * trying `#to_str`, then `#to_s`. + * + * @param[in] val An object to convert. + * @exception rb_eTypeError No conversion defined. + * @return An instance of ::rb_cString. + */ +VALUE rb_String(VALUE val); + +/** + * This is the logic behind `Kernel#Array`. Arguments are converted by first + * trying `#to_ary`, then `#to_a`, and if both failed, returns an array of + * length 1 that contains the passed argument as the sole contents. + * + * @param[in] val An object to convert. + * @return An instance of ::rb_cArray. + */ +VALUE rb_Array(VALUE val); + +/** + * This is the logic behind `Kernel#Hash`. Arguments are converted by first + * trying `#to_hash`. if it failed, and the argument is either ::RUBY_Qnil or + * an empty array, returns an empty hash. Otherwise an exception is raised. + * + * @param[in] val An object to convert. + * @exception rb_eTypeError No conversion defined. + * @return An instance of ::rb_cHash. 
+ */ +VALUE rb_Hash(VALUE val); + +RBIMPL_ATTR_NONNULL(()) +/** + * Converts a textual representation of a real number into a numeric, which is + * the nearest value that the return type can represent, of the value that the + * argument represents. This is in fact a 2-in-1 function whose behaviour can + * be controlled using the second (mode) argument. If the mode is zero, this + * function is in "historical" mode which only understands "floating-constant" + * defined at ISO/IEC 9899:1990 section 6.1.3.1. If the mode is nonzero, it is + * in "extended" mode, which also accepts "hexadecimal-floating-constant" + * defined at ISO/IEC 9899:2018 section 6.4.4.2. + * + * @param[in] str A textual representation of a real number. + * @param[in] mode Conversion mode, as described above. + * @exception rb_eArgError Malformed `str` passed. + * @see https://bugs.ruby-lang.org/issues/2969 + * @note Null pointers are allowed, and it returns 0.0 then. + */ +double rb_cstr_to_dbl(const char *str, int mode); + +/** + * Identical to rb_cstr_to_dbl(), except it accepts a Ruby's string instead of + * C's. + * + * @param[in] str A textual representation of a real number. + * @param[in] mode Conversion mode, as described in rb_cstr_to_dbl(). + * @exception rb_eArgError Malformed `str` passed. + * @see https://bugs.ruby-lang.org/issues/2969 + */ +double rb_str_to_dbl(VALUE str, int mode); RBIMPL_SYMBOL_EXPORT_END() diff --git a/include/ruby/internal/intern/parse.h b/include/ruby/internal/intern/parse.h index 9424657bbc..7c4e9925b9 100644 --- a/include/ruby/internal/intern/parse.h +++ b/include/ruby/internal/intern/parse.h @@ -17,45 +17,176 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. 
* @brief Public APIs related to ::rb_cSymbol. */ #include "ruby/internal/attr/const.h" +#include "ruby/internal/attr/nonnull.h" #include "ruby/internal/dllexport.h" #include "ruby/internal/value.h" RBIMPL_SYMBOL_EXPORT_BEGIN() -/* parse.y */ -ID rb_id_attrset(ID); +/* symbol.c */ + +/** + * Calculates an ID of attribute writer. For instance it returns `:foo=` when + * passed `:foo`. + * + * @param[in] id An id. + * @exception rb_eNameError `id` is not for attributes (e.g. operator). + * @return Calculated name of attribute writer. + */ +ID rb_id_attrset(ID id); RBIMPL_ATTR_CONST() -int rb_is_const_id(ID); +/** + * Classifies the given ID, then sees if it is a constant. In case an ID is in + * Unicode (likely), its "constant"-ness is determined if its first character + * is either upper case or title case. Otherwise it is detected if case- + * folding the first character changes its case or not. + * + * @param[in] id An id to classify. + * @retval 1 It is a constant. + * @retval 0 It isn't. + */ +int rb_is_const_id(ID id); RBIMPL_ATTR_CONST() -int rb_is_global_id(ID); +/** + * Classifies the given ID, then sees if it is a global variable. A global + * variable must start with `$`. + * + * @param[in] id An id to classify. + * @retval 1 It is a global variable. + * @retval 0 It isn't. + */ +int rb_is_global_id(ID id); RBIMPL_ATTR_CONST() -int rb_is_instance_id(ID); +/** + * Classifies the given ID, then sees if it is an instance variable. An + * instance variable must start with `@`, but not `@@`. + * + * @param[in] id An id to classify. + * @retval 1 It is an instance variable. + * @retval 0 It isn't. + */ +int rb_is_instance_id(ID id); RBIMPL_ATTR_CONST() -int rb_is_attrset_id(ID); +/** + * Classifies the given ID, then sees if it is an attribute writer. An + * attribute writer is otherwise a local variable, except it ends with `=`. + * + * @param[in] id An id to classify. + * @retval 1 It is an attribute writer. + * @retval 0 It isn't. 
+ */ +int rb_is_attrset_id(ID id); RBIMPL_ATTR_CONST() -int rb_is_class_id(ID); +/** + * Classifies the given ID, then sees if it is a class variable. A class + * variable must start with `@@`. + * + * @param[in] id An id to classify. + * @retval 1 It is a class variable. + * @retval 0 It isn't. + */ +int rb_is_class_id(ID id); RBIMPL_ATTR_CONST() -int rb_is_local_id(ID); +/** + * Classifies the given ID, then sees if it is a local variable. A local + * variable starts with a lowercase character, followed by some alphanumeric + * characters or `_`, then ends with anything other than `!`, `?`, or `=`. + * + * @param[in] id An id to classify. + * @retval 1 It is a local variable. + * @retval 0 It isn't. + */ +int rb_is_local_id(ID id); RBIMPL_ATTR_CONST() +/** + * Classifies the given ID, then sees if it is a junk ID. An ID with no + * special syntactic structure is considered junk. This category includes for + * instance punctuation. + * + * @param[in] id An id to classify. + * @retval 1 It is junk. + * @retval 0 It isn't. + */ int rb_is_junk_id(ID); -int rb_symname_p(const char*); + +RBIMPL_ATTR_NONNULL(()) +/** + * Sees if the passed C string constructs a valid syntactic symbol. Invalid + * ones include, for instance, those containing whitespace. + * + * @param[in] str A C string to check. + * @retval 1 It is a valid symbol name. + * @retval 0 It is invalid as a symbol name. + */ +int rb_symname_p(const char *str); + +/* vm.c */ + +/** + * Queries the last match, or `Regexp.last_match`, or the `$~`. You don't have + * to use it, because in reality you can get `$~` using rb_gv_get() as usual. + * + * @retval RUBY_Qnil The method has not run a regular expression. + * @retval otherwise An instance of ::rb_cMatch. + */ VALUE rb_backref_get(void); -void rb_backref_set(VALUE); + +/** + * Updates `$~`. You don't have to use it, because in reality you can set `$~` + * using rb_gv_set() as usual. + * + * @param[in] md Arbitrary Ruby object.
+ * @post The passed object is assigned to `$~`. + * + * @internal + * + * Yes, this function bypasses the Check_Type() that would normally prevent + * evil souls from assigning evil objects to `$~`. Use of this function is a + * really bad smell. + */ +void rb_backref_set(VALUE md); + +/** + * Queries the last line, or the `$_`. You don't have to use it, because in + * reality you can get `$_` using rb_gv_get() as usual. + * + * @retval RUBY_Qnil There has never been a "line" yet. + * @retval otherwise The last set `$_` value. + */ VALUE rb_lastline_get(void); -void rb_lastline_set(VALUE); + +/** + * Updates `$_`. You don't have to use it, because in reality you can set `$_` + * using rb_gv_set() as usual. + * + * @param[in] str Arbitrary Ruby object. + * @post The passed object is assigned to `$_`. + * + * @internal + * + * Unlike `$~`, you can assign non-strings to `$_`, even from ruby scripts. + */ +void rb_lastline_set(VALUE str); /* symbol.c */ + +/** + * Collects every single symbol that has ever been interned in the entire + * history of the current process. + * + * @return An array that contains all symbols that have ever existed. + */ VALUE rb_sym_all_symbols(void); RBIMPL_SYMBOL_EXPORT_END() diff --git a/include/ruby/internal/intern/proc.h b/include/ruby/internal/intern/proc.h index d6f77cbd4d..b8c3c5e146 100644 --- a/include/ruby/internal/intern/proc.h +++ b/include/ruby/internal/intern/proc.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Public APIs related to ::rb_cProc. */ #include "ruby/internal/dllexport.h" @@ -27,26 +27,326 @@ RBIMPL_SYMBOL_EXPORT_BEGIN() /* proc.c */ + +/** + * Constructs a Proc object from implicitly passed components.
When a ruby + * method is called with a block, that block is not explicitly passed around + * using C level function parameters. This function gathers all the necessary + * info to turn them into a Ruby level instance of ::rb_cProc. + * + * @exception rb_eArgError There is no passed block. + * @return An instance of ::rb_cProc. + */ VALUE rb_block_proc(void); + +/** + * Identical to rb_block_proc(), except it returns a lambda. + * + * @exception rb_eArgError There is no passed block. + * @return An instance of ::rb_cProc. + */ VALUE rb_block_lambda(void); -VALUE rb_proc_new(rb_block_call_func_t, VALUE); -VALUE rb_obj_is_proc(VALUE); -VALUE rb_proc_call(VALUE, VALUE); -VALUE rb_proc_call_kw(VALUE, VALUE, int); -VALUE rb_proc_call_with_block(VALUE, int argc, const VALUE *argv, VALUE); -VALUE rb_proc_call_with_block_kw(VALUE, int argc, const VALUE *argv, VALUE, int); -int rb_proc_arity(VALUE); -VALUE rb_proc_lambda_p(VALUE); + +/** + * This is an rb_iterate() + rb_block_proc() combo. + * + * ```CXX + * VALUE + * my_own_iterator(RB_BLOCK_CALL_FUNC_ARGLIST(y, c)) + * { + * const auto plus = rb_intern("+"); + * return rb_funcall(c, plus, 1, y); + * } + * + * VALUE + * my_own_method(VALUE self) + * { + * return rb_proc_new(my_own_iterator, self); + * } + * ``` + * + * @param[in] func A backend function of a proc. + * @param[in] callback_arg Passed to `func`'s callback_arg. + * @return A C-backended proc object. + * + */ +VALUE rb_proc_new(rb_block_call_func_t func, VALUE callback_arg); + +/** + * Queries if the given object is a proc. + * + * @note This is about the object's data structure, not its class etc. + * @param[in] recv Object in question. + * @retval RUBY_Qtrue It is a proc. + * @retval RUBY_Qfalse Otherwise. + */ +VALUE rb_obj_is_proc(VALUE recv); + +/** + * Evaluates the passed proc with the passed arguments. + * + * @param[in] recv The proc to call. + * @param[in] args An instance of ::RArray which is the arguments.
+ * @exception rb_eException Any exceptions happen inside. + * @return What the proc evaluates to. + */ +VALUE rb_proc_call(VALUE recv, VALUE args); + +/** + * Identical to rb_proc_call(), except you can specify how to handle the last + * element of the given array. + * + * @param[in] recv The proc to call. + * @param[in] args An instance of ::RArray which is the arguments. + * @param[in] kw_splat Handling of keyword parameters: + * - RB_NO_KEYWORDS `args`' last is not a keyword argument. + * - RB_PASS_KEYWORDS `args`' last is a keyword argument. + * - RB_PASS_CALLED_KEYWORDS it depends if there is a passed block. + * @exception rb_eException Any exceptions happen inside. + * @return What the proc evaluates to. + */ +VALUE rb_proc_call_kw(VALUE recv, VALUE args, int kw_splat); + +/** + * Identical to rb_proc_call(), except you can additionally pass another proc + * object, as a block. Nowadays procs can take blocks: + * + * ```ruby + * l = -> (positional, optional=nil, *rest, kwarg:, **kwrest, &block) { + * # ... how can we pass this `&block`? ^^^^^^ + * } + * ``` + * + * And this function is to pass one to such procs. + * + * @param[in] recv The proc to call. + * @param[in] argc Number of arguments. + * @param[in] argv Arbitrary number of proc arguments. + * @param[in] proc Proc as a passed block. + * @exception rb_eException Any exceptions happen inside. + * @return What the proc evaluates to. + */ +VALUE rb_proc_call_with_block(VALUE recv, int argc, const VALUE *argv, VALUE proc); + +/** + * Identical to rb_proc_call_with_block(), except you can specify how to handle + * the last element of the given array. It can also be seen as a routine + * identical to rb_proc_call_kw(), except you can additionally pass another + * proc object as a block. + * + * @param[in] recv The proc to call. + * @param[in] argc Number of arguments. + * @param[in] argv Arbitrary number of proc arguments. + * @param[in] proc Proc as a passed block. 
+ * @param[in] kw_splat Handling of keyword parameters: + * - RB_NO_KEYWORDS `args`' last is not a keyword argument. + * - RB_PASS_KEYWORDS `args`' last is a keyword argument. + * - RB_PASS_CALLED_KEYWORDS it depends if there is a passed block. + * @exception rb_eException Any exceptions happen inside. + * @return What the proc evaluates to. + */ +VALUE rb_proc_call_with_block_kw(VALUE recv, int argc, const VALUE *argv, VALUE proc, int kw_splat); + +/** + * Queries the number of mandatory arguments of the given Proc. If its block + * is declared to take no arguments, returns `0`. If the block is known to + * take exactly `n` arguments, returns `n`. If the block has optional + * arguments, returns `-n-1`, where `n` is the number of mandatory arguments, + * with the exception for blocks that are not lambdas and have only a finite + * number of optional arguments; in this latter case, returns `n`. Keyword + * arguments will be considered as a single additional argument, that argument + * being mandatory if any keyword argument is mandatory. + * + * @param[in] recv Target Proc object. + * @retval 0 It takes no arguments. + * @retval >0 It takes exactly this number of arguments. + * @retval <0 It takes optional arguments. + */ +int rb_proc_arity(VALUE recv); + +/** + * Queries if the given object is a lambda. Instances of ::rb_cProc are either + * lambda or proc. They differ in several points. This function can + * distinguish them without actually evaluating their contents. + * + * @param[in] recv Target proc object. + * @retval RUBY_Qtrue It is a lambda. + * @retval RUBY_Qfalse Otherwise. + */ +VALUE rb_proc_lambda_p(VALUE recv); + +/** + * Snapshots the current execution context and turn it into an instance of + * ::rb_cBinding. + * + * @return An instance of ::rb_cBinding. 
+ */ VALUE rb_binding_new(void); -VALUE rb_obj_method(VALUE, VALUE); -VALUE rb_obj_is_method(VALUE); -VALUE rb_method_call(int, const VALUE*, VALUE); -VALUE rb_method_call_kw(int, const VALUE*, VALUE, int); -VALUE rb_method_call_with_block(int, const VALUE *, VALUE, VALUE); -VALUE rb_method_call_with_block_kw(int, const VALUE *, VALUE, VALUE, int); -int rb_mod_method_arity(VALUE, ID); -int rb_obj_method_arity(VALUE, ID); -VALUE rb_protect(VALUE (*)(VALUE), VALUE, int*); + +/** + * Creates a method object. A method object is a proc-like object that you can + * "call". Note that a method object snapshots the method at the time the + * object is created: + * + * ```ruby + * class Foo + * def foo + * return 1 + * end + * end + * + * obj = Foo.new.method(:foo) + * + * class Foo + * def foo + * return 2 + * end + * end + * + * obj.call # => 1, not 2. + * ``` + * + * @param[in] recv Receiver of the method. + * @param[in] mid Method name, in either String or Symbol. + * @exception rb_eNoMethodError No such method. + * @return An instance of ::rb_cMethod. + */ +VALUE rb_obj_method(VALUE recv, VALUE mid); + +/** + * Queries if the given object is a method. + * + * @note This is about the object's data structure, not its class etc. + * @param[in] recv Object in question. + * @retval RUBY_Qtrue It is a method. + * @retval RUBY_Qfalse Otherwise. + */ +VALUE rb_obj_is_method(VALUE recv); + +/** + * Evaluates the passed method with the passed arguments. + * + * @param[in] argc Number of objects of `argv`. + * @param[in] argv Arbitrary number of method arguments. + * @param[in] recv The method object to call. + * @exception rb_eTypeError `recv` is not a method. + * @exception rb_eException Any exceptions happen inside. + * @return What the method returns. + */ +VALUE rb_method_call(int argc, const VALUE *argv, VALUE recv); + +/** + * Identical to rb_method_call(), except you can specify how to handle the last + * element of the given array. 
+ * + * @param[in] argc Number of objects of `argv`. + * @param[in] argv Arbitrary number of method arguments. + * @param[in] recv The method object to call. + * @param[in] kw_splat Handling of keyword parameters: + * - RB_NO_KEYWORDS `argv`' last is not a keyword argument. + * - RB_PASS_KEYWORDS `argv`' last is a keyword argument. + * - RB_PASS_CALLED_KEYWORDS it depends if there is a passed block. + * @exception rb_eTypeError `recv` is not a method. + * @exception rb_eException Any exceptions happen inside. + * @return What the method returns. + */ +VALUE rb_method_call_kw(int argc, const VALUE *argv, VALUE recv, int kw_splat); + +/** + * Identical to rb_method_call(), except you can additionally pass a proc as a + * block. + * + * @param[in] argc Number of objects of `argv`. + * @param[in] argv Arbitrary number of method arguments. + * @param[in] recv The method object to call. + * @param[in] proc Proc as a passed block. + * @exception rb_eTypeError `recv` is not a method. + * @exception rb_eException Any exceptions happen inside. + * @return What the method returns. + */ +VALUE rb_method_call_with_block(int argc, const VALUE *argv, VALUE recv, VALUE proc); + +/** + * Identical to rb_method_call_with_block(), except you can specify how to + * handle the last element of the given array. It can also be seen as a + * routine identical to rb_method_call_kw(), except you can additionally pass + * another proc object as a block. + * + * @param[in] argc Number of objects of `argv`. + * @param[in] argv Arbitrary number of method arguments. + * @param[in] recv The method object to call. + * @param[in] proc Proc as a passed block. + * @param[in] kw_splat Handling of keyword parameters: + * - RB_NO_KEYWORDS `argv`' last is not a keyword argument. + * - RB_PASS_KEYWORDS `argv`' last is a keyword argument. + * - RB_PASS_CALLED_KEYWORDS it depends if there is a passed block. + * @exception rb_eTypeError `recv` is not a method.
+ * @exception rb_eException Any exceptions happen inside. + * @return What the method returns. + */ +VALUE rb_method_call_with_block_kw(int argc, const VALUE *argv, VALUE recv, VALUE proc, int kw_splat); + +/** + * Queries the number of mandatory arguments of the method defined in the given + * module. If it is declared to take no arguments, returns `0`. If it takes + * exactly `n` arguments, returns `n`. If it has optional arguments, returns + * `-n-1`, where `n` is the number of mandatory arguments. Keyword arguments + * will be considered as a single additional argument, that argument being + * mandatory if any keyword argument is mandatory. + * + * @param[in] mod Namespace to search a method for. + * @param[in] mid Method id. + * @retval 0 It takes no arguments. + * @retval >0 It takes exactly this number of arguments. + * @retval <0 It takes optional arguments. + */ +int rb_mod_method_arity(VALUE mod, ID mid); + +/** + * Identical to rb_mod_method_arity(), except it searches for singleton methods + * rather than instance methods. + * + * @param[in] obj Object to search for a singleton method. + * @param[in] mid Method id. + * @retval 0 It takes no arguments. + * @retval >0 It takes exactly this number of arguments. + * @retval <0 It takes optional arguments. + */ +int rb_obj_method_arity(VALUE obj, ID mid); + +/* eval.c */ + +RBIMPL_ATTR_NONNULL((1)) +/** + * Protects a function call from potential global escapes from the function. + * Such global escapes include exceptions, `throw`, `break`, for example. + * + * It first calls the function func with `args` as the argument. If no global + * escape occurred during the function, it returns the result and `*state` is + * zero. Otherwise, it returns ::RUBY_Qnil and sets `*state` to nonzero. If + * `state` is `NULL`, it is not set in both cases. + * + * @param[in] func A function that potentially escapes globally. + * @param[in] args Passed as-is to `func`. + * @param[out] state State of execution. 
+ * @return What `func` returns, or an undefined value when it did not + * return. + * @post `*state` is set to zero if succeeded. Nonzero otherwise. + * @warning You have to clear the error info with `rb_set_errinfo(Qnil)` if + * you decide to ignore the caught exception. + * @see rb_eval_string_protect() + * @see rb_load_protect() + * + * @internal + * + * The "undefined value" described above is in fact ::RUBY_Qnil for now. But + * @shyouhei doesn't think that we would never change that. + * + * Though not a part of our public API, `state` is in fact an + * enum ruby_tag_type. You can see the potential "nonzero" values by looking + * at vm_core.h. + */ +VALUE rb_protect(VALUE (*func)(VALUE args), VALUE args, int *state); RBIMPL_SYMBOL_EXPORT_END() diff --git a/include/ruby/internal/intern/process.h b/include/ruby/internal/intern/process.h index 2b1005a205..cfa5e13162 100644 --- a/include/ruby/internal/intern/process.h +++ b/include/ruby/internal/intern/process.h @@ -17,9 +17,10 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Public APIs related to ::rb_mProcess. */ +#include "ruby/internal/attr/nonnull.h" #include "ruby/internal/attr/noreturn.h" #include "ruby/internal/config.h" /* rb_pid_t is defined here. */ #include "ruby/internal/dllexport.h" @@ -28,17 +29,252 @@ RBIMPL_SYMBOL_EXPORT_BEGIN() /* process.c */ + +/** + * Wait for the specified process to terminate, reap it, and return its status. + * + * @param[in] pid The process ID to wait for. + * @param[in] flags The flags to pass to waitpid(2). + * @return VALUE An instance of Process::Status. + */ +VALUE rb_process_status_wait(rb_pid_t pid, int flags); + +/** + * Sets the "last status", or the `$?`. 
+ * + * @param[in] status The termination status, as defined in `waitpid(3posix)`. + * @param[in] pid The last child of the current process. + * @post `$?` is updated. + */ void rb_last_status_set(int status, rb_pid_t pid); + +/** + * Queries the "last status", or the `$?`. + * + * @retval RUBY_Qnil The current thread has no dead children. + * @retval otherwise An instance of Process::Status describing the status of + * the child that was most recently `wait`-ed. + */ VALUE rb_last_status_get(void); -int rb_proc_exec(const char*); + +RBIMPL_ATTR_NONNULL(()) +/** + * Executes a shell command. + * + * @warning THIS FUNCTION RETURNS on error! + * @param[in] cmd Passed to the shell. + * @retval -1 Something prevented the command execution. + * @post Upon successful execution this function doesn't return. + * @post In case it returns the `errno` is set properly. + */ +int rb_proc_exec(const char *cmd); RBIMPL_ATTR_NORETURN() -VALUE rb_f_exec(int, const VALUE*); +/** + * Replaces the current process by running the given external command. This is + * the implementation of `Kernel#exec`. + * + * @param[in] argc Number of objects in `argv`. + * @param[in] argv Command and its options to execute. + * @exception rb_eTypeError Invalid options e.g. non-String argv. + * @exception rb_eArgError Invalid options e.g. redirection cycle. + * @exception rb_eNotImpError Not implemented e.g. no `setuid(2)`. + * @exception rb_eRuntimeError `Process::UID.switch` in operation. + * @exception rb_eSystemCallError `execve(2)` failed. + * @warning This function doesn't return. + * @warning On failure it raises. On success the process is replaced. + * + * @internal + * + * @shyouhei has to say that the rdoc for `Kernel#exec` is fairly incomplete. + * AFAIK this function ultimately takes the following signature: + * + * ```rbs + * type boolx = bool | nil # != `boolish` + * + * type rlim_t = Integer # rlim_cur + * | [ Integer, Integer ] # rlim_cur, rlim_max + * + * type uid_t = String # e.g. 
"root" + * | Integer # e.g. 0 + * + * type gid_t = String # e.g. "wheel" + * | Integer # e.g. 0 + * + * type fmode = String # e.g. "rb" + * | Integer # e.g. O_RDONLY | O_BINARY + * + * type mode_t = Integer # e.g. 0644 + * + * type pgrp = true # Creates a dedicated pgroup + * | 0 # ditto + * | nil # Uses the current one + * | Integer # Uses this specific pgroup + * + * type fd = :in # STDIN + * | :out # STDOUT + * | :err # STDERR + * | IO # This specific IO + * | Integer # A file descriptor of this # + * + * type src = fd | [ fd ] + * type dst = :close # Intuitive + * | fd # Intuitive + * | String # Open a file at this path + * | [ String ] # ... using O_RDONLY + * | [ String, fmode ] # ... using this mode + * | [ String, fmode, mode_t ] # ... with a permission + * | [ :child, fd ] # fd of child side + * + * type redir = Hash[ src, dst ] + * + * # ---- + * + * # Key-value pair of environment variables + * type envp = Hash[ String, String ] + * + * # Actual name (and the name passed to the subprocess if any) + * type arg0 = String | [ String, String ] + * + * # Arbitrary string parameters + * type argv = String + * + * # Exec options: + * type argh = redir | { + * chdir: String, # Working directory + * close_others: boolx, # O_CLOEXEC like behaviour + * gid: gid_t, # setegid(2) + * pgroup: pgrp, # setpgrp(2) + * rlimit_as: rlim_t, # setrlimit(2) + * rlimit_core: rlim_t, # ditto + * rlimit_cpu: rlim_t, # ditto + * rlimit_data: rlim_t, # ditto + * rlimit_fsize: rlim_t, # ditto + * rlimit_memlock: rlim_t, # ditto + * rlimit_msgqueue: rlim_t, # ditto + * rlimit_nice: rlim_t, # ditto + * rlimit_nofile: rlim_t, # ditto + * rlimit_nproc: rlim_t, # ditto + * rlimit_rss: rlim_t, # ditto + * rlimit_rtprio: rlim_t, # ditto + * rlimit_rttime: rlim_t, # ditto + * rlimit_sbsize: rlim_t, # ditto + * rlimit_sigpending: rlim_t, # ditto + * rlimit_stack: rlim_t, # ditto + * uid: uid_t, # seteuid(2) + * umask: mode_t, # umask(2) + * unsetenv_others: boolx # Unset everything except 
the passed envp + * } + * + * # ==== + * + * class Kernel + * def self?.exec + * : ( arg0 cmd, *argv args ) -> void + * | ( arg0 cmd, *argv args, argh opts) -> void + * | (envp env, arg0 cmd, *argv args ) -> void + * | (envp env, arg0 cmd, *argv args, argh opts) -> void + * end + * ``` + */ +VALUE rb_f_exec(int argc, const VALUE *argv); + +/** + * Waits for a process, with releasing GVL. + * + * @param[in] pid Process ID. + * @param[out] status The wait status is filled back. + * @param[in] flags Wait options. + * @retval -1 System call failed, errno set. + * @retval 0 WNOHANG but no waitable children. + * @retval otherwise A process ID that was `wait()`-ed. + * @post Upon successful return `status` is updated to have the process' + * status. + * @note `status` can be NULL. + * @note The arguments are passed through to underlying system call(s). + * Can have special meanings. For instance passing `(rb_pid_t)-1` + * to `pid` means it waits for any processes, under + * POSIX-compliant situations. + */ rb_pid_t rb_waitpid(rb_pid_t pid, int *status, int flags); + +/** + * This is a shorthand of rb_waitpid without status and flags. It has been + * like this since the very beginning. The initial revision already did the + * same thing. Not sure why, then, it has been named `syswait`. AFAIK this is + * different from how `wait(3posix)` works. + * + * @param[in] pid Passed to rb_waitpid(). + */ void rb_syswait(rb_pid_t pid); -rb_pid_t rb_spawn(int, const VALUE*); -rb_pid_t rb_spawn_err(int, const VALUE*, char*, size_t); -VALUE rb_proc_times(VALUE); + +/** + * Identical to rb_f_exec(), except it spawns a child process instead of + * replacing the current one. + * + * @param[in] argc Number of objects in `argv`. + * @param[in] argv Command and its options to execute. + * @exception rb_eTypeError Invalid options e.g. non-String argv. + * @exception rb_eArgError Invalid options e.g. redirection cycle. + * @exception rb_eNotImpError Not implemented e.g. no `setuid(2)`. 
+ * @exception rb_eRuntimeError `Process::UID.switch` in operation. + * @retval -1 Child process died for some reason. + * @retval otherwise The ID of the born child. + * + * @internal + * + * This is _really_ identical to rb_f_exec() until ultimately calling the + * system call. Almost everything is shared among these two (and + * rb_f_system()). + */ +rb_pid_t rb_spawn(int argc, const VALUE *argv); + +/** + * Identical to rb_spawn(), except you can additionally know the detailed + * situation in case of abnormal parturitions. + * + * @param[in] argc Number of objects in `argv`. + * @param[in] argv Command and its options to execute. + * @param[out] errbuf Error description write-back buffer. + * @param[in] buflen Number of bytes of `errbuf`, including NUL. + * @exception rb_eTypeError Invalid options e.g. non-String argv. + * @exception rb_eArgError Invalid options e.g. redirection cycle. + * @exception rb_eNotImpError Not implemented e.g. no `setuid(2)`. + * @exception rb_eRuntimeError `Process::UID.switch` in operation. + * @retval -1 Child process died for some reason. + * @retval otherwise The ID of the born child. + * @post In case of `-1`, at most `buflen` bytes of the reason why are + * written back to `errbuf`. + */ +rb_pid_t rb_spawn_err(int argc, const VALUE *argv, char *errbuf, size_t buflen); + +/** + * Gathers info about resources consumed by the current process. + * + * @param[in] _ Not used. Pass anything. + * @return An instance of `Process::Tms`. + * + * @internal + * + * This function might or might not exist depending on `./configure` result. + * It must be a portability hell. Better not use. + */ +VALUE rb_proc_times(VALUE _); + +/** + * "Detaches" a subprocess. In POSIX systems every child process that a + * process creates must be `wait(2)`-ed. A child process that died yet has not + * been waited so far is called a "zombie", which more or less consumes + * resources. This function automates reclamation of such processes. 
Once + * after this function successfully returns you can basically forget about the + * child process. + * + * @param[in] pid Process to wait. + * @return An instance of ::rb_cThread which is `waitpid(2)`-ing `pid`. + * @post You can just forget about the return value. GC reclaims it. + * @post You can know the exit status by querying `#value` of the + * return value (which is a blocking operation). + */ VALUE rb_detach_process(rb_pid_t pid); RBIMPL_SYMBOL_EXPORT_END() diff --git a/include/ruby/internal/intern/random.h b/include/ruby/internal/intern/random.h index 6482a8f6e8..5577f53cb4 100644 --- a/include/ruby/internal/intern/random.h +++ b/include/ruby/internal/intern/random.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief MT19937 backended pseudo random number generator. * @see Matsumoto, M., Nishimura, T., "Mersenne Twister: A 623- * dimensionally equidistributed uniform pseudorandom number @@ -30,13 +30,85 @@ RBIMPL_SYMBOL_EXPORT_BEGIN() /* random.c */ + +/** + * Generates a 32 bit random number. + * + * @return A random number. + * @note Now that we have ractors, the RNG behind this function is + * per-ractor. + */ unsigned int rb_genrand_int32(void); + +/** + * Generates a `double` random number. + * + * @return A random number. + * @note This function shares the RNG with rb_genrand_int32(). + */ double rb_genrand_real(void); + +/** + * Resets the RNG behind rb_genrand_int32()/rb_genrand_real(). + * + * @post The (now per-ractor) default RNG's internal state is cleared. + */ void rb_reset_random_seed(void); + +/** + * Generates a String of random bytes. + * + * @param[in,out] rnd An instance of ::rb_cRandom. + * @param[in] n Requested number of bytes. 
+ * @return An instance of ::rb_cString, of binary, of `n` bytes length, + * whose contents are random bits. + * + * @internal + * + * @shyouhei doesn't know if this is an Easter egg or an official feature, but + * this function can take a wider range of objects, such as `Socket::Ifaddr`. + * The arguments are just silently ignored and the default RNG is used instead, + * if they are non-RNG. + */ VALUE rb_random_bytes(VALUE rnd, long n); + +/** + * Identical to rb_genrand_int32(), except it generates using the passed RNG. + * + * @param[in,out] rnd An instance of ::rb_cRandom. + * @return A random number. + */ unsigned int rb_random_int32(VALUE rnd); + +/** + * Identical to rb_genrand_real(), except it generates using the passed RNG. + * + * @param[in,out] rnd An instance of ::rb_cRandom. + * @return A random number. + */ double rb_random_real(VALUE rnd); + +/** + * Identical to rb_genrand_ulong_limited(), except it generates using the + * passed RNG. + * + * @param[in,out] rnd An instance of ::rb_cRandom. + * @param[in] limit Max possible return value. + * @return A random number, distributed in `[0, limit]` interval. + * @note Note it can return `limit`. + * @note Whether the return value distributes uniformly in the + * interval or not depends on how the argument RNG behaves; at + * least in case of MT19937 it does. + */ unsigned long rb_random_ulong_limited(VALUE rnd, unsigned long limit); + +/** + * Generates a random number whose upper limit is `i`. + * + * @param[in] i Max possible return value. + * @return A random number, uniformly distributed in `[0, i]` interval. + * @note Note it can return `i`. 
+ */ unsigned long rb_genrand_ulong_limited(unsigned long i); RBIMPL_SYMBOL_EXPORT_END() diff --git a/include/ruby/internal/intern/range.h b/include/ruby/internal/intern/range.h index 7ca47915e2..1f7d7c313f 100644 --- a/include/ruby/internal/intern/range.h +++ b/include/ruby/internal/intern/range.h @@ -17,17 +17,71 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Public APIs related to ::rb_cRange. */ +#include "ruby/internal/attr/nonnull.h" #include "ruby/internal/dllexport.h" #include "ruby/internal/value.h" RBIMPL_SYMBOL_EXPORT_BEGIN() /* range.c */ -VALUE rb_range_new(VALUE, VALUE, int); -VALUE rb_range_beg_len(VALUE, long*, long*, long, int); + +/** + * Creates a new Range. + * + * @param[in] beg "Left" or "lowest" endpoint of the range. + * @param[in] end "Right" or "highest" endpoint of the range. + * @param[in] excl Whether the range is open-ended. + * @exception rb_eArgError `beg` and `end` are not comparable. + * @note These days both endpoints can be ::RUBY_Qnil, which means that + * endpoint is unbound. + */ +VALUE rb_range_new(VALUE beg, VALUE end, int excl); + +RBIMPL_ATTR_NONNULL(()) +/** + * Deconstructs a numerical range. As the arguments are `long` based, it + * expects everything are in the `long` domain. + * + * @param[in] range A range of numerical endpoints. + * @param[out] begp Return value buffer. + * @param[out] lenp Return value buffer. + * @param[in] len Updated length. + * @param[in] err In case `len` is out of range... + * - `0`: returns ::RUBY_Qnil. + * - `1`: raises ::rb_eRangeError. + * - `2`: `beg` and `len` expanded accordingly. + * @exception rb_eTypeError `range` is not a numerical range. + * @exception rb_eRangeError `range` cannot fit into `long`. 
+ * @retval RUBY_Qfalse `range` is not an ::rb_cRange. + * @retval RUBY_Qnil `len` is out of `range` but `err` is zero. + * @retval RUBY_Qtrue Otherwise. + * @post `*begp` is the (possibly updated) left endpoint. + * @post `*lenp` is the (possibly updated) length of the range. + * + * @internal + * + * The complex error handling switch reflects the fact that `Array#[]=` and + * `String#[]=` behave differently when they take ranges. + */ +VALUE rb_range_beg_len(VALUE range, long *begp, long *lenp, long len, int err); + +RBIMPL_ATTR_NONNULL(()) +/** + * Deconstructs a range into its components. + * + * @param[in] range Range or range-ish object. + * @param[out] begp Return value buffer. + * @param[out] endp Return value buffer. + * @param[out] exclp Return value buffer. + * @retval RUBY_Qfalse `range` is not an instance of ::rb_cRange. + * @retval RUBY_Qtrue Argument pointers are updated. + * @post `*begp` is the left endpoint of the range. + * @post `*endp` is the right endpoint of the range. + * @post `*exclp` is whether the range is open-ended or not. + */ int rb_range_values(VALUE range, VALUE *begp, VALUE *endp, int *exclp); RBIMPL_SYMBOL_EXPORT_END() diff --git a/include/ruby/internal/intern/rational.h b/include/ruby/internal/intern/rational.h index 30a87ff31f..ff4beca297 100644 --- a/include/ruby/internal/intern/rational.h +++ b/include/ruby/internal/intern/rational.h @@ -17,9 +17,10 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Public APIs related to ::rb_cRational. */ +#include "ruby/internal/attr/pure.h" #include "ruby/internal/dllexport.h" #include "ruby/internal/value.h" #include "ruby/internal/arithmetic/long.h" /* INT2FIX is here. 
*/ @@ -27,19 +28,144 @@ RBIMPL_SYMBOL_EXPORT_BEGIN() /* rational.c */ -VALUE rb_rational_raw(VALUE, VALUE); + +/** + * Identical to rb_rational_new(), except it skips argument validations. It is + * thus dangerous for extension libraries. For instance `1/0r` could be + * constructed using this. + * + * @param[in] num Numerator, an instance of ::rb_cInteger. + * @param[in] den Denominator, an instance of ::rb_cInteger. + * @exception rb_eTypeError Either argument is not an Integer. + * @return An instance of ::rb_cRational whose value is `(num/den)r`. + */ +VALUE rb_rational_raw(VALUE num, VALUE den); + +/** + * Shorthand of `(x/1)r`. As `x` is already an Integer, it practically + * converts it into a Rational of the identical value. + * + * @param[in] x An instance of ::rb_cInteger. + * @return An instance of ::rb_cRational, whose value is `(x/1)r`. + */ #define rb_rational_raw1(x) rb_rational_raw((x), INT2FIX(1)) + +/** @alias{rb_rational_raw} */ #define rb_rational_raw2(x,y) rb_rational_raw((x), (y)) -VALUE rb_rational_new(VALUE, VALUE); + +/** + * Constructs a Rational, with reduction. This returns for instance `(2/3)r` + * for `rb_rational_new(INT2NUM(-384), INT2NUM(-576))`. + * + * @param[in] num Numerator, an instance of ::rb_cInteger. + * @param[in] den Denominator, an instance of ::rb_cInteger. + * @exception rb_eZeroDivError `den` is zero. + * @return An instance of ::rb_cRational whose value is `(num/den)r`. + */ +VALUE rb_rational_new(VALUE num, VALUE den); + +/** + * Shorthand of `(x/1)r`. As `x` is already an Integer, it practically + * converts it into a Rational of the identical value. + * + * @param[in] x An instance of ::rb_cInteger. + * @return An instance of ::rb_cRational, whose value is `(x/1)r`. + */ #define rb_rational_new1(x) rb_rational_new((x), INT2FIX(1)) + +/** @alias{rb_rational_new} */ #define rb_rational_new2(x,y) rb_rational_new((x), (y)) -VALUE rb_Rational(VALUE, VALUE); + +/** + * Converts various values into a Rational. 
This function accepts: + * + * - Instances of ::rb_cInteger (taken as-is), + * - Instances of ::rb_cRational (taken as-is), + * - Instances of ::rb_cFloat (applies `#to_r`), + * - Instances of ::rb_cComplex (applies `#to_r`), + * - Instances of ::rb_cString (applies `#to_r`), + * - Other objects that respond to `#to_r`. + * + * It (possibly recursively) applies `#to_r` until both sides become either + * Integer or Rational, then divides them. + * + * As a special case, passing ::RUBY_Qundef to `den` is the same as passing + * `RB_INT2NUM(1)`. + * + * @param[in] num Numerator (see above). + * @param[in] den Denominator (see above). + * @exception rb_eTypeError Passed something not described above. + * @exception rb_eFloatDomainError `#to_r` produced NaN/Inf. + * @exception rb_eZeroDivError `#to_r` produced zero for `den`. + * @return An instance of ::rb_cRational whose value is `(num/den)r`. + * + * @internal + * + * This was the implementation of `Kernel#Rational` before, but they diverged. + */ +VALUE rb_Rational(VALUE num, VALUE den); + +/** + * Shorthand of `(x/1)r`. It practically converts it into a Rational of the + * identical value. + * + * @param[in] x ::rb_cInteger, ::rb_cRational, or something that responds to + * `#to_r`. + * @return An instance of ::rb_cRational, whose value is `(x/1)r`. + */ #define rb_Rational1(x) rb_Rational((x), INT2FIX(1)) + +/** @alias{rb_Rational} */ #define rb_Rational2(x,y) rb_Rational((x), (y)) + +RBIMPL_ATTR_PURE() +/** + * Queries the numerator of the passed Rational. + * + * @param[in] rat An instance of ::rb_cRational. + * @return Its numerator part, which is an instance of ::rb_cInteger. + */ VALUE rb_rational_num(VALUE rat); + +RBIMPL_ATTR_PURE() +/** + * Queries the denominator of the passed Rational. + * + * @param[in] rat An instance of ::rb_cRational. + * @return Its denominator part, which is an instance of ::rb_cInteger + * greater than or equal to one. 
+ */ VALUE rb_rational_den(VALUE rat); -VALUE rb_flt_rationalize_with_prec(VALUE, VALUE); -VALUE rb_flt_rationalize(VALUE); + +/** + * Simplified approximation of a float. It returns a rational `rat` which + * satisfies: + * + * ``` + * flt - |prec| <= rat <= flt + |prec| + * ``` + * + * ```ruby + * 3.141592.rationalize(0.001) # => (201/64)r + * 3.141592.rationalize(0.01) # => (22/7)r + * 3.141592.rationalize(0.1) # => (16/5)r + * 3.141592.rationalize(1) # => (3/1)r + * ``` + * + * @param[in] flt An instance of ::rb_cFloat to rationalise. + * @param[in] prec Another ::rb_cFloat, which is the "precision". + * @return Approximation of `flt`, in ::rb_cRational. + */ +VALUE rb_flt_rationalize_with_prec(VALUE flt, VALUE prec); + +/** + * Identical to rb_flt_rationalize_with_prec(), except it auto-detects + * appropriate precision depending on the passed value. + * + * @param[in] flt An instance of ::rb_cFloat to rationalise. + * @return Approximation of `flt`, in ::rb_cRational. + */ +VALUE rb_flt_rationalize(VALUE flt); RBIMPL_SYMBOL_EXPORT_END() diff --git a/include/ruby/internal/intern/re.h b/include/ruby/internal/intern/re.h index dd7baef954..4dd58b469b 100644 --- a/include/ruby/internal/intern/re.h +++ b/include/ruby/internal/intern/re.h @@ -17,33 +17,227 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Public APIs related to ::rb_cRegexp. */ +#include "ruby/internal/attr/nonnull.h" #include "ruby/internal/dllexport.h" #include "ruby/internal/value.h" RBIMPL_SYMBOL_EXPORT_BEGIN() /* re.c */ + +/** + * @deprecated This macro once was a thing in the old days, but makes no sense + * any longer today. Exists here for backwards compatibility + * only. You can safely forget about it. 
+ * + * @internal + * + * This was a function that switched between memcmp and rb_memcicmp depending + * on then-called `ruby_ignorecase`, or the `$=` global variable. That feature + * was abandoned sometime around version 1.9.0. + */ #define rb_memcmp memcmp -int rb_memcicmp(const void*,const void*,long); -void rb_match_busy(VALUE); -VALUE rb_reg_nth_defined(int, VALUE); -VALUE rb_reg_nth_match(int, VALUE); + +/** + * Identical to st_locale_insensitive_strcasecmp(), except it is timing safe + * and returns something different. + * + * @param[in] s1 Comparison LHS. + * @param[in] s2 Comparison RHS. + * @param[in] n Comparison shall stop after first `n` bytes are scanned. + * @retval <0 `s1` is "less" than `s2`. + * @retval 0 Both sides converted into lowercase would be identical. + * @retval >0 `s1` is "greater" than `s2`. + * @note The "case" here means that of the POSIX Locale. + * + * @internal + * + * Can accept NULLs as long as n is also 0, and returns 0. + */ +int rb_memcicmp(const void *s1,const void *s2, long n); + +/** + * Asserts that the given MatchData is "occupied". MatchData shares its + * backend storages with its Regexp object. But programs can destructively + * tamper with its contents. Calling this function beforehand shall prevent such + * modifications from spilling over into other objects. + * + * @param[out] md Target instance of ::rb_cMatch. + * @post The object is "busy". + * + * @internal + * + * There is rb_match_unbusy internally, but extension libraries are left unable + * to do so. + */ +void rb_match_busy(VALUE md); + +/** + * Identical to rb_reg_nth_match(), except it just returns Boolean. This could + * skip allocating a returning string, resulting in reduced memory footprints + * if applicable. + * + * @param[in] n Match index. + * @param[in] md An instance of ::rb_cMatch. + * @exception rb_eTypeError `md` is not initialised. + * @retval RUBY_Qnil There is no `n`-th capture. 
+ * @retval RUBY_Qfalse There is a `n`-th capture and is empty. 
+ * @retval RUBY_Qtrue There is a `n`-th capture that has something. + * + */ +VALUE rb_reg_nth_defined(int n, VALUE md); + +/** + * Queries the nth captured substring. + * + * @param[in] n Match index. + * @param[in] md An instance of ::rb_cMatch. + * @exception rb_eTypeError `md` is not initialised. + * @retval RUBY_Qnil There is no `n`-th capture. + * @retval otherwise An allocated instance of ::rb_cString containing + * the contents captured. + */ +VALUE rb_reg_nth_match(int n, VALUE md); + +/** + * Queries the index of the given named capture. Captures could be named. But + * that doesn't mean named ones are not indexed. A regular expression can mix + * named and non-named captures, and they are all indexed. This function + * converts from a name to its index. + * + * @param[in] match An instance of ::rb_cMatch. + * @param[in] backref Capture name, in String, Symbol, or Numeric. + * @exception rb_eIndexError No such named capture. + * @return The index of the given name. + */ int rb_reg_backref_number(VALUE match, VALUE backref); -VALUE rb_reg_last_match(VALUE); -VALUE rb_reg_match_pre(VALUE); -VALUE rb_reg_match_post(VALUE); -VALUE rb_reg_match_last(VALUE); + +/** + * This just returns the argument, stringified. What a poor name. + * + * @param[in] md An instance of ::rb_cMatch. + * @return Its 0th capture (i.e. entire matched string). + */ +VALUE rb_reg_last_match(VALUE md); + +/** + * The portion of the original string before the given match. + * + * @param[in] md An instance of ::rb_cMatch. + * @return Its "prematch". This is perl's ``$```. + */ +VALUE rb_reg_match_pre(VALUE md); + +/** + * The portion of the original string after the given match. + * + * @param[in] md An instance of ::rb_cMatch. + * @return Its "postmatch". This is perl's `$'`. + */ +VALUE rb_reg_match_post(VALUE md); + +/** + * The portion of the original string that captured at the very last. + * + * @param[in] md An instance of ::rb_cMatch. + * @return Its "lastmatch". 
This is perl's `$+`. + */ +VALUE rb_reg_match_last(VALUE md); + +/** + * @private + * + * @deprecated This macro once was a thing in the old days, but makes no sense + * any longer today. Exists here for backwards compatibility + * only. You can safely forget about it. + */ #define HAVE_RB_REG_NEW_STR 1 -VALUE rb_reg_new_str(VALUE, int); -VALUE rb_reg_new(const char *, long, int); + +/** + * Identical to rb_reg_new(), except it takes the expression in Ruby's string + * instead of C's. + * + * @param[in] src Source code in String. + * @param[in] opts Options e.g. ONIG_OPTION_MULTILINE. + * @exception rb_eRegexpError `src` and `opts` do not interface. + * @return Allocated new instance of ::rb_cRegexp. + */ +VALUE rb_reg_new_str(VALUE src, int opts); + +RBIMPL_ATTR_NONNULL(()) +/** + * Creates a new Regular expression. + * + * @param[in] src Source code. + * @param[in] len `strlen(src)`. + * @param[in] opts Options e.g. ONIG_OPTION_MULTILINE. + * @return Allocated new instance of ::rb_cRegexp. + */ +VALUE rb_reg_new(const char *src, long len, int opts); + +/** + * Allocates an instance of ::rb_cRegexp. + * + * @private + * + * Nobody should call this function. Regular expressions that are not + * initialised must not exist in the wild. + */ VALUE rb_reg_alloc(void); + +/** + * Initialises an instance of ::rb_cRegexp. + * + * @private + * + * This just raises for ordinary regexp objects. Extension libraries must not + * use it. + */ VALUE rb_reg_init_str(VALUE re, VALUE s, int options); -VALUE rb_reg_match(VALUE, VALUE); -VALUE rb_reg_match2(VALUE); -int rb_reg_options(VALUE); + +/** + * This is the match operator. + * + * @param[in] re An instance of ::rb_cRegexp. + * @param[in] str An instance of ::rb_cString. + * @exception rb_eTypeError `str` is not a string. + * @exception rb_eRegexpError Error inside of Onigmo (unlikely). + * @retval RUBY_Qnil Match failed. + * @retval otherwise Matched position (character index inside of + * `str`). 
+ * @post `Regexp.last_match` is updated. + * @post `$&`, `$~`, etc., are updated. + * @note If you do this in ruby, named captures are assigned to local + * variable of the local scope. But that doesn't happen here. The + * assignment is done by the interpreter. + */ +VALUE rb_reg_match(VALUE re, VALUE str); + +/** + * Identical to rb_reg_match(), except it matches against rb_lastline_get() + * (or, the `$_`). + * + * @param[in] re An instance of ::rb_cRegexp. + * @exception rb_eRegexpError Error inside of Onigmo (unlikely). + * @retval RUBY_Qnil Match failed or `$_` is absent. + * @retval otherwise Matched position (character index inside of + * `$_`). + * @post `Regexp.last_match` is updated. + * @post `$&`, `$~`, etc., are updated. + */ +VALUE rb_reg_match2(VALUE re); + +/** + * Queries the options of the passed regular expression. + * + * @param[in] re An instance of ::rb_cRegexp. + * @return Its options. + * @note Possible return values are defined in Onigmo.h. + */ +int rb_reg_options(VALUE re); RBIMPL_SYMBOL_EXPORT_END() diff --git a/include/ruby/internal/intern/ruby.h b/include/ruby/internal/intern/ruby.h index 9d9a71cf7a..efe61424ca 100644 --- a/include/ruby/internal/intern/ruby.h +++ b/include/ruby/internal/intern/ruby.h @@ -17,20 +17,60 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Process-global APIs. */ +#include "ruby/internal/attr/nonnull.h" #include "ruby/internal/dllexport.h" #include "ruby/internal/value.h" RBIMPL_SYMBOL_EXPORT_BEGIN() /* ruby.c */ +/** @alias{rb_get_argv} */ #define rb_argv rb_get_argv() + +/** + * The value of `$0` at process bootup. + * + * @note This is just a snapshot of `$0`, not the backend storage of it. 
`$0` + * could become something different because it is a writable global + * variable. Modifying it for instance affects `ps(1)` output. Don't + * assume they are synced. + */ RUBY_EXTERN VALUE rb_argv0; + +/* io.c */ + +/** + * Queries the arguments passed to the current process that you can access from + * Ruby as `ARGV`. + * + * @return An array of strings containing arguments passed to the process. + */ VALUE rb_get_argv(void); -void *rb_load_file(const char*); -void *rb_load_file_str(VALUE); + +/* ruby.c */ + +RBIMPL_ATTR_NONNULL(()) +/** + * Loads the given file. This function opens the given pathname for reading, + * parses the contents as a Ruby script, and returns an opaque "node" pointer. + * You can then pass it to ruby_run_node() for evaluation. + * + * @param[in] file File name, or "-" to read from stdin. + * @return Opaque "node" pointer. + */ +void *rb_load_file(const char *file); + +/** + * Identical to rb_load_file(), except it takes the argument as a Ruby's string + * instead of C's. + * + * @param[in] file File name, or "-" to read from stdin. + * @return Opaque "node" pointer. + */ +void *rb_load_file_str(VALUE file); RBIMPL_SYMBOL_EXPORT_END() diff --git a/include/ruby/internal/intern/select.h b/include/ruby/internal/intern/select.h index 43d4cf354c..6ba84c6e63 100644 --- a/include/ruby/internal/intern/select.h +++ b/include/ruby/internal/intern/select.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Public APIs to provide ::rb_fd_select(). * @note Functions and structs defined in this header file are not * necessarily ruby-specific. They don't need ::VALUE etc. 
@@ -35,9 +35,11 @@ # include "ruby/internal/intern/select/largesize.h" #elif defined(_WIN32) # include "ruby/internal/intern/select/win32.h" +# /** Does nothing (defined for compatibility). */ # define rb_fd_resize(n, f) ((void)(f)) #else # include "ruby/internal/intern/select/posix.h" +# /** Does nothing (defined for compatibility). */ # define rb_fd_resize(n, f) ((void)(f)) #endif @@ -45,7 +47,39 @@ RBIMPL_SYMBOL_EXPORT_BEGIN() struct timeval; -int rb_thread_fd_select(int, rb_fdset_t *, rb_fdset_t *, rb_fdset_t *, struct timeval *); +/** + * Waits for multiple file descriptors at once. This is basically a wrapper of + * system-provided select() with releasing GVL, to allow other Ruby threads to run + * in parallel. + * + * @param[in] nfds Max FD in everything passed, plus one. + * @param[in,out] rfds Set of FDs to wait for reads. + * @param[in,out] wfds Set of FDs to wait for writes. + * @param[in,out] efds Set of FDs to wait for OOBs. + * @param[in,out] timeout Max blocking duration. + * @retval -1 Failed, errno set. + * @retval 0 Timeout exceeded. + * @retval otherwise Total number of file descriptors returned. + * @post `rfds` contains readable FDs. + * @post `wfds` contains writable FDs. + * @post `efds` contains exceptional FDs. + * @post `timeout` is the time left. + * @note All pointers are allowed to be null pointers. + * + * Although backend threads can run in parallel with this function, touching a + * file descriptor from multiple threads could be problematic. For instance + * what happens when a thread closes a file descriptor that is selected by + * someone else, vastly varies among operating systems. You had better avoid + * touching an fd from more than one thread. + * + * @internal + * + * Although any file descriptors are possible here, it makes completely no + * sense to pass a descriptor that is not `O_NONBLOCK`.
If you want to know + * the reason for this limitation in detail, you might find this thread super + * interesting: https://lkml.org/lkml/2004/10/6/117 + */ +int rb_thread_fd_select(int nfds, rb_fdset_t *rfds, rb_fdset_t *wfds, rb_fdset_t *efds, struct timeval *timeout); RBIMPL_SYMBOL_EXPORT_END() diff --git a/include/ruby/internal/intern/select/largesize.h b/include/ruby/internal/intern/select/largesize.h index ba56a159b1..d65f088c06 100644 --- a/include/ruby/internal/intern/select/largesize.h +++ b/include/ruby/internal/intern/select/largesize.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Public APIs to provide ::rb_fd_select(). * * Several Unix platforms support file descriptors bigger than FD_SETSIZE in @@ -35,9 +35,6 @@ * `select(2)` documents how to allocate fd_set dynamically. * http://www.openbsd.org/cgi-bin/man.cgi?query=select&manpath=OpenBSD+4.4 * - * - HP-UX documents how to allocate fd_set dynamically. - * http://docs.hp.com/en/B2355-60105/select.2.html - * * - Solaris 8 has `select_large_fdset` * * - Mac OS X 10.7 (Lion) @@ -66,26 +63,134 @@ struct timeval; +/** + * The data structure which wraps the fd_set bitmap used by select(2). This + * allows Ruby to use FD sets larger than that allowed by historic limitations + * on modern platforms. + */ typedef struct { - int maxfd; - fd_set *fdset; + int maxfd; /**< Maximum allowed number of FDs. 
*/ + fd_set *fdset; /**< File descriptors buffer. */ } rb_fdset_t; RBIMPL_SYMBOL_EXPORT_BEGIN() -void rb_fd_init(rb_fdset_t *); -void rb_fd_term(rb_fdset_t *); -void rb_fd_zero(rb_fdset_t *); -void rb_fd_set(int, rb_fdset_t *); -void rb_fd_clr(int, rb_fdset_t *); -int rb_fd_isset(int, const rb_fdset_t *); -void rb_fd_copy(rb_fdset_t *, const fd_set *, int); +RBIMPL_ATTR_NONNULL(()) +/** + * (Re-)initialises a fdset. One must be initialised before other `rb_fd_*` + * operations. Analogous to calling `malloc(3)` to allocate an `fd_set`. + * + * @param[out] f An fdset to squash. + * @post `f` holds no file descriptors. + */ +void rb_fd_init(rb_fdset_t *f); + +RBIMPL_ATTR_NONNULL(()) +/** + * Destroys the ::rb_fdset_t, releasing any memory and resources it used. It + * must be reinitialised using rb_fd_init() before future use. Analogous to + * calling `free(3)` to release memory for an `fd_set`. + * + * @param[out] f An fdset to squash. + * @post `f` holds no file descriptors. + */ +void rb_fd_term(rb_fdset_t *f); + +RBIMPL_ATTR_NONNULL(()) +/** + * Wipes out the current set of FDs. + * + * @param[out] f The fdset to clear. + * @post `f` has no FDs. + */ +void rb_fd_zero(rb_fdset_t *f); + +RBIMPL_ATTR_NONNULL(()) +/** + * Sets an fd to a fdset. + * + * @param[in] fd A file descriptor. + * @param[out] f Target fdset. + * @post `f` holds `fd`. + */ +void rb_fd_set(int fd, rb_fdset_t *f); + +RBIMPL_ATTR_NONNULL(()) +/** + * Releases a specific FD from the given fdset. + * + * @param[in] fd Target FD. + * @param[out] f The fdset that holds `fd`. + * @post `f` doesn't hold `fd`. + */ +void rb_fd_clr(int fd, rb_fdset_t *f); + +RBIMPL_ATTR_NONNULL(()) +RBIMPL_ATTR_PURE() +/** + * Queries if the given FD is in the given set. + * + * @param[in] fd Target FD. + * @param[in] f The fdset to scan. + * @retval 1 Yes there is. + * @retval 0 No there isn't.
+ * @see http://www.freebsd.org/cgi/query-pr.cgi?pr=91421 + */ +int rb_fd_isset(int fd, const rb_fdset_t *f); + +/** + * Destructively overwrites an fdset with another. + * + * @param[out] dst Target fdset. + * @param[in] src Source fdset. + * @param[in] max Maximum number of file descriptors to copy. + * @post `dst` is a copy of `src`. + */ +void rb_fd_copy(rb_fdset_t *dst, const fd_set *src, int max); + +/** + * Identical to rb_fd_copy(), except it copies unlimited number of file + * descriptors. + * + * @param[out] dst Target fdset. + * @param[in] src Source fdset. + * @post `dst` is a copy of `src`. + */ void rb_fd_dup(rb_fdset_t *dst, const rb_fdset_t *src); -int rb_fd_select(int, rb_fdset_t *, rb_fdset_t *, rb_fdset_t *, struct timeval *); + +/** + * Waits for multiple file descriptors at once. + * + * @param[in] nfds Max FD in everything passed, plus one. + * @param[in,out] rfds Set of FDs to wait for reads. + * @param[in,out] wfds Set of FDs to wait for writes. + * @param[in,out] efds Set of FDs to wait for OOBs. + * @param[in,out] timeout Max blocking duration. + * @retval -1 Failed, errno set. + * @retval 0 Timeout exceeded. + * @retval otherwise Total number of file descriptors returned. + * @post `rfds` contains readable FDs. + * @post `wfds` contains writable FDs. + * @post `efds` contains exceptional FDs. + * @post `timeout` is the time left. + * @note All pointers are allowed to be null pointers. + */ +int rb_fd_select(int nfds, rb_fdset_t *rfds, rb_fdset_t *wfds, rb_fdset_t *efds, struct timeval *timeout); RBIMPL_SYMBOL_EXPORT_END() RBIMPL_ATTR_NONNULL(()) RBIMPL_ATTR_PURE() -/* :TODO: can this function be __attribute__((returns_nonnull)) or not? */ +/** + * Raw pointer to `fd_set`. + * + * @param[in] f Target fdset. + * @retval NULL `f` is already terminated by rb_fd_term(). + * @retval otherwise Underlying fd_set. + * + * @internal + * + * Extension library must not touch raw pointers. It was a bad idea to let + * them use it. 
+ */ static inline fd_set * rb_fd_ptr(const rb_fdset_t *f) { @@ -94,6 +199,12 @@ rb_fd_ptr(const rb_fdset_t *f) RBIMPL_ATTR_NONNULL(()) RBIMPL_ATTR_PURE() +/** + * It seems this function has no use. Maybe just remove? + * + * @param[in] f A set. + * @return Number of file descriptors stored. + */ static inline int rb_fd_max(const rb_fdset_t *f) { diff --git a/include/ruby/internal/intern/select/posix.h b/include/ruby/internal/intern/select/posix.h index 6c1092b39d..0a9b0b2e51 100644 --- a/include/ruby/internal/intern/select/posix.h +++ b/include/ruby/internal/intern/select/posix.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Public APIs to provide ::rb_fd_select(). */ #include "ruby/internal/config.h" @@ -30,17 +30,41 @@ # include <unistd.h> /* for select(2) (archaic UNIX) */ #endif -#include "ruby/internal/attr/pure.h" #include "ruby/internal/attr/const.h" +#include "ruby/internal/attr/noalias.h" +#include "ruby/internal/attr/nonnull.h" +#include "ruby/internal/attr/pure.h" +/** + * The data structure which wraps the fd_set bitmap used by `select(2)`. This + * allows Ruby to use FD sets larger than what has been historically allowed on + * modern platforms. + * + * @internal + * + * ... but because this header file is included only when the system is with + * that "historic restrictions", this is nothing more than an alias of fd_set. + */ typedef fd_set rb_fdset_t; +/** Clears the given ::rb_fdset_t. */ #define rb_fd_zero FD_ZERO + +/** Sets the given fd to the ::rb_fdset_t. */ #define rb_fd_set FD_SET + +/** Unsets the given fd from the ::rb_fdset_t. */ #define rb_fd_clr FD_CLR + +/** Queries if the given fd is in the ::rb_fdset_t. 
*/ #define rb_fd_isset FD_ISSET + +/** Initialises the given ::rb_fdset_t. */ #define rb_fd_init FD_ZERO + +/** Waits for multiple file descriptors at once. */ #define rb_fd_select select + /**@cond INTERNAL_MACRO */ #define rb_fd_copy rb_fd_copy #define rb_fd_dup rb_fd_dup @@ -48,20 +72,50 @@ typedef fd_set rb_fdset_t; #define rb_fd_max rb_fd_max /** @endcond */ +RBIMPL_ATTR_NONNULL(()) +RBIMPL_ATTR_NOALIAS() +/** + * Destructively overwrites an fdset with another. + * + * @param[out] dst Target fdset. + * @param[in] src Source fdset. + * @param[in] n Unused parameter. + * @post `dst` is a copy of `src`. + */ static inline void rb_fd_copy(rb_fdset_t *dst, const fd_set *src, int n) { *dst = *src; } +RBIMPL_ATTR_NONNULL(()) +RBIMPL_ATTR_NOALIAS() +/** + * Destructively overwrites an fdset with another. + * + * @param[out] dst Target fdset. + * @param[in] src Source fdset. + * @post `dst` is a copy of `src`. + */ static inline void -rb_fd_dup(rb_fdset_t *dst, const fd_set *src, int n) +rb_fd_dup(rb_fdset_t *dst, const fd_set *src) { *dst = *src; } RBIMPL_ATTR_PURE() /* :TODO: can this function be __attribute__((returns_nonnull)) or not? */ +/** + * Raw pointer to `fd_set`. + * + * @param[in] f Target fdset. + * @return Underlying fd_set. + * + * @internal + * + * Extension library must not touch raw pointers. It was a bad idea to let + * them use it. + */ static inline fd_set * rb_fd_ptr(rb_fdset_t *f) { @@ -69,14 +123,22 @@ rb_fd_ptr(rb_fdset_t *f) } RBIMPL_ATTR_CONST() +/** + * It seems this function has no use. Maybe just remove? + * + * @param[in] f A set. + * @return Number of file descriptors stored. + */ static inline int rb_fd_max(const rb_fdset_t *f) { return FD_SETSIZE; } -/* :FIXME: What are these? They don't exist for shibling implementations. */ +/** @cond INTERNAL_MACRO */ +/* :FIXME: What are these? They don't exist for sibling implementations.
*/ #define rb_fd_init_copy(d, s) (*(d) = *(s)) #define rb_fd_term(f) ((void)(f)) +/** @endcond */ #endif /* RBIMPL_INTERN_SELECT_POSIX_H */ diff --git a/include/ruby/internal/intern/select/win32.h b/include/ruby/internal/intern/select/win32.h index ef75a0f760..edaf7a8523 100644 --- a/include/ruby/internal/intern/select/win32.h +++ b/include/ruby/internal/intern/select/win32.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Public APIs to provide ::rb_fd_select(). */ #include "ruby/internal/dosish.h" /* for rb_w32_select */ @@ -41,21 +41,83 @@ RBIMPL_SYMBOL_EXPORT_BEGIN() struct timeval; +/** + * The data structure which wraps the fd_set bitmap used by select(2). This + * allows Ruby to use FD sets larger than that allowed by historic limitations + * on modern platforms. + */ typedef struct { - int capa; - fd_set *fdset; + int capa; /**< Maximum allowed number of FDs. */ + fd_set *fdset; /**< File descriptors buffer. */ } rb_fdset_t; -void rb_fd_init(rb_fdset_t *); -void rb_fd_term(rb_fdset_t *); -void rb_fd_set(int, rb_fdset_t *); -void rb_w32_fd_copy(rb_fdset_t *, const fd_set *, int); +RBIMPL_ATTR_NONNULL(()) +/** + * (Re-)initialises a fdset. One must be initialised before other `rb_fd_*` + * operations. Analogous to calling `malloc(3)` to allocate an `fd_set`. + * + * @param[out] f An fdset to squash. + * @post `f` holds no file descriptors. + * + * @internal + * + * Can't this leak memory if the same `f` is passed twice...? + */ +void rb_fd_init(rb_fdset_t *f); + +RBIMPL_ATTR_NONNULL(()) +/** + * Destroys the ::rb_fdset_t, releasing any memory and resources it used. It + * must be reinitialised using rb_fd_init() before future use. 
Analogous to + * calling `free(3)` to release memory for an `fd_set`. + * + * @param[out] f An fdset to squash. + * @post `f` holds no file descriptors. + */ +void rb_fd_term(rb_fdset_t *f); + +RBIMPL_ATTR_NONNULL(()) +/** + * Sets an fd to a fdset. + * + * @param[in] fd A file descriptor. + * @param[out] f Target fdset. + * @post `f` holds `fd`. + */ +void rb_fd_set(int fd, rb_fdset_t *f); + +RBIMPL_ATTR_NONNULL(()) +/** + * Destructively overwrites an fdset with another. + * + * @param[out] dst Target fdset. + * @param[in] src Source fdset. + * @param[in] max Maximum number of file descriptors to copy. + * @post `dst` is a copy of `src`. + */ +void rb_w32_fd_copy(rb_fdset_t *dst, const fd_set *src, int max); + +RBIMPL_ATTR_NONNULL(()) +/** + * Identical to rb_w32_fd_copy(), except it copies unlimited number of file + * descriptors. + * + * @param[out] dst Target fdset. + * @param[in] src Source fdset. + * @post `dst` is a copy of `src`. + */ void rb_w32_fd_dup(rb_fdset_t *dst, const rb_fdset_t *src); RBIMPL_SYMBOL_EXPORT_END() RBIMPL_ATTR_NONNULL(()) RBIMPL_ATTR_NOALIAS() +/** + * Wipes out the current set of FDs. + * + * @param[out] f The fdset to clear. + * @post `f` has no FDs. + */ static inline void rb_fd_zero(rb_fdset_t *f) { @@ -63,6 +125,13 @@ rb_fd_zero(rb_fdset_t *f) } RBIMPL_ATTR_NONNULL(()) +/** + * Releases a specific FD from the given fdset. + * + * @param[in] n Target FD. + * @param[out] f The fdset that holds `n`. + * @post `f` doesn't hold n. + */ static inline void rb_fd_clr(int n, rb_fdset_t *f) { @@ -70,6 +139,14 @@ rb_fd_clr(int n, rb_fdset_t *f) } RBIMPL_ATTR_NONNULL(()) +/** + * Queries if the given FD is in the given set. + * + * @param[in] n Target FD. + * @param[in] f The fdset to scan. + * @retval 1 Yes there is. + * @retval 0 No there isn't. + */ static inline int rb_fd_isset(int n, rb_fdset_t *f) { @@ -77,6 +154,14 @@ rb_fd_isset(int n, rb_fdset_t *f) } RBIMPL_ATTR_NONNULL(()) +/** + * Destructively overwrites an fdset with another. 
+ * + * @param[out] dst Target fdset. + * @param[in] src Source fdset. + * @param[in] n Maximum number of file descriptors to copy. + * @post `dst` is a copy of `src`. + */ static inline void rb_fd_copy(rb_fdset_t *dst, const fd_set *src, int n) { @@ -84,12 +169,45 @@ rb_fd_copy(rb_fdset_t *dst, const fd_set *src, int n) } RBIMPL_ATTR_NONNULL(()) +/** + * Identical to rb_fd_copy(), except it copies unlimited number of file + * descriptors. + * + * @param[out] dst Target fdset. + * @param[in] src Source fdset. + * @post `dst` is a copy of `src`. + */ static inline void rb_fd_dup(rb_fdset_t *dst, const rb_fdset_t *src) { rb_w32_fd_dup(dst, src); } +/** + * Waits for multiple file descriptors at once. + * + * @param[in] n Max FD in everything passed, plus one. + * @param[in,out] rfds Set of FDs to wait for reads. + * @param[in,out] wfds Set of FDs to wait for writes. + * @param[in,out] efds Set of FDs to wait for OOBs. + * @param[in,out] timeout Max blocking duration. + * @retval -1 Failed, errno set. + * @retval 0 Timeout exceeded. + * @retval otherwise Total number of file descriptors returned. + * @post `rfds` contains readable FDs. + * @post `wfds` contains writable FDs. + * @post `efds` contains exceptional FDs. + * @post `timeout` is the time left. + * @note All pointers are allowed to be null pointers. + * + * @internal + * + * This can wait for `SOCKET` and `HANDLE` at once. In order to achieve that + * property we heavily touch the internals of MSVCRT. We `CreateFile` a + * `"NUL"` alongside of a socket and directly manipulate its `struct ioinfo`. + * This is of course a very dirty hack. If we could design the API today we + * could use `CancelIoEx`. But we are older than that Win32 API.
+ */ static inline int rb_fd_select(int n, rb_fdset_t *rfds, rb_fdset_t *wfds, rb_fdset_t *efds, struct timeval *timeout) { @@ -103,7 +221,18 @@ rb_fd_select(int n, rb_fdset_t *rfds, rb_fdset_t *wfds, rb_fdset_t *efds, struct RBIMPL_ATTR_NONNULL(()) RBIMPL_ATTR_PURE() -/* :TODO: can this function be __attribute__((returns_nonnull)) or not? */ +/** + * Raw pointer to `fd_set`. + * + * @param[in] f Target fdset. + * @retval NULL `f` is already terminated by rb_fd_term(). + * @retval otherwise Underlying fd_set. + * + * @internal + * + * Extension library must not touch raw pointers. It was a bad idea to let + * them use it. + */ static inline fd_set * rb_fd_ptr(const rb_fdset_t *f) { @@ -111,7 +240,13 @@ rb_fd_ptr(const rb_fdset_t *f) } RBIMPL_ATTR_NONNULL(()) -RBIMPL_ATTR_PURE() +RBIMPL_ATTR_PURE_UNLESS_DEBUG() +/** + * It seems this function has no use. Maybe just remove? + * + * @param[in] f A set. + * @return Number of file descriptors stored. + */ static inline int rb_fd_max(const rb_fdset_t *f) { diff --git a/include/ruby/internal/intern/signal.h b/include/ruby/internal/intern/signal.h index 8739c51f53..4773788651 100644 --- a/include/ruby/internal/intern/signal.h +++ b/include/ruby/internal/intern/signal.h @@ -17,22 +17,129 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Signal handling APIs. 
*/ +#include "ruby/internal/attr/nonnull.h" +#include "ruby/internal/attr/pure.h" #include "ruby/internal/dllexport.h" #include "ruby/internal/value.h" RBIMPL_SYMBOL_EXPORT_BEGIN() /* signal.c */ -VALUE rb_f_kill(int, const VALUE*); -#ifdef POSIX_SIGNAL -#define posix_signal ruby_posix_signal -void (*posix_signal(int, void (*)(int)))(int); -#endif -const char *ruby_signal_name(int); -void ruby_default_signal(int); + +RBIMPL_ATTR_NONNULL(()) +/** + * Sends a signal ("kills") to processes. + * + * The first argument is the signal, either in: + * + * - Numerical representation (e.g. `9`), or + * - Textual representation of canonical (e.g. `:SIGKILL`) name or + * abbreviated (e.g. `:KILL`) name, either in ::rb_cSymbol or ::rb_cString. + * + * All the remaining arguments are numerical representations of process IDs. + * This function iterates over them to send the specified signal. + * + * You can specify both negative PIDs and negative signo to this function: + * + * ``` + * sig \ pid | >= 1 | == 0 | == -1 | <= -2 + * ===========+======+======+=======+======= + * > 0 | #1 | #2 | #3 | #4 + * == 0 | #5 | #6 | #7 | #8 + * < 0 | #9 | #10 | #11 + * ``` + * + * - Case #1: When signo and PID are both positive, this function sends the + * specified signal to the specified process (intuitive). + * + * - Case #2: When signo is positive and PID is zero, this function sends + * that signal to the current process group. + * + * - Case #3: When signo is positive and PID is -1, this function sends that + * signal to everything that the current process is allowed to kill. + * + * - Case #4: When signo is positive and PID is negative (but not -1), this + * function sends that signal to every processes in a process group, whose + * process group ID is the absolute value of the passed PID. + * + * - Case #5: When signo is zero and PID is positive, this function just + * checks for the existence of the specified process and doesn't send + * anything to anyone. 
In case the process is absent `Errno::ESRCH` is + * raised. + * + * - Case #6: When signo and PID are both zero, this function checks for the + * existence of the current process group. And it must do. This function + * is effectively a no-op then. + * + * - Case #7: When signo is zero and PID is -1, this function checks if there + * is any other process that the current process can kill. At least init + * (PID 1) must exist, so this must not fail. + * + * - Case #8: When signo is zero and PID is negative (but not -1), this + * function checks if there is a process group whose process group ID is + * the absolute value of the passed PID. In case the process group is + * absent `Errno::ESRCH` is raised. + * + * - Case #9: When signo is negative and PID is positive, this function sends + * the absolute value of the passed signo to the process group specified as + * the PID. + * + * - Case #10: When signo is negative and PID is zero, it is highly expected + * that this function sends the absolute value of the passed signo to the + * current process group. Strictly speaking, IEEE Std 1003.1-2017 + * specifies that this (`killpg(3posix)` with an argument of zero) is an + * undefined behaviour. But no operating system is known so far that does + * things differently. + * + * - Case #11: When signo and PID are both negative, the behaviour of this + * function depends on how `killpg(3)` works. On Linux, it seems such + * attempt is strictly prohibited and `Errno::EINVAL` is raised. But on + * macOS, it seems it tries to send the signal actually to the process + * group. + * + * @note Above description is in fact different from how `kill(2)` works. + * We interpret the passed arguments before passing them through to + * system calls. + * @param[in] argc Number of objects in `argv`. + * @param[in] argv Signal, followed by target PIDs. + * @exception rb_eArgError Unknown signal name. + * @exception rb_eSystemCallError Various errors sending signal to processes. 
+ * @return Something numeric. The meaning of this return value is unclear. + * It seems in case of #1 above, this could be the body count. But + * other cases remain mysterious. + */ +VALUE rb_f_kill(int argc, const VALUE *argv); + +RBIMPL_ATTR_PURE() +/** + * Queries the name of the signal. It returns for instance `"KILL"` for + * SIGKILL. + * + * @param[in] signo Signal number to query. + * @retval 0 No such signal. + * @retval otherwise A pointer to a static C string that is the name of + * the signal. + * @warning Don't free the return value. + */ +const char *ruby_signal_name(int signo); + +/** + * Pretends as if there was no custom signal handler. This function sets the + * signal action to SIG_DFL, then kills itself. + * + * @param[in] sig The signal. + * @post Previous signal handler is lost. + * @post Passed signal is sent to the current process. + * + * @internal + * + * @shyouhei doesn't understand the needs of this function being visible from + * extension libraries. + */ +void ruby_default_signal(int sig); RBIMPL_SYMBOL_EXPORT_END() diff --git a/include/ruby/internal/intern/sprintf.h b/include/ruby/internal/intern/sprintf.h index 2c90548353..aedc0f9ab1 100644 --- a/include/ruby/internal/intern/sprintf.h +++ b/include/ruby/internal/intern/sprintf.h @@ -17,26 +17,142 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. - * @brief Our own private printf(3). + * extension libraries. They could be written in C++98. + * @brief Our own private `printf(3)`. 
*/ #include "ruby/internal/attr/format.h" +#include "ruby/internal/attr/nonnull.h" #include "ruby/internal/dllexport.h" #include "ruby/internal/value.h" RBIMPL_SYMBOL_EXPORT_BEGIN() /* sprintf.c */ -VALUE rb_f_sprintf(int, const VALUE*); +/** + * Identical to rb_str_format(), except how the arguments are arranged. + * + * @param[in] argc Number of objects of `argv`. + * @param[in] argv A format string, followed by its arguments. + * @return A rendered new instance of ::rb_cString. + * + * @internal + * + * You can safely pass NULL to `argv`. Doesn't make any sense though. + */ +VALUE rb_f_sprintf(int argc, const VALUE *argv); + +RBIMPL_ATTR_NONNULL((1)) RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 1, 2) -VALUE rb_sprintf(const char*, ...); -VALUE rb_vsprintf(const char*, va_list); +/** + * Ruby's extended `sprintf(3)`. We ended up reinventing the entire `printf` + * business because we don't want to depend on locales. OS-provided `printf` + * routines might or might not, which caused instabilities of the result + * strings. + * + * The format sequence is a mixture of format specifiers and other verbatim + * contents. Each format specifier starts with a `%`, and has the following + * structure: + * + * ``` + * %[flags][width][.precision][length]conversion + * ``` + * + * This function supports flags of ` `, `#`, `+`, `-`, `0`, width of + * non-negative decimal integer and `*`, precision of non-negative decimal + * integers and `*`, length of `L`, `h`, `t`, `z`, `l`, `ll`, `q`, conversions + * of `A`, `D`, `E`, `G`, `O`, `U`, `X`, `a`, `c`, `d`, `e`, `f`, `g`, `i`, + * `n`, `o`, `p`, `s`, `u`, `x`, and `%`. In case of `_WIN32` it also supports + * `I`. And additionally, it supports magical `PRIsVALUE` macro that can + * stringise arbitrary Ruby objects: + * + * ```CXX + * rb_sprintf("|%"PRIsVALUE"|", RUBY_Qtrue); // => "|true|" + * rb_sprintf("%+"PRIsVALUE, rb_stdin); // => "#<IO:<STDIN>>" + * ``` + * + * @param[in] fmt A `printf`-like format specifier. 
+ * @param[in] ... Variadic number of contents to format. + * @return A rendered new instance of ::rb_cString. + * + * @internal + * + * :FIXME: We can improve this document. + */ +VALUE rb_sprintf(const char *fmt, ...); + +RBIMPL_ATTR_NONNULL((1)) +RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 1, 0) +/** + * Identical to rb_sprintf(), except it takes a `va_list`. + * + * @param[in] fmt A `printf`-like format specifier. + * @param[in] ap Contents to format. + * @return A rendered new instance of ::rb_cString. + */ +VALUE rb_vsprintf(const char *fmt, va_list ap); +RBIMPL_ATTR_NONNULL((2)) RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 2, 3) -VALUE rb_str_catf(VALUE, const char*, ...); -VALUE rb_str_vcatf(VALUE, const char*, va_list); -VALUE rb_str_format(int, const VALUE *, VALUE); +/** + * Identical to rb_sprintf(), except it renders the output to the specified + * object rather than creating a new one. + * + * @param[out] dst String to modify. + * @param[in] fmt A `printf`-like format specifier. + * @param[in] ... Variadic number of contents to format. + * @exception rb_eTypeError `dst` is not a String. + * @return Passed `dst`. + * @post `dst` has the rendered output appended to its end. + */ +VALUE rb_str_catf(VALUE dst, const char *fmt, ...); + +RBIMPL_ATTR_NONNULL((2)) +RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 2, 0) +/** + * Identical to rb_str_catf(), except it takes a `va_list`. It can also be + * seen as a routine identical to rb_vsprintf(), except it renders the output + * to the specified object rather than creating a new one. + * + * @param[out] dst String to modify. + * @param[in] fmt A `printf`-like format specifier. + * @param[in] ap Contents to format. + * @exception rb_eTypeError `dst` is not a String. + * @return Passed `dst`. + * @post `dst` has the rendered output appended to its end. + */ +VALUE rb_str_vcatf(VALUE dst, const char *fmt, va_list ap); + +/** + * Formats a string. + * + * Returns the string resulting from applying `fmt` to `argv`. 
The format + * sequence is a mixture of format specifiers and other verbatim contents. + * Each format specifier starts with a `%`, and has the following structure: + * + * ``` + * %[flags][width][.precision]type + * ``` + * + * ... which is different from that of rb_sprintf(). Because ruby has no + * `short` or `long`, there is no way to specify a "length" of an argument. + * + * This function supports flags of ` `, `#`, `+`, `-`, `<>`, `{}`, width of + * non-negative decimal integer and `$`, `*`, precision of non-negative decimal + * integer and `$`, `*`, type of `A`, `B`, `E`, `G`, `X`, `a`, `b`, `c`, `d`, + * `e`, `f`, `g`, `i`, `o`, `p`, `s`, `u`, `x`, `%`. This list is also + * (largely the same but) not identical to that of rb_sprintf(). + * + * @param[in] argc Number of objects in `argv`. + * @param[in] argv Format arguments. + * @param[in] fmt A printf-like format specifier. + * @exception rb_eTypeError `fmt` is not a string. + * @exception rb_eArgError Failed to parse `fmt`. + * @return A rendered new instance of ::rb_cString. + * @note Everything it takes must be Ruby objects. + * + */ +VALUE rb_str_format(int argc, const VALUE *argv, VALUE fmt); RBIMPL_SYMBOL_EXPORT_END() diff --git a/include/ruby/internal/intern/string.h b/include/ruby/internal/intern/string.h index a590b2043e..6827563e8d 100644 --- a/include/ruby/internal/intern/string.h +++ b/include/ruby/internal/intern/string.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Public APIs related to ::rb_cString.
*/ #include "ruby/internal/config.h" @@ -34,6 +34,7 @@ # include <stdint.h> #endif +#include "ruby/internal/attr/deprecated.h" #include "ruby/internal/attr/nonnull.h" #include "ruby/internal/attr/pure.h" #include "ruby/internal/constant_p.h" @@ -45,107 +46,1335 @@ RBIMPL_SYMBOL_EXPORT_BEGIN() /* string.c */ -VALUE rb_str_new(const char*, long); -VALUE rb_str_new_cstr(const char*); -VALUE rb_str_new_shared(VALUE); -VALUE rb_str_new_frozen(VALUE); -VALUE rb_str_new_with_class(VALUE, const char*, long); -VALUE rb_tainted_str_new_cstr(const char*); -VALUE rb_tainted_str_new(const char*, long); -VALUE rb_external_str_new(const char*, long); -VALUE rb_external_str_new_cstr(const char*); -VALUE rb_locale_str_new(const char*, long); -VALUE rb_locale_str_new_cstr(const char*); -VALUE rb_filesystem_str_new(const char*, long); -VALUE rb_filesystem_str_new_cstr(const char*); -VALUE rb_str_buf_new(long); -VALUE rb_str_buf_new_cstr(const char*); -VALUE rb_str_buf_new2(const char*); -VALUE rb_str_tmp_new(long); -VALUE rb_usascii_str_new(const char*, long); -VALUE rb_usascii_str_new_cstr(const char*); -VALUE rb_utf8_str_new(const char*, long); -VALUE rb_utf8_str_new_cstr(const char*); + +/** + * Allocates an instance of ::rb_cString. + * + * @param[in] ptr A memory region of `len` bytes length. + * @param[in] len Length of `ptr`, in bytes, not including the + * terminating NUL character. + * @exception rb_eNoMemError Failed to allocate `len+1` bytes. + * @exception rb_eArgError `len` is negative. + * @return An instance of ::rb_cString, of `len` bytes length, of + * "binary" encoding, whose contents are verbatim copy of `ptr`. + * @pre At least `len` bytes of continuous memory region shall be + * accessible via `ptr`. + */ +VALUE rb_str_new(const char *ptr, long len); + +/** + * Identical to rb_str_new(), except it assumes the passed pointer is a pointer + * to a C string. + * + * @param[in] ptr A C string. + * @exception rb_eNoMemError Failed to allocate memory. 
+ * @exception rb_eArgError `ptr` is a null pointer. + * @return An instance of ::rb_cString, of "binary" encoding, whose + * contents are verbatim copy of `ptr`. + * @pre `ptr` must not be a null pointer. + */ +VALUE rb_str_new_cstr(const char *ptr); + +/** + * Identical to rb_str_new_cstr(), except it takes a Ruby's string instead of + * C's. Implementation wise it creates a string that shares the backend memory + * region with the receiver. So the name. But there is no way for extension + * libraries to know if a string is of such variant. + * + * @param[in] str An object of ::RString. + * @return An allocated instance of ::rb_cString, which shares the + * encoding, length, and contents with the passed string. + * @pre `str` must not be any arbitrary object except ::RString. + * @note Use #StringValue to enforce the precondition. + */ +VALUE rb_str_new_shared(VALUE str); + +/** + * Creates a frozen copy of the string, if necessary. This function does + * nothing when the passed string is already frozen. Otherwise, it allocates a + * copy of it, which is frozen. The passed string is untouched either ways. + * + * @param[in] str An object of ::RString. + * @return Something frozen. + * @pre `str` must not be any arbitrary object except ::RString. + * @note Use #StringValue to enforce the precondition. + */ +VALUE rb_str_new_frozen(VALUE str); + +/** + * Identical to rb_str_new(), except it takes the class of the allocating + * object. + * + * @param[in] obj A string-ish object. + * @param[in] ptr A memory region of `len` bytes length. + * @param[in] len Length of `ptr`, in bytes, not including the + * terminating NUL character. + * @exception rb_eNoMemError Failed to allocate `len+1` bytes. + * @exception rb_eArgError `len` is negative. + * @return An instance of the class of `obj`, of `len` bytes length, of + * "binary" encoding, whose contents are verbatim copy of `ptr`. + * @pre At least `len` bytes of continuous memory region shall be + * accessible via `ptr`. 
+ * + * @internal + * + * Why it doesn't take an instance of ::rb_cClass? + */ +VALUE rb_str_new_with_class(VALUE obj, const char *ptr, long len); + +/** + * Identical to rb_str_new(), except it generates a string of "default + * external" encoding. + * + * @param[in] ptr A memory region of `len` bytes length. + * @param[in] len Length of `ptr`, in bytes, not including the + * terminating NUL character. + * @exception rb_eNoMemError Failed to allocate `len+1` bytes. + * @exception rb_eArgError `len` is negative. + * @return An instance of ::rb_cString. In case encoding conversion from + * "default internal" to "default external" is fully defined over + * the given contents, then the return value is a string of + * "default external" encoding, whose contents are the converted + * ones. Otherwise the string is a junk. + * @warning It doesn't raise on a conversion failure and silently ends up in + * a corrupted output. You can know the failure by querying + * `valid_encoding?` of the result object. + */ +VALUE rb_external_str_new(const char *ptr, long len); + +RBIMPL_ATTR_NONNULL(()) /** + * Identical to rb_external_str_new(), except it assumes the passed pointer is + * a pointer to a C string. It can also be seen as a routine identical to + * rb_str_new_cstr(), except it generates a string of "default external" + * encoding. + * + * @param[in] ptr A C string. + * @exception rb_eNoMemError Failed to allocate memory. + * @return An instance of ::rb_cString. In case encoding conversion from + * "default internal" to "default external" is fully defined over + * the given contents, then the return value is a string of + * "default external" encoding, whose contents are the converted + * ones. Otherwise the string is a junk. + * @warning It doesn't raise on a conversion failure and silently ends up in + * a corrupted output. You can know the failure by querying + * `valid_encoding?` of the result object. + * @pre `ptr` must not be a null pointer. 
+ */ +VALUE rb_external_str_new_cstr(const char *ptr); + +/** + * Identical to rb_str_new(), except it generates a string of "locale" + * encoding. It can also be seen as a routine identical to + * rb_external_str_new(), except it generates a string of "locale" encoding + * instead of "default external" encoding. + * + * @param[in] ptr A memory region of `len` bytes length. + * @param[in] len Length of `ptr`, in bytes, not including the + * terminating NUL character. + * @exception rb_eNoMemError Failed to allocate `len+1` bytes. + * @exception rb_eArgError `len` is negative. + * @return An instance of ::rb_cString. In case encoding conversion from + * "default internal" to "locale" is fully defined over the given + * contents, then the return value is a string of "locale" + * encoding, whose contents are the converted ones. Otherwise the + * string is a junk. + * @warning It doesn't raise on a conversion failure and silently ends up in + * a corrupted output. You can know the failure by querying + * `valid_encoding?` of the result object. + */ +VALUE rb_locale_str_new(const char *ptr, long len); + +RBIMPL_ATTR_NONNULL(()) +/** + * Identical to rb_locale_str_new(), except it assumes the passed pointer is a + * pointer to a C string. It can also be seen as a routine identical to + * rb_external_str_new_cstr(), except it generates a string of "locale" + * encoding instead of "default external". + * + * @param[in] ptr A C string. + * @exception rb_eNoMemError Failed to allocate memory. + * @return An instance of ::rb_cString. In case encoding conversion from + * "default internal" to "locale" is fully defined over the given + * contents, then the return value is a string of "locale" + * encoding, whose contents are the converted ones. Otherwise the + * string is a junk. + * @warning It doesn't raise on a conversion failure and silently ends up in + * a corrupted output. You can know the failure by querying + * `valid_encoding?` of the result object. 
+ * @pre `ptr` must not be a null pointer. + */ +VALUE rb_locale_str_new_cstr(const char *ptr); + +/** + * Identical to rb_str_new(), except it generates a string of "filesystem" + * encoding. It can also be seen as a routine identical to + * rb_external_str_new(), except it generates a string of "filesystem" encoding + * instead of "default external" encoding. + * + * @param[in] ptr A memory region of `len` bytes length. + * @param[in] len Length of `ptr`, in bytes, not including the + * terminating NUL character. + * @exception rb_eNoMemError Failed to allocate `len+1` bytes. + * @exception rb_eArgError `len` is negative. + * @return An instance of ::rb_cString. In case encoding conversion from + * "default internal" to "filesystem" is fully defined over the + * given contents, then the return value is a string of + * "filesystem" encoding, whose contents are the converted ones. + * Otherwise the string is a junk. + * @warning It doesn't raise on a conversion failure and silently ends up in + * a corrupted output. You can know the failure by querying + * `valid_encoding?` of the result object. + */ +VALUE rb_filesystem_str_new(const char *ptr, long len); + +RBIMPL_ATTR_NONNULL(()) +/** + * Identical to rb_filesystem_str_new(), except it assumes the passed pointer + * is a pointer to a C string. It can also be seen as a routine identical to + * rb_external_str_new_cstr(), except it generates a string of "filesystem" + * encoding instead of "default external". + * + * @param[in] ptr A C string. + * @exception rb_eNoMemError Failed to allocate memory. + * @return An instance of ::rb_cString. In case encoding conversion from + * "default internal" to "filesystem" is fully defined over the + * given contents, then the return value is a string of + * "filesystem" encoding, whose contents are the converted ones. + * Otherwise the string is a junk. + * @warning It doesn't raise on a conversion failure and silently ends up in + * a corrupted output. 
You can know the failure by querying + * `valid_encoding?` of the result object. + * @pre `ptr` must not be a null pointer. + */ +VALUE rb_filesystem_str_new_cstr(const char *ptr); + +/** + * Allocates a "string buffer". A string buffer here is an instance of + * ::rb_cString, whose capacity is bigger than the length of it. If you can + * say that a string grows to a specific amount of bytes, this could be + * effective than resizing a string over and over again and again. + * + * @param[in] capa Designed capacity of the generating string. + * @return An empty string, of "binary" encoding, whose capacity is `capa`. + */ +VALUE rb_str_buf_new(long capa); + +RBIMPL_ATTR_NONNULL(()) +/** + * This is a rb_str_buf_new() + rb_str_buf_cat() combo. + * + * @param[in] ptr A C string. + * @exception rb_eNoMemError Failed to allocate memory. + * @return An instance of ::rb_cString, of "binary" encoding, whose + * contents are verbatim copy of `ptr`. + * @pre `ptr` must not be a null pointer. + * + * @internal + * + * This must be identical to rb_str_new_cstr(), except done in inefficient way? + * @shyouhei doesn't understand why this is not a simple alias. + */ +VALUE rb_str_buf_new_cstr(const char *ptr); + +/** + * Allocates a "temporary" string. This is a hidden empty string. Handy on + * occasions. + * + * @param[in] len Designed length of the string. + * @return A hidden, empty string. + * @see rb_obj_hide() + */ +VALUE rb_str_tmp_new(long len); + +/** + * Identical to rb_str_new(), except it generates a string of "US ASCII" + * encoding. This is different from rb_external_str_new(), not only for the + * output encoding, but also it doesn't convert the contents. + * + * @param[in] ptr A memory region of `len` bytes length. + * @param[in] len Length of `ptr`, in bytes, not including the + * terminating NUL character. + * @exception rb_eNoMemError Failed to allocate `len+1` bytes. + * @exception rb_eArgError `len` is negative. 
+ * @return An instance of ::rb_cString, of `len` bytes length, of + * "US ASCII" encoding, whose contents are verbatim copy of `ptr`. + */ +VALUE rb_usascii_str_new(const char *ptr, long len); + +/** + * Identical to rb_str_new_cstr(), except it generates a string of "US ASCII" + * encoding. It can also be seen as a routine identical to + * rb_usascii_str_new(), except it assumes the passed pointer is a pointer to a + * C string. + * + * @param[in] ptr A C string. + * @exception rb_eNoMemError Failed to allocate memory. + * @exception rb_eArgError `ptr` is a null pointer. + * @return An instance of ::rb_cString, of "US ASCII" encoding, whose + * contents are verbatim copy of `ptr`. + * @pre `ptr` must not be a null pointer. + */ +VALUE rb_usascii_str_new_cstr(const char *ptr); + +/** + * Identical to rb_str_new(), except it generates a string of "UTF-8" encoding. + * + * @param[in] ptr A memory region of `len` bytes length. + * @param[in] len Length of `ptr`, in bytes, not including the + * terminating NUL character. + * @exception rb_eNoMemError Failed to allocate `len+1` bytes. + * @exception rb_eArgError `len` is negative. + * @return An instance of ::rb_cString, of `len` bytes length, of + * "UTF-8" encoding, whose contents are verbatim copy of `ptr`. + */ +VALUE rb_utf8_str_new(const char *ptr, long len); + +/** + * Identical to rb_str_new_cstr(), except it generates a string of "UTF-8" + * encoding. It can also be seen as a routine identical to + * rb_utf8_str_new(), except it assumes the passed pointer is a pointer to a + * C string. + * + * @param[in] ptr A C string. + * @exception rb_eNoMemError Failed to allocate memory. + * @exception rb_eArgError `ptr` is a null pointer. + * @return An instance of ::rb_cString, of "UTF-8" encoding, whose contents + * are verbatim copy of `ptr`. + * @pre `ptr` must not be a null pointer. + */ +VALUE rb_utf8_str_new_cstr(const char *ptr); + +/** + * @name Special strings that are backended by C string literals.
+ * * *_str_new_static functions are intended for C string literals. * They require memory in the range [ptr, ptr+len] to always be readable. * Note that this range covers a total of len + 1 bytes. + * + * @{ + */ + +/** + * Identical to rb_str_new(), except it takes a C string literal. + * + * @param[in] ptr A C string literal. + * @param[in] len `strlen(ptr)`. + * @exception rb_eArgError `len` out of range of `size_t`. + * @pre `ptr` must be a C string constant. + * @return An instance of ::rb_cString, of "binary" encoding, whose backend + * storage is the passed C string literal. + * @warning It is a very bad idea to write to a C string literal (often + * immediate SEGV shall occur). Consider return values of this + * function be read-only. + * + * @internal + * + * Surprisingly it can take NULL, and generates an empty string. */ VALUE rb_str_new_static(const char *ptr, long len); + +/** + * Identical to rb_str_new_static(), except it generates a string of "US ASCII" + * encoding instead of "binary". It can also be seen as a routine identical to + * rb_usascii_str_new(), except it takes a C string literal. + * + * @param[in] ptr A C string literal. + * @param[in] len `strlen(ptr)`. + * @exception rb_eArgError `len` out of range of `size_t`. + * @pre `ptr` must be a C string constant. + * @return An instance of ::rb_cString, of "US ASCII" encoding, whose + * backend storage is the passed C string literal. + * @warning It is a very bad idea to write to a C string literal (often + * immediate SEGV shall occur). Consider return values of this + * function be read-only. + */ VALUE rb_usascii_str_new_static(const char *ptr, long len); + +/** + * Identical to rb_str_new_static(), except it generates a string of "UTF-8" + * encoding instead of "binary". It can also be seen as a routine identical to + * rb_utf8_str_new(), except it takes a C string literal. + * + * @param[in] ptr A C string literal. + * @param[in] len `strlen(ptr)`. 
+ * @exception rb_eArgError `len` out of range of `size_t`. + * @pre `ptr` must be a C string constant. + * @return An instance of ::rb_cString, of "UTF-8" encoding, whose backend + * storage is the passed C string literal. + * @warning It is a very bad idea to write to a C string literal (often + * immediate SEGV shall occur). Consider return values of this + * function be read-only. + */ VALUE rb_utf8_str_new_static(const char *ptr, long len); -VALUE rb_str_to_interned_str(VALUE); -VALUE rb_interned_str(const char *, long); -VALUE rb_interned_str_cstr(const char *); -void rb_str_free(VALUE); -void rb_str_shared_replace(VALUE, VALUE); -VALUE rb_str_buf_append(VALUE, VALUE); + +/** @} */ + +/** + * Identical to rb_interned_str(), except it takes a Ruby's string instead of + * C's. It can also be seen as a routine identical to rb_str_new_shared(), + * except it returns an infamous "f"string. + * + * @param[in] str An object of ::RString. + * @return An instance of ::rb_cString, either cached or allocated, which + * has the identical encoding, length, and contents with the passed + * string. + * @pre `str` must not be any arbitrary object except ::RString. + * @note Use #StringValue to enforce the precondition. + * + * @internal + * + * It actually finds or creates a fstring of the needed property, and + * destructively modifies the receiver behind-the-scene so that it becomes a + * shared string whose parent is the returning fstring. + */ +VALUE rb_str_to_interned_str(VALUE str); + +/** + * Identical to rb_str_new(), except it returns an infamous "f"string. What is + * a fstring? Well it is a special subkind of strings that is immutable, + * deduped globally, and managed by our GC. It is much like a Symbol (in fact + * Symbols are dynamic these days and are backended using fstrings). This + * concept has been silently introduced at some point in 2.x era. Since then + * it gained wider acceptance in the core. 
Starting from 3.x extension + * libraries can also generate ones. + * + * @param[in] ptr A memory region of `len` bytes length. + * @param[in] len Length of `ptr`, in bytes, not including the + * terminating NUL character. + * @exception rb_eArgError `len` is negative. + * @return A found or created instance of ::rb_cString, of `len` bytes + * length, of "binary" encoding, whose contents are identical to + * that of `ptr`. + * @pre At least `len` bytes of continuous memory region shall be + * accessible via `ptr`. + */ +VALUE rb_interned_str(const char *ptr, long len); + +RBIMPL_ATTR_NONNULL(()) +/** + * Identical to rb_interned_str(), except it assumes the passed pointer is a + * pointer to a C's string. It can also be seen as a routine identical to + * rb_str_to_interned_str(), except it takes a C's string instead of Ruby's. + * Or it can also be seen as a routine identical to rb_str_new_cstr(), except + * it returns an infamous "f"string. + * + * @param[in] ptr A C string. + * @exception rb_eNoMemError Failed to allocate memory. + * @return An instance of ::rb_cString, of "binary" encoding, whose + * contents are verbatim copy of `ptr`. + * @pre `ptr` must not be a null pointer. + */ +VALUE rb_interned_str_cstr(const char *ptr); + +/** + * Destroys the given string for no reason. + * + * @warning DO NOT USE IT. + * @warning Leave this task to our GC. + * @warning It was a bad idea at the first place to let you know about it. + * + * @param[out] str The string to be executed. + * @post The given string no longer exists. + * @note Maybe `String#clear` could be what you want. + * + * @internal + * + * Should have moved this to `internal/string.h`. + */ +void rb_str_free(VALUE str); + +/** + * Replaces the contents of the former with the latter. + * + * @param[out] dst Destination object. + * @param[in] src Source object. + * @pre Both objects must not be any arbitrary objects except + * ::RString. + * @post `dst`'s former components are abandoned. 
It now has the + * identical encoding, length, and contents to `src`. + * @see rb_str_replace() + * + * @internal + * + * @shyouhei doesn't understand why this is useful to extension libraries. + * Just use rb_str_replace(). What's wrong with that? + */ +void rb_str_shared_replace(VALUE dst, VALUE src); + +/** + * Identical to rb_str_cat_cstr(), except it takes Ruby's string instead of + * C's. It can also be seen as a routine identical to rb_str_shared_replace(), + * except it appends instead of replaces. + * + * @param[out] dst Destination object. + * @param[in] src Source object. + * @exception rb_eEncCompatError Can't mix the encodings. + * @exception rb_eArgError Result string too big. + * @return The passed `dst`. + * @pre Both objects must not be any arbitrary objects except + * ::RString. + * @post `dst` has the contents of `src` appended, with encoding + * converted into `dst`'s one, into the end of `dst`. + */ +VALUE rb_str_buf_append(VALUE dst, VALUE src); + +/** @alias{rb_str_cat} */ VALUE rb_str_buf_cat(VALUE, const char*, long); + +/** @alias{rb_str_cat_cstr} */ VALUE rb_str_buf_cat2(VALUE, const char*); -VALUE rb_str_buf_cat_ascii(VALUE, const char*); -VALUE rb_obj_as_string(VALUE); -VALUE rb_check_string_type(VALUE); -void rb_must_asciicompat(VALUE); -VALUE rb_str_dup(VALUE); + +RBIMPL_ATTR_NONNULL(()) +/** + * Identical to rb_str_cat_cstr(), except it additionally assumes the source + * string be a NUL terminated ASCII string. + * + * @param[out] dst Destination object. + * @param[in] src Source string. + * @exception rb_eArgError Result string too big. + * @return The passed `dst`. + * @pre `dst` must not be any arbitrary object except ::RString. + * @pre `src` must be a NUL terminated ASCII string. + * @post `dst` has the contents of `src` appended, with encoding + * converted into `dst`'s one, into the end of `dst`. 
+ */ +VALUE rb_str_buf_cat_ascii(VALUE dst, const char *src); + +/** + * Try converting an object to its stringised representation using its `to_s` + * method, if any. If there is no such thing, it resorts to rb_any_to_s() + * output. + * + * @param[in] obj Arbitrary ruby object to stringise. + * @return An instance of ::rb_cString. + */ +VALUE rb_obj_as_string(VALUE obj); + +/** + * Try converting an object to its stringised representation using its `to_str` + * method, if any. If there is no such thing, returns ::RUBY_Qnil. + * + * @param[in] obj Arbitrary ruby object to stringise. + * @exception rb_eTypeError `obj.to_str` returned something non-String. + * @retval RUBY_Qnil No conversion from obj to String defined. + * @return otherwise Stringised representation of `obj`. + * @see rb_io_check_io + * @see rb_check_array_type + * @see rb_check_hash_type + */ +VALUE rb_check_string_type(VALUE obj); + +/** + * Asserts that the given string's encoding is (Ruby's definition of) ASCII + * compatible. + * + * @param[in] obj An instance of ::rb_cString. + * @exception rb_eEncCompatError `obj` is ASCII incompatible. + * + * @internal + * + * @shyouhei doesn't know if this is an Easter egg or an official feature, but + * this function can in fact take non-strings such as Symbols, Regexps, IOs, + * etc. However if something unsupported is passed, it causes SEGV. It seems + * the feature is kind of untested. + */ +void rb_must_asciicompat(VALUE obj); + +/** + * Duplicates a string. + * + * @param[in] str String in question to duplicate. + * @return A duplicated new instance. + * @pre `str` must be of ::RString. + */ +VALUE rb_str_dup(VALUE str); + +/** + * I guess there is no use case of this function in extension libraries, but + * this is a routine identical to rb_str_dup(), except it always creates an + * instance of ::rb_cString regardless of the given object's class. This makes + * the most sense when the passed string is formerly hidden by rb_obj_hide(). 
+ * + * @param[in] str A string, possibly hidden. + * @return A duplicated new instance of ::rb_cString. + */ VALUE rb_str_resurrect(VALUE str); -VALUE rb_str_locktmp(VALUE); -VALUE rb_str_unlocktmp(VALUE); + +/** + * Returns whether a string is chilled or not. + * + * This function is temporary and users must check for its presence using + * #ifdef HAVE_RB_STR_CHILLED_P. If HAVE_RB_STR_CHILLED_P is not defined, then + * strings can't be chilled. + * + * @param[in] str A string. + * @retval 1 The string is chilled. + * @retval 0 Otherwise. + */ +bool rb_str_chilled_p(VALUE str); + +#define HAVE_RB_STR_CHILLED_P 1 + +/** + * Obtains a "temporary lock" of the string. This advisory locking mechanism + * prevents other cooperating threads from tampering with the receiver. The same + * thing could be done via freeze mechanism, but this one can also be unlocked + * using rb_str_unlocktmp(). + * + * @param[out] str String to lock. + * @exception rb_eRuntimeError `str` already locked. + * @return The given string. + * @post The string is locked. + */ +VALUE rb_str_locktmp(VALUE str); + +/** + * Releases a lock formerly obtained by rb_str_locktmp(). + * + * @param[out] str String to unlock. + * @exception rb_eRuntimeError `str` already unlocked. + * @return The given string. + * @post The string is unlocked.
+ */ +VALUE rb_str_unlocktmp(VALUE str); + +/** @alias{rb_str_new_frozen} */ VALUE rb_str_dup_frozen(VALUE); + +/** @alias{rb_str_new_frozen} */ #define rb_str_dup_frozen rb_str_new_frozen -VALUE rb_str_plus(VALUE, VALUE); -VALUE rb_str_times(VALUE, VALUE); -long rb_str_sublen(VALUE, long); -VALUE rb_str_substr(VALUE, long, long); -VALUE rb_str_subseq(VALUE, long, long); -char *rb_str_subpos(VALUE, long, long*); -void rb_str_modify(VALUE); -void rb_str_modify_expand(VALUE, long); -VALUE rb_str_freeze(VALUE); -void rb_str_set_len(VALUE, long); -VALUE rb_str_resize(VALUE, long); -VALUE rb_str_cat(VALUE, const char*, long); -VALUE rb_str_cat_cstr(VALUE, const char*); + +/** + * Generates a new string, concatenating the former to the latter. It can also + * be seen as a routine identical to rb_str_append(), except it doesn't tamper + * the passed strings to create a new one instead. + * + * @param[in] lhs Source string #1. + * @param[in] rhs Source string #2. + * @exception rb_eEncCompatError Can't mix the encodings. + * @exception rb_eArgError Result string too big. + * @return A new string containing `rhs` concatenated to `lhs`. + * @pre Both objects must not be any arbitrary objects except ::RString. + * @note This operation doesn't commute. Don't get confused by the + * "plus" terminology. For historical reasons there are some + * noncommutative `+`s in Ruby. This is one of such things. There + * has been a long discussion around `+`s in programming languages. + */ +VALUE rb_str_plus(VALUE lhs, VALUE rhs); + +/** + * Repetition of a string. + * + * @param[in] str String to repeat. + * @param[in] num Count, something numeric. + * @exception rb_eArgError `num` is negative. + * @return A new string repeating `num` times of `str`. + */ +VALUE rb_str_times(VALUE str, VALUE num); + +/** + * Byte offset to character offset conversion. This makes sense when the + * receiver is in a multibyte encoding. The string's i-th character does not + * always sit at its i-th byte. 
This function scans the contents to find the + * character index that matches the byte index. Generally speaking this is an + * `O(n)` operation. Could be slow. + * + * @param[in] str The string to scan. + * @param[in] pos Offset, in bytes. + * @return Offset, in characters. + */ +long rb_str_sublen(VALUE str, long pos); + +/** + * This is the implementation of two-argumented `String#slice`. + * + * - Returns the substring of the given `len` found in `str` at offset `beg`: + * + * ```ruby + * 'foo'[0, 2] # => "fo" + * 'foo'[0, 0] # => "" + * ``` + * + * - Counts backward from the end of `str` if `beg` is negative: + * + * ```ruby + * 'foo'[-2, 2] # => "oo" + * ``` + * + * - Special case: returns a new empty string if `beg` is equal to the length + * of `str`: + * + * ```ruby + * 'foo'[3, 2] # => "" + * ``` + * + * - Returns ::RUBY_Qnil if `beg` is out of range: + * + * ```ruby + * 'foo'[4, 2] # => nil + * 'foo'[-4, 2] # => nil + * ``` + * + * - Returns the trailing substring of `str` if `len` is large: + * + * ```ruby + * 'foo'[1, 50] # => "oo" + * ``` + * + * - Returns ::RUBY_Qnil if `len` is negative: + * + * ```ruby + * 'foo'[0, -1] # => nil + * ``` + * + * @param[in] str The string to slice. + * @param[in] beg Requested offset of the substring. + * @param[in] len Requested length of the substring. + * @retval RUBY_Qnil Parameters out of range. + * @retval otherwise A new string whose contents is the specified + * substring of `str`. + * @pre `str` must not be any arbitrary objects except ::RString. + */ +VALUE rb_str_substr(VALUE str, long beg, long len); + +/** + * Identical to rb_str_substr(), except the numbers are interpreted as byte + * offsets instead of character offsets. + * + * @param[in] str The string to slice. + * @param[in] beg Requested offset of the substring. + * @param[in] len Requested length of the substring. + * @return A new string whose contents is the specified substring of `str`.
+ * @pre `str` must not be any arbitrary objects except ::RString. + * @pre `beg` and `len` must not point to OOB contents. + */ +VALUE rb_str_subseq(VALUE str, long beg, long len); + +/** + * Identical to rb_str_substr(), except it returns a C's string instead of + * Ruby's. + * + * @param[in] str The string to slice. + * @param[in] beg Requested offset of the substring. + * @param[in,out] len Requested length of the substring. + * @retval NULL Parameters out of range. + * @retval otherwise A pointer inside of `str`'s backend storage where + * the specified substring exist. + * @pre `str` must not be any arbitrary objects except ::RString. + * @post `len` is updated to have the length of the return value. + */ +char *rb_str_subpos(VALUE str, long beg, long *len); + +/** + * Declares that the string is about to be modified. This for instance let the + * string have a dedicated backend storage. + * + * @param[out] str String about to be modified. + * @exception rb_eRuntimeError `str` is `locktmp`-ed. + * @exception rb_eFrozenError `str` is frozen. + * @pre `str` must not be any arbitrary objects except ::RString. + * @post Upon successful return the passed string is eligible to be + * modified. + */ +void rb_str_modify(VALUE str); + +/** + * Identical to rb_str_modify(), except it additionally expands the capacity of + * the receiver. + * + * @param[out] str Target string to modify. + * @param[in] capa Additional capacity to add. + * @exception rb_eArgError `capa` is negative. + * @exception rb_eRuntimeError `str` is `locktmp`-ed. + * @exception rb_eFrozenError `str` is frozen. + * @pre `str` must not be any arbitrary objects except ::RString. + * @post Upon successful return the passed string is modified so that + * its capacity is increased for `capa` bytes. + */ +void rb_str_modify_expand(VALUE str, long capa); + +/** + * This is the implementation of `String#freeze`. + * + * @param[out] str Target string to freeze. + * @return The passed string. 
+ * @post Upon successful return the passed string is frozen. + */ +VALUE rb_str_freeze(VALUE str); + +/** + * Overwrites the length of the string. Typically this is used to shrink a + * string that was formerly expanded. + * + * ```CXX + * extern int fd; + * auto str = rb_eval_string("'...'"); + * rb_str_modify_expand(str, BUFSIZ); + * if (auto len = recv(fd, RSTRING_PTR(str), BUFSIZ, 0); len >= 0) { + * rb_str_set_len(str, len); + * } + * else { + * rb_sys_fail("recv(2)"); + * } + * ``` + * + * @param[out] str String to shrink. + * @param[in] len New length of the string. + * @exception rb_eRuntimeError `str` is `locktmp`-ed. + * @exception rb_eFrozenError `str` is frozen. + * @pre `str` must not be any arbitrary objects except ::RString. + * @post Upon successful return `str`'s length is set to `len`. + */ +void rb_str_set_len(VALUE str, long len); + +/** + * Overwrites the length of the string. In contrast to rb_str_set_len(), this + * function can also expand a string. + * + * @param[out] str String to resize. + * @param[in] len New length of the string. + * @exception rb_eArgError `len` is negative. + * @exception rb_eRuntimeError `str` is `locktmp`-ed. + * @exception rb_eFrozenError `str` is frozen. + * @return The passed `str`. + * @pre `str` must not be any arbitrary objects except ::RString. + * @post Upon successful return `str` is either expanded or shrunken to + * have its length be `len`. + */ +VALUE rb_str_resize(VALUE str, long len); + +/** + * Destructively appends the passed contents to the string. + * + * @param[out] dst Destination object. + * @param[in] src Contents to append. + * @param[in] srclen Length of `src`. + * @exception rb_eArgError `srclen` is negative. + * @return The passed `dst`. + * @pre `dst` must not be any arbitrary objects except ::RString. + * @post `dst` has the contents of `src` appended.
+ */ +VALUE rb_str_cat(VALUE dst, const char *src, long srclen); + +/** + * Identical to rb_str_cat(), except it assumes the passed pointer is a pointer + * to a C string. + * + * @param[out] dst Destination object. + * @param[in] src Contents to append. + * @exception rb_eArgError Result string too big. + * @exception rb_eArgError `src` is a null pointer. + * @return The passed `dst`. + * @pre `dst` must not be any arbitrary objects except ::RString. + * @pre `src` must not be a null pointer. + * @post `dst` has the contents of `src` appended. + */ +VALUE rb_str_cat_cstr(VALUE dst, const char *src); + +/** @alias{rb_str_cat_cstr} */ VALUE rb_str_cat2(VALUE, const char*); -VALUE rb_str_append(VALUE, VALUE); -VALUE rb_str_concat(VALUE, VALUE); + +/** + * Identical to rb_str_buf_append(), except it converts the right hand side + * before concatenating. + * + * @param[out] dst Destination object. + * @param[in] src Source object. + * @exception rb_eEncCompatError Can't mix the encodings. + * @exception rb_eArgError Result string too big. + * @return The passed `dst`. + * @pre `dst` must not be any arbitrary objects except ::RString. + * @post `dst` has the contents of `src` appended, with encoding + * converted into `dst`'s one, into the end of `dst`. + */ +VALUE rb_str_append(VALUE dst, VALUE src); + +/** + * Identical to rb_str_append(), except it also accepts an integer as a + * codepoint. This resembles `String#<<`. + * + * @param[out] dst Destination object. + * @param[in] src Source object, String or Numeric. + * @exception rb_eRangeError Source numeric is out of range. + * @exception rb_eEncCompatError Source string too long. + * @exception rb_eArgError Result string too big. + * @return The passed `dst`. + * @pre `dst` must not be any arbitrary objects except ::RString. + * @post `dst` has the contents of `src` appended, with encoding + * converted into `dst`'s one, into the end of `dst`. 
+ */ +VALUE rb_str_concat(VALUE dst, VALUE src); + +/* random.c */ + +/** + * This is a universal hash function. + * + * @warning This function changes its value per process. + * @param[in] ptr Target message. + * @param[in] len Length of `ptr` in bytes. + * @return A pseudorandom number suitable for Hash's hash value. + * @see Aumasson, JP., Bernstein, D.J., "SipHash: A Fast Short-Input + * PRF", In proceedings of 13th International Conference on + * Cryptology in India (INDOCRYPT 2012), LNCS 7668, pp. 489-508, + * 2012. http://doi.org/10.1007/978-3-642-34931-7_28 +*/ st_index_t rb_memhash(const void *ptr, long len); -st_index_t rb_hash_start(st_index_t); -st_index_t rb_hash_uint32(st_index_t, uint32_t); -st_index_t rb_hash_uint(st_index_t, st_index_t); -st_index_t rb_hash_end(st_index_t); + +/** + * Starts a series of hashing. Suppose you have a struct: + * + * ```CXX + * struct foo_tag { + * unsigned char bar; + * uint32_t baz; + * }; + * ``` + * + * It is not a wise idea to call rb_memhash() over it, because there could be + * padding bits. Instead you should explicitly iterate over each fields: + * + * ```CXX + * foo_tag foo = { 0, 0, }; + * st_index_t hash = 0; + * + * hash = rb_hash_start(0); + * hash = rb_hash_uint(hash, foo.bar); + * hash = rb_hash_uint32(hash, foo.baz); + * hash = rb_hash_end(hash); + * ``` + * + * @param[in] i Initial value. + * @return A hash value. + */ +st_index_t rb_hash_start(st_index_t i); + +/** @alias{st_hash_uint32} */ #define rb_hash_uint32(h, i) st_hash_uint32((h), (i)) + +/** @alias{st_hash_uint} */ #define rb_hash_uint(h, i) st_hash_uint((h), (i)) + +/** @alias{st_hash_end} */ #define rb_hash_end(h) st_hash_end(h) -st_index_t rb_str_hash(VALUE); -int rb_str_hash_cmp(VALUE,VALUE); -int rb_str_comparable(VALUE, VALUE); -int rb_str_cmp(VALUE, VALUE); + +/* string.c */ + +/** + * Calculates a hash value of a string. This is one of the two functions that + * constructs struct ::st_hash_type. 
+ * + * @param[in] str An object of ::RString. + * @return A hash value. + * @pre `str` must not be any arbitrary object except ::RString. + * + * @internal + * + * Although safe to call, there must be no particular use case of this function + * for extension libraries. Only ruby internals must know about it. + * + * This is not a simple alias of rb_memhash(), because it considers the passed + * string's encoding as well as its contents. + */ +st_index_t rb_str_hash(VALUE str); + +/** + * Compares two strings. This is one of the two functions that constructs + * struct ::st_hash_type. + * + * @param[in] str1 A string. + * @param[in] str2 Another string. + * @retval 1 They have identical contents, length, and encodings. + * @retval 0 Otherwise. + * @pre Both objects must not be any arbitrary objects except + * ::RString. + * + * @internal + * + * In contrast to rb_str_hash(), this could be handy for comparison that only + * concerns equality. rb_str_cmp() returns 1, 0, -1. + */ +int rb_str_hash_cmp(VALUE str1, VALUE str2); + +/** + * Checks if two strings are comparable with each other or not. Because + * rb_str_cmp() must return "lesser than" or "greater than" information, + * comparing two strings needs a stricter restriction. Both sides must be in a + * same set of strings which have total order. This is to check that property. + * Intuitive it sounds? But they can have different encodings. A character + * and another might or might not appear in the same order in their codepoints. + * It is more complicated than you think. + * + * @param[in] str1 A string. + * @param[in] str2 Another string. + * @retval 1 They agree on a total order. + * @retval 0 Otherwise. + * @pre Both objects must not be any arbitrary objects except + * ::RString. + */ +int rb_str_comparable(VALUE str1, VALUE str2); + +/** + * Compares two strings, as in `strcmp(3)`. This does not consider the current + * locale, but considers the encodings of both sides instead. + * + * @param[in] lhs A string. 
+ * @param[in] rhs Another string. + * @retval -1 `rhs` is "bigger than" `lhs`. + * @retval 1 `lhs` is "bigger than" `rhs`. + * @retval 0 Otherwise, e.g. not comparable. + * @pre Both objects must not be any arbitrary objects except + * ::RString. + */ +int rb_str_cmp(VALUE lhs, VALUE rhs); + +/** + * Equality of two strings. + * + * If `str2` is not a String, it resorts to `str2 == str1`. Otherwise if they + * are not comparable, returns ::RUBY_Qfalse. Otherwise if they have the same + * contents and the length, returns ::RUBY_Qtrue. Otherwise, returns + * ::RUBY_Qfalse. + * + * @param[in] str1 A string. + * @param[in] str2 Another string. + * @retval RUBY_Qtrue They are equal. + * @retval RUBY_Qfalse They are either different, or not comparable. + */ VALUE rb_str_equal(VALUE str1, VALUE str2); -VALUE rb_str_drop_bytes(VALUE, long); -void rb_str_update(VALUE, long, long, VALUE); -VALUE rb_str_replace(VALUE, VALUE); -VALUE rb_str_inspect(VALUE); -VALUE rb_str_dump(VALUE); -VALUE rb_str_split(VALUE, const char*); + +/** + * Shrinks the given string for the given number of bytes. + * + * @param[out] str String to squash. + * @param[in] len Number of bytes to reduce. + * @exception rb_eRuntimeError `str` is `locktmp`-ed. + * @exception rb_eFrozenError `str` is frozen. + * @return The passed `str`. + * @pre `str` must not be any arbitrary objects except ::RString. + * @post `str` is shrunken. + * @warning Can break a multibyte character in middle. + * + * @internal + * + * What if `len` is negative? + */ +VALUE rb_str_drop_bytes(VALUE str, long len); + +/** + * Replaces some (or all) of the contents of the given string. This is the + * implementation of three-argumented `String#[]=`. + * + * @param[out] dst Target string to update. + * @param[in] beg Offset of the affected portion. + * @param[in] len Length of the affected portion. + * @param[in] src Object to be assigned. + * @exception rb_eTypeError `src` has no implicit conversion to String. 
+ * @exception rb_eIndexError `len` is negative, or `beg` is OOB. + * @exception rb_eRuntimeError `dst` is `locktmp`-ed. + * @exception rb_eFrozenError `dst` is frozen. + * @note Unlike rb_str_substr(), this function raises. + * @post A portion of `dst` from `beg` to `len` is the stringised + * representation of `src`. If that replacement string is not the + * same length as the portion it is replacing, `dst` will be + * resized accordingly. + */ +void rb_str_update(VALUE dst, long beg, long len, VALUE src); + +/** + * Replaces the contents of the former object with the stringised contents of + * the latter. + * + * @param[out] dst Destination object. + * @param[in] src Source object. + * @exception rb_eTypeError `src` has no implicit conversion to String. + * @exception rb_eRuntimeError `dst` is `locktmp`-ed. + * @exception rb_eFrozenError `dst` is frozen. + * @return The passed `dst`. + * @pre `dst` must not be any arbitrary object except ::RString. + * @post `dst`'s former components are abandoned. It now has the + * identical encoding, length, and contents to `src`. + */ +VALUE rb_str_replace(VALUE dst, VALUE src); + +/** + * Generates a "readable" version of the receiver. + * + * @warning The output is _insecure_. Never feed one to `eval`. + * @warning The output is not always in the same encoding as the given one. + * @warning A character might or might not be escaped, depending on the + * result encoding. + * @param[in] str String to inspect. + * @return Its inspection, either in default internal encoding if any, or + * in default external encoding otherwise. + * @see rb_str_dump() + * + * @internal + * + * This is a (silent) fix of an actual vulnerability feeding `inspect` output + * strings to `eval`: + * https://github.com/hiki/hiki/commit/8771a6e25198e264a2bf9dc1c102fea2cc8ff975 + * + * ... and its advisory: + * http://hikiwiki.org/en/advisory20040712.html + */ +VALUE rb_str_inspect(VALUE str); + +/** + * "Inverse" of rb_eval_string(). 
Returns a quoted version of the string. All + * non-printing characters are replaced by `\uNNNN` or `\xHH` notation and all + * special characters are escaped. The result string is guaranteed to render a + * string of the same contents when passed to `eval` and friends. + * + * @param[in] str String to dump. + * @exception rb_eRuntimeError Too many escape sequences causes integer + * overflow on the length of the string. + * @return An US-ASCII string that includes all the necessary info to + * reconstruct the original string. + */ +VALUE rb_str_dump(VALUE str); + +/** + * Divides the given string based on the given delimiter. This is the + * 1-argument 0-block version of `String#split`. + * + * @param[in] str Object in question to split. + * @param[in] delim Delimiter, in C string. + * @exception rb_eTypeError `str` has no implicit conversion to String. + * @exception rb_eArgError `delim` is a null pointer. + * @return An array of strings, which are substrings of the passed `str`. + * If `delim` is an empty C string (i.e. `""`), `str` is split into + * each characters. If `delim` is a C string whose sole content is + * a whitespace (i.e. `" "`), `str` is split on whitespaces, with + * leading and trailing whitespace and runs of contiguous + * whitespace characters ignored. Otherwise, `str` is split + * according to `delim`. + */ +VALUE rb_str_split(VALUE str, const char *delim); + +/** + * This is a ::rb_gvar_setter_t that refutes non-string assignments. + * + * @exception rb_eTypeError Passed something non-string. + */ rb_gvar_setter_t rb_str_setter; -VALUE rb_str_intern(VALUE); -VALUE rb_sym_to_s(VALUE); -long rb_str_strlen(VALUE); + +/* symbol.c */ + +/** + * Identical to rb_to_symbol(), except it assumes the receiver being an + * instance of ::RString. + * + * @param[in] str The name of the id. + * @exception rb_eRuntimeError Too many symbols. + * @return A (possibly new) id whose value is the given `str`. 
+ * @pre `str` must not be any arbitrary object except ::RString. + * @note These days Ruby internally has two kinds of symbols + * (static/dynamic). Symbols created using this function would + * become dynamic ones; i.e. would be garbage collected. It could + * be safer for you to use it than alternatives, when applicable. + */ +VALUE rb_str_intern(VALUE str); + +/* string.c */ + +/** + * This is an rb_sym2str() + rb_str_dup() combo. + * + * @param[in] sym A symbol to query. + * @return A string duplicating the symbol's backend storage. + * + * @internal + * + * This function causes SEGV when the passed value is a static symbol that + * doesn't exist. + */ +VALUE rb_sym_to_s(VALUE sym); + +/** + * Counts the number of characters (not bytes) that are stored inside of the + * given string. This of course depends on its encoding. Also this function + * generally runs in O(n), because for instance you have to scan the entire + * string to know how many characters are there in a UTF-8 string. + * + * @param[in] str Target string to query. + * @return Its number of characters. + */ +long rb_str_strlen(VALUE str); + +/** + * Identical to rb_str_strlen(), except it returns the value in ::rb_cInteger. + * + * @param[in] str Target string to query. + * @return Its number of characters. + */ VALUE rb_str_length(VALUE); -long rb_str_offset(VALUE, long); + +/** + * "Inverse" of rb_str_sublen(). This function scans the contents to find the + * byte index that matches the character index. Generally speaking this is an + * `O(n)` operation. Could be slow. + * + * @param[in] str The string to scan. + * @param[in] pos Offset, in characters. + * @return Offset, in bytes. + */ +long rb_str_offset(VALUE str, long pos); + RBIMPL_ATTR_PURE() -size_t rb_str_capacity(VALUE); -VALUE rb_str_ellipsize(VALUE, long); -VALUE rb_str_scrub(VALUE, VALUE); -VALUE rb_str_succ(VALUE); +/** + * Queries the capacity of the given string. 
+ * + * @see ::RString::capa + * @param[in] str String in question. + * @return Its capacity. + */ +size_t rb_str_capacity(VALUE str); + +/** + * Shortens `str` and adds three dots, an ellipsis, if it is longer than `len` + * characters. The length of the returned string in characters is less than or + * equal to `len`. If the length of `str` is less than or equal `len`, returns + * `str` itself. The encoding of returned string is equal to that of passed + * one. The class of returned string is equal to that of passed one. + * + * @param[in] str The string to shorten. + * @param[in] len The maximum string length. + * @exception rb_eIndexError `len` is negative. + * @retval str No need to add ellipsis. + * @retval otherwise A new, shortened string. + * @note The length is counted in characters. + */ +VALUE rb_str_ellipsize(VALUE str, long len); + +/** + * "Cleanses" the string. A string has its encoding and its contents. They, + * in practice, do not always fit. There are strings in the wild that are + * "broken"; include bit patterns that are not allowed by its encoding. That + * can happen when a user copy&pasted something bad, network input got + * clobbered by a middleman, cosmic rays hit the physical memory, and many more + * occasions. This function takes such strings, and fills the "broken" portion + * with the passed replacement bit pattern. + * + * This function also takes a ruby block. That is a neat way to do things, but + * can be annoying when the caller function want to use a block for another + * purpose. + * + * @param[in] str Target string to scrub. + * @param[in] repl Replacement string. When it is a string, + * this function takes that as a replacement. + * When it is ::RUBY_Qnil, this function tries + * to yield a block (if any) and takes its + * evaluated value as a replacement. In case + * of ::RUBY_Qnil without a block, this + * function takes an encoding-specific default + * character (`U+FFFD`, for instance) as a last + * resort. 
+ * @exception rb_eTypeError `repl` is neither string nor nil. + * @exception rb_eArgError `repl` itself is broken. + * @exception rb_eEncCompatError `repl` and `str` are incompatible. + * @retval RUBY_Qnil `str` is already clean. + * @retval otherwise A new, clean string. + */ +VALUE rb_str_scrub(VALUE str, VALUE repl); + +/** + * Searches for the "successor" of a string. This function is complicated! + * This is the only function in the entire ruby API (either C or Ruby) that + * generates a string out of thin air. First, the successor to an empty string + * is a new empty string: + * + * ```ruby + * ''.succ # => "" + * ``` + * + * Otherwise the successor is calculated by "incrementing" characters. The + * first character to be incremented is the rightmost alphanumeric: or, if no + * alphanumerics, the rightmost character: + * + * ```ruby + * 'THX1138'.succ # => "THX1139" + * '<<koala>>'.succ # => "<<koalb>>" + * '***'.succ # => '**+' + * ``` + * + * The successor to a digit is another digit, "carrying" to the next-left + * character for a "rollover" from 9 to 0, and prepending another digit if + * necessary: + * + * ```ruby + * '00'.succ # => "01" + * '09'.succ # => "10" + * '99'.succ # => "100" + * '-9'.succ # => "-10" + * ``` + * + * The successor to a letter is another letter of the same case, carrying to + * the next-left character for a rollover, and prepending another same-case + * letter if necessary: + * + * ```ruby + * 'aa'.succ # => "ab" + * 'az'.succ # => "ba" + * 'zz'.succ # => "aaa" + * 'AA'.succ # => "AB" + * 'AZ'.succ # => "BA" + * 'ZZ'.succ # => "AAA" + * ``` + * + * The successor to a non-alphanumeric character is the next character in the + * underlying character set's collating sequence, carrying to the next-left + * character for a rollover, and prepending another character if necessary: + * + * ```ruby + * s = "\u03A1" + * s.succ # => "\u03A3" # There is no such thing like \u03A2. 
+ * s = 255.chr * 3 + * s # => "\xFF\xFF\xFF" + * s.succ # => "\x01\x00\x00\x00" + * ``` + * + * Carrying can occur between and among mixtures of alphanumeric characters: + * + * ```ruby + * s = 'zz99zz99' + * s.succ # => "aaa00aa00" + * s = '99zz99zz' + * s.succ # => "100aa00aa" + * s = '1.9.9' + * s.succ # => "2.0.0" + * ``` + * + * @param[in] orig Predecessor string. + * @return Successor string. + */ +VALUE rb_str_succ(VALUE orig); RBIMPL_ATTR_NONNULL(()) +/** + * @private + * + * This is an implementation detail. Don't bother. + * + * @param[in] str A C string. + * @return `strlen`, casted to `long`. + */ static inline long rbimpl_strlen(const char *str) { return RBIMPL_CAST((long)strlen(str)); } +RBIMPL_ATTR_NONNULL(()) +/** + * @private + * + * This is an implementation detail. Don't bother. + * + * @param[in] str A C string literal. + * @return Corresponding Ruby string. + */ static inline VALUE rbimpl_str_new_cstr(const char *str) { @@ -153,13 +1382,15 @@ rbimpl_str_new_cstr(const char *str) return rb_str_new_static(str, len); } -static inline VALUE -rbimpl_tainted_str_new_cstr(const char *str) -{ - long len = rbimpl_strlen(str); - return rb_tainted_str_new(str, len); -} - +RBIMPL_ATTR_NONNULL(()) +/** + * @private + * + * This is an implementation detail. Don't bother. + * + * @param[in] str A C string literal. + * @return Corresponding Ruby string. + */ static inline VALUE rbimpl_usascii_str_new_cstr(const char *str) { @@ -167,6 +1398,15 @@ rbimpl_usascii_str_new_cstr(const char *str) return rb_usascii_str_new_static(str, len); } +RBIMPL_ATTR_NONNULL(()) +/** + * @private + * + * This is an implementation detail. Don't bother. + * + * @param[in] str A C string literal. + * @return Corresponding Ruby string. 
+ */ static inline VALUE rbimpl_utf8_str_new_cstr(const char *str) { @@ -174,6 +1414,15 @@ rbimpl_utf8_str_new_cstr(const char *str) return rb_utf8_str_new_static(str, len); } +RBIMPL_ATTR_NONNULL(()) +/** + * @private + * + * This is an implementation detail. Don't bother. + * + * @param[in] str A C string literal. + * @return Corresponding Ruby string. + */ static inline VALUE rbimpl_external_str_new_cstr(const char *str) { @@ -181,6 +1430,15 @@ rbimpl_external_str_new_cstr(const char *str) return rb_external_str_new(str, len); } +RBIMPL_ATTR_NONNULL(()) +/** + * @private + * + * This is an implementation detail. Don't bother. + * + * @param[in] str A C string literal. + * @return Corresponding Ruby string. + */ static inline VALUE rbimpl_locale_str_new_cstr(const char *str) { @@ -188,6 +1446,15 @@ rbimpl_locale_str_new_cstr(const char *str) return rb_locale_str_new(str, len); } +RBIMPL_ATTR_NONNULL(()) +/** + * @private + * + * This is an implementation detail. Don't bother. + * + * @param[in] str A C string literal. + * @return Corresponding Ruby string. + */ static inline VALUE rbimpl_str_buf_new_cstr(const char *str) { @@ -196,6 +1463,16 @@ rbimpl_str_buf_new_cstr(const char *str) return rb_str_buf_cat(buf, str, len); } +RBIMPL_ATTR_NONNULL(()) +/** + * @private + * + * This is an implementation detail. Don't bother. + * + * @param[out] buf A string buffer. + * @param[in] str A C string literal. + * @return `buf` itself. + */ static inline VALUE rbimpl_str_cat_cstr(VALUE buf, const char *str) { @@ -203,6 +1480,16 @@ rbimpl_str_cat_cstr(VALUE buf, const char *str) return rb_str_cat(buf, str, len); } +RBIMPL_ATTR_NONNULL(()) +/** + * @private + * + * This is an implementation detail. Don't bother. + * + * @param[in] exc An exception class. + * @param[in] str A C string literal. + * @return An instance of `exc`. 
+ */ static inline VALUE rbimpl_exc_new_cstr(VALUE exc, const char *str) { @@ -210,88 +1497,275 @@ rbimpl_exc_new_cstr(VALUE exc, const char *str) return rb_exc_new(exc, str, len); } +/** + * Allocates an instance of ::rb_cString. + * + * @param[in] str A memory region of `len` bytes length. + * @param[in] len Length of `str`, in bytes, not including the + * terminating NUL character. + * @exception rb_eNoMemError Failed to allocate `len+1` bytes. + * @exception rb_eArgError `len` is negative. + * @return An instance of ::rb_cString, of `len` bytes length, of + * "binary" encoding, whose contents are verbatim copy of `str`. + * @pre At least `len` bytes of continuous memory region shall be + * accessible via `str`. + */ #define rb_str_new(str, len) \ ((RBIMPL_CONSTANT_P(str) && \ RBIMPL_CONSTANT_P(len) ? \ rb_str_new_static : \ rb_str_new) ((str), (len))) +/** + * Identical to #rb_str_new, except it assumes the passed pointer is a pointer + * to a C string. + * + * @param[in] str A C string. + * @exception rb_eNoMemError Failed to allocate memory. + * @return An instance of ::rb_cString, of "binary" encoding, whose + * contents are verbatim copy of `str`. + * @pre `str` must not be a null pointer. + */ #define rb_str_new_cstr(str) \ ((RBIMPL_CONSTANT_P(str) ? \ rbimpl_str_new_cstr : \ rb_str_new_cstr) (str)) +/** + * Identical to #rb_str_new, except it generates a string of "US ASCII" + * encoding. This is different from rb_external_str_new(), not only for the + * output encoding, but also it doesn't convert the contents. + * + * @param[in] str A memory region of `len` bytes length. + * @param[in] len Length of `str`, in bytes, not including the + * terminating NUL character. + * @exception rb_eNoMemError Failed to allocate `len+1` bytes. + * @exception rb_eArgError `len` is negative. + * @return An instance of ::rb_cString, of `len` bytes length, of + * "US ASCII" encoding, whose contents are verbatim copy of `str`. 
+ */ #define rb_usascii_str_new(str, len) \ ((RBIMPL_CONSTANT_P(str) && \ RBIMPL_CONSTANT_P(len) ? \ rb_usascii_str_new_static : \ rb_usascii_str_new) ((str), (len))) +/** + * Identical to #rb_str_new, except it generates a string of "UTF-8" encoding. + * + * @param[in] str A memory region of `len` bytes length. + * @param[in] len Length of `str`, in bytes, not including the + * terminating NUL character. + * @exception rb_eNoMemError Failed to allocate `len+1` bytes. + * @exception rb_eArgError `len` is negative. + * @return An instance of ::rb_cString, of `len` bytes length, of + * "UTF-8" encoding, whose contents are verbatim copy of `str`. + */ #define rb_utf8_str_new(str, len) \ ((RBIMPL_CONSTANT_P(str) && \ RBIMPL_CONSTANT_P(len) ? \ rb_utf8_str_new_static : \ rb_utf8_str_new) ((str), (len))) -#define rb_tainted_str_new_cstr(str) \ - ((RBIMPL_CONSTANT_P(str) ? \ - rbimpl_tainted_str_new_cstr : \ - rb_tainted_str_new_cstr) (str)) - +/** + * Identical to #rb_str_new_cstr, except it generates a string of "US ASCII" + * encoding. It can also be seen as a routine Identical to + * #rb_usascii_str_new, except it assumes the passed pointer is a pointer to a + * C string. + * + * @param[in] str A C string. + * @exception rb_eNoMemError Failed to allocate memory. + * @return An instance of ::rb_cString, of "US ASCII" encoding, whose + * contents are verbatim copy of `str`. + * @pre `str` must not be a null pointer. + */ #define rb_usascii_str_new_cstr(str) \ ((RBIMPL_CONSTANT_P(str) ? \ rbimpl_usascii_str_new_cstr : \ rb_usascii_str_new_cstr) (str)) +/** + * Identical to #rb_str_new_cstr, except it generates a string of "UTF-8" + * encoding. It can also be seen as a routine Identical to #rb_utf8_str_new, + * except it assumes the passed pointer is a pointer to a C string. + * + * @param[in] str A C string. + * @exception rb_eNoMemError Failed to allocate memory. + * @return An instance of ::rb_cString, of "UTF-8" encoding, whose contents + * are verbatim copy of `str`. 
+ * @pre `str` must not be a null pointer. + */ #define rb_utf8_str_new_cstr(str) \ ((RBIMPL_CONSTANT_P(str) ? \ rbimpl_utf8_str_new_cstr : \ rb_utf8_str_new_cstr) (str)) +/** + * Identical to #rb_str_new_cstr, except it generates a string of "default + * external" encoding. + * + * @param[in] str A C string. + * @exception rb_eNoMemError Failed to allocate memory. + * @return An instance of ::rb_cString. In case encoding conversion from + * "default internal" to "default external" is fully defined over + * the given contents, then the return value is a string of + * "default external" encoding, whose contents are the converted + * ones. Otherwise the string is a junk. + * @warning It doesn't raise on a conversion failure and silently ends up in + * a corrupted output. You can know the failure by querying + * `valid_encoding?` of the result object. + * @pre `str` must not be a null pointer. + */ #define rb_external_str_new_cstr(str) \ ((RBIMPL_CONSTANT_P(str) ? \ rbimpl_external_str_new_cstr : \ rb_external_str_new_cstr) (str)) +/** + * Identical to #rb_external_str_new_cstr, except it generates a string of + * "locale" encoding instead of "default external". + * + * @param[in] str A C string. + * @exception rb_eNoMemError Failed to allocate memory. + * @return An instance of ::rb_cString. In case encoding conversion from + * "default internal" to "locale" is fully defined over the given + * contents, then the return value is a string of "locale" + * encoding, whose contents are the converted ones. Otherwise the + * string is a junk. + * @warning It doesn't raise on a conversion failure and silently ends up in + * a corrupted output. You can know the failure by querying + * `valid_encoding?` of the result object. + * @pre `str` must not be a null pointer. + */ #define rb_locale_str_new_cstr(str) \ ((RBIMPL_CONSTANT_P(str) ? \ rbimpl_locale_str_new_cstr : \ rb_locale_str_new_cstr) (str)) +/** + * Identical to #rb_str_new_cstr, except done differently. 
+ * + * @param[in] str A C string. + * @exception rb_eNoMemError Failed to allocate memory. + * @return An instance of ::rb_cString, of "binary" encoding, whose + * contents are verbatim copy of `str`. + * @pre `str` must not be a null pointer. + */ #define rb_str_buf_new_cstr(str) \ ((RBIMPL_CONSTANT_P(str) ? \ rbimpl_str_buf_new_cstr : \ rb_str_buf_new_cstr) (str)) +/** + * Identical to rb_str_cat(), except it assumes the passed pointer is a pointer + * to a C string. + * + * @param[out] buf Destination object. + * @param[in] str Contents to append. + * @exception rb_eArgError Result string too big. + * @return The passed `buf`. + * @pre `buf` must not be any arbitrary objects except ::RString. + * @pre `str` must not be a null pointer. + * @post `buf` has the contents of `str` appended. + */ #define rb_str_cat_cstr(buf, str) \ ((RBIMPL_CONSTANT_P(str) ? \ rbimpl_str_cat_cstr : \ rb_str_cat_cstr) ((buf), (str))) +/** + * Identical to rb_exc_new(), except it assumes the passed pointer is a pointer + * to a C string. + * + * @param[out] exc A subclass of ::rb_eException. + * @param[in] str Message to raise. + * @return An instance of `exc` whose message is `str`. + * @pre `str` must not be a null pointer. + */ #define rb_exc_new_cstr(exc, str) \ ((RBIMPL_CONSTANT_P(str) ? 
\ rbimpl_exc_new_cstr : \ rb_exc_new_cstr) ((exc), (str))) -#define rb_str_new2 rb_str_new_cstr -#define rb_str_new3 rb_str_new_shared -#define rb_str_new4 rb_str_new_frozen -#define rb_str_new5 rb_str_new_with_class -#define rb_tainted_str_new2 rb_tainted_str_new_cstr -#define rb_str_buf_new2 rb_str_buf_new_cstr -#define rb_usascii_str_new2 rb_usascii_str_new_cstr -#define rb_str_buf_cat rb_str_cat -#define rb_str_buf_cat2 rb_str_cat_cstr -#define rb_str_cat2 rb_str_cat_cstr +#define rb_str_new2 rb_str_new_cstr /**< @old{rb_str_new_cstr} */ +#define rb_str_new3 rb_str_new_shared /**< @old{rb_str_new_shared} */ +#define rb_str_new4 rb_str_new_frozen /**< @old{rb_str_new_frozen} */ +#define rb_str_new5 rb_str_new_with_class /**< @old{rb_str_new_with_class} */ +#define rb_str_buf_new2 rb_str_buf_new_cstr /**< @old{rb_str_buf_new_cstr} */ +#define rb_usascii_str_new2 rb_usascii_str_new_cstr /**< @old{rb_usascii_str_new_cstr} */ +#define rb_str_buf_cat rb_str_cat /**< @alias{rb_str_cat} */ +#define rb_str_buf_cat2 rb_str_cat_cstr /**< @old{rb_str_cat_cstr} */ +#define rb_str_cat2 rb_str_cat_cstr /**< @old{rb_str_cat_cstr} */ + +/** + * Length of a string literal. + * + * @param[in] str A C String literal. + * @return An integer constant expression that represents `str`'s length, + * in bytes, not including the terminating NUL character. + */ #define rb_strlen_lit(str) (sizeof(str "") - 1) + +/** + * Identical to rb_str_new_static(), except it cannot take string variables. + * + * @param[in] str A C string literal. + * @pre `str` must not be a variable. + * @return An instance of ::rb_cString, of "binary" encoding, whose backend + * storage is the passed C string literal. + * @warning It is a very bad idea to write to a C string literal (often + * immediate SEGV shall occur). Consider return values of this + * function be read-only. 
+ */ #define rb_str_new_lit(str) rb_str_new_static((str), rb_strlen_lit(str)) + +/** + * Identical to rb_usascii_str_new_static(), except it cannot take string + * variables. + * + * @param[in] str A C string literal. + * @pre `str` must not be a variable. + * @return An instance of ::rb_cString, of "US ASCII" encoding, whose + * backend storage is the passed C string literal. + * @warning It is a very bad idea to write to a C string literal (often + * immediate SEGV shall occur). Consider return values of this + * function be read-only. + */ #define rb_usascii_str_new_lit(str) rb_usascii_str_new_static((str), rb_strlen_lit(str)) + +/** + * Identical to rb_utf8_str_new_static(), except it cannot take string + * variables. + * + * @param[in] str A C string literal. + * @pre `str` must not be a variable. + * @return An instance of ::rb_cString, of "UTF-8" encoding, whose backend + * storage is the passed C string literal. + * @warning It is a very bad idea to write to a C string literal (often + * immediate SEGV shall occur). Consider return values of this + * function be read-only. + */ #define rb_utf8_str_new_lit(str) rb_utf8_str_new_static((str), rb_strlen_lit(str)) + +/** + * Identical to rb_enc_str_new_static(), except it cannot take string + * variables. + * + * @param[in] str A C string literal. + * @param[in] enc A pointer to an encoding. + * @pre `str` must not be a variable. + * @return An instance of ::rb_cString, of the passed encoding, whose + * backend storage is the passed C string literal. + * @warning It is a very bad idea to write to a C string literal (often + * immediate SEGV shall occur). Consider return values of this + * function be read-only. 
+ */ #define rb_enc_str_new_lit(str, enc) rb_enc_str_new_static((str), rb_strlen_lit(str), (enc)) -#define rb_str_new_literal(str) rb_str_new_lit(str) -#define rb_usascii_str_new_literal(str) rb_usascii_str_new_lit(str) -#define rb_utf8_str_new_literal(str) rb_utf8_str_new_lit(str) -#define rb_enc_str_new_literal(str, enc) rb_enc_str_new_lit(str, enc) + +#define rb_str_new_literal(str) rb_str_new_lit(str) /**< @alias{rb_str_new_lit} */ +#define rb_usascii_str_new_literal(str) rb_usascii_str_new_lit(str) /**< @alias{rb_usascii_str_new_lit} */ +#define rb_utf8_str_new_literal(str) rb_utf8_str_new_lit(str) /**< @alias{rb_utf8_str_new_lit} */ +#define rb_enc_str_new_literal(str, enc) rb_enc_str_new_lit(str, enc) /**< @alias{rb_enc_str_new_lit} */ RBIMPL_SYMBOL_EXPORT_END() diff --git a/include/ruby/internal/intern/struct.h b/include/ruby/internal/intern/struct.h index 8818da96c7..16b3fad4e0 100644 --- a/include/ruby/internal/intern/struct.h +++ b/include/ruby/internal/intern/struct.h @@ -17,9 +17,10 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Public APIs related to ::rb_cStruct. 
+ */ +#include "ruby/internal/attr/nonnull.h" #include "ruby/internal/dllexport.h" #include "ruby/internal/intern/vm.h" /* rb_alloc_func_t */ #include "ruby/internal/value.h" @@ -27,21 +28,198 @@ RBIMPL_SYMBOL_EXPORT_BEGIN() /* struct.c */ -VALUE rb_struct_new(VALUE, ...); -VALUE rb_struct_define(const char*, ...); -VALUE rb_struct_define_under(VALUE, const char*, ...); -VALUE rb_struct_alloc(VALUE, VALUE); -VALUE rb_struct_initialize(VALUE, VALUE); -VALUE rb_struct_aref(VALUE, VALUE); -VALUE rb_struct_aset(VALUE, VALUE, VALUE); -VALUE rb_struct_getmember(VALUE, ID); -VALUE rb_struct_s_members(VALUE); -VALUE rb_struct_members(VALUE); -VALUE rb_struct_size(VALUE s); -VALUE rb_struct_alloc_noinit(VALUE); -VALUE rb_struct_define_without_accessor(const char *, VALUE, rb_alloc_func_t, ...); + +/** + * Creates an instance of the given struct. + * + * @param[in] klass The class of the instance to allocate. + * @param[in] ... The fields. + * @return Allocated instance of `klass`. + * @pre `klass` must be a subclass of ::rb_cStruct. + * @note Number of variadic arguments must match that of the passed klass' + * fields. + */ +VALUE rb_struct_new(VALUE klass, ...); + +/** + * Defines a struct class. + * + * @param[in] name Name of the class. + * @param[in] ... Arbitrary number of `const char*`, terminated by + * NULL. Each of which are the name of fields. + * @exception rb_eNameError `name` is not a constant name. + * @exception rb_eTypeError `name` is already taken. + * @exception rb_eArgError Duplicated field name. + * @return The defined class. + * @post Global toplevel constant `name` is defined. + * @note `name` is allowed to be a null pointer. This function creates + * an anonymous struct class then. + * @note The GC does not collect nor move classes returned by this + * function. They are immortal. + * + * @internal + * + * Not seriously checked but it seems this function does not share its + * implementation with how `Struct.new` is implemented...? 
+ */ +VALUE rb_struct_define(const char *name, ...); + +RBIMPL_ATTR_NONNULL((2)) +/** + * Identical to rb_struct_define(), except it defines the class under the + * specified namespace instead of global toplevel. + * + * @param[out] space Namespace that the defining class shall reside. + * @param[in] name Name of the class. + * @param[in] ... Arbitrary number of `const char*`, terminated by + * NULL. Each of which are the name of fields. + * @exception rb_eNameError `name` is not a constant name. + * @exception rb_eTypeError `name` is already taken. + * @exception rb_eArgError Duplicated field name. + * @return The defined class. + * @post `name` is a constant under `space`. + * @note In contrast to rb_struct_define(), it doesn't make any sense to + * pass a null pointer to this function. + * @note The GC does not collect nor move classes returned by this + * function. They are immortal. + */ +VALUE rb_struct_define_under(VALUE space, const char *name, ...); + +/** + * Identical to rb_struct_new(), except it takes the field values as a Ruby + * array. + * + * @param[in] klass The class of the instance to allocate. + * @param[in] values Field values. + * @return Allocated instance of `klass`. + * @pre `klass` must be a subclass of ::rb_cStruct. + * @pre `values` must be an instance of struct ::RArray. + */ +VALUE rb_struct_alloc(VALUE klass, VALUE values); + +/** + * Mass-assigns a struct's fields. + * + * @param[out] self An instance of a struct class to squash. + * @param[in] values New values. + * @return ::RUBY_Qnil. + */ +VALUE rb_struct_initialize(VALUE self, VALUE values); + +/** + * Identical to rb_struct_aref(), except it takes ::ID instead of ::VALUE. + * + * @param[in] self An instance of a struct class. + * @param[in] key Key to query. + * @exception rb_eTypeError `self` is not a struct. + * @exception rb_eNameError No such field. + * @return The value stored at `key` in `self`. 
+ */ +VALUE rb_struct_getmember(VALUE self, ID key); + +/** + * Queries the list of the names of the fields of the given struct class. + * + * @param[in] klass A subclass of ::rb_cStruct. + * @return The list of the names of the fields of `klass`. + */ +VALUE rb_struct_s_members(VALUE klass); + +/** + * Queries the list of the names of the fields of the class of the given struct + * object. This is almost the same as calling rb_struct_s_members() over the + * class of the receiver. + * + * @internal + * + * "Almost"? What exactly is the difference? + * + * @endinternal + * + * @param[in] self An instance of a subclass of ::rb_cStruct. + * @return The list of the names of the fields. + */ +VALUE rb_struct_members(VALUE self); + +/** + * Allocates an instance of the given class. This consequential name is of + * course because rb_struct_alloc() not only allocates but also initialises an + * instance. The API design is broken. + * + * @param[in] klass A subclass of ::rb_cStruct. + * @return An allocated instance of `klass`, not initialised. + */ +VALUE rb_struct_alloc_noinit(VALUE klass); + +/** + * Identical to rb_struct_define(), except it does not define accessor methods. + * You have to define them yourself. Forget about the allocator function + * parameter; it is for internal use only. Extension libraries are unable to + * properly allocate a ruby struct, because `RStruct` is opaque. + * + * @internal + * + * Several flags must be set up properly for ::RUBY_T_STRUCT objects, which are + * also missing for extension libraries. + * + * @endinternal + * + * @param[in] name Name of the class. + * @param[in] super Superclass of the defining class. + * @param[in] func Must be 0 for extension libraries. + * @param[in] ... Arbitrary number of `const char*`, terminated by + * NULL. Each of which are the name of fields. + * @exception rb_eNameError `name` is not a constant name. + * @exception rb_eTypeError `name` is already taken. 
+ * @exception rb_eArgError Duplicated field name. + * @return The defined class. + * @post Global toplevel constant `name` is defined. + * @note `name` is allowed to be a null pointer. This function creates + * an anonymous struct class then. + */ +VALUE rb_struct_define_without_accessor(const char *name, VALUE super, rb_alloc_func_t func, ...); + +RBIMPL_ATTR_NONNULL((2)) +/** + * Identical to rb_struct_define_without_accessor(), except it defines the + * class under the specified namespace instead of global toplevel. It can also + * be seen as a routine identical to rb_struct_define_under(), except it does + * not define accessor methods. + * + * @param[out] outer Namespace that the defining class shall reside. + * @param[in] class_name Name of the class. + * @param[in] super Superclass of the defining class. + * @param[in] alloc Must be 0 for extension libraries. + * @param[in] ... Arbitrary number of `const char*`, terminated by + * NULL. Each of which are the name of fields. + * @exception rb_eNameError `class_name` is not a constant name. + * @exception rb_eTypeError `class_name` is already taken. + * @exception rb_eArgError Duplicated field name. + * @return The defined class. + * @post `class_name` is a constant under `outer`. + * @note In contrast to rb_struct_define_without_accessor(), it doesn't + * make any sense to pass a null name. + * @note The GC does not collect nor move classes returned by this + * function. They are immortal. + */ VALUE rb_struct_define_without_accessor_under(VALUE outer, const char *class_name, VALUE super, rb_alloc_func_t alloc, ...); +/** + * Defines an anonymous data class. + * + * @param[in] super Superclass of the defining class. Must be a + * descendant of ::rb_cData, or 0 as ::rb_cData. + * @param[in] ... Arbitrary number of `const char*`, terminated by + * NULL. Each of which are the name of fields. + * @exception rb_eArgError Duplicated field name. + * @return The defined class. 
+ * @note The GC does not collect nor move classes returned by this + * function. They are immortal. + */ +VALUE rb_data_define(VALUE super, ...); + RBIMPL_SYMBOL_EXPORT_END() #endif /* RBIMPL_INTERN_STRUCT_H */ diff --git a/include/ruby/internal/intern/thread.h b/include/ruby/internal/intern/thread.h index 7c656a628c..716375acd7 100644 --- a/include/ruby/internal/intern/thread.h +++ b/include/ruby/internal/intern/thread.h @@ -17,11 +17,12 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Public APIs related to ::rb_cThread. */ -#include "ruby/internal/config.h" +#include "ruby/internal/attr/nonnull.h" #include "ruby/internal/cast.h" +#include "ruby/internal/config.h" #include "ruby/internal/dllexport.h" #include "ruby/internal/value.h" @@ -30,45 +31,460 @@ RBIMPL_SYMBOL_EXPORT_BEGIN() struct timeval; /* thread.c */ + +/** + * Tries to switch to another thread. This function blocks until the current + * thread re-acquires the GVL. + * + * @exception rb_eInterrupt Operation interrupted. + */ void rb_thread_schedule(void); -int rb_thread_wait_fd(int); -int rb_thread_fd_writable(int); -void rb_thread_fd_close(int); + +/** + * Blocks the current thread until the given file descriptor is ready to be + * read. + * + * @param[in] fd A file descriptor. + * @exception rb_eIOError Closed stream. + * @exception rb_eSystemCallError Situations like EBADF. + */ +int rb_thread_wait_fd(int fd); + +/** + * Identical to rb_thread_wait_fd(), except it blocks the current thread until + * the given file descriptor is ready to be written. + * + * @param[in] fd A file descriptor. + * @exception rb_eIOError Closed stream. + * @exception rb_eSystemCallError Situations like EBADF. 
+ */ +int rb_thread_fd_writable(int fd); + +/** + * Notifies a closing of a file descriptor to other threads. Multiple threads + * can wait for the given file descriptor at once. If such file descriptor is + * closed, threads need to start propagating their exceptions. This is the API + * to kick that process. + * + * @param[in] fd A file descriptor. + * @note This function blocks until all the threads waiting for such fd + * have woken up. + */ +void rb_thread_fd_close(int fd); + +/** + * Checks if the thread this function is running is the only thread that is + * currently alive. + * + * @retval 1 Yes it is. + * @retval 0 No it isn't. + * + * @internal + * + * Above description is in fact inaccurate. There are Ractors these days. + */ int rb_thread_alone(void); -void rb_thread_sleep(int); + +/** + * Blocks for the given period of time. + * + * @warning This function can be interrupted by signals. + * @param[in] sec Duration in seconds. + * @exception rb_eInterrupt Interrupted. + */ +void rb_thread_sleep(int sec); + +/** + * Blocks indefinitely. + * + * @exception rb_eInterrupt Interrupted. + */ void rb_thread_sleep_forever(void); + +/** + * Identical to rb_thread_sleep_forever(), except the thread calling this + * function is considered "dead" when our deadlock checker is triggered. + * + * @exception rb_eInterrupt Interrupted. + */ void rb_thread_sleep_deadly(void); + +/** + * Stops the current thread. This is not the end of the thread's lifecycle. A + * stopped thread can later be woken up. + * + * @exception rb_eThreadError Stopping this thread would deadlock. + * @retval ::RUBY_Qnil Always. + * + * @internal + * + * The return value makes no sense at all. 
+ */ VALUE rb_thread_stop(void); -VALUE rb_thread_wakeup(VALUE); -VALUE rb_thread_wakeup_alive(VALUE); -VALUE rb_thread_run(VALUE); -VALUE rb_thread_kill(VALUE); -VALUE rb_thread_create(VALUE (*)(void *), void*); -void rb_thread_wait_for(struct timeval); + +/** + * Marks a given thread as eligible for scheduling. + * + * @note It may still remain blocked on I/O. + * @note This does not invoke the scheduler itself. + * + * @param[out] thread Thread in question to wake up. + * @exception rb_eThreadError Stop flogging a dead horse. + * @return The passed thread. + * @post The passed thread is made runnable. + */ +VALUE rb_thread_wakeup(VALUE thread); + +/** + * Identical to rb_thread_wakeup(), except it doesn't raise on an already + * killed thread. + * + * @param[out] thread A thread to wake up. + * @retval RUBY_Qnil `thread` is already killed. + * @retval otherwise `thread` is alive. + * @post The passed thread is made runnable, unless killed. + */ +VALUE rb_thread_wakeup_alive(VALUE thread); + +/** + * This is a rb_thread_wakeup() + rb_thread_schedule() combo. + * + * @note There is no guarantee that this function yields to the passed + * thread. It may still remain blocked on I/O. + * @param[out] thread Thread in question to wake up. + * @exception rb_eThreadError Stop flogging a dead horse. + * @return The passed thread. + */ +VALUE rb_thread_run(VALUE thread); + +/** + * Terminates the given thread. Unlike a stopped thread, a killed thread could + * never be revived. This function does return, when passed e.g. an already + * killed thread. But if the passed thread is the only one, or a special + * thread called "main", then it also terminates the entire process. + * + * @param[out] thread The thread to terminate. + * @exception rb_eFatal The passed thread is the running thread. + * @exception rb_eSystemExit The passed thread is the last thread. + * @return The passed thread. + * @post Either the passed thread, or the process entirely, is killed. 
+ * + * @internal + * + * It seems killing the main thread also kills the entire process even if there + * are multiple running ractors. No idea why. + */ +VALUE rb_thread_kill(VALUE thread); + +RBIMPL_ATTR_NONNULL((1)) +/** + * Creates a Ruby thread that is backended by a C function. + * + * @param[in] f The function to run on a thread. + * @param[in,out] g Passed through to `f`. + * @exception rb_eThreadError Could not create a ruby thread. + * @exception rb_eSystemCallError Situations like `EPERM`. + * @return Allocated instance of ::rb_cThread. + * @note This doesn't wait for anything. + */ +VALUE rb_thread_create(VALUE (*f)(void *g), void *g); + +/** + * Identical to rb_thread_sleep(), except it takes struct `timeval` instead. + * + * @warning This function can be interrupted by signals. + * @param[in] time Duration. + * @exception rb_eInterrupt Interrupted. + */ +void rb_thread_wait_for(struct timeval time); + +/** + * Obtains the "current" thread. + * + * @return The current thread of the current ractor of the current execution + * context. + * @pre This function must be called from a thread controlled by ruby. + */ VALUE rb_thread_current(void); + +/** + * Obtains the "main" thread. There are threads called main. Historically the + * (only) main thread was the one which runs when the process boots. Now that + * we have Ractor, there are more than one main threads. + * + * @return The main thread of the current ractor of the current execution + * context. + * @pre This function must be called from a thread controlled by ruby. + */ VALUE rb_thread_main(void); -VALUE rb_thread_local_aref(VALUE, ID); -VALUE rb_thread_local_aset(VALUE, ID, VALUE); + +/** + * This badly named function reads from a Fiber local storage. When this + * function was born there was no such thing like a Fiber. The world was + * innocent. But now... This is a Fiber local storage. Sorry. + * + * @param[in] thread Thread that the target Fiber is running. 
+ * @param[in] key The name of the Fiber local storage to read. + * @retval RUBY_Qnil No such storage. + * @retval otherwise The value stored at `key`. + * @note There in fact are "true" thread local storage, but Ruby doesn't + * provide any interface of them to you, C programmers. + */ +VALUE rb_thread_local_aref(VALUE thread, ID key); + +/** + * This badly named function writes to a Fiber local storage. When this + * function was born there was no such thing like a Fiber. The world was + * innocent. But now... This is a Fiber local storage. Sorry. + * + * @param[in] thread Thread that the target Fiber is running. + * @param[in] key The name of the Fiber local storage to write. + * @param[in] val The new value of the storage. + * @exception rb_eFrozenError `thread` is frozen. + * @return The passed `val` as-is. + * @post Fiber local storage `key` has value of `val`. + * @note There in fact are "true" thread local storage, but Ruby doesn't + * provide any interface of them to you, C programmers. + */ +VALUE rb_thread_local_aset(VALUE thread, ID key, VALUE val); + +/** + * A `pthread_atfork(3posix)`-like API. Ruby expects its child processes to + * call this function at the very beginning of their processes. If you plan to + * fork a process don't forget to call it. + */ void rb_thread_atfork(void); + +/** + * :FIXME: situation of this function is unclear. It seems nobody uses it. + * Maybe a good idea to KonMari. + */ void rb_thread_atfork_before_exec(void); -VALUE rb_exec_recursive(VALUE(*)(VALUE, VALUE, int),VALUE,VALUE); -VALUE rb_exec_recursive_paired(VALUE(*)(VALUE, VALUE, int),VALUE,VALUE,VALUE); -VALUE rb_exec_recursive_outer(VALUE(*)(VALUE, VALUE, int),VALUE,VALUE); -VALUE rb_exec_recursive_paired_outer(VALUE(*)(VALUE, VALUE, int),VALUE,VALUE,VALUE); +/** + * "Recursion" API entry point. This basically calls the given function with + * the given arguments, but additionally with recursion flag. 
The flag is set + * to 1 if the execution have already experienced the passed `g` parameter + * before. + * + * @param[in] f The function that possibly recurs. + * @param[in,out] g Passed as-is to `f`. + * @param[in,out] h Passed as-is to `f`. + * @return The return value of f. + */ +VALUE rb_exec_recursive(VALUE (*f)(VALUE g, VALUE h, int r), VALUE g, VALUE h); + +/** + * Identical to rb_exec_recursive(), except it checks for the recursion on the + * ordered pair of `{ g, p }` instead of just `g`. + * + * @param[in] f The function that possibly recurs. + * @param[in,out] g Passed as-is to `f`. + * @param[in] p Paired object for recursion detection. + * @param[in,out] h Passed as-is to `f`. + */ +VALUE rb_exec_recursive_paired(VALUE (*f)(VALUE g, VALUE h, int r), VALUE g, VALUE p, VALUE h); + +/** + * Identical to rb_exec_recursive(), except it calls `f` for outermost + * recursion only. Inner recursions yield calls to rb_throw_obj(). + * + * @param[in] f The function that possibly recurs. + * @param[in,out] g Passed as-is to `f`. + * @param[in,out] h Passed as-is to `f`. + * @return The return value of f. + * + * @internal + * + * It seems nobody uses the "it calls rb_throw_obj()" part of this function. + * @shyouhei doesn't understand the needs. + */ +VALUE rb_exec_recursive_outer(VALUE (*f)(VALUE g, VALUE h, int r), VALUE g, VALUE h); + +/** + * Identical to rb_exec_recursive_outer(), except it checks for the recursion + * on the ordered pair of `{ g, p }` instead of just `g`. It can also be seen + * as a routine identical to rb_exec_recursive_paired(), except it calls `f` + * for outermost recursion only. Inner recursions yield calls to + * rb_throw_obj(). + * + * @param[in] f The function that possibly recurs. + * @param[in,out] g Passed as-is to `f`. + * @param[in] p Paired object for recursion detection. + * @param[in,out] h Passed as-is to `f`. + * + * @internal + * + * It seems nobody uses the "it calls rb_throw_obj()" part of this function. 
+ * @shyouhei doesn't understand the needs. + */ +VALUE rb_exec_recursive_paired_outer(VALUE (*f)(VALUE g, VALUE h, int r), VALUE g, VALUE p, VALUE h); + +/** + * This is the type of UBFs. An UBF is a function that unblocks a blocking + * region. For instance when a thread is blocking due to `pselect(3posix)`, it + * is highly expected that `pthread_kill(3posix)` can interrupt the system call + * and the thread could revive. Or when a thread is blocking due to + * `waitpid(3posix)`, it is highly expected that killing the waited process + * should suffice. An UBF is a function that does such things. Designing your + * own UBF needs deep understanding of why your blocking region blocks, how + * threads work in ruby, and a matter of luck. It often is the case you simply + * cannot cancel something that had already begun. + * + * @see rb_thread_call_without_gvl() + */ typedef void rb_unblock_function_t(void *); + +/** + * @private + * + * This is an implementation detail. Must be a mistake to be here. + * + * @internal + * + * Why is this function type different from what rb_thread_call_without_gvl() + * takes? + */ typedef VALUE rb_blocking_function_t(void *); + +/** + * Checks for interrupts. In ruby, signals are masked by default. You can + * call this function at will to check if there are pending signals. In case + * there are, they would be handled in this function. + * + * If your extension library has a function that takes a long time, consider + * calling it periodically. + * + * @note It might switch to another thread. + */ void rb_thread_check_ints(void); + +/** + * Checks if the thread's execution was recently interrupted. If called from + * that thread, this function can be used to detect spurious wake-ups. + * + * @param[in] thval Thread in question. + * @retval 0 The thread was not interrupted. + * @retval otherwise The thread was interrupted recently. + * + * @internal + * + * Above description is not a lie. 
But actually the return value is an opaque + * trap vector. If you know which bit means which, you can know what happened. + */ int rb_thread_interrupted(VALUE thval); +/** + * A special UBF for blocking IO operations. You need deep understanding of + * what this actually does before using. Basically you should not use it from + * extension libraries. It is too easy to mess up. + */ #define RUBY_UBF_IO RBIMPL_CAST((rb_unblock_function_t *)-1) + +/** + * A special UBF for blocking process operations. You need deep understanding + * of what this actually does before using. Basically you should not use it from + * extension libraries. It is too easy to mess up. + */ #define RUBY_UBF_PROCESS RBIMPL_CAST((rb_unblock_function_t *)-1) + +/* thread_sync.c */ + +/** + * Creates a mutex. + * + * @return An allocated instance of rb_cMutex. + */ VALUE rb_mutex_new(void); + +/** + * Queries if there are any threads that hold the lock. + * + * @param[in] mutex The mutex in question. + * @retval RUBY_Qtrue The mutex is locked by someone. + * @retval RUBY_Qfalse The mutex is not locked by anyone. + */ VALUE rb_mutex_locked_p(VALUE mutex); + +/** + * Attempts to lock the mutex, without waiting for other threads to unlock it. + * Failure in locking the mutex can be detected by the return value. + * + * @param[out] mutex The mutex to lock. + * @retval RUBY_Qtrue Successfully locked by the current thread. + * @retval RUBY_Qfalse Otherwise. + * @note This function also returns ::RUBY_Qfalse when the mutex is + * already owned by the calling thread itself. + */ VALUE rb_mutex_trylock(VALUE mutex); + +/** + * Attempts to lock the mutex. It waits until the mutex gets available. + * + * @param[out] mutex The mutex to lock. + * @exception rb_eThreadError Recursive deadlock situation. + * @return The passed mutex. + * @post The mutex is owned by the current thread. + */ VALUE rb_mutex_lock(VALUE mutex); + +/** + * Releases the mutex. + * + * @param[out] mutex The mutex to unlock. 
+ * @exception rb_eThreadError The mutex is not owned by the current thread. + * @return The passed mutex. + * @post Upon successful return the passed mutex is no longer owned by + * the current thread. + */ VALUE rb_mutex_unlock(VALUE mutex); + +/** + * Releases the lock held in the mutex and waits for the period of time; + * reacquires the lock on wakeup. + * + * @pre The lock has to be owned by the current thread beforehand. + * @param[out] self The target mutex. + * @param[in] timeout Duration, in seconds, in ::rb_cNumeric. + * @exception rb_eArgError `timeout` is negative. + * @exception rb_eRangeError `timeout` is out of range of `time_t`. + * @exception rb_eThreadError The mutex is not owned by the current thread. + * @return Number of seconds it actually slept. + * @warning It is a failure not to check the return value. This function + * can return spuriously for various reasons. Maybe other threads + * can rb_thread_wakeup(). Maybe an end user can press the + * Control and C key from the interactive console. On the other + * hand it can also take longer than the specified. The mutex + * could be locked by someone else. It waits then. + * @post Upon successful return the passed mutex is owned by the current + * thread. + * + * @internal + * + * This function is called from `ConditionVariable#wait`. So it is not a + * deprecated feature. However @shyouhei have never seen any similar mutex + * primitive available in any other languages than Ruby. + * + * EDIT: In 2021, @shyouhei asked @ko1 in person about this API. He answered + * that it is his invention. The motivation behind its design is to eliminate + * needs of condition variables as primitives. Unlike other languages, Ruby's + * `ConditionVariable` class was written in pure-Ruby initially. We don't have + * to implement machine-native condition variables in assembly each time we + * port Ruby to a new architecture. This function made it possible. 
"I felt I + * was a genius when this idea came to me", said @ko1. + * + * `rb_cConditionVariable` is now written in C for speed, though. + */ VALUE rb_mutex_sleep(VALUE self, VALUE timeout); + +/** + * Obtains the lock, runs the passed function, and releases the lock when it + * completes. + * + * @param[out] mutex The mutex to lock. + * @param[in] func What to do during the mutex is locked. + * @param[in,out] arg Passed as-is to `func`. + */ VALUE rb_mutex_synchronize(VALUE mutex, VALUE (*func)(VALUE arg), VALUE arg); RBIMPL_SYMBOL_EXPORT_END() diff --git a/include/ruby/internal/intern/time.h b/include/ruby/internal/intern/time.h index c7ae6ec2f5..df482862eb 100644 --- a/include/ruby/internal/intern/time.h +++ b/include/ruby/internal/intern/time.h @@ -17,7 +17,7 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Public APIs related to ::rb_cTime. */ #include "ruby/internal/config.h" @@ -26,6 +26,7 @@ # include <time.h> /* for time_t */ #endif +#include "ruby/internal/attr/nonnull.h" #include "ruby/internal/dllexport.h" #include "ruby/internal/value.h" @@ -35,15 +36,124 @@ struct timespec; struct timeval; /* time.c */ -void rb_timespec_now(struct timespec *); -VALUE rb_time_new(time_t, long); -VALUE rb_time_nano_new(time_t, long); -VALUE rb_time_timespec_new(const struct timespec *, int); -VALUE rb_time_num_new(VALUE, VALUE); + +RBIMPL_ATTR_NONNULL(()) +/** + * Fills the current time into the given struct. + * + * @param[out] ts Return buffer. + * @exception rb_eSystemCallError Access denied for hardware clock. + * @post Current time is stored in `*ts`. + */ +void rb_timespec_now(struct timespec *ts); + +/** + * Creates an instance of ::rb_cTime with the given time and the local + * timezone. 
+ * + * @param[in] sec Seconds since the UNIX epoch. + * @param[in] usec Subsecond part, in microseconds resolution. + * @exception rb_eRangeError Cannot express the time. + * @return An allocated instance of ::rb_cTime. + */ +VALUE rb_time_new(time_t sec, long usec); + +/** + * Identical to rb_time_new(), except it accepts the time in nanoseconds + * resolution. + * + * @param[in] sec Seconds since the UNIX epoch. + * @param[in] nsec Subsecond part, in nanoseconds resolution. + * @exception rb_eRangeError Cannot express the time. + * @return An allocated instance of ::rb_cTime. + */ +VALUE rb_time_nano_new(time_t sec, long nsec); + +RBIMPL_ATTR_NONNULL(()) +/** + * Creates an instance of ::rb_cTime, with given time and offset. + * + * @param[in] ts Time specifier. + * @param[in] offset Offset specifier, can take following values: + * - `INT_MAX`: `ts` is in local time. + * - `INT_MAX - 1`: `ts` is in UTC. + * - `-86400` to `86400`: fixed timezone. + * @exception rb_eArgError Malformed `offset`. + * @return An allocated instance of ::rb_cTime. + */ +VALUE rb_time_timespec_new(const struct timespec *ts, int offset); + +/** + * Identical to rb_time_timespec_new(), except it takes Ruby values instead of + * C structs. + * + * @param[in] timev Something numeric. Currently Integers, Rationals, + * and Floats are accepted. + * @param[in] off Offset specifier. As of 2.7 this argument is + * heavily extended to take following kinds of + * objects: + * - ::RUBY_Qundef ... means UTC. + * - ::rb_cString ... "+12:34" etc. + * - A mysterious "zone" object. This is largely + * undocumented. However the initial intent was + * that we want to accept + * `ActiveSupport::TimeZone` here. Other gems + * could also be possible... But how to make an + * acceptable class is beyond this document. + * @exception rb_eArgError Malformed `off`. + * @return An allocated instance of ::rb_cTime. + */ +VALUE rb_time_num_new(VALUE timev, VALUE off); + +/** + * Creates a "time interval". 
This basically converts an instance of + * ::rb_cNumeric into a struct `timeval`, but for instance negative time + * interval must not exist. + * + * @param[in] num An instance of ::rb_cNumeric. + * @exception rb_eArgError `num` is negative. + * @exception rb_eRangeError `num` is out of range of `timeval::tv_sec`. + * @return A struct that represents the identical time to `num`. + */ struct timeval rb_time_interval(VALUE num); + +/** + * Converts an instance of rb_cTime to a struct timeval that represents the + * identical point of time. It can also take something numeric; would consider + * it as a UNIX time then. + * + * @param[in] time Instance of either ::rb_cTime or ::rb_cNumeric. + * @exception rb_eRangeError `time` is out of range of `timeval::tv_sec`. + * @return A struct that represents the identical time to `time`. + */ struct timeval rb_time_timeval(VALUE time); + +/** + * Identical to rb_time_timeval(), except for return type. + * + * @param[in] time Instance of either ::rb_cTime or ::rb_cNumeric. + * @exception rb_eRangeError `time` is out of range of `timespec::tv_sec`. + * @return A struct that represents the identical time to `time`. + */ struct timespec rb_time_timespec(VALUE time); + +/** + * Identical to rb_time_interval(), except for return type. + * + * @param[in] num An instance of ::rb_cNumeric. + * @exception rb_eArgError `num` is negative. + * @exception rb_eRangeError `num` is out of range of `timespec::tv_sec`. + * @return A struct that represents the identical time to `num`. + */ struct timespec rb_time_timespec_interval(VALUE num); + +/** + * Queries the offset, in seconds between the time zone of the time and the + * UTC. + * + * @param[in] time An instance of ::rb_cTime. + * @return Numeric offset. 
+ */ VALUE rb_time_utc_offset(VALUE time); RBIMPL_SYMBOL_EXPORT_END() diff --git a/include/ruby/internal/intern/variable.h b/include/ruby/internal/intern/variable.h index faa0cc004f..479c3950c1 100644 --- a/include/ruby/internal/intern/variable.h +++ b/include/ruby/internal/intern/variable.h @@ -17,9 +17,10 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. + * extension libraries. They could be written in C++98. * @brief Public APIs related to names inside of a Ruby program. */ +#include "ruby/internal/attr/nonnull.h" #include "ruby/internal/attr/noreturn.h" #include "ruby/internal/dllexport.h" #include "ruby/internal/value.h" @@ -28,56 +29,599 @@ RBIMPL_SYMBOL_EXPORT_BEGIN() /* variable.c */ -VALUE rb_mod_name(VALUE); -VALUE rb_class_path(VALUE); -VALUE rb_class_path_cached(VALUE); -void rb_set_class_path(VALUE, VALUE, const char*); -void rb_set_class_path_string(VALUE, VALUE, VALUE); -VALUE rb_path_to_class(VALUE); -VALUE rb_path2class(const char*); -VALUE rb_class_name(VALUE); -VALUE rb_autoload_load(VALUE, ID); -VALUE rb_autoload_p(VALUE, ID); -VALUE rb_f_trace_var(int, const VALUE*); -VALUE rb_f_untrace_var(int, const VALUE*); + +/** + * Queries the name of a module. + * + * @param[in] mod An instance of ::rb_cModule. + * @retval RUBY_Qnil `mod` is anonymous. + * @retval otherwise `mod` is onymous. + */ +VALUE rb_mod_name(VALUE mod); + +/** + * Identical to rb_mod_name(), except it returns `#<Class: ...>` style + * inspection for anonymous modules. + * + * @param[in] mod An instance of ::rb_cModule. + * @return An instance of ::rb_cString representing `mod`'s path. + */ +VALUE rb_class_path(VALUE mod); + +/** + * @alias{rb_mod_name} + * + * @internal + * + * Am I missing something? Why we have the same thing in different names? 
+ */ +VALUE rb_class_path_cached(VALUE mod); + +RBIMPL_ATTR_NONNULL(()) +/** + * Names a class. + * + * @param[out] klass Target module to name. + * @param[out] space Namespace that `klass` shall reside. + * @param[in] name Name of `klass`. + * @post `klass` has `space::klass` name. + */ +void rb_set_class_path(VALUE klass, VALUE space, const char *name); + +/** + * Identical to rb_set_class_path(), except it accepts the name as Ruby's + * string instead of C's. + * + * @param[out] klass Target module to name. + * @param[out] space Namespace that `klass` shall reside. + * @param[in] name Name of `klass`. + * @post `klass` has `space::klass` name. + */ +void rb_set_class_path_string(VALUE klass, VALUE space, VALUE name); + +/** + * Identical to rb_path2class(), except it accepts the path as Ruby's string + * instead of C's. + * + * @param[in] path Path to query. + * @exception rb_eArgError No such constant. + * @exception rb_eTypeError The path resolved to a non-module. + * @return Resolved class. + */ +VALUE rb_path_to_class(VALUE path); + +RBIMPL_ATTR_NONNULL(()) +/** + * Resolves a `Q::W::E::R`-style path string to the actual class it points. + * + * @param[in] path Path to query. + * @exception rb_eArgError No such constant. + * @exception rb_eTypeError The path resolved to a non-module. + * @return Resolved class. + */ +VALUE rb_path2class(const char *path); + +/** + * Queries the name of the given object's class. + * + * @param[in] obj Arbitrary object. + * @return An instance of ::rb_cString representing `obj`'s class' path. + */ +VALUE rb_class_name(VALUE obj); + +/** + * Kicks the autoload procedure as if it was "touched". + * + * @param[out] space Namespace where autoload is defined. + * @param[in] name Name of the autoloaded constant. + * @retval RUBY_Qfalse No such autoload. + * @retval RUBY_Qtrue Autoload successfully initiated. 
+ * @note As an autoloaded library is expected to define `space::name`, + * it is a nature of this function to have process-global side + * effects. + * @note Multiple threads can simultaneously call this API. It blocks + * then. That must not last indefinitely but can take longer than + * you expect. + * + * @internal + * + * @shyouhei has no idea why extension libraries should use this API. + */ +VALUE rb_autoload_load(VALUE space, ID name); + +/** + * Queries if an autoload is defined at a point. + * + * @param[in] space Namespace where autoload is defined. + * @param[in] name Name of the autoloaded constant. + * @retval RUBY_Qnil No such autoload. + * @retval otherwise The feature (path) registered at `space::name`. + */ +VALUE rb_autoload_p(VALUE space, ID name); + +/** + * Traces a global variable. + * + * @param[in] argc Either 1 or 2. + * @param[in] argv Variable name, optionally a Proc. + * @retval RUBY_Qnil No previous tracers. + * @retval otherwise Previous tracers. + * + * @internal + * + * @shyouhei has no idea why extension libraries should use this API. + */ +VALUE rb_f_trace_var(int argc, const VALUE *argv); + +/** + * Deletes the passed tracer from the passed global variable, or if omitted, + * deletes everything. + * + * @param[in] argc Either 1 or 2. + * @param[in] argv Variable name, optionally a Proc. + * @retval RUBY_Qnil No previous tracers. + * @retval otherwise Deleted tracers. + * + * @internal + * + * @shyouhei has no idea why extension libraries should use this API. + */ +VALUE rb_f_untrace_var(int argc, const VALUE *argv); + +/** + * Queries the list of global variables. + * + * @return The list of the name of the global variables. + * + * @internal + * + * Above description is in fact inaccurate. This API interfaces with Ractors. 
+ */ VALUE rb_f_global_variables(void); -void rb_alias_variable(ID, ID); -void rb_copy_generic_ivar(VALUE,VALUE); -void rb_free_generic_ivar(VALUE); -VALUE rb_ivar_get(VALUE, ID); -VALUE rb_ivar_set(VALUE, ID, VALUE); -VALUE rb_ivar_defined(VALUE, ID); -void rb_ivar_foreach(VALUE, int (*)(ID, VALUE, st_data_t), st_data_t); -st_index_t rb_ivar_count(VALUE); -VALUE rb_attr_get(VALUE, ID); -VALUE rb_obj_instance_variables(VALUE); -VALUE rb_obj_remove_instance_variable(VALUE, VALUE); + +/** + * Aliases a global variable. Did you know that you can alias a global + * variable? It is like aliasing methods: + * + * ```ruby + * alias $dst $src + * ``` + * + * This C function does the same thing. + * + * @param[in] dst Destination name. + * @param[in] src Source name. + * @post A global variable named `dst` is defined to be an alias of a + * global variable named `src`. + * + * @internal + * + * Above description is in fact inaccurate. This API interfaces with Ractors. + */ +void rb_alias_variable(ID dst, ID src); + +/** + * Frees the list of instance variables. 3rd parties need not know, but there + * are several ways to store an object's instance variables, depending on its + * internal structure. This function makes sense when the passed object is + * using so-called "generic" backend storage. People need not be aware of this + * working behind-the-scenes. + * + * @param[out] obj The object in question. + * + * @internal + * + * This just destroys the given object. @shyouhei has no idea why extension + * libraries should use this API. + */ +void rb_free_generic_ivar(VALUE obj); + +/** + * Identical to rb_iv_get(), except it accepts the name as an ::ID instead of a + * C string. + * + * @param[in] obj Target object. + * @param[in] name Target instance variable to query. + * @retval RUBY_Qnil No such instance variable. + * @retval otherwise The value assigned to the instance variable.
+ */ +VALUE rb_ivar_get(VALUE obj, ID name); + +/** + * Identical to rb_iv_set(), except it accepts the name as an ::ID instead of a + * C string. + * + * @param[out] obj Target object. + * @param[in] name Target instance variable. + * @param[in] val Value to assign. + * @exception rb_eFrozenError Can't modify `obj`. + * @exception rb_eArgError `obj` has too many instance variables. + * @return Passed value. + * @post An instance variable named `name` is defined if absent on + * `obj`, whose value is set to `val`. + */ +VALUE rb_ivar_set(VALUE obj, ID name, VALUE val); + +/** + * Queries if the instance variable is defined at the object. This roughly + * resembles `defined?(@name)` in `obj`'s context. + * + * @param[in] obj Target object. + * @param[in] name Target instance variable to query. + * @retval RUBY_Qtrue There is an instance variable. + * @retval RUBY_Qfalse No such instance variable. + */ +VALUE rb_ivar_defined(VALUE obj, ID name); + +/** + * Iterates over an object's instance variables. + * + * @param[in] obj Target object. + * @param[in] func Callback function. + * @param[in] arg Passed as-is to the last argument of `func`. + */ +void rb_ivar_foreach(VALUE obj, int (*func)(ID name, VALUE val, st_data_t arg), st_data_t arg); + +/** + * Number of instance variables defined on an object. + * + * @param[in] obj Target object. + * @return Number of instance variables defined on `obj`. + */ +st_index_t rb_ivar_count(VALUE obj); + +/** + * Identical to rb_ivar_get(). + * + * @param[in] obj Target object. + * @param[in] name Target instance variable to query. + * @retval RUBY_Qnil No such instance variable. + * @retval otherwise The value assigned to the instance variable. + * + * @internal + * + * Am I missing something? Why do we have the same thing in different names? + */ +VALUE rb_attr_get(VALUE obj, ID name); + +/** + * Resembles `Object#instance_variables`. + * + * @param[in] obj Target object to query.
+ * @return An array of instance variable names for the receiver. + * @note Simply defining an accessor does not create the corresponding + * instance variable. + */ +VALUE rb_obj_instance_variables(VALUE obj); + +/** + * Resembles `Object#remove_instance_variable`. + * + * @param[out] obj Target object. + * @param[in] name Variable name to remove, either a Symbol or a String. + * @return What was removed. + * @post Instance variable named `name` is deleted from `obj`. + */ +VALUE rb_obj_remove_instance_variable(VALUE obj, VALUE name); + +/** + * This API is mysterious. It has been there since the initial revision. Not a + * single bit of documentation has ever been written. The function name doesn't + * describe anything. What should be passed to the argument, or what should be + * the return value, are not obvious. Yet it has evolved over time. The + * source code is written in a counter-intuitive way (as of 3.0). + * + * Simply put, don't try to understand this API. + */ void *rb_mod_const_at(VALUE, void*); + +/** + * This is a variant of rb_mod_const_at(). As a result, it is also mysterious. + * It _seems_ it iterates over the ancestry tree of the module. But what that + * means is beyond a human brain. + */ void *rb_mod_const_of(VALUE, void*); + +/** + * This is another mysterious API that comes with no documentation at all. It + * seems it expects some specific data structure for the passed pointer. But + * the details have never been made explicit. It seems nobody should use this + * API. + */ VALUE rb_const_list(void*); -VALUE rb_mod_constants(int, const VALUE *, VALUE); -VALUE rb_mod_remove_const(VALUE, VALUE); -int rb_const_defined(VALUE, ID); -int rb_const_defined_at(VALUE, ID); -int rb_const_defined_from(VALUE, ID); -VALUE rb_const_get(VALUE, ID); -VALUE rb_const_get_at(VALUE, ID); -VALUE rb_const_get_from(VALUE, ID); -void rb_const_set(VALUE, ID, VALUE); -VALUE rb_const_remove(VALUE, ID); + +/** + * Resembles `Module#constants`.
Lists the constants defined at the + * receiver. This includes the names of constants in any included modules, + * unless `argv[0]` is ::RUBY_Qfalse. + * + * The implementation makes no guarantees about the order in which the + * constants are yielded. + * + * @param[in] argc Either 0 or 1. + * @param[in] argv Pointer to ::RUBY_Qfalse, if `argc == 1`. + * @param[in] recv Target namespace. + * @return An array of symbols, which are constant names under `recv`. + */ +VALUE rb_mod_constants(int argc, const VALUE *argv, VALUE recv); + +/** + * Resembles `Module#remove_const`. + * + * @param[out] space Target namespace. + * @param[in] name Constant name to remove, either a Symbol or a String. + * @return What was removed. + * @post Constant named `space::name` is deleted. + * @note In case what was removed was in fact a module or a class, this + * operation does not affect its name. Which means when people + * for instance look at it using `p` etc., it still introduces + * itself using the deleted name. Can confuse people. + */ +VALUE rb_mod_remove_const(VALUE space, VALUE name); + +/** + * Queries if the constant is defined at the namespace. + * + * @param[in] space Target namespace. + * @param[in] name Target name to query. + * @retval RUBY_Qtrue There is a constant. + * @retval RUBY_Qfalse No such constant. + * + * @internal + * + * The return values are not a typo! This function returns ruby values cast to + * `int`. Completely brain-damaged design. + */ +int rb_const_defined(VALUE space, ID name); + +/** + * Identical to rb_const_defined(), except it doesn't look for parent classes. + * For instance `Array` is a toplevel constant, which is visible from + * everywhere. But this function does not take such things into account. It + * concerns only what is directly defined inside of the given namespace. + * + * @param[in] space Target namespace. + * @param[in] name Target name to query. + * @retval RUBY_Qtrue There is a constant.
+ * @retval RUBY_Qfalse No such constant. + * + * @internal + * + * The return values are not a typo! This function returns ruby values cast to + * `int`. Completely brain-damaged design. + */ +int rb_const_defined_at(VALUE space, ID name); + +/** + * Identical to rb_const_defined(), except it returns false for private + * constants. + * + * @param[in] space Target namespace. + * @param[in] name Target name to query. + * @retval RUBY_Qtrue There is a constant. + * @retval RUBY_Qfalse No such constant. + * + * @internal + * + * What does "from" mean? The name sounds quite cryptic. + * + * The return values are not a typo! This function returns ruby values cast to + * `int`. Completely brain-damaged design. + */ +int rb_const_defined_from(VALUE space, ID name); + +/** + * Identical to rb_const_defined(), except it returns the actual defined value. + * + * @param[in] space Target namespace. + * @param[in] name Target name to query. + * @exception rb_eNameError No such constant. + * @return The defined constant. + * + * @internal + * + * Above description is in fact inaccurate. This API interfaces with Ractors. + */ +VALUE rb_const_get(VALUE space, ID name); + +/** + * Identical to rb_const_defined_at(), except it returns the actual defined + * value. It can also be seen as a routine identical to rb_const_get(), except + * it doesn't look for parent classes. + * + * @param[in] space Target namespace. + * @param[in] name Target name to query. + * @exception rb_eNameError No such constant. + * @return The defined constant. + * + * @internal + * + * Above description is in fact inaccurate. This API interfaces with Ractors. + */ +VALUE rb_const_get_at(VALUE space, ID name); + +/** + * Identical to rb_const_defined_from(), except it returns the actual defined + * value. It can also be seen as a routine identical to rb_const_get(), except + * it doesn't return a private constant. + * + * @param[in] space Target namespace. + * @param[in] name Target name to query.
+ * @exception rb_eNameError No such constant. + * @return The defined constant. + * + * @internal + * + * Above description is in fact inaccurate. This API interfaces with Ractors. + */ +VALUE rb_const_get_from(VALUE space, ID name); + +/** + * Names a constant. + * + * @param[out] space Target namespace. + * @param[in] name Name of the constant to define. + * @param[in] val Value to define. + * @exception rb_eTypeError `space` is not a module. + * @post `name` is a constant under `space`, whose value is `val`. + * @note You can reassign. + * + * @internal + * + * Above description is in fact inaccurate. This API interfaces with Ractors. + */ +void rb_const_set(VALUE space, ID name, VALUE val); + +/** + * Identical to rb_mod_remove_const(), except it takes the name as ::ID instead + * of ::VALUE. + * + * @param[out] space Target namespace. + * @param[in] name Name of the constant to remove. + * @return What was removed. + * @post Constant named `space::name` is deleted. + * @note In case what was removed was in fact a module or a class, this + * operation does not affect its name. Which means when people + * for instance look at it using `p` etc., it still introduces + * itself using the deleted name. Can confuse people. + */ +VALUE rb_const_remove(VALUE space, ID name); + #if 0 /* EXPERIMENTAL: remove if no problem */ RBIMPL_ATTR_NORETURN() -VALUE rb_mod_const_missing(VALUE,VALUE); +/** + * This is the default implementation of `Module#const_missing`. + * + * @param[in] space Target namespace. + * @param[in] name Target name that is nonexistent. + * @exception rb_eNameError Always. + */ +VALUE rb_mod_const_missing(VALUE space, VALUE name); #endif -VALUE rb_cvar_defined(VALUE, ID); -void rb_cvar_set(VALUE, ID, VALUE); -VALUE rb_cvar_get(VALUE, ID); -VALUE rb_cvar_find(VALUE, ID, VALUE*); -void rb_cv_set(VALUE, const char*, VALUE); -VALUE rb_cv_get(VALUE, const char*); + +/** + * Queries if the given class has the given class variable.
+ * + * @param[in] klass Target class. + * @param[in] name Name to query. + * @retval RUBY_Qtrue Yes there is. + * @retval RUBY_Qfalse No there isn't. + * @pre `klass` must be an instance of rb_cModule. + * + * @internal + * + * Above description is in fact inaccurate. This API interfaces with Ractors. + */ +VALUE rb_cvar_defined(VALUE klass, ID name); + +/** + * Assigns a value to a class variable. + * + * @param[out] klass Target class. + * @param[in] name Variable name. + * @param[in] val Value to be assigned. + * @post `klass` has a class variable named `name` whose value is `val`. + * + * @internal + * + * Above description is in fact inaccurate. This API interfaces with Ractors. + */ +void rb_cvar_set(VALUE klass, ID name, VALUE val); + +/** + * Obtains a value from a class variable. + * + * @param[in] klass Target class. + * @param[in] name Variable name. + * @exception rb_eNameError Uninitialised class variable. + * @exception rb_eRuntimeError `[Bug#14541]` situation. + * @return Class variable named `name` under `klass`. + * + * @internal + * + * Above description is in fact inaccurate. This API interfaces with Ractors. + */ +VALUE rb_cvar_get(VALUE klass, ID name); + +RBIMPL_ATTR_NONNULL(()) +/** + * Identical to rb_cvar_get(), except it takes an additional "front" pointer. + * This extra parameter is a buffer, which will have the class where the + * queried class variable actually resides. + * + * @param[in] klass Target class. + * @param[in] name Variable name. + * @param[out] front Return buffer. + * @exception rb_eNameError Uninitialised class variable. + * @exception rb_eRuntimeError `[Bug#14541]` situation. + * @return Class variable named `name` under `klass`. + * @post `front` has the class object, which is an ancestor of `klass`, + * where the queried class variable actually resides. + * + * @internal + * + * Above description is in fact inaccurate. This API interfaces with Ractors.
+ */ +VALUE rb_cvar_find(VALUE klass, ID name, VALUE *front); + +RBIMPL_ATTR_NONNULL(()) +/** + * Identical to rb_cvar_set(), except it accepts C's string instead of ::ID. + * + * @param[out] klass Target class. + * @param[in] name Variable name. + * @param[in] val Value to be assigned. + * @post `klass` has a class variable named `name` whose value is `val`. + */ +void rb_cv_set(VALUE klass, const char *name, VALUE val); + +RBIMPL_ATTR_NONNULL(()) +/** + * Identical to rb_cvar_get(), except it accepts C's string instead of ::ID. + * + * @param[in] klass Target class. + * @param[in] name Variable name. + * @exception rb_eNameError Uninitialised class variable. + * @exception rb_eRuntimeError `[Bug#14541]` situation. + * @return Class variable named `name` under `klass`. + */ +VALUE rb_cv_get(VALUE klass, const char *name); + +RBIMPL_ATTR_NONNULL(()) +/** + * @alias{rb_cv_set} + * + * @internal + * + * Am I missing something? Why do we have the same thing in different names? + */ void rb_define_class_variable(VALUE, const char*, VALUE); -VALUE rb_mod_class_variables(int, const VALUE*, VALUE); -VALUE rb_mod_remove_cvar(VALUE, VALUE); + +/** + * Resembles `Module#class_variables`. Lists the class variables defined at the + * receiver. This includes the names of class variables in any included modules, + * unless `argv[0]` is ::RUBY_Qfalse. + * + * The implementation makes no guarantees about the order in which the + * class variables are listed. + * + * @param[in] argc Either 0 or 1. + * @param[in] argv Pointer to ::RUBY_Qfalse, if `argc == 1`. + * @param[in] recv Target class. + * @return An array of symbols, which are class variable names under + * `recv`. + */ +VALUE rb_mod_class_variables(int argc, const VALUE *argv, VALUE recv); + +/** + * Resembles `Module#remove_class_variable`. + * + * @param[out] mod Target class. + * @param[in] name Variable name to remove, either a Symbol or a String. + * @return What was removed. + * @post Class variable named `name` is deleted from `mod`.
+ */ +VALUE rb_mod_remove_cvar(VALUE mod, VALUE name); RBIMPL_SYMBOL_EXPORT_END() diff --git a/include/ruby/internal/intern/vm.h b/include/ruby/internal/intern/vm.h index 298a6ad2bb..29e0c7f534 100644 --- a/include/ruby/internal/intern/vm.h +++ b/include/ruby/internal/intern/vm.h @@ -17,9 +17,10 @@ * recursively included from extension libraries written in C++. * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of - * extension libraries. They could be written in C++98. - * @brief Public APIs related to ::rb_cRubyVM. + * extension libraries. They could be written in C++98. + * @brief Public APIs related to rb_cRubyVM. */ +#include "ruby/internal/attr/nonnull.h" #include "ruby/internal/attr/noreturn.h" #include "ruby/internal/dllexport.h" #include "ruby/internal/value.h" @@ -27,40 +28,373 @@ RBIMPL_SYMBOL_EXPORT_BEGIN() /* vm.c */ + +/** + * Resembles `__LINE__`. + * + * @retval 0 Current execution context not in a ruby method. + * @retval otherwise The current line number of the current thread of the + * current ractor of the current execution context. + */ int rb_sourceline(void); + +/** + * Resembles `__FILE__`. + * + * @retval 0 Current execution context not in a ruby method. + * @retval otherwise The current source path of the current thread of the + * current ractor of the current execution context. + * @note This may or may not be an absolute path. + */ const char *rb_sourcefile(void); + +/** + * Resembles `__method__`. + * + * @param[out] idp Return buffer for method id. + * @param[out] klassp Return buffer for class. + * @retval 0 Current execution context not in a method. + * @retval 1 Successful return. + * @post Upon successful return `*idp` and `*klassp` are updated to have + * the current method name and its defined class respectively. + * @note Both parameters can be `NULL`. 
+ */ int rb_frame_method_id_and_class(ID *idp, VALUE *klassp); /* vm_eval.c */ -VALUE rb_check_funcall(VALUE, ID, int, const VALUE*); -VALUE rb_check_funcall_kw(VALUE, ID, int, const VALUE*, int); -void rb_remove_method(VALUE, const char*); -void rb_remove_method_id(VALUE, ID); -VALUE rb_eval_cmd_kw(VALUE, VALUE, int); -VALUE rb_apply(VALUE, ID, VALUE); +/** + * Identical to rb_funcallv(), except it returns ::RUBY_Qundef instead of + * raising ::rb_eNoMethodError. + * + * @param[in,out] recv Receiver of the method. + * @param[in] mid Name of the method to call. + * @param[in] argc Number of arguments. + * @param[in] argv Arbitrary number of method arguments. + * @retval RUBY_Qundef `recv` doesn't respond to `mid`. + * @retval otherwise What the method evaluates to. + */ +VALUE rb_check_funcall(VALUE recv, ID mid, int argc, const VALUE *argv); -VALUE rb_obj_instance_eval(int, const VALUE*, VALUE); -VALUE rb_obj_instance_exec(int, const VALUE*, VALUE); -VALUE rb_mod_module_eval(int, const VALUE*, VALUE); -VALUE rb_mod_module_exec(int, const VALUE*, VALUE); +/** + * Identical to rb_check_funcall(), except you can specify how to handle the + * last element of the given array. It can also be seen as a routine identical + * to rb_funcallv_kw(), except it returns ::RUBY_Qundef instead of raising + * ::rb_eNoMethodError. + * + * @param[in,out] recv Receiver of the method. + * @param[in] mid Name of the method to call. + * @param[in] argc Number of arguments. + * @param[in] argv Arbitrary number of method arguments. + * @param[in] kw_splat Handling of keyword parameters: + * - RB_NO_KEYWORDS `argv`'s last is not a keyword argument. + * - RB_PASS_KEYWORDS `argv`'s last is a keyword argument. + * - RB_PASS_CALLED_KEYWORDS it depends if there is a passed block. + * @retval RUBY_Qundef `recv` doesn't respond to `mid`. + * @retval otherwise What the method evaluates to. 
+ */ +VALUE rb_check_funcall_kw(VALUE recv, ID mid, int argc, const VALUE *argv, int kw_splat); + +/** + * This API is practically a variant of rb_proc_call_kw() now. Historically + * when there still was a concept called `$SAFE`, this was an API for that. + * But we no longer have that. This function basically ended its role. It + * just remains here because of no harm. + * + * @param[in] cmd A string, or something callable. + * @param[in] arg Argument passed to the call. + * @param[in] kw_splat Handling of keyword parameters: + * - RB_NO_KEYWORDS `arg`'s last is not a keyword argument. + * - RB_PASS_KEYWORDS `arg`'s last is a keyword argument. + * - RB_PASS_CALLED_KEYWORDS it depends if there is a passed block. + * @return What the command evaluates to. + */ +VALUE rb_eval_cmd_kw(VALUE cmd, VALUE arg, int kw_splat); + +/** + * Identical to rb_funcallv(), except it takes Ruby's array instead of C's. + * @param[in,out] recv Receiver of the method. + * @param[in] mid Name of the method to call. + * @param[in] args An instance of ::RArray. + * @exception rb_eNoMethodError No such method. + * @exception rb_eException Any exceptions happen inside. + * @return What the method evaluates to. + * @pre `args` must be an ::RArray. Call `to_ary` beforehand when + * necessary. + */ +VALUE rb_apply(VALUE recv, ID mid, VALUE args); + +/** + * Evaluates a string containing Ruby source code, or the given block, within + * the context of the receiver. In order to set the context, the variable + * `self` is set to `recv` while the code is executing, giving the code access + * to `recv`'s instance variables and private methods. + * + * When given a block, `recv` is also passed in as the block's only argument. + * + * When given a string, the optional second and third parameters supply a + * filename and starting line number that are used when reporting compilation + * errors. + * + * @param[in] argc Number of objects in `argv` + * @param[in] argv C array of 0 up to 3 elements. 
+ * @param[in] recv The object in question. + * @return What was evaluated. + */ +VALUE rb_obj_instance_eval(int argc, const VALUE *argv, VALUE recv); + +/** + * Executes the given block within the context of the receiver. In order to + * set the context, the variable `self` is set to `recv` while the code is + * executing, giving the code access to `recv`'s instance variables. Arguments + * are passed as block parameters. + * + * @param[in] argc Number of objects in `argv` + * @param[in] argv Arbitrary parameters to be passed to the block. + * @param[in] recv The object in question. + * @return What was evaluated. + * @note Don't confuse this with rb_obj_instance_eval(). The key + * difference is whether you can pass arbitrary parameters to the + * block, like this: + * + * ```ruby + * class Foo + * def initialize + * @foo = 5 + * end + * end + * Foo.new.instance_exec(7) {|i| @foo + i } # => 12 + * ``` + */ +VALUE rb_obj_instance_exec(int argc, const VALUE *argv, VALUE recv); + +/** + * Identical to rb_obj_instance_eval(), except it evaluates within the context + * of module. + * + * @param[in] argc Number of objects in `argv` + * @param[in] argv C array of 0 up to 3 elements. + * @param[in] mod The module in question. + * @pre `mod` must be a Module. + * @return What was evaluated. + */ +VALUE rb_mod_module_eval(int argc, const VALUE *argv, VALUE mod); + +/** + * Identical to rb_obj_instance_exec(), except it evaluates within the context + * of module. + * + * @param[in] argc Number of objects in `argv` + * @param[in] argv Arbitrary parameters to be passed to the block. + * @param[in] mod The module in question. + * @pre `mod` must be a Module. + * @return What was evaluated. + */ +VALUE rb_mod_module_exec(int argc, const VALUE *argv, VALUE mod); /* vm_method.c */ + +/** + * @private + * + * @deprecated This macro once was a thing in the old days, but makes no sense + * any longer today. Exists here for backwards compatibility + * only. 
You can safely forget about it. + */ #define HAVE_RB_DEFINE_ALLOC_FUNC 1 -typedef VALUE (*rb_alloc_func_t)(VALUE); -void rb_define_alloc_func(VALUE, rb_alloc_func_t); -void rb_undef_alloc_func(VALUE); -rb_alloc_func_t rb_get_alloc_func(VALUE); -void rb_clear_constant_cache(void); -void rb_alias(VALUE, ID, ID); -void rb_attr(VALUE,ID,int,int,int); -int rb_method_boundp(VALUE, ID, int); -int rb_method_basic_definition_p(VALUE, ID); - -int rb_obj_respond_to(VALUE, ID, int); -int rb_respond_to(VALUE, ID); + +/** + * This is the type of functions that ruby calls when trying to allocate an + * object. It is sometimes necessary to allocate extra memory regions for an + * object. When you define a class that uses ::RTypedData, it is typically the + * case. On such situations define a function of this type and pass it to + * rb_define_alloc_func(). + * + * @param[in] klass The class that this function is registered. + * @return A newly allocated instance of `klass`. + */ +typedef VALUE (*rb_alloc_func_t)(VALUE klass); + +/** + * Sets the allocator function of a class. + * + * @param[out] klass The class to modify. + * @param[in] func An allocator function for the class. + * @pre `klass` must be an instance of Class. + */ +void rb_define_alloc_func(VALUE klass, rb_alloc_func_t func); + +/** + * Deletes the allocator function of a class. It is sometimes desirable to + * restrict creation of an instance of a class. For example it rarely makes + * sense for a DB adaptor class to allow programmers creating DB row objects + * without querying the DB itself. You can kill sporadic creation of such + * objects then, by nullifying the allocator function using this API. + * + * @param[out] klass The class to modify. + * @pre `klass` must be an instance of Class. + */ +void rb_undef_alloc_func(VALUE klass); + +/** + * Queries the allocator function of a class. + * + * @param[in] klass The class in question. + * @pre `klass` must be an instance of Class. 
+ * @retval 0 No allocator function is registered. + * @retval otherwise The allocator function. + * + * @internal + * + * Who cares? @shyouhei finds no practical usage of the return value. Maybe we + * need KonMari. + */ +rb_alloc_func_t rb_get_alloc_func(VALUE klass); + +/** + * Clears the inline constant caches associated with a particular ID. Extension + * libraries should not bother with such things. Just forget about this API (or + * even, the presence of constant caches). + */ +void rb_clear_constant_cache_for_id(ID id); + +/** + * Resembles `alias`. + * + * @param[out] klass Where to define an alias. + * @param[in] dst New name. + * @param[in] src Existing name. + * @exception rb_eTypeError `klass` is not a class. + * @exception rb_eFrozenError `klass` is frozen. + * @exception rb_eNameError No such method named `src`. + * @post `klass` has a method named `dst`, which is the identical to its + * method named `src`. + */ +void rb_alias(VALUE klass, ID dst, ID src); + +/** + * This function resembles now-deprecated `Module#attr`. + * + * @param[out] klass Where to define an attribute. + * @param[in] name Name of an instance variable. + * @param[in] need_reader Whether attr_reader is needed. + * @param[in] need_writer Whether attr_writer is needed. + * @param[in] honour_visibility Whether to use the current visibility. + * @exception rb_eTypeError `klass` is not a class. + * @exception rb_eFrozenError `klass` is frozen. + * @post If `need_reader` is set `klass` has a method named `name`. + * @post If `need_writer` is set `klass` has a method named `name=`. + * + * @internal + * + * The three `int` arguments should have been bool, but there was no such thing + * like a bool when K&R was used in this project. + */ +void rb_attr(VALUE klass, ID name, int need_reader, int need_writer, int honour_visibility); + +RBIMPL_ATTR_NONNULL(()) +/** + * Removes a method. Don't confuse this to rb_undef_method(), which doesn't + * remove a method. 
This one resembles `Module#remove_method`. + * + * @param[out] klass The class to remove a method. + * @param[in] name Name of a method to be removed. + * @exception rb_eTypeError `klass` is a non-module. + * @exception rb_eFrozenError `klass` is frozen. + * @exception rb_eNameError No such method. + * @see rb_undef_method + */ +void rb_remove_method(VALUE klass, const char *name); + +/** + * Identical to rb_remove_method(), except it accepts the method name as ::ID. + * + * @param[out] klass The class to remove a method. + * @param[in] mid Name of a method to be removed. + * @exception rb_eTypeError `klass` is a non-module. + * @exception rb_eFrozenError `klass` is frozen. + * @exception rb_eNameError No such method. + * @see rb_undef + */ +void rb_remove_method_id(VALUE klass, ID mid); + +/** + * Queries if the klass has this method. This function has only one line of + * document in the implementation that states "// deprecated". Don't know what + * that means though. + * + * @param[in] klass The class in question. + * @param[in] id The method name to query. + * @param[in] ex Undocumented magic value. + * @retval false Method not found. + * @retval true There is a method. + * @pre `klass` must be a module. + * + * @internal + * + * @shyouhei has no motivation to describe what should be passed to `ex`. It + * seems this function should just be trashed. + */ +int rb_method_boundp(VALUE klass, ID id, int ex); + +/** + * Well... Let us hesitate from describing what a "basic definition" is. This + * nuanced concept should have been kept private. Just please. Don't touch + * it. This function is a badly distributed random number generator. Right? + * + * @param[in] klass The class in question. + * @param[in] mid The method name in question. + * @retval 1 It is. + * @retval 0 It isn't. + */ +int rb_method_basic_definition_p(VALUE klass, ID mid); + +/** + * Identical to rb_respond_to(), except it additionally takes the visibility + * parameter. 
This does not make difference unless the object has + * `respond_to?` undefined, but has `respond_to_missing?` defined. That case + * the passed argument becomes the second argument of `respond_to_missing?`. + * + * @param[in] obj The object in question. + * @param[in] mid The method name in question. + * @param[in] private_p This is the second argument of `obj`'s + * `respond_to_missing?`. + * @retval 1 Yes it does. + * @retval 0 No it doesn't. + */ +int rb_obj_respond_to(VALUE obj, ID mid, int private_p); + +/** + * Queries if the object responds to the method. This involves calling the + * object's `respond_to?` method. + * + * @param[in] obj The object in question. + * @param[in] mid The method name in question. + * @retval 1 Yes it does. + * @retval 0 No it doesn't. + */ +int rb_respond_to(VALUE obj, ID mid); RBIMPL_ATTR_NORETURN() +/** + * Raises ::rb_eNotImpError. This function is used as an argument to + * rb_define_method() etc. + * + * ```CXX + * rb_define_method(rb_cFoo, "foo", rb_f_notimplement, -1); + * ``` + * + * @param argc Unused parameter. + * @param argv Unused parameter. + * @param obj Unused parameter. + * @param marker Unused parameter. + * @exception rb_eNotImpError Always. + * @return Never returns. + * + * @internal + * + * See also the Q&A section of include/ruby/internal/anyargs.h. + */ VALUE rb_f_notimplement(int argc, const VALUE *argv, VALUE obj, VALUE marker); #if !defined(RUBY_EXPORT) && defined(_WIN32) RUBY_EXTERN VALUE (*const rb_f_notimplement_)(int, const VALUE *, VALUE, VALUE marker); @@ -68,7 +402,27 @@ RUBY_EXTERN VALUE (*const rb_f_notimplement_)(int, const VALUE *, VALUE, VALUE m #endif /* vm_backtrace.c */ + +/** + * Prints the backtrace out to the standard error. This just confuses people + * for no reason. Evil souls must only use it. + * + * @internal + * + * Actually it is very useful when called from an interactive GDB session. 
+ */ void rb_backtrace(void); + +/** + * Creates the good old fashioned array-of-strings style backtrace info. + * + * @return An array which contains strings, which are the textual + * representations of the backtrace locations of the current thread of + * the current ractor of the current execution context. + * @note Ruby scripts can access more sophisticated + * `Thread::Backtrace::Location`. But it seems there is no way for C + * extensions to use that API. + */ VALUE rb_make_backtrace(void); RBIMPL_SYMBOL_EXPORT_END() |