summaryrefslogtreecommitdiff
path: root/include/ruby/encoding.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/ruby/encoding.h')
-rw-r--r--include/ruby/encoding.h2278
1 files changed, 9 insertions, 2269 deletions
diff --git a/include/ruby/encoding.h b/include/ruby/encoding.h
index 95cf74a3b8..1256393701 100644
--- a/include/ruby/encoding.h
+++ b/include/ruby/encoding.h
@@ -16,2276 +16,16 @@
* relatively less rooms for things in this header file be useful when writing
* an extension library.
*/
-#include "ruby/internal/config.h"
-#include <stdarg.h>
#include "ruby/ruby.h"
-#include "ruby/oniguruma.h"
-#include "ruby/internal/attr/const.h"
-#include "ruby/internal/attr/deprecated.h"
-#include "ruby/internal/attr/format.h"
-#include "ruby/internal/attr/noalias.h"
-#include "ruby/internal/attr/nonnull.h"
-#include "ruby/internal/attr/noreturn.h"
-#include "ruby/internal/attr/returns_nonnull.h"
-#include "ruby/internal/attr/pure.h"
-#include "ruby/internal/core/rbasic.h"
-#include "ruby/internal/dllexport.h"
-#include "ruby/internal/fl_type.h"
-RBIMPL_SYMBOL_EXPORT_BEGIN()
-
-/**
- * @private
- *
- * Bit constants used when embedding encodings into ::RBasic::flags. Extension
- * libraries must not bother such things.
- */
-enum ruby_encoding_consts {
-
- /** Max possible number of embeddable encodings. */
- RUBY_ENCODING_INLINE_MAX = 127,
-
- /** Where inline encodings reside. */
- RUBY_ENCODING_SHIFT = (RUBY_FL_USHIFT+10),
-
- /** Bits we use to store inline encodings. */
- RUBY_ENCODING_MASK = (RUBY_ENCODING_INLINE_MAX<<RUBY_ENCODING_SHIFT
- /* RUBY_FL_USER10..RUBY_FL_USER16 */),
-
- /** Max possible length of an encoding name. */
- RUBY_ENCODING_MAXNAMELEN = 42
-};
-
-#define ENCODING_INLINE_MAX RUBY_ENCODING_INLINE_MAX /**< @old{RUBY_ENCODING_INLINE_MAX} */
-#define ENCODING_SHIFT RUBY_ENCODING_SHIFT /**< @old{RUBY_ENCODING_SHIFT} */
-#define ENCODING_MASK RUBY_ENCODING_MASK /**< @old{RUBY_ENCODING_SHIFT} */
-
-/**
- * Destructively assigns the passed encoding to the passed object. The object
- * must be capable of having inline encoding. Using this macro needs deep
- * understanding of bit level object binary layout.
- *
- * @param[out] obj Target object to modify.
- * @param[in] i Encoding in encindex format.
- * @post `obj`'s encoding is `i`.
- */
-#define RB_ENCODING_SET_INLINED(obj,i) do {\
- RBASIC(obj)->flags &= ~RUBY_ENCODING_MASK;\
- RBASIC(obj)->flags |= (VALUE)(i) << RUBY_ENCODING_SHIFT;\
-} while (0)
-
-/** @alias{rb_enc_set_index} */
-#define RB_ENCODING_SET(obj,i) rb_enc_set_index((obj), (i))
-
-/**
- * Queries the encoding of the passed object. The encoding must be smaller
- * than ::RUBY_ENCODING_INLINE_MAX, which means you have some assumption on the
- * return value. This means the API is for internal use only.
- *
- * @param[in] obj Target object.
- * @return `obj`'s encoding index.
- */
-#define RB_ENCODING_GET_INLINED(obj) \
- (int)((RBASIC(obj)->flags & RUBY_ENCODING_MASK)>>RUBY_ENCODING_SHIFT)
-
-/**
- * @alias{rb_enc_get_index}
- *
- * @internal
- *
- * Implementation wise this is not a verbatim alias of rb_enc_get_index(). But
- * the API is consistent. Don't bother.
- */
-#define RB_ENCODING_GET(obj) \
- (RB_ENCODING_GET_INLINED(obj) != RUBY_ENCODING_INLINE_MAX ? \
- RB_ENCODING_GET_INLINED(obj) : \
- rb_enc_get_index(obj))
-
-/**
- * Queries if the passed object is in ascii 8bit (== binary) encoding. The
- * object must be capable of having inline encoding. Using this macro needs
- * deep understanding of bit level object binary layout.
- *
- * @param[in] obj An object to check.
- * @retval 1 It is.
- * @retval 0 It isn't.
- */
-#define RB_ENCODING_IS_ASCII8BIT(obj) (RB_ENCODING_GET_INLINED(obj) == 0)
-
-#define ENCODING_SET_INLINED(obj,i) RB_ENCODING_SET_INLINED(obj,i) /**< @old{RB_ENCODING_SET_INLINED} */
-#define ENCODING_SET(obj,i) RB_ENCODING_SET(obj,i) /**< @old{RB_ENCODING_SET} */
-#define ENCODING_GET_INLINED(obj) RB_ENCODING_GET_INLINED(obj) /**< @old{RB_ENCODING_GET_INLINED} */
-#define ENCODING_GET(obj) RB_ENCODING_GET(obj) /**< @old{RB_ENCODING_GET} */
-#define ENCODING_IS_ASCII8BIT(obj) RB_ENCODING_IS_ASCII8BIT(obj) /**< @old{RB_ENCODING_IS_ASCII8BIT} */
-#define ENCODING_MAXNAMELEN RUBY_ENCODING_MAXNAMELEN /**< @old{RUBY_ENCODING_MAXNAMELEN} */
-
-/** What rb_enc_str_coderange() returns. */
-enum ruby_coderange_type {
-
- /** The object's coderange is unclear yet. */
- RUBY_ENC_CODERANGE_UNKNOWN = 0,
-
- /** The object holds 0 to 127 inclusive and nothing else. */
- RUBY_ENC_CODERANGE_7BIT = ((int)RUBY_FL_USER8),
-
- /** The object's encoding and contents are consistent each other */
- RUBY_ENC_CODERANGE_VALID = ((int)RUBY_FL_USER9),
-
- /** The object holds invalid/malformed/broken character(s). */
- RUBY_ENC_CODERANGE_BROKEN = ((int)(RUBY_FL_USER8|RUBY_FL_USER9)),
-
- /** Where the coderange resides. */
- RUBY_ENC_CODERANGE_MASK = (RUBY_ENC_CODERANGE_7BIT|
- RUBY_ENC_CODERANGE_VALID|
- RUBY_ENC_CODERANGE_BROKEN)
-};
-
-RBIMPL_ATTR_CONST()
-/**
- * @private
- *
- * This is an implementation detail of #RB_ENC_CODERANGE_CLEAN_P. People don't
- * use it directly.
- *
- * @param[in] cr An enum ::ruby_coderange_type.
- * @retval 1 It is.
- * @retval 0 It isn't.
- */
-static inline int
-rb_enc_coderange_clean_p(int cr)
-{
- return (cr ^ (cr >> 1)) & RUBY_ENC_CODERANGE_7BIT;
-}
-
-/**
- * Queries if a code range is "clean". "Clean" in this context means it is
- * known and valid.
- *
- * @param[in] cr An enum ::ruby_coderange_type.
- * @retval 1 It is.
- * @retval 0 It isn't.
- */
-#define RB_ENC_CODERANGE_CLEAN_P(cr) rb_enc_coderange_clean_p(cr)
-
-/**
- * Queries the (inline) code range of the passed object. The object must be
- * capable of having inline encoding. Using this macro needs deep
- * understanding of bit level object binary layout.
- *
- * @param[in] obj Target object.
- * @return An enum ::ruby_coderange_type.
- */
-#define RB_ENC_CODERANGE(obj) ((int)RBASIC(obj)->flags & RUBY_ENC_CODERANGE_MASK)
-
-/**
- * Queries the (inline) code range of the passed object is
- * ::RUBY_ENC_CODERANGE_7BIT. The object must be capable of having inline
- * encoding. Using this macro needs deep understanding of bit level object
- * binary layout.
- *
- * @param[in] obj Target object.
- * @retval 1 It is ascii only.
- * @retval 0 Otherwise (including cases when the range is not known).
- */
-#define RB_ENC_CODERANGE_ASCIIONLY(obj) (RB_ENC_CODERANGE(obj) == RUBY_ENC_CODERANGE_7BIT)
-
-/**
- * Destructively modifies the passed object so that its (inline) code range is
- * the passed one. The object must be capable of having inline encoding.
- * Using this macro needs deep understanding of bit level object binary layout.
- *
- * @param[out] obj Target object.
- * @param[out] cr An enum ::ruby_coderange_type.
- * @post `obj`'s code range is `cr`.
- */
-#define RB_ENC_CODERANGE_SET(obj,cr) (\
- RBASIC(obj)->flags = \
- (RBASIC(obj)->flags & ~RUBY_ENC_CODERANGE_MASK) | (cr))
-
-/**
- * Destructively clears the passed object's (inline) code range. The object
- * must be capable of having inline encoding. Using this macro needs deep
- * understanding of bit level object binary layout.
- *
- * @param[out] obj Target object.
- * @post `obj`'s code range is ::RUBY_ENC_CODERANGE_UNKNOWN.
- */
-#define RB_ENC_CODERANGE_CLEAR(obj) RB_ENC_CODERANGE_SET((obj),0)
-
-/* assumed ASCII compatibility */
-/**
- * "Mix" two code ranges into one. This is handy for instance when you
- * concatenate two strings into one. Consider one of then is valid but the
- * other isn't. The result must be invalid. This macro computes that kind of
- * mixture.
- *
- * @param[in] a An enum ::ruby_coderange_type.
- * @param[in] b Another enum ::ruby_coderange_type.
- * @return The `a` "and" `b`.
- */
-#define RB_ENC_CODERANGE_AND(a, b) \
- ((a) == RUBY_ENC_CODERANGE_7BIT ? (b) : \
- (a) != RUBY_ENC_CODERANGE_VALID ? RUBY_ENC_CODERANGE_UNKNOWN : \
- (b) == RUBY_ENC_CODERANGE_7BIT ? RUBY_ENC_CODERANGE_VALID : (b))
-
-/**
- * This is #RB_ENCODING_SET + RB_ENC_CODERANGE_SET combo. The object must be
- * capable of having inline encoding. Using this macro needs deep
- * understanding of bit level object binary layout.
- *
- * @param[out] obj Target object.
- * @param[in] encindex Encoding in encindex format.
- * @param[in] cr An enum ::ruby_coderange_type.
- * @post `obj`'s encoding is `encindex`.
- * @post `obj`'s code range is `cr`.
- */
-#define RB_ENCODING_CODERANGE_SET(obj, encindex, cr) \
- do { \
- VALUE rb_encoding_coderange_obj = (obj); \
- RB_ENCODING_SET(rb_encoding_coderange_obj, (encindex)); \
- RB_ENC_CODERANGE_SET(rb_encoding_coderange_obj, (cr)); \
- } while (0)
-
-#define ENC_CODERANGE_MASK RUBY_ENC_CODERANGE_MASK /**< @old{RUBY_ENC_CODERANGE_MASK} */
-#define ENC_CODERANGE_UNKNOWN RUBY_ENC_CODERANGE_UNKNOWN /**< @old{RUBY_ENC_CODERANGE_UNKNOWN} */
-#define ENC_CODERANGE_7BIT RUBY_ENC_CODERANGE_7BIT /**< @old{RUBY_ENC_CODERANGE_7BIT} */
-#define ENC_CODERANGE_VALID RUBY_ENC_CODERANGE_VALID /**< @old{RUBY_ENC_CODERANGE_VALID} */
-#define ENC_CODERANGE_BROKEN RUBY_ENC_CODERANGE_BROKEN /**< @old{RUBY_ENC_CODERANGE_BROKEN} */
-#define ENC_CODERANGE_CLEAN_P(cr) RB_ENC_CODERANGE_CLEAN_P(cr) /**< @old{RB_ENC_CODERANGE_CLEAN_P} */
-#define ENC_CODERANGE(obj) RB_ENC_CODERANGE(obj) /**< @old{RB_ENC_CODERANGE} */
-#define ENC_CODERANGE_ASCIIONLY(obj) RB_ENC_CODERANGE_ASCIIONLY(obj) /**< @old{RB_ENC_CODERANGE_ASCIIONLY} */
-#define ENC_CODERANGE_SET(obj,cr) RB_ENC_CODERANGE_SET(obj,cr) /**< @old{RB_ENC_CODERANGE_SET} */
-#define ENC_CODERANGE_CLEAR(obj) RB_ENC_CODERANGE_CLEAR(obj) /**< @old{RB_ENC_CODERANGE_CLEAR} */
-#define ENC_CODERANGE_AND(a, b) RB_ENC_CODERANGE_AND(a, b) /**< @old{RB_ENC_CODERANGE_AND} */
-#define ENCODING_CODERANGE_SET(obj, encindex, cr) RB_ENCODING_CODERANGE_SET(obj, encindex, cr) /**< @old{RB_ENCODING_CODERANGE_SET} */
-
-/**
- * The type of encoding. Our design here is we take Oniguruma/Onigmo's
- * multilingualisation schema as our base data structure.
- */
-typedef const OnigEncodingType rb_encoding;
-
-RBIMPL_ATTR_NOALIAS()
-/**
- * Converts a character option to its encoding. It only supports a very
- * limited set of Japanese encodings due to its Japanese origin. Ruby still
- * has this in-core for backwards compatibility. But new codes must not bother
- * such concept like one-character encoding option. Consider deprecated in
- * practice.
- *
- * @param[in] c One of `['n', 'e', 's', 'u', 'i', 'x', 'm']`.
- * @param[out] option Return buffer.
- * @param[out] kcode Return buffer.
- * @retval 1 `c` understood properly.
- * @retval 0 `c` is not understood.
- * @post `option` is a ::OnigOptionType.
- * @post `kcode` is an enum `ruby_preserved_encindex`.
- *
- * @internal
- *
- * `kcode` is opaque because `ruby_preserved_encindex` is not visible from
- * extension libraries. But who cares?
- */
-int rb_char_to_option_kcode(int c, int *option, int *kcode);
-
-/**
- * Creates a new encoding, using the passed one as a template.
- *
- * @param[in] name Name of the creating encoding.
- * @param[in] src Template.
- * @exception rb_eArgError Duplicated or malformed `name`.
- * @return Replicated new encoding's index.
- * @post Encoding named `name` is created as a copy of `src`, whose index
- * is the return value.
- *
- * @internal
- *
- * `name` can be `NULL`, but that just raises an exception. OTOH it seems no
- * sanity check is done against `src`...?
- */
-int rb_enc_replicate(const char *name, rb_encoding *src);
-
-/**
- * Creates a new "dummy" encoding. Roughly speaking, an encoding is dummy when
- * it is stateful. Notable example of dummy encoding are those defined in
- * ISO/IEC 2022
- *
- * @param[in] name Name of the creating encoding.
- * @exception rb_eArgError Duplicated or malformed `name`.
- * @return New dummy encoding's index.
- * @post Encoding named `name` is created, whose index is the return
- * value.
- */
-int rb_define_dummy_encoding(const char *name);
-
-RBIMPL_ATTR_PURE()
-/**
- * Queries if the passed encoding is dummy.
- *
- * @param[in] enc Encoding in question.
- * @retval 1 It is.
- * @retval 0 It isn't.
- */
-int rb_enc_dummy_p(rb_encoding *enc);
-
-RBIMPL_ATTR_PURE()
-/**
- * Queries the index of the encoding. An encoding's index is a Ruby-local
- * concept. It is a (sequential) number assigned to each encoding.
- *
- * @param[in] enc Encoding in question.
- * @return Its index.
- * @note You can pass null pointers to this function. It is equivalent
- * to rb_usascii_encindex() then.
- */
-int rb_enc_to_index(rb_encoding *enc);
-
-/**
- * Queries the index of the encoding of the passed object, if any.
- *
- * @param[in] obj Object in question.
- * @retval -1 `obj` is incapable of having an encoding.
- * @retval otherwise `obj`'s encoding's index.
- */
-int rb_enc_get_index(VALUE obj);
-
-/**
- * Destructively assigns an encoding (via its index) to an object.
- *
- * @param[out] obj Object in question.
- * @param[in] encindex An encoding index.
- * @exception rb_eFrozenError `obj` is frozen.
- * @exception rb_eArgError `obj` is incapable of having an encoding.
- * @exception rb_eEncodingError `encindex` is out of bounds.
- * @exception rb_eLoadError Failed to load the encoding.
- */
-void rb_enc_set_index(VALUE obj, int encindex);
-
-RBIMPL_ATTR_PURE()
-/**
- * Queries if the passed object can have its encoding.
- *
- * @param[in] obj Object in question.
- * @retval 1 It can.
- * @retval 0 It cannot.
- */
-int rb_enc_capable(VALUE obj);
-
-/**
- * Queries the index of the encoding.
- *
- * @param[in] name Name of the encoding to find.
- * @exception rb_eArgError No such encoding named `name`.
- * @retval -1 `name` exists, but unable to load.
- * @retval otherwise Index of encoding named `name`.
- */
-int rb_enc_find_index(const char *name);
-
-/**
- * Registers an "alias" name. In the wild, an encoding can be called using
- * multiple names. For instance an encoding known as `"CP932"` is also called
- * `"SJIS"` on occasions. This API registers such relationships.
- *
- * @param[in] alias New name.
- * @param[in] orig Old name.
- * @exception rb_eArgError `alias` is duplicated or malformed.
- * @retval -1 Failed to load `orig`.
- * @retval otherwise The index of `orig` and `alias`.
- * @post `alias` is a synonym of `orig`. They refer to the identical
- * encoding.
- */
-int rb_enc_alias(const char *alias, const char *orig);
-
-/**
- * Obtains a encoding index from a wider range of objects (than
- * rb_enc_find_index()).
- *
- * @param[in] obj An ::rb_cEncoding, or its name in ::rb_cString.
- * @retval -1 `obj` is unexpected type/contents.
- * @retval otherwise Index corresponding to `obj`.
- */
-int rb_to_encoding_index(VALUE obj);
-
-/**
- * Identical to rb_find_encoding(), except it raises an exception instead of
- * returning NULL.
- *
- * @param[in] obj An ::rb_cEncoding, or its name in ::rb_cString.
- * @exception rb_eTypeError `obj` is neither ::rb_cEncoding nor ::rb_cString.
- * @exception rb_eArgError `obj` is an unknown encoding name.
- * @return Encoding of `obj`.
- */
-rb_encoding *rb_to_encoding(VALUE obj);
-
-/**
- * Identical to rb_to_encoding_index(), except the return type.
- *
- * @param[in] obj An ::rb_cEncoding, or its name in ::rb_cString.
- * @exception rb_eTypeError `obj` is neither ::rb_cEncoding nor ::rb_cString.
- * @retval NULL No such encoding.
- * @return otherwise Encoding of `obj`.
- */
-rb_encoding *rb_find_encoding(VALUE obj);
-
-/**
- * Identical to rb_enc_get_index(), except the return type.
- *
- * @param[in] obj Object in question.
- * @retval NULL Obj is incapable of having an encoding.
- * @retval otherwise `obj`'s encoding.
- */
-rb_encoding *rb_enc_get(VALUE obj);
-
-/**
- * Look for the "common" encoding between the two. One character can or cannot
- * be expressed depending on an encoding. This function finds the super-set of
- * encodings that satisfy contents of both arguments. If that is impossible
- * returns NULL.
- *
- * @param[in] str1 An object.
- * @param[in] str2 Another object.
- * @retval NULL No encoding can satisfy both at once.
- * @retval otherwise Common encoding between the two.
- * @note Arguments can be non-string, e.g. Regexp.
- */
-rb_encoding *rb_enc_compatible(VALUE str1, VALUE str2);
-
-/**
- * Identical to rb_enc_compatible(), except it raises an exception instead of
- * returning NULL.
- *
- * @param[in] str1 An object.
- * @param[in] str2 Another object.
- * @exception rb_eEncCompatError No encoding can satisfy both.
- * @return Common encoding between the two.
- * @note Arguments can be non-string, e.g. Regexp.
- */
-rb_encoding *rb_enc_check(VALUE str1,VALUE str2);
-
-/**
- * Identical to rb_enc_set_index(), except it additionally does contents fix-up
- * depending on the passed object. It for instance changes the byte length of
- * terminating `U+0000` according to the passed encoding.
- *
- * @param[out] obj Object in question.
- * @param[in] encindex An encoding index.
- * @exception rb_eFrozenError `obj` is frozen.
- * @exception rb_eArgError `obj` is incapable of having an encoding.
- * @exception rb_eEncodingError `encindex` is out of bounds.
- * @exception rb_eLoadError Failed to load the encoding.
- * @return The passed `obj`.
- * @post `obj`'s contents might be fixed according to `encindex`.
- */
-VALUE rb_enc_associate_index(VALUE obj, int encindex);
-
-/**
- * Identical to rb_enc_associate(), except it takes an encoding itself instead
- * of its index.
- *
- * @param[out] obj Object in question.
- * @param[in] enc An encoding.
- * @exception rb_eFrozenError `obj` is frozen.
- * @exception rb_eArgError `obj` is incapable of having an encoding.
- * @return The passed `obj`.
- * @post `obj`'s contents might be fixed according to `enc`.
- */
-VALUE rb_enc_associate(VALUE obj, rb_encoding *enc);
-
-/**
- * Destructively copies the encoding of the latter object to that of former
- * one. It can also be seen as a routine identical to
- * rb_enc_associate_index(), except it takes an object's encoding instead of an
- * encoding's index.
- *
- * @param[out] dst Object to modify.
- * @param[in] src Object to reference.
- * @exception rb_eFrozenError `dst` is frozen.
- * @exception rb_eArgError `dst` is incapable of having an encoding.
- * @exception rb_eEncodingError `src` is incapable of having an encoding.
- * @post `dst`'s encoding is that of `src`'s.
- */
-void rb_enc_copy(VALUE dst, VALUE src);
-
-/**
- * Identical to rb_enc_str_new(), except it additionally takes an encoding.
- *
- * @param[in] ptr A memory region of `len` bytes length.
- * @param[in] len Length of `ptr`, in bytes, not including the
- * terminating NUL character.
- * @param[in] enc Encoding of `ptr`.
- * @exception rb_eNoMemError Failed to allocate `len+1` bytes.
- * @exception rb_eArgError `len` is negative.
- * @return An instance of ::rb_cString, of `len` bytes length, of `enc`
- * encoding, whose contents are verbatim copy of `ptr`.
- * @pre At least `len` bytes of continuous memory region shall be
- * accessible via `ptr`.
- * @note `enc` can be a null pointer. It can also be seen as a routine
- * identical to rb_usascii_str_new() then.
- */
-VALUE rb_enc_str_new(const char *ptr, long len, rb_encoding *enc);
-
-RBIMPL_ATTR_NONNULL((1))
-/**
- * Identical to rb_enc_str_new(), except it assumes the passed pointer is a
- * pointer to a C string. It can also be seen as a routine identical to
- * rb_str_new_cstr(), except it additionally takes an encoding.
- *
- * @param[in] ptr A C string.
- * @param[in] enc Encoding of `ptr`.
- * @exception rb_eNoMemError Failed to allocate memory.
- * @return An instance of ::rb_cString, of `enc` encoding, whose contents
- * are verbatim copy of `ptr`.
- * @pre `ptr` must not be a null pointer.
- * @pre Because `ptr` is a C string it makes no sense for `enc` to be
- * something like UTF-32.
- * @note `enc` can be a null pointer. It can also be seen as a routine
- * identical to rb_usascii_str_new_cstr() then.
- */
-VALUE rb_enc_str_new_cstr(const char *ptr, rb_encoding *enc);
-
-/**
- * Identical to rb_enc_str_new(), except it takes a C string literal. It can
- * also be seen as a routine identical to rb_str_new_static(), except it
- * additionally takes an encoding.
- *
- * @param[in] ptr A C string literal.
- * @param[in] len `strlen(ptr)`.
- * @param[in] enc Encoding of `ptr`.
- * @exception rb_eArgError `len` out of range of `size_t`.
- * @pre `ptr` must be a C string constant.
- * @return An instance of ::rb_cString, of `enc` encoding, whose backend
- * storage is the passed C string literal.
- * @warning It is a very bad idea to write to a C string literal (often
- * immediate SEGV shall occur). Consider return values of this
- * function be read-only.
- * @note `enc` can be a null pointer. It can also be seen as a routine
- * identical to rb_usascii_str_new_static() then.
- */
-VALUE rb_enc_str_new_static(const char *ptr, long len, rb_encoding *enc);
-
-/**
- * Identical to rb_enc_str_new(), except it returns a "f"string. It can also
- * be seen as a routine identical to rb_interned_str(), except it additionally
- * takes an encoding.
- *
- * @param[in] ptr A memory region of `len` bytes length.
- * @param[in] len Length of `ptr`, in bytes, not including the
- * terminating NUL character.
- * @param[in] enc Encoding of `ptr`.
- * @exception rb_eArgError `len` is negative.
- * @return A found or created instance of ::rb_cString, of `len` bytes
- * length, of `enc` encoding, whose contents are identical to that
- * of `ptr`.
- * @pre At least `len` bytes of continuous memory region shall be
- * accessible via `ptr`.
- * @note `enc` can be a null pointer.
- */
-VALUE rb_enc_interned_str(const char *ptr, long len, rb_encoding *enc);
-
-RBIMPL_ATTR_NONNULL((1))
-/**
- * Identical to rb_enc_str_new_cstr(), except it returns a "f"string. It can
- * also be seen as a routine identical to rb_interned_str_cstr(), except it
- * additionally takes an encoding.
- *
- * @param[in] ptr A memory region of `len` bytes length.
- * @param[in] enc Encoding of `ptr`.
- * @return A found or created instance of ::rb_cString of `enc` encoding,
- * whose contents are identical to that of `ptr`.
- * @pre At least `len` bytes of continuous memory region shall be
- * accessible via `ptr`.
- * @note `enc` can be a null pointer.
- */
-VALUE rb_enc_interned_str_cstr(const char *ptr, rb_encoding *enc);
-
-/**
- * Identical to rb_reg_new(), except it additionally takes an encoding.
- *
- * @param[in] ptr A memory region of `len` bytes length.
- * @param[in] len Length of `ptr`, in bytes, not including the
- * terminating NUL character.
- * @param[in] enc Encoding of `ptr`.
- * @param[in] opts Options e.g. ONIG_OPTION_MULTILINE.
- * @exception rb_eRegexpError Failed to compile `ptr`.
- * @return An allocated new instance of ::rb_cRegexp, of `enc` encoding,
- * whose expression is compiled according to `ptr`.
- */
-VALUE rb_enc_reg_new(const char *ptr, long len, rb_encoding *enc, int opts);
-
-RBIMPL_ATTR_NONNULL((2))
-RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 2, 3)
-/**
- * Identical to rb_sprintf(), except it additionally takes an encoding. The
- * passed encoding rules both the incoming format specifier and the resulting
- * string.
- *
- * @param[in] enc Encoding of `fmt`.
- * @param[in] fmt A `printf`-like format specifier.
- * @param[in] ... Variadic number of contents to format.
- * @return A rendered new instance of ::rb_cString, of `enc` encoding.
- */
-VALUE rb_enc_sprintf(rb_encoding *enc, const char *fmt, ...);
-
-RBIMPL_ATTR_NONNULL((2))
-RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 2, 0)
-/**
- * Identical to rb_enc_sprintf(), except it takes a `va_list` instead of
- * variadic arguments. It can also be seen as a routine identical to
- * rb_vsprintf(), except it additionally takes an encoding.
- *
- * @param[in] enc Encoding of `fmt`.
- * @param[in] fmt A `printf`-like format specifier.
- * @param[in] ap Contents to format.
- * @return A rendered new instance of ::rb_cString, of `enc` encoding.
- */
-VALUE rb_enc_vsprintf(rb_encoding *enc, const char *fmt, va_list ap);
-
-/**
- * Counts the number of characters of the passed string, according to the
- * passed encoding. This has to be complicated. The passed string could be
- * invalid and/or broken. This routine would scan from the beginning til the
- * end, byte by byte, to seek out character boundaries. Could be super slow.
- *
- * @param[in] head Leftmost pointer to the string.
- * @param[in] tail Rightmost pointer to the string.
- * @param[in] enc Encoding of the string.
- * @return Number of characters exist in `head` .. `tail`. The definition
- * of "character" depends on the passed `enc`.
- */
-long rb_enc_strlen(const char *head, const char *tail, rb_encoding *enc);
-
-/**
- * Queries the n-th character. Like rb_enc_strlen() this function can be fast
- * or slow depending on the contents. Don't expect characters to be uniformly
- * distributed across the entire string.
- *
- * @param[in] head Leftmost pointer to the string.
- * @param[in] tail Rightmost pointer to the string.
- * @param[in] nth Requested index of characters.
- * @param[in] enc Encoding of the string.
- * @return Pointer to the first byte of the character that is `nth`
- * character ahead of `head`, or `tail` if there is no such
- * character (OOB etc). The definition of "character" depends on
- * the passed `enc`.
- */
-char *rb_enc_nth(const char *head, const char *tail, long nth, rb_encoding *enc);
-
-/**
- * Identical to rb_enc_get_index(), except the return type.
- *
- * @param[in] obj Object in question.
- * @exception rb_eTypeError `obj` is incapable of having an encoding.
- * @return `obj`'s encoding.
- */
-VALUE rb_obj_encoding(VALUE obj);
-
-/**
- * Identical to rb_str_cat(), except it additionally takes an encoding.
- *
- * @param[out] str Destination object.
- * @param[in] ptr Contents to append.
- * @param[in] len Length of `src`, in bytes.
- * @param[in] enc Encoding of `ptr`.
- * @exception rb_eArgError `len` is negative.
- * @exception rb_eEncCompatError `enc` is not compatible with `str`.
- * @return The passed `dst`.
- * @post The contents of `ptr` is copied, transcoded into `dst`'s
- * encoding, then pasted into `dst`'s end.
- */
-VALUE rb_enc_str_buf_cat(VALUE str, const char *ptr, long len, rb_encoding *enc);
-
-/**
- * Encodes the passed code point into a series of bytes.
- *
- * @param[in] code Code point.
- * @param[in] enc Target encoding scheme.
- * @exception rb_eRangeError `enc` does not glean `code`.
- * @return An instance of ::rb_cString, of `enc` encoding, whose sole
- * contents is `code` represented in `enc`.
- * @note No way to encode code points bigger than UINT_MAX.
- *
- * @internal
- *
- * In other languages, APIs like this one could be seen as the primitive
- * routines where encodings' "encode" feature are implemented. However in case
- * of Ruby this is not the primitive one. We directly manipulate encoded
- * strings. Encoding conversion routines transocde an encoded string directly
- * to another one; not via a code point array.
- */
-VALUE rb_enc_uint_chr(unsigned int code, rb_encoding *enc);
-
-/**
- * Identical to rb_external_str_new(), except it additionally takes an
- * encoding. However the whole point of rb_external_str_new() is to encode a
- * string into default external encoding. Being able to specify arbitrary
- * encoding just ruins the designed purpose the function meseems.
- *
- * @param[in] ptr A memory region of `len` bytes length.
- * @param[in] len Length of `ptr`, in bytes, not including the
- * terminating NUL character.
- * @param[in] enc Target encoding scheme.
- * @exception rb_eArgError `len` is negative.
- * @return An instance of ::rb_cString. In case encoding conversion from
- * "default internal" to `enc` is fully defined over the given
- * contents, then the return value is a string of `enc` encoding,
- * whose contents are the converted ones. Otherwise the string is
- * a junk.
- * @warning It doesn't raise on a conversion failure and silently ends up in
- * a corrupted output. You can know the failure by querying
- * `valid_encoding?` of the result object.
- *
- * @internal
- *
- * @shyouhei has no idea why this one does not follow the naming convention
- * that others obey. It seems to him that this should have been called
- * `rb_enc_external_str_new`.
- */
-VALUE rb_external_str_new_with_enc(const char *ptr, long len, rb_encoding *enc);
-
-/**
- * Identical to rb_str_export(), except it additionally takes an encoding.
- *
- * @param[in] obj Target object.
- * @param[in] enc Target encoding.
- * @exception rb_eTypeError No implicit conversion to String.
- * @return Converted ruby string of `enc` encoding.
- */
-VALUE rb_str_export_to_enc(VALUE obj, rb_encoding *enc);
-
-/**
- * Encoding conversion main routine.
- *
- * @param[in] str String to convert.
- * @param[in] from Source encoding.
- * @param[in] to Destination encoding.
- * @return A copy of `str`, with conversion from `from` to `to` applied.
- * @note `from` can be a null pointer. `str`'s encoding is taken then.
- * @note `to` can be a null pointer. No-op then.
- */
-VALUE rb_str_conv_enc(VALUE str, rb_encoding *from, rb_encoding *to);
-
-/**
- * Identical to rb_str_conv_enc(), except it additionally takes IO encoder
- * options. The extra arguments can be constructed using io_extract_modeenc()
- * etc.
- *
- * @param[in] str String to convert.
- * @param[in] from Source encoding.
- * @param[in] to Destination encoding.
- * @param[in] ecflags A set of enum ::ruby_econv_flag_type.
- * @param[in] ecopts Optional hash.
- * @return A copy of `str`, with conversion from `from` to `to` applied.
- * @note `from` can be a null pointer. `str`'s encoding is taken then.
- * @note `to` can be a null pointer. No-op then.
- * @note `ecopts` can be ::RUBY_Qnil, which is equivalent to passing an
- * empty hash.
- */
-VALUE rb_str_conv_enc_opts(VALUE str, rb_encoding *from, rb_encoding *to, int ecflags, VALUE ecopts);
-
-/** @cond INTERNAL_MACRO */
-#ifdef HAVE_BUILTIN___BUILTIN_CONSTANT_P
-#define rb_enc_str_new(str, len, enc) RB_GNUC_EXTENSION_BLOCK( \
- (__builtin_constant_p(str) && __builtin_constant_p(len)) ? \
- rb_enc_str_new_static((str), (len), (enc)) : \
- rb_enc_str_new((str), (len), (enc)) \
-)
-#define rb_enc_str_new_cstr(str, enc) RB_GNUC_EXTENSION_BLOCK( \
- (__builtin_constant_p(str)) ? \
- rb_enc_str_new_static((str), (long)strlen(str), (enc)) : \
- rb_enc_str_new_cstr((str), (enc)) \
-)
-#endif
-/** @endcond */
-
-RBIMPL_ATTR_NORETURN()
-RBIMPL_ATTR_NONNULL((3))
-RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 3, 4)
-/**
- * Identical to rb_raise(), except it additionally takes an encoding.
- *
- * @param[in] enc Encoding of the generating exception.
- * @param[in] exc A subclass of ::rb_eException.
- * @param[in] fmt Format specifier string compatible with rb_sprintf().
- * @param[in] ... Contents of the message.
- * @exception exc The specified exception.
- * @note It never returns.
- */
-void rb_enc_raise(rb_encoding *enc, VALUE exc, const char *fmt, ...);
-
-/**
- * Identical to rb_find_encoding(), except it takes an encoding index instead
- * of a Ruby object.
- *
- * @param[in] idx An encoding index.
- * @retval NULL No such encoding.
- * @retval otherwise An encoding whose index is `idx`.
- */
-rb_encoding *rb_enc_from_index(int idx);
-
-/**
- * Identical to rb_find_encoding(), except it takes a C's string instead of
- * Ruby's.
- *
- * @param[in] name Name of the encoding to query.
- * @retval NULL No such encoding.
- * @retval otherwise An encoding whose index is `idx`.
- */
-rb_encoding *rb_enc_find(const char *name);
-
-/**
- * Queries the (canonical) name of the passed encoding.
- *
- * @param[in] enc An encoding.
- * @return Its name.
- */
-#define rb_enc_name(enc) (enc)->name
-
-/**
- * Queries the minimum number of bytes that the passed encoding needs to
- * represent a character. For ASCII and compatible encodings this is typically
- * 1. There are however encodings whose minimum is not 1; they are
- * historically called wide characters.
- *
- * @param[in] enc An encoding.
- * @return Its least possible number of bytes except 0.
- */
-#define rb_enc_mbminlen(enc) (enc)->min_enc_len
-
-/**
- * Queries the maximum number of bytes that the passed encoding needs to
- * represent a character. Fixed-width encodings have the same value for this
- * one and #rb_enc_mbminlen. However there are variable-width encodings.
- * UTF-8, for instance, takes from 1 up to 6 bytes.
- *
- * @param[in] enc An encoding.
- * @return Its maximum possible number of bytes of a character.
- */
-#define rb_enc_mbmaxlen(enc) (enc)->max_enc_len
-
-/**
- * Queries the number of bytes of the character at the passed pointer.
- *
- * @param[in] p Pointer to a character's first byte.
- * @param[in] e End of the string that has `p`.
- * @param[in] enc Encoding of the string.
- * @return If the character at `p` does not end until `e`, number of bytes
- * between `p` and `e`. Otherwise the number of bytes that the
- * character at `p` is encoded.
- *
- * @internal
- *
- * Strictly speaking there are chances when `p` points to a middle byte of a
- * wide character. This function returns "the number of bytes from `p` to
- * nearest of either `e` or the next character boundary", if you go strict.
- */
-int rb_enc_mbclen(const char *p, const char *e, rb_encoding *enc);
-
-/**
- * Identical to rb_enc_mbclen() unless the character at `p` overruns `e`. That
- * can happen for instance when you read from a socket and its partial read
- * cuts a wide character in-between. In those situations this function
- * "estimates" theoretical length of the character in question. Typically it
- * tends to be possible to know how many bytes a character needs before
- * actually reaching its end; for instance UTF-8 encodes a character's length
- * in the first byte of it. This function returns that info.
- *
- * @note This implies that the string is not broken.
- *
- * @param[in] p Pointer to the character's first byte.
- * @param[in] e End of the string that has `p`.
- * @param[in] enc Encoding of the string.
- * @return Number of bytes of character at `p`, measured or estimated.
- */
-int rb_enc_fast_mbclen(const char *p, const char *e, rb_encoding *enc);
-
-/**
- * Queries the number of bytes of the character at the passed pointer. This
- * function returns 3 different types of information:
- *
- * ```CXX
- * auto n = rb_enc_precise_mbclen(p, q, r);
- *
- * if (ONIGENC_MBCLEN_CHARFOUND_P(n)) {
- * // Character found. Normal return.
- * auto found_length = ONIGENC_MBCLEN_CHARFOUND_LEN(n);
- * }
- * else if (ONIGENC_MBCLEN_NEEDMORE_P(n)) {
- * // Character overruns past `q`; needs more.
- * auto requested_length = ONIGENC_MBCLEN_NEEDMORE_LEN(n);
- * }
- * else {
- * // `p` is broken.
- * assert(ONIGENC_MBCLEN_INVALID_P(n));
- * }
- * ```
- *
- * @param[in] p Pointer to the character's first byte.
- * @param[in] e End of the string that has `p`.
- * @param[in] enc Encoding of the string.
- * @return Encoded read/needed number of bytes (see above).
- */
-int rb_enc_precise_mbclen(const char *p, const char *e, rb_encoding *enc);
-
-#define MBCLEN_CHARFOUND_P(ret) ONIGENC_MBCLEN_CHARFOUND_P(ret) /**< @old{ONIGENC_MBCLEN_CHARFOUND_P} */
-#define MBCLEN_CHARFOUND_LEN(ret) ONIGENC_MBCLEN_CHARFOUND_LEN(ret) /**< @old{ONIGENC_MBCLEN_CHARFOUND_LEN} */
-#define MBCLEN_INVALID_P(ret) ONIGENC_MBCLEN_INVALID_P(ret) /**< @old{ONIGENC_MBCLEN_INVALID_P} */
-#define MBCLEN_NEEDMORE_P(ret) ONIGENC_MBCLEN_NEEDMORE_P(ret) /**< @old{ONIGENC_MBCLEN_NEEDMORE_P} */
-#define MBCLEN_NEEDMORE_LEN(ret) ONIGENC_MBCLEN_NEEDMORE_LEN(ret) /**< @old{ONIGENC_MBCLEN_NEEDMORE_LEN} */
-
-/**
- * Queries the code point of character pointed by the passed pointer. If that
- * code point is included in ASCII that code point is returned. Otherwise -1.
- * This can be different from just looking at the first byte. For instance it
- * reads 2 bytes in case of UTF-16BE.
- *
- * @param[in] p Pointer to the character's first byte.
- * @param[in] e End of the string that has `p`.
- * @param[in] len Return buffer.
- * @param[in] enc Encoding of the string.
- * @retval -1 The character at `p` is not i ASCII.
- * @retval otherwise A code point of the character at `p`.
- * @post `len` (if set) is the number of bytes of `p`.
- */
-int rb_enc_ascget(const char *p, const char *e, int *len, rb_encoding *enc);
-
-/**
- * Queries the code point of character pointed by the passed pointer.
- * Exceptions happen in case of broken input.
- *
- * @param[in] p Pointer to the character's first byte.
- * @param[in] e End of the string that has `p`.
- * @param[in] len Return buffer.
- * @param[in] enc Encoding of the string.
- * @exception rb_eArgError `p` is broken.
- * @return Code point of the character pointed by `p`.
- * @post `len` (if set) is the number of bytes of `p`.
- */
-unsigned int rb_enc_codepoint_len(const char *p, const char *e, int *len, rb_encoding *enc);
-
-RBIMPL_ATTR_DEPRECATED(("use rb_enc_codepoint_len instead."))
-/**
- * Queries the code point of character pointed by the passed pointer.
- * Exceptions happen in case of broken input.
- *
- * @deprecated Use rb_enc_codepoint_len() instead.
- * @param[in] p Pointer to the character's first byte.
- * @param[in] e End of the string that has `p`.
- * @param[in] enc Encoding of the string.
- * @exception rb_eArgError `p` is broken.
- * @return Code point of the character pointed by `p`.
- */
-unsigned int rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc);
-
-/** @cond INTERNAL_MACRO */
-#define rb_enc_codepoint(p,e,enc) rb_enc_codepoint_len((p),(e),0,(enc))
-/** @endcond */
-
-/**
- * Identical to rb_enc_codepoint(), except it assumes the passed character is
- * not broken.
- *
- * @param[in] p Pointer to the character's first byte.
- * @param[in] e End of the string that has `p`.
- * @param[in] enc Encoding of the string.
- * @return Code point of the character pointed by `p`.
- */
-#define rb_enc_mbc_to_codepoint(p, e, enc) ONIGENC_MBC_TO_CODE((enc),(UChar*)(p),(UChar*)(e))
-
-/**
- * Queries the number of bytes requested to represent the passed code point
- * using the passed encoding.
- *
- * @param[in] code Code point in question.
- * @param[in] enc Encoding to convert the code into a byte sequence.
- * @exception rb_eArgError `enc` does not glean `code`.
- * @return Number of bytes requested to represent `code` using `enc`.
- */
-int rb_enc_codelen(int code, rb_encoding *enc);
-
-/**
- * Identical to rb_enc_codelen(), except it returns 0 for invalid code points.
- *
- * @param[in] code Code point in question.
- * @param[in] enc Encoding to convert the code into a byte sequence.
- * @retval 0 `code` is invalid.
- * @return otherwise Number of bytes used for `enc` to encode `code`.
- */
-int rb_enc_code_to_mbclen(int code, rb_encoding *enc);
-
-/** @cond INTERNAL_MACRO */
-#define rb_enc_code_to_mbclen(c, enc) ONIGENC_CODE_TO_MBCLEN((enc), (c));
-/** @endcond */
-
-/**
- * Identical to rb_enc_uint_chr(), except it writes back to the passed buffer
- * instead of allocating one.
- *
- * @param[in] c Code point.
- * @param[out] buf Return buffer.
- * @param[in] enc Target encoding scheme.
- * @post `c` is encoded according to `enc`, then written to `buf`.
- */
-#define rb_enc_mbcput(c,buf,enc) ONIGENC_CODE_TO_MBC((enc),(c),(UChar*)(buf))
-
-/**
- * Queries the previous (left) character.
- *
- * @param[in] s Start of the string.
- * @param[in] p Pointer to a character.
- * @param[in] e End of the string.
- * @param[in] enc Encoding.
- * @retval NULL No previous character.
- * @retval otherwise Pointer to the head of the previous character.
- */
-#define rb_enc_prev_char(s,p,e,enc) ((char *)onigenc_get_prev_char_head((enc),(UChar*)(s),(UChar*)(p),(UChar*)(e)))
-
-/**
- * Queries the left boundary of a character. This function takes a pointer
- * that is not necessarily a head of a character, and searches for its head.
- *
- * @param[in] s Start of the string.
- * @param[in] p Pointer to a possibly-middle of a character.
- * @param[in] e End of the string.
- * @param[in] enc Encoding.
- * @return Pointer to the head of the character that contains `p`.
- */
-#define rb_enc_left_char_head(s,p,e,enc) ((char *)onigenc_get_left_adjust_char_head((enc),(UChar*)(s),(UChar*)(p),(UChar*)(e)))
-
-/**
- * Queries the right boundary of a character. This function takes a pointer
- * that is not necessarily a head of a character, and searches for its tail.
- *
- * @param[in] s Start of the string.
- * @param[in] p Pointer to a possibly-middle of a character.
- * @param[in] e End of the string.
- * @param[in] enc Encoding.
- * @return Pointer to the end of the character that contains `p`.
- */
-#define rb_enc_right_char_head(s,p,e,enc) ((char *)onigenc_get_right_adjust_char_head((enc),(UChar*)(s),(UChar*)(p),(UChar*)(e)))
-
-/**
- * Scans the string backwards for n characters.
- *
- * @param[in] s Start of the string.
- * @param[in] p Pointer to a character.
- * @param[in] e End of the string.
- * @param[in] n Steps.
- * @param[in] enc Encoding.
- * @retval NULL There are no `n` characters left.
- * @retval otherwise Pointer to `n` character before `p`.
- */
-#define rb_enc_step_back(s,p,e,n,enc) ((char *)onigenc_step_back((enc),(UChar*)(s),(UChar*)(p),(UChar*)(e),(int)(n)))
-
-/**
- * Queries if the passed pointer points to a newline character. What is a
- * newline and what is not depends on the passed encoding.
- *
- * @param[in] p Pointer to a possibly-middle of a character.
- * @param[in] end End of the string.
- * @param[in] enc Encoding.
- * @retval 0 It isn't.
- * @retval otherwise It is.
- */
-#define rb_enc_is_newline(p,end,enc) ONIGENC_IS_MBC_NEWLINE((enc),(UChar*)(p),(UChar*)(end))
-
-/**
- * Queries if the passed code point is of passed character type in the passed
- * encoding. The "character type" here is a set of macros defined in onigmo.h,
- * like `ONIGENC_CTYPE_PUNCT`.
- *
- * @param[in] c A code point.
- * @param[in] t Type (see above).
- * @param[in] enc Encoding.
- * @retval 1 `c` is of `t` in `enc`.
- * @retval 0 Otherwise.
- */
-#define rb_enc_isctype(c,t,enc) ONIGENC_IS_CODE_CTYPE((enc),(c),(t))
-
-/**
- * Identical to rb_isascii(), except it additionally takes an encoding.
- *
- * @param[in] c A code point.
- * @param[in] enc An encoding.
- * @retval 0 `c` is out of range of ASCII character set in `enc`.
- * @retval 1 Otherwise.
- *
- * @internal
- *
- * `enc` is ignored. This is at least an intentional implementation detail
- * (not a bug). But there could be rooms for future extensions.
- */
-#define rb_enc_isascii(c,enc) ONIGENC_IS_CODE_ASCII(c)
-
-/**
- * Identical to rb_isalpha(), except it additionally takes an encoding.
- *
- * @param[in] c A code point.
- * @param[in] enc An encoding.
- * @retval 1 `enc` classifies `c` as "ALPHA".
- * @retval 0 Otherwise.
- */
-#define rb_enc_isalpha(c,enc) ONIGENC_IS_CODE_ALPHA((enc),(c))
-
-/**
- * Identical to rb_islower(), except it additionally takes an encoding.
- *
- * @param[in] c A code point.
- * @param[in] enc An encoding.
- * @retval 1 `enc` classifies `c` as "LOWER".
- * @retval 0 Otherwise.
- */
-#define rb_enc_islower(c,enc) ONIGENC_IS_CODE_LOWER((enc),(c))
-
-/**
- * Identical to rb_isupper(), except it additionally takes an encoding.
- *
- * @param[in] c A code point.
- * @param[in] enc An encoding.
- * @retval 1 `enc` classifies `c` as "UPPER".
- * @retval 0 Otherwise.
- */
-#define rb_enc_isupper(c,enc) ONIGENC_IS_CODE_UPPER((enc),(c))
-
-/**
- * Identical to rb_ispunct(), except it additionally takes an encoding.
- *
- * @param[in] c A code point.
- * @param[in] enc An encoding.
- * @retval 1 `enc` classifies `c` as "PUNCT".
- * @retval 0 Otherwise.
- */
-#define rb_enc_ispunct(c,enc) ONIGENC_IS_CODE_PUNCT((enc),(c))
-
-/**
- * Identical to rb_isalnum(), except it additionally takes an encoding.
- *
- * @param[in] c A code point.
- * @param[in] enc An encoding.
- * @retval 1 `enc` classifies `c` as "ANUM".
- * @retval 0 Otherwise.
- */
-#define rb_enc_isalnum(c,enc) ONIGENC_IS_CODE_ALNUM((enc),(c))
-
-/**
- * Identical to rb_isprint(), except it additionally takes an encoding.
- *
- * @param[in] c A code point.
- * @param[in] enc An encoding.
- * @retval 1 `enc` classifies `c` as "PRINT".
- * @retval 0 Otherwise.
- */
-#define rb_enc_isprint(c,enc) ONIGENC_IS_CODE_PRINT((enc),(c))
-
-/**
- * Identical to rb_isspace(), except it additionally takes an encoding.
- *
- * @param[in] c A code point.
- * @param[in] enc An encoding.
- * @retval 1 `enc` classifies `c` as "PRINT".
- * @retval 0 Otherwise.
- */
-#define rb_enc_isspace(c,enc) ONIGENC_IS_CODE_SPACE((enc),(c))
-
-/**
- * Identical to rb_isdigit(), except it additionally takes an encoding.
- *
- * @param[in] c A code point.
- * @param[in] enc An encoding.
- * @retval 1 `enc` classifies `c` as "DIGIT".
- * @retval 0 Otherwise.
- */
-#define rb_enc_isdigit(c,enc) ONIGENC_IS_CODE_DIGIT((enc),(c))
-
-/**
- * @private
- *
- * This is an implementation detail of rb_enc_asciicompat(). People don't use
- * it directly. Just always use rb_enc_asciicompat().
- *
- * @param[in] enc Encoding in question.
- * @retval 1 It is ASCII compatible.
- * @retval 0 It isn't.
- */
-static inline int
-rb_enc_asciicompat_inline(rb_encoding *enc)
-{
- return rb_enc_mbminlen(enc)==1 && !rb_enc_dummy_p(enc);
-}
-
-/**
- * Queries if the passed encoding is _in some sense_ compatible with ASCII.
- * The concept of ASCII compatibility is nuanced, and private to our
- * implementation. For instance SJIS is ASCII compatible to us, despite their
- * having different characters at code point `0x5C`. This is based on some
- * practical consideration that Japanese people confuses SJIS to be "upper
- * compatible" with ASCII (which is in fact a wrong idea, but we just don't go
- * strict here). An example of ASCII incompatible encoding is UTF-16. UTF-16
- * shares code points with ASCII, but employs a completely different encoding
- * scheme.
- *
- * @param[in] enc Encoding in question.
- * @retval 0 It is incompatible.
- * @retval 1 It is compatible.
- */
-#define rb_enc_asciicompat(enc) rb_enc_asciicompat_inline(enc)
-
-RBIMPL_ATTR_CONST()
-/**
- * Identical to rb_toupper(), except it additionally takes an encoding.
- *
- * @param[in] c A code point.
- * @param[in] enc An encoding.
- * @return `c`'s (Ruby's definition of) upper case counterpart.
- *
- * @internal
- *
- * As `RBIMPL_ATTR_CONST` implies this function ignores `enc`.
- */
-int rb_enc_toupper(int c, rb_encoding *enc);
-
-RBIMPL_ATTR_CONST()
-/**
- * Identical to rb_tolower(), except it additionally takes an encoding.
- *
- * @param[in] c A code point.
- * @param[in] enc An encoding.
- * @return `c`'s (Ruby's definition of) lower case counterpart.
- *
- * @internal
- *
- * As `RBIMPL_ATTR_CONST` implies this function ignores `enc`.
- */
-int rb_enc_tolower(int c, rb_encoding *enc);
-
-/**
- * Identical to rb_intern2(), except it additionally takes an encoding.
- *
- * @param[in] name The name of the id.
- * @param[in] len Length of `name`.
- * @param[in] enc `name`'s encoding.
- * @exception rb_eRuntimeError Too many symbols.
- * @return A (possibly new) id whose value is the given name.
- * @note These days Ruby internally has two kinds of symbols
- * (static/dynamic). Symbols created using this function would
- * become static ones; i.e. would never be garbage collected. It
- * is up to you to avoid memory leaks. Think twice before using
- * it.
- */
-ID rb_intern3(const char *name, long len, rb_encoding *enc);
-
-RBIMPL_ATTR_NONNULL(())
-/**
- * Identical to rb_symname_p(), except it additionally takes an encoding.
- *
- * @param[in] str A C string to check.
- * @param[in] enc `str`'s encoding.
- * @retval 1 It is a valid symbol name.
- * @retval 0 It is invalid as a symbol name.
- */
-int rb_enc_symname_p(const char *str, rb_encoding *enc);
-
-/**
- * Identical to rb_enc_symname_p(), except it additionally takes the passed
- * string's length. This is needed for strings containing NUL bytes, like in
- * case of UTF-32.
- *
- * @param[in] name A C string to check.
- * @param[in] len Number of bytes of `str`.
- * @param[in] enc `str`'s encoding.
- * @retval 1 It is a valid symbol name.
- * @retval 0 It is invalid as a symbol name.
- */
-int rb_enc_symname2_p(const char *name, long len, rb_encoding *enc);
-
-/**
- * Scans the passed string to collect its code range. Because a Ruby's string
- * is mutable, its contents change from time to time; so does its code range.
- * A long-lived string tends to fall back to ::RUBY_ENC_CODERANGE_UNKNOWN.
- * This API scans it and re-assigns a fine-grained code range constant.
- *
- * @param[out] str A string.
- * @return An enum ::ruby_coderange_type.
- */
-int rb_enc_str_coderange(VALUE str);
-
-/**
- * Scans the passed string until it finds something odd. Returns the number of
- * bytes scanned. As the name implies this is suitable for repeated call. One
- * of its application is `IO#readlines`. The method reads from its receiver's
- * read buffer, maybe more than once, looking for newlines. But "newline" can
- * be different among encodings. This API is used to detect broken contents to
- * properly mark them as such.
- *
- * @param[in] str String to scan.
- * @param[in] end End of `str`.
- * @param[in] enc `str`'s encoding.
- * @param[out] cr Return buffer.
- * @return Distance between `str` and first such byte where broken.
- * @post `cr` has the code range type.
- */
-long rb_str_coderange_scan_restartable(const char *str, const char *end, rb_encoding *enc, int *cr);
-
-/**
- * Queries if the passed string is "ASCII only". An ASCII only string is a
- * string who doesn't have any non-ASCII characters at all. This doesn't
- * necessarily mean the string is in ASCII encoding. For instance a String of
- * CP932 encoding can quite much be ASCII only, depending on its contents.
- *
- * @param[in] str String in question.
- * @retval 1 It doesn't have non-ASCII characters.
- * @retval 0 It has characters that are out of ASCII.
- */
-int rb_enc_str_asciionly_p(VALUE str);
-
-/**
- * Queries if the passed string is in an ASCII-compatible encoding.
- *
- * @param[in] str A Ruby's string to query.
- * @retval 0 `str` is not a String, or an ASCII-incompatible string.
- * @retval 1 Otherwise.
- */
-#define rb_enc_str_asciicompat_p(str) rb_enc_asciicompat(rb_enc_get(str))
-
-/**
- * Queries the Ruby-level counterpart instance of ::rb_cEncoding that
- * corresponds to the passed encoding.
- *
- * @param[in] enc An encoding
- * @retval RUBY_Qnil `enc` is a null pointer.
- * @retval otherwise An instance of ::rb_cEncoding.
- */
-VALUE rb_enc_from_encoding(rb_encoding *enc);
-
-RBIMPL_ATTR_PURE()
-/**
- * Queries if the passed encoding is either one of UTF-8/16/32.
- *
- * @note It does not take UTF-7, which we actually support, into account.
- *
- * @param[in] enc Encoding in question.
- * @retval 0 It is not a Unicode variant.
- * @retval otherwise It is.
- *
- * @internal
- *
- * In reality it returns 1/0, but the value is abstracted as
- * `ONIGENC_FLAG_UNICODE`.
- */
-int rb_enc_unicode_p(rb_encoding *enc);
-
-RBIMPL_ATTR_RETURNS_NONNULL()
-/**
- * Queries the encoding that represents ASCII-8BIT a.k.a. binary.
- *
- * @return The encoding that represents ASCII-8BIT.
- *
- * @internal
- *
- * This can not return NULL once the process properly boots up.
- */
-rb_encoding *rb_ascii8bit_encoding(void);
-
-RBIMPL_ATTR_RETURNS_NONNULL()
-/**
- * Queries the encoding that represents UTF-8.
- *
- * @return The encoding that represents UTF-8.
- *
- * @internal
- *
- * This can not return NULL once the process properly boots up.
- */
-rb_encoding *rb_utf8_encoding(void);
-
-RBIMPL_ATTR_RETURNS_NONNULL()
-/**
- * Queries the encoding that represents US-ASCII.
- *
- * @return The encoding that represents US-ASCII.
- *
- * @internal
- *
- * This can not return NULL once the process properly boots up.
- */
-rb_encoding *rb_usascii_encoding(void);
-
-/**
- * Queries the encoding that represents the current locale.
- *
- * @return The encoding that represents the process' locale.
- *
- * @internal
- *
- * This is dynamic. If you change the process' locale by e.g. calling
- * `setlocale(3)`, that should also change the return value of this function.
- *
- * There is no official way for Ruby scripts to manipulate locales, though.
- */
-rb_encoding *rb_locale_encoding(void);
-
-/**
- * Queries the "filesystem" encoding. This is the encoding that ruby expects
- * info from the OS' file system are in. This affects for instance return
- * value of rb_dir_getwd(). Most notably on Windows it can be an alias of OS
- * codepage. Most notably on Linux users can set this via default external
- * encoding.
- *
- * @return The "filesystem" encoding.
- */
-rb_encoding *rb_filesystem_encoding(void);
-
-/**
- * Queries the "default external" encoding. This is used to interact with
- * outer-process things such as File. Though not recommended, you can set this
- * using rb_enc_set_default_external().
- *
- * @return The "default external" encoding.
- */
-rb_encoding *rb_default_external_encoding(void);
-
-/**
- * Queries the "default internal" encoding. This could be a null pointer.
- * Otherwise, outer-process info are transcoded from default external encoding
- * to this one during reading from an IO.
- *
- * @return The "default internal" encoding (if any).
- */
-rb_encoding *rb_default_internal_encoding(void);
-
-#ifndef rb_ascii8bit_encindex
-RBIMPL_ATTR_CONST()
-/**
- * Identical to rb_ascii8bit_encoding(), except it returns the encoding's index
- * instead of the encoding itself.
- *
- * @return The index of encoding of ASCII-8BIT.
- *
- * @internal
- *
- * This happens to be 0.
- */
-int rb_ascii8bit_encindex(void);
-#endif
-
-#ifndef rb_utf8_encindex
-RBIMPL_ATTR_CONST()
-/**
- * Identical to rb_utf8_encoding(), except it returns the encoding's index
- * instead of the encoding itself.
- *
- * @return The index of encoding of UTF-8.
- */
-int rb_utf8_encindex(void);
-#endif
-
-#ifndef rb_usascii_encindex
-RBIMPL_ATTR_CONST()
-/**
- * Identical to rb_usascii_encoding(), except it returns the encoding's index
- * instead of the encoding itself.
- *
- * @return The index of encoding of UTF-8.
- */
-int rb_usascii_encindex(void);
-#endif
-
-/**
- * Identical to rb_locale_encoding(), except it returns the encoding's index
- * instead of the encoding itself.
- *
- * @return The index of the locale encoding.
- */
-int rb_locale_encindex(void);
-
-/**
- * Identical to rb_filesystem_encoding(), except it returns the encoding's
- * index instead of the encoding itself.
- *
- * @return The index of the filesystem encoding.
- */
-int rb_filesystem_encindex(void);
-
-/**
- * Identical to rb_default_external_encoding(), except it returns the
- * Ruby-level counterpart instance of ::rb_cEncoding that corresponds to the
- * default external encoding.
- *
- * @return An instance of ::rb_cEncoding of default external.
- */
-VALUE rb_enc_default_external(void);
-
-/**
- * Identical to rb_default_internal_encoding(), except it returns the
- * Ruby-level counterpart instance of ::rb_cEncoding that corresponds to the
- * default internal encoding.
- *
- * @return An instance of ::rb_cEncoding of default internal.
- */
-VALUE rb_enc_default_internal(void);
-
-/**
- * Destructively assigns the passed encoding as the default external encoding.
- * You should not use this API. It has process-global side effects. Also it
- * doesn't change encodings of strings that have already been read.
- *
- * @param[in] encoding Ruby level encoding.
- * @exception rb_eArgError `encoding` is ::RUBY_Qnil.
- * @post The default external encoding is `encoding`.
- */
-void rb_enc_set_default_external(VALUE encoding);
-
-/**
- * Destructively assigns the passed encoding as the default internal encoding.
- * You should not use this API. It has process-global side effects. Also it
- * doesn't change encodings of strings that have already been read.
- *
- * @param[in] encoding Ruby level encoding.
- * @post The default internal encoding is `encoding`.
- * @note Unlike rb_enc_set_default_external() you can pass ::RUBY_Qnil.
- */
-void rb_enc_set_default_internal(VALUE encoding);
-
-/**
- * Returns a platform-depended "charmap" of the current locale. This
- * information is called a "Codeset name" in IEEE 1003.1 section 13
- * (`<langinfo.h>`). This is a very low-level API. The return value can have
- * no corresponding encoding when passed to rb_find_encoding().
- *
- * @param[in] klass Ignored for no reason (why...)
- * @return The low-level locale charmap, in Ruby's String.
- */
-VALUE rb_locale_charmap(VALUE klass);
-
-RBIMPL_ATTR_NONNULL(())
-/**
- * Looks for the passed string in the passed buffer.
- *
- * @param[in] x Buffer that potentially includes `y`.
- * @param[in] m Number of bytes of `x`.
- * @param[in] y Query string.
- * @param[in] n Number of bytes of `y`.
- * @param[in] enc Encoding of both `x` and `y`.
- * @retval -1 Not found.
- * @retval otherwise Found index in `x`.
- * @note This API can match at a non-character-boundary.
- */
-long rb_memsearch(const void *x, long m, const void *y, long n, rb_encoding *enc);
-
-RBIMPL_ATTR_NONNULL(())
-/**
- * Returns a path component directly adjacent to the passed pointer.
- *
- * ```
- * "/multi/byte/encoded/pathname.txt"
- * ^ ^ ^
- * | | +--- end
- * | +--- @return
- * +--- path
- * ```
- *
- * @param[in] path Where to start scanning.
- * @param[in] end End of the path string.
- * @param[in] enc Encoding of the string.
- * @return A pointer in the passed string where the next path component
- * resides, or `end` if there is no next path component.
- */
-char *rb_enc_path_next(const char *path, const char *end, rb_encoding *enc);
-
-RBIMPL_ATTR_NONNULL(())
-/**
- * Seeks for non-prefix part of a pathname. This can be a no-op when the OS
- * has no such concept like a path prefix. But there are OSes where path
- * prefixes do exist.
- *
- * ```
- * "C:\multi\byte\encoded\pathname.txt"
- * ^ ^ ^
- * | | +--- end
- * | +--- @return
- * +--- path
- * ```
- *
- * @param[in] path Where to start scanning.
- * @param[in] end End of the path string.
- * @param[in] enc Encoding of the string.
- * @return A pointer in the passed string where non-prefix part starts, or
- * `path` if the OS does not have path prefix.
- */
-char *rb_enc_path_skip_prefix(const char *path, const char *end, rb_encoding *enc);
-
-RBIMPL_ATTR_NONNULL(())
-/**
- * Returns the last path component.
- *
- * ```
- * "/multi/byte/encoded/pathname.txt"
- * ^ ^ ^
- * | | +--- end
- * | +--- @return
- * +--- path
- * ```
- *
- * @param[in] path Where to start scanning.
- * @param[in] end End of the path string.
- * @param[in] enc Encoding of the string.
- * @return A pointer in the passed string where the last path component
- * resides, or `end` if there is no more path component.
- */
-char *rb_enc_path_last_separator(const char *path, const char *end, rb_encoding *enc);
-
-RBIMPL_ATTR_NONNULL(())
-/**
- * This just returns the passed end basically. It makes difference in case the
- * passed string ends with tons of path separators like the following:
- *
- * ```
- * "/path/that/ends/with/lots/of/slashes//////////////"
- * ^ ^ ^
- * | | +--- end
- * | +--- @return
- * +--- path
- * ```
- *
- * @param[in] path Where to start scanning.
- * @param[in] end End of the path string.
- * @param[in] enc Encoding of the string.
- * @return A pointer in the passed string where the trailing path
- * separators start, or `end` if there is no trailing path
- * separators.
- *
- * @internal
- *
- * It seems this function was introduced to mimic what POSIX says about
- * `basename(3)`.
- */
-char *rb_enc_path_end(const char *path, const char *end, rb_encoding *enc);
-
-RBIMPL_ATTR_NONNULL((1, 4))
-/**
- * Our own encoding-aware version of `basename(3)`. Normally, this function
- * returns the last path component of the given name. However in case the
- * passed name ends with a path separator, it returns the name of the
- * directory, not the last (empty) component. Also if the passed name is a
- * root directory, it returns that root directory. Note however that Windows
- * filesystem have drive letters, which this function does not return.
- *
- * @param[in] name Target path.
- * @param[out] baselen Return buffer.
- * @param[in,out] alllen Number of bytes of `name`.
- * @param[enc] enc Encoding of `name`.
- * @return The rightmost component of `name`.
- * @post `baselen`, if passed, is updated to be the number of bytes
- * of the returned basename.
- * @post `alllen`, if passed, is updated to be the number of bytes of
- * strings not considered as the basename.
- */
-const char *ruby_enc_find_basename(const char *name, long *baselen, long *alllen, rb_encoding *enc);
-
-RBIMPL_ATTR_NONNULL((1, 3))
-/**
- * Our own encoding-aware version of `extname`. This function first applies
- * rb_enc_path_last_separator() to the passed name and only concerns its return
- * value (ignores any parent directories). This function returns complicated
- * results:
- *
- * ```CXX
- * auto path = "...";
- * auto len = strlen(path);
- * auto ret = ruby_enc_find_extname(path, &len, rb_ascii8bit_encoding());
- *
- * switch(len) {
- * case 0:
- * if (ret == 0) {
- * // `path` is a file without extensions.
- * }
- * else {
- * // `path` is a dotfile.
- * // `ret` is the file's name.
- * }
- * break;
- *
- * case 1:
- * // `path` _ends_ with a dot.
- * // `ret` is that dot.
- * break;
- *
- * default:
- * // `path` has an extension.
- * // `ret` is that extension.
- * }
- * ```
- *
- * @param[in] name Target path.
- * @param[in,out] len Number of bytes of `name`.
- * @param[in] enc Encoding of `name`.
- * @return See above.
- * @post `len`, if passed, is updated (see above).
- */
-const char *ruby_enc_find_extname(const char *name, long *len, rb_encoding *enc);
-
-/**
- * Identical to rb_check_id(), except it takes a pointer to a memory region
- * instead of Ruby's string.
- *
- * @param[in] ptr A pointer to a memory region.
- * @param[in] len Number of bytes of `ptr`.
- * @param[in] enc Encoding of `ptr`.
- * @exception rb_eEncodingError `ptr` contains non-ASCII according to `enc`.
- * @retval 0 No such id ever existed in the history.
- * @retval otherwise The id that represents the given name.
- */
-ID rb_check_id_cstr(const char *ptr, long len, rb_encoding *enc);
-
-/**
- * Identical to rb_check_id_cstr(), except for the return type. It can also be
- * seen as a routine identical to rb_check_symbol(), except it takes a pointer
- * to a memory region instead of Ruby's string.
- *
- * @param[in] ptr A pointer to a memory region.
- * @param[in] len Number of bytes of `ptr`.
- * @param[in] enc Encoding of `ptr`.
- * @exception rb_eEncodingError `ptr` contains non-ASCII according to `enc`.
- * @retval RUBY_Qnil No such id ever existed in the history.
- * @retval otherwise The id that represents the given name.
- */
-VALUE rb_check_symbol_cstr(const char *ptr, long len, rb_encoding *enc);
-
-/**
- * `Encoding` class.
- *
- * @ingroup object
- */
-RUBY_EXTERN VALUE rb_cEncoding;
-
-/* econv stuff */
-
-/** return value of rb_econv_convert() */
-typedef enum {
-
- /**
- * The conversion stopped when it found an invalid sequence.
- */
- econv_invalid_byte_sequence,
-
- /**
- * The conversion stopped when it found a character in the input which
- * cannot be representable in the output.
- */
- econv_undefined_conversion,
-
- /**
- * The conversion stopped because there is no destination.
- */
- econv_destination_buffer_full,
-
- /**
- * The conversion stopped because there is no input.
- */
- econv_source_buffer_empty,
-
- /**
- * The conversion stopped after converting everything. This is arguably
- * the expected normal end of conversion.
- */
- econv_finished,
-
- /**
- * The conversion stopped after writing something to somewhere, before
- * reading everything.
- */
- econv_after_output,
-
- /**
- * The conversion stopped in middle of reading a character, possibly due to
- * a partial read of a socket etc.
- */
- econv_incomplete_input
-} rb_econv_result_t;
-
-/** An opaque struct that represents a lowest level of encoding conversion. */
-typedef struct rb_econv_t rb_econv_t;
-
-/**
- * Converts the contents of the passed string from its encoding to the passed
- * one.
- *
- * @param[in] str Target string.
- * @param[in] to Destination encoding.
- * @param[in] ecflags A set of enum
- * ::ruby_econv_flag_type.
- * @param[in] ecopts A keyword hash, like
- * ::rb_io_t::rb_io_enc_t::ecopts.
- * @exception rb_eArgError Not fully converted.
- * @exception rb_eInvalidByteSequenceError `str` is malformed.
- * @exception rb_eUndefinedConversionError `str` has a character not
- * representable using `to`.
- * @exception rb_eConversionNotFoundError There is no known conversion from
- * `str`'s encoding to `to`.
- * @return A string whose encoding is `to`, and whose contents is converted
- * contents of `str`.
- * @note Use rb_econv_prepare_options() to generate `ecopts`.
- */
-VALUE rb_str_encode(VALUE str, VALUE to, int ecflags, VALUE ecopts);
-
-/**
- * Queries if there is more than one way to convert between the passed two
- * encodings. Encoding conversion are has_and_belongs_to_many relationships.
- * There could be no direct conversion defined for the passed pair. Ruby tries
- * to find an indirect way to do so then. For instance ISO-8859-1 has no
- * direct conversion to ISO-2022-JP. But there is ISO-8859-1 to UTF-8
- * conversion; then there is UTF-8 to EUC-JP conversion; finally there also is
- * EUC-JP to ISO-2022-JP conversion. So in short ISO-8859-1 can be converted
- * to ISO-2022-JP using that path. This function returns true. Obviously not
- * everything that can be represented using UTF-8 can also be represented using
- * EUC-JP. Conversions in practice can fail depending on the actual input, and
- * that renders exceptions in case of rb_str_encode().
- *
- * @param[in] from_encoding One encoding.
- * @param[in] to_encoding Another encoding.
- * @retval 0 No way to convert the two.
- * @retval 1 At least one way to convert the two.
- *
- * @internal
- *
- * Practically @shyouhei knows no way for this function to return 0. It seems
- * everything can eventually be converted to/from UTF-8, which connects
- * everything.
- */
-int rb_econv_has_convpath_p(const char* from_encoding, const char* to_encoding);
-
-/**
- * Identical to rb_econv_prepare_opts(), except it additionally takes the
- * initial value of flags. The extra bits are bitwise-ORed to the return
- * value.
- *
- * @param[in] opthash Keyword arguments.
- * @param[out] ecopts Return buffer.
- * @param[in] ecflags Default set of enum ::ruby_econv_flag_type.
- * @exception rb_eArgError Unknown/Broken values passed.
- * @return Calculated set of enum ::ruby_econv_flag_type.
- * @post `ecopts` holds a hash object suitable for
- * ::rb_io_t::rb_io_enc_t::ecopts.
- */
-int rb_econv_prepare_options(VALUE opthash, VALUE *ecopts, int ecflags);
-
-/**
- * Splits a keyword arguments hash (that for instance `String#encode` took)
- * into a set of enum ::ruby_econv_flag_type and a hash storing replacement
- * characters etc.
- *
- * @param[in] opthash Keyword arguments.
- * @param[out] ecopts Return buffer.
- * @exception rb_eArgError Unknown/Broken values passed.
- * @return Calculated set of enum ::ruby_econv_flag_type.
- * @post `ecopts` holds a hash object suitable for
- * ::rb_io_t::rb_io_enc_t::ecopts.
- */
-int rb_econv_prepare_opts(VALUE opthash, VALUE *ecopts);
-
-/**
- * Creates a new instance of struct ::rb_econv_t.
- *
- * @param[in] source_encoding Name of an encoding.
- * @param[in] destination_encoding Name of another encoding.
- * @param[in] ecflags A set of enum ::ruby_econv_flag_type.
- * @exception rb_eArgError No such encoding.
- * @retval NULL Failed to create a struct ::rb_econv_t.
- * @retval otherwise Allocated struct ::rb_econv_t.
- * @warning Return value must be passed to rb_econv_close() exactly once.
- */
-rb_econv_t *rb_econv_open(const char *source_encoding, const char *destination_encoding, int ecflags);
-
-/**
- * Identical to rb_econv_open(), except it additionally takes a hash of
- * optional strings.
- *
- *
- * @param[in] source_encoding Name of an encoding.
- * @param[in] destination_encoding Name of another encoding.
- * @param[in] ecflags A set of enum ::ruby_econv_flag_type.
- * @param[in] ecopts Optional set of strings.
- * @exception rb_eArgError No such encoding.
- * @retval NULL Failed to create a struct ::rb_econv_t.
- * @retval otherwise Allocated struct ::rb_econv_t.
- * @warning Return value must be passed to rb_econv_close() exactly once.
- */
-rb_econv_t *rb_econv_open_opts(const char *source_encoding, const char *destination_encoding, int ecflags, VALUE ecopts);
-
-/**
- * Converts a string from an encoding to another.
- *
- * Possible flags are either ::RUBY_ECONV_PARTIAL_INPUT (means the source
- * buffer is a part of much larger one), ::RUBY_ECONV_AFTER_OUTPUT (instructs
- * the converter to stop after output before input), or both of them.
- *
- * @param[in,out] ec Conversion specification/state etc.
- * @param[in] source_buffer_ptr Target string.
- * @param[in] source_buffer_end End of target string.
- * @param[out] destination_buffer_ptr Return buffer.
- * @param[out] destination_buffer_end End of return buffer.
- * @param[in] flags Flags (see above).
- * @return The status of the conversion.
- * @post `destination_buffer_ptr` holds conversion results.
- */
-rb_econv_result_t rb_econv_convert(rb_econv_t *ec,
- const unsigned char **source_buffer_ptr, const unsigned char *source_buffer_end,
- unsigned char **destination_buffer_ptr, unsigned char *destination_buffer_end,
- int flags);
-
-/**
- * Destructs a converter. Note that a converter can have a buffer, and can be
- * non-empty. Calling this would lose your data then.
- *
- * @param[out] ec The converter to destroy.
- * @post `ec` is no longer a valid pointer.
- */
-void rb_econv_close(rb_econv_t *ec);
-
-/**
- * Assigns the replacement string. The string passed here would appear in
- * converted string when it cannot represent its source counterpart. This can
- * happen for instance you convert an emoji to ISO-8859-1.
- *
- * @param[out] ec Target converter.
- * @param[in] str Replacement string.
- * @param[in] len Number of bytes of `str`.
- * @param[in] encname Name of encoding of `str`.
- * @retval 0 Success.
- * @retval -1 Failure (ENOMEM etc.).
- * @post `ec`'s replacement string is set to `str`.
- */
-int rb_econv_set_replacement(rb_econv_t *ec, const unsigned char *str, size_t len, const char *encname);
-
-/**
- * "Decorate"s a converter. There are special kind of converters that
- * transforms the contents, like replacing CR into CRLF. You can add such
- * decorators to a converter using this API. By using this function a
- * decorator is prepended at the beginning of a conversion sequence: in case of
- * CRLF conversion, newlines are converted before encodings are converted.
- *
- * @param[out] ec Target converter to decorate.
- * @param[in] decorator_name Name of decorator to prepend.
- * @retval 0 Success.
- * @retval -1 Failure (no such decorator etc.).
- * @post Decorator works before encoding conversion happens.
- *
- * @internal
- *
- * What is the possible value of the `decorator_name` is not public. You have
- * to read through `transcode.c` carefully.
- */
-int rb_econv_decorate_at_first(rb_econv_t *ec, const char *decorator_name);
-
-/**
- * Identical to rb_econv_decorate_at_first(), except it adds to the opposite
- * direction. For instance CRLF conversion would run _after_ encodings are
- * converted.
- *
- * @param[out] ec Target converter to decorate.
- * @param[in] decorator_name Name of decorator to prepend.
- * @retval 0 Success.
- * @retval -1 Failure (no such decorator etc.).
- * @post Decorator works after encoding conversion happens.
- */
-int rb_econv_decorate_at_last(rb_econv_t *ec, const char *decorator_name);
-
-/**
- * Creates a `rb_eConverterNotFoundError` exception object (but does not
- * raise).
- *
- * @param[in] senc Name of source encoding.
- * @param[in] denc Name of destination encoding.
- * @param[in] ecflags A set of enum ::ruby_econv_flag_type.
- * @return An instance of `rb_eConverterNotFoundError`.
- */
-VALUE rb_econv_open_exc(const char *senc, const char *denc, int ecflags);
-
-/**
- * Appends the passed string to the passed converter's output buffer. This can
- * be handy when an encoding needs bytes out of thin air; for instance
- * ISO-2022-JP has "shift function" which does not correspond to any
- * characters.
- *
- * @param[out] ec Target converter.
- * @param[in] str String to insert.
- * @param[in] len Number of bytes of `str`.
- * @param[in] str_encoding Encoding of `str`.
- * @retval 0 Success.
- * @retval -1 Failure (conversion error etc.).
- * @note `str_encoding` can be anything, and `str` itself is converted
- * when necessary.
- */
-int rb_econv_insert_output(rb_econv_t *ec,
- const unsigned char *str, size_t len, const char *str_encoding);
-
-/**
- * Queries an encoding name which best suits for rb_econv_insert_output()'s
- * last parameter. Strings in this encoding need no conversion when inserted;
- * can be both time/space efficient.
- *
- * @param[in] ec Target converter.
- * @return Its encoding for insertion.
- */
-const char *rb_econv_encoding_to_insert_output(rb_econv_t *ec);
-
-/**
- * This is a rb_econv_make_exception() + rb_exc_raise() combo.
- *
- * @param[in] ec (Possibly failed) conversion.
- * @exception rb_eInvalidByteSequenceError Invalid byte sequence.
- * @exception rb_eUndefinedConversionError Conversion undefined.
- * @note This function can return when no error.
- */
-void rb_econv_check_error(rb_econv_t *ec);
-
-/**
- * This function makes sense right after rb_econv_convert() returns. As listed
- * in ::rb_econv_result_t, rb_econv_convert() can bail out for various reasons.
- * This function checks the passed converter's internal state and convert it to
- * an appropriate exception object.
- *
- * @param[in] ec Target converter.
- * @retval RUBY_Qnil The converter has no error.
- * @retval otherwise Conversion error turned into an exception.
- */
-VALUE rb_econv_make_exception(rb_econv_t *ec);
-
-/**
- * Queries if rb_econv_putback() makes sense, i.e. there are invalid byte
- * sequences remain in the buffer.
- *
- * @param[in] ec Target converter.
- * @return Number of bytes that can be pushed back.
- */
-int rb_econv_putbackable(rb_econv_t *ec);
-
-/**
- * Puts back the bytes. In case of ::econv_invalid_byte_sequence, some of
- * those invalid bytes are discarded and the others are buffered to be
- * converted later. The latter bytes can be put back using this API.
- *
- * @param[out] ec Target converter (invalid byte sequence).
- * @param[out] p Return buffer.
- * @param[in] n Max number of bytes to put back.
- * @post At most `n` bytes of what was put back is written to `p`.
- */
-void rb_econv_putback(rb_econv_t *ec, unsigned char *p, int n);
-
-/**
- * Queries the passed encoding's corresponding ASCII compatible encoding. "The
- * corresponding ASCII compatible encoding" in this context is an ASCII
- * compatible encoding which can represent exactly the same character sets as
- * the given ASCII incompatible encoding. For instance that of UTF-16LE is
- * UTF-8.
- *
- * @param[in] encname Name of an ASCII incompatible encoding.
- * @retval NULL `encname` is already ASCII compatible.
- * @retval otherwise The corresponding ASCII compatible encoding.
- */
-const char *rb_econv_asciicompat_encoding(const char *encname);
-
-/**
- * Identical to rb_econv_convert(), except it takes Ruby's string instead of
- * C's pointer.
- *
- * @param[in,out] ec Target converter.
- * @param[in] src Source string.
- * @param[in] flags Flags (see rb_econv_convert).
- * @exception rb_eArgError Converted string is too long.
- * @exception rb_eInvalidByteSequenceError Invalid byte sequence.
- * @exception rb_eUndefinedConversionError Conversion undefined.
- * @return The conversion result.
- */
-VALUE rb_econv_str_convert(rb_econv_t *ec, VALUE src, int flags);
-
-/**
- * Identical to rb_econv_str_convert(), except it converts only a part of the
- * passed string. Can be handy when you for instance want to do line-buffered
- * conversion.
- *
- * @param[in,out] ec Target converter.
- * @param[in] src Source string.
- * @param[in] byteoff Number of bytes to seek.
- * @param[in] bytesize Number of bytes to read.
- * @param[in] flags Flags (see rb_econv_convert).
- * @exception rb_eArgError Converted string is too long.
- * @exception rb_eInvalidByteSequenceError Invalid byte sequence.
- * @exception rb_eUndefinedConversionError Conversion undefined.
- * @return The conversion result.
- */
-VALUE rb_econv_substr_convert(rb_econv_t *ec, VALUE src, long byteoff, long bytesize, int flags);
-
-/**
- * Identical to rb_econv_str_convert(), except it appends the conversion result
- * to the additionally passed string instead of creating a new string. It can
- * also be seen as a routine identical to rb_econv_append(), except it takes a
- * Ruby's string instead of C's pointer.
- *
- * @param[in,out] ec Target converter.
- * @param[in] src Source string.
- * @param[in] dst Return buffer.
- * @param[in] flags Flags (see rb_econv_convert).
- * @exception rb_eArgError Converted string is too long.
- * @exception rb_eInvalidByteSequenceError Invalid byte sequence.
- * @exception rb_eUndefinedConversionError Conversion undefined.
- * @return The conversion result.
- */
-VALUE rb_econv_str_append(rb_econv_t *ec, VALUE src, VALUE dst, int flags);
-
-/**
- * Identical to rb_econv_str_append(), except it appends only a part of the
- * passed string with conversion. It can also be seen as a routine identical
- * to rb_econv_substr_convert(), except it appends the conversion result to the
- * additionally passed string instead of creating a new string.
- *
- * @param[in,out] ec Target converter.
- * @param[in] src Source string.
- * @param[in] byteoff Number of bytes to seek.
- * @param[in] bytesize Number of bytes to read.
- * @param[in] dst Return buffer.
- * @param[in] flags Flags (see rb_econv_convert).
- * @exception rb_eArgError Converted string is too long.
- * @exception rb_eInvalidByteSequenceError Invalid byte sequence.
- * @exception rb_eUndefinedConversionError Conversion undefined.
- * @return The conversion result.
- */
-VALUE rb_econv_substr_append(rb_econv_t *ec, VALUE src, long byteoff, long bytesize, VALUE dst, int flags);
-
-/**
- * Converts the passed C's pointer according to the passed converter, then
- * append the conversion result to the passed Ruby's string. This way buffer
- * overflow is properly avoided to resize the destination properly.
- *
- * @param[in,out] ec Target converter.
- * @param[in] bytesrc Target string.
- * @param[in] bytesize Number of bytes of `bytesrc`.
- * @param[in] dst Return buffer.
- * @param[in] flags Flags (see rb_econv_convert).
- * @exception rb_eArgError Converted string is too long.
- * @exception rb_eInvalidByteSequenceError Invalid byte sequence.
- * @exception rb_eUndefinedConversionError Conversion undefined.
- * @return The conversion result.
- */
-VALUE rb_econv_append(rb_econv_t *ec, const char *bytesrc, long bytesize, VALUE dst, int flags);
-
-/**
- * This badly named function does not set the destination encoding to binary,
- * but instead just nullifies newline conversion decorators if any. Other
- * ordinal character conversions still happen after this; something non-binary
- * would still be generated.
- *
- * @param[out] ec Target converter to modify.
- * @post Any newline conversions, if any, would be killed.
- */
-void rb_econv_binmode(rb_econv_t *ec);
-
-/**
- * This enum is kind of omnibus. Gathers various constants.
- */
-enum ruby_econv_flag_type {
-
- /**
- * @name Flags for rb_econv_open()
- *
- * @{
- */
-
- /** Mask for error handling related bits. */
- RUBY_ECONV_ERROR_HANDLER_MASK = 0x000000ff,
-
- /** Special handling of invalid sequences are there. */
- RUBY_ECONV_INVALID_MASK = 0x0000000f,
-
- /** Invalid sequences shall be replaced. */
- RUBY_ECONV_INVALID_REPLACE = 0x00000002,
-
- /** Special handling of undefined conversion are there. */
- RUBY_ECONV_UNDEF_MASK = 0x000000f0,
-
- /** Undefined characters shall be replaced. */
- RUBY_ECONV_UNDEF_REPLACE = 0x00000020,
-
- /** Undefined characters shall be escaped. */
- RUBY_ECONV_UNDEF_HEX_CHARREF = 0x00000030,
-
- /** Decorators are there. */
- RUBY_ECONV_DECORATOR_MASK = 0x0000ff00,
-
- /** Newline converters are there. */
- RUBY_ECONV_NEWLINE_DECORATOR_MASK = 0x00003f00,
-
- /** (Unclear; seems unused). */
- RUBY_ECONV_NEWLINE_DECORATOR_READ_MASK = 0x00000f00,
-
- /** (Unclear; seems unused). */
- RUBY_ECONV_NEWLINE_DECORATOR_WRITE_MASK = 0x00003000,
-
- /** Universal newline mode. */
- RUBY_ECONV_UNIVERSAL_NEWLINE_DECORATOR = 0x00000100,
-
- /** CR to CRLF conversion shall happen. */
- RUBY_ECONV_CRLF_NEWLINE_DECORATOR = 0x00001000,
-
- /** CRLF to CR conversion shall happen. */
- RUBY_ECONV_CR_NEWLINE_DECORATOR = 0x00002000,
-
- /** Texts shall be XML-escaped. */
- RUBY_ECONV_XML_TEXT_DECORATOR = 0x00004000,
-
- /** Texts shall be AttrValue escaped */
- RUBY_ECONV_XML_ATTR_CONTENT_DECORATOR = 0x00008000,
-
- /** (Unclear; seems unused). */
- RUBY_ECONV_STATEFUL_DECORATOR_MASK = 0x00f00000,
-
- /** Texts shall be AttrValue escaped. */
- RUBY_ECONV_XML_ATTR_QUOTE_DECORATOR = 0x00100000,
-
- /** Newline decorator's default. */
- RUBY_ECONV_DEFAULT_NEWLINE_DECORATOR =
-#if defined(RUBY_TEST_CRLF_ENVIRONMENT) || defined(_WIN32)
- RUBY_ECONV_CRLF_NEWLINE_DECORATOR,
-#else
- 0,
-#endif
-
-#define ECONV_ERROR_HANDLER_MASK RUBY_ECONV_ERROR_HANDLER_MASK /**< @old{RUBY_ECONV_ERROR_HANDLER_MASK} */
-#define ECONV_INVALID_MASK RUBY_ECONV_INVALID_MASK /**< @old{RUBY_ECONV_INVALID_MASK} */
-#define ECONV_INVALID_REPLACE RUBY_ECONV_INVALID_REPLACE /**< @old{RUBY_ECONV_INVALID_REPLACE} */
-#define ECONV_UNDEF_MASK RUBY_ECONV_UNDEF_MASK /**< @old{RUBY_ECONV_UNDEF_MASK} */
-#define ECONV_UNDEF_REPLACE RUBY_ECONV_UNDEF_REPLACE /**< @old{RUBY_ECONV_UNDEF_REPLACE} */
-#define ECONV_UNDEF_HEX_CHARREF RUBY_ECONV_UNDEF_HEX_CHARREF /**< @old{RUBY_ECONV_UNDEF_HEX_CHARREF} */
-#define ECONV_DECORATOR_MASK RUBY_ECONV_DECORATOR_MASK /**< @old{RUBY_ECONV_DECORATOR_MASK} */
-#define ECONV_NEWLINE_DECORATOR_MASK RUBY_ECONV_NEWLINE_DECORATOR_MASK /**< @old{RUBY_ECONV_NEWLINE_DECORATOR_MASK} */
-#define ECONV_NEWLINE_DECORATOR_READ_MASK RUBY_ECONV_NEWLINE_DECORATOR_READ_MASK /**< @old{RUBY_ECONV_NEWLINE_DECORATOR_READ_MASK} */
-#define ECONV_NEWLINE_DECORATOR_WRITE_MASK RUBY_ECONV_NEWLINE_DECORATOR_WRITE_MASK /**< @old{RUBY_ECONV_NEWLINE_DECORATOR_WRITE_MASK} */
-#define ECONV_UNIVERSAL_NEWLINE_DECORATOR RUBY_ECONV_UNIVERSAL_NEWLINE_DECORATOR /**< @old{RUBY_ECONV_UNIVERSAL_NEWLINE_DECORATOR} */
-#define ECONV_CRLF_NEWLINE_DECORATOR RUBY_ECONV_CRLF_NEWLINE_DECORATOR /**< @old{RUBY_ECONV_CRLF_NEWLINE_DECORATOR} */
-#define ECONV_CR_NEWLINE_DECORATOR RUBY_ECONV_CR_NEWLINE_DECORATOR /**< @old{RUBY_ECONV_CR_NEWLINE_DECORATOR} */
-#define ECONV_XML_TEXT_DECORATOR RUBY_ECONV_XML_TEXT_DECORATOR /**< @old{RUBY_ECONV_XML_TEXT_DECORATOR} */
-#define ECONV_XML_ATTR_CONTENT_DECORATOR RUBY_ECONV_XML_ATTR_CONTENT_DECORATOR /**< @old{RUBY_ECONV_XML_ATTR_CONTENT_DECORATOR} */
-#define ECONV_STATEFUL_DECORATOR_MASK RUBY_ECONV_STATEFUL_DECORATOR_MASK /**< @old{RUBY_ECONV_STATEFUL_DECORATOR_MASK} */
-#define ECONV_XML_ATTR_QUOTE_DECORATOR RUBY_ECONV_XML_ATTR_QUOTE_DECORATOR /**< @old{RUBY_ECONV_XML_ATTR_QUOTE_DECORATOR} */
-#define ECONV_DEFAULT_NEWLINE_DECORATOR RUBY_ECONV_DEFAULT_NEWLINE_DECORATOR /**< @old{RUBY_ECONV_DEFAULT_NEWLINE_DECORATOR} */
- /** @} */
-
- /**
- * @name Flags for rb_econv_convert()
- *
- * @{
- */
-
- /** Indicates the input is a part of much larger one. */
- RUBY_ECONV_PARTIAL_INPUT = 0x00010000,
-
- /** Instructs the converter to stop after output. */
- RUBY_ECONV_AFTER_OUTPUT = 0x00020000,
-#define ECONV_PARTIAL_INPUT RUBY_ECONV_PARTIAL_INPUT /**< @old{RUBY_ECONV_PARTIAL_INPUT} */
-#define ECONV_AFTER_OUTPUT RUBY_ECONV_AFTER_OUTPUT /**< @old{RUBY_ECONV_AFTER_OUTPUT} */
-
- RUBY_ECONV_FLAGS_PLACEHOLDER /**< Placeholder (not used) */
-};
-
-RBIMPL_SYMBOL_EXPORT_END()
+#include "ruby/internal/encoding/coderange.h"
+#include "ruby/internal/encoding/ctype.h"
+#include "ruby/internal/encoding/encoding.h"
+#include "ruby/internal/encoding/pathname.h"
+#include "ruby/internal/encoding/re.h"
+#include "ruby/internal/encoding/sprintf.h"
+#include "ruby/internal/encoding/string.h"
+#include "ruby/internal/encoding/symbol.h"
+#include "ruby/internal/encoding/transcode.h"
#endif /* RUBY_ENCODING_H */