summaryrefslogtreecommitdiff
path: root/include/ruby/internal/encoding
diff options
context:
space:
mode:
Diffstat (limited to 'include/ruby/internal/encoding')
-rw-r--r--include/ruby/internal/encoding/coderange.h2
-rw-r--r--include/ruby/internal/encoding/ctype.h101
-rw-r--r--include/ruby/internal/encoding/encoding.h50
-rw-r--r--include/ruby/internal/encoding/string.h10
-rw-r--r--include/ruby/internal/encoding/transcode.h18
5 files changed, 93 insertions, 88 deletions
diff --git a/include/ruby/internal/encoding/coderange.h b/include/ruby/internal/encoding/coderange.h
index 7a81208c9e..c89f871518 100644
--- a/include/ruby/internal/encoding/coderange.h
+++ b/include/ruby/internal/encoding/coderange.h
@@ -79,7 +79,7 @@ RBIMPL_ATTR_CONST()
static inline bool
RB_ENC_CODERANGE_CLEAN_P(enum ruby_coderange_type cr)
{
- return rb_enc_coderange_clean_p(cr);
+ return rb_enc_coderange_clean_p(RBIMPL_CAST((int)cr));
}
RBIMPL_ATTR_PURE_UNLESS_DEBUG()
diff --git a/include/ruby/internal/encoding/ctype.h b/include/ruby/internal/encoding/ctype.h
index 64aaf0a990..05c314aeb3 100644
--- a/include/ruby/internal/encoding/ctype.h
+++ b/include/ruby/internal/encoding/ctype.h
@@ -36,8 +36,8 @@ RBIMPL_SYMBOL_EXPORT_BEGIN()
* @param[in] p Pointer to a possibly-middle of a character.
* @param[in] end End of the string.
* @param[in] enc Encoding.
- * @retval 0 It isn't.
- * @retval otherwise It is.
+ * @retval false It isn't.
+ * @retval true It is.
*/
static inline bool
rb_enc_is_newline(const char *p, const char *e, rb_encoding *enc)
@@ -53,11 +53,11 @@ rb_enc_is_newline(const char *p, const char *e, rb_encoding *enc)
* encoding. The "character type" here is a set of macros defined in onigmo.h,
* like `ONIGENC_CTYPE_PUNCT`.
*
- * @param[in] c An `OnigCodePoint` value.
- * @param[in] t An `OnigCtype` value.
- * @param[in] enc A `rb_encoding*` value.
- * @retval 1 `c` is of `t` in `enc`.
- * @retval 0 Otherwise.
+ * @param[in] c An `OnigCodePoint` value.
+ * @param[in] t An `OnigCtype` value.
+ * @param[in] enc A `rb_encoding*` value.
+ * @retval true `c` is of `t` in `enc`.
+ * @retval false Otherwise.
*/
static inline bool
rb_enc_isctype(OnigCodePoint c, OnigCtype t, rb_encoding *enc)
@@ -68,10 +68,10 @@ rb_enc_isctype(OnigCodePoint c, OnigCtype t, rb_encoding *enc)
/**
* Identical to rb_isascii(), except it additionally takes an encoding.
*
- * @param[in] c A code point.
- * @param[in] enc An encoding.
- * @retval 0 `c` is out of range of ASCII character set in `enc`.
- * @retval 1 Otherwise.
+ * @param[in] c A code point.
+ * @param[in] enc An encoding.
+ * @retval false `c` is out of range of ASCII character set in `enc`.
+ * @retval true Otherwise.
*
* @internal
*
@@ -87,10 +87,10 @@ rb_enc_isascii(OnigCodePoint c, rb_encoding *enc)
/**
* Identical to rb_isalpha(), except it additionally takes an encoding.
*
- * @param[in] c A code point.
- * @param[in] enc An encoding.
- * @retval 1 `enc` classifies `c` as "ALPHA".
- * @retval 0 Otherwise.
+ * @param[in] c A code point.
+ * @param[in] enc An encoding.
+ * @retval true `enc` classifies `c` as "ALPHA".
+ * @retval false Otherwise.
*/
static inline bool
rb_enc_isalpha(OnigCodePoint c, rb_encoding *enc)
@@ -101,10 +101,10 @@ rb_enc_isalpha(OnigCodePoint c, rb_encoding *enc)
/**
* Identical to rb_islower(), except it additionally takes an encoding.
*
- * @param[in] c A code point.
- * @param[in] enc An encoding.
- * @retval 1 `enc` classifies `c` as "LOWER".
- * @retval 0 Otherwise.
+ * @param[in] c A code point.
+ * @param[in] enc An encoding.
+ * @retval true `enc` classifies `c` as "LOWER".
+ * @retval false Otherwise.
*/
static inline bool
rb_enc_islower(OnigCodePoint c, rb_encoding *enc)
@@ -115,10 +115,10 @@ rb_enc_islower(OnigCodePoint c, rb_encoding *enc)
/**
* Identical to rb_isupper(), except it additionally takes an encoding.
*
- * @param[in] c A code point.
- * @param[in] enc An encoding.
- * @retval 1 `enc` classifies `c` as "UPPER".
- * @retval 0 Otherwise.
+ * @param[in] c A code point.
+ * @param[in] enc An encoding.
+ * @retval true `enc` classifies `c` as "UPPER".
+ * @retval false Otherwise.
*/
static inline bool
rb_enc_isupper(OnigCodePoint c, rb_encoding *enc)
@@ -127,12 +127,26 @@ rb_enc_isupper(OnigCodePoint c, rb_encoding *enc)
}
/**
+ * Identical to rb_iscntrl(), except it additionally takes an encoding.
+ *
+ * @param[in] c A code point.
+ * @param[in] enc An encoding.
+ * @retval true `enc` classifies `c` as "CNTRL".
+ * @retval false Otherwise.
+ */
+static inline bool
+rb_enc_iscntrl(OnigCodePoint c, rb_encoding *enc)
+{
+ return ONIGENC_IS_CODE_CNTRL(enc, c);
+}
+
+/**
* Identical to rb_ispunct(), except it additionally takes an encoding.
*
- * @param[in] c A code point.
- * @param[in] enc An encoding.
- * @retval 1 `enc` classifies `c` as "PUNCT".
- * @retval 0 Otherwise.
+ * @param[in] c A code point.
+ * @param[in] enc An encoding.
+ * @retval true `enc` classifies `c` as "PUNCT".
+ * @retval false Otherwise.
*/
static inline bool
rb_enc_ispunct(OnigCodePoint c, rb_encoding *enc)
@@ -143,10 +157,10 @@ rb_enc_ispunct(OnigCodePoint c, rb_encoding *enc)
/**
* Identical to rb_isalnum(), except it additionally takes an encoding.
*
- * @param[in] c A code point.
- * @param[in] enc An encoding.
- * @retval 1 `enc` classifies `c` as "ANUM".
- * @retval 0 Otherwise.
+ * @param[in] c A code point.
+ * @param[in] enc An encoding.
+ * @retval true `enc` classifies `c` as "ANUM".
+ * @retval false Otherwise.
*/
static inline bool
rb_enc_isalnum(OnigCodePoint c, rb_encoding *enc)
@@ -157,10 +171,10 @@ rb_enc_isalnum(OnigCodePoint c, rb_encoding *enc)
/**
* Identical to rb_isprint(), except it additionally takes an encoding.
*
- * @param[in] c A code point.
- * @param[in] enc An encoding.
- * @retval 1 `enc` classifies `c` as "PRINT".
- * @retval 0 Otherwise.
+ * @param[in] c A code point.
+ * @param[in] enc An encoding.
+ * @retval true `enc` classifies `c` as "PRINT".
+ * @retval false Otherwise.
*/
static inline bool
rb_enc_isprint(OnigCodePoint c, rb_encoding *enc)
@@ -171,10 +185,10 @@ rb_enc_isprint(OnigCodePoint c, rb_encoding *enc)
/**
* Identical to rb_isspace(), except it additionally takes an encoding.
*
- * @param[in] c A code point.
- * @param[in] enc An encoding.
- * @retval 1 `enc` classifies `c` as "PRINT".
- * @retval 0 Otherwise.
+ * @param[in] c A code point.
+ * @param[in] enc An encoding.
+ * @retval true `enc` classifies `c` as "PRINT".
+ * @retval false Otherwise.
*/
static inline bool
rb_enc_isspace(OnigCodePoint c, rb_encoding *enc)
@@ -185,10 +199,10 @@ rb_enc_isspace(OnigCodePoint c, rb_encoding *enc)
/**
* Identical to rb_isdigit(), except it additionally takes an encoding.
*
- * @param[in] c A code point.
- * @param[in] enc An encoding.
- * @retval 1 `enc` classifies `c` as "DIGIT".
- * @retval 0 Otherwise.
+ * @param[in] c A code point.
+ * @param[in] enc An encoding.
+ * @retval true `enc` classifies `c` as "DIGIT".
+ * @retval false Otherwise.
*/
static inline bool
rb_enc_isdigit(OnigCodePoint c, rb_encoding *enc)
@@ -235,6 +249,7 @@ RBIMPL_SYMBOL_EXPORT_END()
#define rb_enc_isdigit rb_enc_isdigit
#define rb_enc_islower rb_enc_islower
#define rb_enc_isprint rb_enc_isprint
+#define rb_enc_iscntrl rb_enc_iscntrl
#define rb_enc_ispunct rb_enc_ispunct
#define rb_enc_isspace rb_enc_isspace
#define rb_enc_isupper rb_enc_isupper
diff --git a/include/ruby/internal/encoding/encoding.h b/include/ruby/internal/encoding/encoding.h
index 33f7f27fc1..a58f9f2b15 100644
--- a/include/ruby/internal/encoding/encoding.h
+++ b/include/ruby/internal/encoding/encoding.h
@@ -28,6 +28,7 @@
#include "ruby/internal/attr/pure.h"
#include "ruby/internal/attr/returns_nonnull.h"
#include "ruby/internal/dllexport.h"
+#include "ruby/internal/encoding/coderange.h"
#include "ruby/internal/value.h"
#include "ruby/internal/core/rbasic.h"
#include "ruby/internal/fl_type.h"
@@ -65,21 +66,21 @@ enum ruby_encoding_consts {
#define ENCODING_INLINE_MAX RUBY_ENCODING_INLINE_MAX /**< @old{RUBY_ENCODING_INLINE_MAX} */
#define ENCODING_SHIFT RUBY_ENCODING_SHIFT /**< @old{RUBY_ENCODING_SHIFT} */
-#define ENCODING_MASK RUBY_ENCODING_MASK /**< @old{RUBY_ENCODING_SHIFT} */
+#define ENCODING_MASK RUBY_ENCODING_MASK /**< @old{RUBY_ENCODING_MASK} */
/**
* Destructively assigns the passed encoding to the passed object. The object
* must be capable of having inline encoding. Using this macro needs deep
* understanding of bit level object binary layout.
*
- * @param[out] obj Target object to modify.
- * @param[in] i Encoding in encindex format.
- * @post `obj`'s encoding is `i`.
+ * @param[out] obj Target object to modify.
+ * @param[in] ecindex Encoding in encindex format.
+ * @post `obj`'s encoding is `encindex`.
*/
static inline void
RB_ENCODING_SET_INLINED(VALUE obj, int encindex)
{
- VALUE f = /* upcast */ encindex;
+ VALUE f = /* upcast */ RBIMPL_CAST((VALUE)encindex);
f <<= RUBY_ENCODING_SHIFT;
RB_FL_UNSET_RAW(obj, RUBY_ENCODING_MASK);
@@ -139,23 +140,6 @@ RBIMPL_ATTR_NOALIAS()
int rb_char_to_option_kcode(int c, int *option, int *kcode);
/**
- * Creates a new encoding, using the passed one as a template.
- *
- * @param[in] name Name of the creating encoding.
- * @param[in] src Template.
- * @exception rb_eArgError Duplicated or malformed `name`.
- * @return Replicated new encoding's index.
- * @post Encoding named `name` is created as a copy of `src`, whose index
- * is the return value.
- *
- * @internal
- *
- * `name` can be `NULL`, but that just raises an exception. OTOH it seems no
- * sanity check is done against `src`...?
- */
-int rb_enc_replicate(const char *name, rb_encoding *src);
-
-/**
* Creates a new "dummy" encoding. Roughly speaking, an encoding is dummy when
* it is stateful. Notable example of dummy encoding are those defined in
* ISO/IEC 2022
@@ -375,8 +359,8 @@ rb_encoding *rb_enc_check(VALUE str1,VALUE str2);
VALUE rb_enc_associate_index(VALUE obj, int encindex);
/**
- * Identical to rb_enc_associate(), except it takes an encoding itself instead
- * of its index.
+ * Identical to rb_enc_associate_index(), except it takes an encoding itself
+ * instead of its index.
*
* @param[out] obj Object in question.
* @param[in] enc An encoding.
@@ -626,10 +610,10 @@ int rb_enc_codelen(int code, rb_encoding *enc);
/**
* Identical to rb_enc_codelen(), except it returns 0 for invalid code points.
*
- * @param[in] code Code point in question.
- * @param[in] enc Encoding to convert the code into a byte sequence.
- * @retval 0 `code` is invalid.
- * @return otherwise Number of bytes used for `enc` to encode `code`.
+ * @param[in] c Code point in question.
+ * @param[in] enc Encoding to convert `c` into a byte sequence.
+ * @retval 0 `c` is invalid.
+ * @return otherwise Number of bytes needed for `enc` to encode `c`.
*/
static inline int
rb_enc_code_to_mbclen(int c, rb_encoding *enc)
@@ -643,10 +627,12 @@ rb_enc_code_to_mbclen(int c, rb_encoding *enc)
* Identical to rb_enc_uint_chr(), except it writes back to the passed buffer
* instead of allocating one.
*
- * @param[in] c Code point.
- * @param[out] buf Return buffer.
- * @param[in] enc Target encoding scheme.
- * @post `c` is encoded according to `enc`, then written to `buf`.
+ * @param[in] c Code point.
+ * @param[out] buf Return buffer.
+ * @param[in] enc Target encoding scheme.
+ * @retval <= 0 `c` is invalid in `enc`.
+ * @return otherwise Number of bytes written to `buf`.
+ * @post `c` is encoded according to `enc`, then written to `buf`.
*
* @internal
*
diff --git a/include/ruby/internal/encoding/string.h b/include/ruby/internal/encoding/string.h
index f8ce809199..2cfa91c01e 100644
--- a/include/ruby/internal/encoding/string.h
+++ b/include/ruby/internal/encoding/string.h
@@ -30,7 +30,7 @@
RBIMPL_SYMBOL_EXPORT_BEGIN()
/**
- * Identical to rb_enc_str_new(), except it additionally takes an encoding.
+ * Identical to rb_str_new(), except it additionally takes an encoding.
*
* @param[in] ptr A memory region of `len` bytes length.
* @param[in] len Length of `ptr`, in bytes, not including the
@@ -190,7 +190,7 @@ VALUE rb_enc_str_buf_cat(VALUE str, const char *ptr, long len, rb_encoding *enc)
* In other languages, APIs like this one could be seen as the primitive
* routines where encodings' "encode" feature are implemented. However in case
* of Ruby this is not the primitive one. We directly manipulate encoded
- * strings. Encoding conversion routines transocde an encoded string directly
+ * strings. Encoding conversion routines transcode an encoded string directly
* to another one; not via a code point array.
*/
VALUE rb_enc_uint_chr(unsigned int code, rb_encoding *enc);
@@ -307,13 +307,13 @@ RBIMPL_ATTR_NONNULL(())
/**
* Looks for the passed string in the passed buffer.
*
- * @param[in] x Buffer that potentially includes `y`.
+ * @param[in] x Query string.
* @param[in] m Number of bytes of `x`.
- * @param[in] y Query string.
+ * @param[in] y Buffer that potentially includes `x`.
* @param[in] n Number of bytes of `y`.
* @param[in] enc Encoding of both `x` and `y`.
* @retval -1 Not found.
- * @retval otherwise Found index in `x`.
+ * @retval otherwise Found index in `y`.
* @note This API can match at a non-character-boundary.
*/
long rb_memsearch(const void *x, long m, const void *y, long n, rb_encoding *enc);
diff --git a/include/ruby/internal/encoding/transcode.h b/include/ruby/internal/encoding/transcode.h
index 60c96a41c9..7f26d2eae9 100644
--- a/include/ruby/internal/encoding/transcode.h
+++ b/include/ruby/internal/encoding/transcode.h
@@ -476,16 +476,16 @@ enum ruby_econv_flag_type {
RUBY_ECONV_UNDEF_HEX_CHARREF = 0x00000030,
/** Decorators are there. */
- RUBY_ECONV_DECORATOR_MASK = 0x0000ff00,
+ RUBY_ECONV_DECORATOR_MASK = 0x0001ff00,
/** Newline converters are there. */
- RUBY_ECONV_NEWLINE_DECORATOR_MASK = 0x00003f00,
+ RUBY_ECONV_NEWLINE_DECORATOR_MASK = 0x00007f00,
/** (Unclear; seems unused). */
RUBY_ECONV_NEWLINE_DECORATOR_READ_MASK = 0x00000f00,
/** (Unclear; seems unused). */
- RUBY_ECONV_NEWLINE_DECORATOR_WRITE_MASK = 0x00003000,
+ RUBY_ECONV_NEWLINE_DECORATOR_WRITE_MASK = 0x00007000,
/** Universal newline mode. */
RUBY_ECONV_UNIVERSAL_NEWLINE_DECORATOR = 0x00000100,
@@ -496,11 +496,14 @@ enum ruby_econv_flag_type {
/** CRLF to CR conversion shall happen. */
RUBY_ECONV_CR_NEWLINE_DECORATOR = 0x00002000,
+ /** CRLF to LF conversion shall happen. */
+ RUBY_ECONV_LF_NEWLINE_DECORATOR = 0x00004000,
+
/** Texts shall be XML-escaped. */
- RUBY_ECONV_XML_TEXT_DECORATOR = 0x00004000,
+ RUBY_ECONV_XML_TEXT_DECORATOR = 0x00008000,
/** Texts shall be AttrValue escaped */
- RUBY_ECONV_XML_ATTR_CONTENT_DECORATOR = 0x00008000,
+ RUBY_ECONV_XML_ATTR_CONTENT_DECORATOR = 0x00010000,
/** (Unclear; seems unused). */
RUBY_ECONV_STATEFUL_DECORATOR_MASK = 0x00f00000,
@@ -529,6 +532,7 @@ enum ruby_econv_flag_type {
#define ECONV_UNIVERSAL_NEWLINE_DECORATOR RUBY_ECONV_UNIVERSAL_NEWLINE_DECORATOR /**< @old{RUBY_ECONV_UNIVERSAL_NEWLINE_DECORATOR} */
#define ECONV_CRLF_NEWLINE_DECORATOR RUBY_ECONV_CRLF_NEWLINE_DECORATOR /**< @old{RUBY_ECONV_CRLF_NEWLINE_DECORATOR} */
#define ECONV_CR_NEWLINE_DECORATOR RUBY_ECONV_CR_NEWLINE_DECORATOR /**< @old{RUBY_ECONV_CR_NEWLINE_DECORATOR} */
+#define ECONV_LF_NEWLINE_DECORATOR RUBY_ECONV_LF_NEWLINE_DECORATOR /**< @old{RUBY_ECONV_LF_NEWLINE_DECORATOR} */
#define ECONV_XML_TEXT_DECORATOR RUBY_ECONV_XML_TEXT_DECORATOR /**< @old{RUBY_ECONV_XML_TEXT_DECORATOR} */
#define ECONV_XML_ATTR_CONTENT_DECORATOR RUBY_ECONV_XML_ATTR_CONTENT_DECORATOR /**< @old{RUBY_ECONV_XML_ATTR_CONTENT_DECORATOR} */
#define ECONV_STATEFUL_DECORATOR_MASK RUBY_ECONV_STATEFUL_DECORATOR_MASK /**< @old{RUBY_ECONV_STATEFUL_DECORATOR_MASK} */
@@ -543,10 +547,10 @@ enum ruby_econv_flag_type {
*/
/** Indicates the input is a part of much larger one. */
- RUBY_ECONV_PARTIAL_INPUT = 0x00010000,
+ RUBY_ECONV_PARTIAL_INPUT = 0x00020000,
/** Instructs the converter to stop after output. */
- RUBY_ECONV_AFTER_OUTPUT = 0x00020000,
+ RUBY_ECONV_AFTER_OUTPUT = 0x00040000,
#define ECONV_PARTIAL_INPUT RUBY_ECONV_PARTIAL_INPUT /**< @old{RUBY_ECONV_PARTIAL_INPUT} */
#define ECONV_AFTER_OUTPUT RUBY_ECONV_AFTER_OUTPUT /**< @old{RUBY_ECONV_AFTER_OUTPUT} */