diff options
Diffstat (limited to 'include/ruby/internal/encoding')
| -rw-r--r-- | include/ruby/internal/encoding/ctype.h | 101 | ||||
| -rw-r--r-- | include/ruby/internal/encoding/encoding.h | 30 | ||||
| -rw-r--r-- | include/ruby/internal/encoding/string.h | 2 | ||||
| -rw-r--r-- | include/ruby/internal/encoding/transcode.h | 18 |
4 files changed, 86 insertions, 65 deletions
diff --git a/include/ruby/internal/encoding/ctype.h b/include/ruby/internal/encoding/ctype.h index 64aaf0a990..05c314aeb3 100644 --- a/include/ruby/internal/encoding/ctype.h +++ b/include/ruby/internal/encoding/ctype.h @@ -36,8 +36,8 @@ RBIMPL_SYMBOL_EXPORT_BEGIN() * @param[in] p Pointer to a possibly-middle of a character. * @param[in] end End of the string. * @param[in] enc Encoding. - * @retval 0 It isn't. - * @retval otherwise It is. + * @retval false It isn't. + * @retval true It is. */ static inline bool rb_enc_is_newline(const char *p, const char *e, rb_encoding *enc) @@ -53,11 +53,11 @@ rb_enc_is_newline(const char *p, const char *e, rb_encoding *enc) * encoding. The "character type" here is a set of macros defined in onigmo.h, * like `ONIGENC_CTYPE_PUNCT`. * - * @param[in] c An `OnigCodePoint` value. - * @param[in] t An `OnigCtype` value. - * @param[in] enc A `rb_encoding*` value. - * @retval 1 `c` is of `t` in `enc`. - * @retval 0 Otherwise. + * @param[in] c An `OnigCodePoint` value. + * @param[in] t An `OnigCtype` value. + * @param[in] enc A `rb_encoding*` value. + * @retval true `c` is of `t` in `enc`. + * @retval false Otherwise. */ static inline bool rb_enc_isctype(OnigCodePoint c, OnigCtype t, rb_encoding *enc) @@ -68,10 +68,10 @@ rb_enc_isctype(OnigCodePoint c, OnigCtype t, rb_encoding *enc) /** * Identical to rb_isascii(), except it additionally takes an encoding. * - * @param[in] c A code point. - * @param[in] enc An encoding. - * @retval 0 `c` is out of range of ASCII character set in `enc`. - * @retval 1 Otherwise. + * @param[in] c A code point. + * @param[in] enc An encoding. + * @retval false `c` is out of range of ASCII character set in `enc`. + * @retval true Otherwise. * * @internal * @@ -87,10 +87,10 @@ rb_enc_isascii(OnigCodePoint c, rb_encoding *enc) /** * Identical to rb_isalpha(), except it additionally takes an encoding. * - * @param[in] c A code point. - * @param[in] enc An encoding. - * @retval 1 `enc` classifies `c` as "ALPHA". 
- * @retval 0 Otherwise. + * @param[in] c A code point. + * @param[in] enc An encoding. + * @retval true `enc` classifies `c` as "ALPHA". + * @retval false Otherwise. */ static inline bool rb_enc_isalpha(OnigCodePoint c, rb_encoding *enc) @@ -101,10 +101,10 @@ rb_enc_isalpha(OnigCodePoint c, rb_encoding *enc) /** * Identical to rb_islower(), except it additionally takes an encoding. * - * @param[in] c A code point. - * @param[in] enc An encoding. - * @retval 1 `enc` classifies `c` as "LOWER". - * @retval 0 Otherwise. + * @param[in] c A code point. + * @param[in] enc An encoding. + * @retval true `enc` classifies `c` as "LOWER". + * @retval false Otherwise. */ static inline bool rb_enc_islower(OnigCodePoint c, rb_encoding *enc) @@ -115,10 +115,10 @@ rb_enc_islower(OnigCodePoint c, rb_encoding *enc) /** * Identical to rb_isupper(), except it additionally takes an encoding. * - * @param[in] c A code point. - * @param[in] enc An encoding. - * @retval 1 `enc` classifies `c` as "UPPER". - * @retval 0 Otherwise. + * @param[in] c A code point. + * @param[in] enc An encoding. + * @retval true `enc` classifies `c` as "UPPER". + * @retval false Otherwise. */ static inline bool rb_enc_isupper(OnigCodePoint c, rb_encoding *enc) @@ -127,12 +127,26 @@ rb_enc_isupper(OnigCodePoint c, rb_encoding *enc) } /** + * Identical to rb_iscntrl(), except it additionally takes an encoding. + * + * @param[in] c A code point. + * @param[in] enc An encoding. + * @retval true `enc` classifies `c` as "CNTRL". + * @retval false Otherwise. + */ +static inline bool +rb_enc_iscntrl(OnigCodePoint c, rb_encoding *enc) +{ + return ONIGENC_IS_CODE_CNTRL(enc, c); +} + +/** * Identical to rb_ispunct(), except it additionally takes an encoding. * - * @param[in] c A code point. - * @param[in] enc An encoding. - * @retval 1 `enc` classifies `c` as "PUNCT". - * @retval 0 Otherwise. + * @param[in] c A code point. + * @param[in] enc An encoding. + * @retval true `enc` classifies `c` as "PUNCT". 
+ * @retval false Otherwise. */ static inline bool rb_enc_ispunct(OnigCodePoint c, rb_encoding *enc) @@ -143,10 +157,10 @@ rb_enc_ispunct(OnigCodePoint c, rb_encoding *enc) /** * Identical to rb_isalnum(), except it additionally takes an encoding. * - * @param[in] c A code point. - * @param[in] enc An encoding. - * @retval 1 `enc` classifies `c` as "ANUM". - * @retval 0 Otherwise. + * @param[in] c A code point. + * @param[in] enc An encoding. + * @retval true `enc` classifies `c` as "ANUM". + * @retval false Otherwise. */ static inline bool rb_enc_isalnum(OnigCodePoint c, rb_encoding *enc) @@ -157,10 +171,10 @@ rb_enc_isalnum(OnigCodePoint c, rb_encoding *enc) /** * Identical to rb_isprint(), except it additionally takes an encoding. * - * @param[in] c A code point. - * @param[in] enc An encoding. - * @retval 1 `enc` classifies `c` as "PRINT". - * @retval 0 Otherwise. + * @param[in] c A code point. + * @param[in] enc An encoding. + * @retval true `enc` classifies `c` as "PRINT". + * @retval false Otherwise. */ static inline bool rb_enc_isprint(OnigCodePoint c, rb_encoding *enc) @@ -171,10 +185,10 @@ rb_enc_isprint(OnigCodePoint c, rb_encoding *enc) /** * Identical to rb_isspace(), except it additionally takes an encoding. * - * @param[in] c A code point. - * @param[in] enc An encoding. - * @retval 1 `enc` classifies `c` as "PRINT". - * @retval 0 Otherwise. + * @param[in] c A code point. + * @param[in] enc An encoding. + * @retval true `enc` classifies `c` as "SPACE". + * @retval false Otherwise. */ static inline bool rb_enc_isspace(OnigCodePoint c, rb_encoding *enc) @@ -185,10 +199,10 @@ rb_enc_isspace(OnigCodePoint c, rb_encoding *enc) /** * Identical to rb_isdigit(), except it additionally takes an encoding. * - * @param[in] c A code point. - * @param[in] enc An encoding. - * @retval 1 `enc` classifies `c` as "DIGIT". - * @retval 0 Otherwise. + * @param[in] c A code point. + * @param[in] enc An encoding. + * @retval true `enc` classifies `c` as "DIGIT". 
+ * @retval false Otherwise. */ static inline bool rb_enc_isdigit(OnigCodePoint c, rb_encoding *enc) @@ -235,6 +249,7 @@ RBIMPL_SYMBOL_EXPORT_END() #define rb_enc_isdigit rb_enc_isdigit #define rb_enc_islower rb_enc_islower #define rb_enc_isprint rb_enc_isprint +#define rb_enc_iscntrl rb_enc_iscntrl #define rb_enc_ispunct rb_enc_ispunct #define rb_enc_isspace rb_enc_isspace #define rb_enc_isupper rb_enc_isupper diff --git a/include/ruby/internal/encoding/encoding.h b/include/ruby/internal/encoding/encoding.h index 33f7f27fc1..4748ca806b 100644 --- a/include/ruby/internal/encoding/encoding.h +++ b/include/ruby/internal/encoding/encoding.h @@ -65,16 +65,16 @@ enum ruby_encoding_consts { #define ENCODING_INLINE_MAX RUBY_ENCODING_INLINE_MAX /**< @old{RUBY_ENCODING_INLINE_MAX} */ #define ENCODING_SHIFT RUBY_ENCODING_SHIFT /**< @old{RUBY_ENCODING_SHIFT} */ -#define ENCODING_MASK RUBY_ENCODING_MASK /**< @old{RUBY_ENCODING_SHIFT} */ +#define ENCODING_MASK RUBY_ENCODING_MASK /**< @old{RUBY_ENCODING_MASK} */ /** * Destructively assigns the passed encoding to the passed object. The object * must be capable of having inline encoding. Using this macro needs deep * understanding of bit level object binary layout. * - * @param[out] obj Target object to modify. - * @param[in] i Encoding in encindex format. - * @post `obj`'s encoding is `i`. + * @param[out] obj Target object to modify. + * @param[in] encindex Encoding in encindex format. + * @post `obj`'s encoding is `encindex`. */ static inline void RB_ENCODING_SET_INLINED(VALUE obj, int encindex) @@ -375,8 +375,8 @@ rb_encoding *rb_enc_check(VALUE str1,VALUE str2); VALUE rb_enc_associate_index(VALUE obj, int encindex); /** - * Identical to rb_enc_associate(), except it takes an encoding itself instead - * of its index. + * Identical to rb_enc_associate_index(), except it takes an encoding itself + * instead of its index. * * @param[out] obj Object in question. * @param[in] enc An encoding. 
@@ -626,10 +626,10 @@ int rb_enc_codelen(int code, rb_encoding *enc); /** * Identical to rb_enc_codelen(), except it returns 0 for invalid code points. * - * @param[in] code Code point in question. - * @param[in] enc Encoding to convert the code into a byte sequence. - * @retval 0 `code` is invalid. - * @return otherwise Number of bytes used for `enc` to encode `code`. + * @param[in] c Code point in question. + * @param[in] enc Encoding to convert `c` into a byte sequence. + * @retval 0 `c` is invalid. + * @return otherwise Number of bytes needed for `enc` to encode `c`. */ static inline int rb_enc_code_to_mbclen(int c, rb_encoding *enc) @@ -643,10 +643,12 @@ rb_enc_code_to_mbclen(int c, rb_encoding *enc) * Identical to rb_enc_uint_chr(), except it writes back to the passed buffer * instead of allocating one. * - * @param[in] c Code point. - * @param[out] buf Return buffer. - * @param[in] enc Target encoding scheme. - * @post `c` is encoded according to `enc`, then written to `buf`. + * @param[in] c Code point. + * @param[out] buf Return buffer. + * @param[in] enc Target encoding scheme. + * @retval <= 0 `c` is invalid in `enc`. + * @return otherwise Number of bytes written to `buf`. + * @post `c` is encoded according to `enc`, then written to `buf`. * * @internal * diff --git a/include/ruby/internal/encoding/string.h b/include/ruby/internal/encoding/string.h index f8ce809199..6ed7ca1c90 100644 --- a/include/ruby/internal/encoding/string.h +++ b/include/ruby/internal/encoding/string.h @@ -190,7 +190,7 @@ VALUE rb_enc_str_buf_cat(VALUE str, const char *ptr, long len, rb_encoding *enc) * In other languages, APIs like this one could be seen as the primitive * routines where encodings' "encode" feature are implemented. However in case * of Ruby this is not the primitive one. We directly manipulate encoded - * strings. Encoding conversion routines transocde an encoded string directly + * strings. 
Encoding conversion routines transcode an encoded string directly * to another one; not via a code point array. */ VALUE rb_enc_uint_chr(unsigned int code, rb_encoding *enc); diff --git a/include/ruby/internal/encoding/transcode.h b/include/ruby/internal/encoding/transcode.h index 60c96a41c9..7f26d2eae9 100644 --- a/include/ruby/internal/encoding/transcode.h +++ b/include/ruby/internal/encoding/transcode.h @@ -476,16 +476,16 @@ enum ruby_econv_flag_type { RUBY_ECONV_UNDEF_HEX_CHARREF = 0x00000030, /** Decorators are there. */ - RUBY_ECONV_DECORATOR_MASK = 0x0000ff00, + RUBY_ECONV_DECORATOR_MASK = 0x0001ff00, /** Newline converters are there. */ - RUBY_ECONV_NEWLINE_DECORATOR_MASK = 0x00003f00, + RUBY_ECONV_NEWLINE_DECORATOR_MASK = 0x00007f00, /** (Unclear; seems unused). */ RUBY_ECONV_NEWLINE_DECORATOR_READ_MASK = 0x00000f00, /** (Unclear; seems unused). */ - RUBY_ECONV_NEWLINE_DECORATOR_WRITE_MASK = 0x00003000, + RUBY_ECONV_NEWLINE_DECORATOR_WRITE_MASK = 0x00007000, /** Universal newline mode. */ RUBY_ECONV_UNIVERSAL_NEWLINE_DECORATOR = 0x00000100, @@ -496,11 +496,14 @@ enum ruby_econv_flag_type { /** CRLF to CR conversion shall happen. */ RUBY_ECONV_CR_NEWLINE_DECORATOR = 0x00002000, + /** CRLF to LF conversion shall happen. */ + RUBY_ECONV_LF_NEWLINE_DECORATOR = 0x00004000, + /** Texts shall be XML-escaped. */ - RUBY_ECONV_XML_TEXT_DECORATOR = 0x00004000, + RUBY_ECONV_XML_TEXT_DECORATOR = 0x00008000, /** Texts shall be AttrValue escaped */ - RUBY_ECONV_XML_ATTR_CONTENT_DECORATOR = 0x00008000, + RUBY_ECONV_XML_ATTR_CONTENT_DECORATOR = 0x00010000, /** (Unclear; seems unused). 
*/ RUBY_ECONV_STATEFUL_DECORATOR_MASK = 0x00f00000, @@ -529,6 +532,7 @@ enum ruby_econv_flag_type { #define ECONV_UNIVERSAL_NEWLINE_DECORATOR RUBY_ECONV_UNIVERSAL_NEWLINE_DECORATOR /**< @old{RUBY_ECONV_UNIVERSAL_NEWLINE_DECORATOR} */ #define ECONV_CRLF_NEWLINE_DECORATOR RUBY_ECONV_CRLF_NEWLINE_DECORATOR /**< @old{RUBY_ECONV_CRLF_NEWLINE_DECORATOR} */ #define ECONV_CR_NEWLINE_DECORATOR RUBY_ECONV_CR_NEWLINE_DECORATOR /**< @old{RUBY_ECONV_CR_NEWLINE_DECORATOR} */ +#define ECONV_LF_NEWLINE_DECORATOR RUBY_ECONV_LF_NEWLINE_DECORATOR /**< @old{RUBY_ECONV_LF_NEWLINE_DECORATOR} */ #define ECONV_XML_TEXT_DECORATOR RUBY_ECONV_XML_TEXT_DECORATOR /**< @old{RUBY_ECONV_XML_TEXT_DECORATOR} */ #define ECONV_XML_ATTR_CONTENT_DECORATOR RUBY_ECONV_XML_ATTR_CONTENT_DECORATOR /**< @old{RUBY_ECONV_XML_ATTR_CONTENT_DECORATOR} */ #define ECONV_STATEFUL_DECORATOR_MASK RUBY_ECONV_STATEFUL_DECORATOR_MASK /**< @old{RUBY_ECONV_STATEFUL_DECORATOR_MASK} */ @@ -543,10 +547,10 @@ enum ruby_econv_flag_type { */ /** Indicates the input is a part of much larger one. */ - RUBY_ECONV_PARTIAL_INPUT = 0x00010000, + RUBY_ECONV_PARTIAL_INPUT = 0x00020000, /** Instructs the converter to stop after output. */ - RUBY_ECONV_AFTER_OUTPUT = 0x00020000, + RUBY_ECONV_AFTER_OUTPUT = 0x00040000, #define ECONV_PARTIAL_INPUT RUBY_ECONV_PARTIAL_INPUT /**< @old{RUBY_ECONV_PARTIAL_INPUT} */ #define ECONV_AFTER_OUTPUT RUBY_ECONV_AFTER_OUTPUT /**< @old{RUBY_ECONV_AFTER_OUTPUT} */ |
