diff options
| author | Matt Boldt <me@mattboldt.com> | 2023-11-28 09:19:25 -0600 |
|---|---|---|
| committer | Kevin Newton <kddnewton@gmail.com> | 2023-11-29 12:08:15 -0500 |
| commit | 9fc40d2b26fad25960eff6dd4d35d6592f6faaef (patch) | |
| tree | 5aac44364f0d18ed99744e5ff75cd8a276d47f20 /prism | |
| parent | 86d9a6dcb61b47bcacfe98200cb6d47da6bb1134 (diff) | |
[ruby/prism] Add MacJapanese encoding
MacJapanese (also aliased as MacJapan) is a modified Shift_JIS
encoding, but Ruby implements it identically to Shift_JIS.
https://github.com/ruby/prism/commit/9e0a097699
Diffstat (limited to 'prism')
| -rw-r--r-- | prism/enc/pm_encoding.h | 1 |
| -rw-r--r-- | prism/enc/pm_mac_japanese.c | 57 |
| -rw-r--r-- | prism/prism.c | 2 |
3 files changed, 60 insertions, 0 deletions
diff --git a/prism/enc/pm_encoding.h b/prism/enc/pm_encoding.h index 5b79902389..797029365c 100644 --- a/prism/enc/pm_encoding.h +++ b/prism/enc/pm_encoding.h @@ -206,6 +206,7 @@ extern pm_encoding_t pm_encoding_mac_croatian; extern pm_encoding_t pm_encoding_mac_cyrillic; extern pm_encoding_t pm_encoding_mac_greek; extern pm_encoding_t pm_encoding_mac_iceland; +extern pm_encoding_t pm_encoding_mac_japanese; extern pm_encoding_t pm_encoding_mac_roman; extern pm_encoding_t pm_encoding_mac_romania; extern pm_encoding_t pm_encoding_mac_thai; diff --git a/prism/enc/pm_mac_japanese.c b/prism/enc/pm_mac_japanese.c new file mode 100644 index 0000000000..a5185f0e55 --- /dev/null +++ b/prism/enc/pm_mac_japanese.c @@ -0,0 +1,57 @@ +#include "prism/enc/pm_encoding.h" + +static size_t +pm_encoding_mac_japanese_char_width(const uint8_t *b, ptrdiff_t n) { + // These are the single byte characters. + if (*b < 0x80 || (*b >= 0xA1 && *b <= 0xDF)) { + return 1; + } + + // These are the double byte characters. 
+ if ( + (n > 1) && + ((b[0] >= 0x81 && b[0] <= 0x9F) || (b[0] >= 0xE0 && b[0] <= 0xFC)) && + (b[1] >= 0x40 && b[1] <= 0xFC) + ) { + return 2; + } + + return 0; +} + +static size_t +pm_encoding_mac_japanese_alpha_char(const uint8_t *b, ptrdiff_t n) { + if (pm_encoding_mac_japanese_char_width(b, n) == 1) { + return pm_encoding_ascii_alpha_char(b, n); + } else { + return 0; + } +} + +static size_t +pm_encoding_mac_japanese_alnum_char(const uint8_t *b, ptrdiff_t n) { + if (pm_encoding_mac_japanese_char_width(b, n) == 1) { + return pm_encoding_ascii_alnum_char(b, n); + } else { + return 0; + } +} + +static bool +pm_encoding_mac_japanese_isupper_char(const uint8_t *b, ptrdiff_t n) { + if (pm_encoding_mac_japanese_char_width(b, n) == 1) { + return pm_encoding_ascii_isupper_char(b, n); + } else { + return 0; + } +} + +/** MacJapanese encoding */ +pm_encoding_t pm_encoding_mac_japanese = { + .name = "MacJapanese", + .char_width = pm_encoding_mac_japanese_char_width, + .alnum_char = pm_encoding_mac_japanese_alnum_char, + .alpha_char = pm_encoding_mac_japanese_alpha_char, + .isupper_char = pm_encoding_mac_japanese_isupper_char, + .multibyte = true +}; diff --git a/prism/prism.c b/prism/prism.c index 960b652db8..154d8ea6b2 100644 --- a/prism/prism.c +++ b/prism/prism.c @@ -6303,6 +6303,8 @@ parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *star ENCODING1("macCyrillic", pm_encoding_mac_cyrillic); ENCODING1("macGreek", pm_encoding_mac_greek); ENCODING1("macIceland", pm_encoding_mac_iceland); + ENCODING1("MacJapanese", pm_encoding_mac_japanese); + ENCODING1("MacJapan", pm_encoding_mac_japanese); ENCODING1("macRoman", pm_encoding_mac_roman); ENCODING1("macRomania", pm_encoding_mac_romania); ENCODING1("macThai", pm_encoding_mac_thai); |
