diff options
| author | Kevin Newton <kddnewton@gmail.com> | 2023-11-29 11:18:38 -0500 |
|---|---|---|
| committer | Kevin Newton <kddnewton@gmail.com> | 2023-11-30 21:37:56 -0500 |
| commit | 219c3c1c09417d99e9de295c15a9deb8d158ff86 (patch) | |
| tree | 327ada71819a616691e462aa06461d4f4150a996 | |
| parent | ba1cdadfc8ad6f284efeefbcebef1ca85bae178c (diff) | |
[ruby/prism] Add other UTF8 encodings
https://github.com/ruby/prism/commit/709fb6e09f
| -rw-r--r-- | prism/enc/pm_encoding.h | 3 | ||||
| -rw-r--r-- | prism/enc/pm_unicode.c | 36 | ||||
| -rw-r--r-- | prism/prism.c | 3 | ||||
| -rw-r--r-- | test/prism/encoding_test.rb | 10 |
4 files changed, 46 insertions, 6 deletions
diff --git a/prism/enc/pm_encoding.h b/prism/enc/pm_encoding.h index 797029365c..1985f00f26 100644 --- a/prism/enc/pm_encoding.h +++ b/prism/enc/pm_encoding.h @@ -216,6 +216,9 @@ extern pm_encoding_t pm_encoding_shift_jis; extern pm_encoding_t pm_encoding_tis_620; extern pm_encoding_t pm_encoding_utf_8; extern pm_encoding_t pm_encoding_utf8_mac; +extern pm_encoding_t pm_encoding_utf8_docomo; +extern pm_encoding_t pm_encoding_utf8_kddi; +extern pm_encoding_t pm_encoding_utf8_softbank; extern pm_encoding_t pm_encoding_windows_1250; extern pm_encoding_t pm_encoding_windows_1251; extern pm_encoding_t pm_encoding_windows_1252; diff --git a/prism/enc/pm_unicode.c b/prism/enc/pm_unicode.c index 41c1f25812..09aa907a1d 100644 --- a/prism/enc/pm_unicode.c +++ b/prism/enc/pm_unicode.c @@ -2350,7 +2350,7 @@ pm_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n) { /** UTF-8 */ pm_encoding_t pm_encoding_utf_8 = { - .name = "utf-8", + .name = "UTF-8", .char_width = pm_encoding_utf_8_char_width, .alnum_char = pm_encoding_utf_8_alnum_char, .alpha_char = pm_encoding_utf_8_alpha_char, @@ -2358,9 +2358,39 @@ pm_encoding_t pm_encoding_utf_8 = { .multibyte = true }; -/** UTF8-mac */ +/** UTF8-MAC */ pm_encoding_t pm_encoding_utf8_mac = { - .name = "utf8-mac", + .name = "UTF8-MAC", + .char_width = pm_encoding_utf_8_char_width, + .alnum_char = pm_encoding_utf_8_alnum_char, + .alpha_char = pm_encoding_utf_8_alpha_char, + .isupper_char = pm_encoding_utf_8_isupper_char, + .multibyte = true +}; + +/** UTF8-DoCoMo */ +pm_encoding_t pm_encoding_utf8_docomo = { + .name = "UTF8-DoCoMo", + .char_width = pm_encoding_utf_8_char_width, + .alnum_char = pm_encoding_utf_8_alnum_char, + .alpha_char = pm_encoding_utf_8_alpha_char, + .isupper_char = pm_encoding_utf_8_isupper_char, + .multibyte = true +}; + +/** UTF8-KDDI */ +pm_encoding_t pm_encoding_utf8_kddi = { + .name = "UTF8-KDDI", + .char_width = pm_encoding_utf_8_char_width, + .alnum_char = pm_encoding_utf_8_alnum_char, + .alpha_char = pm_encoding_utf_8_alpha_char, + .isupper_char = pm_encoding_utf_8_isupper_char, + .multibyte = true +}; + +/** UTF8-SoftBank */ +pm_encoding_t pm_encoding_utf8_softbank = { + .name = "UTF8-SoftBank", .char_width = pm_encoding_utf_8_char_width, .alnum_char = pm_encoding_utf_8_alnum_char, .alpha_char = pm_encoding_utf_8_alpha_char, diff --git a/prism/prism.c b/prism/prism.c index 2147992351..a0c0e728b6 100644 --- a/prism/prism.c +++ b/prism/prism.c @@ -6324,6 +6324,9 @@ parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *star case 'U': case 'u': ENCODING1("US-ASCII", pm_encoding_ascii); ENCODING2("UTF8-MAC", "UTF-8-HFS", pm_encoding_utf8_mac); + ENCODING1("UTF8-DoCoMo", pm_encoding_utf8_docomo); + ENCODING1("UTF8-KDDI", pm_encoding_utf8_kddi); + ENCODING1("UTF8-SoftBank", pm_encoding_utf8_softbank); break; case 'W': case 'w': ENCODING1("Windows-31J", pm_encoding_windows_31j); diff --git a/test/prism/encoding_test.rb b/test/prism/encoding_test.rb index 76162bec1e..13c622e40a 100644 --- a/test/prism/encoding_test.rb +++ b/test/prism/encoding_test.rb @@ -54,6 +54,7 @@ module Prism Encoding::MACROMANIA => 0x00...0x100, Encoding::MACTHAI => 0x00...0x100, Encoding::MACTURKISH => 0x00...0x100, + Encoding::MACUKRAINE => 0x00...0x100, Encoding::TIS_620 => 0x00...0x100, Encoding::Windows_1250 => 0x00...0x100, Encoding::Windows_1251 => 0x00...0x100, @@ -82,9 +83,12 @@ module Prism # suite. if ENV["PRISM_TEST_ALL_ENCODINGS"] encodings.merge!( - Encoding::EUC_JP => 0x00...0x1000000, - Encoding::UTF_8 => 0x00...0x110000, - Encoding::UTF8_MAC => 0x00...0x110000 + Encoding::EUC_JP => 0x00...0x1000000, + Encoding::UTF_8 => 0x00...0x110000, + Encoding::UTF8_MAC => 0x00...0x110000, + Encoding::UTF8_DoCoMo => 0x00...0x110000, + Encoding::UTF8_KDDI => 0x00...0x110000, + Encoding::UTF8_SoftBank => 0x00...0x110000 ) end |
