summary refs log tree commit diff
diff options
context:
space:
mode:
author Kevin Newton <kddnewton@gmail.com> 2023-11-29 11:18:38 -0500
committer Kevin Newton <kddnewton@gmail.com> 2023-11-30 21:37:56 -0500
commit 219c3c1c09417d99e9de295c15a9deb8d158ff86 (patch)
tree 327ada71819a616691e462aa06461d4f4150a996
parent ba1cdadfc8ad6f284efeefbcebef1ca85bae178c (diff)
[ruby/prism] Add other UTF8 encodings
https://github.com/ruby/prism/commit/709fb6e09f
-rw-r--r-- prism/enc/pm_encoding.h 3
-rw-r--r-- prism/enc/pm_unicode.c 36
-rw-r--r-- prism/prism.c 3
-rw-r--r-- test/prism/encoding_test.rb 10
4 files changed, 46 insertions, 6 deletions
diff --git a/prism/enc/pm_encoding.h b/prism/enc/pm_encoding.h
index 797029365c..1985f00f26 100644
--- a/prism/enc/pm_encoding.h
+++ b/prism/enc/pm_encoding.h
@@ -216,6 +216,9 @@ extern pm_encoding_t pm_encoding_shift_jis;
extern pm_encoding_t pm_encoding_tis_620;
extern pm_encoding_t pm_encoding_utf_8;
extern pm_encoding_t pm_encoding_utf8_mac;
+extern pm_encoding_t pm_encoding_utf8_docomo;
+extern pm_encoding_t pm_encoding_utf8_kddi;
+extern pm_encoding_t pm_encoding_utf8_softbank;
extern pm_encoding_t pm_encoding_windows_1250;
extern pm_encoding_t pm_encoding_windows_1251;
extern pm_encoding_t pm_encoding_windows_1252;
diff --git a/prism/enc/pm_unicode.c b/prism/enc/pm_unicode.c
index 41c1f25812..09aa907a1d 100644
--- a/prism/enc/pm_unicode.c
+++ b/prism/enc/pm_unicode.c
@@ -2350,7 +2350,7 @@ pm_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n) {
/** UTF-8 */
pm_encoding_t pm_encoding_utf_8 = {
- .name = "utf-8",
+ .name = "UTF-8",
.char_width = pm_encoding_utf_8_char_width,
.alnum_char = pm_encoding_utf_8_alnum_char,
.alpha_char = pm_encoding_utf_8_alpha_char,
@@ -2358,9 +2358,39 @@ pm_encoding_t pm_encoding_utf_8 = {
.multibyte = true
};
-/** UTF8-mac */
+/** UTF8-MAC */
pm_encoding_t pm_encoding_utf8_mac = {
- .name = "utf8-mac",
+ .name = "UTF8-MAC",
+ .char_width = pm_encoding_utf_8_char_width,
+ .alnum_char = pm_encoding_utf_8_alnum_char,
+ .alpha_char = pm_encoding_utf_8_alpha_char,
+ .isupper_char = pm_encoding_utf_8_isupper_char,
+ .multibyte = true
+};
+
+/** UTF8-DoCoMo */
+pm_encoding_t pm_encoding_utf8_docomo = {
+ .name = "UTF8-DoCoMo",
+ .char_width = pm_encoding_utf_8_char_width,
+ .alnum_char = pm_encoding_utf_8_alnum_char,
+ .alpha_char = pm_encoding_utf_8_alpha_char,
+ .isupper_char = pm_encoding_utf_8_isupper_char,
+ .multibyte = true
+};
+
+/** UTF8-KDDI */
+pm_encoding_t pm_encoding_utf8_kddi = {
+ .name = "UTF8-KDDI",
+ .char_width = pm_encoding_utf_8_char_width,
+ .alnum_char = pm_encoding_utf_8_alnum_char,
+ .alpha_char = pm_encoding_utf_8_alpha_char,
+ .isupper_char = pm_encoding_utf_8_isupper_char,
+ .multibyte = true
+};
+
+/** UTF8-SoftBank */
+pm_encoding_t pm_encoding_utf8_softbank = {
+ .name = "UTF8-SoftBank",
.char_width = pm_encoding_utf_8_char_width,
.alnum_char = pm_encoding_utf_8_alnum_char,
.alpha_char = pm_encoding_utf_8_alpha_char,
diff --git a/prism/prism.c b/prism/prism.c
index 2147992351..a0c0e728b6 100644
--- a/prism/prism.c
+++ b/prism/prism.c
@@ -6324,6 +6324,9 @@ parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *star
case 'U': case 'u':
ENCODING1("US-ASCII", pm_encoding_ascii);
ENCODING2("UTF8-MAC", "UTF-8-HFS", pm_encoding_utf8_mac);
+ ENCODING1("UTF8-DoCoMo", pm_encoding_utf8_docomo);
+ ENCODING1("UTF8-KDDI", pm_encoding_utf8_kddi);
+ ENCODING1("UTF8-SoftBank", pm_encoding_utf8_softbank);
break;
case 'W': case 'w':
ENCODING1("Windows-31J", pm_encoding_windows_31j);
diff --git a/test/prism/encoding_test.rb b/test/prism/encoding_test.rb
index 76162bec1e..13c622e40a 100644
--- a/test/prism/encoding_test.rb
+++ b/test/prism/encoding_test.rb
@@ -54,6 +54,7 @@ module Prism
Encoding::MACROMANIA => 0x00...0x100,
Encoding::MACTHAI => 0x00...0x100,
Encoding::MACTURKISH => 0x00...0x100,
+ Encoding::MACUKRAINE => 0x00...0x100,
Encoding::TIS_620 => 0x00...0x100,
Encoding::Windows_1250 => 0x00...0x100,
Encoding::Windows_1251 => 0x00...0x100,
@@ -82,9 +83,12 @@ module Prism
# suite.
if ENV["PRISM_TEST_ALL_ENCODINGS"]
encodings.merge!(
- Encoding::EUC_JP => 0x00...0x1000000,
- Encoding::UTF_8 => 0x00...0x110000,
- Encoding::UTF8_MAC => 0x00...0x110000
+ Encoding::EUC_JP => 0x00...0x1000000,
+ Encoding::UTF_8 => 0x00...0x110000,
+ Encoding::UTF8_MAC => 0x00...0x110000,
+ Encoding::UTF8_DoCoMo => 0x00...0x110000,
+ Encoding::UTF8_KDDI => 0x00...0x110000,
+ Encoding::UTF8_SoftBank => 0x00...0x110000
)
end