summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Peter Cai <222655+pcai@users.noreply.github.com> 2023-11-17 04:55:41 +0000
committer git <svn-admin@ruby-lang.org> 2023-11-17 20:06:48 +0000
commit 585fdfe1f59951bcfe5c426601330c113c5a1e06 (patch)
tree e9ef033c2c64a864ec1825f40a67ff250468d404
parent 229f6e5bb42d24838afb3f5820a5e951f8115788 (diff)
[ruby/prism] add Windows-874 encoding
https://github.com/ruby/prism/commit/0670dd3b9a
-rw-r--r-- prism/enc/pm_encoding.h 1
-rw-r--r-- prism/enc/pm_tables.c 35
-rw-r--r-- prism/prism.c 2
-rw-r--r-- test/prism/encoding_test.rb 1
4 files changed, 39 insertions, 0 deletions
diff --git a/prism/enc/pm_encoding.h b/prism/enc/pm_encoding.h
index cb45b02b5c..cfc90b4d96 100644
--- a/prism/enc/pm_encoding.h
+++ b/prism/enc/pm_encoding.h
@@ -210,5 +210,6 @@ extern pm_encoding_t pm_encoding_windows_1256;
extern pm_encoding_t pm_encoding_windows_1257;
extern pm_encoding_t pm_encoding_windows_1258;
extern pm_encoding_t pm_encoding_windows_31j;
+extern pm_encoding_t pm_encoding_windows_874;
#endif
diff --git a/prism/enc/pm_tables.c b/prism/enc/pm_tables.c
index dd50210034..2bec68f458 100644
--- a/prism/enc/pm_tables.c
+++ b/prism/enc/pm_tables.c
@@ -1105,6 +1105,30 @@ static uint8_t pm_encoding_windows_1258_table[256] = {
};
/**
+ * Per-byte classification bitfield for windows-874, indexed by byte value: ASCII
+ * digits are 2, A-Z are 7, a-z are 3; every other byte (incl. 0x80-0xFF) is 0.
+ */
+static uint8_t pm_encoding_windows_874_table[256] = {
+// 0 1 2 3 4 5 6 7 8 9 A B C D E F
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x
+ 0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x
+ 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ax
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Bx
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Cx
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Dx
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ex
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx
+};
+
+/**
* Returns the size of the next character in the ASCII encoding. This basically
* means that if the top bit is not set, the character is 1 byte long.
*/
@@ -1214,6 +1238,7 @@ PRISM_ENCODING_TABLE(windows_1255)
PRISM_ENCODING_TABLE(windows_1256)
PRISM_ENCODING_TABLE(windows_1257)
PRISM_ENCODING_TABLE(windows_1258)
+PRISM_ENCODING_TABLE(windows_874)
#undef PRISM_ENCODING_TABLE
@@ -1686,3 +1711,13 @@ pm_encoding_t pm_encoding_windows_1258 = {
.isupper_char = pm_encoding_windows_1258_isupper_char,
.multibyte = false
};
+
+/** Windows-874 encoding descriptor (multibyte is false); the *_char callbacks are presumably generated by PRISM_ENCODING_TABLE(windows_874) - confirm. */
+pm_encoding_t pm_encoding_windows_874 = {
+ .name = "Windows-874",
+ .char_width = pm_encoding_single_char_width,
+ .alnum_char = pm_encoding_windows_874_alnum_char,
+ .alpha_char = pm_encoding_windows_874_alpha_char,
+ .isupper_char = pm_encoding_windows_874_isupper_char,
+ .multibyte = false
+};
diff --git a/prism/prism.c b/prism/prism.c
index 9f0881751c..3df36b0d2e 100644
--- a/prism/prism.c
+++ b/prism/prism.c
@@ -6075,6 +6075,7 @@ parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *star
ENCODING1("CP860", pm_encoding_ibm860);
ENCODING1("CP861", pm_encoding_ibm861);
ENCODING1("CP862", pm_encoding_ibm862);
+ ENCODING1("CP874", pm_encoding_windows_874);
ENCODING1("CP878", pm_encoding_koi8_r);
ENCODING2("CP932", "csWindows31J", pm_encoding_windows_31j);
ENCODING1("CP936", pm_encoding_gbk);
@@ -6156,6 +6157,7 @@ parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *star
break;
case 'W': case 'w':
ENCODING1("Windows-31J", pm_encoding_windows_31j);
+ ENCODING1("Windows-874", pm_encoding_windows_874);
ENCODING1("Windows-1250", pm_encoding_windows_1250);
ENCODING1("Windows-1251", pm_encoding_windows_1251);
ENCODING1("Windows-1252", pm_encoding_windows_1252);
diff --git a/test/prism/encoding_test.rb b/test/prism/encoding_test.rb
index 1e69bc9bd6..07f43312eb 100644
--- a/test/prism/encoding_test.rb
+++ b/test/prism/encoding_test.rb
@@ -53,6 +53,7 @@ module Prism
Encoding::Windows_1256 => 0x00...0x100,
Encoding::Windows_1257 => 0x00...0x100,
Encoding::Windows_1258 => 0x00...0x100,
+ Encoding::Windows_874 => 0x00...0x100,
Encoding::Big5 => 0x00...0x10000,
Encoding::CP51932 => 0x00...0x10000,
Encoding::GBK => 0x00...0x10000,