summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Maple Ong <maple.ong@gusto.com> 2023-11-20 14:05:56 -0600
committer git <svn-admin@ruby-lang.org> 2023-11-20 20:06:00 +0000
commit 9fbdb26f06822305781a7fa12db1ee7249dbd0a8 (patch)
tree cd14127a3e7ddf2e04bb605f7f8e0855a75d7546
parent 9d51ab8b3dce52ce97ab388e7bb8082b359cf90a (diff)
[ruby/prism] Add and test ibm863 encoding
(https://github.com/ruby/prism/pull/1853)

* Add and test ibm863

* Remove dup encoding and add alias

* Update test/prism/encoding_test.rb

Co-authored-by: Kevin Newton <kddnewton@gmail.com>

* Readd bitfield table lol

---------

https://github.com/ruby/prism/commit/4cd756d7ff

Co-authored-by: Kevin Newton <kddnewton@gmail.com>
-rw-r--r-- prism/enc/pm_encoding.h 1
-rw-r--r-- prism/enc/pm_tables.c 35
-rw-r--r-- prism/prism.c 2
-rw-r--r-- test/prism/encoding_test.rb 1
4 files changed, 39 insertions, 0 deletions
diff --git a/prism/enc/pm_encoding.h b/prism/enc/pm_encoding.h
index 7279dfd7fb..0cccd5a58d 100644
--- a/prism/enc/pm_encoding.h
+++ b/prism/enc/pm_encoding.h
@@ -176,6 +176,7 @@ extern pm_encoding_t pm_encoding_ibm857;
extern pm_encoding_t pm_encoding_ibm860;
extern pm_encoding_t pm_encoding_ibm861;
extern pm_encoding_t pm_encoding_ibm862;
+extern pm_encoding_t pm_encoding_ibm863;
extern pm_encoding_t pm_encoding_ibm864;
extern pm_encoding_t pm_encoding_ibm865;
extern pm_encoding_t pm_encoding_ibm866;
diff --git a/prism/enc/pm_tables.c b/prism/enc/pm_tables.c
index d0414590cb..c726b99887 100644
--- a/prism/enc/pm_tables.c
+++ b/prism/enc/pm_tables.c
@@ -362,6 +362,30 @@ static uint8_t pm_encoding_ibm862_table[256] = {
/**
* Each element of the following table contains a bitfield that indicates a
+ * piece of information about the corresponding IBM863 character.
+ */
+static uint8_t pm_encoding_ibm863_table[256] = {
+// 0 1 2 3 4 5 6 7 8 9 A B C D E F
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x
+ 0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x
+ 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ax
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Bx
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Cx
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Dx
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ex
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx
+};
+
+/**
+ * Each element of the following table contains a bitfield that indicates a
* piece of information about the corresponding IBM864 character.
*/
static uint8_t pm_encoding_ibm864_table[256] = {
@@ -1375,6 +1399,7 @@ PRISM_ENCODING_TABLE(ibm857)
PRISM_ENCODING_TABLE(ibm860)
PRISM_ENCODING_TABLE(ibm861)
PRISM_ENCODING_TABLE(ibm862)
+PRISM_ENCODING_TABLE(ibm863)
PRISM_ENCODING_TABLE(ibm864)
PRISM_ENCODING_TABLE(ibm865)
PRISM_ENCODING_TABLE(ibm866)
@@ -1577,6 +1602,16 @@ pm_encoding_t pm_encoding_ibm862 = {
.multibyte = false
};
+/** IBM863 */
+pm_encoding_t pm_encoding_ibm863 = {
+ .name = "IBM863",
+ .char_width = pm_encoding_single_char_width,
+ .alnum_char = pm_encoding_ibm863_alnum_char,
+ .alpha_char = pm_encoding_ibm863_alpha_char,
+ .isupper_char = pm_encoding_ibm863_isupper_char,
+ .multibyte = false
+};
+
/** IBM864 */
pm_encoding_t pm_encoding_ibm864 = {
.name = "IBM864",
diff --git a/prism/prism.c b/prism/prism.c
index 2607a84cdc..4935bdc4bf 100644
--- a/prism/prism.c
+++ b/prism/prism.c
@@ -6091,6 +6091,7 @@ parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *star
ENCODING1("CP866", pm_encoding_ibm866);
ENCODING1("CP874", pm_encoding_windows_874);
ENCODING1("CP878", pm_encoding_koi8_r);
+ ENCODING1("CP863", pm_encoding_ibm863);
ENCODING2("CP932", "csWindows31J", pm_encoding_windows_31j);
ENCODING1("CP936", pm_encoding_gbk);
ENCODING1("CP1250", pm_encoding_windows_1250);
@@ -6128,6 +6129,7 @@ parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *star
ENCODING1("IBM860", pm_encoding_ibm860);
ENCODING1("IBM861", pm_encoding_ibm861);
ENCODING1("IBM862", pm_encoding_ibm862);
+ ENCODING1("IBM863", pm_encoding_ibm863);
ENCODING1("IBM864", pm_encoding_ibm864);
ENCODING1("IBM865", pm_encoding_ibm865);
ENCODING1("IBM866", pm_encoding_ibm866);
diff --git a/test/prism/encoding_test.rb b/test/prism/encoding_test.rb
index a059e22716..9a3efa7d8e 100644
--- a/test/prism/encoding_test.rb
+++ b/test/prism/encoding_test.rb
@@ -23,6 +23,7 @@ module Prism
Encoding::IBM860 => 0x00...0x100,
Encoding::IBM861 => 0x00...0x100,
Encoding::IBM862 => 0x00...0x100,
+ Encoding::IBM863 => 0x00...0x100,
Encoding::IBM864 => 0x00...0x100,
Encoding::IBM865 => 0x00...0x100,
Encoding::IBM866 => 0x00...0x100,