summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: David Wessman <david.wesmn@gmail.com> 2023-11-20 17:07:14 +0100
committer: git <svn-admin@ruby-lang.org> 2023-11-20 16:07:19 +0000
commit: f9fb05f9d04f4622f6080d582103c612f37cc1f5 (patch)
tree: cb21a364bbc34d8166db41aa48f1e74cbc3af458
parent: f2ed7eaba0275099842b5b8407250e2d410f2f25 (diff)
[ruby/prism] feat: Adds macCroatian encoding (https://github.com/ruby/prism/pull/1880)

* feat: Adds macCroatian encoding

  - Based on:
    https://en.wikipedia.org/wiki/Mac_OS_Croatian_encoding
    https://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/CROATIAN.TXT

  Co-authored-by: Josefine Rost <nijrost@gmail.com>

* Use output from bin/encodings and adds to docs/encoding.md

---------

https://github.com/ruby/prism/commit/019a82d8f3

Co-authored-by: Josefine Rost <nijrost@gmail.com>
-rw-r--r--  prism/enc/pm_encoding.h      1
-rw-r--r--  prism/enc/pm_tables.c       35
-rw-r--r--  prism/prism.c                1
-rw-r--r--  test/prism/encoding_test.rb  1
4 files changed, 38 insertions, 0 deletions
diff --git a/prism/enc/pm_encoding.h b/prism/enc/pm_encoding.h
index a062bbe591..7279dfd7fb 100644
--- a/prism/enc/pm_encoding.h
+++ b/prism/enc/pm_encoding.h
@@ -196,6 +196,7 @@ extern pm_encoding_t pm_encoding_iso_8859_15;
extern pm_encoding_t pm_encoding_iso_8859_16;
extern pm_encoding_t pm_encoding_koi8_r;
extern pm_encoding_t pm_encoding_mac_cent_euro;
+extern pm_encoding_t pm_encoding_mac_croatian;
extern pm_encoding_t pm_encoding_mac_cyrillic;
extern pm_encoding_t pm_encoding_mac_greek;
extern pm_encoding_t pm_encoding_mac_iceland;
diff --git a/prism/enc/pm_tables.c b/prism/enc/pm_tables.c
index 87c7bfb495..d0414590cb 100644
--- a/prism/enc/pm_tables.c
+++ b/prism/enc/pm_tables.c
@@ -842,6 +842,30 @@ static uint8_t pm_encoding_mac_cent_euro_table[256] = {
/**
* Each element of the following table contains a bitfield that indicates a
+ * piece of information about the corresponding macCroatian character.
+ */
+static uint8_t pm_encoding_mac_croatian_table[256] = { // one flag byte per byte value 0x00-0xFF
+// 0 1 2 3 4 5 6 7 8 9 A B C D E F  <- low nibble of the byte; the trailing "Nx" label on each row is the high nibble
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x: 0x30-0x39 (ASCII positions of '0'-'9') hold 2 — presumably the "alphanumeric" bit only; TODO confirm bit layout
+ 0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x: 0x41-0x4F (ASCII positions of 'A'-'O') hold 7 — presumably alphabetic | alphanumeric | uppercase; TODO confirm
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x: 0x50-0x5A (ASCII positions of 'P'-'Z') hold 7; 0x5F ('_') is 0 in this table
+ 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x: 0x61-0x6F (ASCII positions of 'a'-'o') hold 3 — presumably alphabetic | alphanumeric; TODO confirm
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x: 0x70-0x7A (ASCII positions of 'p'-'z') hold 3
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x: NOTE(review): every byte 0x80-0xFF is 0 (no flags set) — presumably to match how CRuby classifies bytes in this encoding; confirm
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ax
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Bx
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Cx
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Dx
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ex
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx
+};
+
+ /**
+ * Each element of the following table contains a bitfield that indicates a
* piece of information about the corresponding macCyrillic character.
*/
static uint8_t pm_encoding_mac_cyrillic_table[256] = {
@@ -1371,6 +1395,7 @@ PRISM_ENCODING_TABLE(iso_8859_15)
PRISM_ENCODING_TABLE(iso_8859_16)
PRISM_ENCODING_TABLE(koi8_r)
PRISM_ENCODING_TABLE(mac_cent_euro)
+PRISM_ENCODING_TABLE(mac_croatian)
PRISM_ENCODING_TABLE(mac_cyrillic)
PRISM_ENCODING_TABLE(mac_greek)
PRISM_ENCODING_TABLE(mac_iceland)
@@ -1752,6 +1777,16 @@ pm_encoding_t pm_encoding_mac_cent_euro = {
.multibyte = false
};
+/** macCroatian: encoding descriptor for a single-byte (non-multibyte) encoding. */
+pm_encoding_t pm_encoding_mac_croatian = {
+ .name = "macCroatian", // same spelling as the ENCODING1("macCroatian", ...) magic-comment entry in prism.c
+ .char_width = pm_encoding_single_char_width,
+ .alnum_char = pm_encoding_mac_croatian_alnum_char, // presumably generated by PRISM_ENCODING_TABLE(mac_croatian) — confirm against the macro
+ .alpha_char = pm_encoding_mac_croatian_alpha_char, // presumably generated by the same macro — confirm
+ .isupper_char = pm_encoding_mac_croatian_isupper_char, // presumably generated by the same macro — confirm
+ .multibyte = false
+};
+
/** macCyrillic */
pm_encoding_t pm_encoding_mac_cyrillic = {
.name = "macCyrillic",
diff --git a/prism/prism.c b/prism/prism.c
index 5dcd3f2746..a0751efd37 100644
--- a/prism/prism.c
+++ b/prism/prism.c
@@ -6145,6 +6145,7 @@ parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *star
break;
case 'M': case 'm':
ENCODING1("macCentEuro", pm_encoding_mac_cent_euro);
+ ENCODING1("macCroatian", pm_encoding_mac_croatian);
ENCODING1("macCyrillic", pm_encoding_mac_cyrillic);
ENCODING1("macGreek", pm_encoding_mac_greek);
ENCODING1("macIceland", pm_encoding_mac_iceland);
diff --git a/test/prism/encoding_test.rb b/test/prism/encoding_test.rb
index bc9aae8e8a..a059e22716 100644
--- a/test/prism/encoding_test.rb
+++ b/test/prism/encoding_test.rb
@@ -43,6 +43,7 @@ module Prism
Encoding::ISO_8859_16 => 0x00...0x100,
Encoding::KOI8_R => 0x00...0x100,
Encoding::MACCENTEURO => 0x00...0x100,
+ Encoding::MACCROATIAN => 0x00...0x100,
Encoding::MACCYRILLIC => 0x00...0x100,
Encoding::MACGREEK => 0x00...0x100,
Encoding::MACICELAND => 0x00...0x100,