summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Kevin Newton <kddnewton@gmail.com> 2023-11-15 19:35:23 -0500
committer: git <svn-admin@ruby-lang.org> 2023-11-16 17:39:39 +0000
commit: 6c2defdfaa275bccf733988cc2185f144bd8f355 (patch)
tree: 01790e3dbbfc1d089f12dd2d1c6ba5f56b6f4d62
parent: 13bf8c1b46723792b31134cdb11619ed88ff2ca3 (diff)
[ruby/prism] Support for the macIceland encoding
https://github.com/ruby/prism/commit/f433d1b51b
-rw-r--r-- prism/enc/pm_encoding.h | 1
-rw-r--r-- prism/enc/pm_tables.c | 35
-rw-r--r-- prism/prism.c | 9
3 files changed, 43 insertions, 2 deletions
diff --git a/prism/enc/pm_encoding.h b/prism/enc/pm_encoding.h
index 88f128c045..6a0ba3b6c8 100644
--- a/prism/enc/pm_encoding.h
+++ b/prism/enc/pm_encoding.h
@@ -190,6 +190,7 @@ extern pm_encoding_t pm_encoding_iso_8859_14;
extern pm_encoding_t pm_encoding_iso_8859_15;
extern pm_encoding_t pm_encoding_iso_8859_16;
extern pm_encoding_t pm_encoding_koi8_r;
+extern pm_encoding_t pm_encoding_mac_iceland;
extern pm_encoding_t pm_encoding_shift_jis;
extern pm_encoding_t pm_encoding_utf_8;
extern pm_encoding_t pm_encoding_utf8_mac;
diff --git a/prism/enc/pm_tables.c b/prism/enc/pm_tables.c
index d33f52e2df..02041c2587 100644
--- a/prism/enc/pm_tables.c
+++ b/prism/enc/pm_tables.c
@@ -722,6 +722,30 @@ static uint8_t pm_encoding_koi8_r_table[256] = {
/**
* Each element of the following table contains a bitfield that indicates a
+ * piece of information about the corresponding macIceland character.
+ */
+static uint8_t pm_encoding_mac_iceland_table[256] = {
+// 0 1 2 3 4 5 6 7 8 9 A B C D E F
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x
+ 0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x
+ 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ax
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Bx
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Cx
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Dx
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ex
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx
+};
+
+/**
+ * Each element of the following table contains a bitfield that indicates a
* piece of information about the corresponding windows-1250 character.
*/
static uint8_t pm_encoding_windows_1250_table[256] = {
@@ -1030,6 +1054,7 @@ PRISM_ENCODING_TABLE(iso_8859_14)
PRISM_ENCODING_TABLE(iso_8859_15)
PRISM_ENCODING_TABLE(iso_8859_16)
PRISM_ENCODING_TABLE(koi8_r)
+PRISM_ENCODING_TABLE(mac_iceland)
PRISM_ENCODING_TABLE(windows_1250)
PRISM_ENCODING_TABLE(windows_1251)
PRISM_ENCODING_TABLE(windows_1252)
@@ -1352,6 +1377,16 @@ pm_encoding_t pm_encoding_koi8_r = {
.multibyte = false
};
+/** macIceland */
+pm_encoding_t pm_encoding_mac_iceland = {
+ .name = "macIceland",
+ .char_width = pm_encoding_single_char_width,
+ .alnum_char = pm_encoding_mac_iceland_alnum_char,
+ .alpha_char = pm_encoding_mac_iceland_alpha_char,
+ .isupper_char = pm_encoding_mac_iceland_isupper_char,
+ .multibyte = false
+};
+
/** Windows-1250 */
pm_encoding_t pm_encoding_windows_1250 = {
.name = "Windows-1250",
diff --git a/prism/prism.c b/prism/prism.c
index 0812cf840b..95201f5c34 100644
--- a/prism/prism.c
+++ b/prism/prism.c
@@ -6134,6 +6134,9 @@ parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *star
case 'L': case 'l':
ENCODING1("locale", pm_encoding_utf_8);
break;
+ case 'M': case 'm':
+ ENCODING1("macIceland", pm_encoding_mac_iceland);
+ break;
case 'P': case 'p':
ENCODING1("PCK", pm_encoding_windows_31j);
break;
@@ -6269,6 +6272,8 @@ parser_lex_magic_comment_emacs_marker(pm_parser_t *parser, const uint8_t *cursor
*/
static inline bool
parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
+ bool result = true;
+
const uint8_t *start = parser->current.start + 1;
const uint8_t *end = parser->current.end;
if (end - start <= 7) return false;
@@ -6366,7 +6371,7 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
(key_length == 8 && pm_strncasecmp(key_source, (const uint8_t *) "encoding", 8) == 0) ||
(key_length == 6 && pm_strncasecmp(key_source, (const uint8_t *) "coding", 6) == 0)
) {
- parser_lex_magic_comment_encoding_value(parser, value_start, value_end);
+ result = parser_lex_magic_comment_encoding_value(parser, value_start, value_end);
}
}
@@ -6393,7 +6398,7 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
}
}
- return true;
+ return result;
}
/******************************************************************************/