summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- prism/enc/pm_encoding.h 1
-rw-r--r-- prism/enc/pm_tables.c 39
-rw-r--r-- prism/prism.c 1
-rw-r--r-- test/prism/encoding_test.rb 1
4 files changed, 40 insertions, 2 deletions
diff --git a/prism/enc/pm_encoding.h b/prism/enc/pm_encoding.h
index 16deaefcb3..51227b9c96 100644
--- a/prism/enc/pm_encoding.h
+++ b/prism/enc/pm_encoding.h
@@ -198,6 +198,7 @@ extern pm_encoding_t pm_encoding_iso_8859_14;
extern pm_encoding_t pm_encoding_iso_8859_15;
extern pm_encoding_t pm_encoding_iso_8859_16;
extern pm_encoding_t pm_encoding_koi8_r;
+extern pm_encoding_t pm_encoding_koi8_u;
extern pm_encoding_t pm_encoding_mac_cent_euro;
extern pm_encoding_t pm_encoding_mac_croatian;
extern pm_encoding_t pm_encoding_mac_cyrillic;
diff --git a/prism/enc/pm_tables.c b/prism/enc/pm_tables.c
index 6a163315be..6eede35e32 100644
--- a/prism/enc/pm_tables.c
+++ b/prism/enc/pm_tables.c
@@ -866,6 +866,30 @@ static uint8_t pm_encoding_koi8_r_table[256] = {
/**
* Each element of the following table contains a bitfield that indicates a
+ * piece of information about the corresponding KOI8-U character.
+ */
+static uint8_t pm_encoding_koi8_u_table[256] = {
+// 0 1 2 3 4 5 6 7 8 9 A B C D E F
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x
+ 0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x
+ 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
+ 0, 0, 0, 3, 3, 0, 3, 3, 0, 0, 0, 0, 0, 3, 0, 0, // Ax
+ 0, 0, 0, 7, 7, 0, 7, 7, 0, 0, 0, 0, 0, 7, 0, 0, // Bx
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // Cx
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // Dx
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // Ex
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // Fx
+};
+
+/**
+ * Each element of the following table contains a bitfield that indicates a
* piece of information about the corresponding macCentEuro character.
*/
static uint8_t pm_encoding_mac_cent_euro_table[256] = {
@@ -1418,7 +1442,7 @@ pm_encoding_single_char_width(PRISM_ATTRIBUTE_UNUSED const uint8_t *b, PRISM_ATT
* checking if it's a valid codepoint in KOI-8 and if it is returning 1.
*/
static size_t
-pm_encoding_koi8_r_char_width(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
+pm_encoding_koi8_char_width(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
return ((*b >= 0x20 && *b <= 0x7E) || (*b >= 0x80)) ? 1 : 0;
}
@@ -1468,6 +1492,7 @@ PRISM_ENCODING_TABLE(iso_8859_14)
PRISM_ENCODING_TABLE(iso_8859_15)
PRISM_ENCODING_TABLE(iso_8859_16)
PRISM_ENCODING_TABLE(koi8_r)
+PRISM_ENCODING_TABLE(koi8_u)
PRISM_ENCODING_TABLE(mac_cent_euro)
PRISM_ENCODING_TABLE(mac_croatian)
PRISM_ENCODING_TABLE(mac_cyrillic)
@@ -1855,13 +1880,23 @@ pm_encoding_t pm_encoding_iso_8859_16 = {
/** KOI8-R */
pm_encoding_t pm_encoding_koi8_r = {
.name = "KOI8-R",
- .char_width = pm_encoding_koi8_r_char_width,
+ .char_width = pm_encoding_koi8_char_width,
.alnum_char = pm_encoding_koi8_r_alnum_char,
.alpha_char = pm_encoding_koi8_r_alpha_char,
.isupper_char = pm_encoding_koi8_r_isupper_char,
.multibyte = false
};
+/** KOI8-U */
+pm_encoding_t pm_encoding_koi8_u = {
+ .name = "KOI8-U",
+ .char_width = pm_encoding_koi8_char_width,
+ .alnum_char = pm_encoding_koi8_u_alnum_char,
+ .alpha_char = pm_encoding_koi8_u_alpha_char,
+ .isupper_char = pm_encoding_koi8_u_isupper_char,
+ .multibyte = false
+};
+
/** macCentEuro */
pm_encoding_t pm_encoding_mac_cent_euro = {
.name = "macCentEuro",
diff --git a/prism/prism.c b/prism/prism.c
index 6892643495..e7848f0e04 100644
--- a/prism/prism.c
+++ b/prism/prism.c
@@ -6223,6 +6223,7 @@ parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *star
break;
case 'K': case 'k':
ENCODING1("KOI8-R", pm_encoding_koi8_r);
+ ENCODING1("KOI8-U", pm_encoding_koi8_u);
break;
case 'L': case 'l':
ENCODING1("locale", pm_encoding_utf_8);
diff --git a/test/prism/encoding_test.rb b/test/prism/encoding_test.rb
index dfbbff0a47..5a0a671b5b 100644
--- a/test/prism/encoding_test.rb
+++ b/test/prism/encoding_test.rb
@@ -44,6 +44,7 @@ module Prism
Encoding::ISO_8859_15 => 0x00...0x100,
Encoding::ISO_8859_16 => 0x00...0x100,
Encoding::KOI8_R => 0x00...0x100,
+ Encoding::KOI8_U => 0x00...0x100,
Encoding::MACCENTEURO => 0x00...0x100,
Encoding::MACCROATIAN => 0x00...0x100,
Encoding::MACCYRILLIC => 0x00...0x100,