summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--prism/enc/pm_encoding.h6
-rw-r--r--prism/enc/pm_tables.c210
-rw-r--r--prism/prism.c6
-rw-r--r--test/prism/encoding_test.rb8
4 files changed, 229 insertions, 1 deletions
diff --git a/prism/enc/pm_encoding.h b/prism/enc/pm_encoding.h
index 97ebc68a4d..9d8c1ac445 100644
--- a/prism/enc/pm_encoding.h
+++ b/prism/enc/pm_encoding.h
@@ -183,6 +183,12 @@ extern pm_encoding_t pm_encoding_utf8_mac;
extern pm_encoding_t pm_encoding_windows_1250;
extern pm_encoding_t pm_encoding_windows_1251;
extern pm_encoding_t pm_encoding_windows_1252;
+extern pm_encoding_t pm_encoding_windows_1253;
+extern pm_encoding_t pm_encoding_windows_1254;
+extern pm_encoding_t pm_encoding_windows_1255;
+extern pm_encoding_t pm_encoding_windows_1256;
+extern pm_encoding_t pm_encoding_windows_1257;
+extern pm_encoding_t pm_encoding_windows_1258;
extern pm_encoding_t pm_encoding_windows_31j;
#endif
diff --git a/prism/enc/pm_tables.c b/prism/enc/pm_tables.c
index 3ed5523c7f..569128373d 100644
--- a/prism/enc/pm_tables.c
+++ b/prism/enc/pm_tables.c
@@ -481,6 +481,150 @@ static uint8_t pm_encoding_windows_1252_table[256] = {
};
/**
+ * Each element of the following table contains a bitfield that indicates a
+ * piece of information about the corresponding windows-1253 character.
+ */
+static uint8_t pm_encoding_windows_1253_table[256] = {
+// 0 1 2 3 4 5 6 7 8 9 A B C D E F
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x
+ 0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x
+ 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
+ 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ax
+ 0, 0, 0, 0, 0, 3, 7, 0, 7, 7, 7, 0, 7, 0, 7, 7, // Bx
+ 3, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // Cx
+ 7, 7, 0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 3, 3, 3, 3, // Dx
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // Ex
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, // Fx
+};
+
+/**
+ * Each element of the following table contains a bitfield that indicates a
+ * piece of information about the corresponding windows-1254 character.
+ */
+static uint8_t pm_encoding_windows_1254_table[256] = {
+// 0 1 2 3 4 5 6 7 8 9 A B C D E F
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x
+ 0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x
+ 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 7, 0, 0, 0, // 8x
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 3, 0, 0, 7, // 9x
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, // Ax
+ 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, // Bx
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // Cx
+ 7, 7, 7, 7, 7, 7, 7, 0, 7, 7, 7, 7, 7, 7, 7, 3, // Dx
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // Ex
+ 3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 3, // Fx
+};
+
+/**
+ * Each element of the following table contains a bitfield that indicates a
+ * piece of information about the corresponding windows-1255 character.
+ */
+static uint8_t pm_encoding_windows_1255_table[256] = {
+// 0 1 2 3 4 5 6 7 8 9 A B C D E F
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x
+ 0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x
+ 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ax
+ 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Bx
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Cx
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Dx
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // Ex
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // Fx
+};
+
+/**
+ * Each element of the following table contains a bitfield that indicates a
+ * piece of information about the corresponding windows-1256 character.
+ */
+static uint8_t pm_encoding_windows_1256_table[256] = {
+// 0 1 2 3 4 5 6 7 8 9 A B C D E F
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x
+ 0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x
+ 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ax
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Bx
+ 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // Cx
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // Dx
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // Ex
+ 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx
+};
+
+/**
+ * Each element of the following table contains a bitfield that indicates a
+ * piece of information about the corresponding windows-1257 character.
+ */
+static uint8_t pm_encoding_windows_1257_table[256] = {
+// 0 1 2 3 4 5 6 7 8 9 A B C D E F
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x
+ 0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x
+ 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
+ 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 7, 0, 0, 0, 0, 7, // Ax
+ 0, 0, 0, 0, 0, 3, 0, 0, 3, 0, 3, 0, 0, 0, 0, 3, // Bx
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // Cx
+ 7, 7, 7, 7, 7, 7, 7, 0, 7, 7, 7, 7, 7, 7, 7, 3, // Dx
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // Ex
+ 3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 0, // Fx
+};
+
+/**
+ * Each element of the following table contains a bitfield that indicates a
+ * piece of information about the corresponding windows-1258 character.
+ */
+static uint8_t pm_encoding_windows_1258_table[256] = {
+// 0 1 2 3 4 5 6 7 8 9 A B C D E F
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x
+ 0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x
+ 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ax
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Bx
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Cx
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Dx
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ex
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx
+};
+
+/**
* Returns the size of the next character in the ASCII encoding. This basically
* means that if the top bit is not set, the character is 1 byte long.
*/
@@ -564,6 +708,12 @@ PRISM_ENCODING_TABLE(koi8_r)
PRISM_ENCODING_TABLE(windows_1250)
PRISM_ENCODING_TABLE(windows_1251)
PRISM_ENCODING_TABLE(windows_1252)
+PRISM_ENCODING_TABLE(windows_1253)
+PRISM_ENCODING_TABLE(windows_1254)
+PRISM_ENCODING_TABLE(windows_1255)
+PRISM_ENCODING_TABLE(windows_1256)
+PRISM_ENCODING_TABLE(windows_1257)
+PRISM_ENCODING_TABLE(windows_1258)
#undef PRISM_ENCODING_TABLE
@@ -776,3 +926,63 @@ pm_encoding_t pm_encoding_windows_1252 = {
.isupper_char = pm_encoding_windows_1252_isupper_char,
.multibyte = false
};
+
+/** Windows-1253 */
+pm_encoding_t pm_encoding_windows_1253 = {
+ .name = "windows-1253",
+ .char_width = pm_encoding_single_char_width,
+ .alnum_char = pm_encoding_windows_1253_alnum_char,
+ .alpha_char = pm_encoding_windows_1253_alpha_char,
+ .isupper_char = pm_encoding_windows_1253_isupper_char,
+ .multibyte = false
+};
+
+/** Windows-1254 */
+pm_encoding_t pm_encoding_windows_1254 = {
+ .name = "windows-1254",
+ .char_width = pm_encoding_single_char_width,
+ .alnum_char = pm_encoding_windows_1254_alnum_char,
+ .alpha_char = pm_encoding_windows_1254_alpha_char,
+ .isupper_char = pm_encoding_windows_1254_isupper_char,
+ .multibyte = false
+};
+
+/** Windows-1255 */
+pm_encoding_t pm_encoding_windows_1255 = {
+ .name = "windows-1255",
+ .char_width = pm_encoding_single_char_width,
+ .alnum_char = pm_encoding_windows_1255_alnum_char,
+ .alpha_char = pm_encoding_windows_1255_alpha_char,
+ .isupper_char = pm_encoding_windows_1255_isupper_char,
+ .multibyte = false
+};
+
+/** Windows-1256 */
+pm_encoding_t pm_encoding_windows_1256 = {
+ .name = "windows-1256",
+ .char_width = pm_encoding_single_char_width,
+ .alnum_char = pm_encoding_windows_1256_alnum_char,
+ .alpha_char = pm_encoding_windows_1256_alpha_char,
+ .isupper_char = pm_encoding_windows_1256_isupper_char,
+ .multibyte = false
+};
+
+/** Windows-1257 */
+pm_encoding_t pm_encoding_windows_1257 = {
+ .name = "windows-1257",
+ .char_width = pm_encoding_single_char_width,
+ .alnum_char = pm_encoding_windows_1257_alnum_char,
+ .alpha_char = pm_encoding_windows_1257_alpha_char,
+ .isupper_char = pm_encoding_windows_1257_isupper_char,
+ .multibyte = false
+};
+
+/** Windows-1258 */
+pm_encoding_t pm_encoding_windows_1258 = {
+ .name = "windows-1258",
+ .char_width = pm_encoding_single_char_width,
+ .alnum_char = pm_encoding_windows_1258_alnum_char,
+ .alpha_char = pm_encoding_windows_1258_alpha_char,
+ .isupper_char = pm_encoding_windows_1258_isupper_char,
+ .multibyte = false
+};
diff --git a/prism/prism.c b/prism/prism.c
index 33d50acc3d..75b972407b 100644
--- a/prism/prism.c
+++ b/prism/prism.c
@@ -6090,6 +6090,12 @@ parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *star
ENCODING2("Windows-1250", "CP1250", pm_encoding_windows_1250);
ENCODING2("Windows-1251", "CP1251", pm_encoding_windows_1251);
ENCODING2("Windows-1252", "CP1252", pm_encoding_windows_1252);
+ ENCODING2("Windows-1253", "CP1253", pm_encoding_windows_1253);
+ ENCODING2("Windows-1254", "CP1254", pm_encoding_windows_1254);
+ ENCODING2("Windows-1255", "CP1255", pm_encoding_windows_1255);
+ ENCODING2("Windows-1256", "CP1256", pm_encoding_windows_1256);
+ ENCODING2("Windows-1257", "CP1257", pm_encoding_windows_1257);
+ ENCODING2("Windows-1258", "CP1258", pm_encoding_windows_1258);
ENCODING5("Windows-31J", "CP932", "csWindows31J", "SJIS", "PCK", pm_encoding_windows_31j);
#undef ENCODING2
diff --git a/test/prism/encoding_test.rb b/test/prism/encoding_test.rb
index 9e18989ad3..264dbba119 100644
--- a/test/prism/encoding_test.rb
+++ b/test/prism/encoding_test.rb
@@ -28,12 +28,18 @@ module Prism
"ISO-8859-16",
"KOI8-R",
"Shift_JIS",
- "Windows-31J",
"UTF-8",
"UTF8-MAC",
"Windows-1250",
"Windows-1251",
"Windows-1252",
+ "Windows-1253",
+ "Windows-1254",
+ "Windows-1255",
+ "Windows-1256",
+ "Windows-1257",
+ "Windows-1258",
+ "Windows-31J"
].each do |canonical_name|
encoding = Encoding.find(canonical_name)