diff options
| -rw-r--r-- | prism/enc/pm_encoding.h | 1 | ||||
| -rw-r--r-- | prism/enc/pm_tables.c | 35 | ||||
| -rw-r--r-- | prism/prism.c | 2 | ||||
| -rw-r--r-- | test/prism/encoding_test.rb | 1 |
4 files changed, 39 insertions, 0 deletions
diff --git a/prism/enc/pm_encoding.h b/prism/enc/pm_encoding.h index cb45b02b5c..cfc90b4d96 100644 --- a/prism/enc/pm_encoding.h +++ b/prism/enc/pm_encoding.h @@ -210,5 +210,6 @@ extern pm_encoding_t pm_encoding_windows_1256; extern pm_encoding_t pm_encoding_windows_1257; extern pm_encoding_t pm_encoding_windows_1258; extern pm_encoding_t pm_encoding_windows_31j; +extern pm_encoding_t pm_encoding_windows_874; #endif diff --git a/prism/enc/pm_tables.c b/prism/enc/pm_tables.c index dd50210034..2bec68f458 100644 --- a/prism/enc/pm_tables.c +++ b/prism/enc/pm_tables.c @@ -1105,6 +1105,30 @@ static uint8_t pm_encoding_windows_1258_table[256] = { }; /** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding windows-874 character. + */ +static uint8_t pm_encoding_windows_874_table[256] = { +// 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x + 0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x + 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ax + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Bx + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Cx + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Dx + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ex + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx +}; + +/** * Returns the size of the next character in the ASCII encoding. This basically * means that if the top bit is not set, the character is 1 byte long. */ @@ -1214,6 +1238,7 @@ PRISM_ENCODING_TABLE(windows_1255) PRISM_ENCODING_TABLE(windows_1256) PRISM_ENCODING_TABLE(windows_1257) PRISM_ENCODING_TABLE(windows_1258) +PRISM_ENCODING_TABLE(windows_874) #undef PRISM_ENCODING_TABLE @@ -1686,3 +1711,13 @@ pm_encoding_t pm_encoding_windows_1258 = { .isupper_char = pm_encoding_windows_1258_isupper_char, .multibyte = false }; + +/** Windows-874 */ +pm_encoding_t pm_encoding_windows_874 = { + .name = "Windows-874", + .char_width = pm_encoding_single_char_width, + .alnum_char = pm_encoding_windows_874_alnum_char, + .alpha_char = pm_encoding_windows_874_alpha_char, + .isupper_char = pm_encoding_windows_874_isupper_char, + .multibyte = false +}; diff --git a/prism/prism.c b/prism/prism.c index 9f0881751c..3df36b0d2e 100644 --- a/prism/prism.c +++ b/prism/prism.c @@ -6075,6 +6075,7 @@ parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *star ENCODING1("CP860", pm_encoding_ibm860); ENCODING1("CP861", pm_encoding_ibm861); ENCODING1("CP862", pm_encoding_ibm862); + ENCODING1("CP874", pm_encoding_windows_874); ENCODING1("CP878", pm_encoding_koi8_r); ENCODING2("CP932", "csWindows31J", pm_encoding_windows_31j); ENCODING1("CP936", pm_encoding_gbk); @@ -6156,6 +6157,7 @@ parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *star break; case 'W': case 'w': ENCODING1("Windows-31J", pm_encoding_windows_31j); + ENCODING1("Windows-874", pm_encoding_windows_874); ENCODING1("Windows-1250", pm_encoding_windows_1250); ENCODING1("Windows-1251", pm_encoding_windows_1251); ENCODING1("Windows-1252", pm_encoding_windows_1252); diff --git a/test/prism/encoding_test.rb b/test/prism/encoding_test.rb index 1e69bc9bd6..07f43312eb 100644 --- a/test/prism/encoding_test.rb +++ b/test/prism/encoding_test.rb @@ -53,6 +53,7 @@ module Prism Encoding::Windows_1256 => 0x00...0x100, Encoding::Windows_1257 => 0x00...0x100, Encoding::Windows_1258 => 0x00...0x100, + Encoding::Windows_874 => 0x00...0x100, Encoding::Big5 => 0x00...0x10000, Encoding::CP51932 => 0x00...0x10000, Encoding::GBK => 0x00...0x10000, |
