summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- prism/enc/pm_encoding.h 3
-rw-r--r-- prism/enc/pm_tables.c 35
-rw-r--r-- prism/prism.c 1
-rw-r--r-- test/prism/encoding_test.rb 79
-rw-r--r-- test/prism/newline_test.rb 2
5 files changed, 78 insertions, 42 deletions
diff --git a/prism/enc/pm_encoding.h b/prism/enc/pm_encoding.h
index 7f5785ea8b..0d7a414383 100644
--- a/prism/enc/pm_encoding.h
+++ b/prism/enc/pm_encoding.h
@@ -158,9 +158,10 @@ extern const uint8_t pm_encoding_unicode_table[256];
extern pm_encoding_t pm_encoding_ascii;
extern pm_encoding_t pm_encoding_ascii_8bit;
extern pm_encoding_t pm_encoding_big5;
+extern pm_encoding_t pm_encoding_cp51932;
extern pm_encoding_t pm_encoding_cp850;
extern pm_encoding_t pm_encoding_cp852;
-extern pm_encoding_t pm_encoding_cp51932;
+extern pm_encoding_t pm_encoding_cp855;
extern pm_encoding_t pm_encoding_euc_jp;
extern pm_encoding_t pm_encoding_gbk;
extern pm_encoding_t pm_encoding_iso_8859_1;
diff --git a/prism/enc/pm_tables.c b/prism/enc/pm_tables.c
index 685bd838e7..9bb56e81ce 100644
--- a/prism/enc/pm_tables.c
+++ b/prism/enc/pm_tables.c
@@ -74,6 +74,30 @@ static uint8_t pm_encoding_cp852_table[256] = {
/**
* Each element of the following table contains a bitfield that indicates a
+ * piece of information about the corresponding CP855 character.
+ */
+static uint8_t pm_encoding_cp855_table[256] = {
+// 0 1 2 3 4 5 6 7 8 9 A B C D E F
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x
+ 0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x
+ 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ax
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Bx
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Cx
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Dx
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ex
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx
+};
+
+/**
+ * Each element of the following table contains a bitfield that indicates a
* piece of information about the corresponding ISO-8859-1 character.
*/
static uint8_t pm_encoding_iso_8859_1_table[256] = {
@@ -739,6 +763,7 @@ pm_encoding_koi8_r_char_width(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t
PRISM_ENCODING_TABLE(cp850)
PRISM_ENCODING_TABLE(cp852)
+PRISM_ENCODING_TABLE(cp855)
PRISM_ENCODING_TABLE(iso_8859_1)
PRISM_ENCODING_TABLE(iso_8859_2)
PRISM_ENCODING_TABLE(iso_8859_3)
@@ -807,6 +832,16 @@ pm_encoding_t pm_encoding_cp852 = {
.multibyte = false
};
+/** CP855 */
+pm_encoding_t pm_encoding_cp855 = {
+ .name = "CP855",
+ .char_width = pm_encoding_single_char_width,
+ .alnum_char = pm_encoding_cp855_alnum_char,
+ .alpha_char = pm_encoding_cp855_alpha_char,
+ .isupper_char = pm_encoding_cp855_isupper_char,
+ .multibyte = false
+};
+
/** ISO-8859-1 */
pm_encoding_t pm_encoding_iso_8859_1 = {
.name = "ISO-8859-1",
diff --git a/prism/prism.c b/prism/prism.c
index 12ce4ecaee..e9cbe08a68 100644
--- a/prism/prism.c
+++ b/prism/prism.c
@@ -6070,6 +6070,7 @@ parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *star
ENCODING1("Big5", pm_encoding_big5);
ENCODING2("CP850", "IBM850", pm_encoding_cp850);
ENCODING1("CP852", pm_encoding_cp852);
+ ENCODING1("CP855", pm_encoding_cp855);
ENCODING2("GBK", "CP936", pm_encoding_gbk);
ENCODING2("ISO-8859-1", "ISO8859-1", pm_encoding_iso_8859_1);
ENCODING2("ISO-8859-2", "ISO8859-2", pm_encoding_iso_8859_2);
diff --git a/test/prism/encoding_test.rb b/test/prism/encoding_test.rb
index 9f209691d0..bd0911e96e 100644
--- a/test/prism/encoding_test.rb
+++ b/test/prism/encoding_test.rb
@@ -5,46 +5,45 @@ require_relative "test_helper"
module Prism
class EncodingTest < TestCase
[
- "US-ASCII",
- "ASCII-8BIT",
- "Big5",
- "CP850",
- "CP852",
- "CP51932",
- "EUC-JP",
- "GBK",
- "ISO-8859-1",
- "ISO-8859-2",
- "ISO-8859-3",
- "ISO-8859-4",
- "ISO-8859-5",
- "ISO-8859-6",
- "ISO-8859-7",
- "ISO-8859-8",
- "ISO-8859-9",
- "ISO-8859-10",
- "ISO-8859-11",
- "ISO-8859-13",
- "ISO-8859-14",
- "ISO-8859-15",
- "ISO-8859-16",
- "KOI8-R",
- "Shift_JIS",
- "UTF-8",
- "UTF8-MAC",
- "Windows-1250",
- "Windows-1251",
- "Windows-1252",
- "Windows-1253",
- "Windows-1254",
- "Windows-1255",
- "Windows-1256",
- "Windows-1257",
- "Windows-1258",
- "Windows-31J"
- ].each do |canonical_name|
- encoding = Encoding.find(canonical_name)
-
+ Encoding::ASCII,
+ Encoding::ASCII_8BIT,
+ Encoding::Big5,
+ Encoding::CP51932,
+ Encoding::CP850,
+ Encoding::CP852,
+ Encoding::CP855,
+ Encoding::EUC_JP,
+ Encoding::GBK,
+ Encoding::ISO_8859_1,
+ Encoding::ISO_8859_2,
+ Encoding::ISO_8859_3,
+ Encoding::ISO_8859_4,
+ Encoding::ISO_8859_5,
+ Encoding::ISO_8859_6,
+ Encoding::ISO_8859_7,
+ Encoding::ISO_8859_8,
+ Encoding::ISO_8859_9,
+ Encoding::ISO_8859_10,
+ Encoding::ISO_8859_11,
+ Encoding::ISO_8859_13,
+ Encoding::ISO_8859_14,
+ Encoding::ISO_8859_15,
+ Encoding::ISO_8859_16,
+ Encoding::KOI8_R,
+ Encoding::Shift_JIS,
+ Encoding::UTF_8,
+ Encoding::UTF8_MAC,
+ Encoding::Windows_1250,
+ Encoding::Windows_1251,
+ Encoding::Windows_1252,
+ Encoding::Windows_1253,
+ Encoding::Windows_1254,
+ Encoding::Windows_1255,
+ Encoding::Windows_1256,
+ Encoding::Windows_1257,
+ Encoding::Windows_1258,
+ Encoding::Windows_31J
+ ].each do |encoding|
encoding.names.each do |name|
# Even though UTF-8-MAC is an alias for UTF8-MAC, CRuby treats it as
# UTF-8. So we'll skip this test.
diff --git a/test/prism/newline_test.rb b/test/prism/newline_test.rb
index c20a99a398..d998502869 100644
--- a/test/prism/newline_test.rb
+++ b/test/prism/newline_test.rb
@@ -7,7 +7,7 @@ return unless defined?(RubyVM::InstructionSequence)
module Prism
class NewlineTest < TestCase
base = File.expand_path("../", __FILE__)
- filepaths = Dir["*.rb", base: base] - %w[unescape_test.rb]
+ filepaths = Dir["*.rb", base: base] - %w[encoding_test.rb unescape_test.rb]
filepaths.each do |relative|
define_method("test_newline_flags_#{relative}") do