summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Dhaval <anarock@MacBook-Pro.local>, 2023-11-29 05:55:29 +0530
committer: Kevin Newton <kddnewton@gmail.com>, 2023-11-29 11:15:50 -0500
commit: 9fada99cb24d9ddd7147b048776f8e0a9d7b967f (patch)
tree: c1dcad68e314f35f008f403d8e313b05d0313d98
parent: 57cb47bfe2df3fc4300b39875c40a7fe0879effb (diff)
[ruby/prism] added CP950 encoding
https://github.com/ruby/prism/commit/9c2d1cf4ba
-rw-r--r-- lib/prism/prism.gemspec | 1
-rw-r--r-- prism/enc/pm_cp950.c | 57
-rw-r--r-- prism/enc/pm_encoding.h | 1
-rw-r--r-- prism/prism.c | 1
-rw-r--r-- test/prism/encoding_test.rb | 1
5 files changed, 61 insertions, 0 deletions
diff --git a/lib/prism/prism.gemspec b/lib/prism/prism.gemspec
index 132c3747fc..80cfa8ab76 100644
--- a/lib/prism/prism.gemspec
+++ b/lib/prism/prism.gemspec
@@ -88,6 +88,7 @@ Gem::Specification.new do |spec|
"src/enc/pm_big5.c",
"src/enc/pm_cp51932.c",
"src/enc/pm_cp949.c",
+ "src/enc/pm_cp950.c",
"src/enc/pm_euc_jp.c",
"src/enc/pm_gbk.c",
"src/enc/pm_shift_jis.c",
diff --git a/prism/enc/pm_cp950.c b/prism/enc/pm_cp950.c
new file mode 100644
index 0000000000..1b7a0995ac
--- /dev/null
+++ b/prism/enc/pm_cp950.c
@@ -0,0 +1,57 @@
+#include "prism/enc/pm_encoding.h"
+
+static size_t
+pm_encoding_cp950_char_width(const uint8_t *b, ptrdiff_t n) {
+ // These are the single byte characters
+ if (*b < 0x80) {
+ return 1;
+ }
+
+ // These are the double byte characters
+ if (
+ (n > 1) &&
+ ((b[0] >= 0x81 && b[0] <= 0xFE) &&
+ ((b[1] >= 0x40 && b[1] <= 0x7E) || (b[1] >= 0xA1 && b[1] <= 0xFE)))
+ ) {
+ return 2;
+ }
+
+ return 0;
+}
+
+static size_t
+pm_encoding_cp950_alpha_char(const uint8_t *b, ptrdiff_t n) {
+ if (pm_encoding_cp950_char_width(b, n) == 1) {
+ return pm_encoding_ascii_alpha_char(b, n);
+ } else {
+ return 0;
+ }
+}
+
+static size_t
+pm_encoding_cp950_alnum_char(const uint8_t *b, ptrdiff_t n) {
+ if (pm_encoding_cp950_char_width(b, n) == 1) {
+ return pm_encoding_ascii_alnum_char(b, n);
+ } else {
+ return 0;
+ }
+}
+
+static bool
+pm_encoding_cp950_isupper_char(const uint8_t *b, ptrdiff_t n) {
+ if (pm_encoding_cp950_char_width(b, n) == 1) {
+ return pm_encoding_ascii_isupper_char(b, n);
+ } else {
+ return 0;
+ }
+}
+
+/** cp950 encoding */
+pm_encoding_t pm_encoding_cp950 = {
+ .name = "cp950",
+ .char_width = pm_encoding_cp950_char_width,
+ .alnum_char = pm_encoding_cp950_alnum_char,
+ .alpha_char = pm_encoding_cp950_alpha_char,
+ .isupper_char = pm_encoding_cp950_isupper_char,
+ .multibyte = true
+};
diff --git a/prism/enc/pm_encoding.h b/prism/enc/pm_encoding.h
index 698abc8be9..5b79902389 100644
--- a/prism/enc/pm_encoding.h
+++ b/prism/enc/pm_encoding.h
@@ -165,6 +165,7 @@ extern pm_encoding_t pm_encoding_cp850;
extern pm_encoding_t pm_encoding_cp852;
extern pm_encoding_t pm_encoding_cp855;
extern pm_encoding_t pm_encoding_cp949;
+extern pm_encoding_t pm_encoding_cp950;
extern pm_encoding_t pm_encoding_euc_jp;
extern pm_encoding_t pm_encoding_gb1988;
extern pm_encoding_t pm_encoding_gbk;
diff --git a/prism/prism.c b/prism/prism.c
index 881ea89a29..960b652db8 100644
--- a/prism/prism.c
+++ b/prism/prism.c
@@ -6233,6 +6233,7 @@ parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *star
ENCODING2("CP932", "csWindows31J", pm_encoding_windows_31j);
ENCODING1("CP936", pm_encoding_gbk);
ENCODING1("CP949", pm_encoding_cp949);
+ ENCODING1("CP950", pm_encoding_cp950);
ENCODING1("CP1250", pm_encoding_windows_1250);
ENCODING1("CP1251", pm_encoding_windows_1251);
ENCODING1("CP1252", pm_encoding_windows_1252);
diff --git a/test/prism/encoding_test.rb b/test/prism/encoding_test.rb
index 463cb95121..28992fcf1b 100644
--- a/test/prism/encoding_test.rb
+++ b/test/prism/encoding_test.rb
@@ -69,6 +69,7 @@ module Prism
Encoding::Big5_HKSCS => 0x00...0x10000,
Encoding::Big5_UAO => 0x00...0x10000,
Encoding::CP949 => 0x00...0x10000,
+ Encoding::CP950 => 0x00...0x10000,
Encoding::CP51932 => 0x00...0x10000,
Encoding::GBK => 0x00...0x10000,
Encoding::Shift_JIS => 0x00...0x10000,