summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: heyogrady <ogradypatrickj@gmail.com>, 2023-11-21 23:17:09 -0500
committer: Kevin Newton <kddnewton@gmail.com>, 2023-11-22 09:12:35 -0500
commit: e3ef05a4343f447883183bc28e515715eedad9c4 (patch)
tree: 98c29187c3e5358bf3cba3c66ff5266b9cf46f87
parent: 4f031a745d528e0a8a626c340787c7c3ab9ea143 (diff)
[ruby/prism] Add `CP949` encoding
https://github.com/ruby/prism/commit/9e78dfdf69
-rw-r--r-- lib/prism/prism.gemspec | 1
-rw-r--r-- prism/enc/pm_cp949.c | 57
-rw-r--r-- prism/enc/pm_encoding.h | 1
-rw-r--r-- prism/prism.c | 1
-rw-r--r-- test/prism/encoding_test.rb | 1
5 files changed, 61 insertions, 0 deletions
diff --git a/lib/prism/prism.gemspec b/lib/prism/prism.gemspec
index 6fbd6c2945..43ec005452 100644
--- a/lib/prism/prism.gemspec
+++ b/lib/prism/prism.gemspec
@@ -86,6 +86,7 @@ Gem::Specification.new do |spec|
"src/diagnostic.c",
"src/enc/pm_big5.c",
"src/enc/pm_cp51932.c",
+ "src/enc/pm_cp949.c",
"src/enc/pm_euc_jp.c",
"src/enc/pm_gbk.c",
"src/enc/pm_shift_jis.c",
diff --git a/prism/enc/pm_cp949.c b/prism/enc/pm_cp949.c
new file mode 100644
index 0000000000..f3b5a50fde
--- /dev/null
+++ b/prism/enc/pm_cp949.c
@@ -0,0 +1,57 @@
+#include "prism/enc/pm_encoding.h"
+
+static size_t
+pm_encoding_cp949_char_width(const uint8_t *b, ptrdiff_t n) {
+ // These are the single byte characters
+ if (*b < 0x81) {
+ return 1;
+ }
+
+ // These are the double byte characters
+ if (
+ (n > 1) &&
+ (b[0] >= 0x81 && b[0] <= 0xfe) &&
+ (b[1] >= 0x41 && b[1] <= 0xfe)
+ ) {
+ return 2;
+ }
+
+ return 0;
+}
+
+static size_t
+pm_encoding_cp949_alpha_char(const uint8_t *b, ptrdiff_t n) {
+ if (pm_encoding_cp949_char_width(b, n) == 1) {
+ return pm_encoding_ascii_alpha_char(b, n);
+ } else {
+ return 0;
+ }
+}
+
+static size_t
+pm_encoding_cp949_alnum_char(const uint8_t *b, ptrdiff_t n) {
+ if (pm_encoding_cp949_char_width(b, n) == 1) {
+ return pm_encoding_ascii_alnum_char(b, n);
+ } else {
+ return 0;
+ }
+}
+
+static bool
+pm_encoding_cp949_isupper_char(const uint8_t *b, ptrdiff_t n) {
+ if (pm_encoding_cp949_char_width(b, n) == 1) {
+ return pm_encoding_ascii_isupper_char(b, n);
+ } else {
+ return 0;
+ }
+}
+
+/** cp949 encoding */
+pm_encoding_t pm_encoding_cp949 = {
+ .name = "cp949",
+ .char_width = pm_encoding_cp949_char_width,
+ .alnum_char = pm_encoding_cp949_alnum_char,
+ .alpha_char = pm_encoding_cp949_alpha_char,
+ .isupper_char = pm_encoding_cp949_isupper_char,
+ .multibyte = true
+};
diff --git a/prism/enc/pm_encoding.h b/prism/enc/pm_encoding.h
index 51227b9c96..698abc8be9 100644
--- a/prism/enc/pm_encoding.h
+++ b/prism/enc/pm_encoding.h
@@ -164,6 +164,7 @@ extern pm_encoding_t pm_encoding_cp51932;
extern pm_encoding_t pm_encoding_cp850;
extern pm_encoding_t pm_encoding_cp852;
extern pm_encoding_t pm_encoding_cp855;
+extern pm_encoding_t pm_encoding_cp949;
extern pm_encoding_t pm_encoding_euc_jp;
extern pm_encoding_t pm_encoding_gb1988;
extern pm_encoding_t pm_encoding_gbk;
diff --git a/prism/prism.c b/prism/prism.c
index e7848f0e04..99ece901f5 100644
--- a/prism/prism.c
+++ b/prism/prism.c
@@ -6165,6 +6165,7 @@ parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *star
ENCODING1("CP863", pm_encoding_ibm863);
ENCODING2("CP932", "csWindows31J", pm_encoding_windows_31j);
ENCODING1("CP936", pm_encoding_gbk);
+ ENCODING1("CP949", pm_encoding_cp949);
ENCODING1("CP1250", pm_encoding_windows_1250);
ENCODING1("CP1251", pm_encoding_windows_1251);
ENCODING1("CP1252", pm_encoding_windows_1252);
diff --git a/test/prism/encoding_test.rb b/test/prism/encoding_test.rb
index 5a0a671b5b..463cb95121 100644
--- a/test/prism/encoding_test.rb
+++ b/test/prism/encoding_test.rb
@@ -68,6 +68,7 @@ module Prism
Encoding::Big5 => 0x00...0x10000,
Encoding::Big5_HKSCS => 0x00...0x10000,
Encoding::Big5_UAO => 0x00...0x10000,
+ Encoding::CP949 => 0x00...0x10000,
Encoding::CP51932 => 0x00...0x10000,
Encoding::GBK => 0x00...0x10000,
Encoding::Shift_JIS => 0x00...0x10000,