summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Kevin Newton <kddnewton@gmail.com> 2023-11-29 12:53:42 -0500
committer: Kevin Newton <kddnewton@gmail.com> 2023-11-30 21:37:56 -0500
commit: 7b5bb978fb8aefb77c731b0a48386fc139413b13 (patch)
tree: c1dc4dc848a33f102994faf4d1a30f33b95a959c
parent: 9ba92327f2aed5b5d95ad1ce51fe695f14a3428e (diff)
[PRISM] Alias CP51932 to EUC-JP
-rw-r--r-- lib/prism/prism.gemspec | 1
-rw-r--r-- prism/enc/pm_cp51932.c | 57
-rw-r--r-- prism/enc/pm_euc_jp.c | 10
-rw-r--r-- test/prism/encoding_test.rb | 6
4 files changed, 13 insertions, 61 deletions
diff --git a/lib/prism/prism.gemspec b/lib/prism/prism.gemspec
index cd90b025b2..2e8ebee409 100644
--- a/lib/prism/prism.gemspec
+++ b/lib/prism/prism.gemspec
@@ -88,7 +88,6 @@ Gem::Specification.new do |spec|
"src/enc/pm_big5.c",
"src/enc/pm_cp949.c",
"src/enc/pm_cp950.c",
- "src/enc/pm_cp51932.c",
"src/enc/pm_euc_jp.c",
"src/enc/pm_gbk.c",
"src/enc/pm_shift_jis.c",
diff --git a/prism/enc/pm_cp51932.c b/prism/enc/pm_cp51932.c
deleted file mode 100644
index 75c9fb824f..0000000000
--- a/prism/enc/pm_cp51932.c
+++ /dev/null
@@ -1,57 +0,0 @@
-#include "prism/enc/pm_encoding.h"
-
-static size_t
-pm_encoding_cp51932_char_width(const uint8_t *b, ptrdiff_t n) {
- // These are the single byte characters.
- if (*b < 0x80) {
- return 1;
- }
-
- // These are the double byte characters.
- if (
- (n > 1) &&
- ((b[0] >= 0xa1 && b[0] <= 0xfe) || (b[0] == 0x8e)) &&
- (b[1] >= 0xa1 && b[1] <= 0xfe)
- ) {
- return 2;
- }
-
- return 0;
-}
-
-static size_t
-pm_encoding_cp51932_alpha_char(const uint8_t *b, ptrdiff_t n) {
- if (pm_encoding_cp51932_char_width(b, n) == 1) {
- return pm_encoding_ascii_alpha_char(b, n);
- } else {
- return 0;
- }
-}
-
-static size_t
-pm_encoding_cp51932_alnum_char(const uint8_t *b, ptrdiff_t n) {
- if (pm_encoding_cp51932_char_width(b, n) == 1) {
- return pm_encoding_ascii_alnum_char(b, n);
- } else {
- return 0;
- }
-}
-
-static bool
-pm_encoding_cp51932_isupper_char(const uint8_t *b, ptrdiff_t n) {
- if (pm_encoding_cp51932_char_width(b, n) == 1) {
- return pm_encoding_ascii_isupper_char(b, n);
- } else {
- return 0;
- }
-}
-
-/** cp51932 encoding */
-pm_encoding_t pm_encoding_cp51932 = {
- .name = "cp51932",
- .char_width = pm_encoding_cp51932_char_width,
- .alnum_char = pm_encoding_cp51932_alnum_char,
- .alpha_char = pm_encoding_cp51932_alpha_char,
- .isupper_char = pm_encoding_cp51932_isupper_char,
- .multibyte = true
-};
diff --git a/prism/enc/pm_euc_jp.c b/prism/enc/pm_euc_jp.c
index c891326a60..6468712607 100644
--- a/prism/enc/pm_euc_jp.c
+++ b/prism/enc/pm_euc_jp.c
@@ -67,3 +67,13 @@ pm_encoding_t pm_encoding_euc_jp = {
.isupper_char = pm_encoding_euc_jp_isupper_char,
.multibyte = true
};
+
+/** CP51932 encoding */
+pm_encoding_t pm_encoding_cp51932 = {
+ .name = "CP51932",
+ .char_width = pm_encoding_euc_jp_char_width,
+ .alnum_char = pm_encoding_euc_jp_alnum_char,
+ .alpha_char = pm_encoding_euc_jp_alpha_char,
+ .isupper_char = pm_encoding_euc_jp_isupper_char,
+ .multibyte = true
+};
diff --git a/test/prism/encoding_test.rb b/test/prism/encoding_test.rb
index 8ab6f323e5..db51d653f9 100644
--- a/test/prism/encoding_test.rb
+++ b/test/prism/encoding_test.rb
@@ -71,14 +71,13 @@ module Prism
Encoding::Big5_UAO => 0x00...0x10000,
Encoding::CP949 => 0x00...0x10000,
Encoding::CP950 => 0x00...0x10000,
- Encoding::CP51932 => 0x00...0x10000,
Encoding::GBK => 0x00...0x10000,
Encoding::MACJAPANESE => 0x00...0x10000,
Encoding::Shift_JIS => 0x00...0x10000,
Encoding::SJIS_DoCoMo => 0x00...0x10000,
Encoding::SJIS_KDDI => 0x00...0x10000,
Encoding::SJIS_SoftBank => 0x00...0x10000,
- Encoding::Windows_31J => 0x00...0x10000
+ Encoding::Windows_31J => 0x00...0x10000,
}
# By default we don't test every codepoint in these encodings because they
@@ -86,7 +85,8 @@ module Prism
# suite.
if ENV["PRISM_TEST_ALL_ENCODINGS"]
encodings.merge!(
- Encoding::EUC_JP => 0x00...0x1000000,
+ Encoding::CP51932 => [*(0x00...0x10000), *(0x00...0x10000).map { |bytes| bytes | 0x8F0000 }],
+ Encoding::EUC_JP => [*(0x00...0x10000), *(0x00...0x10000).map { |bytes| bytes | 0x8F0000 }],
Encoding::UTF_8 => 0x00...0x110000,
Encoding::UTF8_MAC => 0x00...0x110000,
Encoding::UTF8_DoCoMo => 0x00...0x110000,