diff options
| author | Kevin Newton <kddnewton@gmail.com> | 2023-11-29 12:53:42 -0500 |
|---|---|---|
| committer | Kevin Newton <kddnewton@gmail.com> | 2023-11-30 21:37:56 -0500 |
| commit | 7b5bb978fb8aefb77c731b0a48386fc139413b13 (patch) | |
| tree | c1dc4dc848a33f102994faf4d1a30f33b95a959c | |
| parent | 9ba92327f2aed5b5d95ad1ce51fe695f14a3428e (diff) | |
[PRISM] Alias CP51932 to EUC-JP
| -rw-r--r-- | lib/prism/prism.gemspec | 1 |
| -rw-r--r-- | prism/enc/pm_cp51932.c | 57 |
| -rw-r--r-- | prism/enc/pm_euc_jp.c | 10 |
| -rw-r--r-- | test/prism/encoding_test.rb | 6 |
4 files changed, 13 insertions, 61 deletions
diff --git a/lib/prism/prism.gemspec b/lib/prism/prism.gemspec index cd90b025b2..2e8ebee409 100644 --- a/lib/prism/prism.gemspec +++ b/lib/prism/prism.gemspec @@ -88,7 +88,6 @@ Gem::Specification.new do |spec| "src/enc/pm_big5.c", "src/enc/pm_cp949.c", "src/enc/pm_cp950.c", - "src/enc/pm_cp51932.c", "src/enc/pm_euc_jp.c", "src/enc/pm_gbk.c", "src/enc/pm_shift_jis.c", diff --git a/prism/enc/pm_cp51932.c b/prism/enc/pm_cp51932.c deleted file mode 100644 index 75c9fb824f..0000000000 --- a/prism/enc/pm_cp51932.c +++ /dev/null @@ -1,57 +0,0 @@ -#include "prism/enc/pm_encoding.h" - -static size_t -pm_encoding_cp51932_char_width(const uint8_t *b, ptrdiff_t n) { - // These are the single byte characters. - if (*b < 0x80) { - return 1; - } - - // These are the double byte characters. - if ( - (n > 1) && - ((b[0] >= 0xa1 && b[0] <= 0xfe) || (b[0] == 0x8e)) && - (b[1] >= 0xa1 && b[1] <= 0xfe) - ) { - return 2; - } - - return 0; -} - -static size_t -pm_encoding_cp51932_alpha_char(const uint8_t *b, ptrdiff_t n) { - if (pm_encoding_cp51932_char_width(b, n) == 1) { - return pm_encoding_ascii_alpha_char(b, n); - } else { - return 0; - } -} - -static size_t -pm_encoding_cp51932_alnum_char(const uint8_t *b, ptrdiff_t n) { - if (pm_encoding_cp51932_char_width(b, n) == 1) { - return pm_encoding_ascii_alnum_char(b, n); - } else { - return 0; - } -} - -static bool -pm_encoding_cp51932_isupper_char(const uint8_t *b, ptrdiff_t n) { - if (pm_encoding_cp51932_char_width(b, n) == 1) { - return pm_encoding_ascii_isupper_char(b, n); - } else { - return 0; - } -} - -/** cp51932 encoding */ -pm_encoding_t pm_encoding_cp51932 = { - .name = "cp51932", - .char_width = pm_encoding_cp51932_char_width, - .alnum_char = pm_encoding_cp51932_alnum_char, - .alpha_char = pm_encoding_cp51932_alpha_char, - .isupper_char = pm_encoding_cp51932_isupper_char, - .multibyte = true -}; diff --git a/prism/enc/pm_euc_jp.c b/prism/enc/pm_euc_jp.c index c891326a60..6468712607 100644 --- a/prism/enc/pm_euc_jp.c +++ 
b/prism/enc/pm_euc_jp.c @@ -67,3 +67,13 @@ pm_encoding_t pm_encoding_euc_jp = { .isupper_char = pm_encoding_euc_jp_isupper_char, .multibyte = true }; + +/** CP51932 encoding */ +pm_encoding_t pm_encoding_cp51932 = { + .name = "CP51932", + .char_width = pm_encoding_euc_jp_char_width, + .alnum_char = pm_encoding_euc_jp_alnum_char, + .alpha_char = pm_encoding_euc_jp_alpha_char, + .isupper_char = pm_encoding_euc_jp_isupper_char, + .multibyte = true +}; diff --git a/test/prism/encoding_test.rb b/test/prism/encoding_test.rb index 8ab6f323e5..db51d653f9 100644 --- a/test/prism/encoding_test.rb +++ b/test/prism/encoding_test.rb @@ -71,14 +71,13 @@ module Prism Encoding::Big5_UAO => 0x00...0x10000, Encoding::CP949 => 0x00...0x10000, Encoding::CP950 => 0x00...0x10000, - Encoding::CP51932 => 0x00...0x10000, Encoding::GBK => 0x00...0x10000, Encoding::MACJAPANESE => 0x00...0x10000, Encoding::Shift_JIS => 0x00...0x10000, Encoding::SJIS_DoCoMo => 0x00...0x10000, Encoding::SJIS_KDDI => 0x00...0x10000, Encoding::SJIS_SoftBank => 0x00...0x10000, - Encoding::Windows_31J => 0x00...0x10000 + Encoding::Windows_31J => 0x00...0x10000, } # By default we don't test every codepoint in these encodings because they @@ -86,7 +85,8 @@ module Prism # suite. if ENV["PRISM_TEST_ALL_ENCODINGS"] encodings.merge!( - Encoding::EUC_JP => 0x00...0x1000000, + Encoding::CP51932 => [*(0x00...0x10000), *(0x00...0x10000).map { |bytes| bytes | 0x8F0000 }], + Encoding::EUC_JP => [*(0x00...0x10000), *(0x00...0x10000).map { |bytes| bytes | 0x8F0000 }], Encoding::UTF_8 => 0x00...0x110000, Encoding::UTF8_MAC => 0x00...0x110000, Encoding::UTF8_DoCoMo => 0x00...0x110000, |
