diff options
| author | heyogrady <ogradypatrickj@gmail.com> | 2023-11-21 23:17:09 -0500 |
|---|---|---|
| committer | Kevin Newton <kddnewton@gmail.com> | 2023-11-22 09:12:35 -0500 |
| commit | e3ef05a4343f447883183bc28e515715eedad9c4 (patch) | |
| tree | 98c29187c3e5358bf3cba3c66ff5266b9cf46f87 | |
| parent | 4f031a745d528e0a8a626c340787c7c3ab9ea143 (diff) | |
[ruby/prism] Add `CP949` encoding
https://github.com/ruby/prism/commit/9e78dfdf69
| -rw-r--r-- | lib/prism/prism.gemspec | 1 |
| -rw-r--r-- | prism/enc/pm_cp949.c | 57 |
| -rw-r--r-- | prism/enc/pm_encoding.h | 1 |
| -rw-r--r-- | prism/prism.c | 1 |
| -rw-r--r-- | test/prism/encoding_test.rb | 1 |
5 files changed, 61 insertions, 0 deletions
diff --git a/lib/prism/prism.gemspec b/lib/prism/prism.gemspec index 6fbd6c2945..43ec005452 100644 --- a/lib/prism/prism.gemspec +++ b/lib/prism/prism.gemspec @@ -86,6 +86,7 @@ Gem::Specification.new do |spec| "src/diagnostic.c", "src/enc/pm_big5.c", "src/enc/pm_cp51932.c", + "src/enc/pm_cp949.c", "src/enc/pm_euc_jp.c", "src/enc/pm_gbk.c", "src/enc/pm_shift_jis.c", diff --git a/prism/enc/pm_cp949.c b/prism/enc/pm_cp949.c new file mode 100644 index 0000000000..f3b5a50fde --- /dev/null +++ b/prism/enc/pm_cp949.c @@ -0,0 +1,57 @@ +#include "prism/enc/pm_encoding.h" + +static size_t +pm_encoding_cp949_char_width(const uint8_t *b, ptrdiff_t n) { + // These are the single byte characters + if (*b < 0x81) { + return 1; + } + + // These are the double byte characters + if ( + (n > 1) && + (b[0] >= 0x81 && b[0] <= 0xfe) && + (b[1] >= 0x41 && b[1] <= 0xfe) + ) { + return 2; + } + + return 0; +} + +static size_t +pm_encoding_cp949_alpha_char(const uint8_t *b, ptrdiff_t n) { + if (pm_encoding_cp949_char_width(b, n) == 1) { + return pm_encoding_ascii_alpha_char(b, n); + } else { + return 0; + } +} + +static size_t +pm_encoding_cp949_alnum_char(const uint8_t *b, ptrdiff_t n) { + if (pm_encoding_cp949_char_width(b, n) == 1) { + return pm_encoding_ascii_alnum_char(b, n); + } else { + return 0; + } +} + +static bool +pm_encoding_cp949_isupper_char(const uint8_t *b, ptrdiff_t n) { + if (pm_encoding_cp949_char_width(b, n) == 1) { + return pm_encoding_ascii_isupper_char(b, n); + } else { + return 0; + } +} + +/** cp949 encoding */ +pm_encoding_t pm_encoding_cp949 = { + .name = "cp949", + .char_width = pm_encoding_cp949_char_width, + .alnum_char = pm_encoding_cp949_alnum_char, + .alpha_char = pm_encoding_cp949_alpha_char, + .isupper_char = pm_encoding_cp949_isupper_char, + .multibyte = true +}; diff --git a/prism/enc/pm_encoding.h b/prism/enc/pm_encoding.h index 51227b9c96..698abc8be9 100644 --- a/prism/enc/pm_encoding.h +++ b/prism/enc/pm_encoding.h @@ -164,6 +164,7 @@ extern 
pm_encoding_t pm_encoding_cp51932; extern pm_encoding_t pm_encoding_cp850; extern pm_encoding_t pm_encoding_cp852; extern pm_encoding_t pm_encoding_cp855; +extern pm_encoding_t pm_encoding_cp949; extern pm_encoding_t pm_encoding_euc_jp; extern pm_encoding_t pm_encoding_gb1988; extern pm_encoding_t pm_encoding_gbk; diff --git a/prism/prism.c b/prism/prism.c index e7848f0e04..99ece901f5 100644 --- a/prism/prism.c +++ b/prism/prism.c @@ -6165,6 +6165,7 @@ parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *star ENCODING1("CP863", pm_encoding_ibm863); ENCODING2("CP932", "csWindows31J", pm_encoding_windows_31j); ENCODING1("CP936", pm_encoding_gbk); + ENCODING1("CP949", pm_encoding_cp949); ENCODING1("CP1250", pm_encoding_windows_1250); ENCODING1("CP1251", pm_encoding_windows_1251); ENCODING1("CP1252", pm_encoding_windows_1252); diff --git a/test/prism/encoding_test.rb b/test/prism/encoding_test.rb index 5a0a671b5b..463cb95121 100644 --- a/test/prism/encoding_test.rb +++ b/test/prism/encoding_test.rb @@ -68,6 +68,7 @@ module Prism Encoding::Big5 => 0x00...0x10000, Encoding::Big5_HKSCS => 0x00...0x10000, Encoding::Big5_UAO => 0x00...0x10000, + Encoding::CP949 => 0x00...0x10000, Encoding::CP51932 => 0x00...0x10000, Encoding::GBK => 0x00...0x10000, Encoding::Shift_JIS => 0x00...0x10000, |
