summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- lib/prism/prism.gemspec 1
-rw-r--r-- prism/enc/pm_big5.c 63
-rw-r--r-- prism/enc/pm_big5_hkscs.c 54
-rw-r--r-- prism/enc/pm_encoding.h 1
-rw-r--r-- prism/prism.c 1
-rw-r--r-- test/prism/encoding_test.rb 1
6 files changed, 66 insertions, 55 deletions
diff --git a/lib/prism/prism.gemspec b/lib/prism/prism.gemspec
index 5f3c666e45..4b0f87d442 100644
--- a/lib/prism/prism.gemspec
+++ b/lib/prism/prism.gemspec
@@ -85,7 +85,6 @@ Gem::Specification.new do |spec|
"lib/prism/visitor.rb",
"src/diagnostic.c",
"src/enc/pm_big5.c",
- "src/enc/pm_big5_hkscs.c",
"src/enc/pm_cp51932.c",
"src/enc/pm_euc_jp.c",
"src/enc/pm_gbk.c",
diff --git a/prism/enc/pm_big5.c b/prism/enc/pm_big5.c
index fc5c4e73f0..5e9924b40d 100644
--- a/prism/enc/pm_big5.c
+++ b/prism/enc/pm_big5.c
@@ -16,6 +16,22 @@ pm_encoding_big5_char_width(const uint8_t *b, ptrdiff_t n) {
}
static size_t
+pm_encoding_big5_star_char_width(const uint8_t *b, ptrdiff_t n) {
+ // These are the single byte characters.
+ if (*b < 0x80) {
+ return 1;
+ }
+
+ // These are the double byte characters.
+ if ((n > 1) && (b[0] >= 0x87 && b[0] <= 0xFE) &&
+ ((b[1] >= 0x40 && b[1] <= 0x7E) || (b[1] >= 0xA1 && b[1] <= 0xFE))) {
+ return 2;
+ }
+
+ return 0;
+}
+
+static size_t
pm_encoding_big5_alpha_char(const uint8_t *b, ptrdiff_t n) {
if (pm_encoding_big5_char_width(b, n) == 1) {
return pm_encoding_ascii_alpha_char(b, n);
@@ -25,6 +41,15 @@ pm_encoding_big5_alpha_char(const uint8_t *b, ptrdiff_t n) {
}
static size_t
+pm_encoding_big5_star_alpha_char(const uint8_t *b, ptrdiff_t n) {
+ if (pm_encoding_big5_star_char_width(b, n) == 1) {
+ return pm_encoding_ascii_alpha_char(b, n);
+ } else {
+ return 0;
+ }
+}
+
+static size_t
pm_encoding_big5_alnum_char(const uint8_t *b, ptrdiff_t n) {
if (pm_encoding_big5_char_width(b, n) == 1) {
return pm_encoding_ascii_alnum_char(b, n);
@@ -33,6 +58,15 @@ pm_encoding_big5_alnum_char(const uint8_t *b, ptrdiff_t n) {
}
}
+static size_t
+pm_encoding_big5_star_alnum_char(const uint8_t *b, ptrdiff_t n) {
+ if (pm_encoding_big5_star_char_width(b, n) == 1) {
+ return pm_encoding_ascii_alnum_char(b, n);
+ } else {
+ return 0;
+ }
+}
+
static bool
pm_encoding_big5_isupper_char(const uint8_t *b, ptrdiff_t n) {
if (pm_encoding_big5_char_width(b, n) == 1) {
@@ -42,6 +76,15 @@ pm_encoding_big5_isupper_char(const uint8_t *b, ptrdiff_t n) {
}
}
+static bool
+pm_encoding_big5_star_isupper_char(const uint8_t *b, ptrdiff_t n) {
+ if (pm_encoding_big5_star_char_width(b, n) == 1) {
+ return pm_encoding_ascii_isupper_char(b, n);
+ } else {
+ return false;
+ }
+}
+
/** Big5 encoding */
pm_encoding_t pm_encoding_big5 = {
.name = "big5",
@@ -51,3 +94,23 @@ pm_encoding_t pm_encoding_big5 = {
.isupper_char = pm_encoding_big5_isupper_char,
.multibyte = true
};
+
+/** Big5-HKSCS encoding */
+pm_encoding_t pm_encoding_big5_hkscs = {
+ .name = "big5-hkscs",
+ .char_width = pm_encoding_big5_star_char_width,
+ .alnum_char = pm_encoding_big5_star_alnum_char,
+ .alpha_char = pm_encoding_big5_star_alpha_char,
+ .isupper_char = pm_encoding_big5_star_isupper_char,
+ .multibyte = true
+};
+
+/** Big5-UAO encoding */
+pm_encoding_t pm_encoding_big5_uao = {
+ .name = "big5-uao",
+ .char_width = pm_encoding_big5_star_char_width,
+ .alnum_char = pm_encoding_big5_star_alnum_char,
+ .alpha_char = pm_encoding_big5_star_alpha_char,
+ .isupper_char = pm_encoding_big5_star_isupper_char,
+ .multibyte = true
+};
diff --git a/prism/enc/pm_big5_hkscs.c b/prism/enc/pm_big5_hkscs.c
deleted file mode 100644
index e6a13737ed..0000000000
--- a/prism/enc/pm_big5_hkscs.c
+++ /dev/null
@@ -1,54 +0,0 @@
-#include "prism/enc/pm_encoding.h"
-
-static size_t
-pm_encoding_big5_hkscs_char_width(const uint8_t *b, ptrdiff_t n) {
- // These are the single byte characters.
- if (*b < 0x80) {
- return 1;
- }
-
- // These are the double byte characters.
- if ((n > 1) && (b[0] >= 0x87 && b[0] <= 0xFE) &&
- ((b[1] >= 0x40 && b[1] <= 0x7E) || (b[1] >= 0xA1 && b[1] <= 0xFE))) {
- return 2;
- }
-
- return 0;
-}
-
-static size_t
-pm_encoding_big5_hkscs_alpha_char(const uint8_t *b, ptrdiff_t n) {
- if (pm_encoding_big5_hkscs_char_width(b, n) == 1) {
- return pm_encoding_ascii_alpha_char(b, n);
- } else {
- return 0;
- }
-}
-
-static size_t
-pm_encoding_big5_hkscs_alnum_char(const uint8_t *b, ptrdiff_t n) {
- if (pm_encoding_big5_hkscs_char_width(b, n) == 1) {
- return pm_encoding_ascii_alnum_char(b, n);
- } else {
- return 0;
- }
-}
-
-static bool
-pm_encoding_big5_hkscs_isupper_char(const uint8_t *b, ptrdiff_t n) {
- if (pm_encoding_big5_hkscs_char_width(b, n) == 1) {
- return pm_encoding_ascii_isupper_char(b, n);
- } else {
- return false;
- }
-}
-
-/** Big5 encoding */
-pm_encoding_t pm_encoding_big5_hkscs = {
- .name = "big5-hkscs",
- .char_width = pm_encoding_big5_hkscs_char_width,
- .alnum_char = pm_encoding_big5_hkscs_alnum_char,
- .alpha_char = pm_encoding_big5_hkscs_alpha_char,
- .isupper_char = pm_encoding_big5_hkscs_isupper_char,
- .multibyte = true
-};
diff --git a/prism/enc/pm_encoding.h b/prism/enc/pm_encoding.h
index 1785a842cc..15db4abc9d 100644
--- a/prism/enc/pm_encoding.h
+++ b/prism/enc/pm_encoding.h
@@ -159,6 +159,7 @@ extern pm_encoding_t pm_encoding_ascii;
extern pm_encoding_t pm_encoding_ascii_8bit;
extern pm_encoding_t pm_encoding_big5;
extern pm_encoding_t pm_encoding_big5_hkscs;
+extern pm_encoding_t pm_encoding_big5_uao;
extern pm_encoding_t pm_encoding_cp51932;
extern pm_encoding_t pm_encoding_cp850;
extern pm_encoding_t pm_encoding_cp852;
diff --git a/prism/prism.c b/prism/prism.c
index 004bf8ca24..7330063bd8 100644
--- a/prism/prism.c
+++ b/prism/prism.c
@@ -6091,6 +6091,7 @@ parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *star
ENCODING1("BINARY", pm_encoding_ascii_8bit);
ENCODING1("Big5", pm_encoding_big5);
ENCODING1("Big5-HKSCS", pm_encoding_big5_hkscs);
+ ENCODING1("Big5-UAO", pm_encoding_big5_uao);
break;
case 'C': case 'c':
ENCODING1("CP437", pm_encoding_ibm437);
diff --git a/test/prism/encoding_test.rb b/test/prism/encoding_test.rb
index 8704945d53..75a6b027c3 100644
--- a/test/prism/encoding_test.rb
+++ b/test/prism/encoding_test.rb
@@ -65,6 +65,7 @@ module Prism
Encoding::Windows_874 => 0x00...0x100,
Encoding::Big5 => 0x00...0x10000,
Encoding::Big5_HKSCS => 0x00...0x10000,
+ Encoding::Big5_UAO => 0x00...0x10000,
Encoding::CP51932 => 0x00...0x10000,
Encoding::GBK => 0x00...0x10000,
Encoding::Shift_JIS => 0x00...0x10000,