summaryrefslogtreecommitdiff
path: root/sjis.c
diff options
context:
space:
mode:
authorksaito <ksaito@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2004-03-05 15:31:51 +0000
committerksaito <ksaito@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2004-03-05 15:31:51 +0000
commit5770336f8be4ac6dbdff43587fda2b508d3786de (patch)
treed75456ec841c8a8d8eba9c3a4d25b39243c56ef6 /sjis.c
parent33a06e4acac0cb7f080436507e06c82de45f7b9b (diff)
Initial revision
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@5892 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'sjis.c')
-rw-r--r--sjis.c174
1 files changed, 174 insertions, 0 deletions
diff --git a/sjis.c b/sjis.c
new file mode 100644
index 0000000000..8485910e69
--- /dev/null
+++ b/sjis.c
@@ -0,0 +1,174 @@
+/**********************************************************************
+
+ sjis.c - Oniguruma (regular expression library)
+
+ Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp)
+
+**********************************************************************/
+#include "regenc.h"
+
+static const char SJIS_CAN_BE_TRAIL_TABLE[256] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0
+};
+
+#define SJIS_ISMB_FIRST(byte) (OnigEncodingSJIS.len_table[byte] > 1)
+#define SJIS_ISMB_TRAIL(byte) SJIS_CAN_BE_TRAIL_TABLE[(byte)]
+
+static OnigCodePoint
+sjis_mbc_to_code(UChar* p, UChar* end)
+{
+ int c, i, len;
+ OnigCodePoint n;
+
+ c = *p++;
+ len = enc_len(ONIG_ENCODING_SJIS, c);
+ n = c;
+ if (len == 1) return n;
+
+ for (i = 1; i < len; i++) {
+ if (p >= end) break;
+ c = *p++;
+ n <<= 8; n += c;
+ }
+ return n;
+}
+
+static int
+sjis_code_to_mbc(OnigCodePoint code, UChar *buf)
+{
+ UChar *p = buf;
+
+ if ((code & 0xff00) != 0) *p++ = (UChar )(((code >> 8) & 0xff));
+ *p++ = (UChar )(code & 0xff);
+
+#if 0
+ if (enc_len(ONIG_ENCODING_SJIS, buf[0]) != (p - buf))
+ return REGERR_INVALID_WIDE_CHAR_VALUE;
+#endif
+ return p - buf;
+}
+
+static int
+sjis_mbc_to_lower(UChar* p, UChar* lower)
+{
+ int len;
+
+ if (ONIGENC_IS_MBC_ASCII(p)) {
+ *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
+ return 1;
+ }
+ else {
+ len = enc_len(ONIG_ENCODING_SJIS, *p);
+ if (lower != p) {
+ /* memcpy(lower, p, len); */
+ int i;
+ for (i = 0; i < len; i++) {
+ *lower++ = *p++;
+ }
+ }
+ return len; /* return byte length of converted char to lower */
+ }
+}
+
+static int
+sjis_code_is_ctype(OnigCodePoint code, unsigned int ctype)
+{
+ if ((ctype & ONIGENC_CTYPE_WORD) != 0) {
+ if (code < 128)
+ return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
+ else {
+ int first = onigenc_mb2_code_to_mbc_first(code);
+ return (enc_len(ONIG_ENCODING_SJIS, first) > 1 ? TRUE : FALSE);
+ }
+
+ ctype &= ~ONIGENC_CTYPE_WORD;
+ if (ctype == 0) return FALSE;
+ }
+
+ if (code < 128)
+ return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
+ else
+ return FALSE;
+}
+
+static UChar*
+sjis_left_adjust_char_head(UChar* start, UChar* s)
+{
+ UChar *p;
+ int len;
+
+ if (s <= start) return s;
+ p = s;
+
+ if (SJIS_ISMB_TRAIL(*p)) {
+ while (p > start) {
+ if (! SJIS_ISMB_FIRST(*--p)) {
+ p++;
+ break;
+ }
+ }
+ }
+ len = enc_len(ONIG_ENCODING_SJIS, *p);
+ if (p + len > s) return p;
+ p += len;
+ return p + ((s - p) & ~1);
+}
+
+static int
+sjis_is_allowed_reverse_match(UChar* s, UChar* end)
+{
+ UChar c = *s;
+ return (SJIS_ISMB_TRAIL(c) ? FALSE : TRUE);
+}
+
+OnigEncodingType OnigEncodingSJIS = {
+ {
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1
+ },
+ "Shift_JIS", /* name */
+ 2, /* max byte length */
+ FALSE, /* is_fold_match */
+ ONIGENC_CTYPE_SUPPORT_LEVEL_SB, /* ctype_support_level */
+ FALSE, /* is continuous sb mb codepoint */
+ sjis_mbc_to_code,
+ onigenc_mb2_code_to_mbclen,
+ sjis_code_to_mbc,
+ sjis_mbc_to_lower,
+ onigenc_mbn_mbc_is_case_ambig,
+ sjis_code_is_ctype,
+ onigenc_nothing_get_ctype_code_range,
+ sjis_left_adjust_char_head,
+ sjis_is_allowed_reverse_match,
+ onigenc_nothing_get_all_fold_match_code,
+ onigenc_nothing_get_fold_match_info
+};