summary | refs | log | tree | commit | diff
path: root/regex.c
diff options
context:
space:
mode:
author: nobu <nobu@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2002-02-01 08:49:02 +0000
committer: nobu <nobu@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2002-02-01 08:49:02 +0000
commit: d9b49e39b2f3380cb6c4bb68a175a230c4702b58 (patch)
tree: d2563ee6af8604892dfa8fca3a4d751cedc5cfdc /regex.c
parent: dad91ce6d8974ed959146ce252749a9fa0d7ebfe (diff)
* regex.c (mbc_startpos): become macro.
* regex.c (euc_startpos): added for improvement.
* regex.c (sjis_startpos): ditto.
* regex.c (utf8_startpos): ditto.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@2040 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'regex.c')
-rw-r--r-- regex.c 152
1 files changed, 74 insertions, 78 deletions
diff --git a/regex.c b/regex.c
index e3e84ae162..c9436a820f 100644
--- a/regex.c
+++ b/regex.c
@@ -478,7 +478,9 @@ re_set_syntax(syntax)
#define WC2MBC1ST(c) \
((current_mbctype != MBCTYPE_UTF8) ? ((c<0x100) ? (c) : (((c)>>8)&0xff)) : utf8_firstbyte(c))
-int mbc_startpos _((const char *start, int pos));
+typedef unsigned int (*mbc_startpos_func_t) _((const char *string, unsigned int pos));
+const mbc_startpos_func_t mbc_startpos_func[];
+#define mbc_startpos(start, pos) (*mbc_startpos_func[current_mbctype])((start), (pos))
static unsigned int
utf8_firstbyte(c)
@@ -4384,7 +4386,6 @@ re_free_registers(regs)
Created for grep multi-byte extension Jul., 1993 by t^2 (Takahiro Tanimoto)
Last change: Jul. 9, 1993 by t^2 */
static const unsigned char mbctab_ascii[] = {
- /* forward scan */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -4401,28 +4402,9 @@ static const unsigned char mbctab_ascii[] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-
- /* reverse scan */
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
static const unsigned char mbctab_euc[] = { /* 0xA1-0xFE */
- /* forward scan */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -4439,28 +4421,9 @@ static const unsigned char mbctab_euc[] = { /* 0xA1-0xFE */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
-
- /* reverse scan */
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0
};
-static const unsigned char mbctab_sjis[] = { /* 0x80-0x9f,0xE0-0xFF */
- /* forward scan */
+static const unsigned char mbctab_sjis[] = { /* 0x80-0x9f,0xE0-0xFC */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -4476,9 +4439,10 @@ static const unsigned char mbctab_sjis[] = { /* 0x80-0x9f,0xE0-0xFF */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0
+};
- /* reverse scan */
+static const unsigned char mbctab_sjis_trail[] = { /* 0x40-0x7E,0x80-0xFC */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -4498,7 +4462,6 @@ static const unsigned char mbctab_sjis[] = { /* 0x80-0x9f,0xE0-0xFF */
};
static const unsigned char mbctab_utf8[] = {
- /* forward scan */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -4515,24 +4478,6 @@ static const unsigned char mbctab_utf8[] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 0, 0,
-
- /* reverse scan */
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
const unsigned char *re_mbctab = mbctab_ascii;
@@ -4561,36 +4506,87 @@ re_mbcinit(mbctype)
}
}
-int
-mbc_startpos(string, pos)
+#define mbc_isfirst(t, c) (t)[(unsigned char)(c)]
+#define mbc_len(t, c) ((t)[(unsigned char)(c)]+1)
+
+static unsigned int asc_startpos _((const char *string, unsigned int pos));
+static unsigned int
+asc_startpos(string, pos)
const char *string;
- int pos;
+ unsigned int pos;
{
- int i = pos, w;
+ return pos;
+}
+
+#define euc_islead(c) ((unsigned char)((c) - 0xa1) > 0xfe - 0xa1)
+#define euc_mbclen(c) mbc_len(mbctab_euc, (c))
+static unsigned int euc_startpos _((const char *string, unsigned int pos));
+static unsigned int
+euc_startpos(string, pos)
+ const char *string;
+ unsigned int pos;
+{
+ unsigned int i = pos, w;
- while (i > 0 && re_mbctab[(unsigned char)string[i]+256]) {
+ while (i > 0 && !euc_islead(string[i])) {
--i;
}
- if (i == pos || i + (w = mbclen(string[i])) > pos) return i;
+ if (i == pos || i + (w = euc_mbclen(string[i])) > pos) {
+ return i;
+ }
i += w;
+ return i + ((pos - i) & ~1);
+}
- switch (current_mbctype) {
- case MBCTYPE_EUC:
- return i + ((pos - i) & ~1);
+#define sjis_isfirst(c) mbc_isfirst(mbctab_sjis, (c))
+#define sjis_istrail(c) mbctab_sjis_trail[(unsigned char)(c)]
+#define sjis_mbclen(c) mbc_len(mbctab_sjis, (c))
+static unsigned int sjis_startpos _((const char *string, unsigned int pos));
+static unsigned int
+sjis_startpos(string, pos)
+ const char *string;
+ unsigned int pos;
+{
+ unsigned int i = pos, w;
- case MBCTYPE_SJIS:
- while (i + (w = mbclen(string[i])) < pos) {
- i += w;
- }
+ if (i > 0 && sjis_istrail(string[i])) {
+ do {
+ if (!sjis_isfirst(string[--i])) {
+ ++i;
+ break;
+ }
+ } while (i > 0);
+ }
+ if (i == pos || i + (w = sjis_mbclen(string[i])) > pos) {
return i;
+ }
+ i += w;
+ return i + ((pos - i) & ~1);
+}
- case MBCTYPE_UTF8:
+#define utf8_islead(c) ((unsigned char)((c) & 0xc0) != 0x80)
+#define utf8_mbclen(c) mbc_len(mbctab_utf8, (c))
+static unsigned int utf8_startpos _((const char *string, unsigned int pos));
+static unsigned int
+utf8_startpos(string, pos)
+ const char *string;
+ unsigned int pos;
+{
+ unsigned int i = pos, w;
+
+ while (i > 0 && !utf8_islead(string[i])) {
+ --i;
+ }
+ if (i == pos || i + (w = utf8_mbclen(string[i])) > pos) {
return i;
- default:
- return pos;
}
+ return i + w;
}
+const mbc_startpos_func_t mbc_startpos_func[4] = {
+ asc_startpos, euc_startpos, sjis_startpos, utf8_startpos
+};
+
/*
vi: sw=2 ts=8
Local variables: