summaryrefslogtreecommitdiff
path: root/regex.c
diff options
context:
space:
mode:
author: matz <matz@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2003-01-20 08:29:24 +0000
committer: matz <matz@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2003-01-20 08:29:24 +0000
commit: d242ae60d8d8bccb8e209130d3bdb1cc41333b8c (patch)
tree: e1cc1f856aac1c3bda4897c7a244b43039316f3d /regex.c
parent: 181edd12a09a76e2443c702ff2e17777fe8cfeb5 (diff)
* regex.c (is_in_list): should work well with UTF-8.
* regex.c (re_match_exec): ditto.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@3363 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'regex.c')
-rw-r--r-- regex.c 44
1 files changed, 33 insertions, 11 deletions
diff --git a/regex.c b/regex.c
index 90fc530..360b7a8 100644
--- a/regex.c
+++ b/regex.c
@@ -698,7 +698,18 @@ set_list_bits(c1, c2, b)
}
static int
-is_in_list(c, b)
+is_in_list_sbc(c, b)
+ unsigned long c;
+ const unsigned char *b;
+{
+ unsigned short size;
+
+ size = *b++;
+ return ((int)c / BYTEWIDTH < (int)size && b[c / BYTEWIDTH] & 1 << c % BYTEWIDTH);
+}
+
+static int
+is_in_list_mbc(c, b)
unsigned long c;
const unsigned char *b;
{
@@ -706,9 +717,6 @@ is_in_list(c, b)
unsigned short i, j;
size = *b++;
- if ((int)c / BYTEWIDTH < (int)size && b[c / BYTEWIDTH] & 1 << c % BYTEWIDTH) {
- return 1;
- }
b += size + 2;
size = EXTRACT_UNSIGNED(&b[-2]);
if (size == 0) return 0;
@@ -727,6 +735,14 @@ is_in_list(c, b)
return 0;
}
+static int
+is_in_list(c, b)
+ unsigned long c;
+ const unsigned char *b;
+{
+ return is_in_list_sbc(c, b) || is_in_list_mbc(c, b);
+}
+
static void
print_partial_compiled_pattern(start, end)
unsigned char *start;
@@ -3815,19 +3831,25 @@ re_match_exec(bufp, string_arg, size, pos, beg, regs)
int cc, c;
PREFETCH;
- cc = c = (unsigned char)*d++;
+ c = (unsigned char)*d++;
if (ismbchar(c)) {
if (d + mbclen(c) - 1 <= dend) {
+ cc = c;
MBC2WC(c, d);
+ not = is_in_list_mbc(c, p);
+ if (!not) {
+ part = not = is_in_list_sbc(cc, p);
+ }
+ } else {
+ not = is_in_list_sbc(c, p);
}
}
- else if (TRANSLATE_P())
- cc = c = (unsigned char)translate[c];
-
- not = is_in_list(c, p);
- if (!not && cc != c) {
- part = not = is_in_list(cc, p);
+ else {
+ if (TRANSLATE_P())
+ c = (unsigned char)translate[c];
+ not = is_in_list_sbc(c, p);
}
+
if (*(p - 1) == (unsigned char)charset_not) {
not = !not;
}