From d242ae60d8d8bccb8e209130d3bdb1cc41333b8c Mon Sep 17 00:00:00 2001 From: matz Date: Mon, 20 Jan 2003 08:29:24 +0000 Subject: * regex.c (is_in_list): should work well with UTF-8. * regex.c (re_match_exec): ditto. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@3363 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- regex.c | 44 +++++++++++++++++++++++++++++++++----------- 1 file changed, 33 insertions(+), 11 deletions(-) (limited to 'regex.c') diff --git a/regex.c b/regex.c index 90fc5302d5..360b7a8efc 100644 --- a/regex.c +++ b/regex.c @@ -698,7 +698,18 @@ set_list_bits(c1, c2, b) } static int -is_in_list(c, b) +is_in_list_sbc(c, b) + unsigned long c; + const unsigned char *b; +{ + unsigned short size; + + size = *b++; + return ((int)c / BYTEWIDTH < (int)size && b[c / BYTEWIDTH] & 1 << c % BYTEWIDTH); +} + +static int +is_in_list_mbc(c, b) unsigned long c; const unsigned char *b; { @@ -706,9 +717,6 @@ is_in_list(c, b) unsigned short i, j; size = *b++; - if ((int)c / BYTEWIDTH < (int)size && b[c / BYTEWIDTH] & 1 << c % BYTEWIDTH) { - return 1; - } b += size + 2; size = EXTRACT_UNSIGNED(&b[-2]); if (size == 0) return 0; @@ -727,6 +735,14 @@ is_in_list(c, b) return 0; } +static int +is_in_list(c, b) + unsigned long c; + const unsigned char *b; +{ + return is_in_list_sbc(c, b) || is_in_list_mbc(c, b); +} + static void print_partial_compiled_pattern(start, end) unsigned char *start; @@ -3815,19 +3831,25 @@ re_match_exec(bufp, string_arg, size, pos, beg, regs) int cc, c; PREFETCH; - cc = c = (unsigned char)*d++; + c = (unsigned char)*d++; if (ismbchar(c)) { if (d + mbclen(c) - 1 <= dend) { + cc = c; MBC2WC(c, d); + not = is_in_list_mbc(c, p); + if (!not) { + part = not = is_in_list_sbc(cc, p); + } + } else { + not = is_in_list_sbc(c, p); } } - else if (TRANSLATE_P()) - cc = c = (unsigned char)translate[c]; - - not = is_in_list(c, p); - if (!not && cc != c) { - part = not = is_in_list(cc, p); + else { + if (TRANSLATE_P()) + c = (unsigned char)translate[c]; + not = 
is_in_list_sbc(c, p); } + if (*(p - 1) == (unsigned char)charset_not) { not = !not; } -- cgit v1.2.3