From d2a5354255bab4d74c710ee8ff21f43998f33095 Mon Sep 17 00:00:00 2001 From: naruse Date: Mon, 15 Sep 2014 16:18:41 +0000 Subject: * reg*.c: Merge Onigmo 5.15.0 38a870960aa7370051a3544 git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@47598 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 4 + enc/big5.c | 6 +- enc/euc_jp.c | 7 +- enc/iso_8859_1.c | 2 - enc/iso_8859_10.c | 4 +- enc/iso_8859_13.c | 4 +- enc/iso_8859_14.c | 4 +- enc/iso_8859_15.c | 4 +- enc/iso_8859_16.c | 4 +- enc/iso_8859_2.c | 2 - enc/iso_8859_3.c | 4 +- enc/iso_8859_4.c | 4 +- enc/iso_8859_5.c | 4 +- enc/iso_8859_7.c | 4 +- enc/iso_8859_9.c | 4 +- enc/koi8_r.c | 4 +- enc/koi8_u.c | 4 +- enc/shift_jis.c | 7 +- enc/unicode.c | 1 - enc/us_ascii.c | 6 +- enc/utf_16_32.h | 2 +- enc/utf_16be.c | 4 - enc/utf_16le.c | 4 - enc/utf_8.c | 4 +- enc/windows_1251.c | 4 +- include/ruby/oniguruma.h | 4 +- regcomp.c | 127 +++++++++++--------- regenc.c | 64 +++++----- regenc.h | 14 ++- regexec.c | 7 +- regint.h | 6 +- regparse.c | 299 ++++++++++++++++++++++++++++++++++------------- regparse.h | 4 +- 33 files changed, 390 insertions(+), 236 deletions(-) diff --git a/ChangeLog b/ChangeLog index e52a75bb4b..659f69a170 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,7 @@ +Tue Sep 16 01:06:40 2014 NARUSE, Yui + + * reg*.c: Merge Onigmo 5.15.0 38a870960aa7370051a3544 + Mon Sep 15 16:21:10 2014 Eric Wong * io.c (struct io_advise_struct): 32 => 24 bytes on 64-bit diff --git a/enc/big5.c b/enc/big5.c index 9d7738d8f9..27315c4ba9 100644 --- a/enc/big5.c +++ b/enc/big5.c @@ -167,19 +167,19 @@ big5_mbc_enc_len0(const UChar* p, const UChar* e, int tridx, const int tbl[]) static int big5_mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc ARG_UNUSED) { - return big5_mbc_enc_len0(p, e, 0, EncLen_BIG5); + return big5_mbc_enc_len0(p, e, 0, EncLen_BIG5); } static int big5_hkscs_mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc ARG_UNUSED) { - return big5_mbc_enc_len0(p, e, 2, EncLen_BIG5_HKSCS); + return big5_mbc_enc_len0(p, e, 2, EncLen_BIG5_HKSCS); } static int big5_uao_mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc ARG_UNUSED) { - return big5_mbc_enc_len0(p, e, 2, EncLen_BIG5_UAO); + return big5_mbc_enc_len0(p, e, 2, EncLen_BIG5_UAO); } static OnigCodePoint diff --git a/enc/euc_jp.c b/enc/euc_jp.c index 8ee24bffdb..61bb8ba65a 100644 --- a/enc/euc_jp.c +++ b/enc/euc_jp.c @@ -293,7 +293,7 @@ apply_all_case_fold(OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg, OnigEncoding enc) { return onigenc_apply_all_case_fold_with_map( - sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0, + numberof(CaseFoldMap), CaseFoldMap, 0, flag, f, arg); } @@ -504,13 +504,14 @@ static int property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end) { UChar *s = p, *e = end; - const struct enc_property *prop = onig_jis_property((const char *)s, (unsigned int)(e-s)); + const struct enc_property *prop = + onig_jis_property((const char* )s, (unsigned int )(e - s)); if (!prop) { return onigenc_minimum_property_name_to_ctype(enc, s, e); } - return (int)prop->ctype; + return (int )prop->ctype; } static int diff --git a/enc/iso_8859_1.c b/enc/iso_8859_1.c index 92dc14f978..088b427d1c 100644 --- a/enc/iso_8859_1.c +++ b/enc/iso_8859_1.c @@ -29,8 +29,6 @@ #include "regenc.h" -#define numberof(array) (int)(sizeof(array) / sizeof((array)[0])) - #define ENC_IS_ISO_8859_1_CTYPE(code,ctype) \ ((EncISO_8859_1_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) diff --git a/enc/iso_8859_10.c b/enc/iso_8859_10.c index ec20a15baa..ab71a5adcf 100644 --- a/enc/iso_8859_10.c +++ b/enc/iso_8859_10.c @@ -208,7 +208,7 @@ apply_all_case_fold(OnigCaseFoldType flag, OnigEncoding enc ARG_UNUSED) { return onigenc_apply_all_case_fold_with_map( - sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, + numberof(CaseFoldMap), CaseFoldMap, 1, flag, f, arg); } @@ -219,7 +219,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag, OnigEncoding enc ARG_UNUSED) { return onigenc_get_case_fold_codes_by_str_with_map( - sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, + numberof(CaseFoldMap), CaseFoldMap, 1, flag, p, end, items); } diff --git a/enc/iso_8859_13.c b/enc/iso_8859_13.c index 4d7b328818..11b3dda1c9 100644 --- a/enc/iso_8859_13.c +++ b/enc/iso_8859_13.c @@ -197,7 +197,7 @@ apply_all_case_fold(OnigCaseFoldType flag, OnigEncoding enc ARG_UNUSED) { return onigenc_apply_all_case_fold_with_map( - sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, + numberof(CaseFoldMap), CaseFoldMap, 1, flag, f, arg); } @@ -208,7 +208,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag, OnigEncoding enc ARG_UNUSED) { return onigenc_get_case_fold_codes_by_str_with_map( - sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, + numberof(CaseFoldMap), CaseFoldMap, 1, flag, p, end, items); } diff --git a/enc/iso_8859_14.c b/enc/iso_8859_14.c index 1271c8a7a6..2939e89b7b 100644 --- a/enc/iso_8859_14.c +++ b/enc/iso_8859_14.c @@ -210,7 +210,7 @@ apply_all_case_fold(OnigCaseFoldType flag, OnigEncoding enc ARG_UNUSED) { return onigenc_apply_all_case_fold_with_map( - sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, + numberof(CaseFoldMap), CaseFoldMap, 1, flag, f, arg); } @@ -221,7 +221,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag, OnigEncoding enc ARG_UNUSED) { return onigenc_get_case_fold_codes_by_str_with_map( - sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, + numberof(CaseFoldMap), CaseFoldMap, 1, flag, p, end, items); } diff --git a/enc/iso_8859_15.c b/enc/iso_8859_15.c index 451033e158..fdb7ca12d7 100644 --- a/enc/iso_8859_15.c +++ b/enc/iso_8859_15.c @@ -204,7 +204,7 @@ apply_all_case_fold(OnigCaseFoldType flag, OnigEncoding enc ARG_UNUSED) { return onigenc_apply_all_case_fold_with_map( - sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, + numberof(CaseFoldMap), CaseFoldMap, 1, flag, f, arg); } @@ -215,7 +215,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag, OnigEncoding enc ARG_UNUSED) { return onigenc_get_case_fold_codes_by_str_with_map( - sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, + numberof(CaseFoldMap), CaseFoldMap, 1, flag, p, end, items); } diff --git a/enc/iso_8859_16.c b/enc/iso_8859_16.c index 5234cf0e7f..5e53f3b6d0 100644 --- a/enc/iso_8859_16.c +++ b/enc/iso_8859_16.c @@ -206,7 +206,7 @@ apply_all_case_fold(OnigCaseFoldType flag, OnigEncoding enc ARG_UNUSED) { return onigenc_apply_all_case_fold_with_map( - sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, + numberof(CaseFoldMap), CaseFoldMap, 1, flag, f, arg); } @@ -217,7 +217,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag, OnigEncoding enc ARG_UNUSED) { return onigenc_get_case_fold_codes_by_str_with_map( - sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, + numberof(CaseFoldMap), CaseFoldMap, 1, flag, p, end, items); } diff --git a/enc/iso_8859_2.c b/enc/iso_8859_2.c index f4cb9100df..94613e661b 100644 --- a/enc/iso_8859_2.c +++ b/enc/iso_8859_2.c @@ -29,8 +29,6 @@ #include "regenc.h" -#define numberof(array) (int)(sizeof(array) / sizeof((array)[0])) - #define ENC_ISO_8859_2_TO_LOWER_CASE(c) EncISO_8859_2_ToLowerCaseTable[c] #define ENC_IS_ISO_8859_2_CTYPE(code,ctype) \ ((EncISO_8859_2_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) diff --git a/enc/iso_8859_3.c b/enc/iso_8859_3.c index 85572574b8..863a575020 100644 --- a/enc/iso_8859_3.c +++ b/enc/iso_8859_3.c @@ -204,7 +204,7 @@ apply_all_case_fold(OnigCaseFoldType flag, OnigEncoding enc ARG_UNUSED) { return onigenc_apply_all_case_fold_with_map( - sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, + numberof(CaseFoldMap), CaseFoldMap, 1, flag, f, arg); } @@ -215,7 +215,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag, OnigEncoding enc ARG_UNUSED) { return onigenc_get_case_fold_codes_by_str_with_map( - sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, + numberof(CaseFoldMap), CaseFoldMap, 1, flag, p, end, items); } diff --git a/enc/iso_8859_4.c b/enc/iso_8859_4.c index 771a2cf6e7..48b999e756 100644 --- a/enc/iso_8859_4.c +++ b/enc/iso_8859_4.c @@ -206,7 +206,7 @@ apply_all_case_fold(OnigCaseFoldType flag, OnigEncoding enc ARG_UNUSED) { return onigenc_apply_all_case_fold_with_map( - sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, + numberof(CaseFoldMap), CaseFoldMap, 1, flag, f, arg); } @@ -217,7 +217,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag, OnigEncoding enc ARG_UNUSED) { return onigenc_get_case_fold_codes_by_str_with_map( - sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, + numberof(CaseFoldMap), CaseFoldMap, 1, flag, p, end, items); } diff --git a/enc/iso_8859_5.c b/enc/iso_8859_5.c index 4ee27b10d8..e71a488c4c 100644 --- a/enc/iso_8859_5.c +++ b/enc/iso_8859_5.c @@ -194,7 +194,7 @@ apply_all_case_fold(OnigCaseFoldType flag, OnigEncoding enc ARG_UNUSED) { return onigenc_apply_all_case_fold_with_map( - sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0, + numberof(CaseFoldMap), CaseFoldMap, 0, flag, f, arg); } @@ -205,7 +205,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag, OnigEncoding enc ARG_UNUSED) { return onigenc_get_case_fold_codes_by_str_with_map( - sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0, + numberof(CaseFoldMap), CaseFoldMap, 0, flag, p, end, items); } diff --git a/enc/iso_8859_7.c b/enc/iso_8859_7.c index aa82f880f9..8d07cb6310 100644 --- a/enc/iso_8859_7.c +++ b/enc/iso_8859_7.c @@ -190,7 +190,7 @@ apply_all_case_fold(OnigCaseFoldType flag, OnigEncoding enc ARG_UNUSED) { return onigenc_apply_all_case_fold_with_map( - sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0, + numberof(CaseFoldMap), CaseFoldMap, 0, flag, f, arg); } @@ -201,7 +201,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag, OnigEncoding enc ARG_UNUSED) { return onigenc_get_case_fold_codes_by_str_with_map( - sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0, + numberof(CaseFoldMap), CaseFoldMap, 0, flag, p, end, items); } diff --git a/enc/iso_8859_9.c b/enc/iso_8859_9.c index 0adafa3ed4..211ba3b2f3 100644 --- a/enc/iso_8859_9.c +++ b/enc/iso_8859_9.c @@ -197,7 +197,7 @@ apply_all_case_fold(OnigCaseFoldType flag, OnigEncoding enc ARG_UNUSED) { return onigenc_apply_all_case_fold_with_map( - sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, + numberof(CaseFoldMap), CaseFoldMap, 1, flag, f, arg); } @@ -208,7 +208,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag, OnigEncoding enc ARG_UNUSED) { return onigenc_get_case_fold_codes_by_str_with_map( - sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, + numberof(CaseFoldMap), CaseFoldMap, 1, flag, p, end, items); } diff --git a/enc/koi8_r.c b/enc/koi8_r.c index 8ec48747f8..85fa72287e 100644 --- a/enc/koi8_r.c +++ b/enc/koi8_r.c @@ -183,7 +183,7 @@ koi8_r_apply_all_case_fold(OnigCaseFoldType flag, void* arg, OnigEncoding enc ARG_UNUSED) { return onigenc_apply_all_case_fold_with_map( - sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0, + numberof(CaseFoldMap), CaseFoldMap, 0, flag, f, arg); } @@ -193,7 +193,7 @@ koi8_r_get_case_fold_codes_by_str(OnigCaseFoldType flag, OnigCaseFoldCodeItem items[], OnigEncoding enc ARG_UNUSED) { return onigenc_get_case_fold_codes_by_str_with_map( - sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0, + numberof(CaseFoldMap), CaseFoldMap, 0, flag, p, end, items); } diff --git a/enc/koi8_u.c b/enc/koi8_u.c index 0e51b6eb80..0ae449ca21 100644 --- a/enc/koi8_u.c +++ b/enc/koi8_u.c @@ -187,7 +187,7 @@ koi8_u_apply_all_case_fold(OnigCaseFoldType flag, void* arg, OnigEncoding enc ARG_UNUSED) { return onigenc_apply_all_case_fold_with_map( - sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0, + numberof(CaseFoldMap), CaseFoldMap, 0, flag, f, arg); } @@ -197,7 +197,7 @@ koi8_u_get_case_fold_codes_by_str(OnigCaseFoldType flag, OnigCaseFoldCodeItem items[], OnigEncoding enc ARG_UNUSED) { return onigenc_get_case_fold_codes_by_str_with_map( - sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0, + numberof(CaseFoldMap), CaseFoldMap, 0, flag, p, end, items); } diff --git a/enc/shift_jis.c b/enc/shift_jis.c index 530415b87c..cbd3f02051 100644 --- a/enc/shift_jis.c +++ b/enc/shift_jis.c @@ -278,7 +278,7 @@ apply_all_case_fold(OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg, OnigEncoding enc) { return onigenc_apply_all_case_fold_with_map( - sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0, + numberof(CaseFoldMap), CaseFoldMap, 0, flag, f, arg); } @@ -493,13 +493,14 @@ static int property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end) { UChar *s = p, *e = end; - const struct enc_property *prop = onig_jis_property((const char *)s, (unsigned int)(e-s)); + const struct enc_property *prop = + onig_jis_property((const char* )s, (unsigned int )(e - s)); if (!prop) { return onigenc_minimum_property_name_to_ctype(enc, s, e); } - return (int)prop->ctype; + return (int )prop->ctype; } static int diff --git a/enc/unicode.c b/enc/unicode.c index 2575762ada..f0ef89880f 100644 --- a/enc/unicode.c +++ b/enc/unicode.c @@ -141,7 +141,6 @@ code3_equal(const OnigCodePoint *x, const OnigCodePoint *y) #include "enc/unicode/name2ctype.h" -#define numberof(array) (int)(sizeof(array) / sizeof((array)[0])) #define CODE_RANGES_NUM numberof(CodeRanges) extern int diff --git a/enc/us_ascii.c b/enc/us_ascii.c index 1b47778391..18d0685040 100644 --- a/enc/us_ascii.c +++ b/enc/us_ascii.c @@ -3,9 +3,9 @@ static int us_ascii_mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc) { - if (*p & 0x80) - return ONIGENC_CONSTRUCT_MBCLEN_INVALID(); - return ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(1); + if (*p & 0x80) + return ONIGENC_CONSTRUCT_MBCLEN_INVALID(); + return ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(1); } OnigEncodingDefine(us_ascii, US_ASCII) = { diff --git a/enc/utf_16_32.h b/enc/utf_16_32.h index da58d1b23c..b232767ee3 100644 --- a/enc/utf_16_32.h +++ b/enc/utf_16_32.h @@ -1,5 +1,5 @@ #include "regenc.h" /* dummy for unsupported, statefull encoding */ -#define ENC_DUMMY_UNICODE(name) ENC_REPLICATE(name, name "BE") +#define ENC_DUMMY_UNICODE(name) ENC_DUMMY(name) ENC_DUMMY_UNICODE("UTF-16"); ENC_DUMMY_UNICODE("UTF-32"); diff --git a/enc/utf_16be.c b/enc/utf_16be.c index 3af8359caf..a61ae00863 100644 --- a/enc/utf_16be.c +++ b/enc/utf_16be.c @@ -29,10 +29,6 @@ #include "regenc.h" -#define UTF16_IS_SURROGATE_FIRST(c) (((c) & 0xfc) == 0xd8) -#define UTF16_IS_SURROGATE_SECOND(c) (((c) & 0xfc) == 0xdc) -#define UTF16_IS_SURROGATE(c) (((c) & 0xf8) == 0xd8) - #if 0 static const int EncLen_UTF16[] = { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, diff --git a/enc/utf_16le.c b/enc/utf_16le.c index 453c771cc5..7d176e710e 100644 --- a/enc/utf_16le.c +++ b/enc/utf_16le.c @@ -29,10 +29,6 @@ #include "regenc.h" -#define UTF16_IS_SURROGATE_FIRST(c) (((c) & 0xfc) == 0xd8) -#define UTF16_IS_SURROGATE_SECOND(c) (((c) & 0xfc) == 0xdc) -#define UTF16_IS_SURROGATE(c) (((c) & 0xf8) == 0xd8) - #if 0 static const int EncLen_UTF16[] = { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, diff --git a/enc/utf_8.c b/enc/utf_8.c index dae1f3a1bc..b8f38e9d58 100644 --- a/enc/utf_8.c +++ b/enc/utf_8.c @@ -367,7 +367,7 @@ code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc ARG_UNUSED) static int mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, - const UChar* end, UChar* fold, OnigEncoding enc) + const UChar* end, UChar* fold, OnigEncoding enc) { const UChar* p = *pp; @@ -395,7 +395,7 @@ mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, static int get_ctype_code_range(OnigCtype ctype, OnigCodePoint *sb_out, - const OnigCodePoint* ranges[], OnigEncoding enc ARG_UNUSED) + const OnigCodePoint* ranges[], OnigEncoding enc ARG_UNUSED) { *sb_out = 0x80; return onigenc_unicode_ctype_code_range(ctype, ranges); diff --git a/enc/windows_1251.c b/enc/windows_1251.c index 73060962c3..191d631b88 100644 --- a/enc/windows_1251.c +++ b/enc/windows_1251.c @@ -167,7 +167,7 @@ cp1251_apply_all_case_fold(OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg, OnigEncoding enc ARG_UNUSED) { return onigenc_apply_all_case_fold_with_map( - sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0, + numberof(CaseFoldMap), CaseFoldMap, 0, flag, f, arg); } @@ -176,7 +176,7 @@ cp1251_get_case_fold_codes_by_str(OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[], OnigEncoding enc ARG_UNUSED) { return onigenc_get_case_fold_codes_by_str_with_map( - sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0, + numberof(CaseFoldMap), CaseFoldMap, 0, flag, p, end, items); } diff --git a/include/ruby/oniguruma.h b/include/ruby/oniguruma.h index acb7365a88..2530f61427 100644 --- a/include/ruby/oniguruma.h +++ b/include/ruby/oniguruma.h @@ -39,8 +39,8 @@ extern "C" { #define ONIGURUMA #define ONIGURUMA_VERSION_MAJOR 5 -#define ONIGURUMA_VERSION_MINOR 14 -#define ONIGURUMA_VERSION_TEENY 1 +#define ONIGURUMA_VERSION_MINOR 15 +#define ONIGURUMA_VERSION_TEENY 0 #ifdef __cplusplus # ifndef HAVE_PROTOTYPES diff --git a/regcomp.c b/regcomp.c index 676bee26cc..c1698ea1dc 100644 --- a/regcomp.c +++ b/regcomp.c @@ -330,9 +330,10 @@ static int compile_tree(Node* node, regex_t* reg); (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC) static int -select_str_opcode(int mb_len, OnigDistance str_len, int ignore_case) +select_str_opcode(int mb_len, OnigDistance byte_len, int ignore_case) { int op; + OnigDistance str_len = (byte_len + mb_len - 1) / mb_len; if (ignore_case) { switch (str_len) { @@ -434,11 +435,11 @@ compile_tree_n_times(Node* node, int n, regex_t* reg) } static int -add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, OnigDistance str_len, +add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, OnigDistance byte_len, regex_t* reg ARG_UNUSED, int ignore_case) { int len; - int op = select_str_opcode(mb_len, str_len, ignore_case); + int op = select_str_opcode(mb_len, byte_len, ignore_case); len = SIZE_OPCODE; @@ -446,15 +447,15 @@ add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, OnigDistance str_len, if (IS_NEED_STR_LEN_OP_EXACT(op)) len += SIZE_LENGTH; - len += mb_len * (int )str_len; + len += (int )byte_len; return len; } static int -add_compile_string(UChar* s, int mb_len, OnigDistance str_len, +add_compile_string(UChar* s, int mb_len, OnigDistance byte_len, regex_t* reg, int ignore_case) { - int op = select_str_opcode(mb_len, str_len, ignore_case); + int op = select_str_opcode(mb_len, byte_len, ignore_case); add_opcode(reg, op); if (op == OP_EXACTMBN) @@ -462,12 +463,12 @@ add_compile_string(UChar* s, int mb_len, OnigDistance str_len, if (IS_NEED_STR_LEN_OP_EXACT(op)) { if (op == OP_EXACTN_IC) - add_length(reg, mb_len * str_len); + add_length(reg, byte_len); else - add_length(reg, str_len); + add_length(reg, byte_len / mb_len); } - add_bytes(reg, s, mb_len * str_len); + add_bytes(reg, s, byte_len); return 0; } @@ -475,7 +476,7 @@ add_compile_string(UChar* s, int mb_len, OnigDistance str_len, static int compile_length_string_node(Node* node, regex_t* reg) { - int rlen, r, len, prev_len, slen, ambig; + int rlen, r, len, prev_len, blen, ambig; OnigEncoding enc = reg->enc; UChar *p, *prev; StrNode* sn; @@ -489,24 +490,24 @@ compile_length_string_node(Node* node, regex_t* reg) p = prev = sn->s; prev_len = enclen(enc, p, sn->end); p += prev_len; - slen = 1; + blen = prev_len; rlen = 0; for (; p < sn->end; ) { len = enclen(enc, p, sn->end); - if (len == prev_len) { - slen++; + if (len == prev_len || ambig) { + blen += len; } else { - r = add_compile_string_length(prev, prev_len, slen, reg, ambig); + r = add_compile_string_length(prev, prev_len, blen, reg, ambig); rlen += r; prev = p; - slen = 1; + blen = len; prev_len = len; } p += len; } - r = add_compile_string_length(prev, prev_len, slen, reg, ambig); + r = add_compile_string_length(prev, prev_len, blen, reg, ambig); rlen += r; return rlen; } @@ -523,7 +524,7 @@ compile_length_string_raw_node(StrNode* sn, regex_t* reg) static int compile_string_node(Node* node, regex_t* reg) { - int r, len, prev_len, slen, ambig; + int r, len, prev_len, blen, ambig; OnigEncoding enc = reg->enc; UChar *p, *prev, *end; StrNode* sn; @@ -538,25 +539,25 @@ compile_string_node(Node* node, regex_t* reg) p = prev = sn->s; prev_len = enclen(enc, p, end); p += prev_len; - slen = 1; + blen = prev_len; for (; p < end; ) { len = enclen(enc, p, end); - if (len == prev_len) { - slen++; + if (len == prev_len || ambig) { + blen += len; } else { - r = add_compile_string(prev, prev_len, slen, reg, ambig); + r = add_compile_string(prev, prev_len, blen, reg, ambig); if (r) return r; prev = p; - slen = 1; + blen = len; prev_len = len; } p += len; } - return add_compile_string(prev, prev_len, slen, reg, ambig); + return add_compile_string(prev, prev_len, blen, reg, ambig); } static int @@ -2591,6 +2592,7 @@ is_not_included(Node* x, Node* y, regex_t* reg) return 0; } else { + if (IS_NOT_NULL(xc->mbuf)) return 0; for (i = 0; i < SINGLE_BYTE_SIZE; i++) { int is_word; if (NCTYPE(y)->ascii_range) @@ -3311,7 +3313,7 @@ next_setup(Node* node, Node* next_node, int in_root, regex_t* reg) qn->next_head_exact = n; } #endif - /* automatic possessivation a*b ==> (?>a*)b */ + /* automatic possessification a*b ==> (?>a*)b */ if (qn->lower <= 1) { int ttype = NTYPE(qn->target); if (IS_NODE_TYPE_SIMPLE(ttype)) { @@ -3432,27 +3434,40 @@ expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end, return 0; } +static int +is_case_fold_variable_len(int item_num, OnigCaseFoldCodeItem items[], + int slen) +{ + int i; + + for (i = 0; i < item_num; i++) { + if (items[i].byte_len != slen) { + return 1; + } + if (items[i].code_len != 1) { + return 1; + } + } + return 0; +} + static int expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[], UChar *p, int slen, UChar *end, regex_t* reg, Node **rnode) { - int r, i, j, len, varlen, varclen; + int r, i, j, len, varlen; Node *anode, *var_anode, *snode, *xnode, *an; UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; *rnode = var_anode = NULL_NODE; varlen = 0; - varclen = 0; for (i = 0; i < item_num; i++) { if (items[i].byte_len != slen) { varlen = 1; break; } - if (items[i].code_len != 1) { - varclen |= 1; - } } if (varlen != 0) { @@ -3537,8 +3552,6 @@ expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[], } } - if (varclen && !varlen) - return 2; return varlen; mem_err2: @@ -3582,7 +3595,8 @@ expand_case_fold_string(Node* node, regex_t* reg) len = enclen(reg->enc, p, end); - if (n == 0) { + varlen = is_case_fold_variable_len(n, items, len); + if (n == 0 || varlen == 0) { if (IS_NULL(snode)) { if (IS_NULL(root) && IS_NOT_NULL(prev_node)) { top_root = root = onig_node_list_add(NULL_NODE, prev_node); @@ -3607,11 +3621,14 @@ expand_case_fold_string(Node* node, regex_t* reg) } else { alt_num *= (n + 1); - if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) { - varlen = 1; /* Assume that expanded strings are variable length. */ - break; - } + if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break; + if (IS_NOT_NULL(snode)) { + r = update_string_node_case_fold(reg, snode); + if (r == 0) { + NSTRING_SET_AMBIG(snode); + } + } if (IS_NULL(root) && IS_NOT_NULL(prev_node)) { top_root = root = onig_node_list_add(NULL_NODE, prev_node); if (IS_NULL(root)) { @@ -3622,7 +3639,6 @@ expand_case_fold_string(Node* node, regex_t* reg) r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node); if (r < 0) goto mem_err; - if (r > 0) varlen = 1; if (r == 1) { if (IS_NULL(root)) { top_root = prev_node; @@ -3636,7 +3652,7 @@ expand_case_fold_string(Node* node, regex_t* reg) root = NCAR(prev_node); } - else { /* r == 0 || r == 2 */ + else { /* r == 0 */ if (IS_NOT_NULL(root)) { if (IS_NULL(onig_node_list_add(root, prev_node))) { onig_node_free(prev_node); @@ -3650,6 +3666,12 @@ expand_case_fold_string(Node* node, regex_t* reg) p += len; } + if (IS_NOT_NULL(snode)) { + r = update_string_node_case_fold(reg, snode); + if (r == 0) { + NSTRING_SET_AMBIG(snode); + } + } if (p < end) { Node *srem; @@ -3679,20 +3701,9 @@ expand_case_fold_string(Node* node, regex_t* reg) /* ending */ top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node); - if (!varlen) { - /* When all expanded strings are same length, case-insensitive - BM search will be used. */ - r = update_string_node_case_fold(reg, node); - if (r == 0) { - NSTRING_SET_AMBIG(node); - } - } - else { - swap_node(node, top_root); - r = 0; - } + swap_node(node, top_root); onig_node_free(top_root); - return r; + return 0; mem_err: r = ONIGERR_MEMORY; @@ -4367,7 +4378,7 @@ map_position_value(OnigEncoding enc, int i) 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1 }; - if (i < (int )(sizeof(ByteValTable)/sizeof(ByteValTable[0]))) { + if (i < numberof(ByteValTable)) { if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1) return 20; else @@ -4399,7 +4410,7 @@ distance_value(MinMaxLen* mm) if (mm->max == ONIG_INFINITE_DISTANCE) return 0; d = mm->max - mm->min; - if (d < sizeof(dist_vals)/sizeof(dist_vals[0])) + if (d < numberof(dist_vals)) /* return dist_vals[d] * 16 / (mm->min + 12); */ return (int )dist_vals[d]; else @@ -4507,6 +4518,9 @@ concat_opt_anc_info(OptAncInfo* to, OptAncInfo* left, OptAncInfo* right, if (right_len == 0) { to->right_anchor |= left->right_anchor; } + else { + to->right_anchor |= (left->right_anchor & ANCHOR_PREC_READ_NOT); + } } static int @@ -5080,7 +5094,8 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) case ANCHOR_END_BUF: case ANCHOR_SEMI_END_BUF: case ANCHOR_END_LINE: - case ANCHOR_LOOK_BEHIND: /* just for (?<=x).* */ + case ANCHOR_LOOK_BEHIND: /* just for (?<=x).* */ + case ANCHOR_PREC_READ_NOT: /* just for (?!x).* */ add_opt_anc_info(&opt->anc, NANCHOR(node)->type); break; @@ -5103,7 +5118,6 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) } break; - case ANCHOR_PREC_READ_NOT: case ANCHOR_LOOK_BEHIND_NOT: break; } @@ -5369,7 +5383,8 @@ set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env) ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML | ANCHOR_LOOK_BEHIND); - reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF); + reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF | + ANCHOR_PREC_READ_NOT); if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) { reg->anchor_dmin = opt.len.min; diff --git a/regenc.c b/regenc.c index e628d62357..2683b826f2 100644 --- a/regenc.c +++ b/regenc.c @@ -414,9 +414,7 @@ onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED, OnigCodePoint code; int i, r; - for (i = 0; - i < (int )(sizeof(OnigAsciiLowerMap)/sizeof(OnigPairCaseFoldCodes)); - i++) { + for (i = 0; i < numberof(OnigAsciiLowerMap); i++) { code = OnigAsciiLowerMap[i].to; r = (*f)(OnigAsciiLowerMap[i].from, &code, 1, arg); if (r != 0) return r; @@ -431,8 +429,8 @@ onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED, extern int onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag ARG_UNUSED, - const OnigUChar* p, const OnigUChar* end ARG_UNUSED, OnigCaseFoldCodeItem items[], - OnigEncoding enc ARG_UNUSED) + const OnigUChar* p, const OnigUChar* end ARG_UNUSED, + OnigCaseFoldCodeItem items[], OnigEncoding enc ARG_UNUSED) { if (0x41 <= *p && *p <= 0x5a) { items[0].byte_len = 1; @@ -570,9 +568,10 @@ onigenc_get_case_fold_codes_by_str_with_map(int map_size, extern int -onigenc_not_support_get_ctype_code_range(OnigCtype ctype, - OnigCodePoint* sb_out, const OnigCodePoint* ranges[], - OnigEncoding enc) +onigenc_not_support_get_ctype_code_range(OnigCtype ctype ARG_UNUSED, + OnigCodePoint* sb_out ARG_UNUSED, + const OnigCodePoint* ranges[] ARG_UNUSED, + OnigEncoding enc) { return ONIG_NO_SUPPORT_CONFIG; } @@ -589,7 +588,7 @@ onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end, OnigEncoding enc A /* for single byte encodings */ extern int onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, const UChar** p, - const UChar*end, UChar* lower, OnigEncoding enc ARG_UNUSED) + const UChar* end, UChar* lower, OnigEncoding enc ARG_UNUSED) { *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(**p); @@ -633,28 +632,31 @@ extern int onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc ARG_UNUSED) { if (code > 0xff) - rb_raise(rb_eRangeError, "%u out of char range", code); + rb_raise(rb_eRangeError, "%u out of char range", code); *buf = (UChar )(code & 0xff); return 1; } extern UChar* -onigenc_single_byte_left_adjust_char_head(const UChar* start ARG_UNUSED, const UChar* s, - const UChar* end, +onigenc_single_byte_left_adjust_char_head(const UChar* start ARG_UNUSED, + const UChar* s, + const UChar* end ARG_UNUSED, OnigEncoding enc ARG_UNUSED) { return (UChar* )s; } extern int -onigenc_always_true_is_allowed_reverse_match(const UChar* s ARG_UNUSED, const UChar* end ARG_UNUSED, +onigenc_always_true_is_allowed_reverse_match(const UChar* s ARG_UNUSED, + const UChar* end ARG_UNUSED, OnigEncoding enc ARG_UNUSED) { return TRUE; } extern int -onigenc_always_false_is_allowed_reverse_match(const UChar* s ARG_UNUSED, const UChar* end ARG_UNUSED, +onigenc_always_false_is_allowed_reverse_match(const UChar* s ARG_UNUSED, + const UChar* end ARG_UNUSED, OnigEncoding enc ARG_UNUSED) { return FALSE; @@ -716,7 +718,7 @@ onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag ARG_UNUSED, #if 0 extern int onigenc_mbn_is_mbc_ambiguous(OnigEncoding enc, OnigCaseFoldType flag, - const UChar** pp ARG_UNUSED, const UChar* end ARG_UNUSED) + const UChar** pp, const UChar* end ARG_UNUSED) { const UChar* p = *pp; @@ -791,27 +793,27 @@ extern int onigenc_minimum_property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end) { static const PosixBracketEntryType PBS[] = { - PosixBracketEntryInit("Alnum", ONIGENC_CTYPE_ALNUM), - PosixBracketEntryInit("Alpha", ONIGENC_CTYPE_ALPHA), - PosixBracketEntryInit("Blank", ONIGENC_CTYPE_BLANK), - PosixBracketEntryInit("Cntrl", ONIGENC_CTYPE_CNTRL), - PosixBracketEntryInit("Digit", ONIGENC_CTYPE_DIGIT), - PosixBracketEntryInit("Graph", ONIGENC_CTYPE_GRAPH), - PosixBracketEntryInit("Lower", ONIGENC_CTYPE_LOWER), - PosixBracketEntryInit("Print", ONIGENC_CTYPE_PRINT), - PosixBracketEntryInit("Punct", ONIGENC_CTYPE_PUNCT), - PosixBracketEntryInit("Space", ONIGENC_CTYPE_SPACE), - PosixBracketEntryInit("Upper", ONIGENC_CTYPE_UPPER), - PosixBracketEntryInit("XDigit", ONIGENC_CTYPE_XDIGIT), - PosixBracketEntryInit("ASCII", ONIGENC_CTYPE_ASCII), - PosixBracketEntryInit("Word", ONIGENC_CTYPE_WORD), + POSIX_BRACKET_ENTRY_INIT("Alnum", ONIGENC_CTYPE_ALNUM), + POSIX_BRACKET_ENTRY_INIT("Alpha", ONIGENC_CTYPE_ALPHA), + POSIX_BRACKET_ENTRY_INIT("Blank", ONIGENC_CTYPE_BLANK), + POSIX_BRACKET_ENTRY_INIT("Cntrl", ONIGENC_CTYPE_CNTRL), + POSIX_BRACKET_ENTRY_INIT("Digit", ONIGENC_CTYPE_DIGIT), + POSIX_BRACKET_ENTRY_INIT("Graph", ONIGENC_CTYPE_GRAPH), + POSIX_BRACKET_ENTRY_INIT("Lower", ONIGENC_CTYPE_LOWER), + POSIX_BRACKET_ENTRY_INIT("Print", ONIGENC_CTYPE_PRINT), + POSIX_BRACKET_ENTRY_INIT("Punct", ONIGENC_CTYPE_PUNCT), + POSIX_BRACKET_ENTRY_INIT("Space", ONIGENC_CTYPE_SPACE), + POSIX_BRACKET_ENTRY_INIT("Upper", ONIGENC_CTYPE_UPPER), + POSIX_BRACKET_ENTRY_INIT("XDigit", ONIGENC_CTYPE_XDIGIT), + POSIX_BRACKET_ENTRY_INIT("ASCII", ONIGENC_CTYPE_ASCII), + POSIX_BRACKET_ENTRY_INIT("Word", ONIGENC_CTYPE_WORD), }; - const PosixBracketEntryType *pb, *pbe; + const PosixBracketEntryType *pb; int len; len = onigenc_strlen(enc, p, end); - for (pbe = (pb = PBS) + sizeof(PBS)/sizeof(PBS[0]); pb < pbe; ++pb) { + for (pb = PBS; pb < PBS + numberof(PBS); pb++) { if (len == pb->len && onigenc_with_ascii_strnicmp(enc, p, end, pb->name, pb->len) == 0) return pb->ctype; diff --git a/regenc.h b/regenc.h index 6f2d4a69c1..e220aabc5b 100644 --- a/regenc.h +++ b/regenc.h @@ -29,15 +29,18 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ + #ifndef REGINT_H #ifndef RUBY_EXTERN #include "ruby/config.h" #include "ruby/defines.h" #endif +#endif + #ifdef ONIG_ESCAPE_UCHAR_COLLISION #undef ONIG_ESCAPE_UCHAR_COLLISION #endif -#endif + #include "ruby/oniguruma.h" RUBY_SYMBOL_EXPORT_BEGIN @@ -104,7 +107,13 @@ typedef struct { short int len; } PosixBracketEntryType; -#define PosixBracketEntryInit(name, ctype) {(const UChar *)name, ctype, (short int)(sizeof(name) - 1)} +#define POSIX_BRACKET_ENTRY_INIT(name, ctype) \ + {(const UChar* )(name), (ctype), (short int )(sizeof(name) - 1)} + +#ifndef numberof +#define numberof(array) (int )(sizeof(array) / sizeof((array)[0])) +#endif + #define USE_CRNL_AS_LINE_TERMINATOR #define USE_UNICODE_PROPERTIES @@ -159,6 +168,7 @@ ONIG_EXTERN int onigenc_unicode_apply_all_case_fold P_((OnigCaseFoldType flag, O #define UTF16_IS_SURROGATE_FIRST(c) (((c) & 0xfc) == 0xd8) #define UTF16_IS_SURROGATE_SECOND(c) (((c) & 0xfc) == 0xdc) +#define UTF16_IS_SURROGATE(c) (((c) & 0xf8) == 0xd8) #define ONIGENC_ISO_8859_1_TO_LOWER_CASE(c) \ OnigEncISO_8859_1_ToLowerCaseTable[c] diff --git a/regexec.c b/regexec.c index 87b7759e26..1336468a74 100644 --- a/regexec.c +++ b/regexec.c @@ -1397,7 +1397,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, (int )(end - str), (int )(sstart - str)); #endif - STACK_PUSH_ENSURED(STK_ALT, (UChar *)FinishCode); /* bottom stack */ + STACK_PUSH_ENSURED(STK_ALT, (UChar* )FinishCode); /* bottom stack */ best_len = ONIG_MISMATCH; s = (UChar* )sstart; pkeep = (UChar* )sstart; @@ -1406,7 +1406,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (s) { UChar *q, *bp, buf[50]; int len; - fprintf(stderr, "%4d> \"", (*p == OP_FINISH) ? -1 : (int )(s - str)); + fprintf(stderr, "%4"PRIdPTR"> \"", (*p == OP_FINISH) ? (ptrdiff_t )-1 : s - str); bp = buf; if (*p != OP_FINISH) { /* s may not be a valid pointer if OP_FINISH. */ for (i = 0, q = s; i < 7 && q < end; i++) { @@ -1419,6 +1419,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, *bp = 0; fputs((char* )buf, stderr); for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr); + fprintf(stderr, "%4"PRIdPTR":", (p == FinishCode) ? (ptrdiff_t )-1 : p - reg->p); onig_print_compiled_byte_code(stderr, p, p + strlen((char *)p), NULL, encode); fprintf(stderr, "\n"); } @@ -4183,7 +4184,7 @@ onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end, prev = s; s += enclen(reg->enc, s, end); - if ((reg->anchor & ANCHOR_LOOK_BEHIND) == 0) { + if ((reg->anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) == 0) { while (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0) && s < range) { prev = s; diff --git a/regint.h b/regint.h index c389477211..3abc8809c9 100644 --- a/regint.h +++ b/regint.h @@ -193,6 +193,8 @@ extern pthread_mutex_t gOnigMutex; #define USE_UPPER_CASE_TABLE #else +#define CHECK_INTERRUPT_IN_MATCH_AT + #define st_init_table onig_st_init_table #define st_init_table_with_size onig_st_init_table_with_size #define st_init_numtable onig_st_init_numtable @@ -213,8 +215,6 @@ extern pthread_mutex_t gOnigMutex; /* */ #define onig_st_is_member st_is_member -#define CHECK_INTERRUPT_IN_MATCH_AT - #endif #define STATE_CHECK_STRING_THRESHOLD_LEN 7 @@ -913,9 +913,7 @@ typedef struct { extern OnigOpInfoType OnigOpInfo[]; -#ifdef ONIG_DEBUG extern void onig_print_compiled_byte_code P_((FILE* f, UChar* bp, UChar* bpend, UChar** nextp, OnigEncoding enc)); -#endif #ifdef ONIG_DEBUG_STATISTICS extern void onig_statistics_init P_((void)); diff --git a/regparse.c b/regparse.c index 6e2e5c6ea1..5258972399 100644 --- a/regparse.c +++ b/regparse.c @@ -4153,17 +4153,15 @@ add_ctype_to_cc_by_range(CClassNode* cc, int ctype ARG_UNUSED, int not, } static int -add_ctype_to_cc(CClassNode* cc, int ctype, int not, int char_prop, ScanEnv* env) +add_ctype_to_cc(CClassNode* cc, int ctype, int not, int ascii_range, ScanEnv* env) { - int maxcode, ascii_range; + int maxcode; int c, r; const OnigCodePoint *ranges; OnigCodePoint sb_out; OnigEncoding enc = env->enc; OnigOptionType option = env->option; - ascii_range = IS_ASCII_RANGE(option) && (char_prop == 0); - r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sb_out, &ranges); if (r == 0) { if (ascii_range) { @@ -4280,31 +4278,32 @@ add_ctype_to_cc(CClassNode* cc, int ctype, int not, int char_prop, ScanEnv* env) } static int -parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env) +parse_posix_bracket(CClassNode* cc, CClassNode* asc_cc, + UChar** src, UChar* end, ScanEnv* env) { #define POSIX_BRACKET_CHECK_LIMIT_LENGTH 20 #define POSIX_BRACKET_NAME_MIN_LEN 4 static const PosixBracketEntryType PBS[] = { - { (UChar* )"alnum", ONIGENC_CTYPE_ALNUM, 5 }, - { (UChar* )"alpha", ONIGENC_CTYPE_ALPHA, 5 }, - { (UChar* )"blank", ONIGENC_CTYPE_BLANK, 5 }, - { (UChar* )"cntrl", ONIGENC_CTYPE_CNTRL, 5 }, - { (UChar* )"digit", ONIGENC_CTYPE_DIGIT, 5 }, - { (UChar* )"graph", ONIGENC_CTYPE_GRAPH, 5 }, - { (UChar* )"lower", ONIGENC_CTYPE_LOWER, 5 }, - { (UChar* )"print", ONIGENC_CTYPE_PRINT, 5 }, - { (UChar* )"punct", ONIGENC_CTYPE_PUNCT, 5 }, - { (UChar* )"space", ONIGENC_CTYPE_SPACE, 5 }, - { (UChar* )"upper", ONIGENC_CTYPE_UPPER, 5 }, - { (UChar* )"xdigit", ONIGENC_CTYPE_XDIGIT, 6 }, - { (UChar* )"ascii", ONIGENC_CTYPE_ASCII, 5 }, - { (UChar* )"word", ONIGENC_CTYPE_WORD, 4 }, - { (UChar* )NULL, -1, 0 } + POSIX_BRACKET_ENTRY_INIT("alnum", ONIGENC_CTYPE_ALNUM), + POSIX_BRACKET_ENTRY_INIT("alpha", ONIGENC_CTYPE_ALPHA), + POSIX_BRACKET_ENTRY_INIT("blank", ONIGENC_CTYPE_BLANK), + POSIX_BRACKET_ENTRY_INIT("cntrl", ONIGENC_CTYPE_CNTRL), + POSIX_BRACKET_ENTRY_INIT("digit", ONIGENC_CTYPE_DIGIT), + POSIX_BRACKET_ENTRY_INIT("graph", ONIGENC_CTYPE_GRAPH), + POSIX_BRACKET_ENTRY_INIT("lower", ONIGENC_CTYPE_LOWER), + POSIX_BRACKET_ENTRY_INIT("print", ONIGENC_CTYPE_PRINT), + POSIX_BRACKET_ENTRY_INIT("punct", ONIGENC_CTYPE_PUNCT), + POSIX_BRACKET_ENTRY_INIT("space", ONIGENC_CTYPE_SPACE), + POSIX_BRACKET_ENTRY_INIT("upper", ONIGENC_CTYPE_UPPER), + POSIX_BRACKET_ENTRY_INIT("xdigit", ONIGENC_CTYPE_XDIGIT), + POSIX_BRACKET_ENTRY_INIT("ascii", ONIGENC_CTYPE_ASCII), + POSIX_BRACKET_ENTRY_INIT("word", ONIGENC_CTYPE_WORD), }; const PosixBracketEntryType *pb; int not, i, r; + int ascii_range; OnigCodePoint c; OnigEncoding enc = env->enc; UChar *p = *src; @@ -4319,17 +4318,25 @@ parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env) if (onigenc_strlen(enc, p, end) < POSIX_BRACKET_NAME_MIN_LEN + 3) goto not_posix_bracket; - for (pb = PBS; IS_NOT_NULL(pb->name); pb++) { + ascii_range = IS_ASCII_RANGE(env->option) && + ! IS_POSIX_BRACKET_ALL_RANGE(env->option); + for (pb = PBS; pb < PBS + numberof(PBS); pb++) { if (onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0) { p = (UChar* )onigenc_step(enc, p, end, pb->len); if (onigenc_with_ascii_strncmp(enc, p, end, (UChar* )":]", 2) != 0) return ONIGERR_INVALID_POSIX_BRACKET_TYPE; - r = add_ctype_to_cc(cc, pb->ctype, not, - IS_POSIX_BRACKET_ALL_RANGE(env->option), - env); + r = add_ctype_to_cc(cc, pb->ctype, not, ascii_range, env); if (r != 0) return r; + if (IS_NOT_NULL(asc_cc)) { + if (pb->ctype != ONIGENC_CTYPE_WORD && + pb->ctype != ONIGENC_CTYPE_ASCII && + !ascii_range) + r = add_ctype_to_cc(asc_cc, pb->ctype, not, ascii_range, env); + if (r != 0) return r; + } + PINC_S; PINC_S; *src = p; return 0; @@ -4386,6 +4393,8 @@ fetch_char_property_to_ctype(UChar** src, UChar* end, ScanEnv* env) return r; } +static int cclass_case_fold(Node** np, CClassNode* cc, CClassNode* asc_cc, ScanEnv* env); + static int parse_char_property(Node** np, OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) @@ -4399,11 +4408,15 @@ parse_char_property(Node** np, OnigToken* tok, UChar** src, UChar* end, *np = node_new_cclass(); CHECK_NULL_RETURN_MEMERR(*np); cc = NCCLASS(*np); - r = add_ctype_to_cc(cc, ctype, 0, 1, env); + r = add_ctype_to_cc(cc, ctype, 0, 0, env); if (r != 0) return r; if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc); - return 0; + if (IS_IGNORECASE(env->option)) { + if (ctype != ONIGENC_CTYPE_ASCII) + r = cclass_case_fold(np, cc, cc, env); + } + return r; } @@ -4421,7 +4434,8 @@ enum CCVALTYPE { }; static int -next_state_class(CClassNode* cc, OnigCodePoint* vs, enum CCVALTYPE* type, +next_state_class(CClassNode* cc, CClassNode* asc_cc, + OnigCodePoint* vs, enum CCVALTYPE* type, enum CCSTATE* state, ScanEnv* env) { int r; @@ -4430,11 +4444,18 @@ next_state_class(CClassNode* cc, OnigCodePoint* vs, enum CCVALTYPE* type, return ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE; if (*state == CCS_VALUE && *type != CCV_CLASS) { - if (*type == CCV_SB) + if (*type == CCV_SB) { BITSET_SET_BIT_CHKDUP(cc->bs, (int )(*vs)); + if (IS_NOT_NULL(asc_cc)) + BITSET_SET_BIT(asc_cc->bs, (int )(*vs)); + } else if (*type == CCV_CODE_POINT) { r = add_code_range(&(cc->mbuf), env, *vs, *vs); if (r < 0) return r; + if (IS_NOT_NULL(asc_cc)) { + r = add_code_range0(&(asc_cc->mbuf), env, *vs, *vs, 0); + if (r < 0) return r; + } } } @@ -4444,7 +4465,8 @@ next_state_class(CClassNode* cc, OnigCodePoint* vs, enum CCVALTYPE* type, } static int -next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v, +next_state_val(CClassNode* cc, CClassNode* asc_cc, + OnigCodePoint *vs, OnigCodePoint v, int* vs_israw, int v_israw, enum CCVALTYPE intype, enum CCVALTYPE* type, enum CCSTATE* state, ScanEnv* env) @@ -4453,11 +4475,18 @@ next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v, switch (*state) { case CCS_VALUE: - if (*type == CCV_SB) + if (*type == CCV_SB) { BITSET_SET_BIT_CHKDUP(cc->bs, (int )(*vs)); + if (IS_NOT_NULL(asc_cc)) + BITSET_SET_BIT(asc_cc->bs, (int )(*vs)); + } else if (*type == CCV_CODE_POINT) { r = add_code_range(&(cc->mbuf), env, *vs, *vs); if (r < 0) return r; + if (IS_NOT_NULL(asc_cc)) { + r = add_code_range0(&(asc_cc->mbuf), env, *vs, *vs, 0); + if (r < 0) return r; + } } break; @@ -4474,10 +4503,16 @@ next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v, return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS; } bitset_set_range(env, cc->bs, (int )*vs, (int )v); + if (IS_NOT_NULL(asc_cc)) + bitset_set_range(env, asc_cc->bs, (int )*vs, (int )v); } else { r = add_code_range(&(cc->mbuf), env, *vs, v); if (r < 0) return r; + if (IS_NOT_NULL(asc_cc)) { + r = add_code_range0(&(asc_cc->mbuf), env, *vs, v, 0); + if (r < 0) return r; + } } } else { @@ -4493,6 +4528,11 @@ next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v, bitset_set_range(env, cc->bs, (int )*vs, (int )(v < 0xff ? v : 0xff)); r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )*vs, v); if (r < 0) return r; + if (IS_NOT_NULL(asc_cc)) { + bitset_set_range(env, asc_cc->bs, (int )*vs, (int )(v < 0xff ? v : 0xff)); + r = add_code_range0(&(asc_cc->mbuf), env, (OnigCodePoint )*vs, v, 0); + if (r < 0) return r; + } #if 0 } else @@ -4542,22 +4582,24 @@ code_exist_check(OnigCodePoint c, UChar* from, UChar* end, int ignore_escaped, } static int -parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, +parse_char_class(Node** np, Node** asc_np, OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) { int r, neg, len, fetched, and_start; OnigCodePoint v, vs; UChar *p; Node* node; + Node* asc_node; CClassNode *cc, *prev_cc; - CClassNode work_cc; + CClassNode *asc_cc, *asc_prev_cc; + CClassNode work_cc, asc_work_cc; enum CCSTATE state; enum CCVALTYPE val_type, in_type; int val_israw, in_israw; - prev_cc = (CClassNode* )NULL; - *np = NULL_NODE; + prev_cc = asc_prev_cc = (CClassNode* )NULL; + *np = *asc_np = NULL_NODE; r = fetch_token_in_cc(tok, src, end, env); if (r == TK_CHAR && tok->u.c == '^' && tok->escaped == 0) { neg = 1; @@ -4581,6 +4623,16 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, CHECK_NULL_RETURN_MEMERR(node); cc = NCCLASS(node); + if (IS_IGNORECASE(env->option)) { + *asc_np = asc_node = node_new_cclass(); + CHECK_NULL_RETURN_MEMERR(asc_node); + asc_cc = NCCLASS(asc_node); + } + else { + asc_node = NULL_NODE; + asc_cc = NULL; + } + and_start = 0; state = CCS_START; p = *src; @@ -4671,13 +4723,13 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, } in_type = (len == 1 ? CCV_SB : CCV_CODE_POINT); val_entry2: - r = next_state_val(cc, &vs, v, &val_israw, in_israw, in_type, &val_type, + r = next_state_val(cc, asc_cc, &vs, v, &val_israw, in_israw, in_type, &val_type, &state, env); if (r != 0) goto err; break; case TK_POSIX_BRACKET_OPEN: - r = parse_posix_bracket(cc, &p, end, env); + r = parse_posix_bracket(cc, asc_cc, &p, end, env); if (r < 0) goto err; if (r == 1) { /* is not POSIX bracket */ CC_ESC_WARN(env, (UChar* )"["); @@ -4690,11 +4742,18 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, break; case TK_CHAR_TYPE: - r = add_ctype_to_cc(cc, tok->u.prop.ctype, tok->u.prop.not, 0, env); + r = add_ctype_to_cc(cc, tok->u.prop.ctype, tok->u.prop.not, + IS_ASCII_RANGE(env->option), env); if (r != 0) return r; + if (IS_NOT_NULL(asc_cc)) { + if (tok->u.prop.ctype != ONIGENC_CTYPE_WORD) + r = add_ctype_to_cc(asc_cc, tok->u.prop.ctype, tok->u.prop.not, + IS_ASCII_RANGE(env->option), env); + if (r != 0) return r; + } next_class: - r = next_state_class(cc, &vs, &val_type, &state, env); + r = next_state_class(cc, asc_cc, &vs, &val_type, &state, env); if (r != 0) goto err; break; @@ -4704,8 +4763,13 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, ctype = fetch_char_property_to_ctype(&p, end, env); if (ctype < 0) return ctype; - r = add_ctype_to_cc(cc, ctype, tok->u.prop.not, 1, env); + r = add_ctype_to_cc(cc, ctype, tok->u.prop.not, 0, env); if (r != 0) return r; + if (IS_NOT_NULL(asc_cc)) { + if (ctype != ONIGENC_CTYPE_ASCII) + r = add_ctype_to_cc(asc_cc, ctype, tok->u.prop.not, 0, env); + if (r != 0) return r; + } goto next_class; } break; @@ -4766,15 +4830,20 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, case TK_CC_CC_OPEN: /* [ */ { - Node *anode; + Node *anode, *aasc_node; CClassNode* acc; - r = parse_char_class(&anode, tok, &p, end, env); + r = parse_char_class(&anode, &aasc_node, tok, &p, end, env); if (r == 0) { acc = NCCLASS(anode); r = or_cclass(cc, acc, env); } + if (r == 0 && IS_NOT_NULL(aasc_node)) { + acc = NCCLASS(aasc_node); + r = or_cclass(asc_cc, acc, env); + } onig_node_free(anode); + onig_node_free(aasc_node); if (r != 0) goto err; } break; @@ -4782,7 +4851,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, case TK_CC_AND: /* && */ { if (state == CCS_VALUE) { - r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type, + r = next_state_val(cc, asc_cc, &vs, 0, &val_israw, 0, val_type, &val_type, &state, env); if (r != 0) goto err; } @@ -4794,12 +4863,23 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, r = and_cclass(prev_cc, cc, env); if (r != 0) goto err; bbuf_free(cc->mbuf); + if (IS_NOT_NULL(asc_cc)) { + r = and_cclass(asc_prev_cc, asc_cc, env); + if (r != 0) goto err; + bbuf_free(asc_cc->mbuf); + } } else { prev_cc = cc; cc = &work_cc; + if (IS_NOT_NULL(asc_cc)) { + asc_prev_cc = asc_cc; + asc_cc = &asc_work_cc; + } } initialize_cclass(cc); + if (IS_NOT_NULL(asc_cc)) + initialize_cclass(asc_cc); } break; @@ -4822,7 +4902,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, } if (state == CCS_VALUE) { - r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type, + r = next_state_val(cc, asc_cc, &vs, 0, &val_israw, 0, val_type, &val_type, &state, env); if (r != 0) goto err; } @@ -4832,12 +4912,24 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, if (r != 0) goto err; bbuf_free(cc->mbuf); cc = prev_cc; + if (IS_NOT_NULL(asc_cc)) { + r = and_cclass(asc_prev_cc, asc_cc, env); + if (r != 0) goto err; + bbuf_free(asc_cc->mbuf); + asc_cc = asc_prev_cc; + } } - if (neg != 0) + if (neg != 0) { NCCLASS_SET_NOT(cc); - else + if (IS_NOT_NULL(asc_cc)) + NCCLASS_SET_NOT(asc_cc); + } + else { NCCLASS_CLEAR_NOT(cc); + if (IS_NOT_NULL(asc_cc)) + NCCLASS_CLEAR_NOT(asc_cc); + } if (IS_NCCLASS_NOT(cc) && IS_SYNTAX_BV(env->syntax, ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC)) { int is_empty; @@ -4865,6 +4957,8 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, err: if (cc != NCCLASS(*np)) bbuf_free(cc->mbuf); + if (IS_NOT_NULL(asc_cc) && (asc_cc != NCCLASS(*asc_np))) + bbuf_free(asc_cc->mbuf); return r; } @@ -5489,6 +5583,7 @@ clear_not_flag_cclass(CClassNode* cc, OnigEncoding enc) typedef struct { ScanEnv* env; CClassNode* cc; + CClassNode* asc_cc; Node* alt_root; Node** ptail; } IApplyCaseFoldArg; @@ -5500,37 +5595,57 @@ i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[], IApplyCaseFoldArg* iarg; ScanEnv* env; CClassNode* cc; + CClassNode* asc_cc; BitSetRef bs; + int add_flag; iarg = (IApplyCaseFoldArg* )arg; env = iarg->env; cc = iarg->cc; + asc_cc = iarg->asc_cc; bs = cc->bs; + if (IS_NULL(asc_cc)) { + add_flag = 0; + } + else if (ONIGENC_IS_ASCII_CODE(from) == ONIGENC_IS_ASCII_CODE(*to)) { + add_flag = 1; + } + else { + add_flag = onig_is_code_in_cc(env->enc, from, asc_cc); + if (IS_NCCLASS_NOT(asc_cc)) + add_flag = !add_flag; + } + if (to_len == 1) { int is_in = onig_is_code_in_cc(env->enc, from, cc); #ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS if ((is_in != 0 && !IS_NCCLASS_NOT(cc)) || (is_in == 0 && IS_NCCLASS_NOT(cc))) { - if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) { - add_code_range0(&(cc->mbuf), env, *to, *to, 0); - } - else { - BITSET_SET_BIT(bs, *to); + if (add_flag) { + if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) { + add_code_range0(&(cc->mbuf), env, *to, *to, 0); + } + else { + BITSET_SET_BIT(bs, *to); + } } } #else if (is_in != 0) { - if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) { - if (IS_NCCLASS_NOT(cc)) clear_not_flag_cclass(cc, env->enc); - add_code_range0(&(cc->mbuf), env, *to, *to, 0); - } - else { - if (IS_NCCLASS_NOT(cc)) { - BITSET_CLEAR_BIT(bs, *to); + if (add_flag) { + if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) { + if (IS_NCCLASS_NOT(cc)) clear_not_flag_cclass(cc, env->enc); + add_code_range0(&(cc->mbuf), env, *to, *to, 0); + } + else { + if (IS_NCCLASS_NOT(cc)) { + BITSET_CLEAR_BIT(bs, *to); + } + else { + BITSET_SET_BIT(bs, *to); + } } - else - BITSET_SET_BIT(bs, *to); } } #endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */ @@ -5573,6 +5688,35 @@ i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[], return 0; } +static int +cclass_case_fold(Node** np, CClassNode* cc, CClassNode* asc_cc, ScanEnv* env) +{ + int r; + IApplyCaseFoldArg iarg; + + iarg.env = env; + iarg.cc = cc; + iarg.asc_cc = asc_cc; + iarg.alt_root = NULL_NODE; + iarg.ptail = &(iarg.alt_root); + + r = ONIGENC_APPLY_ALL_CASE_FOLD(env->enc, env->case_fold_flag, + i_apply_case_fold, &iarg); + if (r != 0) { + onig_node_free(iarg.alt_root); + return r; + } + if (IS_NOT_NULL(iarg.alt_root)) { + Node* work = onig_node_new_alt(*np, iarg.alt_root); + if (IS_NULL(work)) { + onig_node_free(iarg.alt_root); + return ONIGERR_MEMORY; + } + *np = work; + } + return r; +} + static int node_linebreak(Node** np, ScanEnv* env) { @@ -5658,7 +5802,7 @@ node_extended_grapheme_cluster(Node** np, ScanEnv* env) np1 = node_new_cclass(); if (IS_NULL(np1)) goto err; cc1 = NCCLASS(np1); - r = add_ctype_to_cc(cc1, ctype, 0, 1, env); + r = add_ctype_to_cc(cc1, ctype, 0, 0, env); if (r != 0) goto err; NCCLASS_SET_NOT(cc1); @@ -5666,7 +5810,7 @@ node_extended_grapheme_cluster(Node** np, ScanEnv* env) np2 = node_new_cclass(); if (IS_NULL(np2)) goto err; cc2 = NCCLASS(np2); - r = add_ctype_to_cc(cc2, ctype, 0, 1, env); + r = add_ctype_to_cc(cc2, ctype, 0, 0, env); if (r != 0) goto err; qn = node_new_quantifier(0, REPEAT_INFINITE, 0); @@ -6013,7 +6157,8 @@ parse_exp(Node** np, OnigToken* tok, int term, *np = node_new_cclass(); CHECK_NULL_RETURN_MEMERR(*np); cc = NCCLASS(*np); - r = add_ctype_to_cc(cc, tok->u.prop.ctype, 0, 0, env); + r = add_ctype_to_cc(cc, tok->u.prop.ctype, 0, + IS_ASCII_RANGE(env->option), env); if (r != 0) return r; if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc); #ifdef USE_SHARED_CCLASS_TABLE @@ -6036,15 +6181,20 @@ parse_exp(Node** np, OnigToken* tok, int term, case TK_CC_OPEN: { + Node *asc_node; CClassNode* cc; OnigCodePoint code; - r = parse_char_class(np, tok, src, end, env); - if (r != 0) return r; + r = parse_char_class(np, &asc_node, tok, src, end, env); + if (r != 0) { + onig_node_free(asc_node); + return r; + } cc = NCCLASS(*np); if (is_onechar_cclass(cc, &code)) { onig_node_free(*np); + onig_node_free(asc_node); *np = node_new_empty(); CHECK_NULL_RETURN_MEMERR(*np); r = node_str_cat_codepoint(*np, env->enc, code); @@ -6052,28 +6202,13 @@ parse_exp(Node** np, OnigToken* tok, int term, goto string_loop; } if (IS_IGNORECASE(env->option)) { - IApplyCaseFoldArg iarg; - - iarg.env = env; - iarg.cc = cc; - iarg.alt_root = NULL_NODE; - iarg.ptail = &(iarg.alt_root); - - r = ONIGENC_APPLY_ALL_CASE_FOLD(env->enc, env->case_fold_flag, - i_apply_case_fold, &iarg); + r = cclass_case_fold(np, cc, NCCLASS(asc_node), env); if (r != 0) { - onig_node_free(iarg.alt_root); + onig_node_free(asc_node); return r; } - if (IS_NOT_NULL(iarg.alt_root)) { - Node* work = onig_node_new_alt(*np, iarg.alt_root); - if (IS_NULL(work)) { - onig_node_free(iarg.alt_root); - return ONIGERR_MEMORY; - } - *np = work; - } } + onig_node_free(asc_node); } break; diff --git a/regparse.h b/regparse.h index c92babfebe..35de54671e 100644 --- a/regparse.h +++ b/regparse.h @@ -193,8 +193,8 @@ typedef struct { int type; int regnum; OnigOptionType option; - struct _Node* target; AbsAddrType call_addr; + struct _Node* target; /* for multiple call reference */ OnigDistance min_len; /* min length (byte) */ OnigDistance max_len; /* max length (byte) */ @@ -296,10 +296,10 @@ typedef struct { UChar* error; UChar* error_end; regex_t* reg; /* for reg->names only */ - int num_call; #ifdef USE_SUBEXP_CALL UnsetAddrList* unset_addr_list; #endif + int num_call; int num_mem; #ifdef USE_NAMED_GROUP int num_named; -- cgit v1.2.3