summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ChangeLog6
-rw-r--r--euc_jp.c20
-rw-r--r--oniguruma.h100
-rw-r--r--regcomp.c155
-rw-r--r--regenc.c70
-rw-r--r--regenc.h26
-rw-r--r--regerror.c50
-rw-r--r--regexec.c115
-rw-r--r--regint.h19
-rw-r--r--regparse.c161
-rw-r--r--regparse.h3
-rw-r--r--sjis.c19
-rw-r--r--utf8.c90
13 files changed, 469 insertions, 365 deletions
diff --git a/ChangeLog b/ChangeLog
index c67c150f59..bc90734c2c 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -11,6 +11,10 @@ Sat Feb 4 15:52:56 2006 Hirokazu Yamamoto <ocean@m2.ccsnet.ne.jp>
I think the function name of rb_int2big is quite misleading.
This should be "rb_long2big".
+Sat Feb 4 15:02:05 2006 Yukihiro Matsumoto <matz@ruby-lang.org>
+
+ * oniguruma.h: merge Oniguruma 4.0.0 [ruby-dev:28290]
+
Fri Feb 3 19:25:53 2006 Hirokazu Yamamoto <ocean@m2.ccsnet.ne.jp>
* ruby.h: fixed prototype.
@@ -86,7 +90,7 @@ Tue Jan 31 08:07:02 2006 Yukihiro Matsumoto <matz@ruby-lang.org>
* numeric.c (int_dotimes): ditto.
* enum.c (enum_first): new method Enumerable#first to take first n
- element from an enumerable.
+ elements from an enumerable.
* enum.c (enum_group_by): new method Enumerable#group_by that
groups enumerable values according to their block values.
diff --git a/euc_jp.c b/euc_jp.c
index 5f13e33eb4..71c81ee9fe 100644
--- a/euc_jp.c
+++ b/euc_jp.c
@@ -31,7 +31,7 @@
#define eucjp_islead(c) ((UChar )((c) - 0xa1) > 0xfe - 0xa1)
-static int EncLen_EUCJP[] = {
+static const int EncLen_EUCJP[] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -158,20 +158,16 @@ eucjp_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
static int
eucjp_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
- if ((ctype & ONIGENC_CTYPE_WORD) != 0) {
- if (code < 128)
- return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
- else
+ if (code < 128)
+ return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
+ else {
+ if ((ctype & (ONIGENC_CTYPE_WORD |
+ ONIGENC_CTYPE_GRAPH | ONIGENC_CTYPE_PRINT)) != 0) {
return (eucjp_code_to_mbclen(code) > 1 ? TRUE : FALSE);
-
- ctype &= ~ONIGENC_CTYPE_WORD;
- if (ctype == 0) return FALSE;
+ }
}
- if (code < 128)
- return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
- else
- return FALSE;
+ return FALSE;
}
static UChar*
diff --git a/oniguruma.h b/oniguruma.h
index 95dfbebc5f..0fd7e12a44 100644
--- a/oniguruma.h
+++ b/oniguruma.h
@@ -4,7 +4,7 @@
oniguruma.h - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -34,8 +34,8 @@ extern "C" {
#endif
#define ONIGURUMA
-#define ONIGURUMA_VERSION_MAJOR 3
-#define ONIGURUMA_VERSION_MINOR 7
+#define ONIGURUMA_VERSION_MAJOR 4
+#define ONIGURUMA_VERSION_MINOR 0
#define ONIGURUMA_VERSION_TEENY 0
#ifdef __cplusplus
@@ -79,7 +79,11 @@ extern "C" {
/* PART: character encoding */
-typedef unsigned char UChar;
+#ifndef ONIG_ESCAPE_UCHAR_COLLISION
+#define UChar OnigUChar
+#endif
+
+typedef unsigned char OnigUChar;
typedef unsigned long OnigCodePoint;
typedef unsigned int OnigDistance;
@@ -149,24 +153,24 @@ typedef m17n_encoding* OnigEncoding;
#else
typedef struct {
- int (*mbc_enc_len)(const UChar* p);
+ int (*mbc_enc_len)(const OnigUChar* p);
const char* name;
int max_enc_len;
int min_enc_len;
OnigAmbigType support_ambig_flag;
OnigMetaCharTableType meta_char_table;
- int (*is_mbc_newline)(const UChar* p, const UChar* end);
- OnigCodePoint (*mbc_to_code)(const UChar* p, const UChar* end);
+ int (*is_mbc_newline)(const OnigUChar* p, const OnigUChar* end);
+ OnigCodePoint (*mbc_to_code)(const OnigUChar* p, const OnigUChar* end);
int (*code_to_mbclen)(OnigCodePoint code);
- int (*code_to_mbc)(OnigCodePoint code, UChar *buf);
- int (*mbc_to_normalize)(OnigAmbigType flag, const UChar** pp, const UChar* end, UChar* to);
- int (*is_mbc_ambiguous)(OnigAmbigType flag, const UChar** pp, const UChar* end);
- int (*get_all_pair_ambig_codes)(OnigAmbigType flag, OnigPairAmbigCodes** acs);
- int (*get_all_comp_ambig_codes)(OnigAmbigType flag, OnigCompAmbigCodes** acs);
+ int (*code_to_mbc)(OnigCodePoint code, OnigUChar *buf);
+ int (*mbc_to_normalize)(OnigAmbigType flag, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to);
+ int (*is_mbc_ambiguous)(OnigAmbigType flag, const OnigUChar** pp, const OnigUChar* end);
+ int (*get_all_pair_ambig_codes)(OnigAmbigType flag, const OnigPairAmbigCodes** acs);
+ int (*get_all_comp_ambig_codes)(OnigAmbigType flag, const OnigCompAmbigCodes** acs);
int (*is_code_ctype)(OnigCodePoint code, unsigned int ctype);
- int (*get_ctype_code_range)(int ctype, OnigCodePoint* sb_range[], OnigCodePoint* mb_range[]);
- UChar* (*left_adjust_char_head)(const UChar* start, const UChar* p);
- int (*is_allowed_reverse_match)(const UChar* p, const UChar* end);
+ int (*get_ctype_code_range)(int ctype, const OnigCodePoint* sb_range[], const OnigCodePoint* mb_range[]);
+ OnigUChar* (*left_adjust_char_head)(const OnigUChar* start, const OnigUChar* p);
+ int (*is_allowed_reverse_match)(const OnigUChar* p, const OnigUChar* end);
} OnigEncodingType;
typedef OnigEncodingType* OnigEncoding;
@@ -200,6 +204,7 @@ ONIG_EXTERN OnigEncodingType OnigEncodingSJIS;
ONIG_EXTERN OnigEncodingType OnigEncodingKOI8;
ONIG_EXTERN OnigEncodingType OnigEncodingKOI8_R;
ONIG_EXTERN OnigEncodingType OnigEncodingBIG5;
+ONIG_EXTERN OnigEncodingType OnigEncodingGB18030;
#define ONIG_ENCODING_ASCII (&OnigEncodingASCII)
#define ONIG_ENCODING_ISO_8859_1 (&OnigEncodingISO_8859_1)
@@ -230,6 +235,7 @@ ONIG_EXTERN OnigEncodingType OnigEncodingBIG5;
#define ONIG_ENCODING_KOI8 (&OnigEncodingKOI8)
#define ONIG_ENCODING_KOI8_R (&OnigEncodingKOI8_R)
#define ONIG_ENCODING_BIG5 (&OnigEncodingBIG5)
+#define ONIG_ENCODING_GB18030 (&OnigEncodingGB18030)
#endif /* else RUBY && M17N */
@@ -333,22 +339,22 @@ ONIG_EXTERN OnigEncodingType OnigEncodingBIG5;
ONIG_EXTERN
int onigenc_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, int ctype));
ONIG_EXTERN
-int onigenc_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf));
+int onigenc_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, OnigUChar *buf));
ONIG_EXTERN
-int onigenc_mbc_to_normalize P_((OnigEncoding enc, OnigAmbigType flag, const UChar** pp, const UChar* end, UChar* buf));
+int onigenc_mbc_to_normalize P_((OnigEncoding enc, OnigAmbigType flag, const OnigUChar** pp, const OnigUChar* end, OnigUChar* buf));
ONIG_EXTERN
-int onigenc_is_mbc_ambiguous P_((OnigEncoding enc, OnigAmbigType flag, const UChar** pp, const UChar* end));
+int onigenc_is_mbc_ambiguous P_((OnigEncoding enc, OnigAmbigType flag, const OnigUChar** pp, const OnigUChar* end));
ONIG_EXTERN
-int onigenc_is_allowed_reverse_match P_((OnigEncoding enc, const UChar* s, const UChar* end));
+int onigenc_is_allowed_reverse_match P_((OnigEncoding enc, const OnigUChar* s, const OnigUChar* end));
#else /* ONIG_RUBY_M17N */
#define ONIGENC_NAME(enc) ((enc)->name)
#define ONIGENC_MBC_TO_NORMALIZE(enc,flag,pp,end,buf) \
- (enc)->mbc_to_normalize(flag,(const UChar** )pp,end,buf)
+ (enc)->mbc_to_normalize(flag,(const OnigUChar** )pp,end,buf)
#define ONIGENC_IS_MBC_AMBIGUOUS(enc,flag,pp,end) \
- (enc)->is_mbc_ambiguous(flag,(const UChar** )pp,end)
+ (enc)->is_mbc_ambiguous(flag,(const OnigUChar** )pp,end)
#define ONIGENC_SUPPORT_AMBIG_FLAG(enc) ((enc)->support_ambig_flag)
#define ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc,s,end) \
(enc)->is_allowed_reverse_match(s,end)
@@ -405,7 +411,7 @@ int onigenc_is_allowed_reverse_match P_((OnigEncoding enc, const UChar* s, const
(enc)->get_ctype_code_range(ctype,sbr,mbr)
ONIG_EXTERN
-UChar* onigenc_step_back P_((OnigEncoding enc, const UChar* start, const UChar* s, int n));
+OnigUChar* onigenc_step_back P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, int n));
#endif /* is not ONIG_RUBY_M17N */
@@ -418,21 +424,21 @@ int onigenc_set_default_encoding P_((OnigEncoding enc));
ONIG_EXTERN
OnigEncoding onigenc_get_default_encoding P_(());
ONIG_EXTERN
-void onigenc_set_default_caseconv_table P_((const UChar* table));
+void onigenc_set_default_caseconv_table P_((const OnigUChar* table));
ONIG_EXTERN
-UChar* onigenc_get_right_adjust_char_head_with_prev P_((OnigEncoding enc, const UChar* start, const UChar* s, const UChar** prev));
+OnigUChar* onigenc_get_right_adjust_char_head_with_prev P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, const OnigUChar** prev));
ONIG_EXTERN
-UChar* onigenc_get_prev_char_head P_((OnigEncoding enc, const UChar* start, const UChar* s));
+OnigUChar* onigenc_get_prev_char_head P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s));
ONIG_EXTERN
-UChar* onigenc_get_left_adjust_char_head P_((OnigEncoding enc, const UChar* start, const UChar* s));
+OnigUChar* onigenc_get_left_adjust_char_head P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s));
ONIG_EXTERN
-UChar* onigenc_get_right_adjust_char_head P_((OnigEncoding enc, const UChar* start, const UChar* s));
+OnigUChar* onigenc_get_right_adjust_char_head P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s));
ONIG_EXTERN
-int onigenc_strlen P_((OnigEncoding enc, const UChar* p, const UChar* end));
+int onigenc_strlen P_((OnigEncoding enc, const OnigUChar* p, const OnigUChar* end));
ONIG_EXTERN
-int onigenc_strlen_null P_((OnigEncoding enc, const UChar* p));
+int onigenc_strlen_null P_((OnigEncoding enc, const OnigUChar* p));
ONIG_EXTERN
-int onigenc_str_bytelen_null P_((OnigEncoding enc, const UChar* p));
+int onigenc_str_bytelen_null P_((OnigEncoding enc, const OnigUChar* p));
@@ -465,6 +471,7 @@ typedef unsigned int OnigOptionType;
#define ONIG_OPTION_NOTBOL (ONIG_OPTION_CAPTURE_GROUP << 1)
#define ONIG_OPTION_NOTEOL (ONIG_OPTION_NOTBOL << 1)
#define ONIG_OPTION_POSIX_REGION (ONIG_OPTION_NOTEOL << 1)
+#define ONIG_OPTION_MAXBIT ONIG_OPTION_POSIX_REGION /* limit */
#define ONIG_OPTION_ON(options,regopt) ((options) |= (regopt))
#define ONIG_OPTION_OFF(options,regopt) ((options) &= ~(regopt))
@@ -478,6 +485,7 @@ typedef struct {
OnigOptionType options; /* default option */
} OnigSyntaxType;
+ONIG_EXTERN OnigSyntaxType OnigSyntaxASIS;
ONIG_EXTERN OnigSyntaxType OnigSyntaxPosixBasic;
ONIG_EXTERN OnigSyntaxType OnigSyntaxPosixExtended;
ONIG_EXTERN OnigSyntaxType OnigSyntaxEmacs;
@@ -485,9 +493,11 @@ ONIG_EXTERN OnigSyntaxType OnigSyntaxGrep;
ONIG_EXTERN OnigSyntaxType OnigSyntaxGnuRegex;
ONIG_EXTERN OnigSyntaxType OnigSyntaxJava;
ONIG_EXTERN OnigSyntaxType OnigSyntaxPerl;
+ONIG_EXTERN OnigSyntaxType OnigSyntaxPerl_NG;
ONIG_EXTERN OnigSyntaxType OnigSyntaxRuby;
/* predefined syntaxes (see regsyntax.c) */
+#define ONIG_SYNTAX_ASIS (&OnigSyntaxASIS)
#define ONIG_SYNTAX_POSIX_BASIC (&OnigSyntaxPosixBasic)
#define ONIG_SYNTAX_POSIX_EXTENDED (&OnigSyntaxPosixExtended)
#define ONIG_SYNTAX_EMACS (&OnigSyntaxEmacs)
@@ -495,6 +505,7 @@ ONIG_EXTERN OnigSyntaxType OnigSyntaxRuby;
#define ONIG_SYNTAX_GNU_REGEX (&OnigSyntaxGnuRegex)
#define ONIG_SYNTAX_JAVA (&OnigSyntaxJava)
#define ONIG_SYNTAX_PERL (&OnigSyntaxPerl)
+#define ONIG_SYNTAX_PERL_NG (&OnigSyntaxPerl_NG)
#define ONIG_SYNTAX_RUBY (&OnigSyntaxRuby)
/* default syntax */
@@ -554,6 +565,7 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax;
#define ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT (1<<17) /* \p{^..}, \P{^..} */
#define ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS (1<<18) /* \p{IsXDigit} */
#define ONIG_SYN_OP2_ESC_H_XDIGIT (1<<19) /* \h, \H */
+#define ONIG_SYN_OP2_INEFFECTIVE_ESCAPE (1<<20) /* \ */
/* syntax (behavior) */
#define ONIG_SYN_CONTEXT_INDEP_ANCHORS (1<<31) /* not implemented */
@@ -695,8 +707,8 @@ struct re_registers {
typedef struct re_registers OnigRegion;
typedef struct {
- UChar* par;
- UChar* par_end;
+ OnigUChar* par;
+ OnigUChar* par_end;
} OnigErrorInfo;
typedef struct {
@@ -704,8 +716,8 @@ typedef struct {
int upper;
} OnigRepeatRange;
-typedef void (*OnigWarnFunc) P_((const char* s, ...));
-extern void onig_null_warn P_((const char* s, ...));
+typedef void (*OnigWarnFunc) P_((const char* s));
+extern void onig_null_warn P_((const char* s));
#define ONIG_NULL_WARN onig_null_warn
#define ONIG_CHAR_TABLE_SIZE 256
@@ -776,25 +788,25 @@ typedef struct {
ONIG_EXTERN
int onig_init P_((void));
ONIG_EXTERN
-int onig_error_code_to_str PV_((UChar* s, int err_code, ...));
+int onig_error_code_to_str PV_((OnigUChar* s, int err_code, ...));
ONIG_EXTERN
void onig_set_warn_func P_((OnigWarnFunc f));
ONIG_EXTERN
void onig_set_verb_warn_func P_((OnigWarnFunc f));
ONIG_EXTERN
-int onig_new P_((regex_t**, const UChar* pattern, const UChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo));
+int onig_new P_((regex_t**, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo));
ONIG_EXTERN
-int onig_new_deluxe P_((regex_t** reg, const UChar* pattern, const UChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo));
+int onig_new_deluxe P_((regex_t** reg, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo));
ONIG_EXTERN
void onig_free P_((regex_t*));
ONIG_EXTERN
-int onig_recompile P_((regex_t*, const UChar* pattern, const UChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo));
+int onig_recompile P_((regex_t*, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo));
ONIG_EXTERN
-int onig_recompile_deluxe P_((regex_t* reg, const UChar* pattern, const UChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo));
+int onig_recompile_deluxe P_((regex_t* reg, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo));
ONIG_EXTERN
-int onig_search P_((regex_t*, const UChar* str, const UChar* end, const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option));
+int onig_search P_((regex_t*, const OnigUChar* str, const OnigUChar* end, const OnigUChar* start, const OnigUChar* range, OnigRegion* region, OnigOptionType option));
ONIG_EXTERN
-int onig_match P_((regex_t*, const UChar* str, const UChar* end, const UChar* at, OnigRegion* region, OnigOptionType option));
+int onig_match P_((regex_t*, const OnigUChar* str, const OnigUChar* end, const OnigUChar* at, OnigRegion* region, OnigOptionType option));
ONIG_EXTERN
OnigRegion* onig_region_new P_((void));
ONIG_EXTERN
@@ -810,11 +822,11 @@ int onig_region_resize P_((OnigRegion* region, int n));
ONIG_EXTERN
int onig_region_set P_((OnigRegion* region, int at, int beg, int end));
ONIG_EXTERN
-int onig_name_to_group_numbers P_((regex_t* reg, const UChar* name, const UChar* name_end, int** nums));
+int onig_name_to_group_numbers P_((regex_t* reg, const OnigUChar* name, const OnigUChar* name_end, int** nums));
ONIG_EXTERN
-int onig_name_to_backref_number P_((regex_t* reg, const UChar* name, const UChar* name_end, OnigRegion *region));
+int onig_name_to_backref_number P_((regex_t* reg, const OnigUChar* name, const OnigUChar* name_end, OnigRegion *region));
ONIG_EXTERN
-int onig_foreach_name P_((regex_t* reg, int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg));
+int onig_foreach_name P_((regex_t* reg, int (*func)(const OnigUChar*, const OnigUChar*,int,int*,regex_t*,void*), void* arg));
ONIG_EXTERN
int onig_number_of_names P_((regex_t* reg));
ONIG_EXTERN
diff --git a/regcomp.c b/regcomp.c
index 15dc42cc13..ff0fd8abb7 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -34,7 +34,7 @@ OnigAmbigType OnigDefaultAmbigFlag =
ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE);
extern OnigAmbigType
-onig_get_default_ambig_flag(void)
+onig_get_default_ambig_flag()
{
return OnigDefaultAmbigFlag;
}
@@ -2120,29 +2120,6 @@ get_char_length_tree(Node* node, regex_t* reg, int* len)
return get_char_length_tree1(node, reg, len, 0);
}
-extern int
-onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
-{
- int found;
-
- if (ONIGENC_MBC_MINLEN(enc) > 1 || (code >= SINGLE_BYTE_SIZE)) {
- if (IS_NULL(cc->mbuf)) {
- found = 0;
- }
- else {
- found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0);
- }
- }
- else {
- found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);
- }
-
- if (IS_CCLASS_NOT(cc))
- return !found;
- else
- return found;
-}
-
/* x is not included y ==> 1 : 0 */
static int
is_not_included(Node* x, Node* y, regex_t* reg)
@@ -2516,6 +2493,9 @@ subexp_inf_recursive_check(Node* node, ScanEnv* env, int head)
case N_QUALIFIER:
r = subexp_inf_recursive_check(NQUALIFIER(node).target, env, head);
+ if (r == RECURSION_EXIST) {
+ if (NQUALIFIER(node).lower == 0) r = 0;
+ }
break;
case N_ANCHOR:
@@ -2943,15 +2923,55 @@ next_setup(Node* node, Node* next_node, regex_t* reg)
return 0;
}
+
+static int
+divide_ambig_string_node_sub(regex_t* reg, int prev_ambig,
+ UChar* prev_start, UChar* prev,
+ UChar* end, Node*** tailp, Node** root)
+{
+ UChar *tmp, *wp;
+ Node* snode;
+
+ if (prev_ambig != 0) {
+ tmp = prev_start;
+ wp = prev_start;
+ while (tmp < prev) {
+ wp += ONIGENC_MBC_TO_NORMALIZE(reg->enc, reg->ambig_flag,
+ &tmp, end, wp);
+ }
+ snode = onig_node_new_str(prev_start, wp);
+ CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY);
+ NSTRING_SET_AMBIG(snode);
+ if (wp != prev) NSTRING_SET_AMBIG_REDUCE(snode);
+ }
+ else {
+ snode = onig_node_new_str(prev_start, prev);
+ CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY);
+ }
+
+ if (*tailp == (Node** )0) {
+ *root = onig_node_new_list(snode, NULL);
+ CHECK_NULL_RETURN_VAL(*root, ONIGERR_MEMORY);
+ *tailp = &(NCONS(*root).right);
+ }
+ else {
+ **tailp = onig_node_new_list(snode, NULL);
+ CHECK_NULL_RETURN_VAL(**tailp, ONIGERR_MEMORY);
+ *tailp = &(NCONS(**tailp).right);
+ }
+
+ return 0;
+}
+
static int
divide_ambig_string_node(Node* node, regex_t* reg)
{
StrNode* sn = &NSTRING(node);
int ambig, prev_ambig;
UChar *prev, *p, *end, *prev_start, *start, *tmp, *wp;
- Node *snode;
Node *root = NULL_NODE;
Node **tailp = (Node** )0;
+ int r;
start = prev_start = p = sn->s;
end = sn->end;
@@ -2964,33 +2984,9 @@ divide_ambig_string_node(Node* node, regex_t* reg)
if (prev_ambig != (ambig = ONIGENC_IS_MBC_AMBIGUOUS(reg->enc,
reg->ambig_flag, &p, end))) {
- if (prev_ambig != 0) {
- tmp = prev_start;
- wp = prev_start;
- while (tmp < prev) {
- wp += ONIGENC_MBC_TO_NORMALIZE(reg->enc, reg->ambig_flag,
- &tmp, end, wp);
- }
- snode = onig_node_new_str(prev_start, wp);
- CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY);
- NSTRING_SET_AMBIG(snode);
- if (wp != prev) NSTRING_SET_AMBIG_REDUCE(snode);
- }
- else {
- snode = onig_node_new_str(prev_start, prev);
- CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY);
- }
-
- if (tailp == (Node** )0) {
- root = onig_node_new_list(snode, NULL);
- CHECK_NULL_RETURN_VAL(root, ONIGERR_MEMORY);
- tailp = &(NCONS(root).right);
- }
- else {
- *tailp = onig_node_new_list(snode, NULL);
- CHECK_NULL_RETURN_VAL(*tailp, ONIGERR_MEMORY);
- tailp = &(NCONS(*tailp).right);
- }
+ r = divide_ambig_string_node_sub(reg, prev_ambig, prev_start, prev,
+ end, &tailp, &root);
+ if (r != 0) return r;
prev_ambig = ambig;
prev_start = prev;
@@ -3011,33 +3007,9 @@ divide_ambig_string_node(Node* node, regex_t* reg)
}
}
else {
- if (prev_ambig != 0) {
- tmp = prev_start;
- wp = prev_start;
- while (tmp < end) {
- wp += ONIGENC_MBC_TO_NORMALIZE(reg->enc, reg->ambig_flag,
- &tmp, end, wp);
- }
- snode = onig_node_new_str(prev_start, wp);
- CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY);
- NSTRING_SET_AMBIG(snode);
- if (wp != end) NSTRING_SET_AMBIG_REDUCE(snode);
- }
- else {
- snode = onig_node_new_str(prev_start, end);
- CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY);
- }
-
- if (tailp == (Node** )0) {
- root = onig_node_new_list(snode, NULL);
- CHECK_NULL_RETURN_VAL(root, ONIGERR_MEMORY);
- tailp = &(NCONS(node).right);
- }
- else {
- *tailp = onig_node_new_list(snode, NULL);
- CHECK_NULL_RETURN_VAL(*tailp, ONIGERR_MEMORY);
- tailp = &(NCONS(*tailp).right);
- }
+ r = divide_ambig_string_node_sub(reg, prev_ambig, prev_start, end,
+ end, &tailp, &root);
+ if (r != 0) return r;
swap_node(node, root);
onig_node_str_clear(root); /* should be after swap! */
@@ -3383,7 +3355,7 @@ typedef struct {
static int
map_position_value(OnigEncoding enc, int i)
{
- static short int ByteValTable[] = {
+ static const short int ByteValTable[] = {
5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5,
@@ -3408,7 +3380,7 @@ static int
distance_value(MinMaxLen* mm)
{
/* 1000 / (min-max-dist + 1) */
- static short int dist_vals[] = {
+ static const short int dist_vals[] = {
1000, 500, 333, 250, 200, 167, 143, 125, 111, 100,
91, 83, 77, 71, 67, 63, 59, 56, 53, 50,
48, 45, 43, 42, 40, 38, 37, 36, 34, 33,
@@ -3711,7 +3683,7 @@ select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt)
static void
clear_opt_map_info(OptMapInfo* map)
{
- static OptMapInfo clean_info = {
+ static const OptMapInfo clean_info = {
{0, 0}, {0, 0}, 0,
{
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -3758,8 +3730,8 @@ add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end,
int i, j, n, len;
UChar buf[ONIGENC_MBC_NORMALIZE_MAXLEN];
OnigCodePoint code, ccode;
- OnigCompAmbigCodes* ccs;
- OnigPairAmbigCodes* pccs;
+ const OnigCompAmbigCodes* ccs;
+ const OnigPairAmbigCodes* pccs;
OnigAmbigType amb;
add_char_opt_map_info(map, p[0], enc);
@@ -4316,10 +4288,7 @@ set_optimize_exact_info(regex_t* reg, OptExactInfo* e)
CHECK_NULL_RETURN_VAL(reg->exact, ONIGERR_MEMORY);
reg->exact_end = reg->exact + e->len;
- if (e->anc.left_anchor & ANCHOR_BEGIN_LINE)
- allow_reverse = 1;
- else
- allow_reverse =
+ allow_reverse =
ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);
if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
@@ -4514,8 +4483,8 @@ print_anchor(FILE* f, int anchor)
static void
print_optimize_info(FILE* f, regex_t* reg)
{
- static char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",
- "EXACT_IC", "MAP" };
+ static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",
+ "EXACT_IC", "MAP" };
fprintf(f, "optimize: %s\n", on[reg->optimize]);
fprintf(f, " anchor: "); print_anchor(f, reg->anchor);
@@ -4959,7 +4928,7 @@ onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
}
extern int
-onig_init(void)
+onig_init()
{
if (onig_inited != 0)
return 0;
@@ -4981,9 +4950,9 @@ onig_init(void)
extern int
-onig_end(void)
+onig_end()
{
- extern int onig_free_shared_cclass_table(void);
+ extern int onig_free_shared_cclass_table();
THREAD_ATOMIC_START;
diff --git a/regenc.c b/regenc.c
index 955dfeecf6..bbbf1a2f94 100644
--- a/regenc.c
+++ b/regenc.c
@@ -32,13 +32,13 @@
OnigEncoding OnigEncDefaultCharEncoding = ONIG_ENCODING_INIT_DEFAULT;
extern int
-onigenc_init(void)
+onigenc_init()
{
return 0;
}
extern OnigEncoding
-onigenc_get_default_encoding(void)
+onigenc_get_default_encoding()
{
return OnigEncDefaultCharEncoding;
}
@@ -175,7 +175,7 @@ onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s)
#define USE_APPLICATION_TO_LOWER_CASE_TABLE
-unsigned short OnigEnc_Unicode_ISO_8859_1_CtypeTable[256] = {
+const unsigned short OnigEnc_Unicode_ISO_8859_1_CtypeTable[256] = {
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x228c, 0x2289, 0x2288, 0x2288, 0x2288, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
@@ -251,7 +251,7 @@ static const UChar BuiltInAsciiToLowerCaseTable[] = {
#endif /* not USE_APPLICATION_TO_LOWER_CASE_TABLE */
#ifdef USE_UPPER_CASE_TABLE
-UChar OnigEncAsciiToUpperCaseTable[256] = {
+const UChar OnigEncAsciiToUpperCaseTable[256] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
@@ -287,7 +287,7 @@ UChar OnigEncAsciiToUpperCaseTable[256] = {
};
#endif
-unsigned short OnigEncAsciiCtypeTable[256] = {
+const unsigned short OnigEncAsciiCtypeTable[256] = {
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
@@ -323,7 +323,7 @@ unsigned short OnigEncAsciiCtypeTable[256] = {
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
};
-UChar OnigEncISO_8859_1_ToLowerCaseTable[256] = {
+const UChar OnigEncISO_8859_1_ToLowerCaseTable[256] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
@@ -359,7 +359,7 @@ UChar OnigEncISO_8859_1_ToLowerCaseTable[256] = {
};
#ifdef USE_UPPER_CASE_TABLE
-UChar OnigEncISO_8859_1_ToUpperCaseTable[256] = {
+const UChar OnigEncISO_8859_1_ToUpperCaseTable[256] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
@@ -417,7 +417,7 @@ onigenc_get_left_adjust_char_head(OnigEncoding enc, const UChar* start, const UC
return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
}
-OnigPairAmbigCodes OnigAsciiPairAmbigCodes[] = {
+const OnigPairAmbigCodes OnigAsciiPairAmbigCodes[] = {
{ 0x41, 0x61 },
{ 0x42, 0x62 },
{ 0x43, 0x63 },
@@ -475,7 +475,7 @@ OnigPairAmbigCodes OnigAsciiPairAmbigCodes[] = {
extern int
onigenc_ascii_get_all_pair_ambig_codes(OnigAmbigType flag,
- OnigPairAmbigCodes** ccs)
+ const OnigPairAmbigCodes** ccs)
{
if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
*ccs = OnigAsciiPairAmbigCodes;
@@ -488,16 +488,16 @@ onigenc_ascii_get_all_pair_ambig_codes(OnigAmbigType flag,
extern int
onigenc_nothing_get_all_comp_ambig_codes(OnigAmbigType flag,
- OnigCompAmbigCodes** ccs)
+ const OnigCompAmbigCodes** ccs)
{
return 0;
}
extern int
onigenc_iso_8859_1_get_all_pair_ambig_codes(OnigAmbigType flag,
- OnigPairAmbigCodes** ccs)
+ const OnigPairAmbigCodes** ccs)
{
- static OnigPairAmbigCodes cc[] = {
+ static const OnigPairAmbigCodes cc[] = {
{ 0xc0, 0xe0 },
{ 0xc1, 0xe1 },
{ 0xc2, 0xe2 },
@@ -577,9 +577,9 @@ onigenc_iso_8859_1_get_all_pair_ambig_codes(OnigAmbigType flag,
extern int
onigenc_ess_tsett_get_all_comp_ambig_codes(OnigAmbigType flag,
- OnigCompAmbigCodes** ccs)
+ const OnigCompAmbigCodes** ccs)
{
- static OnigCompAmbigCodes folds[] = {
+ static const OnigCompAmbigCodes folds[] = {
{ 2, 0xdf, {{ 2, { 0x53, 0x53 } }, { 2, { 0x73, 0x73} } } }
};
@@ -593,7 +593,7 @@ onigenc_ess_tsett_get_all_comp_ambig_codes(OnigAmbigType flag,
extern int
onigenc_not_support_get_ctype_code_range(int ctype,
- OnigCodePoint* sbr[], OnigCodePoint* mbr[])
+ const OnigCodePoint* sbr[], const OnigCodePoint* mbr[])
{
return ONIG_NO_SUPPORT_CONFIG;
}
@@ -830,10 +830,10 @@ onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
if ((code & 0xff000000) != 0) {
*p++ = (UChar )((code >> 24) & 0xff);
}
- if ((code & 0xff0000) != 0) {
+ if ((code & 0xff0000) != 0 || p != buf) {
*p++ = (UChar )((code >> 16) & 0xff);
}
- if ((code & 0xff00) != 0) {
+ if ((code & 0xff00) != 0 || p != buf) {
*p++ = (UChar )((code >> 8) & 0xff);
}
*p++ = (UChar )(code & 0xff);
@@ -849,40 +849,32 @@ extern int
onigenc_mb2_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
unsigned int ctype)
{
- if ((ctype & ONIGENC_CTYPE_WORD) != 0) {
- if (code < 128)
- return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
- else
+ if (code < 128)
+ return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
+ else {
+ if ((ctype & (ONIGENC_CTYPE_WORD |
+ ONIGENC_CTYPE_GRAPH | ONIGENC_CTYPE_PRINT)) != 0) {
return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);
-
- ctype &= ~ONIGENC_CTYPE_WORD;
- if (ctype == 0) return FALSE;
+ }
}
- if (code < 128)
- return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
- else
- return FALSE;
+ return FALSE;
}
extern int
onigenc_mb4_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
unsigned int ctype)
{
- if ((ctype & ONIGENC_CTYPE_WORD) != 0) {
- if (code < 128)
- return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
- else
+ if (code < 128)
+ return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
+ else {
+ if ((ctype & (ONIGENC_CTYPE_WORD |
+ ONIGENC_CTYPE_GRAPH | ONIGENC_CTYPE_PRINT)) != 0) {
return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);
-
- ctype &= ~ONIGENC_CTYPE_WORD;
- if (ctype == 0) return FALSE;
+ }
}
- if (code < 128)
- return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
- else
- return FALSE;
+ return FALSE;
}
extern int
diff --git a/regenc.h b/regenc.h
index 510455146e..58ee3e7f22 100644
--- a/regenc.h
+++ b/regenc.h
@@ -4,7 +4,7 @@
regenc.h - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -65,15 +65,17 @@
#else /* ONIG_RUBY_M17N */
#define USE_UNICODE_FULL_RANGE_CTYPE
+/* following must not use with USE_CRNL_AS_LINE_TERMINATOR */
+/* #define USE_UNICODE_ALL_LINE_TERMINATORS */ /* see Unicode.org UTF#18 */
#define ONIG_ENCODING_INIT_DEFAULT ONIG_ENCODING_ASCII
/* for encoding system implementation (internal) */
-ONIG_EXTERN int onigenc_ascii_get_all_pair_ambig_codes P_((OnigAmbigType flag, OnigPairAmbigCodes** acs));
-ONIG_EXTERN int onigenc_nothing_get_all_comp_ambig_codes P_((OnigAmbigType flag, OnigCompAmbigCodes** acs));
-ONIG_EXTERN int onigenc_iso_8859_1_get_all_pair_ambig_codes P_((OnigAmbigType flag, OnigPairAmbigCodes** acs));
-ONIG_EXTERN int onigenc_ess_tsett_get_all_comp_ambig_codes P_((OnigAmbigType flag, OnigCompAmbigCodes** acs));
-ONIG_EXTERN int onigenc_not_support_get_ctype_code_range P_((int ctype, OnigCodePoint* sbr[], OnigCodePoint* mbr[]));
+ONIG_EXTERN int onigenc_ascii_get_all_pair_ambig_codes P_((OnigAmbigType flag, const OnigPairAmbigCodes** acs));
+ONIG_EXTERN int onigenc_nothing_get_all_comp_ambig_codes P_((OnigAmbigType flag, const OnigCompAmbigCodes** acs));
+ONIG_EXTERN int onigenc_iso_8859_1_get_all_pair_ambig_codes P_((OnigAmbigType flag, const OnigPairAmbigCodes** acs));
+ONIG_EXTERN int onigenc_ess_tsett_get_all_comp_ambig_codes P_((OnigAmbigType flag, const OnigCompAmbigCodes** acs));
+ONIG_EXTERN int onigenc_not_support_get_ctype_code_range P_((int ctype, const OnigCodePoint* sbr[], const OnigCodePoint* mbr[]));
ONIG_EXTERN int onigenc_is_mbc_newline_0x0a P_((const UChar* p, const UChar* end));
/* methods for single byte encoding */
@@ -105,7 +107,7 @@ ONIG_EXTERN int onigenc_get_all_fold_match_code_ss_0xdf P_((OnigCodePoint** code
/* in enc/unicode.c */
ONIG_EXTERN int onigenc_unicode_is_code_ctype P_((OnigCodePoint code, unsigned int ctype));
-ONIG_EXTERN int onigenc_unicode_get_ctype_code_range P_((int ctype, OnigCodePoint* sbr[], OnigCodePoint* mbr[]));
+ONIG_EXTERN int onigenc_unicode_get_ctype_code_range P_((int ctype, const OnigCodePoint* sbr[], const OnigCodePoint* mbr[]));
#define ONIGENC_ISO_8859_1_TO_LOWER_CASE(c) \
@@ -115,10 +117,10 @@ ONIG_EXTERN int onigenc_unicode_get_ctype_code_range P_((int ctype, OnigCodePoin
#define ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code,ctype) \
((OnigEnc_Unicode_ISO_8859_1_CtypeTable[code] & ctype) != 0)
-ONIG_EXTERN UChar OnigEncISO_8859_1_ToLowerCaseTable[];
-ONIG_EXTERN UChar OnigEncISO_8859_1_ToUpperCaseTable[];
-ONIG_EXTERN unsigned short OnigEnc_Unicode_ISO_8859_1_CtypeTable[];
-ONIG_EXTERN OnigPairAmbigCodes OnigAsciiPairAmbigCodes[];
+ONIG_EXTERN const UChar OnigEncISO_8859_1_ToLowerCaseTable[];
+ONIG_EXTERN const UChar OnigEncISO_8859_1_ToUpperCaseTable[];
+ONIG_EXTERN const unsigned short OnigEnc_Unicode_ISO_8859_1_CtypeTable[];
+ONIG_EXTERN const OnigPairAmbigCodes OnigAsciiPairAmbigCodes[];
#endif /* is not ONIG_RUBY_M17N */
@@ -133,7 +135,7 @@ extern int onig_is_in_code_range P_((const UChar* p, OnigCodePoint code));
ONIG_EXTERN OnigEncoding OnigEncDefaultCharEncoding;
ONIG_EXTERN const UChar* OnigEncAsciiToLowerCaseTable;
ONIG_EXTERN const UChar OnigEncAsciiToUpperCaseTable[];
-ONIG_EXTERN unsigned short OnigEncAsciiCtypeTable[];
+ONIG_EXTERN const unsigned short OnigEncAsciiCtypeTable[];
#define ONIGENC_ASCII_CODE_TO_LOWER_CASE(c) OnigEncAsciiToLowerCaseTable[c]
#define ONIGENC_ASCII_CODE_TO_UPPER_CASE(c) OnigEncAsciiToUpperCaseTable[c]
diff --git a/regerror.c b/regerror.c
index 348b7b30ed..043f52d43b 100644
--- a/regerror.c
+++ b/regerror.c
@@ -30,14 +30,20 @@
#include "regint.h"
#include <stdio.h> /* for vsnprintf() */
+#ifdef HAVE_STDARG_PROTOTYPES
#include <stdarg.h>
+#define va_init_list(a,b) va_start(a,b)
+#else
+#include <varargs.h>
+#define va_init_list(a,b) va_start(a)
+#endif
-extern char*
+extern UChar*
onig_error_code_to_format(int code)
{
char *p;
- if (code >= 0) return (char* )0;
+ if (code >= 0) return (UChar* )0;
switch (code) {
case ONIG_MISMATCH:
@@ -171,7 +177,7 @@ onig_error_code_to_format(int code)
p = "undefined error code"; break;
}
- return p;
+ return (UChar* )p;
}
@@ -179,14 +185,21 @@ onig_error_code_to_format(int code)
#define MAX_ERROR_PAR_LEN 30
extern int
+#ifdef HAVE_STDARG_PROTOTYPES
onig_error_code_to_str(UChar* s, int code, ...)
+#else
+onig_error_code_to_str(s, code, va_alist)
+ UChar* s;
+ int code;
+ va_dcl
+#endif
{
UChar *p, *q;
OnigErrorInfo* einfo;
int len;
va_list vargs;
- va_start(vargs, code);
+ va_init_list(vargs, code);
switch (code) {
case ONIGERR_UNDEFINED_NAME_REFERENCE:
@@ -242,26 +255,37 @@ onig_error_code_to_str(UChar* s, int code, ...)
void
-onig_snprintf_with_pattern(char buf[], int bufsize, OnigEncoding enc,
- char* pat, char* pat_end, char *fmt, ...)
+#ifdef HAVE_STDARG_PROTOTYPES
+onig_snprintf_with_pattern(UChar buf[], int bufsize, OnigEncoding enc,
+ UChar* pat, UChar* pat_end, const UChar *fmt, ...)
+#else
+onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist)
+ UChar buf[];
+ int bufsize;
+ OnigEncoding enc;
+ UChar* pat;
+ UChar* pat_end;
+ const UChar *fmt;
+ va_dcl
+#endif
{
int n, need, len;
UChar *p, *s, *bp;
- char bs[6];
+ UChar bs[6];
va_list args;
- va_start(args, fmt);
- n = vsnprintf(buf, bufsize, fmt, args);
+ va_init_list(args, (char* )fmt);
+ n = vsnprintf((char* )buf, bufsize, (char* )fmt, args);
va_end(args);
need = (pat_end - pat) * 4 + 4;
if (n + need < bufsize) {
- strcat(buf, ": /");
+ strcat((char* )buf, ": /");
s = buf + onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, buf);
p = pat;
- while (p < (UChar* )pat_end) {
+ while (p < pat_end) {
if (*p == MC_ESC(enc)) {
*s++ = *p++;
len = enc_len(enc, p);
@@ -280,7 +304,7 @@ onig_snprintf_with_pattern(char buf[], int bufsize, OnigEncoding enc,
int blen;
while (len-- > 0) {
- sprintf(bs, "\\%03o", *p++ & 0377);
+ sprintf((char* )bs, "\\%03o", *p++ & 0377);
blen = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, bs);
bp = bs;
while (blen-- > 0) *s++ = *bp++;
@@ -289,7 +313,7 @@ onig_snprintf_with_pattern(char buf[], int bufsize, OnigEncoding enc,
}
else if (!ONIGENC_IS_CODE_PRINT(enc, *p) &&
!ONIGENC_IS_CODE_SPACE(enc, *p)) {
- sprintf(bs, "\\%03o", *p++ & 0377);
+ sprintf((char* )bs, "\\%03o", *p++ & 0377);
len = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, bs);
bp = bs;
while (len-- > 0) *s++ = *bp++;
diff --git a/regexec.c b/regexec.c
index 3c377d67bb..fd92904cc5 100644
--- a/regexec.c
+++ b/regexec.c
@@ -29,6 +29,12 @@
#include "regint.h"
+#ifdef USE_CRNL_AS_LINE_TERMINATOR
+#define ONIGENC_IS_MBC_CRNL(enc,p,end) \
+ (ONIGENC_MBC_TO_CODE(enc,p,end) == 13 && \
+ ONIGENC_IS_MBC_NEWLINE(enc,(p+enc_len(enc,p)),end))
+#endif
+
#ifdef USE_CAPTURE_HISTORY
static void history_tree_free(OnigCaptureTreeNode* node);
@@ -227,7 +233,7 @@ onig_region_init(OnigRegion* region)
}
extern OnigRegion*
-onig_region_new(void)
+onig_region_new()
{
OnigRegion* r;
@@ -1165,27 +1171,43 @@ onig_is_in_code_range(const UChar* p, OnigCodePoint code)
}
static int
-code_is_in_cclass_node(void* node, OnigCodePoint code, int enclen)
+is_code_in_cc(int enclen, OnigCodePoint code, CClassNode* cc)
{
- unsigned int in_cc;
- CClassNode* cc = (CClassNode* )node;
+ int found;
- if (enclen == 1 && code < SINGLE_BYTE_SIZE) {
- in_cc = BITSET_AT(cc->bs, code);
+ if (enclen > 1 || (code >= SINGLE_BYTE_SIZE)) {
+ if (IS_NULL(cc->mbuf)) {
+ found = 0;
+ }
+ else {
+ found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0);
+ }
}
else {
- UChar* p = ((BBuf* )(cc->mbuf))->p;
- in_cc = onig_is_in_code_range(p, code);
+ found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);
}
- if (IS_CCLASS_NOT(cc)) {
- return (in_cc ? 0 : 1);
+ if (IS_CCLASS_NOT(cc))
+ return !found;
+ else
+ return found;
+}
+
+extern int
+onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
+{
+ int len;
+
+ if (ONIGENC_MBC_MINLEN(enc) > 1) {
+ len = 2;
}
else {
- return (in_cc ? 1 : 0);
+ len = ONIGENC_CODE_TO_MBCLEN(enc, code);
}
+ return is_code_in_cc(len, code, cc);
}
+
/* matching region of POSIX API */
typedef int regoff_t;
@@ -1739,8 +1761,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
mb_len = enc_len(encode, s);
ss = s;
s += mb_len;
+ DATA_ENSURE(0);
code = ONIGENC_MBC_TO_CODE(encode, ss, s);
- if (code_is_in_cclass_node(node, code, mb_len) == 0) goto fail;
+ if (is_code_in_cc(mb_len, code, node) == 0) goto fail;
}
STAT_OP_OUT;
break;
@@ -1946,6 +1969,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
STAT_OP_OUT;
continue;
}
+#ifdef USE_CRNL_AS_LINE_TERMINATOR
+ else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) {
+ STAT_OP_OUT;
+ continue;
+ }
+#endif
goto fail;
break;
@@ -1966,6 +1995,15 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
STAT_OP_OUT;
continue;
}
+#ifdef USE_CRNL_AS_LINE_TERMINATOR
+ else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) {
+ UChar* ss = s + enc_len(encode, s);
+ if (ON_STR_END(ss + enc_len(encode, ss))) {
+ STAT_OP_OUT;
+ continue;
+ }
+ }
+#endif
goto fail;
break;
@@ -3029,7 +3067,11 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
if (prev && ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end))
goto retry_gate;
}
- else if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end))
+ else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)
+#ifdef USE_CRNL_AS_LINE_TERMINATOR
+ && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end)
+#endif
+ )
goto retry_gate;
break;
}
@@ -3149,7 +3191,11 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
goto retry;
}
}
- else if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)) {
+ else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)
+#ifdef USE_CRNL_AS_LINE_TERMINATOR
+ && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end)
+#endif
+ ) {
p = onigenc_get_prev_char_head(reg->enc, adjrange, p);
if (IS_NULL(p)) goto fail;
goto retry;
@@ -3310,7 +3356,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
}
}
else if (str == end) { /* empty string */
- static const UChar* address_for_empty_string = "";
+ static const UChar* address_for_empty_string = (UChar* )"";
#ifdef ONIG_DEBUG_SEARCH
fprintf(stderr, "onig_search: empty string.\n");
@@ -3354,8 +3400,11 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
if (sch_range > end) sch_range = (UChar* )end;
}
}
- if (reg->dmax != ONIG_INFINITE_DISTANCE &&
- (end - start) >= reg->threshold_len) {
+
+ if ((end - start) < reg->threshold_len)
+ goto mismatch;
+
+ if (reg->dmax != ONIG_INFINITE_DISTANCE) {
do {
if (! forward_search_range(reg, str, end, s, sch_range,
&low, &high, &low_prev)) goto mismatch;
@@ -3368,22 +3417,26 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
prev = s;
s += enc_len(reg->enc, s);
}
- if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) {
- if (IS_NOT_NULL(prev)) {
- while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) &&
- s < range) {
- prev = s;
- s += enc_len(reg->enc, s);
- }
- }
- }
} while (s < range);
goto mismatch;
}
else { /* check only. */
- if ((end - start) < reg->threshold_len ||
- ! forward_search_range(reg, str, end, s, sch_range,
+ if (! forward_search_range(reg, str, end, s, sch_range,
&low, &high, (UChar** )NULL)) goto mismatch;
+
+ if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) {
+ do {
+ MATCH_AND_RETURN_CHECK;
+ prev = s;
+ s += enc_len(reg->enc, s);
+
+ while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) && s < range) {
+ prev = s;
+ s += enc_len(reg->enc, s);
+ }
+ } while (s < range);
+ goto mismatch;
+ }
}
}
@@ -3391,7 +3444,11 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
MATCH_AND_RETURN_CHECK;
prev = s;
s += enc_len(reg->enc, s);
- } while (s <= range); /* exec s == range, because empty match with /$/. */
+ } while (s < range);
+
+ if (s == range) { /* because empty match with /$/. */
+ MATCH_AND_RETURN_CHECK;
+ }
}
else { /* backward search */
if (reg->optimize != ONIG_OPTIMIZE_NONE) {
diff --git a/regint.h b/regint.h
index 2bd514b7c3..bf7da0a102 100644
--- a/regint.h
+++ b/regint.h
@@ -62,6 +62,11 @@
#define USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK /* /(?:()|())*\2/ */
#define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE /* /\n$/ =~ "\n" */
#define USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
+/* treat \r\n as line terminator.
+ !!! NO SUPPORT !!!
+ use this configuration on your own responsibility */
+/* #define USE_CRNL_AS_LINE_TERMINATOR */
+
/* internal config */
#define USE_RECYCLE_NODE
#define USE_OP_PUSH_OR_JUMP_EXACT
@@ -105,8 +110,8 @@
}\
} while (0)
-#define DEFAULT_WARN_FUNCTION rb_warn
-#define DEFAULT_VERB_WARN_FUNCTION rb_warning
+#define DEFAULT_WARN_FUNCTION onig_rb_warn
+#define DEFAULT_VERB_WARN_FUNCTION onig_rb_warning
#endif /* else NOT_RUBY */
@@ -721,6 +726,11 @@ typedef void* PointerType;
#define MC_ONE_OR_MORE_TIME(enc) (enc)->meta_char_table.one_or_more_time
#define MC_ANYCHAR_ANYTIME(enc) (enc)->meta_char_table.anychar_anytime
+#define IS_MC_ESC_CODE(code, enc, syn) \
+ ((code) == MC_ESC(enc) && \
+ !IS_SYNTAX_OP2((syn), ONIG_SYN_OP2_INEFFECTIVE_ESCAPE))
+
+
#define SYN_POSIX_COMMON_OP \
( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_POSIX_BRACKET | \
ONIG_SYN_OP_DECIMAL_BACKREF | \
@@ -781,13 +791,14 @@ extern void onig_print_statistics P_((FILE* f));
#endif
#endif
-extern char* onig_error_code_to_format P_((int code));
-extern void onig_snprintf_with_pattern PV_((char buf[], int bufsize, OnigEncoding enc, char* pat, char* pat_end, char *fmt, ...));
+extern UChar* onig_error_code_to_format P_((int code));
+extern void onig_snprintf_with_pattern PV_((UChar buf[], int bufsize, OnigEncoding enc, UChar* pat, UChar* pat_end, const UChar *fmt, ...));
extern int onig_bbuf_init P_((BBuf* buf, int size));
extern int onig_alloc_init P_((regex_t** reg, OnigOptionType option, OnigAmbigType ambig_flag, OnigEncoding enc, OnigSyntaxType* syntax));
extern int onig_compile P_((regex_t* reg, const UChar* pattern, const UChar* pattern_end, OnigErrorInfo* einfo));
extern void onig_chain_reduce P_((regex_t* reg));
extern void onig_chain_link_add P_((regex_t* to, regex_t* add));
extern void onig_transfer P_((regex_t* to, regex_t* from));
+extern int onig_is_code_in_cc P_((OnigEncoding enc, OnigCodePoint code, CClassNode* cc));
#endif /* REGINT_H */
diff --git a/regparse.c b/regparse.c
index 837ea35b30..76bce7885b 100644
--- a/regparse.c
+++ b/regparse.c
@@ -58,7 +58,21 @@ OnigSyntaxType OnigSyntaxRuby = {
OnigSyntaxType* OnigDefaultSyntax = ONIG_SYNTAX_RUBY;
-extern void onig_null_warn(const char* s, ...) { }
+extern void onig_null_warn(const char* s) { }
+
+#ifdef RUBY_PLATFORM
+extern void
+onig_rb_warn(const char* s)
+{
+ rb_warn(s);
+}
+
+extern void
+onig_rb_warning(const char* s)
+{
+ rb_warning(s);
+}
+#endif
#ifdef DEFAULT_WARN_FUNCTION
static OnigWarnFunc onig_warn = (OnigWarnFunc )DEFAULT_WARN_FUNCTION;
@@ -1050,12 +1064,12 @@ onig_node_free(Node* node)
#ifdef USE_RECYCLE_NODE
extern int
-onig_free_node_list(void)
+onig_free_node_list()
{
FreeNode* n;
THREAD_ATOMIC_START;
- while (FreeNodeList) {
+ while (IS_NOT_NULL(FreeNodeList)) {
n = FreeNodeList;
FreeNodeList = FreeNodeList->next;
xfree(n);
@@ -1066,18 +1080,19 @@ onig_free_node_list(void)
#endif
static Node*
-node_new(void)
+node_new()
{
Node* node;
#ifdef USE_RECYCLE_NODE
+ THREAD_ATOMIC_START;
if (IS_NOT_NULL(FreeNodeList)) {
- THREAD_ATOMIC_START;
node = (Node* )FreeNodeList;
FreeNodeList = FreeNodeList->next;
THREAD_ATOMIC_END;
return node;
}
+ THREAD_ATOMIC_END;
#endif
node = (Node* )xmalloc(sizeof(Node));
@@ -1094,7 +1109,7 @@ initialize_cclass(CClassNode* cc)
}
static Node*
-node_new_cclass(void)
+node_new_cclass()
{
Node* node = node_new();
CHECK_NULL_RETURN(node);
@@ -1106,7 +1121,7 @@ node_new_cclass(void)
static Node*
node_new_cclass_by_codepoint_range(int not,
- OnigCodePoint sbr[], OnigCodePoint mbr[])
+ const OnigCodePoint sbr[], const OnigCodePoint mbr[])
{
CClassNode* cc;
int n, i, j;
@@ -1163,7 +1178,7 @@ node_new_ctype(int type)
}
static Node*
-node_new_anychar(void)
+node_new_anychar()
{
Node* node = node_new();
CHECK_NULL_RETURN(node);
@@ -1434,7 +1449,7 @@ node_new_str_raw(UChar* s, UChar* end)
}
static Node*
-node_new_empty(void)
+node_new_empty()
{
return node_new_str(NULL, NULL);
}
@@ -2358,15 +2373,17 @@ fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env)
control:
if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;
PFETCH(c);
- if (c == MC_ESC(enc)) {
- v = fetch_escaped_value(&p, end, env);
- if (v < 0) return v;
- c = (OnigCodePoint )(v & 0x9f);
- }
- else if (c == '?')
+ if (c == '?') {
c = 0177;
- else
+ }
+ else {
+ if (c == MC_ESC(enc)) {
+ v = fetch_escaped_value(&p, end, env);
+ if (v < 0) return v;
+ c = (OnigCodePoint )v;
+ }
c &= 0x9f;
+ }
break;
}
/* fall through */
@@ -2512,11 +2529,11 @@ CC_ESC_WARN(ScanEnv* env, UChar *c)
if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED) &&
IS_SYNTAX_BV(env->syntax, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC)) {
- char buf[WARN_BUFSIZE];
+ UChar buf[WARN_BUFSIZE];
onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
env->pattern, env->pattern_end,
- "character class has '%s' without escape", c);
- (*onig_warn)(buf);
+ (UChar* )"character class has '%s' without escape", c);
+ (*onig_warn)((char* )buf);
}
}
@@ -2526,11 +2543,11 @@ CCEND_ESC_WARN(ScanEnv* env, UChar* c)
if (onig_warn == onig_null_warn) return ;
if (IS_SYNTAX_BV((env)->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED)) {
- char buf[WARN_BUFSIZE];
+ UChar buf[WARN_BUFSIZE];
onig_snprintf_with_pattern(buf, WARN_BUFSIZE, (env)->enc,
(env)->pattern, (env)->pattern_end,
- "regular expression has '%s' without escape", c);
- (*onig_warn)(buf);
+ (UChar* )"regular expression has '%s' without escape", c);
+ (*onig_warn)((char* )buf);
}
}
@@ -2794,7 +2811,7 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
tok->type = TK_CC_CC_OPEN;
}
else {
- CC_ESC_WARN(env, "[");
+ CC_ESC_WARN(env, (UChar* )"[");
}
}
}
@@ -2833,7 +2850,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
tok->backp = p;
PFETCH(c);
- if (c == MC_ESC(enc)) {
+ if (IS_MC_ESC_CODE(c, enc, syn)) {
if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
tok->backp = p;
@@ -3365,7 +3382,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
case ']':
if (*src > env->pattern) /* /].../ is allowed. */
- CCEND_ESC_WARN(env, "]");
+ CCEND_ESC_WARN(env, (UChar* )"]");
break;
case '#':
@@ -3400,7 +3417,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
static int
add_ctype_to_cc_by_range(CClassNode* cc, int ctype, int not, OnigEncoding enc,
- OnigCodePoint sbr[], OnigCodePoint mbr[])
+ const OnigCodePoint sbr[], const OnigCodePoint mbr[])
{
int i, r;
OnigCodePoint j;
@@ -3464,7 +3481,7 @@ static int
add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env)
{
int c, r;
- OnigCodePoint *sbr, *mbr;
+ const OnigCodePoint *sbr, *mbr;
OnigEncoding enc = env->enc;
r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sbr, &mbr);
@@ -3602,19 +3619,19 @@ parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env)
#define POSIX_BRACKET_NAME_MAX_LEN 6
static PosixBracketEntryType PBS[] = {
- { "alnum", ONIGENC_CTYPE_ALNUM, 5 },
- { "alpha", ONIGENC_CTYPE_ALPHA, 5 },
- { "blank", ONIGENC_CTYPE_BLANK, 5 },
- { "cntrl", ONIGENC_CTYPE_CNTRL, 5 },
- { "digit", ONIGENC_CTYPE_DIGIT, 5 },
- { "graph", ONIGENC_CTYPE_GRAPH, 5 },
- { "lower", ONIGENC_CTYPE_LOWER, 5 },
- { "print", ONIGENC_CTYPE_PRINT, 5 },
- { "punct", ONIGENC_CTYPE_PUNCT, 5 },
- { "space", ONIGENC_CTYPE_SPACE, 5 },
- { "upper", ONIGENC_CTYPE_UPPER, 5 },
- { "xdigit", ONIGENC_CTYPE_XDIGIT, 6 },
- { "ascii", ONIGENC_CTYPE_ASCII, 5 }, /* I don't know origin. Perl? */
+ { (UChar* )"alnum", ONIGENC_CTYPE_ALNUM, 5 },
+ { (UChar* )"alpha", ONIGENC_CTYPE_ALPHA, 5 },
+ { (UChar* )"blank", ONIGENC_CTYPE_BLANK, 5 },
+ { (UChar* )"cntrl", ONIGENC_CTYPE_CNTRL, 5 },
+ { (UChar* )"digit", ONIGENC_CTYPE_DIGIT, 5 },
+ { (UChar* )"graph", ONIGENC_CTYPE_GRAPH, 5 },
+ { (UChar* )"lower", ONIGENC_CTYPE_LOWER, 5 },
+ { (UChar* )"print", ONIGENC_CTYPE_PRINT, 5 },
+ { (UChar* )"punct", ONIGENC_CTYPE_PUNCT, 5 },
+ { (UChar* )"space", ONIGENC_CTYPE_SPACE, 5 },
+ { (UChar* )"upper", ONIGENC_CTYPE_UPPER, 5 },
+ { (UChar* )"xdigit", ONIGENC_CTYPE_XDIGIT, 6 },
+ { (UChar* )"ascii", ONIGENC_CTYPE_ASCII, 5 },
{ (UChar* )NULL, -1, 0 }
};
@@ -3638,7 +3655,7 @@ parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env)
for (pb = PBS; IS_NOT_NULL(pb->name); pb++) {
if (onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0) {
p = (UChar* )onigenc_step(enc, p, end, pb->len);
- if (onigenc_with_ascii_strncmp(enc, p, end, ":]", 2) != 0)
+ if (onigenc_with_ascii_strncmp(enc, p, end, (UChar* )":]", 2) != 0)
return ONIGERR_INVALID_POSIX_BRACKET_TYPE;
r = add_ctype_to_cc(cc, pb->ctype, not, env);
@@ -3673,19 +3690,19 @@ static int
property_name_to_ctype(UChar* p, UChar* end, OnigEncoding enc)
{
static PosixBracketEntryType PBS[] = {
- { "Alnum", ONIGENC_CTYPE_ALNUM, 5 },
- { "Alpha", ONIGENC_CTYPE_ALPHA, 5 },
- { "Blank", ONIGENC_CTYPE_BLANK, 5 },
- { "Cntrl", ONIGENC_CTYPE_CNTRL, 5 },
- { "Digit", ONIGENC_CTYPE_DIGIT, 5 },
- { "Graph", ONIGENC_CTYPE_GRAPH, 5 },
- { "Lower", ONIGENC_CTYPE_LOWER, 5 },
- { "Print", ONIGENC_CTYPE_PRINT, 5 },
- { "Punct", ONIGENC_CTYPE_PUNCT, 5 },
- { "Space", ONIGENC_CTYPE_SPACE, 5 },
- { "Upper", ONIGENC_CTYPE_UPPER, 5 },
- { "XDigit", ONIGENC_CTYPE_XDIGIT, 6 },
- { "ASCII", ONIGENC_CTYPE_ASCII, 5 },
+ { (UChar* )"Alnum", ONIGENC_CTYPE_ALNUM, 5 },
+ { (UChar* )"Alpha", ONIGENC_CTYPE_ALPHA, 5 },
+ { (UChar* )"Blank", ONIGENC_CTYPE_BLANK, 5 },
+ { (UChar* )"Cntrl", ONIGENC_CTYPE_CNTRL, 5 },
+ { (UChar* )"Digit", ONIGENC_CTYPE_DIGIT, 5 },
+ { (UChar* )"Graph", ONIGENC_CTYPE_GRAPH, 5 },
+ { (UChar* )"Lower", ONIGENC_CTYPE_LOWER, 5 },
+ { (UChar* )"Print", ONIGENC_CTYPE_PRINT, 5 },
+ { (UChar* )"Punct", ONIGENC_CTYPE_PUNCT, 5 },
+ { (UChar* )"Space", ONIGENC_CTYPE_SPACE, 5 },
+ { (UChar* )"Upper", ONIGENC_CTYPE_UPPER, 5 },
+ { (UChar* )"XDigit", ONIGENC_CTYPE_XDIGIT, 6 },
+ { (UChar* )"ASCII", ONIGENC_CTYPE_ASCII, 5 },
{ (UChar* )NULL, -1, 0 }
};
@@ -3935,7 +3952,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
*src, env->pattern_end, 1, env->enc))
return ONIGERR_EMPTY_CHAR_CLASS;
- CC_ESC_WARN(env, "]");
+ CC_ESC_WARN(env, (UChar* )"]");
r = tok->type = TK_CHAR; /* allow []...] */
}
@@ -4038,7 +4055,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
r = parse_posix_bracket(cc, &p, end, env);
if (r < 0) goto err;
if (r == 1) { /* is not POSIX bracket */
- CC_ESC_WARN(env, "[");
+ CC_ESC_WARN(env, (UChar* )"[");
p = tok->backp;
v = (OnigCodePoint )tok->u.c;
in_israw = 0;
@@ -4084,7 +4101,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
goto val_entry;
}
else if (r == TK_CC_AND) {
- CC_ESC_WARN(env, "-");
+ CC_ESC_WARN(env, (UChar* )"-");
goto range_end_val;
}
state = CCS_RANGE;
@@ -4099,12 +4116,12 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
fetched = 1;
/* [--x] or [a&&-x] is warned. */
if (r == TK_CC_RANGE || and_start != 0)
- CC_ESC_WARN(env, "-");
+ CC_ESC_WARN(env, (UChar* )"-");
goto val_entry;
}
else if (state == CCS_RANGE) {
- CC_ESC_WARN(env, "-");
+ CC_ESC_WARN(env, (UChar* )"-");
goto sb_char; /* [!--x] is allowed */
}
else { /* CCS_COMPLETE */
@@ -4113,12 +4130,12 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
fetched = 1;
if (r == TK_CC_CLOSE) goto range_end_val; /* allow [a-b-] */
else if (r == TK_CC_AND) {
- CC_ESC_WARN(env, "-");
+ CC_ESC_WARN(env, (UChar* )"-");
goto range_end_val;
}
if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC)) {
- CC_ESC_WARN(env, "-");
+ CC_ESC_WARN(env, (UChar* )"-");
goto sb_char; /* [0-9-a] is allowed as [0-9\-a] */
}
r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS;
@@ -4495,7 +4512,7 @@ set_qualifier(Node* qnode, Node* target, int group, ScanEnv* env)
if (qn->by_number == 0 && qnt->by_number == 0 &&
IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT)) {
int nestq_num, targetq_num;
- char buf[WARN_BUFSIZE];
+ UChar buf[WARN_BUFSIZE];
nestq_num = popular_qualifier_num(qn);
targetq_num = popular_qualifier_num(qnt);
@@ -4507,9 +4524,9 @@ set_qualifier(Node* qnode, Node* target, int group, ScanEnv* env)
case RQ_DEL:
if (onig_verb_warn != onig_null_warn) {
onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
- env->pattern, env->pattern_end,
- "redundant nested repeat operator");
- (*onig_verb_warn)(buf);
+ env->pattern, env->pattern_end,
+ (UChar* )"redundant nested repeat operator");
+ (*onig_verb_warn)((char* )buf);
}
goto warn_exit;
break;
@@ -4518,10 +4535,10 @@ set_qualifier(Node* qnode, Node* target, int group, ScanEnv* env)
if (onig_verb_warn != onig_null_warn) {
onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
env->pattern, env->pattern_end,
- "nested repeat operator %s and %s was replaced with '%s'",
+ (UChar* )"nested repeat operator %s and %s was replaced with '%s'",
PopularQStr[targetq_num], PopularQStr[nestq_num],
ReduceQStr[ReduceTypeTable[targetq_num][nestq_num]]);
- (*onig_verb_warn)(buf);
+ (*onig_verb_warn)((char* )buf);
}
goto warn_exit;
break;
@@ -4553,8 +4570,8 @@ make_compound_alt_node_from_cc(OnigAmbigType ambig_flag, OnigEncoding enc,
int r, i, j, k, clen, len, ncode, n;
UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
Node **ptail, *snode = NULL_NODE;
- OnigCompAmbigCodes* ccs;
- OnigCompAmbigCodeItem* ci;
+ const OnigCompAmbigCodes* ccs;
+ const OnigCompAmbigCodeItem* ci;
OnigAmbigType amb;
n = 0;
@@ -4662,7 +4679,7 @@ i_free_shared_class(type_cclass_key* key, Node* node, void* arg)
}
extern int
-onig_free_shared_cclass_table(void)
+onig_free_shared_cclass_table()
{
if (IS_NOT_NULL(OnigTypeCClassTable)) {
onig_st_foreach(OnigTypeCClassTable, i_free_shared_class, 0);
@@ -4819,7 +4836,7 @@ parse_exp(Node** np, OnigToken* tok, int term,
int ctype, not;
#ifdef USE_SHARED_CCLASS_TABLE
- OnigCodePoint *sbr, *mbr;
+ const OnigCodePoint *sbr, *mbr;
ctype = parse_ctype_to_enc_ctype(tok->u.subtype, &not);
r = ONIGENC_GET_CTYPE_CODE_RANGE(env->enc, ctype, &sbr, &mbr);
@@ -4901,7 +4918,7 @@ parse_exp(Node** np, OnigToken* tok, int term,
if (IS_IGNORECASE(env->option)) {
int i, n, in_cc;
- OnigPairAmbigCodes* ccs;
+ const OnigPairAmbigCodes* ccs;
BitSetRef bs = cc->bs;
OnigAmbigType amb;
diff --git a/regparse.h b/regparse.h
index f68d07a67f..bdf6d92219 100644
--- a/regparse.h
+++ b/regparse.h
@@ -290,7 +290,6 @@ typedef struct {
extern int onig_renumber_name_table P_((regex_t* reg, GroupNumRemap* map));
#endif
-extern int onig_is_code_in_cc P_((OnigEncoding enc, OnigCodePoint code, CClassNode* cc));
extern int onig_strncmp P_((const UChar* s1, const UChar* s2, int n));
extern void onig_scan_env_set_error_string P_((ScanEnv* env, int ecode, UChar* arg, UChar* arg_end));
extern int onig_scan_unsigned_number P_((UChar** src, const UChar* end, OnigEncoding enc));
@@ -303,7 +302,7 @@ extern Node* onig_node_new_anchor P_((int type));
extern Node* onig_node_new_str P_((const UChar* s, const UChar* end));
extern Node* onig_node_new_list P_((Node* left, Node* right));
extern void onig_node_str_clear P_((Node* node));
-extern int onig_free_node_list(void);
+extern int onig_free_node_list();
extern int onig_names_free P_((regex_t* reg));
extern int onig_parse_make_tree P_((Node** root, const UChar* pattern, const UChar* end, regex_t* reg, ScanEnv* env));
diff --git a/sjis.c b/sjis.c
index c71b0ad689..f7d7d52265 100644
--- a/sjis.c
+++ b/sjis.c
@@ -29,7 +29,7 @@
#include "regenc.h"
-static int EncLen_SJIS[] = {
+static const int EncLen_SJIS[] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -167,21 +167,16 @@ sjis_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
static int
sjis_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
- if ((ctype & ONIGENC_CTYPE_WORD) != 0) {
- if (code < 128)
- return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
- else {
+ if (code < 128)
+ return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
+ else {
+ if ((ctype & (ONIGENC_CTYPE_WORD |
+ ONIGENC_CTYPE_GRAPH | ONIGENC_CTYPE_PRINT)) != 0) {
return (sjis_code_to_mbclen(code) > 1 ? TRUE : FALSE);
}
-
- ctype &= ~ONIGENC_CTYPE_WORD;
- if (ctype == 0) return FALSE;
}
- if (code < 128)
- return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
- else
- return FALSE;
+ return FALSE;
}
static UChar*
diff --git a/utf8.c b/utf8.c
index 592bebfe8f..0e816176ba 100644
--- a/utf8.c
+++ b/utf8.c
@@ -2,7 +2,7 @@
utf8.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -40,7 +40,7 @@
#define utf8_islead(c) ((UChar )((c) & 0xc0) != 0x80)
-static int EncLen_UTF8[] = {
+static const int EncLen_UTF8[] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -65,6 +65,29 @@ utf8_mbc_enc_len(const UChar* p)
return EncLen_UTF8[*p];
}
+static int
+utf8_is_mbc_newline(const UChar* p, const UChar* end)
+{
+ if (p < end) {
+ if (*p == 0x0a) return 1;
+
+#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
+ if (*p == 0x0d) return 1;
+ if (p + 1 < end) {
+ if (*(p+1) == 0x85 && *p == 0xc2) /* U+0085 */
+ return 1;
+ if (p + 2 < end) {
+ if ((*(p+2) == 0xa8 || *(p+2) == 0xa9)
+ && *(p+1) == 0x80 && *p == 0xe2) /* U+2028, U+2029 */
+ return 1;
+ }
+ }
+#endif
+ }
+
+ return 0;
+}
+
static OnigCodePoint
utf8_mbc_to_code(const UChar* p, const UChar* end)
{
@@ -307,16 +330,16 @@ utf8_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
}
-static OnigCodePoint EmptyRange[] = { 0 };
+static const OnigCodePoint EmptyRange[] = { 0 };
-static OnigCodePoint SBAlnum[] = {
+static const OnigCodePoint SBAlnum[] = {
3,
0x0030, 0x0039,
0x0041, 0x005a,
0x0061, 0x007a
};
-static OnigCodePoint MBAlnum[] = {
+static const OnigCodePoint MBAlnum[] = {
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
411,
#else
@@ -738,13 +761,13 @@ static OnigCodePoint MBAlnum[] = {
#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
}; /* end of MBAlnum */
-static OnigCodePoint SBAlpha[] = {
+static const OnigCodePoint SBAlpha[] = {
2,
0x0041, 0x005a,
0x0061, 0x007a
};
-static OnigCodePoint MBAlpha[] = {
+static const OnigCodePoint MBAlpha[] = {
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
394,
#else
@@ -1149,13 +1172,13 @@ static OnigCodePoint MBAlpha[] = {
#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
}; /* end of MBAlpha */
-static OnigCodePoint SBBlank[] = {
+static const OnigCodePoint SBBlank[] = {
2,
0x0009, 0x0009,
0x0020, 0x0020
};
-static OnigCodePoint MBBlank[] = {
+static const OnigCodePoint MBBlank[] = {
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
7,
#else
@@ -1173,13 +1196,13 @@ static OnigCodePoint MBBlank[] = {
#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
}; /* end of MBBlank */
-static OnigCodePoint SBCntrl[] = {
+static const OnigCodePoint SBCntrl[] = {
2,
0x0000, 0x001f,
0x007f, 0x007f
};
-static OnigCodePoint MBCntrl[] = {
+static const OnigCodePoint MBCntrl[] = {
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
18,
#else
@@ -1208,12 +1231,12 @@ static OnigCodePoint MBCntrl[] = {
#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
}; /* end of MBCntrl */
-static OnigCodePoint SBDigit[] = {
+static const OnigCodePoint SBDigit[] = {
1,
0x0030, 0x0039
};
-static OnigCodePoint MBDigit[] = {
+static const OnigCodePoint MBDigit[] = {
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
22,
#else
@@ -1245,12 +1268,12 @@ static OnigCodePoint MBDigit[] = {
#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
}; /* end of MBDigit */
-static OnigCodePoint SBGraph[] = {
+static const OnigCodePoint SBGraph[] = {
1,
0x0021, 0x007e
};
-static OnigCodePoint MBGraph[] = {
+static const OnigCodePoint MBGraph[] = {
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
404,
#else
@@ -1665,12 +1688,12 @@ static OnigCodePoint MBGraph[] = {
#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
}; /* end of MBGraph */
-static OnigCodePoint SBLower[] = {
+static const OnigCodePoint SBLower[] = {
1,
0x0061, 0x007a
};
-static OnigCodePoint MBLower[] = {
+static const OnigCodePoint MBLower[] = {
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
423,
#else
@@ -2104,13 +2127,13 @@ static OnigCodePoint MBLower[] = {
#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
}; /* end of MBLower */
-static OnigCodePoint SBPrint[] = {
+static const OnigCodePoint SBPrint[] = {
2,
0x0009, 0x000d,
0x0020, 0x007e
};
-static OnigCodePoint MBPrint[] = {
+static const OnigCodePoint MBPrint[] = {
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
403,
#else
@@ -2524,7 +2547,7 @@ static OnigCodePoint MBPrint[] = {
#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
}; /* end of MBPrint */
-static OnigCodePoint SBPunct[] = {
+static const OnigCodePoint SBPunct[] = {
9,
0x0021, 0x0023,
0x0025, 0x002a,
@@ -2537,7 +2560,7 @@ static OnigCodePoint SBPunct[] = {
0x007d, 0x007d
}; /* end of SBPunct */
-static OnigCodePoint MBPunct[] = {
+static const OnigCodePoint MBPunct[] = {
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
77,
#else
@@ -2625,13 +2648,13 @@ static OnigCodePoint MBPunct[] = {
#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
}; /* end of MBPunct */
-static OnigCodePoint SBSpace[] = {
+static const OnigCodePoint SBSpace[] = {
2,
0x0009, 0x000d,
0x0020, 0x0020
};
-static OnigCodePoint MBSpace[] = {
+static const OnigCodePoint MBSpace[] = {
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
9,
#else
@@ -2651,12 +2674,12 @@ static OnigCodePoint MBSpace[] = {
#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
}; /* end of MBSpace */
-static OnigCodePoint SBUpper[] = {
+static const OnigCodePoint SBUpper[] = {
1,
0x0041, 0x005a
};
-static OnigCodePoint MBUpper[] = {
+static const OnigCodePoint MBUpper[] = {
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
420,
#else
@@ -3087,19 +3110,19 @@ static OnigCodePoint MBUpper[] = {
#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
}; /* end of MBUpper */
-static OnigCodePoint SBXDigit[] = {
+static const OnigCodePoint SBXDigit[] = {
3,
0x0030, 0x0039,
0x0041, 0x0046,
0x0061, 0x0066
};
-static OnigCodePoint SBASCII[] = {
+static const OnigCodePoint SBASCII[] = {
1,
0x0000, 0x007f
};
-static OnigCodePoint SBWord[] = {
+static const OnigCodePoint SBWord[] = {
4,
0x0030, 0x0039,
0x0041, 0x005a,
@@ -3107,7 +3130,7 @@ static OnigCodePoint SBWord[] = {
0x0061, 0x007a
};
-static OnigCodePoint MBWord[] = {
+static const OnigCodePoint MBWord[] = {
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
432,
#else
@@ -3554,7 +3577,7 @@ static OnigCodePoint MBWord[] = {
static int
utf8_get_ctype_code_range(int ctype,
- OnigCodePoint* sbr[], OnigCodePoint* mbr[])
+ const OnigCodePoint* sbr[], const OnigCodePoint* mbr[])
{
#define CR_SET(sbl,mbl) do { \
*sbr = sbl; \
@@ -3622,7 +3645,7 @@ static int
utf8_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
- OnigCodePoint *range;
+ const OnigCodePoint *range;
#endif
if (code < 256) {
@@ -3674,6 +3697,9 @@ utf8_is_code_ctype(OnigCodePoint code, unsigned int ctype)
case ONIGENC_CTYPE_ALNUM:
range = MBAlnum;
break;
+ case ONIGENC_CTYPE_NEWLINE:
+ return FALSE;
+ break;
default:
return ONIGENCERR_TYPE_BUG;
@@ -3723,7 +3749,7 @@ OnigEncodingType OnigEncodingUTF8 = {
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
},
- onigenc_is_mbc_newline_0x0a,
+ utf8_is_mbc_newline,
utf8_mbc_to_code,
utf8_code_to_mbclen,
utf8_code_to_mbc,