summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 5
-rw-r--r-- oniguruma.h 16
-rw-r--r-- regparse.c 78
3 files changed, 79 insertions, 20 deletions
diff --git a/ChangeLog b/ChangeLog
index 61c1988389..eb5032951c 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+Wed Mar 17 00:22:03 2004 Kazuo Saito <ksaito@uranus.dti.ne.jp>
+
+ * oniguruma.h: imported Oniguruma 2.2.5.
+ * regparse.c: ditto.
+
Tue Mar 16 11:14:17 Hirokazu Yamamoto <ocean@m2.ccsnet.ne.jp>
* dir.c (fnmatch_helper): File.fnmatch('\.', '.') should return true.
diff --git a/oniguruma.h b/oniguruma.h
index 338f71357a..b9c5ad8cd4 100644
--- a/oniguruma.h
+++ b/oniguruma.h
@@ -11,7 +11,7 @@
#define ONIGURUMA
#define ONIGURUMA_VERSION_MAJOR 2
#define ONIGURUMA_VERSION_MINOR 2
-#define ONIGURUMA_VERSION_TEENY 4
+#define ONIGURUMA_VERSION_TEENY 5
#ifndef P_
#if defined(__STDC__) || defined(_WIN32)
@@ -464,7 +464,9 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax;
#define ONIG_SYN_OP2_ESC_V_VTAB (1<<13) /* \v as VTAB */
#define ONIG_SYN_OP2_ESC_U_HEX4 (1<<14) /* \uHHHH */
#define ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR (1<<15) /* \`, \' */
-#define ONIG_SYN_OP2_ESC_P_CHAR_PROPERTY (1<<16) /* \p{...}, \P{...} */
+#define ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY (1<<16) /* \p{...}, \P{...} */
+#define ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT (1<<17) /* \p{^..}, \P{^..} */
+#define ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS (1<<18) /* \p{IsXDigit} */
/* syntax (behavior) */
#define ONIG_SYN_CONTEXT_INDEP_ANCHORS (1<<31) /* not implemented */
@@ -503,7 +505,10 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax;
#define ONIG_NORMAL 0
#define ONIG_MISMATCH -1
#define ONIG_NO_SUPPORT_CONFIG -2
+
/* internal error */
+#define ONIGERR_MEMORY -5
+#define ONIGERR_TYPE_BUG -6
#define ONIGERR_PARSER_BUG -11
#define ONIGERR_STACK_BUG -12
#define ONIGERR_UNDEFINED_BYTECODE -13
@@ -558,8 +563,11 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax;
#define ONIGERR_NEVER_ENDING_RECURSION -221
#define ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY -222
#define ONIGERR_INVALID_CHAR_PROPERTY_NAME -223
+#define ONIGERR_INVALID_WIDE_CHAR_VALUE -400
+#define ONIGERR_TOO_BIG_WIDE_CHAR_VALUE -401
+
/* errors related to thread */
-#define ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT -1001
+#define ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT -1001
/* must be smaller than BIT_STATUS_BITS_NUM (unsigned int * 8) */
@@ -706,7 +714,7 @@ void onig_set_syntax_behavior P_((OnigSyntaxType* syntax, unsigned int behavior)
ONIG_EXTERN
void onig_set_syntax_options P_((OnigSyntaxType* syntax, OnigOptionType options));
ONIG_EXTERN
-int onig_set_meta_char P_((unsigned int what, unsigned int c));
+int onig_set_meta_char P_((unsigned int what, OnigCodePoint code));
ONIG_EXTERN
int onig_end P_((void));
ONIG_EXTERN
diff --git a/regparse.c b/regparse.c
index ead0bce12b..3a5b402f1e 100644
--- a/regparse.c
+++ b/regparse.c
@@ -96,7 +96,7 @@ OnigSyntaxType OnigSyntaxJava = {
ONIG_SYN_OP2_OPTION_PERL | ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |
ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL | ONIG_SYN_OP2_CCLASS_SET_OP |
ONIG_SYN_OP2_ESC_V_VTAB | ONIG_SYN_OP2_ESC_U_HEX4 |
- ONIG_SYN_OP2_ESC_P_CHAR_PROPERTY )
+ ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY )
, ( SYN_GNU_REGEX_BV | ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND )
, ONIG_OPTION_SINGLELINE
};
@@ -109,7 +109,9 @@ OnigSyntaxType OnigSyntaxPerl = {
& ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
, ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE |
ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL |
- ONIG_SYN_OP2_ESC_P_CHAR_PROPERTY )
+ ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |
+ ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
+ ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS )
, SYN_GNU_REGEX_BV
, ONIG_OPTION_SINGLELINE
};
@@ -192,26 +194,30 @@ OnigMetaCharTableType OnigMetaCharTable = {
};
#ifdef USE_VARIABLE_META_CHARS
-extern int onig_set_meta_char(unsigned int what, unsigned int c)
+extern int onig_set_meta_char(unsigned int what, OnigCodePoint code)
{
+ if (code >= 256) { /* restricted by current implementation. */
+ return ONIGERR_INVALID_ARGUMENT;
+ }
+
switch (what) {
case ONIG_META_CHAR_ESCAPE:
- OnigMetaCharTable.esc = c;
+ OnigMetaCharTable.esc = (UChar )code;
break;
case ONIG_META_CHAR_ANYCHAR:
- OnigMetaCharTable.anychar = c;
+ OnigMetaCharTable.anychar = (UChar )code;
break;
case ONIG_META_CHAR_ANYTIME:
- OnigMetaCharTable.anytime = c;
+ OnigMetaCharTable.anytime = (UChar )code;
break;
case ONIG_META_CHAR_ZERO_OR_ONE_TIME:
- OnigMetaCharTable.zero_or_one_time = c;
+ OnigMetaCharTable.zero_or_one_time = (UChar )code;
break;
case ONIG_META_CHAR_ONE_OR_MORE_TIME:
- OnigMetaCharTable.one_or_more_time = c;
+ OnigMetaCharTable.one_or_more_time = (UChar )code;
break;
case ONIG_META_CHAR_ANYCHAR_ANYTIME:
- OnigMetaCharTable.anychar_anytime = c;
+ OnigMetaCharTable.anychar_anytime = (UChar )code;
break;
default:
return ONIGERR_INVALID_ARGUMENT;
@@ -2574,10 +2580,20 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
case 'p':
case 'P':
if (PPEEK == '{' &&
- IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_CHAR_PROPERTY)) {
+ IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) {
PINC;
tok->type = TK_CHAR_PROPERTY;
tok->u.prop.not = (c == 'P' ? 1 : 0);
+
+ if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {
+ int c2;
+ PFETCH(c2);
+ if (c2 == '^') {
+ tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0);
+ }
+ else
+ PUNFETCH;
+ }
}
break;
@@ -3055,10 +3071,20 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
case 'p':
case 'P':
if (PPEEK == '{' &&
- IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_CHAR_PROPERTY)) {
+ IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) {
PINC;
tok->type = TK_CHAR_PROPERTY;
tok->u.prop.not = (c == 'P' ? 1 : 0);
+
+ if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {
+ int c2;
+ PFETCH(c2);
+ if (c2 == '^') {
+ tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0);
+ }
+ else
+ PUNFETCH;
+ }
}
break;
@@ -3483,22 +3509,40 @@ property_name_to_ctype(UChar* p, UChar* end)
return pb->ctype;
}
- return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
+ return -1;
}
static int
fetch_char_property_to_ctype(UChar** src, UChar* end, ScanEnv* env)
{
int ctype;
- UChar *prev, *p = *src;
- int c = 0;
+ UChar *prev, *start, *p = *src;
+ int c;
+
+ /* 'IsXXXX' => 'XXXX' */
+ if (!PEND &&
+ IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS)) {
+ c = PPEEK;
+ if (c == 'I') {
+ PINC;
+ if (! PEND) {
+ c = PPEEK;
+ if (c == 's')
+ PINC;
+ else
+ PUNFETCH;
+ }
+ }
+ }
+
+ start = prev = p;
while (!PEND) {
prev = p;
PFETCH(c);
if (c == '}') {
- ctype = property_name_to_ctype(*src, prev);
- if (ctype < 0) return ctype;
+ ctype = property_name_to_ctype(start, prev);
+ if (ctype < 0) break;
*src = p;
return ctype;
@@ -3507,6 +3551,8 @@ fetch_char_property_to_ctype(UChar** src, UChar* end, ScanEnv* env)
break;
}
+ onig_scan_env_set_error_string(env, ONIGERR_INVALID_CHAR_PROPERTY_NAME,
+ *src, prev);
return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
}