summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author ksaito <ksaito@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2004-11-28 03:09:13 +0000
committer ksaito <ksaito@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2004-11-28 03:09:13 +0000
commit 09542e3269f5b915b45eb5c03fcb8c61452d751e (patch)
tree 91eb489927947f75436f5bd366896772c6b0c71c
parent 5fb312bd18a09ac50e8c7d707cb87893609ce9fc (diff)
* regparse.c, test/ruby/test_regexp.rb: fixed problem with UTF-8 characters that have U+00FE or invalid characters.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@7398 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- ChangeLog 5
-rw-r--r-- regparse.c 20
-rw-r--r-- test/ruby/test_regexp.rb 12
3 files changed, 26 insertions, 11 deletions
diff --git a/ChangeLog b/ChangeLog
index fbe5cd11af..eb2dabb6b4 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+Sun Nov 28 12:08:15 2004 Kazuo Saito <ksaito@uranus.dti.ne.jp>
+
+ * regparse.c, test/ruby/test_regexp.rb: fixed problem with UTF-8
+ characters that have U+00FE or invalid characters.
+
Sun Nov 28 12:07:04 2004 Kazuo Saito <ksaito@uranus.dti.ne.jp>
* regexec.c, test/ruby/test_regexp.rb: fixed segmentation falut
diff --git a/regparse.c b/regparse.c
index e6fea8e68a..16792ee9eb 100644
--- a/regparse.c
+++ b/regparse.c
@@ -3631,6 +3631,9 @@ next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v,
case CCS_RANGE:
if (intype == *type) {
if (intype == CCV_SB) {
+ if (*vs > 0xff || v > 0xff)
+ return ONIGERR_INVALID_WIDE_CHAR_VALUE;
+
if (*vs > v) {
if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
goto ccs_range_end;
@@ -3646,14 +3649,8 @@ next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v,
}
else {
#if 0
- if (intype == CCV_CODE_POINT && *type == CCV_SB &&
- ONIGENC_IS_CONTINUOUS_SB_MB(env->enc)) {
- bitset_set_range(cc->bs, (int )*vs, 0x7f);
- r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )0x80, v);
- if (r < 0) return r;
- }
-#else
if (intype == CCV_CODE_POINT && *type == CCV_SB) {
+#endif
if (*vs > v) {
if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
goto ccs_range_end;
@@ -3663,10 +3660,11 @@ next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v,
bitset_set_range(cc->bs, (int )*vs, (int )(v < 0xff ? v : 0xff));
r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )*vs, v);
if (r < 0) return r;
+#if 0
}
-#endif
else
return ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE;
+#endif
}
ccs_range_end:
*state = CCS_COMPLETE;
@@ -3826,7 +3824,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
break;
case TK_CODE_POINT:
- v = tok->u.code;
+ v = (OnigCodePoint )tok->u.code;
in_israw = 1;
val_entry:
len = ONIGENC_CODE_TO_MBCLEN(env->enc, v);
@@ -3952,7 +3950,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
case TK_CC_AND: /* && */
{
if (state == CCS_VALUE) {
- r = next_state_val(cc, &vs, 0, &val_israw, 0, CCV_SB,
+ r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type,
&val_type, &state, env);
if (r != 0) goto err;
}
@@ -3992,7 +3990,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
}
if (state == CCS_VALUE) {
- r = next_state_val(cc, &vs, 0, &val_israw, 0, CCV_SB,
+ r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type,
&val_type, &state, env);
if (r != 0) goto err;
}
diff --git a/test/ruby/test_regexp.rb b/test/ruby/test_regexp.rb
index 889fbb4b2d..dbfe4d7aeb 100644
--- a/test/ruby/test_regexp.rb
+++ b/test/ruby/test_regexp.rb
@@ -12,4 +12,16 @@ class TestRegexp < Test::Unit::TestCase
def test_ruby_dev_24887
assert_equal("a".gsub(/a\Z/, ""), "")
end
+
+ def test_yoshidam_net_20041111_1
+ s = "[\xC2\xA0-\xC3\xBE]"
+ assert_match(Regexp.new(s, nil, "u"), "\xC3\xBE")
+ end
+
+ def test_yoshidam_net_20041111_2
+ assert_raise(RegexpError) do
+ s = "[\xFF-\xFF]"
+ Regexp.new(s, nil, "u")
+ end
+ end
end