summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: akr <akr@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2008-07-04 23:50:33 +0000
committer: akr <akr@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2008-07-04 23:50:33 +0000
commit: 1a32af4e7a1527ebe6b4b1205c522fa9ebd56344 (patch)
tree: eb27624ef0bd7b0bfae29535b29f577b7ebef8dd
parent: 3d0bd1c6269a7945dc1da22e212d47600942715d (diff)
* re.c (unescape_nonascii): add has_property argument not to
  raise error by /\p{Hiragana}\u{3042}/ in EUC-JP script.
  (rb_reg_preprocess): use has_property argument to make regexp
  encoding fixed.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@17884 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- ChangeLog 7
-rw-r--r-- re.c 12
-rw-r--r-- test/ruby/test_m17n.rb 8
3 files changed, 24 insertions, 3 deletions
diff --git a/ChangeLog b/ChangeLog
index 49835564ea..18417d3332 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+Sat Jul 5 08:48:05 2008 Tanaka Akira <akr@fsij.org>
+
+ * re.c (unescape_nonascii): add has_property argument not to
+ raise error by /\p{Hiragana}\u{3042}/ in EUC-JP script.
+ (rb_reg_preprocess): use has_property argument to make regexp
+ encoding fixed.
+
Sat Jul 5 08:29:47 2008 Tanaka Akira <akr@fsij.org>
* re.c (unescape_nonascii): make regexp fixed_encoding if \p is used.
diff --git a/re.c b/re.c
index 899ede647b..2bbf915e97 100644
--- a/re.c
+++ b/re.c
@@ -2085,7 +2085,8 @@ unescape_unicode_bmp(const char **pp, const char *end,
static int
unescape_nonascii(const char *p, const char *end, rb_encoding *enc,
- VALUE buf, rb_encoding **encp, onig_errmsg_buffer err)
+ VALUE buf, rb_encoding **encp, int *has_property,
+ onig_errmsg_buffer err)
{
char c;
char smallbuf[2];
@@ -2165,7 +2166,7 @@ unescape_nonascii(const char *p, const char *end, rb_encoding *enc,
case 'p': /* \p{Hiragana} */
if (!*encp) {
- *encp = enc;
+ *has_property = 1;
}
goto escape_asis;
@@ -2192,6 +2193,7 @@ rb_reg_preprocess(const char *p, const char *end, rb_encoding *enc,
rb_encoding **fixed_enc, onig_errmsg_buffer err)
{
VALUE buf;
+ int has_property = 0;
buf = rb_str_buf_new(0);
@@ -2202,9 +2204,13 @@ rb_reg_preprocess(const char *p, const char *end, rb_encoding *enc,
rb_enc_associate(buf, enc);
}
- if (unescape_nonascii(p, end, enc, buf, fixed_enc, err) != 0)
+ if (unescape_nonascii(p, end, enc, buf, fixed_enc, &has_property, err) != 0)
return Qnil;
+ if (has_property && !*fixed_enc) {
+ *fixed_enc = enc;
+ }
+
if (*fixed_enc) {
rb_enc_associate(buf, *fixed_enc);
}
diff --git a/test/ruby/test_m17n.rb b/test/ruby/test_m17n.rb
index c5871898fe..b2a67686aa 100644
--- a/test/ruby/test_m17n.rb
+++ b/test/ruby/test_m17n.rb
@@ -510,6 +510,14 @@ class TestM17N < Test::Unit::TestCase
r = /\p{Hiragana}/e
assert(r.fixed_encoding?)
assert_match(r, "\xa4\xa2".force_encoding("euc-jp"))
+
+ r = eval('/\u{3042}\p{Hiragana}/'.force_encoding("euc-jp"))
+ assert(r.fixed_encoding?)
+ assert_equal(Encoding::UTF_8, r.encoding)
+
+ r = eval('/\p{Hiragana}\u{3042}/'.force_encoding("euc-jp"))
+ assert(r.fixed_encoding?)
+ assert_equal(Encoding::UTF_8, r.encoding)
end
def test_regexp_embed_preprocess