summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 7
-rw-r--r-- string.c 45
-rw-r--r-- test/ruby/test_m17n_comb.rb 19
3 files changed, 64 insertions, 7 deletions
diff --git a/ChangeLog b/ChangeLog
index 7242fa49e1..a44a127316 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,4 +1,9 @@
-Sat Jul 20 12:13:37 2013 Nobuyoshi Nakada <nobu@ruby-lang.org>
+Sat Jul 20 12:14:07 2013 Nobuyoshi Nakada <nobu@ruby-lang.org>
+
+ * string.c (enc_succ_char, enc_pred_char): consider wchar case.
+ [ruby-core:56071] [Bug #8653]
+
+ * string.c (rb_str_succ): do not replace with invalid char.
* encoding.c (rb_enc_code_to_mbclen): add new function which returns
mbclen from codepoint like as rb_enc_codelen() but 0 for invalid
diff --git a/string.c b/string.c
index 05a97b47e7..f10dd3aa82 100644
--- a/string.c
+++ b/string.c
@@ -2870,6 +2870,24 @@ enc_succ_char(char *p, long len, rb_encoding *enc)
{
long i;
int l;
+
+ if (rb_enc_mbminlen(enc) > 1) {
+ /* wchar, trivial case */
+ int r = rb_enc_precise_mbclen(p, p + len, enc), c;
+ if (!MBCLEN_CHARFOUND_P(r)) {
+ return NEIGHBOR_NOT_CHAR;
+ }
+ c = rb_enc_mbc_to_codepoint(p, p + len, enc) + 1;
+ l = rb_enc_code_to_mbclen(c, enc);
+ if (!l) return NEIGHBOR_NOT_CHAR;
+ if (l != len) return NEIGHBOR_WRAPPED;
+ rb_enc_mbcput(c, p, enc);
+ r = rb_enc_precise_mbclen(p, p + len, enc);
+ if (!MBCLEN_CHARFOUND_P(r)) {
+ return NEIGHBOR_NOT_CHAR;
+ }
+ return NEIGHBOR_FOUND;
+ }
while (1) {
for (i = len-1; 0 <= i && (unsigned char)p[i] == 0xff; i--)
p[i] = '\0';
@@ -2904,6 +2922,25 @@ enc_pred_char(char *p, long len, rb_encoding *enc)
{
long i;
int l;
+ if (rb_enc_mbminlen(enc) > 1) {
+ /* wchar, trivial case */
+ int r = rb_enc_precise_mbclen(p, p + len, enc), c;
+ if (!MBCLEN_CHARFOUND_P(r)) {
+ return NEIGHBOR_NOT_CHAR;
+ }
+ c = rb_enc_mbc_to_codepoint(p, p + len, enc);
+ if (!c) return NEIGHBOR_NOT_CHAR;
+ --c;
+ l = rb_enc_code_to_mbclen(c, enc);
+ if (!l) return NEIGHBOR_NOT_CHAR;
+ if (l != len) return NEIGHBOR_WRAPPED;
+ rb_enc_mbcput(c, p, enc);
+ r = rb_enc_precise_mbclen(p, p + len, enc);
+ if (!MBCLEN_CHARFOUND_P(r)) {
+ return NEIGHBOR_NOT_CHAR;
+ }
+ return NEIGHBOR_FOUND;
+ }
while (1) {
for (i = len-1; 0 <= i && (unsigned char)p[i] == 0; i--)
p[i] = '\xff';
@@ -3074,12 +3111,16 @@ rb_str_succ(VALUE orig)
s = e;
while ((s = rb_enc_prev_char(sbeg, s, e, enc)) != 0) {
enum neighbor_char neighbor;
+ char tmp[ONIGENC_CODE_TO_MBC_MAXLEN];
l = rb_enc_precise_mbclen(s, e, enc);
if (!ONIGENC_MBCLEN_CHARFOUND_P(l)) continue;
l = ONIGENC_MBCLEN_CHARFOUND_LEN(l);
- neighbor = enc_succ_char(s, l, enc);
- if (neighbor == NEIGHBOR_FOUND)
+ MEMCPY(tmp, s, char, l);
+ neighbor = enc_succ_char(tmp, l, enc);
+ if (neighbor == NEIGHBOR_FOUND) {
+ MEMCPY(s, tmp, char, l);
return str;
+ }
if (rb_enc_precise_mbclen(s, s+l, enc) != l) {
/* wrapped to \0...\0. search next valid char. */
enc_succ_char(s, l, enc);
diff --git a/test/ruby/test_m17n_comb.rb b/test/ruby/test_m17n_comb.rb
index 2de7179996..47a1f411c6 100644
--- a/test/ruby/test_m17n_comb.rb
+++ b/test/ruby/test_m17n_comb.rb
@@ -50,10 +50,12 @@ class TestM17NComb < Test::Unit::TestCase
# for transitivity test
u("\xe0\xa0\xa1"), e("\xe0\xa0\xa1"), s("\xe0\xa0\xa1"), # [ruby-dev:32693]
e("\xa1\xa1"), a("\xa1\xa1"), s("\xa1\xa1"), # [ruby-dev:36484]
+ ]
- #"aa".force_encoding("utf-16be"),
- #"aaaa".force_encoding("utf-32be"),
- #"aaa".force_encoding("utf-32be"),
+ WSTRINGS = [
+ "aa".force_encoding("utf-16be"),
+ "aaaa".force_encoding("utf-32be"),
+ "aaa".force_encoding("utf-32be"),
]
def combination(*args, &b)
@@ -84,7 +86,7 @@ class TestM17NComb < Test::Unit::TestCase
r
end
- def enccall(recv, meth, *args, &block)
+ def assert_enccall(recv, meth, *args, &block)
desc = ''
if String === recv
desc << encdump(recv)
@@ -113,6 +115,7 @@ class TestM17NComb < Test::Unit::TestCase
}
result
end
+ alias enccall assert_enccall
def assert_str_enc_propagation(t, s1, s2)
if !s1.ascii_only?
@@ -1327,6 +1330,14 @@ class TestM17NComb < Test::Unit::TestCase
s = t
}
}
+
+ Encoding.list.each do |enc|
+ next if enc.dummy?
+ {"A"=>"B", "A1"=>"A2", "A9"=>"B0", "9"=>"10", "Z"=>"AA"}.each do |orig, expected|
+ s = orig.encode(enc)
+ assert_strenc(expected.encode(enc), enc, s.succ, proc {"#{orig.dump}.encode(#{enc}).succ"})
+ end
+ end
end
def test_str_hash