summary | refs | log | tree | commit | diff
path: root/string.c
diff options
context:
space:
mode:
author: naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2012-02-08 22:34:12 +0000
committer: naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2012-02-08 22:34:12 +0000
commit: e0f8351d5528cd01ab71855eae87ef067de55626 (patch)
tree: 3b259c3c1fb9703175ad43836cc4ef3210035797 /string.c
parent: 2bd2ea829f360bdae74281230aa3377b0c81fa14 (diff)
merge revision(s) 34236: [Backport #5890]
* numeric.c (rb_enc_uint_char): raise RangeError when added codepoint is invalid. [Feature #5855] [Bug #5863] [Bug #5864]
* string.c (rb_str_concat): ditto.
* string.c (rb_str_concat): set encoding as ASCII-8BIT when the string is US-ASCII and the argument is an integer greater than 127.
* regenc.c (onigenc_mb2_code_to_mbclen): rearrange error code.
* enc/euc_jp.c (code_to_mbclen): ditto.
* enc/shift_jis.c (code_to_mbclen): ditto.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_1_9_3@34501 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'string.c')
-rw-r--r-- string.c 43
1 files changed, 34 insertions, 9 deletions
diff --git a/string.c b/string.c
index d2737045e1..22f7aaf614 100644
--- a/string.c
+++ b/string.c
@@ -2081,10 +2081,11 @@ rb_str_append(VALUE str, VALUE str2)
VALUE
rb_str_concat(VALUE str1, VALUE str2)
{
- unsigned int lc;
+ unsigned int code;
+ rb_encoding *enc = STR_ENC_GET(str1);
if (FIXNUM_P(str2) || TYPE(str2) == T_BIGNUM) {
- if (rb_num_to_uint(str2, &lc) == 0) {
+ if (rb_num_to_uint(str2, &code) == 0) {
}
else if (FIXNUM_P(str2)) {
rb_raise(rb_eRangeError, "%ld out of char range", FIX2LONG(str2));
@@ -2096,22 +2097,46 @@ rb_str_concat(VALUE str1, VALUE str2)
else {
return rb_str_append(str1, str2);
}
- {
- rb_encoding *enc = STR_ENC_GET(str1);
+
+ if (enc == rb_usascii_encoding()) {
+ /* US-ASCII automatically extended to ASCII-8BIT */
+ char buf[1] = {(char)code};
+ if (code > 0xFF) {
+ rb_raise(rb_eRangeError, "%u out of char range", code);
+ }
+ rb_str_cat(str1, buf, 1);
+ if (code > 127) {
+ rb_enc_associate(str1, rb_ascii8bit_encoding());
+ ENC_CODERANGE_SET(str1, ENC_CODERANGE_VALID);
+ }
+ }
+ else {
long pos = RSTRING_LEN(str1);
int cr = ENC_CODERANGE(str1);
int len;
+ char *buf;
- if ((len = rb_enc_codelen(lc, enc)) <= 0) {
- rb_raise(rb_eRangeError, "%u invalid char", lc);
+ switch (len = rb_enc_codelen(code, enc)) {
+ case ONIGERR_INVALID_CODE_POINT_VALUE:
+ rb_raise(rb_eRangeError, "invalid codepoint 0x%X in %s", code, rb_enc_name(enc));
+ break;
+ case ONIGERR_TOO_BIG_WIDE_CHAR_VALUE:
+ case 0:
+ rb_raise(rb_eRangeError, "%u out of char range", code);
+ break;
+ }
+ buf = ALLOCA_N(char, len + 1);
+ rb_enc_mbcput(code, buf, enc);
+ if (rb_enc_precise_mbclen(buf, buf + len + 1, enc) != len) {
+ rb_raise(rb_eRangeError, "invalid codepoint 0x%X in %s", code, rb_enc_name(enc));
}
rb_str_resize(str1, pos+len);
- rb_enc_mbcput(lc, RSTRING_PTR(str1)+pos, enc);
- if (cr == ENC_CODERANGE_7BIT && lc > 127)
+ strncpy(RSTRING_PTR(str1) + pos, buf, len);
+ if (cr == ENC_CODERANGE_7BIT && code > 127)
cr = ENC_CODERANGE_VALID;
ENC_CODERANGE_SET(str1, cr);
- return str1;
}
+ return str1;
}
/*