summaryrefslogtreecommitdiff
path: root/pack.c
diff options
context:
space:
mode:
author matz <matz@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2008-12-23 15:30:05 +0000
committer matz <matz@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2008-12-23 15:30:05 +0000
commit 9f8c309d6e4d8be6591e0fbeadf4de4603370b38 (patch)
tree db482f656fd1dbd7c88834523cabde928236ec17 /pack.c
parent d5bc38fdbf70035bef7898ec5c4de7ce26c68643 (diff)
* pack.c (pack_pack): encoding of packed string only from 'm',
  'M', and 'u' should be US-ASCII. [ruby-dev:37284]

* pack.c (pack_pack): encoding of packed string only from 'U'
  should be UTF-8. Also upgrade US-ASCII strings to UTF-8.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@20956 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'pack.c')
-rw-r--r--pack.c42
1 files changed, 25 insertions, 17 deletions
diff --git a/pack.c b/pack.c
index 8c02f62563..4f2533f2d6 100644
--- a/pack.c
+++ b/pack.c
@@ -444,7 +444,7 @@ pack_pack(VALUE ary, VALUE fmt)
char type;
long items, len, idx, plen;
const char *ptr;
- rb_encoding *enc;
+ int enc_info = 1; /* 0 - BINARY, 1 - US-ASCII, 2 - UTF-8 */
#ifdef NATINT_PACK
int natint; /* native integer */
#endif
@@ -508,6 +508,19 @@ pack_pack(VALUE ary, VALUE fmt)
}
switch (type) {
+ case 'U':
+ /* if encoding is US-ASCII, upgrade to UTF-8 */
+ if (enc_info == 1) enc_info = 2;
+ break;
+ case 'm': case 'M': case 'u':
+ /* keep US-ASCII (do nothing) */
+ break;
+ default:
+ /* fall back to BINARY */
+ enc_info = 0;
+ break;
+ }
+ switch (type) {
case 'A': case 'a': case 'Z':
case 'B': case 'b':
case 'H': case 'h':
@@ -521,15 +534,6 @@ pack_pack(VALUE ary, VALUE fmt)
ptr = RSTRING_PTR(from);
plen = RSTRING_LEN(from);
OBJ_INFECT(res, from);
- switch (type) {
- case 'a': case 'A': case 'Z':
- enc = rb_enc_compatible(res, from);
- rb_enc_associate(res, enc);
- break;
- default:
- rb_enc_associate(res, rb_ascii8bit_encoding());
- break;
- }
}
if (p[-1] == '*')
@@ -878,8 +882,6 @@ pack_pack(VALUE ary, VALUE fmt)
break;
case 'U': /* Unicode character */
- enc = rb_enc_compatible(res, rb_enc_from_encoding(rb_utf8_encoding()));
- rb_enc_associate(res, enc);
while (len-- > 0) {
SIGNED_VALUE l;
char buf[8];
@@ -898,8 +900,6 @@ pack_pack(VALUE ary, VALUE fmt)
case 'u': /* uuencoded string */
case 'm': /* base64 encoded string */
- enc = rb_enc_compatible(res, rb_enc_from_encoding(rb_usascii_encoding()));
- rb_enc_associate(res, enc);
from = NEXTFROM;
StringValue(from);
ptr = RSTRING_PTR(from);
@@ -928,8 +928,6 @@ pack_pack(VALUE ary, VALUE fmt)
break;
case 'M': /* quoted-printable encoded string */
- enc = rb_enc_compatible(res, rb_enc_from_encoding(rb_usascii_encoding()));
- rb_enc_associate(res, enc);
from = rb_obj_as_string(NEXTFROM);
if (len <= 1)
len = 72;
@@ -1024,6 +1022,17 @@ pack_pack(VALUE ary, VALUE fmt)
rb_str_associate(res, associates);
}
OBJ_INFECT(res, fmt);
+ switch (enc_info) {
+ case 1:
+ ENCODING_CODERANGE_SET(res, rb_usascii_encindex(), ENC_CODERANGE_7BIT);
+ break;
+ case 2:
+ rb_enc_set_index(res, rb_utf8_encindex());
+ break;
+ default:
+ /* do nothing, keep ASCII-8BIT */
+ break;
+ }
return res;
}
@@ -1892,7 +1901,6 @@ pack_unpack(VALUE str, VALUE fmt)
}
}
rb_str_set_len(buf, ptr - RSTRING_PTR(buf));
- ENCODING_CODERANGE_SET(buf, rb_usascii_encindex(), ENC_CODERANGE_7BIT);
UNPACK_PUSH(buf);
}
break;