summary | refs | log | tree | commit | diff
path: root/transcode.c
diff options
context:
space:
mode:
author nobu <nobu@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2007-12-27 16:55:06 +0000
committer nobu <nobu@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2007-12-27 16:55:06 +0000
commit 3b83e10790dac26481242077ee51fc2187b8eefb (patch)
tree 600f9ce35b6adc8fa99709757f4ee71de2bde208 /transcode.c
parent b8f8c75579c31ef11697ff52dfe72a35d8bb5e9b (diff)
* transcode.c (transcode_dispatch): allows transcoding from/to
ASCII-8BIT.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@14746 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'transcode.c')
-rw-r--r-- transcode.c 38
1 files changed, 27 insertions, 11 deletions
diff --git a/transcode.c b/transcode.c
index 259d27ad19..b748eb6347 100644
--- a/transcode.c
+++ b/transcode.c
@@ -116,8 +116,7 @@ transcode_dispatch(const char* from_encoding, const char* to_encoding)
char *const key = transcoder_key(from_encoding, to_encoding);
st_data_t k, val = 0;
- k = (st_data_t)key;
- while (!st_lookup(transcoder_table, k, &val) &&
+ while (!st_lookup(transcoder_table, (k = (st_data_t)key), &val) &&
st_delete(transcoder_lib_table, &k, &val)) {
const char *const lib = (const char *)val;
int len = strlen(lib);
@@ -258,8 +257,8 @@ str_transcode(int argc, VALUE *argv, VALUE *self)
rb_transcoding my_transcoding;
int final_encoding = 0;
- if (argc<1 || argc>2) {
- rb_raise(rb_eArgError, "wrong number of arguments (%d for 2)", argc);
+ if (argc < 1 || argc > 2) {
+ rb_raise(rb_eArgError, "wrong number of arguments (%d for 1..2)", argc);
}
if ((to_encidx = rb_to_encoding_index(to_encval = argv[0])) < 0) {
to_enc = 0;
@@ -288,7 +287,7 @@ str_transcode(int argc, VALUE *argv, VALUE *self)
return -1;
}
if (from_enc && to_enc && rb_enc_asciicompat(from_enc) && rb_enc_asciicompat(to_enc)) {
- if (ENC_CODERANGE(str) == ENC_CODERANGE_7BIT) {
+ if (to_encidx == 0 || ENC_CODERANGE(str) == ENC_CODERANGE_7BIT) {
return to_encidx;
}
}
@@ -296,14 +295,32 @@ str_transcode(int argc, VALUE *argv, VALUE *self)
return -1;
}
- while (!final_encoding) /* loop for multistep transcoding */
- { /* later, maybe use smaller intermediate strings for very long strings */
+ if (from_encidx == 0) {
+ const char *p = RSTRING_PTR(str);
+ const char *e = p + RSTRING_LEN(str);
+
+ while (p < e) {
+ int ret = rb_enc_precise_mbclen(p, e, to_enc);
+ int len = MBCLEN_CHARFOUND(ret);
+
+ if (!len) {
+ rb_raise(rb_eArgError, "not fully converted, %d bytes left", e-p);
+ }
+ p += len;
+ }
+ if (to_encidx < 0) {
+ to_encidx = rb_define_dummy_encoding(to_e);
+ }
+ return to_encidx;
+ }
+
+ while (!final_encoding) { /* loop for multistep transcoding */
+ /* later, maybe use smaller intermediate strings for very long strings */
if (!(my_transcoder = transcode_dispatch(from_e, to_e))) {
rb_raise(rb_eArgError, "transcoding not supported (from %s to %s)", from_e, to_e);
}
- if (my_transcoder->preprocessor)
- {
+ if (my_transcoder->preprocessor) {
fromp = sp = RSTRING_PTR(str);
slen = RSTRING_LEN(str);
blen = slen + 30; /* len + margin */
@@ -334,8 +351,7 @@ str_transcode(int argc, VALUE *argv, VALUE *self)
buf = RSTRING_PTR(dest);
*bp = '\0';
rb_str_set_len(dest, bp - buf);
- if (my_transcoder->postprocessor)
- {
+ if (my_transcoder->postprocessor) {
str = dest;
fromp = sp = RSTRING_PTR(str);
slen = RSTRING_LEN(str);