diff options
-rw-r--r-- | ChangeLog | 13 | ||||
-rw-r--r-- | transcode.c | 171 | ||||
-rw-r--r-- | transcode_data.h | 2 |
3 files changed, 125 insertions, 61 deletions
@@ -1,3 +1,16 @@ +Sun Aug 10 20:59:54 2008 Tanaka Akira <akr@fsij.org> + + * transcode_data.h (rb_transcoding): new field: flags. + + * transcode.c (load_transcoder): extracted from transcode_dispatch_cb. + (rb_transcoding_result_t): renamed from transcode_result_t. + (rb_transcoding_open): new function. + (rb_transcoding_convert): ditto. + (rb_transcoding_close): ditto. + (transcode_loop): use rb_transcoding_open, rb_transcoding_convert + and rb_transcoding_close. + (str_transcode): don't need rb_transcoding. + Sun Aug 10 18:09:16 2008 Tanaka Akira <akr@fsij.org> * tool/transcode-tblgen.rb: record checksum of diff --git a/transcode.c b/transcode.c index 8fc7c075cd..3c1858d67d 100644 --- a/transcode.c +++ b/transcode.c @@ -228,6 +228,33 @@ cleanup: return found; } +static const rb_transcoder * +load_transcoder(transcoder_entry_t *entry) +{ + if (entry->transcoder) + return entry->transcoder; + + if (entry->lib) { + const char *lib = entry->lib; + int len = strlen(lib); + char path[sizeof(transcoder_lib_prefix) + MAX_TRANSCODER_LIBNAME_LEN]; + + entry->lib = NULL; + + if (len > MAX_TRANSCODER_LIBNAME_LEN) + return NULL; + memcpy(path, transcoder_lib_prefix, sizeof(transcoder_lib_prefix) - 1); + memcpy(path + sizeof(transcoder_lib_prefix) - 1, lib, len + 1); + if (!rb_require(path)) + return NULL; + } + + if (entry->transcoder) + return entry->transcoder; + + return NULL; +} + static void transcode_dispatch_cb(const char *from, const char *to, int depth, void *arg) { @@ -243,16 +270,7 @@ transcode_dispatch_cb(const char *from, const char *to, int depth, void *arg) goto failed; if (!entry->transcoder && entry->lib) { - const char *lib = entry->lib; - int len = strlen(lib); - char path[sizeof(transcoder_lib_prefix) + MAX_TRANSCODER_LIBNAME_LEN]; - - entry->lib = NULL; - - if (len > MAX_TRANSCODER_LIBNAME_LEN) goto failed; - memcpy(path, transcoder_lib_prefix, sizeof(transcoder_lib_prefix) - 1); - memcpy(path + sizeof(transcoder_lib_prefix) - 1, lib, len + 1); - if (!rb_require(path)) goto failed; + load_transcoder(entry); } if (!entry->transcoder) goto failed; @@ -352,9 +370,9 @@ typedef enum { transcode_obuf_full, transcode_ibuf_empty, transcode_finished, -} transcode_result_t; +} rb_transcoding_result_t; -static transcode_result_t +static rb_transcoding_result_t transcode_restartable0(const unsigned char **in_pos, unsigned char **out_pos, const unsigned char *in_stop, unsigned char *out_stop, rb_transcoding *tc, @@ -575,7 +593,7 @@ transcode_restartable0(const unsigned char **in_pos, unsigned char **out_pos, #undef SUSPEND } -static transcode_result_t +static rb_transcoding_result_t transcode_restartable(const unsigned char **in_pos, unsigned char **out_pos, const unsigned char *in_stop, unsigned char *out_stop, rb_transcoding *tc, @@ -585,7 +603,7 @@ transcode_restartable(const unsigned char **in_pos, unsigned char **out_pos, unsigned char *feed_buf = ALLOCA_N(unsigned char, tc->feedlen); const unsigned char *feed_pos = feed_buf; const unsigned char *feed_stop = feed_buf + tc->feedlen; - transcode_result_t res; + rb_transcoding_result_t res; MEMCPY(feed_buf, TRANSCODING_READBUF(tc) + tc->readlen, unsigned char, tc->feedlen); @@ -601,6 +619,65 @@ transcode_restartable(const unsigned char **in_pos, unsigned char **out_pos, return transcode_restartable0(in_pos, out_pos, in_stop, out_stop, tc, opt); } +static rb_transcoding * +rb_transcoding_open(const char *from, const char *to, int flags) +{ + rb_transcoding *tc; + const rb_transcoder *tr; + + st_data_t val; + st_table *table2; + transcoder_entry_t *entry; + + /* xxx: support multistep conversion */ + + if (!st_lookup(transcoder_table, (st_data_t)from, &val)) { + return NULL; + } + table2 = (st_table *)val; + + if (!st_lookup(table2, (st_data_t)to, &val)) { + return NULL; + } + entry = (transcoder_entry_t *)val; + tr = load_transcoder(entry); + if (!tr) + return NULL; + + tc = ALLOC(rb_transcoding); + tc->transcoder = tr; + tc->flags = flags; + memset(tc->stateful, 0, sizeof(tc->stateful)); + tc->resume_position = 0; + tc->readlen = 0; + tc->feedlen = 0; + if (sizeof(tc->readbuf.ary) < tr->max_input) { + tc->readbuf.ptr = xmalloc(tr->max_input); + } + return tc; +} + +static rb_transcoding_result_t +rb_transcoding_convert(rb_transcoding *tc, + const unsigned char **input_ptr, const unsigned char *input_stop, + unsigned char **output_ptr, unsigned char *output_stop, + int flags) +{ + return transcode_restartable( + input_ptr, output_ptr, + input_stop, output_stop, + tc, flags); +} + +static void +rb_transcoding_close(rb_transcoding *tc) +{ + const rb_transcoder *tr = tc->transcoder; + if (sizeof(tc->readbuf.ary) < tr->max_input) + xfree(tc->readbuf.ptr); + xfree(tc); +} + static void more_output_buffer( VALUE destination, @@ -623,28 +700,17 @@ transcode_loop(const unsigned char **in_pos, unsigned char **out_pos, const unsigned char *in_stop, unsigned char *out_stop, VALUE destination, unsigned char *(*resize_destination)(VALUE, int, int), - rb_transcoding *tc, + const rb_transcoder *tr, const int opt) { - const rb_transcoder *tr = tc->transcoder; - transcode_result_t ret; + rb_transcoding *tc; + rb_transcoding_result_t ret; unsigned char *out_start = *out_pos; - tc->resume_position = 0; - tc->readlen = 0; - tc->feedlen = 0; - - if (sizeof(tc->readbuf.ary) < tr->max_input) { - tc->readbuf.ptr = xmalloc(tr->max_input); - } -#define CLEANUP \ - do { \ - if (sizeof(tc->readbuf.ary) < tr->max_input) \ - xfree(tc->readbuf.ptr); \ - } while(0) + tc = rb_transcoding_open(tr->from_encoding, tr->to_encoding, 0); resume: - ret = transcode_restartable(in_pos, out_pos, in_stop, out_stop, tc, opt); + ret = rb_transcoding_convert(tc, in_pos, in_stop, out_pos, out_stop, opt); if (ret == transcode_invalid_input) { /* deal with invalid byte sequence */ /* todo: add more alternative behaviors */ @@ -657,7 +723,7 @@ resume: output_replacement_character(out_pos, rb_enc_find(tr->to_encoding)); goto resume; } - CLEANUP; + rb_transcoding_close(tc); rb_raise(TRANSCODE_ERROR, "invalid byte sequence"); } if (ret == transcode_undefined_conversion) { @@ -673,7 +739,7 @@ resume: output_replacement_character(out_pos, rb_enc_find(tr->to_encoding)); goto resume; } - CLEANUP; + rb_transcoding_close(tc); rb_raise(TRANSCODE_ERROR, "conversion undefined for byte sequence (maybe invalid byte sequence)"); } if (ret == transcode_obuf_full) { @@ -681,9 +747,8 @@ resume: goto resume; } - CLEANUP; + rb_transcoding_close(tc); return; -#undef CLEANUP } #else /* sample transcode_loop implementation in byte-by-byte stream style */ @@ -692,26 +757,15 @@ transcode_loop(const unsigned char **in_pos, unsigned char **out_pos, const unsigned char *in_stop, unsigned char *out_stop, VALUE destination, unsigned char *(*resize_destination)(VALUE, int, int), - rb_transcoding *tc, + const rb_transcoder *tr, const int opt) { - const rb_transcoder *tr = tc->transcoder; - transcode_result_t ret; + rb_transcoding *tc; + rb_transcoding_result_t ret; unsigned char *out_start = *out_pos; const unsigned char *ptr; - tc->resume_position = 0; - tc->readlen = 0; - tc->feedlen = 0; - - if (sizeof(tc->readbuf.ary) < tr->max_input) { - tc->readbuf.ptr = xmalloc(tr->max_input); - } -#define CLEANUP \ - do { \ - if (sizeof(tc->readbuf.ary) < tr->max_input) \ - xfree(tc->readbuf.ptr); \ - } while(0) + tc = rb_transcoding_open(tr->from_encoding, tr->to_encoding, 0); ret = transcode_ibuf_empty; ptr = *in_pos; @@ -722,14 +776,14 @@ transcode_loop(const unsigned char **in_pos, unsigned char **out_pos, if (ret == transcode_ibuf_empty) { if (ptr < in_stop) { input_byte = *ptr; - ret = transcode_restartable(&p, out_pos, p+1, out_stop, tc, opt|PARTIAL_INPUT); + ret = rb_transcoding_convert(tc, &p, p+1, out_pos, out_stop, PARTIAL_INPUT); } else { - ret = transcode_restartable(NULL, out_pos, NULL, out_stop, tc, opt); + ret = rb_transcoding_convert(tc, NULL, NULL, out_pos, out_stop, 0); } } else { - ret = transcode_restartable(NULL, out_pos, NULL, out_stop, tc, opt|PARTIAL_INPUT); + ret = rb_transcoding_convert(tc, NULL, NULL, out_pos, out_stop, PARTIAL_INPUT); } if (&input_byte != p) ptr += p - &input_byte; @@ -746,7 +800,7 @@ transcode_loop(const unsigned char **in_pos, unsigned char **out_pos, output_replacement_character(out_pos, rb_enc_find(tr->to_encoding)); break; } - CLEANUP; + rb_transcoding_close(tc); rb_raise(TRANSCODE_ERROR, "invalid byte sequence"); break; @@ -763,7 +817,7 @@ transcode_loop(const unsigned char **in_pos, unsigned char **out_pos, output_replacement_character(out_pos, rb_enc_find(tr->to_encoding)); break; } - CLEANUP; + rb_transcoding_close(tc); rb_raise(TRANSCODE_ERROR, "conversion undefined for byte sequence (maybe invalid byte sequence)"); break; @@ -778,10 +832,9 @@ transcode_loop(const unsigned char **in_pos, unsigned char **out_pos, break; } } - CLEANUP; + rb_transcoding_close(tc); *in_pos = in_stop; return; -#undef CLEANUP } #endif @@ -810,7 +863,6 @@ str_transcode(int argc, VALUE *argv, VALUE *self) int from_encidx, to_encidx; VALUE from_encval, to_encval; const rb_transcoder *tr; - rb_transcoding tc; int final_encoding = 0; VALUE opt; int options = 0; @@ -890,16 +942,13 @@ str_transcode(int argc, VALUE *argv, VALUE *self) rb_raise(rb_eArgError, "transcoding not supported (from %s to %s)", from_e, to_e); } - tc.transcoder = tr; - memset(tc.stateful, 0, sizeof(tc.stateful)); - fromp = sp = (unsigned char *)RSTRING_PTR(str); slen = RSTRING_LEN(str); blen = slen + 30; /* len + margin */ dest = rb_str_tmp_new(blen); bp = (unsigned char *)RSTRING_PTR(dest); - transcode_loop(&fromp, &bp, (sp+slen), (bp+blen), dest, str_transcoding_resize, &tc, options); + transcode_loop(&fromp, &bp, (sp+slen), (bp+blen), dest, str_transcoding_resize, tr, options); if (fromp != sp+slen) { rb_raise(rb_eArgError, "not fully converted, %"PRIdPTRDIFF" bytes left", sp+slen-fromp); } diff --git a/transcode_data.h b/transcode_data.h index 42c3b2dc89..f7294e4356 100644 --- a/transcode_data.h +++ b/transcode_data.h @@ -61,6 +61,8 @@ typedef struct byte_lookup { typedef struct rb_transcoding { const struct rb_transcoder *transcoder; + int flags; + int resume_position; const BYTE_LOOKUP *next_table; VALUE next_info; |