diff options
Diffstat (limited to 'transcode.c')
| -rw-r--r-- | transcode.c | 89 |
1 files changed, 56 insertions, 33 deletions
diff --git a/transcode.c b/transcode.c index da934226dc..84c579eb2e 100644 --- a/transcode.c +++ b/transcode.c @@ -9,8 +9,6 @@ **********************************************************************/ -#include "ruby/ruby.h" -#include "ruby/encoding.h" #include "internal.h" #include "transcode_data.h" #include <ctype.h> @@ -110,21 +108,21 @@ typedef struct { struct rb_econv_t { int flags; + int started; /* bool */ + const char *source_encoding_name; const char *destination_encoding_name; - int started; - const unsigned char *replacement_str; size_t replacement_len; const char *replacement_enc; - int replacement_allocated; unsigned char *in_buf_start; unsigned char *in_data_start; unsigned char *in_data_end; unsigned char *in_buf_end; rb_econv_elem_t *elems; + int replacement_allocated; /* bool */ int num_allocated; int num_trans; int num_finished; @@ -156,7 +154,7 @@ struct rb_econv_t { typedef struct { const char *sname; const char *dname; - const char *lib; /* null means means no need to load a library */ + const char *lib; /* null means no need to load a library */ const rb_transcoder *transcoder; } transcoder_entry_t; @@ -2462,7 +2460,7 @@ econv_opts(VALUE opt, int ecflags) ecflags |= ECONV_XML_ATTR_CONTENT_DECORATOR|ECONV_XML_ATTR_QUOTE_DECORATOR|ECONV_UNDEF_HEX_CHARREF; } else if (RB_TYPE_P(v, T_SYMBOL)) { - rb_raise(rb_eArgError, "unexpected value for xml option: %s", rb_id2name(SYM2ID(v))); + rb_raise(rb_eArgError, "unexpected value for xml option: %"PRIsVALUE, rb_sym2str(v)); } else { rb_raise(rb_eArgError, "unexpected value for xml option"); @@ -2486,8 +2484,8 @@ econv_opts(VALUE opt, int ecflags) /* ecflags |= ECONV_LF_NEWLINE_DECORATOR; */ } else if (SYMBOL_P(v)) { - rb_raise(rb_eArgError, "unexpected value for newline option: %s", - rb_id2name(SYM2ID(v))); + rb_raise(rb_eArgError, "unexpected value for newline option: %"PRIsVALUE, + rb_sym2str(v)); } else { rb_raise(rb_eArgError, "unexpected value for newline option"); @@ -2609,7 +2607,7 @@ 
rb_econv_open_opts(const char *source_encoding, const char *destination_encoding } static int -enc_arg(volatile VALUE *arg, const char **name_p, rb_encoding **enc_p) +enc_arg(VALUE *arg, const char **name_p, rb_encoding **enc_p) { rb_encoding *enc; const char *n; @@ -2633,7 +2631,7 @@ enc_arg(volatile VALUE *arg, const char **name_p, rb_encoding **enc_p) } static int -str_transcode_enc_args(VALUE str, volatile VALUE *arg1, volatile VALUE *arg2, +str_transcode_enc_args(VALUE str, VALUE *arg1, VALUE *arg2, const char **sname_p, rb_encoding **senc_p, const char **dname_p, rb_encoding **denc_p) { @@ -2664,7 +2662,7 @@ str_transcode0(int argc, VALUE *argv, VALUE *self, int ecflags, VALUE ecopts) { VALUE dest; VALUE str = *self; - volatile VALUE arg1, arg2; + VALUE arg1, arg2; long blen, slen; unsigned char *buf, *bp, *sp; const unsigned char *fromp; @@ -2702,7 +2700,7 @@ str_transcode0(int argc, VALUE *argv, VALUE *self, int ecflags, VALUE ecopts) if (!NIL_P(ecopts)) { rep = rb_hash_aref(ecopts, sym_replace); } - dest = rb_str_scrub(str, rep); + dest = rb_enc_str_scrub(senc, str, rep); if (NIL_P(dest)) dest = str; *self = dest; return dencidx; @@ -2742,6 +2740,8 @@ str_transcode0(int argc, VALUE *argv, VALUE *self, int ecflags, VALUE ecopts) /* set encoding */ if (!denc) { dencidx = rb_define_dummy_encoding(dname); + RB_GC_GUARD(arg1); + RB_GC_GUARD(arg2); } *self = dest; @@ -2915,13 +2915,13 @@ econv_free(void *ptr) static size_t econv_memsize(const void *ptr) { - return ptr ? 
sizeof(rb_econv_t) : 0; + return sizeof(rb_econv_t); } static const rb_data_type_t econv_data_type = { "econv", {NULL, econv_free, econv_memsize,}, - NULL, NULL, RUBY_TYPED_FREE_IMMEDIATELY + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY }; static VALUE @@ -2994,7 +2994,7 @@ econv_s_asciicompat_encoding(VALUE klass, VALUE arg) static void econv_args(int argc, VALUE *argv, - volatile VALUE *snamev_p, volatile VALUE *dnamev_p, + VALUE *snamev_p, VALUE *dnamev_p, const char **sname_p, const char **dname_p, rb_encoding **senc_p, rb_encoding **denc_p, int *ecflags_p, @@ -3138,7 +3138,7 @@ search_convpath_i(const char *sname, const char *dname, int depth, void *arg) static VALUE econv_s_search_convpath(int argc, VALUE *argv, VALUE klass) { - volatile VALUE snamev, dnamev; + VALUE snamev, dnamev; const char *sname, *dname; rb_encoding *senc, *denc; int ecflags; @@ -3150,11 +3150,19 @@ econv_s_search_convpath(int argc, VALUE *argv, VALUE klass) convpath = Qnil; transcode_search_path(sname, dname, search_convpath_i, &convpath); - if (NIL_P(convpath)) - rb_exc_raise(rb_econv_open_exc(sname, dname, ecflags)); + if (NIL_P(convpath)) { + VALUE exc = rb_econv_open_exc(sname, dname, ecflags); + RB_GC_GUARD(snamev); + RB_GC_GUARD(dnamev); + rb_exc_raise(exc); + } - if (decorate_convpath(convpath, ecflags) == -1) - rb_exc_raise(rb_econv_open_exc(sname, dname, ecflags)); + if (decorate_convpath(convpath, ecflags) == -1) { + VALUE exc = rb_econv_open_exc(sname, dname, ecflags); + RB_GC_GUARD(snamev); + RB_GC_GUARD(dnamev); + rb_exc_raise(exc); + } return convpath; } @@ -3210,7 +3218,7 @@ rb_econv_init_by_convpath(VALUE self, VALUE convpath, DATA_PTR(self) = ec; for (i = 0; i < RARRAY_LEN(convpath); i++) { - volatile VALUE snamev, dnamev; + VALUE snamev, dnamev; VALUE pair; elt = rb_ary_entry(convpath, i); if (!NIL_P(pair = rb_check_array_type(elt))) { @@ -3227,8 +3235,12 @@ rb_econv_init_by_convpath(VALUE self, VALUE convpath, } if (DECORATOR_P(sname, dname)) { ret = rb_econv_add_converter(ec, 
sname, dname, ec->num_trans); - if (ret == -1) - rb_raise(rb_eArgError, "decoration failed: %s", dname); + if (ret == -1) { + VALUE msg = rb_sprintf("decoration failed: %s", dname); + RB_GC_GUARD(snamev); + RB_GC_GUARD(dnamev); + rb_exc_raise(rb_exc_new_str(rb_eArgError, msg)); + } } else { int j = ec->num_trans; @@ -3237,8 +3249,12 @@ rb_econv_init_by_convpath(VALUE self, VALUE convpath, arg.index = ec->num_trans; arg.ret = 0; ret = transcode_search_path(sname, dname, rb_econv_init_by_convpath_i, &arg); - if (ret == -1 || arg.ret == -1) - rb_raise(rb_eArgError, "adding conversion failed: %s to %s", sname, dname); + if (ret == -1 || arg.ret == -1) { + VALUE msg = rb_sprintf("adding conversion failed: %s to %s", sname, dname); + RB_GC_GUARD(snamev); + RB_GC_GUARD(dnamev); + rb_exc_raise(rb_exc_new_str(rb_eArgError, msg)); + } if (first) { first = 0; *senc_p = senc; @@ -3332,7 +3348,7 @@ rb_econv_init_by_convpath(VALUE self, VALUE convpath, * Convert LF to CR. * [:xml => :text] * Escape as XML CharData. - * This form can be used as a HTML 4.0 #PCDATA. + * This form can be used as an HTML 4.0 #PCDATA. * - '&' -> '&amp;' * - '<' -> '&lt;' * - '>' -> '&gt;' @@ -3340,7 +3356,7 @@ rb_econv_init_by_convpath(VALUE self, VALUE convpath, * [:xml => :attr] * Escape as XML AttValue. * The converted result is quoted as "...". - * This form can be used as a HTML 4.0 attribute value. + * This form can be used as an HTML 4.0 attribute value. * - '&' -> '&amp;' * - '<' -> '&lt;' * - '>' -> '&gt;' @@ -3372,7 +3388,7 @@ static VALUE econv_init(int argc, VALUE *argv, VALUE self) { VALUE ecopts; - volatile VALUE snamev, dnamev; + VALUE snamev, dnamev; const char *sname, *dname; rb_encoding *senc, *denc; rb_econv_t *ec; @@ -4394,13 +4410,10 @@ ecerr_incomplete_input(VALUE self) * correspond with a known converter. 
*/ +#undef rb_intern void Init_transcode(void) { - rb_eUndefinedConversionError = rb_define_class_under(rb_cEncoding, "UndefinedConversionError", rb_eEncodingError); - rb_eInvalidByteSequenceError = rb_define_class_under(rb_cEncoding, "InvalidByteSequenceError", rb_eEncodingError); - rb_eConverterNotFoundError = rb_define_class_under(rb_cEncoding, "ConverterNotFoundError", rb_eEncodingError); - transcoder_table = st_init_strcasetable(); sym_invalid = ID2SYM(rb_intern("invalid")); @@ -4432,6 +4445,16 @@ Init_transcode(void) sym_lf = ID2SYM(rb_intern("lf")); #endif + InitVM(transcode); +} + +void +InitVM_transcode(void) +{ + rb_eUndefinedConversionError = rb_define_class_under(rb_cEncoding, "UndefinedConversionError", rb_eEncodingError); + rb_eInvalidByteSequenceError = rb_define_class_under(rb_cEncoding, "InvalidByteSequenceError", rb_eEncodingError); + rb_eConverterNotFoundError = rb_define_class_under(rb_cEncoding, "ConverterNotFoundError", rb_eEncodingError); + rb_define_method(rb_cString, "encode", str_encode, -1); rb_define_method(rb_cString, "encode!", str_encode_bang, -1); |
