summary | refs | log | tree | commit | diff
path: root/transcode.c
diff options
context:
space:
mode:
Diffstat (limited to 'transcode.c')
-rw-r--r-- transcode.c | 89
1 files changed, 56 insertions, 33 deletions
diff --git a/transcode.c b/transcode.c
index da934226dc..84c579eb2e 100644
--- a/transcode.c
+++ b/transcode.c
@@ -9,8 +9,6 @@
**********************************************************************/
-#include "ruby/ruby.h"
-#include "ruby/encoding.h"
#include "internal.h"
#include "transcode_data.h"
#include <ctype.h>
@@ -110,21 +108,21 @@ typedef struct {
struct rb_econv_t {
int flags;
+ int started; /* bool */
+
const char *source_encoding_name;
const char *destination_encoding_name;
- int started;
-
const unsigned char *replacement_str;
size_t replacement_len;
const char *replacement_enc;
- int replacement_allocated;
unsigned char *in_buf_start;
unsigned char *in_data_start;
unsigned char *in_data_end;
unsigned char *in_buf_end;
rb_econv_elem_t *elems;
+ int replacement_allocated; /* bool */
int num_allocated;
int num_trans;
int num_finished;
@@ -156,7 +154,7 @@ struct rb_econv_t {
typedef struct {
const char *sname;
const char *dname;
- const char *lib; /* null means means no need to load a library */
+ const char *lib; /* null means no need to load a library */
const rb_transcoder *transcoder;
} transcoder_entry_t;
@@ -2462,7 +2460,7 @@ econv_opts(VALUE opt, int ecflags)
ecflags |= ECONV_XML_ATTR_CONTENT_DECORATOR|ECONV_XML_ATTR_QUOTE_DECORATOR|ECONV_UNDEF_HEX_CHARREF;
}
else if (RB_TYPE_P(v, T_SYMBOL)) {
- rb_raise(rb_eArgError, "unexpected value for xml option: %s", rb_id2name(SYM2ID(v)));
+ rb_raise(rb_eArgError, "unexpected value for xml option: %"PRIsVALUE, rb_sym2str(v));
}
else {
rb_raise(rb_eArgError, "unexpected value for xml option");
@@ -2486,8 +2484,8 @@ econv_opts(VALUE opt, int ecflags)
/* ecflags |= ECONV_LF_NEWLINE_DECORATOR; */
}
else if (SYMBOL_P(v)) {
- rb_raise(rb_eArgError, "unexpected value for newline option: %s",
- rb_id2name(SYM2ID(v)));
+ rb_raise(rb_eArgError, "unexpected value for newline option: %"PRIsVALUE,
+ rb_sym2str(v));
}
else {
rb_raise(rb_eArgError, "unexpected value for newline option");
@@ -2609,7 +2607,7 @@ rb_econv_open_opts(const char *source_encoding, const char *destination_encoding
}
static int
-enc_arg(volatile VALUE *arg, const char **name_p, rb_encoding **enc_p)
+enc_arg(VALUE *arg, const char **name_p, rb_encoding **enc_p)
{
rb_encoding *enc;
const char *n;
@@ -2633,7 +2631,7 @@ enc_arg(volatile VALUE *arg, const char **name_p, rb_encoding **enc_p)
}
static int
-str_transcode_enc_args(VALUE str, volatile VALUE *arg1, volatile VALUE *arg2,
+str_transcode_enc_args(VALUE str, VALUE *arg1, VALUE *arg2,
const char **sname_p, rb_encoding **senc_p,
const char **dname_p, rb_encoding **denc_p)
{
@@ -2664,7 +2662,7 @@ str_transcode0(int argc, VALUE *argv, VALUE *self, int ecflags, VALUE ecopts)
{
VALUE dest;
VALUE str = *self;
- volatile VALUE arg1, arg2;
+ VALUE arg1, arg2;
long blen, slen;
unsigned char *buf, *bp, *sp;
const unsigned char *fromp;
@@ -2702,7 +2700,7 @@ str_transcode0(int argc, VALUE *argv, VALUE *self, int ecflags, VALUE ecopts)
if (!NIL_P(ecopts)) {
rep = rb_hash_aref(ecopts, sym_replace);
}
- dest = rb_str_scrub(str, rep);
+ dest = rb_enc_str_scrub(senc, str, rep);
if (NIL_P(dest)) dest = str;
*self = dest;
return dencidx;
@@ -2742,6 +2740,8 @@ str_transcode0(int argc, VALUE *argv, VALUE *self, int ecflags, VALUE ecopts)
/* set encoding */
if (!denc) {
dencidx = rb_define_dummy_encoding(dname);
+ RB_GC_GUARD(arg1);
+ RB_GC_GUARD(arg2);
}
*self = dest;
@@ -2915,13 +2915,13 @@ econv_free(void *ptr)
static size_t
econv_memsize(const void *ptr)
{
- return ptr ? sizeof(rb_econv_t) : 0;
+ return sizeof(rb_econv_t);
}
static const rb_data_type_t econv_data_type = {
"econv",
{NULL, econv_free, econv_memsize,},
- NULL, NULL, RUBY_TYPED_FREE_IMMEDIATELY
+ 0, 0, RUBY_TYPED_FREE_IMMEDIATELY
};
static VALUE
@@ -2994,7 +2994,7 @@ econv_s_asciicompat_encoding(VALUE klass, VALUE arg)
static void
econv_args(int argc, VALUE *argv,
- volatile VALUE *snamev_p, volatile VALUE *dnamev_p,
+ VALUE *snamev_p, VALUE *dnamev_p,
const char **sname_p, const char **dname_p,
rb_encoding **senc_p, rb_encoding **denc_p,
int *ecflags_p,
@@ -3138,7 +3138,7 @@ search_convpath_i(const char *sname, const char *dname, int depth, void *arg)
static VALUE
econv_s_search_convpath(int argc, VALUE *argv, VALUE klass)
{
- volatile VALUE snamev, dnamev;
+ VALUE snamev, dnamev;
const char *sname, *dname;
rb_encoding *senc, *denc;
int ecflags;
@@ -3150,11 +3150,19 @@ econv_s_search_convpath(int argc, VALUE *argv, VALUE klass)
convpath = Qnil;
transcode_search_path(sname, dname, search_convpath_i, &convpath);
- if (NIL_P(convpath))
- rb_exc_raise(rb_econv_open_exc(sname, dname, ecflags));
+ if (NIL_P(convpath)) {
+ VALUE exc = rb_econv_open_exc(sname, dname, ecflags);
+ RB_GC_GUARD(snamev);
+ RB_GC_GUARD(dnamev);
+ rb_exc_raise(exc);
+ }
- if (decorate_convpath(convpath, ecflags) == -1)
- rb_exc_raise(rb_econv_open_exc(sname, dname, ecflags));
+ if (decorate_convpath(convpath, ecflags) == -1) {
+ VALUE exc = rb_econv_open_exc(sname, dname, ecflags);
+ RB_GC_GUARD(snamev);
+ RB_GC_GUARD(dnamev);
+ rb_exc_raise(exc);
+ }
return convpath;
}
@@ -3210,7 +3218,7 @@ rb_econv_init_by_convpath(VALUE self, VALUE convpath,
DATA_PTR(self) = ec;
for (i = 0; i < RARRAY_LEN(convpath); i++) {
- volatile VALUE snamev, dnamev;
+ VALUE snamev, dnamev;
VALUE pair;
elt = rb_ary_entry(convpath, i);
if (!NIL_P(pair = rb_check_array_type(elt))) {
@@ -3227,8 +3235,12 @@ rb_econv_init_by_convpath(VALUE self, VALUE convpath,
}
if (DECORATOR_P(sname, dname)) {
ret = rb_econv_add_converter(ec, sname, dname, ec->num_trans);
- if (ret == -1)
- rb_raise(rb_eArgError, "decoration failed: %s", dname);
+ if (ret == -1) {
+ VALUE msg = rb_sprintf("decoration failed: %s", dname);
+ RB_GC_GUARD(snamev);
+ RB_GC_GUARD(dnamev);
+ rb_exc_raise(rb_exc_new_str(rb_eArgError, msg));
+ }
}
else {
int j = ec->num_trans;
@@ -3237,8 +3249,12 @@ rb_econv_init_by_convpath(VALUE self, VALUE convpath,
arg.index = ec->num_trans;
arg.ret = 0;
ret = transcode_search_path(sname, dname, rb_econv_init_by_convpath_i, &arg);
- if (ret == -1 || arg.ret == -1)
- rb_raise(rb_eArgError, "adding conversion failed: %s to %s", sname, dname);
+ if (ret == -1 || arg.ret == -1) {
+ VALUE msg = rb_sprintf("adding conversion failed: %s to %s", sname, dname);
+ RB_GC_GUARD(snamev);
+ RB_GC_GUARD(dnamev);
+ rb_exc_raise(rb_exc_new_str(rb_eArgError, msg));
+ }
if (first) {
first = 0;
*senc_p = senc;
@@ -3332,7 +3348,7 @@ rb_econv_init_by_convpath(VALUE self, VALUE convpath,
* Convert LF to CR.
* [:xml => :text]
* Escape as XML CharData.
- * This form can be used as a HTML 4.0 #PCDATA.
+ * This form can be used as an HTML 4.0 #PCDATA.
* - '&' -> '&amp;'
* - '<' -> '&lt;'
* - '>' -> '&gt;'
@@ -3340,7 +3356,7 @@ rb_econv_init_by_convpath(VALUE self, VALUE convpath,
* [:xml => :attr]
* Escape as XML AttValue.
* The converted result is quoted as "...".
- * This form can be used as a HTML 4.0 attribute value.
+ * This form can be used as an HTML 4.0 attribute value.
* - '&' -> '&amp;'
* - '<' -> '&lt;'
* - '>' -> '&gt;'
@@ -3372,7 +3388,7 @@ static VALUE
econv_init(int argc, VALUE *argv, VALUE self)
{
VALUE ecopts;
- volatile VALUE snamev, dnamev;
+ VALUE snamev, dnamev;
const char *sname, *dname;
rb_encoding *senc, *denc;
rb_econv_t *ec;
@@ -4394,13 +4410,10 @@ ecerr_incomplete_input(VALUE self)
* correspond with a known converter.
*/
+#undef rb_intern
void
Init_transcode(void)
{
- rb_eUndefinedConversionError = rb_define_class_under(rb_cEncoding, "UndefinedConversionError", rb_eEncodingError);
- rb_eInvalidByteSequenceError = rb_define_class_under(rb_cEncoding, "InvalidByteSequenceError", rb_eEncodingError);
- rb_eConverterNotFoundError = rb_define_class_under(rb_cEncoding, "ConverterNotFoundError", rb_eEncodingError);
-
transcoder_table = st_init_strcasetable();
sym_invalid = ID2SYM(rb_intern("invalid"));
@@ -4432,6 +4445,16 @@ Init_transcode(void)
sym_lf = ID2SYM(rb_intern("lf"));
#endif
+ InitVM(transcode);
+}
+
+void
+InitVM_transcode(void)
+{
+ rb_eUndefinedConversionError = rb_define_class_under(rb_cEncoding, "UndefinedConversionError", rb_eEncodingError);
+ rb_eInvalidByteSequenceError = rb_define_class_under(rb_cEncoding, "InvalidByteSequenceError", rb_eEncodingError);
+ rb_eConverterNotFoundError = rb_define_class_under(rb_cEncoding, "ConverterNotFoundError", rb_eEncodingError);
+
rb_define_method(rb_cString, "encode", str_encode, -1);
rb_define_method(rb_cString, "encode!", str_encode_bang, -1);