diff options
Diffstat (limited to 'transcode.c')
| -rw-r--r-- | transcode.c | 130 |
1 files changed, 63 insertions, 67 deletions
diff --git a/transcode.c b/transcode.c index a72afdc44b..84c579eb2e 100644 --- a/transcode.c +++ b/transcode.c @@ -9,31 +9,20 @@ **********************************************************************/ -#include "ruby/internal/config.h" - -#include <ctype.h> - #include "internal.h" -#include "internal/array.h" -#include "internal/inits.h" -#include "internal/object.h" -#include "internal/string.h" -#include "internal/transcode.h" -#include "ruby/encoding.h" - #include "transcode_data.h" -#include "id.h" +#include <ctype.h> #define ENABLE_ECONV_NEWLINE_OPTION 1 /* VALUE rb_cEncoding = rb_define_class("Encoding", rb_cObject); */ -static VALUE rb_eUndefinedConversionError; -static VALUE rb_eInvalidByteSequenceError; -static VALUE rb_eConverterNotFoundError; +VALUE rb_eUndefinedConversionError; +VALUE rb_eInvalidByteSequenceError; +VALUE rb_eConverterNotFoundError; VALUE rb_cEncodingConverter; -static VALUE sym_invalid, sym_undef, sym_replace, sym_fallback; +static VALUE sym_invalid, sym_undef, sym_replace, sym_fallback, sym_aref; static VALUE sym_xml, sym_text, sym_attr; static VALUE sym_universal_newline; static VALUE sym_crlf_newline; @@ -379,12 +368,14 @@ load_transcoder_entry(transcoder_entry_t *entry) const size_t total_len = sizeof(transcoder_lib_prefix) - 1 + len; const VALUE fn = rb_str_new(0, total_len); char *const path = RSTRING_PTR(fn); + const int safe = rb_safe_level(); memcpy(path, transcoder_lib_prefix, sizeof(transcoder_lib_prefix) - 1); memcpy(path + sizeof(transcoder_lib_prefix) - 1, lib, len); rb_str_set_len(fn, total_len); + FL_UNSET(fn, FL_TAINT); OBJ_FREEZE(fn); - rb_require_string(fn); + rb_require_safe(fn, safe > 3 ? 3 : safe); } if (entry->transcoder) @@ -983,10 +974,21 @@ rb_econv_open0(const char *sname, const char *dname, int ecflags) int num_trans; rb_econv_t *ec; - /* Just check if sname and dname are defined */ - /* (This check is needed?) */ - if (*sname) rb_enc_find_index(sname); - if (*dname) rb_enc_find_index(dname); + int sidx, didx; + + if (*sname) { + sidx = rb_enc_find_index(sname); + if (0 <= sidx) { + rb_enc_from_index(sidx); + } + } + + if (*dname) { + didx = rb_enc_find_index(dname); + if (0 <= didx) { + rb_enc_from_index(didx); + } + } if (*sname == '\0' && *dname == '\0') { num_trans = 0; @@ -1192,6 +1194,7 @@ rb_trans_conv(rb_econv_t *ec, if (ec->elems[0].last_result == econv_after_output) ec->elems[0].last_result = econv_source_buffer_empty; + needreport_index = -1; for (i = ec->num_trans-1; 0 <= i; i--) { switch (ec->elems[i].last_result) { case econv_invalid_byte_sequence: @@ -1200,6 +1203,7 @@ rb_trans_conv(rb_econv_t *ec, case econv_after_output: case econv_finished: sweep_start = i+1; + needreport_index = i; goto found_needreport; case econv_destination_buffer_full: @@ -1850,6 +1854,7 @@ rb_econv_substr_append(rb_econv_t *ec, VALUE src, long off, long len, VALUE dst, src = rb_str_new_frozen(src); dst = rb_econv_append(ec, RSTRING_PTR(src) + off, len, dst, flags); RB_GC_GUARD(src); + OBJ_INFECT_RAW(dst, src); return dst; } @@ -2048,6 +2053,7 @@ make_econv_exception(rb_econv_t *ec) size_t readagain_len = ec->last_error.readagain_len; VALUE bytes2 = Qnil; VALUE dumped2; + int idx; if (ec->last_error.result == econv_incomplete_input) { mesg = rb_sprintf("incomplete %s on %s", StringValueCStr(dumped), @@ -2071,7 +2077,17 @@ make_econv_exception(rb_econv_t *ec) rb_ivar_set(exc, rb_intern("error_bytes"), bytes); rb_ivar_set(exc, rb_intern("readagain_bytes"), bytes2); rb_ivar_set(exc, rb_intern("incomplete_input"), ec->last_error.result == econv_incomplete_input ? Qtrue : Qfalse); - goto set_encs; + + set_encs: + rb_ivar_set(exc, rb_intern("source_encoding_name"), rb_str_new2(ec->last_error.source_encoding)); + rb_ivar_set(exc, rb_intern("destination_encoding_name"), rb_str_new2(ec->last_error.destination_encoding)); + idx = rb_enc_find_index(ec->last_error.source_encoding); + if (0 <= idx) + rb_ivar_set(exc, rb_intern("source_encoding"), rb_enc_from_encoding(rb_enc_from_index(idx))); + idx = rb_enc_find_index(ec->last_error.destination_encoding); + if (0 <= idx) + rb_ivar_set(exc, rb_intern("destination_encoding"), rb_enc_from_encoding(rb_enc_from_index(idx))); + return exc; } if (ec->last_error.result == econv_undefined_conversion) { VALUE bytes = rb_str_new((const char *)ec->last_error.error_bytes_start, @@ -2123,17 +2139,6 @@ make_econv_exception(rb_econv_t *ec) goto set_encs; } return Qnil; - - set_encs: - rb_ivar_set(exc, rb_intern("source_encoding_name"), rb_str_new2(ec->last_error.source_encoding)); - rb_ivar_set(exc, rb_intern("destination_encoding_name"), rb_str_new2(ec->last_error.destination_encoding)); - int idx = rb_enc_find_index(ec->last_error.source_encoding); - if (0 <= idx) - rb_ivar_set(exc, rb_intern("source_encoding"), rb_enc_from_encoding(rb_enc_from_index(idx))); - idx = rb_enc_find_index(ec->last_error.destination_encoding); - if (0 <= idx) - rb_ivar_set(exc, rb_intern("destination_encoding"), rb_enc_from_encoding(rb_enc_from_index(idx))); - return exc; } static void @@ -2251,7 +2256,7 @@ method_fallback(VALUE fallback, VALUE c) static VALUE aref_fallback(VALUE fallback, VALUE c) { - return rb_funcallv_public(fallback, idAREF, 1, &c); + return rb_funcall3(fallback, sym_aref, 1, &c); } static void @@ -2420,7 +2425,6 @@ static int econv_opts(VALUE opt, int ecflags) { VALUE v; - int newlineflag = 0; v = rb_hash_aref(opt, sym_invalid); if (NIL_P(v)) { @@ -2466,7 +2470,6 @@ econv_opts(VALUE opt, int ecflags) #ifdef ENABLE_ECONV_NEWLINE_OPTION v = rb_hash_aref(opt, sym_newline); if (!NIL_P(v)) { - newlineflag = 2; ecflags &= ~ECONV_NEWLINE_DECORATOR_MASK; if (v == sym_universal) { ecflags |= ECONV_UNIVERSAL_NEWLINE_DECORATOR; @@ -2488,9 +2491,10 @@ econv_opts(VALUE opt, int ecflags) rb_raise(rb_eArgError, "unexpected value for newline option"); } } + else #endif { - int setflags = 0; + int setflags = 0, newlineflag = 0; v = rb_hash_aref(opt, sym_universal_newline); if (RTEST(v)) @@ -2507,15 +2511,9 @@ econv_opts(VALUE opt, int ecflags) setflags |= ECONV_CR_NEWLINE_DECORATOR; newlineflag |= !NIL_P(v); - switch (newlineflag) { - case 1: + if (newlineflag) { ecflags &= ~ECONV_NEWLINE_DECORATOR_MASK; ecflags |= setflags; - break; - - case 3: - rb_warning(":newline option preceds other newline options"); - break; } } @@ -2552,7 +2550,7 @@ rb_econv_prepare_options(VALUE opthash, VALUE *opts, int ecflags) if (!NIL_P(v)) { VALUE h = rb_check_hash_type(v); if (NIL_P(h) - ? (rb_obj_is_proc(v) || rb_obj_is_method(v) || rb_respond_to(v, idAREF)) + ? (rb_obj_is_proc(v) || rb_obj_is_method(v) || rb_respond_to(v, sym_aref)) : (v = h, 1)) { if (NIL_P(newhash)) newhash = rb_hash_new(); @@ -2784,14 +2782,14 @@ str_encode_associate(VALUE str, int encidx) /* * call-seq: - * str.encode!(encoding, **options) -> str - * str.encode!(dst_encoding, src_encoding, **options) -> str + * str.encode!(encoding [, options] ) -> str + * str.encode!(dst_encoding, src_encoding [, options] ) -> str * * The first form transcodes the contents of <i>str</i> from * str.encoding to +encoding+. * The second form transcodes the contents of <i>str</i> from * src_encoding to dst_encoding. - * The +options+ keyword arguments give details for conversion. See String#encode + * The options Hash gives details for conversion. See String#encode * for details. * Returns the string even if no changes were made. */ @@ -2820,9 +2818,9 @@ static VALUE encoded_dup(VALUE newstr, VALUE str, int encidx); /* * call-seq: - * str.encode(encoding, **options) -> str - * str.encode(dst_encoding, src_encoding, **options) -> str - * str.encode(**options) -> str + * str.encode(encoding [, options] ) -> str + * str.encode(dst_encoding, src_encoding [, options] ) -> str + * str.encode([options]) -> str * * The first form returns a copy of +str+ transcoded * to encoding +encoding+. @@ -2838,8 +2836,8 @@ static VALUE encoded_dup(VALUE newstr, VALUE str, int encidx); * in the source encoding. The last form by default does not raise * exceptions but uses replacement strings. * - * The +options+ keyword arguments give details for conversion. - * The arguments are: + * The +options+ Hash gives details for conversion and can have the following + * keys: * * :invalid :: * If the value is +:replace+, #encode replaces invalid byte sequences in @@ -2907,11 +2905,6 @@ encoded_dup(VALUE newstr, VALUE str, int encidx) return str_encode_associate(newstr, encidx); } -/* - * Document-class: Encoding::Converter - * - * Encoding conversion class. - */ static void econv_free(void *ptr) { @@ -2927,7 +2920,7 @@ econv_memsize(const void *ptr) static const rb_data_type_t econv_data_type = { "econv", - {0, econv_free, econv_memsize,}, + {NULL, econv_free, econv_memsize,}, 0, 0, RUBY_TYPED_FREE_IMMEDIATELY }; @@ -3158,10 +3151,10 @@ econv_s_search_convpath(int argc, VALUE *argv, VALUE klass) transcode_search_path(sname, dname, search_convpath_i, &convpath); if (NIL_P(convpath)) { - VALUE exc = rb_econv_open_exc(sname, dname, ecflags); - RB_GC_GUARD(snamev); - RB_GC_GUARD(dnamev); - rb_exc_raise(exc); + VALUE exc = rb_econv_open_exc(sname, dname, ecflags); + RB_GC_GUARD(snamev); + RB_GC_GUARD(dnamev); + rb_exc_raise(exc); } if (decorate_convpath(convpath, ecflags) == -1) { @@ -3795,6 +3788,7 @@ econv_primitive_convert(int argc, VALUE *argv, VALUE self) res = rb_econv_convert(ec, &ip, is, &op, os, flags); rb_str_set_len(output, op-(unsigned char *)RSTRING_PTR(output)); if (!NIL_P(input)) { + OBJ_INFECT_RAW(output, input); rb_str_drop_bytes(input, ip - (unsigned char *)RSTRING_PTR(input)); } @@ -4085,7 +4079,7 @@ econv_insert_output(VALUE self, VALUE string) } /* - * call-seq: + * call-seq * ec.putback -> string * ec.putback(max_numbytes) -> string * @@ -4116,9 +4110,10 @@ econv_putback(int argc, VALUE *argv, VALUE self) int putbackable; VALUE str, max; - if (!rb_check_arity(argc, 0, 1) || NIL_P(max = argv[0])) { + rb_scan_args(argc, argv, "01", &max); + + if (NIL_P(max)) n = rb_econv_putbackable(ec); - } else { n = NUM2INT(max); putbackable = rb_econv_putbackable(ec); @@ -4425,6 +4420,7 @@ Init_transcode(void) sym_undef = ID2SYM(rb_intern("undef")); sym_replace = ID2SYM(rb_intern("replace")); sym_fallback = ID2SYM(rb_intern("fallback")); + sym_aref = ID2SYM(rb_intern("[]")); sym_xml = ID2SYM(rb_intern("xml")); sym_text = ID2SYM(rb_intern("text")); sym_attr = ID2SYM(rb_intern("attr")); @@ -4462,7 +4458,7 @@ InitVM_transcode(void) rb_define_method(rb_cString, "encode", str_encode, -1); rb_define_method(rb_cString, "encode!", str_encode_bang, -1); - rb_cEncodingConverter = rb_define_class_under(rb_cEncoding, "Converter", rb_cObject); + rb_cEncodingConverter = rb_define_class_under(rb_cEncoding, "Converter", rb_cData); rb_define_alloc_func(rb_cEncodingConverter, econv_s_allocate); rb_define_singleton_method(rb_cEncodingConverter, "asciicompat_encoding", econv_s_asciicompat_encoding, 1); rb_define_singleton_method(rb_cEncodingConverter, "search_convpath", econv_s_search_convpath, -1); |
