diff options
author | matz <matz@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2008-10-07 17:39:44 +0000 |
---|---|---|
committer | matz <matz@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2008-10-07 17:39:44 +0000 |
commit | baeeebf474cae027bc3d2e80a72a540612874ef9 (patch) | |
tree | 2e18a3411a9cdd58c64185fd72b9d11485d6e332 | |
parent | 0b184b473b1baea42f4093f23d64b24cf4a565fd (diff) |
* encoding.c (rb_default_internal_encoding): merged a patch from
Michael Selig <michael.selig at fs.com.au> in [ruby-core:18985].
* io.c (rb_io_ext_int_to_encs): ditto.
* ruby.c (proc_options): support default internal encoding in -E
option.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@19709 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- | ChangeLog | 10 | ||||
-rw-r--r-- | encoding.c | 50 | ||||
-rw-r--r-- | include/ruby/encoding.h | 3 | ||||
-rw-r--r-- | io.c | 166 | ||||
-rw-r--r-- | ruby.c | 38 |
5 files changed, 206 insertions, 61 deletions
@@ -1,3 +1,13 @@
+Wed Oct 8 02:38:28 2008 Yukihiro Matsumoto <matz@ruby-lang.org>
+
+	* encoding.c (rb_default_internal_encoding): merged a patch from
+	  Michael Selig <michael.selig at fs.com.au> in [ruby-core:18985].
+
+	* io.c (rb_io_ext_int_to_encs): ditto.
+
+	* ruby.c (proc_options): support default internal encoding in -E
+	  option.
+
 Wed Oct 8 00:03:39 2008 Tadayoshi Funaba <tadf@dotrb.org>
 
 	* lib/date.rb (today,now): should produce own instances.
diff --git a/encoding.c b/encoding.c
index 35130a6aaa..f17ed1882c 100644
--- a/encoding.c
+++ b/encoding.c
@@ -1027,6 +1027,55 @@ rb_enc_set_default_external(VALUE encoding)
     default_external = 0;
 }
 
+/* -2 => not yet set, -1 => nil */
+static int default_internal_index = -2;
+static rb_encoding *default_internal;
+
+rb_encoding *
+rb_default_internal_encoding(void)
+{
+    if (!default_internal && default_internal_index >= 0) {
+        default_internal = rb_enc_from_index(default_internal_index);
+    }
+    return default_internal;
+}
+
+VALUE
+rb_enc_default_internal(void)
+{
+    /* Note: These functions cope with default_internal not being set */
+    return rb_enc_from_encoding(rb_default_internal_encoding());
+}
+
+/*
+ * call-seq:
+ *   Encoding.default_internal => enc
+ *
+ * Returns default internal encoding.
+ *
+ * It is initialized by the source internal_encoding or -E option,
+ * and can't be modified after that.
+ */
+static VALUE
+get_default_internal(VALUE klass)
+{
+    return rb_enc_default_internal();
+}
+
+void
+rb_enc_set_default_internal(VALUE encoding)
+{
+    if (default_internal_index != -2)
+        /* Already set */
+        return;
+    default_internal_index = encoding == Qnil ?
+        -1 :rb_enc_to_index(rb_to_encoding(encoding));
+    /* Convert US-ASCII => UTF-8 */
+    if (default_internal_index == rb_usascii_encindex())
+        default_internal_index = rb_utf8_encindex();
+    default_internal = 0;
+}
+
 /*
  * call-seq:
  *   Encoding.locale_charmap => string
@@ -1212,6 +1261,7 @@ Init_Encoding(void)
     rb_define_singleton_method(rb_cEncoding, "_load", enc_load, 1);
 
     rb_define_singleton_method(rb_cEncoding, "default_external", get_default_external, 0);
+    rb_define_singleton_method(rb_cEncoding, "default_internal", get_default_internal, 0);
     rb_define_singleton_method(rb_cEncoding, "locale_charmap", rb_locale_charmap, 0);
 
     list = rb_ary_new2(enc_table.count);
diff --git a/include/ruby/encoding.h b/include/ruby/encoding.h
index 1d12934356..9049df3fa2 100644
--- a/include/ruby/encoding.h
+++ b/include/ruby/encoding.h
@@ -168,11 +168,14 @@ rb_encoding *rb_usascii_encoding(void);
 rb_encoding *rb_locale_encoding(void);
 rb_encoding *rb_filesystem_encoding(void);
 rb_encoding *rb_default_external_encoding(void);
+rb_encoding *rb_default_internal_encoding(void);
 int rb_ascii8bit_encindex(void);
 int rb_utf8_encindex(void);
 int rb_usascii_encindex(void);
 VALUE rb_enc_default_external(void);
+VALUE rb_enc_default_internal(void);
 void rb_enc_set_default_external(VALUE encoding);
+void rb_enc_set_default_internal(VALUE encoding);
 VALUE rb_locale_charmap(VALUE klass);
 long rb_memsearch(const void*,long,const void*,long,rb_encoding*);
 
@@ -2177,10 +2177,8 @@ rb_io_getline_1(VALUE rs, long limit, VALUE io)
         }
         newline = (unsigned char)rsptr[rslen - 1];
 
-        if (fptr->encs.enc2)
-            enc = fptr->encs.enc;
-        else
-            enc = io_input_encoding(fptr);
+        /* MS - Optimisation */
+        enc = io_read_encoding(fptr);
         while ((c = appendline(fptr, newline, &str, &limit)) != EOF) {
             const char *s, *p, *pp, *e;
 
@@ -3740,52 +3738,87 @@ rb_io_oflags_modestr(int oflags)
     return NULL; /* not reached */
 }
 
+/*
+ * Convert external/internal encodings to enc/enc2
+ * NULL => use default encoding
+ * Qnil => no encoding specified (internal only)
+ */
+static void
+rb_io_ext_int_to_encs(rb_encoding *ext, rb_encoding *intern, rb_encoding **enc, rb_encoding **enc2)
+{
+    int default_ext = 0;
+
+    if (ext == NULL) {
+        ext = rb_default_external_encoding();
+        default_ext = 1;
+    }
+    if (intern == NULL && ext != rb_ascii8bit_encoding())
+        /* If external is ASCII-8BIT, no default transcoding */
+        intern = rb_default_internal_encoding();
+    if (intern == NULL || intern == (rb_encoding *)Qnil || intern == ext) {
+        /* No internal encoding => use external + no transcoding */
+        *enc = default_ext ? NULL : ext;
+        *enc2 = NULL;
+    }
+    else {
+        *enc = intern;
+        *enc2 = ext;
+    }
+}
+
 static void
 parse_mode_enc(const char *estr, rb_encoding **enc_p, rb_encoding **enc2_p)
 {
-    const char *p0, *p1;
-    char *enc2name;
+    const char *p;
+    char encname[ENCODING_MAXNAMELEN+1];
     int idx, idx2;
+    rb_encoding *ext_enc, *int_enc;
 
-    /* parse estr as "enc" or "enc2:enc" */
-
-    *enc_p = 0;
-    *enc2_p = 0;
+    /* parse estr as "enc" or "enc2:enc" or "enc:-" */
 
-    p0 = strrchr(estr, ':');
-    if (!p0) p1 = estr;
-    else p1 = p0 + 1;
-    idx = rb_enc_find_index(p1);
-    if (idx >= 0) {
-        *enc_p = rb_enc_from_index(idx);
+    p = strrchr(estr, ':');
+    if (p) {
+        int len = (p++) - estr;
+        if (len == 0 || len > ENCODING_MAXNAMELEN)
+            idx = -1;
+        else {
+            memcpy(encname, estr, len);
+            encname[len] = '\0';
+            estr = encname;
+            idx = rb_enc_find_index(encname);
+        }
     }
+    else
+        idx = rb_enc_find_index(estr);
+
+    if (idx >= 0)
+        ext_enc = rb_enc_from_index(idx);
     else {
-        rb_warn("Unsupported encoding %s ignored", p1);
+        if (idx != -2)
+            rb_warn("Unsupported encoding %s ignored", estr);
+        ext_enc = NULL;
     }
 
-    if (*enc_p && p0) {
-        int n = p0 - estr;
-        if (n > ENCODING_MAXNAMELEN) {
-            idx2 = -1;
-        }
-        else {
-            enc2name = ALLOCA_N(char, n+1);
-            memcpy(enc2name, estr, n);
-            enc2name[n] = '\0';
-            estr = enc2name;
-            idx2 = rb_enc_find_index(enc2name);
-        }
-        if (idx2 < 0) {
-            rb_warn("Unsupported encoding %.*s ignored", n, estr);
-        }
-        else if (idx2 == idx) {
-            rb_warn("Ignoring internal encoding %.*s: it is identical to external encoding %s",
-                    n, estr, p1);
+    int_enc = NULL;
+    if (p) {
+        if (*p == '-' && *(p+1) == '\0') {
+            /* Special case - "-" => no transcoding */
+            int_enc = (rb_encoding *)Qnil;
         }
         else {
-            *enc2_p = rb_enc_from_index(idx2);
+            idx2 = rb_enc_find_index(p);
+            if (idx2 < 0)
+                rb_warn("Unsupported encoding %s ignored", p);
+            else if (idx2 == idx) {
+                rb_warn("Ignoring internal encoding %s: it is identical to external encoding %s", p, estr);
+                int_enc = (rb_encoding *)Qnil;
+            }
+            else
+                int_enc = rb_enc_from_index(idx2);
         }
     }
+
+    rb_io_ext_int_to_encs(ext_enc, int_enc, enc_p, enc2_p);
 }
 
 static void
@@ -3821,28 +3854,32 @@ io_extract_encoding_option(VALUE opt, rb_encoding **enc_p, rb_encoding **enc2_p)
     }
     if (!NIL_P(extenc)) {
         rb_encoding *extencoding = rb_to_encoding(extenc);
+        rb_encoding *intencoding = NULL;
         extracted = 1;
-        *enc_p = 0;
-        *enc2_p = 0;
         if (!NIL_P(encoding)) {
             rb_warn("Ignoring encoding parameter '%s': external_encoding is used",
                     RSTRING_PTR(encoding));
         }
         if (!NIL_P(intenc)) {
-            rb_encoding *intencoding = rb_to_encoding(intenc);
+            if (!NIL_P(encoding = rb_check_string_type(intenc))) {
+                char *p = StringValueCStr(encoding);
+                if (*p == '-' && *(p+1) == '\0') {
+                    /* Special case - "-" => no transcoding */
+                    intencoding = (rb_encoding *)Qnil;
+                }
+                else
+                    intencoding = rb_to_encoding(intenc);
+            }
+            else
+                intencoding = rb_to_encoding(intenc);
             if (extencoding == intencoding) {
                 rb_warn("Ignoring internal encoding '%s': it is identical to external encoding '%s'",
                         RSTRING_PTR(rb_inspect(intenc)),
                         RSTRING_PTR(rb_inspect(extenc)));
-            }
-            else {
-                *enc_p = intencoding;
-                *enc2_p = extencoding;
+                intencoding = (rb_encoding *)Qnil;
             }
         }
-        else {
-            *enc_p = extencoding;
-        }
+        rb_io_ext_int_to_encs(extencoding, intencoding, enc_p, enc2_p);
     }
     else {
         if (!NIL_P(intenc)) {
@@ -3882,8 +3919,8 @@ rb_io_extract_modeenc(VALUE *vmode_p, VALUE *vperm_p, VALUE opthash,
 
     vmode = *vmode_p;
 
-    enc = NULL;
-    enc2 = NULL;
+    /* Set to defaults */
+    rb_io_ext_int_to_encs(NULL, NULL, &enc, &enc2);
 
     if (NIL_P(vmode)) {
         fmode = FMODE_READABLE;
@@ -4070,8 +4107,8 @@ rb_file_open_generic(VALUE io, VALUE filename, int oflags, int fmode, convconfig
     rb_io_t *fptr;
     convconfig_t cc;
     if (!convconfig) {
-        cc.enc = NULL;
-        cc.enc2 = NULL;
+        /* Set to default encodings */
+        rb_io_ext_int_to_encs(NULL, NULL, &cc.enc, &cc.enc2);
         cc.ecflags = 0;
         cc.ecopts = Qnil;
         convconfig = &cc;
@@ -4099,8 +4136,8 @@ rb_file_open_internal(VALUE io, VALUE filename, const char *modestr)
         parse_mode_enc(p+1, &convconfig.enc, &convconfig.enc2);
     }
     else {
-        convconfig.enc = NULL;
-        convconfig.enc2 = NULL;
+        /* Set to default encodings */
+        rb_io_ext_int_to_encs(NULL, NULL, &convconfig.enc, &convconfig.enc2);
         convconfig.ecflags = 0;
         convconfig.ecopts = Qnil;
     }
@@ -6661,29 +6698,40 @@ io_encoding_set(rb_io_t *fptr, VALUE v1, VALUE v2, VALUE opt)
 {
     rb_encoding *enc, *enc2;
     int ecflags;
-    VALUE ecopts;
+    VALUE ecopts, tmp;
 
     if (!NIL_P(v2)) {
         enc2 = rb_to_encoding(v1);
-        enc = rb_to_encoding(v2);
+        tmp = rb_check_string_type(v2);
+        if (!NIL_P(tmp)) {
+            char *p = StringValueCStr(tmp);
+            if (*p == '-' && *(p+1) == '\0') {
+                /* Special case - "-" => no transcoding */
+                enc = enc2;
+                enc2 = NULL;
+            }
+            else
+                enc = rb_to_encoding(v2);
+        }
+        else
+            enc = rb_to_encoding(v2);
         ecflags = rb_econv_prepare_opts(opt, &ecopts);
     }
     else {
         if (NIL_P(v1)) {
-            enc = NULL;
-            enc2 = NULL;
+            /* Set to default encodings */
+            rb_io_ext_int_to_encs(NULL, NULL, &enc, &enc2);
             ecflags = 0;
             ecopts = Qnil;
         }
         else {
-            VALUE tmp = rb_check_string_type(v1);
+            tmp = rb_check_string_type(v1);
             if (!NIL_P(tmp)) {
                 parse_mode_enc(StringValueCStr(tmp), &enc, &enc2);
                 ecflags = rb_econv_prepare_opts(opt, &ecopts);
             }
             else {
-                enc = rb_to_encoding(v1);
-                enc2 = NULL;
+                rb_io_ext_int_to_encs(rb_to_encoding(v1), NULL, &enc, &enc2);
                 ecflags = 0;
                 ecopts = Qnil;
             }
@@ -86,7 +86,7 @@ struct cmdline_options {
             VALUE name;
             int index;
         } enc;
-    } src, ext;
+    } src, ext, intern;
     VALUE req_list;
 };
 
@@ -855,6 +855,7 @@ proc_options(int argc, char **argv, struct cmdline_options *opt)
                     ruby_each_words(s, disable_option, &opt->disable);
                 }
                 else if (strncmp("encoding", s, n = 8) == 0 && (!s[n] || s[n] == '=')) {
+                    char *p;
                     s += n;
                     if (!*s++) {
                       next_encoding:
@@ -863,7 +864,15 @@ proc_options(int argc, char **argv, struct cmdline_options *opt)
                         }
                     }
                   encoding:
-                    opt->ext.enc.name = rb_str_new2(s);
+                    p = strchr(s, ':');
+                    if (p) {
+                        if (p > s)
+                            opt->ext.enc.name = rb_str_new(s, p-s);
+                        if (*++p)
+                            opt->intern.enc.name = rb_str_new2(p);
+                    }
+                    else
+                        opt->ext.enc.name = rb_str_new2(s);
                 }
                 else if (strcmp("version", s) == 0)
                     opt->version = 1;
@@ -966,6 +975,7 @@ process_options(VALUE arg)
         rb_safe_level() == 0 && (s = getenv("RUBYOPT"))) {
         VALUE src_enc_name = opt->src.enc.name;
         VALUE ext_enc_name = opt->ext.enc.name;
+        VALUE int_enc_name = opt->intern.enc.name;
 
         while (ISSPACE(*s))
             s++;
@@ -1005,6 +1015,8 @@ process_options(VALUE arg)
             opt->src.enc.name = src_enc_name;
         if (ext_enc_name)
             opt->ext.enc.name = ext_enc_name;
+        if (int_enc_name)
+            opt->intern.enc.name = int_enc_name;
     }
 
     if (opt->version) {
@@ -1073,6 +1085,9 @@ process_options(VALUE arg)
     if (opt->ext.enc.name != 0) {
         opt->ext.enc.index = opt_enc_index(opt->ext.enc.name);
     }
+    if (opt->intern.enc.name != 0) {
+        opt->intern.enc.index = opt_enc_index(opt->intern.enc.name);
+    }
     if (opt->src.enc.name != 0) {
         opt->src.enc.index = opt_enc_index(opt->src.enc.name);
         src_encoding_index = opt->src.enc.index;
@@ -1084,6 +1099,11 @@ process_options(VALUE arg)
         enc = lenc;
     }
     rb_enc_set_default_external(rb_enc_from_encoding(enc));
+    if (opt->intern.enc.index >= 0) {
+        enc = rb_enc_from_index(opt->intern.enc.index);
+        rb_enc_set_default_internal(rb_enc_from_encoding(enc));
+        opt->intern.enc.index = -1;
+    }
 
     rb_set_safe_level_force(safe);
     if (opt->e_script) {
@@ -1105,6 +1125,15 @@ process_options(VALUE arg)
         tree = load_file(parser, opt->script, 1, opt);
     }
 
+    if (opt->intern.enc.index >= 0) {
+        /* Set in the shebang line */
+        enc = rb_enc_from_index(opt->intern.enc.index);
+        rb_enc_set_default_internal(rb_enc_from_encoding(enc));
+    }
+    else
+        /* Freeze default_internal */
+        rb_enc_set_default_internal(Qnil);
+
     if (!tree) return Qfalse;
 
     process_sflag(opt);
@@ -1175,6 +1204,7 @@ load_file(VALUE parser, const char *fname, int script, struct cmdline_options *o
         char *p;
         int no_src_enc = !opt->src.enc.name;
         int no_ext_enc = !opt->ext.enc.name;
+        int no_int_enc = !opt->intern.enc.name;
 
         enc = rb_usascii_encoding();
         rb_funcall(f, rb_intern("set_encoding"), 1, rb_enc_from_encoding(enc));
@@ -1261,6 +1291,9 @@ load_file(VALUE parser, const char *fname, int script, struct cmdline_options *o
             if (no_ext_enc && opt->ext.enc.name) {
                 opt->ext.enc.index = opt_enc_index(opt->ext.enc.name);
             }
+            if (no_int_enc && opt->intern.enc.name) {
+                opt->intern.enc.index = opt_enc_index(opt->intern.enc.name);
+            }
         }
         else if (!NIL_P(c)) {
             rb_io_ungetbyte(f, c);
@@ -1511,6 +1544,7 @@ ruby_process_options(int argc, char **argv)
     args.argv = argv;
     args.opt = cmdline_options_init(&opt);
     opt.ext.enc.index = -1;
+    opt.intern.enc.index = -1;
     tree = (NODE *)rb_vm_call_cfunc(rb_vm_top_self(),
                                     process_options, (VALUE)&args,
                                     0, rb_progname);