summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: matz <matz@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2008-10-07 17:39:44 +0000
committer: matz <matz@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2008-10-07 17:39:44 +0000
commit: baeeebf474cae027bc3d2e80a72a540612874ef9 (patch)
tree: 2e18a3411a9cdd58c64185fd72b9d11485d6e332
parent: 0b184b473b1baea42f4093f23d64b24cf4a565fd (diff)
* encoding.c (rb_default_internal_encoding): merged a patch from
  Michael Selig <michael.selig at fs.com.au> in [ruby-core:18985].

* io.c (rb_io_ext_int_to_encs): ditto.

* ruby.c (proc_options): support default internal encoding in -E
  option.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@19709 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- ChangeLog 10
-rw-r--r-- encoding.c 50
-rw-r--r-- include/ruby/encoding.h 3
-rw-r--r-- io.c 166
-rw-r--r-- ruby.c 38
5 files changed, 206 insertions, 61 deletions
diff --git a/ChangeLog b/ChangeLog
index eb47212eaf..93b019892b 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,13 @@
+Wed Oct 8 02:38:28 2008 Yukihiro Matsumoto <matz@ruby-lang.org>
+
+ * encoding.c (rb_default_internal_encoding): merged a patch from
+ Michael Selig <michael.selig at fs.com.au> in [ruby-core:18985].
+
+ * io.c (rb_io_ext_int_to_encs): ditto.
+
+ * ruby.c (proc_options): support default internal encoding in -E
+ option.
+
Wed Oct 8 00:03:39 2008 Tadayoshi Funaba <tadf@dotrb.org>
* lib/date.rb (today,now): should produce own instances.
diff --git a/encoding.c b/encoding.c
index 35130a6aaa..f17ed1882c 100644
--- a/encoding.c
+++ b/encoding.c
@@ -1027,6 +1027,55 @@ rb_enc_set_default_external(VALUE encoding)
default_external = 0;
}
+/* -2 => not yet set, -1 => nil */
+static int default_internal_index = -2;
+static rb_encoding *default_internal;
+
+rb_encoding *
+rb_default_internal_encoding(void)
+{
+ if (!default_internal && default_internal_index >= 0) {
+ default_internal = rb_enc_from_index(default_internal_index);
+ }
+ return default_internal;
+}
+
+VALUE
+rb_enc_default_internal(void)
+{
+ /* Note: These functions cope with default_internal not being set */
+ return rb_enc_from_encoding(rb_default_internal_encoding());
+}
+
+/*
+ * call-seq:
+ * Encoding.default_internal => enc
+ *
+ * Returns default internal encoding.
+ *
+ * It is initialized by the source internal_encoding or -E option,
+ * and can't be modified after that.
+ */
+static VALUE
+get_default_internal(VALUE klass)
+{
+ return rb_enc_default_internal();
+}
+
+void
+rb_enc_set_default_internal(VALUE encoding)
+{
+ if (default_internal_index != -2)
+ /* Already set */
+ return;
+ default_internal_index = encoding == Qnil ?
+ -1 :rb_enc_to_index(rb_to_encoding(encoding));
+ /* Convert US-ASCII => UTF-8 */
+ if (default_internal_index == rb_usascii_encindex())
+ default_internal_index = rb_utf8_encindex();
+ default_internal = 0;
+}
+
/*
* call-seq:
* Encoding.locale_charmap => string
@@ -1212,6 +1261,7 @@ Init_Encoding(void)
rb_define_singleton_method(rb_cEncoding, "_load", enc_load, 1);
rb_define_singleton_method(rb_cEncoding, "default_external", get_default_external, 0);
+ rb_define_singleton_method(rb_cEncoding, "default_internal", get_default_internal, 0);
rb_define_singleton_method(rb_cEncoding, "locale_charmap", rb_locale_charmap, 0);
list = rb_ary_new2(enc_table.count);
diff --git a/include/ruby/encoding.h b/include/ruby/encoding.h
index 1d12934356..9049df3fa2 100644
--- a/include/ruby/encoding.h
+++ b/include/ruby/encoding.h
@@ -168,11 +168,14 @@ rb_encoding *rb_usascii_encoding(void);
rb_encoding *rb_locale_encoding(void);
rb_encoding *rb_filesystem_encoding(void);
rb_encoding *rb_default_external_encoding(void);
+rb_encoding *rb_default_internal_encoding(void);
int rb_ascii8bit_encindex(void);
int rb_utf8_encindex(void);
int rb_usascii_encindex(void);
VALUE rb_enc_default_external(void);
+VALUE rb_enc_default_internal(void);
void rb_enc_set_default_external(VALUE encoding);
+void rb_enc_set_default_internal(VALUE encoding);
VALUE rb_locale_charmap(VALUE klass);
long rb_memsearch(const void*,long,const void*,long,rb_encoding*);
diff --git a/io.c b/io.c
index a4391d5761..5bb1bda801 100644
--- a/io.c
+++ b/io.c
@@ -2177,10 +2177,8 @@ rb_io_getline_1(VALUE rs, long limit, VALUE io)
}
newline = (unsigned char)rsptr[rslen - 1];
- if (fptr->encs.enc2)
- enc = fptr->encs.enc;
- else
- enc = io_input_encoding(fptr);
+ /* MS - Optimisation */
+ enc = io_read_encoding(fptr);
while ((c = appendline(fptr, newline, &str, &limit)) != EOF) {
const char *s, *p, *pp, *e;
@@ -3740,52 +3738,87 @@ rb_io_oflags_modestr(int oflags)
return NULL; /* not reached */
}
+/*
+ * Convert external/internal encodings to enc/enc2
+ * NULL => use default encoding
+ * Qnil => no encoding specified (internal only)
+ */
+static void
+rb_io_ext_int_to_encs(rb_encoding *ext, rb_encoding *intern, rb_encoding **enc, rb_encoding **enc2)
+{
+ int default_ext = 0;
+
+ if (ext == NULL) {
+ ext = rb_default_external_encoding();
+ default_ext = 1;
+ }
+ if (intern == NULL && ext != rb_ascii8bit_encoding())
+ /* If external is ASCII-8BIT, no default transcoding */
+ intern = rb_default_internal_encoding();
+ if (intern == NULL || intern == (rb_encoding *)Qnil || intern == ext) {
+ /* No internal encoding => use external + no transcoding */
+ *enc = default_ext ? NULL : ext;
+ *enc2 = NULL;
+ }
+ else {
+ *enc = intern;
+ *enc2 = ext;
+ }
+}
+
static void
parse_mode_enc(const char *estr, rb_encoding **enc_p, rb_encoding **enc2_p)
{
- const char *p0, *p1;
- char *enc2name;
+ const char *p;
+ char encname[ENCODING_MAXNAMELEN+1];
int idx, idx2;
+ rb_encoding *ext_enc, *int_enc;
- /* parse estr as "enc" or "enc2:enc" */
-
- *enc_p = 0;
- *enc2_p = 0;
+ /* parse estr as "enc" or "enc2:enc" or "enc:-" */
- p0 = strrchr(estr, ':');
- if (!p0) p1 = estr;
- else p1 = p0 + 1;
- idx = rb_enc_find_index(p1);
- if (idx >= 0) {
- *enc_p = rb_enc_from_index(idx);
+ p = strrchr(estr, ':');
+ if (p) {
+ int len = (p++) - estr;
+ if (len == 0 || len > ENCODING_MAXNAMELEN)
+ idx = -1;
+ else {
+ memcpy(encname, estr, len);
+ encname[len] = '\0';
+ estr = encname;
+ idx = rb_enc_find_index(encname);
+ }
}
+ else
+ idx = rb_enc_find_index(estr);
+
+ if (idx >= 0)
+ ext_enc = rb_enc_from_index(idx);
else {
- rb_warn("Unsupported encoding %s ignored", p1);
+ if (idx != -2)
+ rb_warn("Unsupported encoding %s ignored", estr);
+ ext_enc = NULL;
}
- if (*enc_p && p0) {
- int n = p0 - estr;
- if (n > ENCODING_MAXNAMELEN) {
- idx2 = -1;
- }
- else {
- enc2name = ALLOCA_N(char, n+1);
- memcpy(enc2name, estr, n);
- enc2name[n] = '\0';
- estr = enc2name;
- idx2 = rb_enc_find_index(enc2name);
- }
- if (idx2 < 0) {
- rb_warn("Unsupported encoding %.*s ignored", n, estr);
- }
- else if (idx2 == idx) {
- rb_warn("Ignoring internal encoding %.*s: it is identical to external encoding %s",
- n, estr, p1);
+ int_enc = NULL;
+ if (p) {
+ if (*p == '-' && *(p+1) == '\0') {
+ /* Special case - "-" => no transcoding */
+ int_enc = (rb_encoding *)Qnil;
}
else {
- *enc2_p = rb_enc_from_index(idx2);
+ idx2 = rb_enc_find_index(p);
+ if (idx2 < 0)
+ rb_warn("Unsupported encoding %s ignored", p);
+ else if (idx2 == idx) {
+ rb_warn("Ignoring internal encoding %s: it is identical to external encoding %s", p, estr);
+ int_enc = (rb_encoding *)Qnil;
+ }
+ else
+ int_enc = rb_enc_from_index(idx2);
}
}
+
+ rb_io_ext_int_to_encs(ext_enc, int_enc, enc_p, enc2_p);
}
static void
@@ -3821,28 +3854,32 @@ io_extract_encoding_option(VALUE opt, rb_encoding **enc_p, rb_encoding **enc2_p)
}
if (!NIL_P(extenc)) {
rb_encoding *extencoding = rb_to_encoding(extenc);
+ rb_encoding *intencoding = NULL;
extracted = 1;
- *enc_p = 0;
- *enc2_p = 0;
if (!NIL_P(encoding)) {
rb_warn("Ignoring encoding parameter '%s': external_encoding is used",
RSTRING_PTR(encoding));
}
if (!NIL_P(intenc)) {
- rb_encoding *intencoding = rb_to_encoding(intenc);
+ if (!NIL_P(encoding = rb_check_string_type(intenc))) {
+ char *p = StringValueCStr(encoding);
+ if (*p == '-' && *(p+1) == '\0') {
+ /* Special case - "-" => no transcoding */
+ intencoding = (rb_encoding *)Qnil;
+ }
+ else
+ intencoding = rb_to_encoding(intenc);
+ }
+ else
+ intencoding = rb_to_encoding(intenc);
if (extencoding == intencoding) {
rb_warn("Ignoring internal encoding '%s': it is identical to external encoding '%s'",
RSTRING_PTR(rb_inspect(intenc)),
RSTRING_PTR(rb_inspect(extenc)));
- }
- else {
- *enc_p = intencoding;
- *enc2_p = extencoding;
+ intencoding = (rb_encoding *)Qnil;
}
}
- else {
- *enc_p = extencoding;
- }
+ rb_io_ext_int_to_encs(extencoding, intencoding, enc_p, enc2_p);
}
else {
if (!NIL_P(intenc)) {
@@ -3882,8 +3919,8 @@ rb_io_extract_modeenc(VALUE *vmode_p, VALUE *vperm_p, VALUE opthash,
vmode = *vmode_p;
- enc = NULL;
- enc2 = NULL;
+ /* Set to defaults */
+ rb_io_ext_int_to_encs(NULL, NULL, &enc, &enc2);
if (NIL_P(vmode)) {
fmode = FMODE_READABLE;
@@ -4070,8 +4107,8 @@ rb_file_open_generic(VALUE io, VALUE filename, int oflags, int fmode, convconfig
rb_io_t *fptr;
convconfig_t cc;
if (!convconfig) {
- cc.enc = NULL;
- cc.enc2 = NULL;
+ /* Set to default encodings */
+ rb_io_ext_int_to_encs(NULL, NULL, &cc.enc, &cc.enc2);
cc.ecflags = 0;
cc.ecopts = Qnil;
convconfig = &cc;
@@ -4099,8 +4136,8 @@ rb_file_open_internal(VALUE io, VALUE filename, const char *modestr)
parse_mode_enc(p+1, &convconfig.enc, &convconfig.enc2);
}
else {
- convconfig.enc = NULL;
- convconfig.enc2 = NULL;
+ /* Set to default encodings */
+ rb_io_ext_int_to_encs(NULL, NULL, &convconfig.enc, &convconfig.enc2);
convconfig.ecflags = 0;
convconfig.ecopts = Qnil;
}
@@ -6661,29 +6698,40 @@ io_encoding_set(rb_io_t *fptr, VALUE v1, VALUE v2, VALUE opt)
{
rb_encoding *enc, *enc2;
int ecflags;
- VALUE ecopts;
+ VALUE ecopts, tmp;
if (!NIL_P(v2)) {
enc2 = rb_to_encoding(v1);
- enc = rb_to_encoding(v2);
+ tmp = rb_check_string_type(v2);
+ if (!NIL_P(tmp)) {
+ char *p = StringValueCStr(tmp);
+ if (*p == '-' && *(p+1) == '\0') {
+ /* Special case - "-" => no transcoding */
+ enc = enc2;
+ enc2 = NULL;
+ }
+ else
+ enc = rb_to_encoding(v2);
+ }
+ else
+ enc = rb_to_encoding(v2);
ecflags = rb_econv_prepare_opts(opt, &ecopts);
}
else {
if (NIL_P(v1)) {
- enc = NULL;
- enc2 = NULL;
+ /* Set to default encodings */
+ rb_io_ext_int_to_encs(NULL, NULL, &enc, &enc2);
ecflags = 0;
ecopts = Qnil;
}
else {
- VALUE tmp = rb_check_string_type(v1);
+ tmp = rb_check_string_type(v1);
if (!NIL_P(tmp)) {
parse_mode_enc(StringValueCStr(tmp), &enc, &enc2);
ecflags = rb_econv_prepare_opts(opt, &ecopts);
}
else {
- enc = rb_to_encoding(v1);
- enc2 = NULL;
+ rb_io_ext_int_to_encs(rb_to_encoding(v1), NULL, &enc, &enc2);
ecflags = 0;
ecopts = Qnil;
}
diff --git a/ruby.c b/ruby.c
index b55174e705..67b5a4fd3a 100644
--- a/ruby.c
+++ b/ruby.c
@@ -86,7 +86,7 @@ struct cmdline_options {
VALUE name;
int index;
} enc;
- } src, ext;
+ } src, ext, intern;
VALUE req_list;
};
@@ -855,6 +855,7 @@ proc_options(int argc, char **argv, struct cmdline_options *opt)
ruby_each_words(s, disable_option, &opt->disable);
}
else if (strncmp("encoding", s, n = 8) == 0 && (!s[n] || s[n] == '=')) {
+ char *p;
s += n;
if (!*s++) {
next_encoding:
@@ -863,7 +864,15 @@ proc_options(int argc, char **argv, struct cmdline_options *opt)
}
}
encoding:
- opt->ext.enc.name = rb_str_new2(s);
+ p = strchr(s, ':');
+ if (p) {
+ if (p > s)
+ opt->ext.enc.name = rb_str_new(s, p-s);
+ if (*++p)
+ opt->intern.enc.name = rb_str_new2(p);
+ }
+ else
+ opt->ext.enc.name = rb_str_new2(s);
}
else if (strcmp("version", s) == 0)
opt->version = 1;
@@ -966,6 +975,7 @@ process_options(VALUE arg)
rb_safe_level() == 0 && (s = getenv("RUBYOPT"))) {
VALUE src_enc_name = opt->src.enc.name;
VALUE ext_enc_name = opt->ext.enc.name;
+ VALUE int_enc_name = opt->intern.enc.name;
while (ISSPACE(*s))
s++;
@@ -1005,6 +1015,8 @@ process_options(VALUE arg)
opt->src.enc.name = src_enc_name;
if (ext_enc_name)
opt->ext.enc.name = ext_enc_name;
+ if (int_enc_name)
+ opt->intern.enc.name = int_enc_name;
}
if (opt->version) {
@@ -1073,6 +1085,9 @@ process_options(VALUE arg)
if (opt->ext.enc.name != 0) {
opt->ext.enc.index = opt_enc_index(opt->ext.enc.name);
}
+ if (opt->intern.enc.name != 0) {
+ opt->intern.enc.index = opt_enc_index(opt->intern.enc.name);
+ }
if (opt->src.enc.name != 0) {
opt->src.enc.index = opt_enc_index(opt->src.enc.name);
src_encoding_index = opt->src.enc.index;
@@ -1084,6 +1099,11 @@ process_options(VALUE arg)
enc = lenc;
}
rb_enc_set_default_external(rb_enc_from_encoding(enc));
+ if (opt->intern.enc.index >= 0) {
+ enc = rb_enc_from_index(opt->intern.enc.index);
+ rb_enc_set_default_internal(rb_enc_from_encoding(enc));
+ opt->intern.enc.index = -1;
+ }
rb_set_safe_level_force(safe);
if (opt->e_script) {
@@ -1105,6 +1125,15 @@ process_options(VALUE arg)
tree = load_file(parser, opt->script, 1, opt);
}
+ if (opt->intern.enc.index >= 0) {
+ /* Set in the shebang line */
+ enc = rb_enc_from_index(opt->intern.enc.index);
+ rb_enc_set_default_internal(rb_enc_from_encoding(enc));
+ }
+ else
+ /* Freeze default_internal */
+ rb_enc_set_default_internal(Qnil);
+
if (!tree) return Qfalse;
process_sflag(opt);
@@ -1175,6 +1204,7 @@ load_file(VALUE parser, const char *fname, int script, struct cmdline_options *o
char *p;
int no_src_enc = !opt->src.enc.name;
int no_ext_enc = !opt->ext.enc.name;
+ int no_int_enc = !opt->intern.enc.name;
enc = rb_usascii_encoding();
rb_funcall(f, rb_intern("set_encoding"), 1, rb_enc_from_encoding(enc));
@@ -1261,6 +1291,9 @@ load_file(VALUE parser, const char *fname, int script, struct cmdline_options *o
if (no_ext_enc && opt->ext.enc.name) {
opt->ext.enc.index = opt_enc_index(opt->ext.enc.name);
}
+ if (no_int_enc && opt->intern.enc.name) {
+ opt->intern.enc.index = opt_enc_index(opt->intern.enc.name);
+ }
}
else if (!NIL_P(c)) {
rb_io_ungetbyte(f, c);
@@ -1511,6 +1544,7 @@ ruby_process_options(int argc, char **argv)
args.argv = argv;
args.opt = cmdline_options_init(&opt);
opt.ext.enc.index = -1;
+ opt.intern.enc.index = -1;
tree = (NODE *)rb_vm_call_cfunc(rb_vm_top_self(),
process_options, (VALUE)&args,
0, rb_progname);