From 23b280869314c59be4e4fb01d68ebacf753475a9 Mon Sep 17 00:00:00 2001 From: naruse Date: Sat, 27 Aug 2011 09:18:18 +0000 Subject: * strftime.c (rb_strftime_with_timespec): get enc argument to specify the encoding of the format. On Windows (at least Japanese Windows), Time#strftime("%Z") includes non ASCII in locale encoding (CP932). So convert locale to default internal. [ruby-core:39092] [Bug #5226] * strftime.c (rb_strftime): ditto. * strftime.c (rb_strftime_timespec): ditto. * internal.h (rb_strftime_timespec): follow above. * time.c (rb_strftime_alloc): ditto. * time.c (strftimev): ditto. * time.c (time_strftime): ditto. * time.c (time_to_s): the resulted string of Time#to_s is always ascii only, so this should be US-ASCII. * time.c (time_asctime): ditto. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@33094 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 24 ++++++++++++++++++++++++ internal.h | 4 +++- strftime.c | 47 ++++++++++++++++++++++++++++++++++++----------- test/ruby/test_time.rb | 24 ++++++++---------------- time.c | 40 +++++++++++++++++++--------------------- 5 files changed, 90 insertions(+), 49 deletions(-) diff --git a/ChangeLog b/ChangeLog index 5d7e6d719f..2cda9d637c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,27 @@ +Sat Aug 27 18:17:58 2011 NARUSE, Yui + + * strftime.c (rb_strftime_with_timespec): get enc argument to specify + the encoding of the format. On Windows (at least Japanese Windows), + Time#strftime("%Z") includes non ASCII in locale encoding (CP932). + So convert locale to default internal. [ruby-core:39092] [Bug #5226] + + * strftime.c (rb_strftime): ditto. + + * strftime.c (rb_strftime_timespec): ditto. + + * internal.h (rb_strftime_timespec): follow above. + + * time.c (rb_strftime_alloc): ditto. + + * time.c (strftimev): ditto. + + * time.c (time_strftime): ditto. + + * time.c (time_to_s): the resulted string of Time#to_s is always + ascii only, so this should be US-ASCII. + + * time.c (time_asctime): ditto. + Sat Aug 27 11:18:12 2011 Hiroshi Nakamura * Revert r33078. It caused a Rails application NoMethodError. diff --git a/internal.h b/internal.h index dad96606db..6e5773548e 100644 --- a/internal.h +++ b/internal.h @@ -29,6 +29,8 @@ struct rb_classext_struct { struct st_table *const_tbl; }; +typedef struct OnigEncodingTypeST rb_encoding; + #undef RCLASS_SUPER #define RCLASS_EXT(c) (RCLASS(c)->ptr) #define RCLASS_SUPER(c) (RCLASS_EXT(c)->super) @@ -158,7 +160,7 @@ VALUE rb_reg_check_preprocess(VALUE); int rb_get_next_signal(void); /* strftime.c */ -size_t rb_strftime_timespec(char *s, size_t maxsize, const char *format, const struct vtm *vtm, struct timespec *ts, int gmt); +size_t rb_strftime_timespec(char *s, size_t maxsize, const char *format, rb_encoding *enc, const struct vtm *vtm, struct timespec *ts, int gmt); /* string.c */ int rb_str_buf_cat_escaped_char(VALUE result, unsigned int c, int unicode_p); diff --git a/strftime.c b/strftime.c index 1164db01d1..b02c482d3b 100644 --- a/strftime.c +++ b/strftime.c @@ -48,6 +48,7 @@ */ #include "ruby/ruby.h" +#include "ruby/encoding.h" #include "timev.h" #ifndef GAWK @@ -167,13 +168,19 @@ max(int a, int b) /* strftime --- produce formatted time */ +/* + * enc is the encoding of the format. It is used as the encoding of resulted + * string, but the name of the month and weekday are always US-ASCII. So it + * is only used for the timezone name on Windows. + */ static size_t -rb_strftime_with_timespec(char *s, size_t maxsize, const char *format, const struct vtm *vtm, VALUE timev, struct timespec *ts, int gmt) +rb_strftime_with_timespec(char *s, size_t maxsize, const char *format, rb_encoding *enc, const struct vtm *vtm, VALUE timev, struct timespec *ts, int gmt) { const char *const endp = s + maxsize; const char *const start = s; const char *sp, *tp; - auto char tbuf[100]; +#define TBUFSIZE 100 + auto char tbuf[TBUFSIZE]; long off; ptrdiff_t i; int w; @@ -205,6 +212,11 @@ rb_strftime_with_timespec(char *s, size_t maxsize, const char *format, const str return 0; } + if (enc && (enc == rb_usascii_encoding() || + enc == rb_ascii8bit_encoding() || enc == rb_locale_encoding())) { + enc = NULL; + } + for (; *format && s < endp - 1; format++) { #define FLAG_FOUND() do { \ if (precision > 0 || flags & (BIT_OF(LOCALE_E)|BIT_OF(LOCALE_O))) \ @@ -234,7 +246,7 @@ rb_strftime_with_timespec(char *s, size_t maxsize, const char *format, const str } while (0) #define STRFTIME(fmt) \ do { \ - i = rb_strftime_with_timespec(s, endp - s, (fmt), vtm, timev, ts, gmt); \ + i = rb_strftime_with_timespec(s, endp - s, (fmt), enc, vtm, timev, ts, gmt); \ if (!i) return 0; \ if (precision > i) {\ NEEDS(precision); \ @@ -506,11 +518,24 @@ rb_strftime_with_timespec(char *s, size_t maxsize, const char *format, const str tp = "UTC"; break; } - if (vtm->zone == NULL) - tp = ""; - else + if (vtm->zone == NULL) { + i = 0; + } + else { tp = vtm->zone; - i = strlen(tp); + if (enc) { + for (i = 0; i < TBUFSIZE && tp[i]; i++) { + if ((unsigned char)tp[i] > 0x7F) { + VALUE str = rb_str_conv_enc_opts(rb_str_new_cstr(tp), rb_locale_encoding(), enc, ECONV_UNDEF_REPLACE|ECONV_INVALID_REPLACE, Qnil); + i = strlcpy(tbuf, RSTRING_PTR(str), TBUFSIZE); + tp = tbuf; + break; + } + } + } + else + i = strlen(tp); + } break; #ifdef SYSV_EXT @@ -782,15 +807,15 @@ rb_strftime_with_timespec(char *s, size_t maxsize, const char *format, const str } size_t -rb_strftime(char *s, size_t maxsize, const char *format, const struct vtm *vtm, VALUE timev, int gmt) +rb_strftime(char *s, size_t maxsize, const char *format, rb_encoding *enc, const struct vtm *vtm, VALUE timev, int gmt) { - return rb_strftime_with_timespec(s, maxsize, format, vtm, timev, NULL, gmt); + return rb_strftime_with_timespec(s, maxsize, format, enc, vtm, timev, NULL, gmt); } size_t -rb_strftime_timespec(char *s, size_t maxsize, const char *format, const struct vtm *vtm, struct timespec *ts, int gmt) +rb_strftime_timespec(char *s, size_t maxsize, const char *format, rb_encoding *enc, const struct vtm *vtm, struct timespec *ts, int gmt) { - return rb_strftime_with_timespec(s, maxsize, format, vtm, Qnil, ts, gmt); + return rb_strftime_with_timespec(s, maxsize, format, enc, vtm, Qnil, ts, gmt); } /* isleap --- is a year a leap year? */ diff --git a/test/ruby/test_time.rb b/test/ruby/test_time.rb index 32d3b976d0..0f54b65cc7 100644 --- a/test/ruby/test_time.rb +++ b/test/ruby/test_time.rb @@ -15,22 +15,6 @@ class TestTime < Test::Unit::TestCase $VERBOSE = @verbose end - def test_to_s_default_encoding - before = Encoding.default_internal - Encoding.default_internal = nil - assert_equal Encoding::US_ASCII, Time.now.to_s.encoding - ensure - Encoding.default_internal = before - end - - def test_to_s_transcoding - before = Encoding.default_internal - Encoding.default_internal = Encoding::UTF_8 - assert_equal Encoding::UTF_8, Time.now.to_s.encoding - ensure - Encoding.default_internal = before - end - def test_new assert_equal(Time.utc(2000,2,10), Time.new(2000,2,10, 11,0,0, 3600*11)) assert_equal(Time.utc(2000,2,10), Time.new(2000,2,9, 13,0,0, -3600*11)) @@ -433,15 +417,21 @@ class TestTime < Test::Unit::TestCase def test_asctime assert_equal("Sat Jan 1 00:00:00 2000", T2000.asctime) + assert_equal(Encoding::US_ASCII, T2000.asctime.encoding) assert_kind_of(String, Time.at(0).asctime) end def test_to_s assert_equal("2000-01-01 00:00:00 UTC", T2000.to_s) + assert_equal(Encoding::US_ASCII, T2000.to_s.encoding) assert_kind_of(String, Time.at(946684800).getlocal.to_s) assert_equal(Time.at(946684800).getlocal.to_s, Time.at(946684800).to_s) end + def test_zone + assert_equal(Encoding.find('locale'), Time.now.zone.encoding) + end + def test_plus_minus_succ # assert_raise(RangeError) { T2000 + 10000000000 } # assert_raise(RangeError) T2000 - 3094168449 } @@ -565,6 +555,8 @@ class TestTime < Test::Unit::TestCase t = Time.mktime(2001, 10, 1) assert_equal("2001-10-01", t.strftime("%F")) + assert_equal(Encoding::UTF_8, t.strftime("\u3042%Z").encoding) + assert_equal(true, t.strftime("\u3042%Z").valid_encoding?) t = Time.mktime(2001, 10, 1, 2, 0, 0) assert_equal("01", t.strftime("%d")) diff --git a/time.c b/time.c index fbd0ecf198..04ba4c54ed 100644 --- a/time.c +++ b/time.c @@ -3638,7 +3638,7 @@ time_get_tm(VALUE time, struct time_object *tobj) return time_localtime(time); } -static VALUE strftimev(const char *fmt, VALUE time); +static VALUE strftimev(const char *fmt, VALUE time, rb_encoding *enc); /* * call-seq: @@ -3653,7 +3653,7 @@ static VALUE strftimev(const char *fmt, VALUE time); static VALUE time_asctime(VALUE time) { - return strftimev("%a %b %e %T %Y", time); + return strftimev("%a %b %e %T %Y", time, rb_usascii_encoding()); } /* @@ -3679,9 +3679,9 @@ time_to_s(VALUE time) GetTimeval(time, tobj); if (TIME_UTC_P(tobj)) - return strftimev("%Y-%m-%d %H:%M:%S UTC", time); + return strftimev("%Y-%m-%d %H:%M:%S UTC", time, rb_usascii_encoding()); else - return strftimev("%Y-%m-%d %H:%M:%S %z", time); + return strftimev("%Y-%m-%d %H:%M:%S %z", time, rb_usascii_encoding()); } static VALUE @@ -4284,13 +4284,12 @@ time_to_a(VALUE time) } size_t -rb_strftime(char *s, size_t maxsize, const char *format, - const struct vtm *vtm, VALUE timev, - int gmt); +rb_strftime(char *s, size_t maxsize, const char *format, rb_encoding *enc, + const struct vtm *vtm, VALUE timev, int gmt); #define SMALLBUF 100 static size_t -rb_strftime_alloc(char **buf, const char *format, +rb_strftime_alloc(char **buf, const char *format, rb_encoding *enc, struct vtm *vtm, wideval_t timew, int gmt) { size_t size, len, flen; @@ -4307,17 +4306,17 @@ rb_strftime_alloc(char **buf, const char *format, } errno = 0; if (timev == Qnil) - len = rb_strftime_timespec(*buf, SMALLBUF, format, vtm, &ts, gmt); + len = rb_strftime_timespec(*buf, SMALLBUF, format, enc, vtm, &ts, gmt); else - len = rb_strftime(*buf, SMALLBUF, format, vtm, timev, gmt); + len = rb_strftime(*buf, SMALLBUF, format, enc, vtm, timev, gmt); if (len != 0 || (**buf == '\0' && errno != ERANGE)) return len; for (size=1024; ; size*=2) { *buf = xmalloc(size); (*buf)[0] = '\0'; if (timev == Qnil) - len = rb_strftime_timespec(*buf, size, format, vtm, &ts, gmt); + len = rb_strftime_timespec(*buf, size, format, enc, vtm, &ts, gmt); else - len = rb_strftime(*buf, size, format, vtm, timev, gmt); + len = rb_strftime(*buf, size, format, enc, vtm, timev, gmt); /* * buflen can be zero EITHER because there's not enough * room in the string, or because the control command @@ -4336,7 +4335,7 @@ rb_strftime_alloc(char **buf, const char *format, } static VALUE -strftimev(const char *fmt, VALUE time) +strftimev(const char *fmt, VALUE time, rb_encoding *enc) { struct time_object *tobj; char buffer[SMALLBUF], *buf = buffer; @@ -4345,10 +4344,8 @@ strftimev(const char *fmt, VALUE time) GetTimeval(time, tobj); MAKE_TM(time, tobj); - len = rb_strftime_alloc(&buf, fmt, &tobj->vtm, tobj->timew, TIME_UTC_P(tobj)); - str = rb_str_new(buf, len); - rb_enc_associate_index(str, rb_usascii_encindex()); - str = rb_str_export_to_enc(str, rb_default_internal_encoding()); + len = rb_strftime_alloc(&buf, fmt, enc, &tobj->vtm, tobj->timew, TIME_UTC_P(tobj)); + str = rb_enc_str_new(buf, len, enc); if (buf != buffer) xfree(buf); return str; } @@ -4543,6 +4540,7 @@ time_strftime(VALUE time, VALUE format) char buffer[SMALLBUF], *buf = buffer; const char *fmt; long len; + rb_encoding *enc; VALUE str; GetTimeval(time, tobj); @@ -4554,6 +4552,7 @@ time_strftime(VALUE time, VALUE format) format = rb_str_new4(format); fmt = RSTRING_PTR(format); len = RSTRING_LEN(format); + enc = rb_enc_get(format); if (len == 0) { rb_warning("strftime called with empty format string"); } @@ -4563,7 +4562,7 @@ time_strftime(VALUE time, VALUE format) str = rb_str_new(0, 0); while (p < pe) { - len = rb_strftime_alloc(&buf, p, &tobj->vtm, tobj->timew, TIME_UTC_P(tobj)); + len = rb_strftime_alloc(&buf, p, enc, &tobj->vtm, tobj->timew, TIME_UTC_P(tobj)); rb_str_cat(str, buf, len); p += strlen(p); if (buf != buffer) { @@ -4576,12 +4575,11 @@ time_strftime(VALUE time, VALUE format) return str; } else { - len = rb_strftime_alloc(&buf, RSTRING_PTR(format), + len = rb_strftime_alloc(&buf, RSTRING_PTR(format), enc, &tobj->vtm, tobj->timew, TIME_UTC_P(tobj)); } - str = rb_str_new(buf, len); + str = rb_enc_str_new(buf, len, enc); if (buf != buffer) xfree(buf); - rb_enc_copy(str, format); return str; } -- cgit v1.2.3