diff options
Diffstat (limited to 'ext/date/date_parse.c')
| -rw-r--r-- | ext/date/date_parse.c | 802 |
1 files changed, 449 insertions, 353 deletions
diff --git a/ext/date/date_parse.c b/ext/date/date_parse.c index 7ac9e79836..a1600e4708 100644 --- a/ext/date/date_parse.c +++ b/ext/date/date_parse.c @@ -7,6 +7,12 @@ #include "ruby/re.h" #include <ctype.h> +#undef strncasecmp +#define strncasecmp STRNCASECMP + +RUBY_EXTERN VALUE rb_int_positive_pow(long x, unsigned long y); +RUBY_EXTERN unsigned long ruby_scan_digits(const char *str, ssize_t len, int base, size_t *retlen, int *overflow); + /* #define TIGHT_PARSER */ #define sizeof_array(o) (sizeof o / sizeof o[0]) @@ -37,19 +43,19 @@ #define f_sub_bang(s,r,x) rb_funcall(s, rb_intern("sub!"), 2, r, x) #define f_gsub_bang(s,r,x) rb_funcall(s, rb_intern("gsub!"), 2, r, x) -#define set_hash(k,v) rb_hash_aset(hash, ID2SYM(rb_intern(k)), v) -#define ref_hash(k) rb_hash_aref(hash, ID2SYM(rb_intern(k))) -#define del_hash(k) rb_hash_delete(hash, ID2SYM(rb_intern(k))) +#define set_hash(k,v) rb_hash_aset(hash, ID2SYM(rb_intern(k"")), v) +#define ref_hash(k) rb_hash_aref(hash, ID2SYM(rb_intern(k""))) +#define del_hash(k) rb_hash_delete(hash, ID2SYM(rb_intern(k""))) #define cstr2num(s) rb_cstr_to_inum(s, 10, 0) #define str2num(s) rb_str_to_inum(s, 10, 0) -static const char *abbr_days[] = { +static const char abbr_days[][4] = { "sun", "mon", "tue", "wed", "thu", "fri", "sat" }; -static const char *abbr_months[] = { +static const char abbr_months[][4] = { "jan", "feb", "mar", "apr", "may", "jun", "jul", "aug", "sep", "oct", "nov", "dec" }; @@ -57,16 +63,27 @@ static const char *abbr_months[] = { #define issign(c) ((c) == '-' || (c) == '+') #define asp_string() rb_str_new(" ", 1) #ifdef TIGHT_PARSER +#define asuba_string() rb_str_new("\001", 1) +#define asubb_string() rb_str_new("\002", 1) #define asubw_string() rb_str_new("\027", 1) #define asubt_string() rb_str_new("\024", 1) #endif +static size_t +digit_span(const char *s, const char *e) +{ + size_t i = 0; + while (s + i < e && isdigit((unsigned char)s[i])) i++; + return i; +} + static void s3e(VALUE hash, VALUE y, VALUE m, VALUE d, int bc) { + VALUE vbuf = 0; VALUE c = Qnil; - if (TYPE(m) != T_STRING) + if (!RB_TYPE_P(m, T_STRING)) m = f_to_s(m); if (!NIL_P(y) && !NIL_P(m) && NIL_P(d)) { @@ -84,7 +101,7 @@ s3e(VALUE hash, VALUE y, VALUE m, VALUE d, int bc) y = d; d = Qnil; } - if (!NIL_P(d) && *RSTRING_PTR(d) == '\'') { + if (!NIL_P(d) && RSTRING_LEN(d) > 0 && *RSTRING_PTR(d) == '\'') { y = d; d = Qnil; } @@ -95,17 +112,20 @@ s3e(VALUE hash, VALUE y, VALUE m, VALUE d, int bc) size_t l; s = RSTRING_PTR(y); - while (!issign((unsigned char)*s) && !isdigit((unsigned char)*s)) + ep = RSTRING_END(y); + while (s < ep && !issign(*s) && !isdigit((unsigned char)*s)) s++; + if (s >= ep) goto no_date; bp = s; if (issign((unsigned char)*s)) s++; - l = strspn(s, "0123456789"); + l = digit_span(s, ep); ep = s + l; if (*ep) { y = d; d = rb_str_new(bp, ep - bp); } + no_date:; } if (!NIL_P(m)) { @@ -144,8 +164,10 @@ s3e(VALUE hash, VALUE y, VALUE m, VALUE d, int bc) VALUE iy; s = RSTRING_PTR(y); - while (!issign((unsigned char)*s) && !isdigit((unsigned char)*s)) + ep = RSTRING_END(y); + while (s < ep && !issign(*s) && !isdigit((unsigned char)*s)) s++; + if (s >= ep) goto no_year; bp = s; if (issign(*s)) { s++; @@ -153,43 +175,50 @@ s3e(VALUE hash, VALUE y, VALUE m, VALUE d, int bc) } if (sign) c = Qfalse; - l = strspn(s, "0123456789"); + l = digit_span(s, ep); ep = s + l; if (l > 2) c = Qfalse; { char *buf; - buf = ALLOCA_N(char, ep - bp + 1); + buf = ALLOCV_N(char, vbuf, ep - bp + 1); memcpy(buf, bp, ep - bp); buf[ep - bp] = '\0'; iy = cstr2num(buf); + ALLOCV_END(vbuf); } - if (bc) - iy = f_add(f_negate(iy), INT2FIX(1)); set_hash("year", iy); + no_year:; } + if (bc) + set_hash("_bc", Qtrue); + if (!NIL_P(m)) { const char *s, *bp, *ep; size_t l; VALUE im; s = RSTRING_PTR(m); - while (!isdigit((unsigned char)*s)) + ep = RSTRING_END(m); + while (s < ep && !isdigit((unsigned char)*s)) s++; + if (s >= ep) goto no_month; bp = s; - l = strspn(s, "0123456789"); + l = digit_span(s, ep); ep = s + l; { char *buf; - buf = ALLOCA_N(char, ep - bp + 1); + buf = ALLOCV_N(char, vbuf, ep - bp + 1); memcpy(buf, bp, ep - bp); buf[ep - bp] = '\0'; im = cstr2num(buf); + ALLOCV_END(vbuf); } set_hash("mon", im); + no_month:; } if (!NIL_P(d)) { @@ -198,20 +227,24 @@ s3e(VALUE hash, VALUE y, VALUE m, VALUE d, int bc) VALUE id; s = RSTRING_PTR(d); - while (!isdigit((unsigned char)*s)) + ep = RSTRING_END(d); + while (s < ep && !isdigit((unsigned char)*s)) s++; + if (s >= ep) goto no_mday; bp = s; - l = strspn(s, "0123456789"); + l = digit_span(s, ep); ep = s + l; { char *buf; - buf = ALLOCA_N(char, ep - bp + 1); + buf = ALLOCV_N(char, vbuf, ep - bp + 1); memcpy(buf, bp, ep - bp); buf[ep - bp] = '\0'; id = cstr2num(buf); + ALLOCV_END(vbuf); } set_hash("mday", id); + no_mday:; } if (!NIL_P(c)) @@ -223,11 +256,15 @@ s3e(VALUE hash, VALUE y, VALUE m, VALUE d, int bc) #define ABBR_DAYS "sun|mon|tue|wed|thu|fri|sat" #define ABBR_MONTHS "jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec" +#define NUMBER "(?<!\\d)\\d" + #ifdef TIGHT_PARSER -#define VALID_DAYS "(?:" DAYS ")\\b" "|(?:tues|wednes|thur|thurs|" ABBR_DAYS ")\\b\\.?" -#define VALID_MONTHS "(?:" MONTHS ")\\b" "|(?:sept|" ABBR_MONTHS ")\\b\\.?" -#define DOTLESS_VALID_MONTHS "(?:" MONTHS ")\\b" "|(?:sept|" ABBR_MONTHS ")\\b" +#define VALID_DAYS "(?:" DAYS ")" "|(?:tues|wednes|thurs|thur|" ABBR_DAYS ")\\.?" +#define VALID_MONTHS "(?:" MONTHS ")" "|(?:sept|" ABBR_MONTHS ")\\.?" +#define DOTLESS_VALID_MONTHS "(?:" MONTHS ")" "|(?:sept|" ABBR_MONTHS ")" #define BOS "\\A\\s*" +#define FPA "\\001" +#define FPB "\\002" #define FPW "\\027" #define FPT "\\024" #define FPW_COM "\\s*(?:" FPW "\\s*,?)?\\s*" @@ -244,23 +281,24 @@ regcomp(const char *source, long len, int opt) VALUE pat; pat = rb_reg_new(source, len, opt); + rb_obj_freeze(pat); rb_gc_register_mark_object(pat); return pat; } #define REGCOMP(pat,opt) \ -{ \ +do { \ if (NIL_P(pat)) \ pat = regcomp(pat##_source, sizeof pat##_source - 1, opt); \ -} +} while (0) #define REGCOMP_0(pat) REGCOMP(pat, 0) #define REGCOMP_I(pat) REGCOMP(pat, ONIG_OPTION_IGNORECASE) #define MATCH(s,p,c) \ -{ \ +do { \ return match(s, p, hash, c); \ -} +} while (0) static int match(VALUE str, VALUE pat, VALUE hash, int (*cb)(VALUE, VALUE)) @@ -300,289 +338,218 @@ subx(VALUE str, VALUE rep, VALUE pat, VALUE hash, int (*cb)(VALUE, VALUE)) } #define SUBS(s,p,c) \ -{ \ +do { \ return subx(s, asp_string(), p, hash, c); \ -} +} while (0) #ifdef TIGHT_PARSER +#define SUBA(s,p,c) \ +do { \ + return subx(s, asuba_string(), p, hash, c); \ +} while (0) + +#define SUBB(s,p,c) \ +do { \ + return subx(s, asubb_string(), p, hash, c); \ +} while (0) + #define SUBW(s,p,c) \ -{ \ +do { \ return subx(s, asubw_string(), p, hash, c); \ -} +} while (0) #define SUBT(s,p,c) \ -{ \ +do { \ return subx(s, asubt_string(), p, hash, c); \ -} +} while (0) #endif -struct zone { - const char *name; - int offset; -}; - -static struct zone zones_source[] = { - {"ut", 0*3600}, {"gmt", 0*3600}, {"est", -5*3600}, {"edt", -4*3600}, - {"cst", -6*3600}, {"cdt", -5*3600}, {"mst", -7*3600}, {"mdt", -6*3600}, - {"pst", -8*3600}, {"pdt", -7*3600}, - {"a", 1*3600}, {"b", 2*3600}, {"c", 3*3600}, {"d", 4*3600}, - {"e", 5*3600}, {"f", 6*3600}, {"g", 7*3600}, {"h", 8*3600}, - {"i", 9*3600}, {"k", 10*3600}, {"l", 11*3600}, {"m", 12*3600}, - {"n", -1*3600}, {"o", -2*3600}, {"p", -3*3600}, {"q", -4*3600}, - {"r", -5*3600}, {"s", -6*3600}, {"t", -7*3600}, {"u", -8*3600}, - {"v", -9*3600}, {"w", -10*3600}, {"x", -11*3600}, {"y", -12*3600}, - {"z", 0*3600}, - - {"utc", 0*3600}, {"wet", 0*3600}, - {"at", -2*3600}, {"brst",-2*3600}, {"ndt", -(2*3600+1800)}, - {"art", -3*3600}, {"adt", -3*3600}, {"brt", -3*3600}, {"clst",-3*3600}, - {"nst", -(3*3600+1800)}, - {"ast", -4*3600}, {"clt", -4*3600}, - {"akdt",-8*3600}, {"ydt", -8*3600}, - {"akst",-9*3600}, {"hadt",-9*3600}, {"hdt", -9*3600}, {"yst", -9*3600}, - {"ahst",-10*3600},{"cat",-10*3600}, {"hast",-10*3600},{"hst",-10*3600}, - {"nt", -11*3600}, - {"idlw",-12*3600}, - {"bst", 1*3600}, {"cet", 1*3600}, {"fwt", 1*3600}, {"met", 1*3600}, - {"mewt", 1*3600}, {"mez", 1*3600}, {"swt", 1*3600}, {"wat", 1*3600}, - {"west", 1*3600}, - {"cest", 2*3600}, {"eet", 2*3600}, {"fst", 2*3600}, {"mest", 2*3600}, - {"mesz", 2*3600}, {"sast", 2*3600}, {"sst", 2*3600}, - {"bt", 3*3600}, {"eat", 3*3600}, {"eest", 3*3600}, {"msk", 3*3600}, - {"msd", 4*3600}, {"zp4", 4*3600}, - {"zp5", 5*3600}, {"ist", (5*3600+1800)}, - {"zp6", 6*3600}, - {"wast", 7*3600}, - {"cct", 8*3600}, {"sgt", 8*3600}, {"wadt", 8*3600}, - {"jst", 9*3600}, {"kst", 9*3600}, - {"east",10*3600}, {"gst", 10*3600}, - {"eadt",11*3600}, - {"idle",12*3600}, {"nzst",12*3600}, {"nzt", 12*3600}, - {"nzdt",13*3600}, - - {"afghanistan", 16200}, {"alaskan", -32400}, - {"arab", 10800}, {"arabian", 14400}, - {"arabic", 10800}, {"atlantic", -14400}, - {"aus central", 34200}, {"aus eastern", 36000}, - {"azores", -3600}, {"canada central", -21600}, - {"cape verde", -3600}, {"caucasus", 14400}, - {"cen. australia", 34200}, {"central america", -21600}, - {"central asia", 21600}, {"central europe", 3600}, - {"central european", 3600}, {"central pacific", 39600}, - {"central", -21600}, {"china", 28800}, - {"dateline", -43200}, {"e. africa", 10800}, - {"e. australia", 36000}, {"e. europe", 7200}, - {"e. south america", -10800}, {"eastern", -18000}, - {"egypt", 7200}, {"ekaterinburg", 18000}, - {"fiji", 43200}, {"fle", 7200}, - {"greenland", -10800}, {"greenwich", 0}, - {"gtb", 7200}, {"hawaiian", -36000}, - {"india", 19800}, {"iran", 12600}, - {"jerusalem", 7200}, {"korea", 32400}, - {"mexico", -21600}, {"mid-atlantic", -7200}, - {"mountain", -25200}, {"myanmar", 23400}, - {"n. central asia", 21600}, {"nepal", 20700}, - {"new zealand", 43200}, {"newfoundland", -12600}, - {"north asia east", 28800}, {"north asia", 25200}, - {"pacific sa", -14400}, {"pacific", -28800}, - {"romance", 3600}, {"russian", 10800}, - {"sa eastern", -10800}, {"sa pacific", -18000}, - {"sa western", -14400}, {"samoa", -39600}, - {"se asia", 25200}, {"malay peninsula", 28800}, - {"south africa", 7200}, {"sri lanka", 21600}, - {"taipei", 28800}, {"tasmania", 36000}, - {"tokyo", 32400}, {"tonga", 46800}, - {"us eastern", -18000}, {"us mountain", -25200}, - {"vladivostok", 36000}, {"w. australia", 28800}, - {"w. central africa", 3600}, {"w. europe", 3600}, - {"west asia", 18000}, {"west pacific", 36000}, - {"yakutsk", 32400} -}; +#include "zonetab.h" -VALUE -date_zone_to_diff(VALUE str) +static int +str_end_with_word(const char *s, long l, const char *w) { - VALUE offset = Qnil; - - long l, i; - char *s, *dest, *d; - int sp = 1; - - l = RSTRING_LEN(str); - s = RSTRING_PTR(str); - - dest = d = ALLOCA_N(char, l + 1); + int n = (int)strlen(w); + if (l <= n || !isspace((unsigned char)s[l - n - 1])) return 0; + if (strncasecmp(&s[l - n], w, n)) return 0; + do ++n; while (l > n && isspace((unsigned char)s[l - n - 1])); + return n; +} - for (i = 0; i < l; i++) { - if (isspace((unsigned char)s[i]) || s[i] == '\0') { - if (!sp) - *d++ = ' '; - sp = 1; +static long +shrunk_size(const char *s, long l) +{ + long i, ni; + int sp = 0; + for (i = ni = 0; i < l; ++i) { + if (!isspace((unsigned char)s[i])) { + if (sp) ni++; + sp = 0; + ni++; } else { - if (isalpha((unsigned char)s[i])) - *d++ = tolower((unsigned char)s[i]); - else - *d++ = s[i]; - sp = 0; + sp = 1; } } - if (d > dest) { - if (*(d - 1) == ' ') - --d; - *d = '\0'; - } - str = rb_str_new2(dest); - { -#define STD " standard time" -#define DST " daylight time" - char *ss, *ds; - long sl, dl; - int dst = 0; - - sl = RSTRING_LEN(str) - (sizeof STD - 1); - ss = RSTRING_PTR(str) + sl; - dl = RSTRING_LEN(str) - (sizeof DST - 1); - ds = RSTRING_PTR(str) + dl; + return ni < l ? ni : 0; +} - if (sl >= 0 && strcmp(ss, STD) == 0) { - str = rb_str_new(RSTRING_PTR(str), sl); - } - else if (dl >= 0 && strcmp(ds, DST) == 0) { - str = rb_str_new(RSTRING_PTR(str), dl); - dst = 1; +static long +shrink_space(char *d, const char *s, long l) +{ + long i, ni; + int sp = 0; + for (i = ni = 0; i < l; ++i) { + if (!isspace((unsigned char)s[i])) { + if (sp) d[ni++] = ' '; + sp = 0; + d[ni++] = s[i]; } -#undef STD -#undef DST else { -#define DST " dst" - char *ds; - long dl; + sp = 1; + } + } + return ni; +} - dl = RSTRING_LEN(str) - (sizeof DST - 1); - ds = RSTRING_PTR(str) + dl; +VALUE +date_zone_to_diff(VALUE str) +{ + VALUE offset = Qnil; + long l = RSTRING_LEN(str); + const char *s = RSTRING_PTR(str); + + { + int dst = 0; + int w; - if (dl >= 0 && strcmp(ds, DST) == 0) { - str = rb_str_new(RSTRING_PTR(str), dl); + if ((w = str_end_with_word(s, l, "time")) > 0) { + int wtime = w; + l -= w; + if ((w = str_end_with_word(s, l, "standard")) > 0) { + l -= w; + } + else if ((w = str_end_with_word(s, l, "daylight")) > 0) { + l -= w; dst = 1; } -#undef DST + else { + l += wtime; + } } + else if ((w = str_end_with_word(s, l, "dst")) > 0) { + l -= w; + dst = 1; + } + { - static VALUE zones = Qnil; + const char *zn = s; + long sl = shrunk_size(s, l); + char shrunk_buff[MAX_WORD_LENGTH]; /* no terminator to be added */ + const struct zone *z = 0; - if (NIL_P(zones)) { - int i; + if (sl <= 0) { + sl = l; + } + else if (sl <= MAX_WORD_LENGTH) { + char *d = shrunk_buff; + sl = shrink_space(d, s, l); + zn = d; + } - zones = rb_hash_new(); - rb_gc_register_mark_object(zones); - for (i = 0; i < (int)sizeof_array(zones_source); i++) { - VALUE name = rb_str_new2(zones_source[i].name); - VALUE offset = INT2FIX(zones_source[i].offset); - rb_hash_aset(zones, name, offset); - } + if (sl > 0 && sl <= MAX_WORD_LENGTH) { + z = zonetab(zn, (unsigned int)sl); } - offset = f_aref(zones, str); - if (!NIL_P(offset)) { + if (z) { + int d = z->offset; if (dst) - offset = f_add(offset, INT2FIX(3600)); + d += 3600; + offset = INT2FIX(d); goto ok; } } - { - char *s, *p; - VALUE sign; - VALUE hour = Qnil, min = Qnil, sec = Qnil; - VALUE str_orig; - s = RSTRING_PTR(str); - str_orig = str; + { + char *p; + int sign = 0; + long hour = 0, min = 0, sec = 0; - if (strncmp(s, "gmt", 3) == 0 || - strncmp(s, "utc", 3) == 0) + if (l > 3 && + (strncasecmp(s, "gmt", 3) == 0 || + strncasecmp(s, "utc", 3) == 0)) { s += 3; + l -= 3; + } if (issign(*s)) { - sign = rb_str_new(s, 1); + sign = *s == '-'; s++; + l--; - str = rb_str_new2(s); - - if (p = strchr(s, ':')) { - hour = rb_str_new(s, p - s); +#define out_of_range(v, min, max) ((v) < (min) || (max) < (v)) + hour = STRTOUL(s, &p, 10); + if (*p == ':') { + if (out_of_range(hour, 0, 23)) return Qnil; s = ++p; - if (p = strchr(s, ':')) { - min = rb_str_new(s, p - s); + min = STRTOUL(s, &p, 10); + if (out_of_range(min, 0, 59)) return Qnil; + if (*p == ':') { s = ++p; - if (p = strchr(s, ':')) { - sec = rb_str_new(s, p - s); - } - else - sec = rb_str_new2(s); + sec = STRTOUL(s, &p, 10); + if (out_of_range(sec, 0, 59)) return Qnil; } - else - min = rb_str_new2(s); - RB_GC_GUARD(str_orig); - goto num; } - if (strpbrk(RSTRING_PTR(str), ",.")) { - char *a, *b; - - a = ALLOCA_N(char, RSTRING_LEN(str) + 1); - strcpy(a, RSTRING_PTR(str)); - b = strpbrk(a, ",."); - *b = '\0'; - b++; - - hour = cstr2num(a); - min = f_mul(rb_rational_new2 - (cstr2num(b), - f_expt(INT2FIX(10), - LONG2NUM((long)strlen(b)))), - INT2FIX(60)); - goto num; - } - { - const char *cs = RSTRING_PTR(str); - long cl = RSTRING_LEN(str); - - if (cl % 2) { - if (cl >= 1) - hour = rb_str_new(&cs[0], 1); - if (cl >= 3) - min = rb_str_new(&cs[1], 2); - if (cl >= 5) - min = rb_str_new(&cs[3], 2); + else if (*p == ',' || *p == '.') { + /* fractional hour */ + size_t n; + int ov; + /* no over precision for offset; 10**-7 hour = 0.36 + * milliseconds should be enough. */ + const size_t max_digits = 7; /* 36 * 10**7 < 32-bit FIXNUM_MAX */ + + if (out_of_range(hour, 0, 23)) return Qnil; + + n = (s + l) - ++p; + if (n > max_digits) n = max_digits; + sec = ruby_scan_digits(p, n, 10, &n, &ov); + if ((p += n) < s + l && *p >= ('5' + !(sec & 1)) && *p <= '9') { + /* round half to even */ + sec++; + } + sec *= 36; + if (sign) { + hour = -hour; + sec = -sec; + } + if (n <= 2) { + /* HH.nn or HH.n */ + if (n == 1) sec *= 10; + offset = INT2FIX(sec + hour * 3600); } else { - if (cl >= 2) - hour = rb_str_new(&cs[0], 2); - if (cl >= 4) - min = rb_str_new(&cs[2], 2); - if (cl >= 6) - sec = rb_str_new(&cs[4], 2); + VALUE denom = rb_int_positive_pow(10, (int)(n - 2)); + offset = f_add(rb_rational_new(INT2FIX(sec), denom), INT2FIX(hour * 3600)); + if (rb_rational_den(offset) == INT2FIX(1)) { + offset = rb_rational_num(offset); + } } - goto num; - } - num: - if (NIL_P(hour)) - offset = INT2FIX(0); - else { - if (TYPE(hour) == T_STRING) - hour = str2num(hour); - offset = f_mul(hour, INT2FIX(3600)); + goto ok; } - if (!NIL_P(min)) { - if (TYPE(min) == T_STRING) - min = str2num(min); - offset = f_add(offset, f_mul(min, INT2FIX(60))); + else if (l > 2) { + size_t n; + int ov; + + if (l >= 1) + hour = ruby_scan_digits(&s[0], 2 - l % 2, 10, &n, &ov); + if (l >= 3) + min = ruby_scan_digits(&s[2 - l % 2], 2, 10, &n, &ov); + if (l >= 5) + sec = ruby_scan_digits(&s[4 - l % 2], 2, 10, &n, &ov); } - if (!NIL_P(sec)) - offset = f_add(offset, str2num(sec)); - if (!NIL_P(sign) && - RSTRING_LEN(sign) == 1 && - *RSTRING_PTR(sign) == '-') - offset = f_negate(offset); + sec += min * 60 + hour * 3600; + if (sign) sec = -sec; + offset = INT2FIX(sec); +#undef out_of_range } } } @@ -724,33 +691,36 @@ parse_time(VALUE str, VALUE hash) { static const char pat_source[] = "(" + "" NUMBER "+\\s*" "(?:" - "\\d+\\s*:\\s*\\d+" "(?:" + ":\\s*\\d+" + "(?:" #ifndef TIGHT_PARSER - "\\s*:\\s*\\d+(?:[,.]\\d*)?" + "\\s*:\\s*\\d+(?:[,.]\\d*)?" #else - "\\s*:\\s*\\d+(?:[,.]\\d+)?" + "\\s*:\\s*\\d+(?:[,.]\\d+)?" #endif + ")?" + "|" + "h(?:\\s*\\d+m?(?:\\s*\\d+s?)?)?" + ")" + "(?:" + "\\s*" + "[ap](?:m\\b|\\.m\\.)" ")?" "|" - "\\d+\\s*h(?:\\s*\\d+m?(?:\\s*\\d+s?)?)?" - ")" - "(?:" - "\\s*" "[ap](?:m\\b|\\.m\\.)" - ")?" - "|" - "\\d+\\s*[ap](?:m\\b|\\.m\\.)" + ")" ")" "(?:" "\\s*" "(" "(?:gmt|utc?)?[-+]\\d+(?:[,.:]\\d+(?::\\d+)?)?" "|" - "[[:alpha:].\\s]+(?:standard|daylight)\\stime\\b" + "(?-i:[[:alpha:].\\s]+)(?:standard|daylight)\\stime\\b" "|" - "[[:alpha:]]+(?:\\sdst)?\\b" + "(?-i:[[:alpha:]]+)(?:\\sdst)?\\b" ")" ")?"; static VALUE pat = Qnil; @@ -763,6 +733,106 @@ parse_time(VALUE str, VALUE hash) #endif } +#define BEGIN_ERA "\\b" +#define END_ERA "(?!(?<!\\.)[a-z])" + +#ifdef TIGHT_PARSER +static int +parse_era1_cb(VALUE m, VALUE hash) +{ + return 1; +} + +static int +parse_era1(VALUE str, VALUE hash) +{ + static const char pat_source[] = + BEGIN_ERA "(a(?:d\\b|\\.d\\.))" END_ERA; + static VALUE pat = Qnil; + + REGCOMP_I(pat); + SUBA(str, pat, parse_era1_cb); +} + +static int +parse_era2_cb(VALUE m, VALUE hash) +{ + VALUE b; + + b = rb_reg_nth_match(1, m); + if (*RSTRING_PTR(b) == 'B' || + *RSTRING_PTR(b) == 'b') + set_hash("_bc", Qtrue); + return 1; +} + +static int +parse_era2(VALUE str, VALUE hash) +{ + static const char pat_source[] = BEGIN_ERA + "(c(?:e\\b|\\.e\\.)|b(?:ce\\b|\\.c\\.e\\.)|b(?:c\\b|\\.c\\.))" + END_ERA; + static VALUE pat = Qnil; + + REGCOMP_I(pat); + SUBB(str, pat, parse_era2_cb); +} + +static int +parse_era(VALUE str, VALUE hash) +{ + if (parse_era1(str, hash)) /* pre */ + goto ok; + if (parse_era2(str, hash)) /* post */ + goto ok; + return 0; + ok: + return 1; +} +#endif + +#ifdef TIGHT_PARSER +static int +check_year_width(VALUE y) +{ + const char *s; + long l; + + l = RSTRING_LEN(y); + if (l < 2) return 0; + s = RSTRING_PTR(y); + if (!isdigit((unsigned char)s[1])) return 0; + return (l == 2 || !isdigit((unsigned char)s[2])); +} + +static int +check_apost(VALUE a, VALUE b, VALUE c) +{ + int f = 0; + + if (!NIL_P(a) && *RSTRING_PTR(a) == '\'') { + if (!check_year_width(a)) + return 0; + f++; + } + if (!NIL_P(b) && *RSTRING_PTR(b) == '\'') { + if (!check_year_width(b)) + return 0; + if (!NIL_P(c)) + return 0; + f++; + } + if (!NIL_P(c) && *RSTRING_PTR(c) == '\'') { + if (!check_year_width(c)) + return 0; + f++; + } + if (f > 1) + return 0; + return 1; +} +#endif + static int parse_eu_cb(VALUE m, VALUE hash) { @@ -786,6 +856,9 @@ parse_eu_cb(VALUE m, VALUE hash) mon = rb_reg_nth_match(2, m); y = rb_reg_nth_match(3, m); + if (!check_apost(d, mon, y)) + return 0; + mon = INT2FIX(mon_num(mon)); s3e(hash, y, mon, d, 0); @@ -802,9 +875,9 @@ parse_eu(VALUE str, VALUE hash) FPW_COM FPT_COM #endif #ifndef TIGHT_PARSER - "'?(\\d+)[^-\\d\\s]*" + "('?" NUMBER "+)[^-\\d\\s]*" #else - "(\\d+)(?:st|nd|rd|th)?\\b" + "(\\d+)(?:(?:st|nd|rd|th)\\b)?" #endif "\\s*" #ifndef TIGHT_PARSER @@ -815,11 +888,19 @@ parse_eu(VALUE str, VALUE hash) "(?:" "\\s*" #ifndef TIGHT_PARSER - "(c(?:e|\\.e\\.)|b(?:ce|\\.c\\.e\\.)|a(?:d|\\.d\\.)|b(?:c|\\.c\\.))?" + "(?:" + BEGIN_ERA + "(c(?:e|\\.e\\.)|b(?:ce|\\.c\\.e\\.)|a(?:d|\\.d\\.)|b(?:c|\\.c\\.))" + END_ERA + ")?" "\\s*" "('?-?\\d+(?:(?:st|nd|rd|th)\\b)?)" #else - "('?-?\\d+)" + "(?:" FPA ")?" + "\\s*" + "([-']?\\d+)" + "\\s*" + "(?:" FPA "|" FPB ")?" #endif ")?" #ifdef TIGHT_PARSER @@ -857,6 +938,9 @@ parse_us_cb(VALUE m, VALUE hash) d = rb_reg_nth_match(2, m); y = rb_reg_nth_match(3, m); + if (!check_apost(mon, d, y)) + return 0; + mon = INT2FIX(mon_num(mon)); s3e(hash, y, mon, d, 0); @@ -881,17 +965,23 @@ parse_us(VALUE str, VALUE hash) #ifndef TIGHT_PARSER "('?\\d+)[^-\\d\\s']*" #else - "(\\d+)(?:st|nd|rd|th)?\\b" + "('?\\d+)(?:(?:st|nd|rd|th)\\b)?" COM_FPT #endif "(?:" - "\\s*,?" - "\\s*" + "\\s*+,?" + "\\s*+" #ifndef TIGHT_PARSER "(c(?:e|\\.e\\.)|b(?:ce|\\.c\\.e\\.)|a(?:d|\\.d\\.)|b(?:c|\\.c\\.))?" "\\s*" -#endif "('?-?\\d+)" +#else + "(?:" FPA ")?" + "\\s*" + "([-']?\\d+)" + "\\s*" + "(?:" FPA "|" FPB ")?" +#endif ")?" #ifdef TIGHT_PARSER COM_FPT COM_FPW @@ -914,7 +1004,7 @@ parse_iso_cb(VALUE m, VALUE hash) d = rb_reg_nth_match(3, m); #ifdef TIGHT_PARSER - if (*RSTRING_PTR(y) == '\'' && *RSTRING_PTR(d) == '\'') + if (!check_apost(y, mon, d)) return 0; #endif @@ -927,11 +1017,11 @@ parse_iso(VALUE str, VALUE hash) { static const char pat_source[] = #ifndef TIGHT_PARSER - "('?[-+]?\\d+)-(\\d+)-('?-?\\d+)" + "('?[-+]?" NUMBER "+)-(\\d+)-('?-?\\d+)" #else BOS FPW_COM FPT_COM - "('?[-+]?\\d+)-(\\d+)-('?-?\\d+)" + "([-+']?\\d+)-(\\d+)-([-']?\\d+)" TEE_FPT COM_FPW EOS #endif @@ -1197,6 +1287,9 @@ parse_iso2(VALUE str, VALUE hash) return 1; } +#define JISX0301_ERA_INITIALS "mtshr" +#define JISX0301_DEFAULT_ERA 'H' /* obsolete */ + static int gengo(int c) { @@ -1207,6 +1300,7 @@ gengo(int c) case 'T': case 't': e = 1911; break; case 'S': case 's': e = 1925; break; case 'H': case 'h': e = 1988; break; + case 'R': case 'r': e = 2018; break; default: e = 0; break; } return e; @@ -1237,11 +1331,11 @@ parse_jis(VALUE str, VALUE hash) { static const char pat_source[] = #ifndef TIGHT_PARSER - "\\b([mtsh])(\\d+)\\.(\\d+)\\.(\\d+)" + "\\b([" JISX0301_ERA_INITIALS "])(\\d+)\\.(\\d+)\\.(\\d+)" #else BOS FPW_COM FPT_COM - "([mtsh])(\\d+)\\.(\\d+)\\.(\\d+)" + "([" JISX0301_ERA_INITIALS "])(\\d+)\\.(\\d+)\\.(\\d+)" TEE_FPT COM_FPW EOS #endif @@ -1262,7 +1356,7 @@ parse_vms11_cb(VALUE m, VALUE hash) y = rb_reg_nth_match(3, m); #ifdef TIGHT_PARSER - if (*RSTRING_PTR(d) == '\'' && !NIL_P(y) && *RSTRING_PTR(y) == '\'') + if (!check_apost(d, mon, y)) return 0; #endif @@ -1277,13 +1371,13 @@ parse_vms11(VALUE str, VALUE hash) { static const char pat_source[] = #ifndef TIGHT_PARSER - "('?-?\\d+)-(" ABBR_MONTHS ")[^-/.]*" + "('?-?" NUMBER "+)-(" ABBR_MONTHS ")[^-/.]*" "-('?-?\\d+)" #else BOS FPW_COM FPT_COM - "('?-?\\d+)-(" DOTLESS_VALID_MONTHS ")" - "-('?-?\\d+)" + "([-']?\\d+)-(" DOTLESS_VALID_MONTHS ")" + "-([-']?\\d+)" COM_FPT COM_FPW EOS #endif @@ -1304,7 +1398,7 @@ parse_vms12_cb(VALUE m, VALUE hash) y = rb_reg_nth_match(3, m); #ifdef TIGHT_PARSER - if (*RSTRING_PTR(d) == '\'' && !NIL_P(y)) + if (!check_apost(mon, d, y)) return 0; #endif @@ -1325,7 +1419,7 @@ parse_vms12(VALUE str, VALUE hash) BOS FPW_COM FPT_COM "(" DOTLESS_VALID_MONTHS ")" - "-('?-?\\d+)(?:-('?-?\\d+))?" + "-([-']?\\d+)(?:-([-']?\\d+))?" COM_FPT COM_FPW EOS #endif @@ -1359,11 +1453,7 @@ parse_sla_cb(VALUE m, VALUE hash) d = rb_reg_nth_match(3, m); #ifdef TIGHT_PARSER - if (*RSTRING_PTR(y) == '\'' && - (*RSTRING_PTR(mon) == '\'' || - !NIL_P(d) && *RSTRING_PTR(d) == '\'')) - return 0; - if (*RSTRING_PTR(mon) == '\'' && !NIL_P(d)) + if (!check_apost(y, mon, d)) return 0; #endif @@ -1376,11 +1466,11 @@ parse_sla(VALUE str, VALUE hash) { static const char pat_source[] = #ifndef TIGHT_PARSER - "('?-?\\d+)/\\s*('?\\d+)(?:\\D\\s*('?-?\\d+))?" + "('?-?" NUMBER "+)/\\s*('?\\d+)(?:\\D\\s*('?-?\\d+))?" #else BOS FPW_COM FPT_COM - "('?-?\\d+)/\\s*('?\\d+)(?:(?:[-/]|\\s+)\\s*('?-?\\d+))?" + "([-']?\\d+)/\\s*('?\\d+)(?:(?:[-/]|\\s+)\\s*([-']?\\d+))?" COM_FPT COM_FPW EOS #endif @@ -1401,10 +1491,8 @@ parse_sla2_cb(VALUE m, VALUE hash) mon = rb_reg_nth_match(2, m); y = rb_reg_nth_match(3, m); -#ifdef TIGHT_PARSER - if (*RSTRING_PTR(d) == '\'' && !NIL_P(y) && *RSTRING_PTR(y) == '\'') + if (!check_apost(d, mon, y)) return 0; -#endif mon = INT2FIX(mon_num(mon)); @@ -1418,7 +1506,7 @@ parse_sla2(VALUE str, VALUE hash) static const char pat_source[] = BOS FPW_COM FPT_COM - "('?-?\\d+)/\\s*(" DOTLESS_VALID_MONTHS ")(?:(?:[-/]|\\s+)\\s*('?-?\\d+))?" + "([-']?\\d+)/\\s*(" DOTLESS_VALID_MONTHS ")(?:(?:[-/]|\\s+)\\s*([-']?\\d+))?" COM_FPT COM_FPW EOS ; @@ -1437,10 +1525,8 @@ parse_sla3_cb(VALUE m, VALUE hash) d = rb_reg_nth_match(2, m); y = rb_reg_nth_match(3, m); -#ifdef TIGHT_PARSER - if (*RSTRING_PTR(d) == '\'' && !NIL_P(y)) + if (!check_apost(mon, d, y)) return 0; -#endif mon = INT2FIX(mon_num(mon)); @@ -1454,7 +1540,7 @@ parse_sla3(VALUE str, VALUE hash) static const char pat_source[] = BOS FPW_COM FPT_COM - "(" DOTLESS_VALID_MONTHS ")/\\s*('?\\d+)(?:(?:[-/]|\\s+)\\s*('?-?\\d+))?" + "(" DOTLESS_VALID_MONTHS ")/\\s*([-']?\\d+)(?:(?:[-/]|\\s+)\\s*([-']?\\d+))?" COM_FPT COM_FPW EOS ; @@ -1475,7 +1561,7 @@ parse_dot_cb(VALUE m, VALUE hash) d = rb_reg_nth_match(3, m); #ifdef TIGHT_PARSER - if (*RSTRING_PTR(y) == '\'' && *RSTRING_PTR(d) == '\'') + if (!check_apost(y, mon, d)) return 0; #endif @@ -1488,11 +1574,11 @@ parse_dot(VALUE str, VALUE hash) { static const char pat_source[] = #ifndef TIGHT_PARSER - "('?-?\\d+)\\.\\s*('?\\d+)\\.\\s*('?-?\\d+)" + "('?-?" NUMBER "+)\\.\\s*('?\\d+)\\.\\s*('?-?\\d+)" #else BOS FPW_COM FPT_COM - "('?-?\\d+)\\.\\s*(\\d+)\\.\\s*('?-?\\d+)" + "([-']?\\d+)\\.\\s*(\\d+)\\.\\s*([-']?\\d+)" COM_FPT COM_FPW EOS #endif @@ -1513,10 +1599,8 @@ parse_dot2_cb(VALUE m, VALUE hash) mon = rb_reg_nth_match(2, m); y = rb_reg_nth_match(3, m); -#ifdef TIGHT_PARSER - if (*RSTRING_PTR(d) == '\'' && !NIL_P(y) && *RSTRING_PTR(y) == '\'') + if (!check_apost(d, mon, y)) return 0; -#endif mon = INT2FIX(mon_num(mon)); @@ -1530,7 +1614,7 @@ parse_dot2(VALUE str, VALUE hash) static const char pat_source[] = BOS FPW_COM FPT_COM - "('?-?\\d+)\\.\\s*(" DOTLESS_VALID_MONTHS ")(?:(?:[./])\\s*('?-?\\d+))?" + "([-']?\\d+)\\.\\s*(" DOTLESS_VALID_MONTHS ")(?:(?:[./])\\s*([-']?\\d+))?" COM_FPT COM_FPW EOS ; @@ -1549,10 +1633,8 @@ parse_dot3_cb(VALUE m, VALUE hash) d = rb_reg_nth_match(2, m); y = rb_reg_nth_match(3, m); -#ifdef TIGHT_PARSER - if (*RSTRING_PTR(d) == '\'' && !NIL_P(y)) + if (!check_apost(mon, d, y)) return 0; -#endif mon = INT2FIX(mon_num(mon)); @@ -1566,7 +1648,7 @@ parse_dot3(VALUE str, VALUE hash) static const char pat_source[] = BOS FPW_COM FPT_COM - "(" DOTLESS_VALID_MONTHS ")\\.\\s*('?\\d+)(?:(?:[./])\\s*('?-?\\d+))?" + "(" DOTLESS_VALID_MONTHS ")\\.\\s*([-']?\\d+)(?:(?:[./])\\s*([-']?\\d+))?" COM_FPT COM_FPW EOS ; @@ -1652,7 +1734,7 @@ parse_mday(VALUE str, VALUE hash) { static const char pat_source[] = #ifndef TIGHT_PARSER - "(\\d+)(st|nd|rd|th)\\b" + "(" NUMBER "+)(st|nd|rd|th)\\b" #else BOS FPW_COM FPT_COM @@ -1856,28 +1938,26 @@ parse_ddd_cb(VALUE m, VALUE hash) set_hash("zone", s5); if (*cs5 == '[') { - char *buf = ALLOCA_N(char, l5 + 1); - char *s1, *s2, *s3; + const char *s1, *s2; VALUE zone; - memcpy(buf, cs5, l5); - buf[l5 - 1] = '\0'; - - s1 = buf + 1; - s2 = strchr(buf, ':'); + l5 -= 2; + s1 = cs5 + 1; + s2 = memchr(s1, ':', l5); if (s2) { - *s2 = '\0'; s2++; + zone = rb_str_subseq(s5, s2 - cs5, l5 - (s2 - s1)); + s5 = rb_str_subseq(s5, 1, s2 - s1); } - if (s2) - s3 = s2; - else - s3 = s1; - zone = rb_str_new2(s3); + else { + zone = rb_str_subseq(s5, 1, l5); + if (isdigit((unsigned char)*s1)) + s5 = rb_str_append(rb_str_new_cstr("+"), zone); + else + s5 = zone; + } set_hash("zone", zone); - if (isdigit((unsigned char)*s1)) - *--s1 = '+'; - set_hash("offset", date_zone_to_diff(rb_str_new2(s1))); + set_hash("offset", date_zone_to_diff(s5)); } RB_GC_GUARD(s5); } @@ -1892,7 +1972,7 @@ parse_ddd(VALUE str, VALUE hash) #ifdef TIGHT_PARSER BOS #endif - "([-+]?)(\\d{2,14})" + "([-+]?)(" NUMBER "{2,14})" "(?:" "\\s*" "t?" @@ -1923,12 +2003,7 @@ parse_ddd(VALUE str, VALUE hash) static int parse_bc_cb(VALUE m, VALUE hash) { - VALUE y; - - y = ref_hash("year"); - if (!NIL_P(y)) - set_hash("year", f_add(f_negate(y), INT2FIX(1))); - + set_hash("_bc", Qtrue); return 1; } @@ -2097,12 +2172,17 @@ date__parse(VALUE str, VALUE comp) if (HAVE_ELEM_P(HAVE_DIGIT)) parse_time(str, hash); - if (HAVE_ELEM_P(HAVE_ALPHA|HAVE_DIGIT)) +#ifdef TIGHT_PARSER + if (HAVE_ELEM_P(HAVE_ALPHA)) + parse_era(str, hash); +#endif + + if (HAVE_ELEM_P(HAVE_ALPHA|HAVE_DIGIT)) { if (parse_eu(str, hash)) goto ok; - if (HAVE_ELEM_P(HAVE_ALPHA|HAVE_DIGIT)) if (parse_us(str, hash)) goto ok; + } if (HAVE_ELEM_P(HAVE_DIGIT|HAVE_DASH)) if (parse_iso(str, hash)) goto ok; @@ -2170,7 +2250,22 @@ date__parse(VALUE str, VALUE comp) #endif { - if (RTEST(ref_hash("_comp"))) { + if (RTEST(del_hash("_bc"))) { + VALUE y; + + y = ref_hash("cwyear"); + if (!NIL_P(y)) { + y = f_add(f_negate(y), INT2FIX(1)); + set_hash("cwyear", y); + } + y = ref_hash("year"); + if (!NIL_P(y)) { + y = f_add(f_negate(y), INT2FIX(1)); + set_hash("year", y); + } + } + + if (RTEST(del_hash("_comp"))) { VALUE y; y = ref_hash("cwyear"); @@ -2190,9 +2285,8 @@ date__parse(VALUE str, VALUE comp) set_hash("year", f_add(y, INT2FIX(2000))); } } - } - del_hash("_comp"); + } { VALUE zone = ref_hash("zone"); @@ -2243,8 +2337,8 @@ iso8601_ext_datetime_cb(VALUE m, VALUE hash) s[i] = rb_reg_nth_match(i, m); } - if (!NIL_P(s[3])) { - set_hash("mday", str2num(s[3])); + if (!NIL_P(s[1])) { + if (!NIL_P(s[3])) set_hash("mday", str2num(s[3])); if (strcmp(RSTRING_PTR(s[1]), "-") != 0) { y = str2num(s[1]); if (RSTRING_LEN(s[1]) < 4) @@ -2301,7 +2395,7 @@ static int iso8601_ext_datetime(VALUE str, VALUE hash) { static const char pat_source[] = - "\\A\\s*(?:([-+]?\\d{2,}|-)-(\\d{2})?-(\\d{2})|" + "\\A\\s*(?:([-+]?\\d{2,}|-)-(\\d{2})?(?:-(\\d{2}))?|" "([-+]?\\d{2,})?-(\\d{3})|" "(\\d{4}|\\d{2})?-w(\\d{2})-(\\d)|" "-w-(\\d))" @@ -2712,7 +2806,9 @@ rfc2822_cb(VALUE m, VALUE hash) s[i] = rb_reg_nth_match(i, m); } - set_hash("wday", INT2FIX(day_num(s[1]))); + if (!NIL_P(s[1])) { + set_hash("wday", INT2FIX(day_num(s[1]))); + } set_hash("mday", str2num(s[2])); set_hash("mon", INT2FIX(mon_num(s[3]))); y = str2num(s[4]); @@ -2930,7 +3026,7 @@ jisx0301_cb(VALUE m, VALUE hash) s[i] = rb_reg_nth_match(i, m); } - ep = gengo(NIL_P(s[1]) ? 'h' : *RSTRING_PTR(s[1])); + ep = gengo(NIL_P(s[1]) ? JISX0301_DEFAULT_ERA : *RSTRING_PTR(s[1])); set_hash("year", f_add(str2num(s[2]), INT2FIX(ep))); set_hash("mon", str2num(s[3])); set_hash("mday", str2num(s[4])); @@ -2955,7 +3051,7 @@ static int jisx0301(VALUE str, VALUE hash) { static const char pat_source[] = - "\\A\\s*([mtsh])?(\\d{2})\\.(\\d{2})\\.(\\d{2})" + "\\A\\s*([" JISX0301_ERA_INITIALS "])?(\\d{2})\\.(\\d{2})\\.(\\d{2})" "(?:t" "(?:(\\d{2}):(\\d{2})(?::(\\d{2})(?:[,.](\\d*))?)?" "(z|[-+]\\d{2}(?::?\\d{2})?)?)?)?\\s*\\z"; |
