summary | refs | log | tree | commit | diff
path: root/ext/date/date_parse.c
diff options
context:
space:
mode:
Diffstat (limited to 'ext/date/date_parse.c')
-rw-r--r-- ext/date/date_parse.c 1367
1 files changed, 1024 insertions, 343 deletions
diff --git a/ext/date/date_parse.c b/ext/date/date_parse.c
index 597c25ac55..a1600e4708 100644
--- a/ext/date/date_parse.c
+++ b/ext/date/date_parse.c
@@ -1,5 +1,5 @@
/*
- date_parse.c: Coded by Tadayoshi Funaba 2011
+ date_parse.c: Coded by Tadayoshi Funaba 2011,2012
*/
#include "ruby.h"
@@ -7,6 +7,14 @@
#include "ruby/re.h"
#include <ctype.h>
+#undef strncasecmp
+#define strncasecmp STRNCASECMP
+
+RUBY_EXTERN VALUE rb_int_positive_pow(long x, unsigned long y);
+RUBY_EXTERN unsigned long ruby_scan_digits(const char *str, ssize_t len, int base, size_t *retlen, int *overflow);
+
+/* #define TIGHT_PARSER */
+
#define sizeof_array(o) (sizeof o / sizeof o[0])
#define f_negate(x) rb_funcall(x, rb_intern("-@"), 0)
@@ -35,32 +43,47 @@
#define f_sub_bang(s,r,x) rb_funcall(s, rb_intern("sub!"), 2, r, x)
#define f_gsub_bang(s,r,x) rb_funcall(s, rb_intern("gsub!"), 2, r, x)
-#define set_hash(k,v) rb_hash_aset(hash, ID2SYM(rb_intern(k)), v)
-#define ref_hash(k) rb_hash_aref(hash, ID2SYM(rb_intern(k)))
-#define del_hash(k) rb_hash_delete(hash, ID2SYM(rb_intern(k)))
+#define set_hash(k,v) rb_hash_aset(hash, ID2SYM(rb_intern(k"")), v)
+#define ref_hash(k) rb_hash_aref(hash, ID2SYM(rb_intern(k"")))
+#define del_hash(k) rb_hash_delete(hash, ID2SYM(rb_intern(k"")))
#define cstr2num(s) rb_cstr_to_inum(s, 10, 0)
#define str2num(s) rb_str_to_inum(s, 10, 0)
-static const char *abbr_days[] = {
+static const char abbr_days[][4] = {
"sun", "mon", "tue", "wed",
"thu", "fri", "sat"
};
-static const char *abbr_months[] = {
+static const char abbr_months[][4] = {
"jan", "feb", "mar", "apr", "may", "jun",
"jul", "aug", "sep", "oct", "nov", "dec"
};
#define issign(c) ((c) == '-' || (c) == '+')
#define asp_string() rb_str_new(" ", 1)
+#ifdef TIGHT_PARSER
+#define asuba_string() rb_str_new("\001", 1)
+#define asubb_string() rb_str_new("\002", 1)
+#define asubw_string() rb_str_new("\027", 1)
+#define asubt_string() rb_str_new("\024", 1)
+#endif
+
+static size_t
+digit_span(const char *s, const char *e)
+{
+ size_t i = 0;
+ while (s + i < e && isdigit((unsigned char)s[i])) i++;
+ return i;
+}
static void
s3e(VALUE hash, VALUE y, VALUE m, VALUE d, int bc)
{
+ VALUE vbuf = 0;
VALUE c = Qnil;
- if (TYPE(m) != T_STRING)
+ if (!RB_TYPE_P(m, T_STRING))
m = f_to_s(m);
if (!NIL_P(y) && !NIL_P(m) && NIL_P(d)) {
@@ -78,7 +101,7 @@ s3e(VALUE hash, VALUE y, VALUE m, VALUE d, int bc)
y = d;
d = Qnil;
}
- if (!NIL_P(d) && *RSTRING_PTR(d) == '\'') {
+ if (!NIL_P(d) && RSTRING_LEN(d) > 0 && *RSTRING_PTR(d) == '\'') {
y = d;
d = Qnil;
}
@@ -89,17 +112,20 @@ s3e(VALUE hash, VALUE y, VALUE m, VALUE d, int bc)
size_t l;
s = RSTRING_PTR(y);
- while (!issign(*s) && !isdigit(*s))
+ ep = RSTRING_END(y);
+ while (s < ep && !issign(*s) && !isdigit((unsigned char)*s))
s++;
+ if (s >= ep) goto no_date;
bp = s;
- if (issign(*s))
+ if (issign((unsigned char)*s))
s++;
- l = strspn(s, "0123456789");
+ l = digit_span(s, ep);
ep = s + l;
if (*ep) {
y = d;
d = rb_str_new(bp, ep - bp);
}
+ no_date:;
}
if (!NIL_P(m)) {
@@ -138,8 +164,10 @@ s3e(VALUE hash, VALUE y, VALUE m, VALUE d, int bc)
VALUE iy;
s = RSTRING_PTR(y);
- while (!issign(*s) && !isdigit(*s))
+ ep = RSTRING_END(y);
+ while (s < ep && !issign(*s) && !isdigit((unsigned char)*s))
s++;
+ if (s >= ep) goto no_year;
bp = s;
if (issign(*s)) {
s++;
@@ -147,43 +175,50 @@ s3e(VALUE hash, VALUE y, VALUE m, VALUE d, int bc)
}
if (sign)
c = Qfalse;
- l = strspn(s, "0123456789");
+ l = digit_span(s, ep);
ep = s + l;
if (l > 2)
c = Qfalse;
{
char *buf;
- buf = ALLOCA_N(char, ep - bp + 1);
+ buf = ALLOCV_N(char, vbuf, ep - bp + 1);
memcpy(buf, bp, ep - bp);
buf[ep - bp] = '\0';
iy = cstr2num(buf);
+ ALLOCV_END(vbuf);
}
- if (bc)
- iy = f_add(f_negate(iy), INT2FIX(1));
set_hash("year", iy);
+ no_year:;
}
+ if (bc)
+ set_hash("_bc", Qtrue);
+
if (!NIL_P(m)) {
const char *s, *bp, *ep;
size_t l;
VALUE im;
s = RSTRING_PTR(m);
- while (!isdigit(*s))
+ ep = RSTRING_END(m);
+ while (s < ep && !isdigit((unsigned char)*s))
s++;
+ if (s >= ep) goto no_month;
bp = s;
- l = strspn(s, "0123456789");
+ l = digit_span(s, ep);
ep = s + l;
{
char *buf;
- buf = ALLOCA_N(char, ep - bp + 1);
+ buf = ALLOCV_N(char, vbuf, ep - bp + 1);
memcpy(buf, bp, ep - bp);
buf[ep - bp] = '\0';
im = cstr2num(buf);
+ ALLOCV_END(vbuf);
}
set_hash("mon", im);
+ no_month:;
}
if (!NIL_P(d)) {
@@ -192,20 +227,24 @@ s3e(VALUE hash, VALUE y, VALUE m, VALUE d, int bc)
VALUE id;
s = RSTRING_PTR(d);
- while (!isdigit(*s))
+ ep = RSTRING_END(d);
+ while (s < ep && !isdigit((unsigned char)*s))
s++;
+ if (s >= ep) goto no_mday;
bp = s;
- l = strspn(s, "0123456789");
+ l = digit_span(s, ep);
ep = s + l;
{
char *buf;
- buf = ALLOCA_N(char, ep - bp + 1);
+ buf = ALLOCV_N(char, vbuf, ep - bp + 1);
memcpy(buf, bp, ep - bp);
buf[ep - bp] = '\0';
id = cstr2num(buf);
+ ALLOCV_END(vbuf);
}
set_hash("mday", id);
+ no_mday:;
}
if (!NIL_P(c))
@@ -217,32 +256,67 @@ s3e(VALUE hash, VALUE y, VALUE m, VALUE d, int bc)
#define ABBR_DAYS "sun|mon|tue|wed|thu|fri|sat"
#define ABBR_MONTHS "jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec"
+#define NUMBER "(?<!\\d)\\d"
+
+#ifdef TIGHT_PARSER
+#define VALID_DAYS "(?:" DAYS ")" "|(?:tues|wednes|thurs|thur|" ABBR_DAYS ")\\.?"
+#define VALID_MONTHS "(?:" MONTHS ")" "|(?:sept|" ABBR_MONTHS ")\\.?"
+#define DOTLESS_VALID_MONTHS "(?:" MONTHS ")" "|(?:sept|" ABBR_MONTHS ")"
+#define BOS "\\A\\s*"
+#define FPA "\\001"
+#define FPB "\\002"
+#define FPW "\\027"
+#define FPT "\\024"
+#define FPW_COM "\\s*(?:" FPW "\\s*,?)?\\s*"
+#define FPT_COM "\\s*(?:" FPT "\\s*,?)?\\s*"
+#define COM_FPW "\\s*(?:,?\\s*" FPW ")?\\s*"
+#define COM_FPT "\\s*(?:,?\\s*(?:@|\\b[aA][tT]\\b)?\\s*" FPT ")?\\s*"
+#define TEE_FPT "\\s*(?:[tT]?" FPT ")?"
+#define EOS "\\s*\\z"
+#endif
+
static VALUE
regcomp(const char *source, long len, int opt)
{
VALUE pat;
pat = rb_reg_new(source, len, opt);
+ rb_obj_freeze(pat);
rb_gc_register_mark_object(pat);
return pat;
}
#define REGCOMP(pat,opt) \
-{ \
+do { \
if (NIL_P(pat)) \
pat = regcomp(pat##_source, sizeof pat##_source - 1, opt); \
-}
+} while (0)
#define REGCOMP_0(pat) REGCOMP(pat, 0)
#define REGCOMP_I(pat) REGCOMP(pat, ONIG_OPTION_IGNORECASE)
-#define SUBS(s,p,c) \
-{ \
- return subs(s, p, hash, c); \
+#define MATCH(s,p,c) \
+do { \
+ return match(s, p, hash, c); \
+} while (0)
+
+static int
+match(VALUE str, VALUE pat, VALUE hash, int (*cb)(VALUE, VALUE))
+{
+ VALUE m;
+
+ m = f_match(pat, str);
+
+ if (NIL_P(m))
+ return 0;
+
+ (*cb)(m, hash);
+
+ return 1;
}
static int
-subs(VALUE str, VALUE pat, VALUE hash, int (*cb)(VALUE, VALUE))
+subx(VALUE str, VALUE rep, VALUE pat, VALUE hash, int (*cb)(VALUE, VALUE))
{
VALUE m;
@@ -256,280 +330,226 @@ subs(VALUE str, VALUE pat, VALUE hash, int (*cb)(VALUE, VALUE))
be = f_begin(m, INT2FIX(0));
en = f_end(m, INT2FIX(0));
- f_aset2(str, be, LONG2NUM(NUM2LONG(en) - NUM2LONG(be)), asp_string());
+ f_aset2(str, be, LONG2NUM(NUM2LONG(en) - NUM2LONG(be)), rep);
(*cb)(m, hash);
}
return 1;
}
-struct zone {
- const char *name;
- int offset;
-};
+#define SUBS(s,p,c) \
+do { \
+ return subx(s, asp_string(), p, hash, c); \
+} while (0)
-static struct zone zones_source[] = {
- {"ut", 0*3600}, {"gmt", 0*3600}, {"est", -5*3600}, {"edt", -4*3600},
- {"cst", -6*3600}, {"cdt", -5*3600}, {"mst", -7*3600}, {"mdt", -6*3600},
- {"pst", -8*3600}, {"pdt", -7*3600},
- {"a", 1*3600}, {"b", 2*3600}, {"c", 3*3600}, {"d", 4*3600},
- {"e", 5*3600}, {"f", 6*3600}, {"g", 7*3600}, {"h", 8*3600},
- {"i", 9*3600}, {"k", 10*3600}, {"l", 11*3600}, {"m", 12*3600},
- {"n", -1*3600}, {"o", -2*3600}, {"p", -3*3600}, {"q", -4*3600},
- {"r", -5*3600}, {"s", -6*3600}, {"t", -7*3600}, {"u", -8*3600},
- {"v", -9*3600}, {"w", -10*3600}, {"x", -11*3600}, {"y", -12*3600},
- {"z", 0*3600},
-
- {"utc", 0*3600}, {"wet", 0*3600},
- {"at", -2*3600}, {"brst",-2*3600}, {"ndt", -(2*3600+1800)},
- {"art", -3*3600}, {"adt", -3*3600}, {"brt", -3*3600}, {"clst",-3*3600},
- {"nst", -(3*3600+1800)},
- {"ast", -4*3600}, {"clt", -4*3600},
- {"akdt",-8*3600}, {"ydt", -8*3600},
- {"akst",-9*3600}, {"hadt",-9*3600}, {"hdt", -9*3600}, {"yst", -9*3600},
- {"ahst",-10*3600},{"cat",-10*3600}, {"hast",-10*3600},{"hst",-10*3600},
- {"nt", -11*3600},
- {"idlw",-12*3600},
- {"bst", 1*3600}, {"cet", 1*3600}, {"fwt", 1*3600}, {"met", 1*3600},
- {"mewt", 1*3600}, {"mez", 1*3600}, {"swt", 1*3600}, {"wat", 1*3600},
- {"west", 1*3600},
- {"cest", 2*3600}, {"eet", 2*3600}, {"fst", 2*3600}, {"mest", 2*3600},
- {"mesz", 2*3600}, {"sast", 2*3600}, {"sst", 2*3600},
- {"bt", 3*3600}, {"eat", 3*3600}, {"eest", 3*3600}, {"msk", 3*3600},
- {"msd", 4*3600}, {"zp4", 4*3600},
- {"zp5", 5*3600}, {"ist", (5*3600+1800)},
- {"zp6", 6*3600},
- {"wast", 7*3600},
- {"cct", 8*3600}, {"sgt", 8*3600}, {"wadt", 8*3600},
- {"jst", 9*3600}, {"kst", 9*3600},
- {"east",10*3600}, {"gst", 10*3600},
- {"eadt",11*3600},
- {"idle",12*3600}, {"nzst",12*3600}, {"nzt", 12*3600},
- {"nzdt",13*3600},
-
- {"afghanistan", 16200}, {"alaskan", -32400},
- {"arab", 10800}, {"arabian", 14400},
- {"arabic", 10800}, {"atlantic", -14400},
- {"aus central", 34200}, {"aus eastern", 36000},
- {"azores", -3600}, {"canada central", -21600},
- {"cape verde", -3600}, {"caucasus", 14400},
- {"cen. australia", 34200}, {"central america", -21600},
- {"central asia", 21600}, {"central europe", 3600},
- {"central european", 3600}, {"central pacific", 39600},
- {"central", -21600}, {"china", 28800},
- {"dateline", -43200}, {"e. africa", 10800},
- {"e. australia", 36000}, {"e. europe", 7200},
- {"e. south america", -10800}, {"eastern", -18000},
- {"egypt", 7200}, {"ekaterinburg", 18000},
- {"fiji", 43200}, {"fle", 7200},
- {"greenland", -10800}, {"greenwich", 0},
- {"gtb", 7200}, {"hawaiian", -36000},
- {"india", 19800}, {"iran", 12600},
- {"jerusalem", 7200}, {"korea", 32400},
- {"mexico", -21600}, {"mid-atlantic", -7200},
- {"mountain", -25200}, {"myanmar", 23400},
- {"n. central asia", 21600}, {"nepal", 20700},
- {"new zealand", 43200}, {"newfoundland", -12600},
- {"north asia east", 28800}, {"north asia", 25200},
- {"pacific sa", -14400}, {"pacific", -28800},
- {"romance", 3600}, {"russian", 10800},
- {"sa eastern", -10800}, {"sa pacific", -18000},
- {"sa western", -14400}, {"samoa", -39600},
- {"se asia", 25200}, {"malay peninsula", 28800},
- {"south africa", 7200}, {"sri lanka", 21600},
- {"taipei", 28800}, {"tasmania", 36000},
- {"tokyo", 32400}, {"tonga", 46800},
- {"us eastern", -18000}, {"us mountain", -25200},
- {"vladivostok", 36000}, {"w. australia", 28800},
- {"w. central africa", 3600}, {"w. europe", 3600},
- {"west asia", 18000}, {"west pacific", 36000},
- {"yakutsk", 32400}
-};
+#ifdef TIGHT_PARSER
+#define SUBA(s,p,c) \
+do { \
+ return subx(s, asuba_string(), p, hash, c); \
+} while (0)
-VALUE
-date_zone_to_diff(VALUE str)
-{
- VALUE offset = Qnil;
+#define SUBB(s,p,c) \
+do { \
+ return subx(s, asubb_string(), p, hash, c); \
+} while (0)
- long l, i;
- char *s, *dest, *d;
- int sp = 1;
+#define SUBW(s,p,c) \
+do { \
+ return subx(s, asubw_string(), p, hash, c); \
+} while (0)
- l = RSTRING_LEN(str);
- s = RSTRING_PTR(str);
+#define SUBT(s,p,c) \
+do { \
+ return subx(s, asubt_string(), p, hash, c); \
+} while (0)
+#endif
- dest = d = ALLOCA_N(char, l + 1);
+#include "zonetab.h"
- for (i = 0; i < l; i++) {
- if (isspace(s[i]) || s[i] == '\0') {
- if (!sp)
- *d++ = ' ';
- sp = 1;
+static int
+str_end_with_word(const char *s, long l, const char *w)
+{
+ int n = (int)strlen(w);
+ if (l <= n || !isspace((unsigned char)s[l - n - 1])) return 0;
+ if (strncasecmp(&s[l - n], w, n)) return 0;
+ do ++n; while (l > n && isspace((unsigned char)s[l - n - 1]));
+ return n;
+}
+
+static long
+shrunk_size(const char *s, long l)
+{
+ long i, ni;
+ int sp = 0;
+ for (i = ni = 0; i < l; ++i) {
+ if (!isspace((unsigned char)s[i])) {
+ if (sp) ni++;
+ sp = 0;
+ ni++;
}
else {
- if (isalpha(s[i]))
- *d++ = tolower(s[i]);
- else
- *d++ = s[i];
- sp = 0;
+ sp = 1;
}
}
- if (d > dest) {
- if (*(d - 1) == ' ')
- --d;
- *d = '\0';
- }
- str = rb_str_new2(dest);
- {
-#define STD " standard time"
-#define DST " daylight time"
- char *ss, *ds;
- long sl, dl;
- int dst = 0;
-
- sl = RSTRING_LEN(str) - (sizeof STD - 1);
- ss = RSTRING_PTR(str) + sl;
- dl = RSTRING_LEN(str) - (sizeof DST - 1);
- ds = RSTRING_PTR(str) + dl;
+ return ni < l ? ni : 0;
+}
- if (sl >= 0 && strcmp(ss, STD) == 0) {
- str = rb_str_new(RSTRING_PTR(str), sl);
- }
- else if (dl >= 0 && strcmp(ds, DST) == 0) {
- str = rb_str_new(RSTRING_PTR(str), dl);
- dst = 1;
+static long
+shrink_space(char *d, const char *s, long l)
+{
+ long i, ni;
+ int sp = 0;
+ for (i = ni = 0; i < l; ++i) {
+ if (!isspace((unsigned char)s[i])) {
+ if (sp) d[ni++] = ' ';
+ sp = 0;
+ d[ni++] = s[i];
}
-#undef STD
-#undef DST
else {
-#define DST " dst"
- char *ds;
- long dl;
+ sp = 1;
+ }
+ }
+ return ni;
+}
+
+VALUE
+date_zone_to_diff(VALUE str)
+{
+ VALUE offset = Qnil;
+ long l = RSTRING_LEN(str);
+ const char *s = RSTRING_PTR(str);
- dl = RSTRING_LEN(str) - (sizeof DST - 1);
- ds = RSTRING_PTR(str) + dl;
+ {
+ int dst = 0;
+ int w;
- if (dl >= 0 && strcmp(ds, DST) == 0) {
- str = rb_str_new(RSTRING_PTR(str), dl);
+ if ((w = str_end_with_word(s, l, "time")) > 0) {
+ int wtime = w;
+ l -= w;
+ if ((w = str_end_with_word(s, l, "standard")) > 0) {
+ l -= w;
+ }
+ else if ((w = str_end_with_word(s, l, "daylight")) > 0) {
+ l -= w;
dst = 1;
}
-#undef DST
+ else {
+ l += wtime;
+ }
+ }
+ else if ((w = str_end_with_word(s, l, "dst")) > 0) {
+ l -= w;
+ dst = 1;
}
+
{
- static VALUE zones = Qnil;
+ const char *zn = s;
+ long sl = shrunk_size(s, l);
+ char shrunk_buff[MAX_WORD_LENGTH]; /* no terminator to be added */
+ const struct zone *z = 0;
- if (NIL_P(zones)) {
- int i;
+ if (sl <= 0) {
+ sl = l;
+ }
+ else if (sl <= MAX_WORD_LENGTH) {
+ char *d = shrunk_buff;
+ sl = shrink_space(d, s, l);
+ zn = d;
+ }
- zones = rb_hash_new();
- rb_gc_register_mark_object(zones);
- for (i = 0; i < (int)sizeof_array(zones_source); i++) {
- VALUE name = rb_str_new2(zones_source[i].name);
- VALUE offset = INT2FIX(zones_source[i].offset);
- rb_hash_aset(zones, name, offset);
- }
+ if (sl > 0 && sl <= MAX_WORD_LENGTH) {
+ z = zonetab(zn, (unsigned int)sl);
}
- offset = f_aref(zones, str);
- if (!NIL_P(offset)) {
+ if (z) {
+ int d = z->offset;
if (dst)
- offset = f_add(offset, INT2FIX(3600));
+ d += 3600;
+ offset = INT2FIX(d);
goto ok;
}
}
- {
- char *s, *p;
- VALUE sign;
- VALUE hour = Qnil, min = Qnil, sec = Qnil;
- VALUE str_orig;
- s = RSTRING_PTR(str);
- str_orig = str;
+ {
+ char *p;
+ int sign = 0;
+ long hour = 0, min = 0, sec = 0;
- if (strncmp(s, "gmt", 3) == 0 ||
- strncmp(s, "utc", 3) == 0)
+ if (l > 3 &&
+ (strncasecmp(s, "gmt", 3) == 0 ||
+ strncasecmp(s, "utc", 3) == 0)) {
s += 3;
+ l -= 3;
+ }
if (issign(*s)) {
- sign = rb_str_new(s, 1);
+ sign = *s == '-';
s++;
+ l--;
- str = rb_str_new2(s);
-
- if (p = strchr(s, ':')) {
- hour = rb_str_new(s, p - s);
+#define out_of_range(v, min, max) ((v) < (min) || (max) < (v))
+ hour = STRTOUL(s, &p, 10);
+ if (*p == ':') {
+ if (out_of_range(hour, 0, 23)) return Qnil;
s = ++p;
- if (p = strchr(s, ':')) {
- min = rb_str_new(s, p - s);
+ min = STRTOUL(s, &p, 10);
+ if (out_of_range(min, 0, 59)) return Qnil;
+ if (*p == ':') {
s = ++p;
- if (p = strchr(s, ':')) {
- sec = rb_str_new(s, p - s);
- }
- else
- sec = rb_str_new2(s);
+ sec = STRTOUL(s, &p, 10);
+ if (out_of_range(sec, 0, 59)) return Qnil;
}
- else
- min = rb_str_new2(s);
- RB_GC_GUARD(str_orig);
- goto num;
}
- if (strpbrk(RSTRING_PTR(str), ",.")) {
- char *a, *b;
-
- a = ALLOCA_N(char, RSTRING_LEN(str) + 1);
- strcpy(a, RSTRING_PTR(str));
- b = strpbrk(a, ",.");
- *b = '\0';
- b++;
-
- hour = cstr2num(a);
- min = f_mul(rb_rational_new2
- (cstr2num(b),
- f_expt(INT2FIX(10),
- LONG2NUM((long)strlen(b)))),
- INT2FIX(60));
- goto num;
- }
- {
- const char *cs = RSTRING_PTR(str);
- long cl = RSTRING_LEN(str);
-
- if (cl % 2) {
- if (cl >= 1)
- hour = rb_str_new(&cs[0], 1);
- if (cl >= 3)
- min = rb_str_new(&cs[1], 2);
- if (cl >= 5)
- min = rb_str_new(&cs[3], 2);
+ else if (*p == ',' || *p == '.') {
+ /* fractional hour */
+ size_t n;
+ int ov;
+ /* no over precision for offset; 10**-7 hour = 0.36
+ * milliseconds should be enough. */
+ const size_t max_digits = 7; /* 36 * 10**7 < 32-bit FIXNUM_MAX */
+
+ if (out_of_range(hour, 0, 23)) return Qnil;
+
+ n = (s + l) - ++p;
+ if (n > max_digits) n = max_digits;
+ sec = ruby_scan_digits(p, n, 10, &n, &ov);
+ if ((p += n) < s + l && *p >= ('5' + !(sec & 1)) && *p <= '9') {
+ /* round half to even */
+ sec++;
+ }
+ sec *= 36;
+ if (sign) {
+ hour = -hour;
+ sec = -sec;
+ }
+ if (n <= 2) {
+ /* HH.nn or HH.n */
+ if (n == 1) sec *= 10;
+ offset = INT2FIX(sec + hour * 3600);
}
else {
- if (cl >= 2)
- hour = rb_str_new(&cs[0], 2);
- if (cl >= 4)
- min = rb_str_new(&cs[2], 2);
- if (cl >= 6)
- sec = rb_str_new(&cs[4], 2);
+ VALUE denom = rb_int_positive_pow(10, (int)(n - 2));
+ offset = f_add(rb_rational_new(INT2FIX(sec), denom), INT2FIX(hour * 3600));
+ if (rb_rational_den(offset) == INT2FIX(1)) {
+ offset = rb_rational_num(offset);
+ }
}
- goto num;
- }
- num:
- if (NIL_P(hour))
- offset = INT2FIX(0);
- else {
- if (TYPE(hour) == T_STRING)
- hour = str2num(hour);
- offset = f_mul(hour, INT2FIX(3600));
+ goto ok;
}
- if (!NIL_P(min)) {
- if (TYPE(min) == T_STRING)
- min = str2num(min);
- offset = f_add(offset, f_mul(min, INT2FIX(60)));
+ else if (l > 2) {
+ size_t n;
+ int ov;
+
+ if (l >= 1)
+ hour = ruby_scan_digits(&s[0], 2 - l % 2, 10, &n, &ov);
+ if (l >= 3)
+ min = ruby_scan_digits(&s[2 - l % 2], 2, 10, &n, &ov);
+ if (l >= 5)
+ sec = ruby_scan_digits(&s[4 - l % 2], 2, 10, &n, &ov);
}
- if (!NIL_P(sec))
- offset = f_add(offset, str2num(sec));
- if (!NIL_P(sign) &&
- RSTRING_LEN(sign) == 1 &&
- *RSTRING_PTR(sign) == '-')
- offset = f_negate(offset);
+ sec += min * 60 + hour * 3600;
+ if (sign) sec = -sec;
+ offset = INT2FIX(sec);
+#undef out_of_range
}
}
}
@@ -573,11 +593,21 @@ parse_day_cb(VALUE m, VALUE hash)
static int
parse_day(VALUE str, VALUE hash)
{
- static const char pat_source[] = "\\b(" ABBR_DAYS ")[^-\\d\\s]*";
+ static const char pat_source[] =
+#ifndef TIGHT_PARSER
+ "\\b(" ABBR_DAYS ")[^-/\\d\\s]*"
+#else
+ "(" VALID_DAYS ")"
+#endif
+ ;
static VALUE pat = Qnil;
REGCOMP_I(pat);
+#ifndef TIGHT_PARSER
SUBS(str, pat, parse_day_cb);
+#else
+ SUBW(str, pat, parse_day_cb);
+#endif
}
static int
@@ -661,40 +691,152 @@ parse_time(VALUE str, VALUE hash)
{
static const char pat_source[] =
"("
+ "" NUMBER "+\\s*"
"(?:"
- "\\d+\\s*:\\s*\\d+"
"(?:"
- "\\s*:\\s*\\d+(?:[,.]\\d*)?"
+ ":\\s*\\d+"
+ "(?:"
+#ifndef TIGHT_PARSER
+ "\\s*:\\s*\\d+(?:[,.]\\d*)?"
+#else
+ "\\s*:\\s*\\d+(?:[,.]\\d+)?"
+#endif
+ ")?"
+ "|"
+ "h(?:\\s*\\d+m?(?:\\s*\\d+s?)?)?"
+ ")"
+ "(?:"
+ "\\s*"
+ "[ap](?:m\\b|\\.m\\.)"
")?"
"|"
- "\\d+\\s*h(?:\\s*\\d+m?(?:\\s*\\d+s?)?)?"
- ")"
- "(?:"
- "\\s*"
"[ap](?:m\\b|\\.m\\.)"
- ")?"
- "|"
- "\\d+\\s*[ap](?:m\\b|\\.m\\.)"
+ ")"
")"
"(?:"
"\\s*"
"("
"(?:gmt|utc?)?[-+]\\d+(?:[,.:]\\d+(?::\\d+)?)?"
"|"
- "[[:alpha:].\\s]+(?:standard|daylight)\\stime\\b"
+ "(?-i:[[:alpha:].\\s]+)(?:standard|daylight)\\stime\\b"
"|"
- "[[:alpha:]]+(?:\\sdst)?\\b"
+ "(?-i:[[:alpha:]]+)(?:\\sdst)?\\b"
")"
")?";
static VALUE pat = Qnil;
REGCOMP_I(pat);
+#ifndef TIGHT_PARSER
SUBS(str, pat, parse_time_cb);
+#else
+ SUBT(str, pat, parse_time_cb);
+#endif
+}
+
+#define BEGIN_ERA "\\b"
+#define END_ERA "(?!(?<!\\.)[a-z])"
+
+#ifdef TIGHT_PARSER
+static int
+parse_era1_cb(VALUE m, VALUE hash)
+{
+ return 1;
+}
+
+static int
+parse_era1(VALUE str, VALUE hash)
+{
+ static const char pat_source[] =
+ BEGIN_ERA "(a(?:d\\b|\\.d\\.))" END_ERA;
+ static VALUE pat = Qnil;
+
+ REGCOMP_I(pat);
+ SUBA(str, pat, parse_era1_cb);
+}
+
+static int
+parse_era2_cb(VALUE m, VALUE hash)
+{
+ VALUE b;
+
+ b = rb_reg_nth_match(1, m);
+ if (*RSTRING_PTR(b) == 'B' ||
+ *RSTRING_PTR(b) == 'b')
+ set_hash("_bc", Qtrue);
+ return 1;
+}
+
+static int
+parse_era2(VALUE str, VALUE hash)
+{
+ static const char pat_source[] = BEGIN_ERA
+ "(c(?:e\\b|\\.e\\.)|b(?:ce\\b|\\.c\\.e\\.)|b(?:c\\b|\\.c\\.))"
+ END_ERA;
+ static VALUE pat = Qnil;
+
+ REGCOMP_I(pat);
+ SUBB(str, pat, parse_era2_cb);
+}
+
+static int
+parse_era(VALUE str, VALUE hash)
+{
+ if (parse_era1(str, hash)) /* pre */
+ goto ok;
+ if (parse_era2(str, hash)) /* post */
+ goto ok;
+ return 0;
+ ok:
+ return 1;
+}
+#endif
+
+#ifdef TIGHT_PARSER
+static int
+check_year_width(VALUE y)
+{
+ const char *s;
+ long l;
+
+ l = RSTRING_LEN(y);
+ if (l < 2) return 0;
+ s = RSTRING_PTR(y);
+ if (!isdigit((unsigned char)s[1])) return 0;
+ return (l == 2 || !isdigit((unsigned char)s[2]));
+}
+
+static int
+check_apost(VALUE a, VALUE b, VALUE c)
+{
+ int f = 0;
+
+ if (!NIL_P(a) && *RSTRING_PTR(a) == '\'') {
+ if (!check_year_width(a))
+ return 0;
+ f++;
+ }
+ if (!NIL_P(b) && *RSTRING_PTR(b) == '\'') {
+ if (!check_year_width(b))
+ return 0;
+ if (!NIL_P(c))
+ return 0;
+ f++;
+ }
+ if (!NIL_P(c) && *RSTRING_PTR(c) == '\'') {
+ if (!check_year_width(c))
+ return 0;
+ f++;
+ }
+ if (f > 1)
+ return 0;
+ return 1;
}
+#endif
static int
parse_eu_cb(VALUE m, VALUE hash)
{
+#ifndef TIGHT_PARSER
VALUE y, mon, d, b;
d = rb_reg_nth_match(1, m);
@@ -707,6 +849,20 @@ parse_eu_cb(VALUE m, VALUE hash)
s3e(hash, y, mon, d, !NIL_P(b) &&
(*RSTRING_PTR(b) == 'B' ||
*RSTRING_PTR(b) == 'b'));
+#else
+ VALUE y, mon, d;
+
+ d = rb_reg_nth_match(1, m);
+ mon = rb_reg_nth_match(2, m);
+ y = rb_reg_nth_match(3, m);
+
+ if (!check_apost(d, mon, y))
+ return 0;
+
+ mon = INT2FIX(mon_num(mon));
+
+ s3e(hash, y, mon, d, 0);
+#endif
return 1;
}
@@ -714,15 +870,44 @@ static int
parse_eu(VALUE str, VALUE hash)
{
static const char pat_source[] =
- "'?(\\d+)[^-\\d\\s]*"
+#ifdef TIGHT_PARSER
+ BOS
+ FPW_COM FPT_COM
+#endif
+#ifndef TIGHT_PARSER
+ "('?" NUMBER "+)[^-\\d\\s]*"
+#else
+ "(\\d+)(?:(?:st|nd|rd|th)\\b)?"
+#endif
"\\s*"
+#ifndef TIGHT_PARSER
"(" ABBR_MONTHS ")[^-\\d\\s']*"
+#else
+ "(" VALID_MONTHS ")"
+#endif
"(?:"
"\\s*"
- "(c(?:e|\\.e\\.)|b(?:ce|\\.c\\.e\\.)|a(?:d|\\.d\\.)|b(?:c|\\.c\\.))?"
+#ifndef TIGHT_PARSER
+ "(?:"
+ BEGIN_ERA
+ "(c(?:e|\\.e\\.)|b(?:ce|\\.c\\.e\\.)|a(?:d|\\.d\\.)|b(?:c|\\.c\\.))"
+ END_ERA
+ ")?"
"\\s*"
"('?-?\\d+(?:(?:st|nd|rd|th)\\b)?)"
- ")?";
+#else
+ "(?:" FPA ")?"
+ "\\s*"
+ "([-']?\\d+)"
+ "\\s*"
+ "(?:" FPA "|" FPB ")?"
+#endif
+ ")?"
+#ifdef TIGHT_PARSER
+ COM_FPT COM_FPW
+ EOS
+#endif
+ ;
static VALUE pat = Qnil;
REGCOMP_I(pat);
@@ -732,10 +917,12 @@ parse_eu(VALUE str, VALUE hash)
static int
parse_us_cb(VALUE m, VALUE hash)
{
+#ifndef TIGHT_PARSER
VALUE y, mon, d, b;
mon = rb_reg_nth_match(1, m);
d = rb_reg_nth_match(2, m);
+
b = rb_reg_nth_match(3, m);
y = rb_reg_nth_match(4, m);
@@ -744,6 +931,20 @@ parse_us_cb(VALUE m, VALUE hash)
s3e(hash, y, mon, d, !NIL_P(b) &&
(*RSTRING_PTR(b) == 'B' ||
*RSTRING_PTR(b) == 'b'));
+#else
+ VALUE y, mon, d;
+
+ mon = rb_reg_nth_match(1, m);
+ d = rb_reg_nth_match(2, m);
+ y = rb_reg_nth_match(3, m);
+
+ if (!check_apost(mon, d, y))
+ return 0;
+
+ mon = INT2FIX(mon_num(mon));
+
+ s3e(hash, y, mon, d, 0);
+#endif
return 1;
}
@@ -751,15 +952,42 @@ static int
parse_us(VALUE str, VALUE hash)
{
static const char pat_source[] =
+#ifdef TIGHT_PARSER
+ BOS
+ FPW_COM FPT_COM
+#endif
+#ifndef TIGHT_PARSER
"\\b(" ABBR_MONTHS ")[^-\\d\\s']*"
+#else
+ "\\b(" VALID_MONTHS ")"
+#endif
"\\s*"
+#ifndef TIGHT_PARSER
"('?\\d+)[^-\\d\\s']*"
+#else
+ "('?\\d+)(?:(?:st|nd|rd|th)\\b)?"
+ COM_FPT
+#endif
"(?:"
- "\\s*"
+ "\\s*+,?"
+ "\\s*+"
+#ifndef TIGHT_PARSER
"(c(?:e|\\.e\\.)|b(?:ce|\\.c\\.e\\.)|a(?:d|\\.d\\.)|b(?:c|\\.c\\.))?"
"\\s*"
"('?-?\\d+)"
- ")?";
+#else
+ "(?:" FPA ")?"
+ "\\s*"
+ "([-']?\\d+)"
+ "\\s*"
+ "(?:" FPA "|" FPB ")?"
+#endif
+ ")?"
+#ifdef TIGHT_PARSER
+ COM_FPT COM_FPW
+ EOS
+#endif
+ ;
static VALUE pat = Qnil;
REGCOMP_I(pat);
@@ -775,6 +1003,11 @@ parse_iso_cb(VALUE m, VALUE hash)
mon = rb_reg_nth_match(2, m);
d = rb_reg_nth_match(3, m);
+#ifdef TIGHT_PARSER
+ if (!check_apost(y, mon, d))
+ return 0;
+#endif
+
s3e(hash, y, mon, d, 0);
return 1;
}
@@ -782,7 +1015,17 @@ parse_iso_cb(VALUE m, VALUE hash)
static int
parse_iso(VALUE str, VALUE hash)
{
- static const char pat_source[] = "('?[-+]?\\d+)-(\\d+)-('?-?\\d+)";
+ static const char pat_source[] =
+#ifndef TIGHT_PARSER
+ "('?[-+]?" NUMBER "+)-(\\d+)-('?-?\\d+)"
+#else
+ BOS
+ FPW_COM FPT_COM
+ "([-+']?\\d+)-(\\d+)-([-']?\\d+)"
+ TEE_FPT COM_FPW
+ EOS
+#endif
+ ;
static VALUE pat = Qnil;
REGCOMP_0(pat);
@@ -811,7 +1054,16 @@ static int
parse_iso21(VALUE str, VALUE hash)
{
static const char pat_source[] =
- "\\b(\\d{2}|\\d{4})?-?w(\\d{2})(?:-?(\\d))?\\b";
+#ifndef TIGHT_PARSER
+ "\\b(\\d{2}|\\d{4})?-?w(\\d{2})(?:-?(\\d))?\\b"
+#else
+ BOS
+ FPW_COM FPT_COM
+ "(\\d{2}|\\d{4})?-?w(\\d{2})(?:-?(\\d))?"
+ TEE_FPT COM_FPW
+ EOS
+#endif
+ ;
static VALUE pat = Qnil;
REGCOMP_I(pat);
@@ -831,7 +1083,17 @@ parse_iso22_cb(VALUE m, VALUE hash)
static int
parse_iso22(VALUE str, VALUE hash)
{
- static const char pat_source[] = "-w-(\\d)\\b";
+ static const char pat_source[] =
+#ifndef TIGHT_PARSER
+ "-w-(\\d)\\b"
+#else
+ BOS
+ FPW_COM FPT_COM
+ "-w-(\\d)"
+ TEE_FPT COM_FPW
+ EOS
+#endif
+ ;
static VALUE pat = Qnil;
REGCOMP_I(pat);
@@ -856,7 +1118,17 @@ parse_iso23_cb(VALUE m, VALUE hash)
static int
parse_iso23(VALUE str, VALUE hash)
{
- static const char pat_source[] = "--(\\d{2})?-(\\d{2})\\b";
+ static const char pat_source[] =
+#ifndef TIGHT_PARSER
+ "--(\\d{2})?-(\\d{2})\\b"
+#else
+ BOS
+ FPW_COM FPT_COM
+ "--(\\d{2})?-(\\d{2})"
+ TEE_FPT COM_FPW
+ EOS
+#endif
+ ;
static VALUE pat = Qnil;
REGCOMP_0(pat);
@@ -881,7 +1153,17 @@ parse_iso24_cb(VALUE m, VALUE hash)
static int
parse_iso24(VALUE str, VALUE hash)
{
- static const char pat_source[] = "--(\\d{2})(\\d{2})?\\b";
+ static const char pat_source[] =
+#ifndef TIGHT_PARSER
+ "--(\\d{2})(\\d{2})?\\b"
+#else
+ BOS
+ FPW_COM FPT_COM
+ "--(\\d{2})(\\d{2})?"
+ TEE_FPT COM_FPW
+ EOS
+#endif
+ ;
static VALUE pat = Qnil;
REGCOMP_0(pat);
@@ -905,9 +1187,29 @@ parse_iso25_cb(VALUE m, VALUE hash)
static int
parse_iso25(VALUE str, VALUE hash)
{
- static const char pat0_source[] = "[,.](\\d{2}|\\d{4})-\\d{3}\\b";
+ static const char pat0_source[] =
+#ifndef TIGHT_PARSER
+ "[,.](\\d{2}|\\d{4})-\\d{3}\\b"
+#else
+ BOS
+ FPW_COM FPT_COM
+ "[,.](\\d{2}|\\d{4})-\\d{3}"
+ TEE_FPT COM_FPW
+ EOS
+#endif
+ ;
static VALUE pat0 = Qnil;
- static const char pat_source[] = "\\b(\\d{2}|\\d{4})-(\\d{3})\\b";
+ static const char pat_source[] =
+#ifndef TIGHT_PARSER
+ "\\b(\\d{2}|\\d{4})-(\\d{3})\\b"
+#else
+ BOS
+ FPW_COM FPT_COM
+ "(\\d{2}|\\d{4})-(\\d{3})"
+ TEE_FPT COM_FPW
+ EOS
+#endif
+ ;
static VALUE pat = Qnil;
REGCOMP_0(pat0);
@@ -931,9 +1233,29 @@ parse_iso26_cb(VALUE m, VALUE hash)
static int
parse_iso26(VALUE str, VALUE hash)
{
- static const char pat0_source[] = "\\d-\\d{3}\\b";
+ static const char pat0_source[] =
+#ifndef TIGHT_PARSER
+ "\\d-\\d{3}\\b"
+#else
+ BOS
+ FPW_COM FPT_COM
+ "\\d-\\d{3}"
+ TEE_FPT COM_FPW
+ EOS
+#endif
+ ;
static VALUE pat0 = Qnil;
- static const char pat_source[] = "\\b-(\\d{3})\\b";
+ static const char pat_source[] =
+#ifndef TIGHT_PARSER
+ "\\b-(\\d{3})\\b"
+#else
+ BOS
+ FPW_COM FPT_COM
+ "-(\\d{3})"
+ TEE_FPT COM_FPW
+ EOS
+#endif
+ ;
static VALUE pat = Qnil;
REGCOMP_0(pat0);
@@ -965,6 +1287,9 @@ parse_iso2(VALUE str, VALUE hash)
return 1;
}
+#define JISX0301_ERA_INITIALS "mtshr"
+#define JISX0301_DEFAULT_ERA 'H' /* obsolete */
+
static int
gengo(int c)
{
@@ -975,6 +1300,7 @@ gengo(int c)
case 'T': case 't': e = 1911; break;
case 'S': case 's': e = 1925; break;
case 'H': case 'h': e = 1988; break;
+ case 'R': case 'r': e = 2018; break;
default: e = 0; break;
}
return e;
@@ -1003,7 +1329,17 @@ parse_jis_cb(VALUE m, VALUE hash)
static int
parse_jis(VALUE str, VALUE hash)
{
- static const char pat_source[] = "\\b([mtsh])(\\d+)\\.(\\d+)\\.(\\d+)";
+ static const char pat_source[] =
+#ifndef TIGHT_PARSER
+ "\\b([" JISX0301_ERA_INITIALS "])(\\d+)\\.(\\d+)\\.(\\d+)"
+#else
+ BOS
+ FPW_COM FPT_COM
+ "([" JISX0301_ERA_INITIALS "])(\\d+)\\.(\\d+)\\.(\\d+)"
+ TEE_FPT COM_FPW
+ EOS
+#endif
+ ;
static VALUE pat = Qnil;
REGCOMP_I(pat);
@@ -1019,6 +1355,11 @@ parse_vms11_cb(VALUE m, VALUE hash)
mon = rb_reg_nth_match(2, m);
y = rb_reg_nth_match(3, m);
+#ifdef TIGHT_PARSER
+ if (!check_apost(d, mon, y))
+ return 0;
+#endif
+
mon = INT2FIX(mon_num(mon));
s3e(hash, y, mon, d, 0);
@@ -1029,8 +1370,18 @@ static int
parse_vms11(VALUE str, VALUE hash)
{
static const char pat_source[] =
- "('?-?\\d+)-(" ABBR_MONTHS ")[^-]*"
- "-('?-?\\d+)";
+#ifndef TIGHT_PARSER
+ "('?-?" NUMBER "+)-(" ABBR_MONTHS ")[^-/.]*"
+ "-('?-?\\d+)"
+#else
+ BOS
+ FPW_COM FPT_COM
+ "([-']?\\d+)-(" DOTLESS_VALID_MONTHS ")"
+ "-([-']?\\d+)"
+ COM_FPT COM_FPW
+ EOS
+#endif
+ ;
static VALUE pat = Qnil;
REGCOMP_I(pat);
@@ -1046,6 +1397,11 @@ parse_vms12_cb(VALUE m, VALUE hash)
d = rb_reg_nth_match(2, m);
y = rb_reg_nth_match(3, m);
+#ifdef TIGHT_PARSER
+ if (!check_apost(mon, d, y))
+ return 0;
+#endif
+
mon = INT2FIX(mon_num(mon));
s3e(hash, y, mon, d, 0);
@@ -1056,8 +1412,18 @@ static int
parse_vms12(VALUE str, VALUE hash)
{
static const char pat_source[] =
- "\\b(" ABBR_MONTHS ")[^-]*"
- "-('?-?\\d+)(?:-('?-?\\d+))?";
+#ifndef TIGHT_PARSER
+ "\\b(" ABBR_MONTHS ")[^-/.]*"
+ "-('?-?\\d+)(?:-('?-?\\d+))?"
+#else
+ BOS
+ FPW_COM FPT_COM
+ "(" DOTLESS_VALID_MONTHS ")"
+ "-([-']?\\d+)(?:-([-']?\\d+))?"
+ COM_FPT COM_FPW
+ EOS
+#endif
+ ;
static VALUE pat = Qnil;
REGCOMP_I(pat);
@@ -1086,6 +1452,11 @@ parse_sla_cb(VALUE m, VALUE hash)
mon = rb_reg_nth_match(2, m);
d = rb_reg_nth_match(3, m);
+#ifdef TIGHT_PARSER
+ if (!check_apost(y, mon, d))
+ return 0;
+#endif
+
s3e(hash, y, mon, d, 0);
return 1;
}
@@ -1094,13 +1465,92 @@ static int
parse_sla(VALUE str, VALUE hash)
{
static const char pat_source[] =
- "('?-?\\d+)/\\s*('?\\d+)(?:\\D\\s*('?-?\\d+))?";
+#ifndef TIGHT_PARSER
+ "('?-?" NUMBER "+)/\\s*('?\\d+)(?:\\D\\s*('?-?\\d+))?"
+#else
+ BOS
+ FPW_COM FPT_COM
+ "([-']?\\d+)/\\s*('?\\d+)(?:(?:[-/]|\\s+)\\s*([-']?\\d+))?"
+ COM_FPT COM_FPW
+ EOS
+#endif
+ ;
static VALUE pat = Qnil;
REGCOMP_I(pat);
SUBS(str, pat, parse_sla_cb);
}
+#ifdef TIGHT_PARSER
+static int
+parse_sla2_cb(VALUE m, VALUE hash)
+{
+ VALUE y, mon, d;
+
+ d = rb_reg_nth_match(1, m);
+ mon = rb_reg_nth_match(2, m);
+ y = rb_reg_nth_match(3, m);
+
+ if (!check_apost(d, mon, y))
+ return 0;
+
+ mon = INT2FIX(mon_num(mon));
+
+ s3e(hash, y, mon, d, 0);
+ return 1;
+}
+
+static int
+parse_sla2(VALUE str, VALUE hash)
+{
+ static const char pat_source[] =
+ BOS
+ FPW_COM FPT_COM
+ "([-']?\\d+)/\\s*(" DOTLESS_VALID_MONTHS ")(?:(?:[-/]|\\s+)\\s*([-']?\\d+))?"
+ COM_FPT COM_FPW
+ EOS
+ ;
+ static VALUE pat = Qnil;
+
+ REGCOMP_I(pat);
+ SUBS(str, pat, parse_sla2_cb);
+}
+
+static int
+parse_sla3_cb(VALUE m, VALUE hash)
+{
+ VALUE y, mon, d;
+
+ mon = rb_reg_nth_match(1, m);
+ d = rb_reg_nth_match(2, m);
+ y = rb_reg_nth_match(3, m);
+
+ if (!check_apost(mon, d, y))
+ return 0;
+
+ mon = INT2FIX(mon_num(mon));
+
+ s3e(hash, y, mon, d, 0);
+ return 1;
+}
+
+static int
+parse_sla3(VALUE str, VALUE hash)
+{
+ static const char pat_source[] =
+ BOS
+ FPW_COM FPT_COM
+ "(" DOTLESS_VALID_MONTHS ")/\\s*([-']?\\d+)(?:(?:[-/]|\\s+)\\s*([-']?\\d+))?"
+ COM_FPT COM_FPW
+ EOS
+ ;
+ static VALUE pat = Qnil;
+
+ REGCOMP_I(pat);
+ SUBS(str, pat, parse_sla3_cb);
+}
+#endif
+
static int
parse_dot_cb(VALUE m, VALUE hash)
{
@@ -1110,6 +1560,11 @@ parse_dot_cb(VALUE m, VALUE hash)
mon = rb_reg_nth_match(2, m);
d = rb_reg_nth_match(3, m);
+#ifdef TIGHT_PARSER
+ if (!check_apost(y, mon, d))
+ return 0;
+#endif
+
s3e(hash, y, mon, d, 0);
return 1;
}
@@ -1118,13 +1573,92 @@ static int
parse_dot(VALUE str, VALUE hash)
{
static const char pat_source[] =
- "('?-?\\d+)\\.\\s*('?\\d+)\\.\\s*('?-?\\d+)";
+#ifndef TIGHT_PARSER
+ "('?-?" NUMBER "+)\\.\\s*('?\\d+)\\.\\s*('?-?\\d+)"
+#else
+ BOS
+ FPW_COM FPT_COM
+ "([-']?\\d+)\\.\\s*(\\d+)\\.\\s*([-']?\\d+)"
+ COM_FPT COM_FPW
+ EOS
+#endif
+ ;
static VALUE pat = Qnil;
REGCOMP_I(pat);
SUBS(str, pat, parse_dot_cb);
}
+#ifdef TIGHT_PARSER
+static int
+parse_dot2_cb(VALUE m, VALUE hash)
+{
+ VALUE y, mon, d;
+
+ d = rb_reg_nth_match(1, m);
+ mon = rb_reg_nth_match(2, m);
+ y = rb_reg_nth_match(3, m);
+
+ if (!check_apost(d, mon, y))
+ return 0;
+
+ mon = INT2FIX(mon_num(mon));
+
+ s3e(hash, y, mon, d, 0);
+ return 1;
+}
+
+static int
+parse_dot2(VALUE str, VALUE hash)
+{
+ static const char pat_source[] =
+ BOS
+ FPW_COM FPT_COM
+ "([-']?\\d+)\\.\\s*(" DOTLESS_VALID_MONTHS ")(?:(?:[./])\\s*([-']?\\d+))?"
+ COM_FPT COM_FPW
+ EOS
+ ;
+ static VALUE pat = Qnil;
+
+ REGCOMP_I(pat);
+ SUBS(str, pat, parse_dot2_cb);
+}
+
+static int
+parse_dot3_cb(VALUE m, VALUE hash)
+{
+ VALUE y, mon, d;
+
+ mon = rb_reg_nth_match(1, m);
+ d = rb_reg_nth_match(2, m);
+ y = rb_reg_nth_match(3, m);
+
+ if (!check_apost(mon, d, y))
+ return 0;
+
+ mon = INT2FIX(mon_num(mon));
+
+ s3e(hash, y, mon, d, 0);
+ return 1;
+}
+
+static int
+parse_dot3(VALUE str, VALUE hash)
+{
+ static const char pat_source[] =
+ BOS
+ FPW_COM FPT_COM
+ "(" DOTLESS_VALID_MONTHS ")\\.\\s*([-']?\\d+)(?:(?:[./])\\s*([-']?\\d+))?"
+ COM_FPT COM_FPW
+ EOS
+ ;
+ static VALUE pat = Qnil;
+
+ REGCOMP_I(pat);
+ SUBS(str, pat, parse_dot3_cb);
+}
+#endif
+
static int
parse_year_cb(VALUE m, VALUE hash)
{
@@ -1138,7 +1672,17 @@ parse_year_cb(VALUE m, VALUE hash)
static int
parse_year(VALUE str, VALUE hash)
{
- static const char pat_source[] = "'(\\d+)\\b";
+ static const char pat_source[] =
+#ifndef TIGHT_PARSER
+ "'(\\d+)\\b"
+#else
+ BOS
+ FPW_COM FPT_COM
+ "'(\\d+)"
+ COM_FPT COM_FPW
+ EOS
+#endif
+ ;
static VALUE pat = Qnil;
REGCOMP_0(pat);
@@ -1158,7 +1702,17 @@ parse_mon_cb(VALUE m, VALUE hash)
static int
parse_mon(VALUE str, VALUE hash)
{
- static const char pat_source[] = "\\b(" ABBR_MONTHS ")\\S*";
+ static const char pat_source[] =
+#ifndef TIGHT_PARSER
+ "\\b(" ABBR_MONTHS ")\\S*"
+#else
+ BOS
+ FPW_COM FPT_COM
+ "(" VALID_MONTHS ")"
+ COM_FPT COM_FPW
+ EOS
+#endif
+ ;
static VALUE pat = Qnil;
REGCOMP_I(pat);
@@ -1178,7 +1732,17 @@ parse_mday_cb(VALUE m, VALUE hash)
static int
parse_mday(VALUE str, VALUE hash)
{
- static const char pat_source[] = "(\\d+)(st|nd|rd|th)\\b";
+ static const char pat_source[] =
+#ifndef TIGHT_PARSER
+ "(" NUMBER "+)(st|nd|rd|th)\\b"
+#else
+ BOS
+ FPW_COM FPT_COM
+ "(\\d+)(st|nd|rd|th)"
+ COM_FPT COM_FPW
+ EOS
+#endif
+ ;
static VALUE pat = Qnil;
REGCOMP_I(pat);
@@ -1374,28 +1938,26 @@ parse_ddd_cb(VALUE m, VALUE hash)
set_hash("zone", s5);
if (*cs5 == '[') {
- char *buf = ALLOCA_N(char, l5 + 1);
- char *s1, *s2, *s3;
+ const char *s1, *s2;
VALUE zone;
- memcpy(buf, cs5, l5);
- buf[l5 - 1] = '\0';
-
- s1 = buf + 1;
- s2 = strchr(buf, ':');
+ l5 -= 2;
+ s1 = cs5 + 1;
+ s2 = memchr(s1, ':', l5);
if (s2) {
- *s2 = '\0';
s2++;
+ zone = rb_str_subseq(s5, s2 - cs5, l5 - (s2 - s1));
+ s5 = rb_str_subseq(s5, 1, s2 - s1);
}
- if (s2)
- s3 = s2;
- else
- s3 = s1;
- zone = rb_str_new2(s3);
+ else {
+ zone = rb_str_subseq(s5, 1, l5);
+ if (isdigit((unsigned char)*s1))
+ s5 = rb_str_append(rb_str_new_cstr("+"), zone);
+ else
+ s5 = zone;
+ }
set_hash("zone", zone);
- if (isdigit(*s1))
- *--s1 = '+';
- set_hash("offset", date_zone_to_diff(rb_str_new2(s1)));
+ set_hash("offset", date_zone_to_diff(s5));
}
RB_GC_GUARD(s5);
}
@@ -1407,7 +1969,10 @@ static int
parse_ddd(VALUE str, VALUE hash)
{
static const char pat_source[] =
- "([-+]?)(\\d{2,14})"
+#ifdef TIGHT_PARSER
+ BOS
+#endif
+ "([-+]?)(" NUMBER "{2,14})"
"(?:"
"\\s*"
"t?"
@@ -1423,22 +1988,22 @@ parse_ddd(VALUE str, VALUE hash)
"|"
"\\[[-+]?\\d[^\\]]*\\]"
")"
- ")?";
+ ")?"
+#ifdef TIGHT_PARSER
+ EOS
+#endif
+ ;
static VALUE pat = Qnil;
REGCOMP_I(pat);
SUBS(str, pat, parse_ddd_cb);
}
+#ifndef TIGHT_PARSER
static int
parse_bc_cb(VALUE m, VALUE hash)
{
- VALUE y;
-
- y = ref_hash("year");
- if (!NIL_P(y))
- set_hash("year", f_add(f_negate(y), INT2FIX(1)));
-
+ set_hash("_bc", Qtrue);
return 1;
}
@@ -1485,6 +2050,57 @@ parse_frag(VALUE str, VALUE hash)
REGCOMP_I(pat);
SUBS(str, pat, parse_frag_cb);
}
+#endif
+
+#ifdef TIGHT_PARSER
+static int
+parse_dummy_cb(VALUE m, VALUE hash)
+{
+ return 1;
+}
+
+static int
+parse_wday_only(VALUE str, VALUE hash)
+{
+ static const char pat_source[] = "\\A\\s*" FPW "\\s*\\z";
+ static VALUE pat = Qnil;
+
+ REGCOMP_0(pat);
+ SUBS(str, pat, parse_dummy_cb);
+}
+
+static int
+parse_time_only(VALUE str, VALUE hash)
+{
+ static const char pat_source[] = "\\A\\s*" FPT "\\s*\\z";
+ static VALUE pat = Qnil;
+
+ REGCOMP_0(pat);
+ SUBS(str, pat, parse_dummy_cb);
+}
+
+static int
+parse_wday_and_time(VALUE str, VALUE hash)
+{
+ static const char pat_source[] = "\\A\\s*(" FPW "\\s+" FPT "|" FPT "\\s+" FPW ")\\s*\\z";
+ static VALUE pat = Qnil;
+
+ REGCOMP_0(pat);
+ SUBS(str, pat, parse_dummy_cb);
+}
+
+static unsigned
+have_invalid_char_p(VALUE s)
+{
+ long i;
+
+ for (i = 0; i < RSTRING_LEN(s); i++)
+ if (iscntrl((unsigned char)RSTRING_PTR(s)[i]) &&
+ !isspace((unsigned char)RSTRING_PTR(s)[i]))
+ return 1;
+ return 0;
+}
+#endif
#define HAVE_ALPHA (1<<0)
#define HAVE_DIGIT (1<<1)
@@ -1500,9 +2116,9 @@ check_class(VALUE s)
flags = 0;
for (i = 0; i < RSTRING_LEN(s); i++) {
- if (isalpha(RSTRING_PTR(s)[i]))
+ if (isalpha((unsigned char)RSTRING_PTR(s)[i]))
flags |= HAVE_ALPHA;
- if (isdigit(RSTRING_PTR(s)[i]))
+ if (isdigit((unsigned char)RSTRING_PTR(s)[i]))
flags |= HAVE_DIGIT;
if (RSTRING_PTR(s)[i] == '-')
flags |= HAVE_DASH;
@@ -1516,16 +2132,31 @@ check_class(VALUE s)
#define HAVE_ELEM_P(x) ((check_class(str) & (x)) == (x))
+#ifdef TIGHT_PARSER
+#define PARSER_ERROR return rb_hash_new()
+#endif
+
VALUE
date__parse(VALUE str, VALUE comp)
{
VALUE backref, hash;
+#ifdef TIGHT_PARSER
+ if (have_invalid_char_p(str))
+ PARSER_ERROR;
+#endif
+
backref = rb_backref_get();
rb_match_busy(backref);
{
- static const char pat_source[] = "[^-+',./:@[:alnum:]\\[\\]]+";
+ static const char pat_source[] =
+#ifndef TIGHT_PARSER
+ "[^-+',./:@[:alnum:]\\[\\]]+"
+#else
+ "[^[:graph:]]+"
+#endif
+ ;
static VALUE pat = Qnil;
REGCOMP_0(pat);
@@ -1541,12 +2172,17 @@ date__parse(VALUE str, VALUE comp)
if (HAVE_ELEM_P(HAVE_DIGIT))
parse_time(str, hash);
- if (HAVE_ELEM_P(HAVE_ALPHA|HAVE_DIGIT))
+#ifdef TIGHT_PARSER
+ if (HAVE_ELEM_P(HAVE_ALPHA))
+ parse_era(str, hash);
+#endif
+
+ if (HAVE_ELEM_P(HAVE_ALPHA|HAVE_DIGIT)) {
if (parse_eu(str, hash))
goto ok;
- if (HAVE_ELEM_P(HAVE_ALPHA|HAVE_DIGIT))
if (parse_us(str, hash))
goto ok;
+ }
if (HAVE_ELEM_P(HAVE_DIGIT|HAVE_DASH))
if (parse_iso(str, hash))
goto ok;
@@ -1559,9 +2195,25 @@ date__parse(VALUE str, VALUE comp)
if (HAVE_ELEM_P(HAVE_DIGIT|HAVE_SLASH))
if (parse_sla(str, hash))
goto ok;
+#ifdef TIGHT_PARSER
+ if (HAVE_ELEM_P(HAVE_ALPHA|HAVE_DIGIT|HAVE_SLASH)) {
+ if (parse_sla2(str, hash))
+ goto ok;
+ if (parse_sla3(str, hash))
+ goto ok;
+ }
+#endif
if (HAVE_ELEM_P(HAVE_DIGIT|HAVE_DOT))
if (parse_dot(str, hash))
goto ok;
+#ifdef TIGHT_PARSER
+ if (HAVE_ELEM_P(HAVE_ALPHA|HAVE_DIGIT|HAVE_DOT)) {
+ if (parse_dot2(str, hash))
+ goto ok;
+ if (parse_dot3(str, hash))
+ goto ok;
+ }
+#endif
if (HAVE_ELEM_P(HAVE_DIGIT))
if (parse_iso2(str, hash))
goto ok;
@@ -1578,14 +2230,42 @@ date__parse(VALUE str, VALUE comp)
if (parse_ddd(str, hash))
goto ok;
+#ifdef TIGHT_PARSER
+ if (parse_wday_only(str, hash))
+ goto ok;
+ if (parse_time_only(str, hash))
+ goto ok;
+ if (parse_wday_and_time(str, hash))
+ goto ok;
+
+ PARSER_ERROR; /* not found */
+#endif
+
ok:
+#ifndef TIGHT_PARSER
if (HAVE_ELEM_P(HAVE_ALPHA))
parse_bc(str, hash);
if (HAVE_ELEM_P(HAVE_DIGIT))
parse_frag(str, hash);
+#endif
{
- if (RTEST(ref_hash("_comp"))) {
+ if (RTEST(del_hash("_bc"))) {
+ VALUE y;
+
+ y = ref_hash("cwyear");
+ if (!NIL_P(y)) {
+ y = f_add(f_negate(y), INT2FIX(1));
+ set_hash("cwyear", y);
+ }
+ y = ref_hash("year");
+ if (!NIL_P(y)) {
+ y = f_add(f_negate(y), INT2FIX(1));
+ set_hash("year", y);
+ }
+ }
+
+ if (RTEST(del_hash("_comp"))) {
VALUE y;
y = ref_hash("cwyear");
@@ -1605,9 +2285,8 @@ date__parse(VALUE str, VALUE comp)
set_hash("year", f_add(y, INT2FIX(2000)));
}
}
- }
- del_hash("_comp");
+ }
{
VALUE zone = ref_hash("zone");
@@ -1658,8 +2337,8 @@ iso8601_ext_datetime_cb(VALUE m, VALUE hash)
s[i] = rb_reg_nth_match(i, m);
}
- if (!NIL_P(s[3])) {
- set_hash("mday", str2num(s[3]));
+ if (!NIL_P(s[1])) {
+ if (!NIL_P(s[3])) set_hash("mday", str2num(s[3]));
if (strcmp(RSTRING_PTR(s[1]), "-") != 0) {
y = str2num(s[1]);
if (RSTRING_LEN(s[1]) < 4)
@@ -1716,7 +2395,7 @@ static int
iso8601_ext_datetime(VALUE str, VALUE hash)
{
static const char pat_source[] =
- "\\A\\s*(?:([-+]?\\d{2,}|-)-(\\d{2})?-(\\d{2})|"
+ "\\A\\s*(?:([-+]?\\d{2,}|-)-(\\d{2})?(?:-(\\d{2}))?|"
"([-+]?\\d{2,})?-(\\d{3})|"
"(\\d{4}|\\d{2})?-w(\\d{2})-(\\d)|"
"-w-(\\d))"
@@ -1726,7 +2405,7 @@ iso8601_ext_datetime(VALUE str, VALUE hash)
static VALUE pat = Qnil;
REGCOMP_I(pat);
- SUBS(str, pat, iso8601_ext_datetime_cb);
+ MATCH(str, pat, iso8601_ext_datetime_cb);
}
#undef SNUM
@@ -1817,7 +2496,7 @@ iso8601_bas_datetime(VALUE str, VALUE hash)
static VALUE pat = Qnil;
REGCOMP_I(pat);
- SUBS(str, pat, iso8601_bas_datetime_cb);
+ MATCH(str, pat, iso8601_bas_datetime_cb);
}
#undef SNUM
@@ -1855,24 +2534,24 @@ static int
iso8601_ext_time(VALUE str, VALUE hash)
{
static const char pat_source[] =
- "\\A\\s*(?:(\\d{2}):(\\d{2})(?::(\\d{2})(?:[,.](\\d+))?)?"
+ "\\A\\s*(\\d{2}):(\\d{2})(?::(\\d{2})(?:[,.](\\d+))?"
"(z|[-+]\\d{2}(:?\\d{2})?)?)?\\s*\\z";
static VALUE pat = Qnil;
REGCOMP_I(pat);
- SUBS(str, pat, iso8601_ext_time_cb);
+ MATCH(str, pat, iso8601_ext_time_cb);
}
static int
iso8601_bas_time(VALUE str, VALUE hash)
{
static const char pat_source[] =
- "\\A\\s*(?:(\\d{2})(\\d{2})(?:(\\d{2})(?:[,.](\\d+))?)?"
+ "\\A\\s*(\\d{2})(\\d{2})(?:(\\d{2})(?:[,.](\\d+))?"
"(z|[-+]\\d{2}(\\d{2})?)?)?\\s*\\z";
static VALUE pat = Qnil;
REGCOMP_I(pat);
- SUBS(str, pat, iso8601_bas_time_cb);
+ MATCH(str, pat, iso8601_bas_time_cb);
}
VALUE
@@ -1940,7 +2619,7 @@ rfc3339(VALUE str, VALUE hash)
static VALUE pat = Qnil;
REGCOMP_I(pat);
- SUBS(str, pat, rfc3339_cb);
+ MATCH(str, pat, rfc3339_cb);
}
VALUE
@@ -2004,7 +2683,7 @@ xmlschema_datetime(VALUE str, VALUE hash)
static VALUE pat = Qnil;
REGCOMP_I(pat);
- SUBS(str, pat, xmlschema_datetime_cb);
+ MATCH(str, pat, xmlschema_datetime_cb);
}
#undef SNUM
@@ -2045,7 +2724,7 @@ xmlschema_time(VALUE str, VALUE hash)
static VALUE pat = Qnil;
REGCOMP_I(pat);
- SUBS(str, pat, xmlschema_time_cb);
+ MATCH(str, pat, xmlschema_time_cb);
}
#undef SNUM
@@ -2086,7 +2765,7 @@ xmlschema_trunc(VALUE str, VALUE hash)
static VALUE pat = Qnil;
REGCOMP_I(pat);
- SUBS(str, pat, xmlschema_trunc_cb);
+ MATCH(str, pat, xmlschema_trunc_cb);
}
VALUE
@@ -2127,7 +2806,9 @@ rfc2822_cb(VALUE m, VALUE hash)
s[i] = rb_reg_nth_match(i, m);
}
- set_hash("wday", INT2FIX(day_num(s[1])));
+ if (!NIL_P(s[1])) {
+ set_hash("wday", INT2FIX(day_num(s[1])));
+ }
set_hash("mday", str2num(s[2]));
set_hash("mon", INT2FIX(mon_num(s[3])));
y = str2num(s[4]);
@@ -2157,7 +2838,7 @@ rfc2822(VALUE str, VALUE hash)
static VALUE pat = Qnil;
REGCOMP_I(pat);
- SUBS(str, pat, rfc2822_cb);
+ MATCH(str, pat, rfc2822_cb);
}
VALUE
@@ -2215,7 +2896,7 @@ httpdate_type1(VALUE str, VALUE hash)
static VALUE pat = Qnil;
REGCOMP_I(pat);
- SUBS(str, pat, httpdate_type1_cb);
+ MATCH(str, pat, httpdate_type1_cb);
}
#undef SNUM
@@ -2262,7 +2943,7 @@ httpdate_type2(VALUE str, VALUE hash)
static VALUE pat = Qnil;
REGCOMP_I(pat);
- SUBS(str, pat, httpdate_type2_cb);
+ MATCH(str, pat, httpdate_type2_cb);
}
#undef SNUM
@@ -2303,7 +2984,7 @@ httpdate_type3(VALUE str, VALUE hash)
static VALUE pat = Qnil;
REGCOMP_I(pat);
- SUBS(str, pat, httpdate_type3_cb);
+ MATCH(str, pat, httpdate_type3_cb);
}
VALUE
@@ -2345,7 +3026,7 @@ jisx0301_cb(VALUE m, VALUE hash)
s[i] = rb_reg_nth_match(i, m);
}
- ep = gengo(NIL_P(s[1]) ? 'h' : *RSTRING_PTR(s[1]));
+ ep = gengo(NIL_P(s[1]) ? JISX0301_DEFAULT_ERA : *RSTRING_PTR(s[1]));
set_hash("year", f_add(str2num(s[2]), INT2FIX(ep)));
set_hash("mon", str2num(s[3]));
set_hash("mday", str2num(s[4]));
@@ -2370,14 +3051,14 @@ static int
jisx0301(VALUE str, VALUE hash)
{
static const char pat_source[] =
- "\\A\\s*([mtsh])?(\\d{2})\\.(\\d{2})\\.(\\d{2})"
+ "\\A\\s*([" JISX0301_ERA_INITIALS "])?(\\d{2})\\.(\\d{2})\\.(\\d{2})"
"(?:t"
"(?:(\\d{2}):(\\d{2})(?::(\\d{2})(?:[,.](\\d*))?)?"
"(z|[-+]\\d{2}(?::?\\d{2})?)?)?)?\\s*\\z";
static VALUE pat = Qnil;
REGCOMP_I(pat);
- SUBS(str, pat, jisx0301_cb);
+ MATCH(str, pat, jisx0301_cb);
}
VALUE