summaryrefslogtreecommitdiff
path: root/ext/date/date_strptime.c
diff options
context:
space:
mode:
Diffstat (limited to 'ext/date/date_strptime.c')
-rw-r--r--ext/date/date_strptime.c695
1 files changed, 695 insertions, 0 deletions
diff --git a/ext/date/date_strptime.c b/ext/date/date_strptime.c
new file mode 100644
index 0000000000..27f0893efc
--- /dev/null
+++ b/ext/date/date_strptime.c
@@ -0,0 +1,695 @@
+/*
+ date_strptime.c: Coded by Tadayoshi Funaba 2011
+*/
+
+#include "ruby.h"
+#include "ruby/encoding.h"
+#include "ruby/re.h"
+#include <ctype.h>
+
/*
 * Day names accepted by %A / %a: the seven full names followed by the
 * seven three-letter abbreviations, each group starting at Sunday, so
 * (index % 7) is the weekday number with Sunday == 0.  Matching is by
 * prefix and takes the first hit, so the full names must stay ahead of
 * their abbreviations.
 */
static const char *const day_names[] = {
    "Sunday", "Monday", "Tuesday", "Wednesday",
    "Thursday", "Friday", "Saturday",
    "Sun", "Mon", "Tue", "Wed",
    "Thu", "Fri", "Sat"
};

/*
 * Month names accepted by %B / %b / %h: twelve full names followed by
 * twelve abbreviations, so (index % 12) + 1 is the month number.
 * Full names come first for the same first-prefix-wins reason as above.
 */
static const char *const month_names[] = {
    "January", "February", "March", "April",
    "May", "June", "July", "August", "September",
    "October", "November", "December",
    "Jan", "Feb", "Mar", "Apr", "May", "Jun",
    "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
};

/*
 * Meridian indicators accepted by %P / %p.  Even indexes are the "am"
 * spellings and odd indexes the "pm" spellings; the parser relies on
 * that parity.
 */
static const char *const merid_names[] = {
    "am", "pm",
    "a.m.", "p.m."
};

/*
 * Extended time-zone conversions (the text following '%').  Entry i has
 * i + 1 colons before the 'z'; the parser skips i extra characters on a
 * match, so the order must be kept.
 */
static const char *const extz_pats[] = {
    ":z",
    "::z",
    ":::z"
};
+
/*
 * Element count of a true array (not a pointer).  The operand is
 * parenthesized so that an expression argument is measured as a whole.
 */
#define sizeof_array(o) (sizeof (o) / sizeof (o)[0])

/*
 * Thin wrappers that send an arithmetic method to a Ruby object.
 * Single-character operators are passed as a character constant in the
 * method-ID position; multi-character ones go through rb_intern().
 */
#define f_negate(x) rb_funcall(x, rb_intern("-@"), 0)
#define f_add(x,y) rb_funcall(x, '+', 1, y)
#define f_sub(x,y) rb_funcall(x, '-', 1, y)
#define f_mul(x,y) rb_funcall(x, '*', 1, y)
#define f_div(x,y) rb_funcall(x, '/', 1, y)
#define f_mod(x,y) rb_funcall(x, '%', 1, y)
#define f_expt(x,y) rb_funcall(x, rb_intern("**"), 1, y)

/* Comparison wrappers; each yields the VALUE returned by the method. */
#define f_lt_p(x,y) rb_funcall(x, '<', 1, y)
#define f_gt_p(x,y) rb_funcall(x, '>', 1, y)
#define f_le_p(x,y) rb_funcall(x, rb_intern("<="), 1, y)
#define f_ge_p(x,y) rb_funcall(x, rb_intern(">="), 1, y)

/* Regexp / match-data helpers: r.match(s), o[i] and o.end(i). */
#define f_match(r,s) rb_funcall(r, rb_intern("match"), 1, s)
#define f_aref(o,i) rb_funcall(o, rb_intern("[]"), 1, i)
#define f_end(o,i) rb_funcall(o, rb_intern("end"), 1, i)
+
/*
 * Tell whether the format text at s starts with something that consumes
 * digits from the input: either a literal digit, or a '%' conversion
 * (optionally carrying an 'E' or 'O' modifier) whose letter is one of
 * the numeric conversions listed below, or is itself a digit.
 *
 * s must point at a NUL-terminated string.  Returns 1 or 0.
 *
 * Characters are converted to unsigned char before being handed to
 * isdigit(); passing a negative plain char is undefined behavior.
 */
static int
num_pattern_p(const char *s)
{
    if (isdigit((unsigned char)*s))
	return 1;

    if (*s != '%')
	return 0;

    s++;
    if (*s == 'E' || *s == 'O')
	s++;

    /* strchr() would "find" the terminator, so rule it out first. */
    if (*s == '\0')
	return 0;

    return strchr("CDdeFGgHIjkLlMmNQRrSsTUuVvWwXxYy", *s) != NULL ||
	isdigit((unsigned char)*s);
}

/*
 * Applies num_pattern_p() to whatever follows the conversion currently
 * being handled; expects `fmt` and `fi` in the enclosing scope.
 */
#define NUM_PATTERN_P() num_pattern_p(&fmt[fi + 1])
+
+static long
+read_digits(const char *s, VALUE *n, size_t width)
+{
+ size_t l;
+
+ l = strspn(s, "0123456789");
+
+ if (l == 0)
+ return 0;
+
+ if (width < l)
+ l = width;
+
+ if ((4 * l * sizeof(char)) <= (sizeof(long)*CHAR_BIT)) {
+ const char *os = s;
+ long v;
+
+ v = 0;
+ while ((size_t)(s - os) < l) {
+ v *= 10;
+ v += *s - '0';
+ s++;
+ }
+ if (os == s)
+ return 0;
+ *n = LONG2NUM(v);
+ return l;
+ }
+ else {
+ char *s2 = ALLOCA_N(char, l + 1);
+ memcpy(s2, s, l);
+ s2[l] = '\0';
+ *n = rb_cstr_to_inum(s2, 10, 0);
+ return l;
+ }
+}
+
/*
 * Accessors for the result hash.  Each expects a VALUE named `hash` in
 * the enclosing scope and addresses the entry keyed by the symbol made
 * from the C string k.
 */
#define set_hash(k,v) rb_hash_aset(hash, ID2SYM(rb_intern(k)), v)
#define ref_hash(k) rb_hash_aref(hash, ID2SYM(rb_intern(k)))
#define del_hash(k) rb_hash_delete(hash, ID2SYM(rb_intern(k)))

/*
 * Flag the parse as failed in the hash and leave the enclosing function
 * with 0.  This is deliberately a bare brace block: call sites use it as
 * the body of an unbraced `if`.
 */
#define fail() \
{ \
    set_hash("_fail", Qtrue); \
    return 0; \
}

/* True once fail() has flagged the hash. */
#define fail_p() (ref_hash("_fail") != Qnil)

/*
 * Read at most w digits from str[si] into n and advance si past them;
 * the parse fails when no digit is present.  Expects `str` and `si` in
 * the enclosing scope.  Kept as a bare brace block because existing call
 * sites write `READ_DIGITS(...) else ...` with no semicolon in between.
 */
#define READ_DIGITS(n,w) \
{ \
    size_t ndigits = read_digits(&str[si], &n, w); \
    if (ndigits == 0) \
	fail(); \
    si += ndigits; \
}

/* READ_DIGITS with no practical limit on the field width. */
#define READ_DIGITS_MAX(n) READ_DIGITS(n, LONG_MAX)
+
+static int
+valid_range_p(VALUE v, int a, int b)
+{
+ if (FIXNUM_P(v)) {
+ int vi = FIX2INT(v);
+ return !(vi < a || vi > b);
+ }
+ return !(f_lt_p(v, INT2NUM(a)) || f_gt_p(v, INT2NUM(b)));
+}
+
/*
 * Expand a composite conversion: parse the remaining input against the
 * string literal fmt with date__strptime_internal(), then advance si by
 * however much that call consumed.  If the nested parse flagged a
 * failure, leave the enclosing function with 0.  Expects `str`, `slen`,
 * `si` and `hash` in the enclosing scope; fmt must be a literal because
 * its length is taken with sizeof.
 */
#define recur(fmt) \
{ \
    size_t consumed = date__strptime_internal(&str[si], slen - si, \
					      fmt, sizeof(fmt) - 1, hash); \
    if (fail_p()) \
	return 0; \
    si += consumed; \
}
+
+static size_t
+date__strptime_internal(const char *str, size_t slen,
+ const char *fmt, size_t flen, VALUE hash)
+{
+ size_t si, fi;
+ int c;
+
+ si = fi = 0;
+
+ while (fi < flen) {
+
+ switch (fmt[fi]) {
+ case '%':
+
+ again:
+ fi++;
+ c = fmt[fi];
+
+ switch (c) {
+ case 'E':
+ if (fmt[fi + 1] && strchr("cCxXyY", fmt[fi + 1]))
+ goto again;
+ fi--;
+ goto ordinal;
+ case 'O':
+ if (fmt[fi + 1] && strchr("deHImMSuUVwWy", fmt[fi + 1]))
+ goto again;
+ fi--;
+ goto ordinal;
+ case ':':
+ {
+ int i;
+
+ for (i = 0; i < (int)sizeof_array(extz_pats); i++)
+ if (strncmp(extz_pats[i], &fmt[fi],
+ strlen(extz_pats[i])) == 0) {
+ fi += i;
+ goto again;
+ }
+ fail();
+ }
+
+ case 'A':
+ case 'a':
+ {
+ int i;
+
+ for (i = 0; i < (int)sizeof_array(day_names); i++) {
+ size_t l = strlen(day_names[i]);
+ if (strncasecmp(day_names[i], &str[si], l) == 0) {
+ si += l;
+ set_hash("wday", INT2FIX(i % 7));
+ goto matched;
+ }
+ }
+ fail();
+ }
+ case 'B':
+ case 'b':
+ case 'h':
+ {
+ int i;
+
+ for (i = 0; i < (int)sizeof_array(month_names); i++) {
+ size_t l = strlen(month_names[i]);
+ if (strncasecmp(month_names[i], &str[si], l) == 0) {
+ si += l;
+ set_hash("mon", INT2FIX((i % 12) + 1));
+ goto matched;
+ }
+ }
+ fail();
+ }
+
+ case 'C':
+ {
+ VALUE n;
+
+ if (NUM_PATTERN_P())
+ READ_DIGITS(n, 2)
+ else
+ READ_DIGITS_MAX(n)
+ set_hash("_cent", n);
+ goto matched;
+ }
+
+ case 'c':
+ recur("%a %b %e %H:%M:%S %Y");
+ goto matched;
+
+ case 'D':
+ recur("%m/%d/%y");
+ goto matched;
+
+ case 'd':
+ case 'e':
+ {
+ VALUE n;
+
+ if (str[si] == ' ') {
+ si++;
+ READ_DIGITS(n, 1);
+ } else {
+ READ_DIGITS(n, 2);
+ }
+ if (!valid_range_p(n, 1, 31))
+ fail();
+ set_hash("mday", n);
+ goto matched;
+ }
+
+ case 'F':
+ recur("%Y-%m-%d");
+ goto matched;
+
+ case 'G':
+ {
+ VALUE n;
+
+ if (NUM_PATTERN_P())
+ READ_DIGITS(n, 4)
+ else
+ READ_DIGITS_MAX(n)
+ set_hash("cwyear", n);
+ goto matched;
+ }
+
+ case 'g':
+ {
+ VALUE n;
+
+ READ_DIGITS(n, 2);
+ if (!valid_range_p(n, 0, 99))
+ fail();
+ set_hash("cwyear",n);
+ set_hash("_cent",
+ INT2FIX(f_ge_p(n, INT2FIX(69)) ? 19 : 20));
+ goto matched;
+ }
+
+ case 'H':
+ case 'k':
+ {
+ VALUE n;
+
+ if (str[si] == ' ') {
+ si++;
+ READ_DIGITS(n, 1);
+ } else {
+ READ_DIGITS(n, 2);
+ }
+ if (!valid_range_p(n, 0, 24))
+ fail();
+ set_hash("hour", n);
+ goto matched;
+ }
+
+ case 'I':
+ case 'l':
+ {
+ VALUE n;
+
+ if (str[si] == ' ') {
+ si++;
+ READ_DIGITS(n, 1);
+ } else {
+ READ_DIGITS(n, 2);
+ }
+ if (!valid_range_p(n, 1, 12))
+ fail();
+ set_hash("hour", n);
+ goto matched;
+ }
+
+ case 'j':
+ {
+ VALUE n;
+
+ READ_DIGITS(n, 3);
+ if (!valid_range_p(n, 1, 366))
+ fail();
+ set_hash("yday", n);
+ goto matched;
+ }
+
+ case 'L':
+ case 'N':
+ {
+ VALUE n;
+ int sign = 1;
+ size_t osi;
+
+ if (str[si] == '-' || str[si] == '+') {
+ if (str[si] == '-')
+ sign = -1;
+ si++;
+ }
+ osi = si;
+ if (NUM_PATTERN_P())
+ READ_DIGITS(n, c == 'L' ? 3 : 9)
+ else
+ READ_DIGITS_MAX(n)
+ if (sign == -1)
+ n = f_negate(n);
+ set_hash("sec_fraction",
+ rb_rational_new2(n,
+ f_expt(INT2FIX(10),
+ ULONG2NUM(si - osi))));
+ goto matched;
+ }
+
+ case 'M':
+ {
+ VALUE n;
+
+ READ_DIGITS(n, 2);
+ if (!valid_range_p(n, 0, 59))
+ fail();
+ set_hash("min", n);
+ goto matched;
+ }
+
+ case 'm':
+ {
+ VALUE n;
+
+ READ_DIGITS(n, 2);
+ if (!valid_range_p(n, 1, 12))
+ fail();
+ set_hash("mon", n);
+ goto matched;
+ }
+
+ case 'n':
+ case 't':
+ recur(" ");
+ goto matched;
+
+ case 'P':
+ case 'p':
+ {
+ int i;
+
+ for (i = 0; i < 4; i++) {
+ size_t l = strlen(merid_names[i]);
+ if (strncasecmp(merid_names[i], &str[si], l) == 0) {
+ si += l;
+ set_hash("_merid", INT2FIX((i % 2) == 0 ? 0 : 12));
+ goto matched;
+ }
+ }
+ fail();
+ }
+
+ case 'Q':
+ {
+ VALUE n;
+ int sign = 1;
+
+ if (str[si] == '-') {
+ sign = -1;
+ si++;
+ }
+ READ_DIGITS_MAX(n);
+ if (sign == -1)
+ n = f_negate(n);
+ set_hash("seconds",
+ rb_rational_new2(n,
+ f_expt(INT2FIX(10),
+ INT2FIX(3))));
+ goto matched;
+ }
+
+ case 'R':
+ recur("%H:%M");
+ goto matched;
+
+ case 'r':
+ recur("%I:%M:%S %p");
+ goto matched;
+
+ case 'S':
+ {
+ VALUE n;
+
+ READ_DIGITS(n, 2);
+ if (!valid_range_p(n, 0, 60))
+ fail();
+ set_hash("sec", n);
+ goto matched;
+ }
+
+ case 's':
+ {
+ VALUE n;
+ int sign = 1;
+
+ if (str[si] == '-') {
+ sign = -1;
+ si++;
+ }
+ READ_DIGITS_MAX(n);
+ if (sign == -1)
+ n = f_negate(n);
+ set_hash("seconds", n);
+ goto matched;
+ }
+
+ case 'T':
+ recur("%H:%M:%S");
+ goto matched;
+
+ case 'U':
+ case 'W':
+ {
+ VALUE n;
+
+ READ_DIGITS(n, 2);
+ if (!valid_range_p(n, 0, 53))
+ fail();
+ set_hash(c == 'U' ? "wnum0" : "wnum1", n);
+ goto matched;
+ }
+
+ case 'u':
+ {
+ VALUE n;
+
+ READ_DIGITS(n, 1);
+ if (!valid_range_p(n, 1, 7))
+ fail();
+ set_hash("cwday", n);
+ goto matched;
+ }
+
+ case 'V':
+ {
+ VALUE n;
+
+ READ_DIGITS(n, 2);
+ if (!valid_range_p(n, 1, 53))
+ fail();
+ set_hash("cweek", n);
+ goto matched;
+ }
+
+ case 'v':
+ recur("%e-%b-%Y");
+ goto matched;
+
+ case 'w':
+ {
+ VALUE n;
+
+ READ_DIGITS(n, 1);
+ if (!valid_range_p(n, 0, 6))
+ fail();
+ set_hash("wday", n);
+ goto matched;
+ }
+
+ case 'X':
+ recur("%H:%M:%S");
+ goto matched;
+
+ case 'x':
+ recur("%m/%d/%y");
+ goto matched;
+
+ case 'Y':
+ {
+ VALUE n;
+ int sign = 1;
+
+ if (str[si] == '-' || str[si] == '+') {
+ if (str[si] == '-')
+ sign = -1;
+ si++;
+ }
+ if (NUM_PATTERN_P())
+ READ_DIGITS(n, 4)
+ else
+ READ_DIGITS_MAX(n)
+ if (sign == -1)
+ n = f_negate(n);
+ set_hash("year", n);
+ goto matched;
+ }
+
+ case 'y':
+ {
+ VALUE n;
+ int sign = 1;
+
+ READ_DIGITS(n, 2);
+ if (!valid_range_p(n, 0, 99))
+ fail();
+ if (sign == -1)
+ n = f_negate(n);
+ set_hash("year", n);
+ set_hash("_cent",
+ INT2FIX(f_ge_p(n, INT2FIX(69)) ? 19 : 20));
+ goto matched;
+ }
+
+
+ case 'Z':
+ case 'z':
+ {
+ static const char pat_source[] =
+ "\\A("
+ "(?:gmt|utc?)?[-+]\\d+(?:[,.:]\\d+(?::\\d+)?)?"
+ "|[[:alpha:].\\s]+(?:standard|daylight)\\s+time\\b"
+ "|[[:alpha:]]+(?:\\s+dst)?\\b"
+ ")";
+ static VALUE pat = Qnil;
+ VALUE m, b;
+ VALUE zone_to_diff(VALUE s);
+
+ if (NIL_P(pat)) {
+ pat = rb_reg_new(pat_source, sizeof pat_source - 1,
+ ONIG_OPTION_IGNORECASE);
+ rb_gc_register_mark_object(pat);
+ }
+
+ b = rb_backref_get();
+ m = f_match(pat, rb_usascii_str_new2(&str[si]));
+
+ if (!NIL_P(m)) {
+ VALUE s, l, o;
+
+ s = f_aref(m, INT2FIX(1));
+ l = f_end(m, INT2FIX(0));
+ o = zone_to_diff(s);
+ si += NUM2LONG(l);
+ set_hash("zone", s);
+ set_hash("offset", o);
+ rb_backref_set(b);
+ goto matched;
+ }
+ rb_backref_set(b);
+ fail();
+ }
+
+ case '%':
+ if (str[si] != '%')
+ fail();
+ si++;
+ goto matched;
+
+ case '+':
+ recur("%a %b %e %H:%M:%S %Z %Y");
+ goto matched;
+
+ default:
+ if (str[si] != '%')
+ fail();
+ si++;
+ if (fi < flen)
+ if (str[si] != fmt[fi])
+ fail();
+ si++;
+ goto matched;
+ }
+ case ' ':
+ case '\t':
+ case '\n':
+ case '\v':
+ case '\f':
+ case '\r':
+ while (isspace(str[si]))
+ si++;
+ fi++;
+ break;
+ default:
+ ordinal:
+ if (str[si] != fmt[fi])
+ fail();
+ si++;
+ fi++;
+ break;
+ matched:
+ fi++;
+ break;
+ }
+ }
+
+ {
+ VALUE s;
+
+ if (slen > si) {
+ s = rb_usascii_str_new(&str[si], slen - si);
+ set_hash("leftover", s);
+ }
+ }
+
+ return si;
+}
+
+VALUE
+date__strptime(const char *str, size_t slen,
+ const char *fmt, size_t flen, VALUE hash)
+{
+ VALUE cent, merid;
+
+ date__strptime_internal(str, slen, fmt, flen, hash);
+
+ if (fail_p())
+ return Qnil;
+
+ cent = ref_hash("_cent");
+ if (!NIL_P(cent)) {
+ VALUE year;
+
+ year = ref_hash("cwyear");
+ if (!NIL_P(year))
+ set_hash("cwyear", f_add(year, f_mul(cent, INT2FIX(100))));
+ year = ref_hash("year");
+ if (!NIL_P(year))
+ set_hash("year", f_add(year, f_mul(cent, INT2FIX(100))));
+ del_hash("_cent");
+ }
+
+ merid = ref_hash("_merid");
+ if (!NIL_P(merid)) {
+ VALUE hour;
+
+ hour = ref_hash("hour");
+ if (!NIL_P(hour)) {
+ hour = f_mod(hour, INT2FIX(12));
+ set_hash("hour", f_add(hour, merid));
+ }
+ del_hash("_merid");
+ }
+
+ return hash;
+}
+
+/*
+Local variables:
+c-file-style: "ruby"
+End:
+*/