libiso8601/src/libiso8601/200_parser.c

702 lines
18 KiB
C

/* libiso8601/src/libiso8601/200_parser.c
*
* (c)2006-2010, Laurence Withers, <l@lwithers.me.uk>.
* Released under the GNU GPLv3. See file COPYING or
* http://www.gnu.org/copyleft/gpl.html for details.
*/
/* iso8601_parse()
 *  Parses the date (with optional time of day and timezone offset) held in
 *  `str', using a character-driven state machine. Leading and trailing
 *  whitespace are accepted; anything else outside the recognised grammar is an
 *  error.
 *
 *  Recognised date forms (see the state names below for examples): calendar
 *  dates in basic (`YYYY', `YYYYMM', `YYYYMMDD') and extended (`YYYY-MM',
 *  `YYYY-MM-DD') form, ordinal dates (`YYYYJJJ', `YYYY-JJJ') and week dates
 *  (`YYYYWww', `YYYYWwwD', `YYYY-Www', `YYYY-Www-D'). A leading `-' negates the
 *  year. A time may follow a `T', but only after a date that names a day. The
 *  time may carry a decimal fraction (introduced by `.') on its last component
 *  and may be followed by `Z' or a signed offset.
 *
 *  Parameters (each output pointer may be NULL; non-NULL ones are zeroed
 *  first):
 *    str       NUL-terminated text to parse.
 *    earliest  receives the start of the period the text describes.
 *    latest    receives the end of that period (equal to the start when the
 *              text carries a fraction).
 *    details   receives the `extended' flag, the date and time precision and
 *              the timezone offset in seconds.
 *
 *  Returns 0 on success and -1 on any parse or validation failure.
 */
int
iso8601_parse(const char* str, struct iso8601_date* earliest,
    struct iso8601_date* latest, struct iso8601_details* details)
{
    enum {
        state_none,
        state_year, /* e.g. `2006' or `2006123' */
        state_date2, /* 2nd component of date, e.g. `2006-' or `2006-1' */
        state_day, /* e.g. `2006-01-' or `2006-01-01' */
        state_week_basic, /* e.g. `2006W' or `2006W123' */
        state_week_extended, /* e.g. `2006-W' or `2006-W31' */
        state_week_day, /* e.g. `2006-W31-' */
        state_week_done, /* `2006-W31-1' */
        state_time_basic,
        state_time_hour,
        state_time_min,
        state_time_sec,
        state_time_nsec_basic,
        state_time_nsec,
        state_tz_basic,
        state_tz_hour,
        state_tz_min,
        state_tz_sec,
        state_tz_utc
    }state = state_none;

    div_t qr;
    char ch;
    /* -1 marks a component that has not been seen in the input */
    int num = 0, neg = 0, dig = 0, tz_neg = 0, nsec = -1, nsec_dig = -1,
        leap_sec_req = 0, y = 0, m = -1, d = -1, w = -1, wd = -1, hour = -1,
        min = -1, sec = -1, tz_sec = 0;
    double frac;
    struct iso8601_elapsed elapsed;

    if(earliest) memset(earliest, 0, sizeof(struct iso8601_date));
    if(latest) memset(latest, 0, sizeof(struct iso8601_date));
    if(details) memset(details, 0, sizeof(struct iso8601_details));

#define ERROR_IF(x) do { \
        if(x) { \
            return -1; \
        } \
    }while(0)

    /* Appends the digit in `ch' to `num'. The first range check runs before
     * the multiplication so that `num * 10' can never overflow an int; the
     * set of accepted values (anything up to BILLION) is unchanged. */
#define INCNUM() do { \
        ++dig; \
        ERROR_IF(num > BILLION / 10); \
        num = num * 10 + (ch - '0'); \
        ERROR_IF(num > BILLION); \
    }while(0)

    while(1) {
        ch = *str++;
        /* the cast keeps isspace() defined for bytes with the top bit set */
        if(isspace((unsigned char)ch)) ch = 0; /* simplify switch */

        switch(state) {
        case state_none:
            switch(ch) {
            case 0:
                /* ignore leading whitespace, but fail at the end of string */
                ERROR_IF(!*(str - 1));
                break;

            case '0' ... '9':
                state = state_year;
                num = ch - '0';
                ++dig;
                break;

            case '-':
                neg = 1;
                state = state_year;
                break;

            default:
                ERROR_IF(1);
            }
            break;

        case state_year:
            switch(ch) {
            case '0' ... '9':
                INCNUM();
                break;

            case 0:
            case 'T':
                /* basic format: the digit count decides how to split `num' */
                switch(dig) {
                case 4: /* YYYY */
                    y = num;
                    break;

                case 6: /* YYYYMM */
                    qr = div(num, 100);
                    y = qr.quot;
                    m = qr.rem;
                    break;

                case 7: /* YYYYJJJ */
                    qr = div(num, 1000);
                    y = qr.quot;
                    d = qr.rem;
                    break;

                case 8: /* YYYYMMDD */
                    qr = div(num, 10000);
                    y = qr.quot;
                    qr = div(qr.rem, 100);
                    m = qr.quot;
                    d = qr.rem;
                    break;

                default:
                    ERROR_IF(1);
                }

                switch(ch) {
                case 0:
                    goto done;

                case 'T':
                    /* a time may only follow a date that names a day; `d' is
                     * still -1 for `YYYY' and `YYYYMM' */
                    ERROR_IF(d < 1);
                    state = state_time_basic;
                    num = 0;
                    dig = 0;
                    break;
                }
                break;

            case '-':
                ERROR_IF(!dig);
                y = num;
                num = 0;
                dig = 0;
                state = state_date2;
                if(details) details->extended = 1;
                break;

            case 'W':
                y = num;
                num = 0;
                dig = 0;
                state = state_week_basic;
                break;

            default:
                ERROR_IF(1);
            }
            break;

        case state_date2:
            switch(ch) {
            case '0' ... '9':
                INCNUM();
                break;

            case '-':
                ERROR_IF(dig != 2);
                m = num;
                num = 0;
                dig = 0;
                state = state_day;
                break;

            case 'W':
                ERROR_IF(dig);
                state = state_week_extended;
                break;

            case 0:
                /* two digits are a month, three an ordinal day */
                switch(dig) {
                case 2: m = num; break;
                case 3: d = num; break;
                default: ERROR_IF(1);
                }
                goto done;

            case 'T':
                ERROR_IF(dig != 3);
                d = num;
                state = state_time_hour;
                num = 0;
                dig = 0;
                break;

            default:
                ERROR_IF(1);
            }
            break;

        case state_day:
            switch(ch) {
            case '0' ... '9':
                INCNUM();
                break;

            case 0:
            case 'T':
                ERROR_IF(dig != 2);
                d = num;
                switch(ch) {
                case 0:
                    goto done;

                case 'T':
                    num = 0;
                    dig = 0;
                    state = state_time_hour;
                    break;
                }
                break;

            default:
                ERROR_IF(1);
            }
            break;

        case state_week_basic:
            switch(ch) {
            case '0' ... '9':
                INCNUM();
                break;

            case 0:
                switch(dig) {
                case 2:
                    w = num;
                    break;

                case 3:
                    qr = div(num, 10);
                    w = qr.quot;
                    wd = qr.rem;
                    break;

                default:
                    ERROR_IF(1);
                }
                goto done;

            case 'T':
                ERROR_IF(dig != 3);
                qr = div(num, 10);
                w = qr.quot;
                wd = qr.rem;
                num = 0;
                dig = 0;
                state = state_time_basic;
                break;

            default:
                ERROR_IF(1);
            }
            break;

        case state_week_extended:
            switch(ch) {
            case '0' ... '9':
                INCNUM();
                break;

            case 0:
            case '-':
                ERROR_IF(dig != 2);
                w = num;
                num = 0;
                dig = 0;
                switch(ch) {
                case 0: goto done;
                case '-': state = state_week_day; break;
                }
                break;

            default:
                /* reject stray characters rather than skipping over them */
                ERROR_IF(1);
            }
            break;

        case state_week_day:
            ERROR_IF(ch < '1' || ch > '7');
            wd = ch - '0';
            state = state_week_done;
            break;

        case state_week_done:
            switch(ch) {
            case 0:
                goto done;

            case 'T':
                num = 0;
                dig = 0;
                state = state_time_hour;
                break;

            default:
                ERROR_IF(1);
            }
            break;

        case state_time_basic:
            if(ch >= '0' && ch <= '9') {
                INCNUM();
                break;
            }

            /* hh, hhmm or hhmmss */
            switch(dig) {
            case 2:
                hour = num;
                break;

            case 4:
                qr = div(num, 100);
                hour = qr.quot;
                min = qr.rem;
                break;

            case 6:
                qr = div(num, 10000);
                hour = qr.quot;
                qr = div(qr.rem, 100);
                min = qr.quot;
                sec = qr.rem;
                break;

            default:
                ERROR_IF(1);
            }
            num = 0;
            dig = 0;

            switch(ch) {
            case '.': state = state_time_nsec_basic; break;
            case 0: goto done;
            case 'Z': state = state_tz_utc; break;
            case '+': state = state_tz_basic; break;
            case '-': tz_neg = 1; state = state_tz_basic; break;
            default: ERROR_IF(1);
            }
            break;

        case state_time_hour:
            if(ch >= '0' && ch <= '9') {
                INCNUM();
                break;
            }

            ERROR_IF(dig != 2);
            hour = num;
            num = 0;
            dig = 0;

            switch(ch) {
            case ':': state = state_time_min; break;
            case '.': state = state_time_nsec; break;
            case 0: goto done;
            case 'Z': state = state_tz_utc; break;
            case '+': state = state_tz_hour; break;
            case '-': tz_neg = 1; state = state_tz_hour; break;
            default: ERROR_IF(1);
            }
            break;

        case state_time_min:
            if(ch >= '0' && ch <= '9') {
                INCNUM();
                break;
            }

            ERROR_IF(dig != 2);
            min = num;
            num = 0;
            dig = 0;

            switch(ch) {
            case ':': state = state_time_sec; break;
            case '.': state = state_time_nsec; break;
            case 0: goto done;
            case 'Z': state = state_tz_utc; break;
            case '+': state = state_tz_hour; break;
            case '-': tz_neg = 1; state = state_tz_hour; break;
            default: ERROR_IF(1);
            }
            break;

        case state_time_sec:
            if(ch >= '0' && ch <= '9') {
                INCNUM();
                break;
            }

            ERROR_IF(dig != 2);
            sec = num;
            num = 0;
            dig = 0;

            switch(ch) {
            case '.': state = state_time_nsec; break;
            case 0: goto done;
            case 'Z': state = state_tz_utc; break;
            case '+': state = state_tz_hour; break;
            case '-': tz_neg = 1; state = state_tz_hour; break;
            default: ERROR_IF(1);
            }
            break;

        case state_time_nsec_basic:
        case state_time_nsec:
            if(ch >= '0' && ch <= '9') {
                /* digits beyond the ninth are dropped */
                if(dig < 9) INCNUM();
                break;
            }

            nsec = num;
            nsec_dig = dig;
            num = 0;
            dig = 0;

            switch(ch) {
            case 0:
                goto done;

            case 'Z':
                state = state_tz_utc;
                break;

            case '-':
                tz_neg = 1;
                /* fall through */
            case '+':
                /* the offset uses the same (basic/extended) form as the time */
                state = (state == state_time_nsec_basic) ?
                    state_tz_basic : state_tz_hour;
                break;

            default:
                /* reject stray characters rather than restarting the fraction */
                ERROR_IF(1);
            }
            break;

        case state_tz_basic:
            switch(ch) {
            case '0' ... '9':
                INCNUM();
                break;

            case 0:
                /* hh, hhmm or hhmmss */
                switch(dig) {
                case 2:
                    ERROR_IF(num > 23);
                    tz_sec = num * 3600;
                    break;

                case 4:
                    qr = div(num, 100);
                    ERROR_IF(qr.quot > 23 || qr.rem > 59);
                    tz_sec = qr.quot * 3600 + qr.rem * 60;
                    break;

                case 6:
                    qr = div(num, 10000);
                    ERROR_IF(qr.quot > 23);
                    tz_sec = qr.quot * 3600;
                    qr = div(qr.rem, 100);
                    ERROR_IF(qr.quot > 59 || qr.rem > 59);
                    tz_sec += qr.quot * 60 + qr.rem;
                    break;

                default:
                    ERROR_IF(1);
                }
                goto done;

            default:
                ERROR_IF(1);
            }
            break;

        case state_tz_utc:
            ERROR_IF(ch);
            goto done;

        case state_tz_hour:
            if(ch >= '0' && ch <= '9') {
                INCNUM();
                break;
            }

            ERROR_IF(dig != 2 || num > 23);
            tz_sec = num * 3600;
            num = 0;
            dig = 0;

            switch(ch) {
            case ':': state = state_tz_min; break;
            case 0: goto done;
            default: ERROR_IF(1);
            }
            break;

        case state_tz_min:
            if(ch >= '0' && ch <= '9') {
                INCNUM();
                break;
            }

            ERROR_IF(dig != 2 || num > 59);
            tz_sec += num * 60;
            num = 0;
            dig = 0;

            switch(ch) {
            case ':': state = state_tz_sec; break;
            case 0: goto done;
            default: ERROR_IF(1);
            }
            break;

        case state_tz_sec:
            if(ch >= '0' && ch <= '9') {
                INCNUM();
                break;
            }

            ERROR_IF(ch || dig != 2 || num > 59);
            tz_sec += num;
            num = 0;
            dig = 0;
            goto done;
        }
    }

done:
    /* step back onto the terminating character; only whitespace may follow */
    --str;
    while(*str) ERROR_IF(!isspace((unsigned char)*str++));

    if(neg) y *= -1;
    if(tz_neg) tz_sec *= -1;

    /* date: pick the conversion that matches the components actually seen */
    if(m != -1) {
        if(d == -1) {
            /* NOTE(review): when `earliest' is NULL, _days_in_month() is
             * reached with a month that has not been range-checked in this
             * function -- confirm it tolerates out-of-range values. */
            ERROR_IF(earliest && iso8601_from_cal(earliest, y, m, 1));
            ERROR_IF(latest && iso8601_from_cal(latest, y, m,
                _days_in_month(y, m)));
            if(details) details->date_prec = iso8601_prec_month;
        } else {
            ERROR_IF(earliest && iso8601_from_cal(earliest, y, m, d));
            ERROR_IF(latest && iso8601_from_cal(latest, y, m, d));
            if(details) details->date_prec = iso8601_prec_day;
        }

    } else if(d != -1) {
        ERROR_IF(earliest && iso8601_from_ord(earliest, y, d));
        ERROR_IF(latest && iso8601_from_ord(latest, y, d));
        if(details) details->date_prec = iso8601_prec_ord;

    } else if(w != -1) {
        if(wd == -1) {
            ERROR_IF(earliest && iso8601_from_week(earliest, y, w, 1));
            ERROR_IF(latest && iso8601_from_week(latest, y, w, 7));
            if(details) details->date_prec = iso8601_prec_week;
        } else {
            ERROR_IF(earliest && iso8601_from_week(earliest, y, w, wd));
            ERROR_IF(latest && iso8601_from_week(latest, y, w, wd));
            if(details) details->date_prec = iso8601_prec_wday;
        }

    } else {
        ERROR_IF(earliest && iso8601_from_cal(earliest, y, 1, 1));
        ERROR_IF(latest && iso8601_from_cal(latest, y, 12, 31));
        if(details) details->date_prec = iso8601_prec_year;
    }

    /* scale the fraction digits read up to a nine-digit value */
    if(nsec_dig != -1) while(nsec_dig++ < 9) nsec *= 10;

    /* time: the fraction, if any, applies to the last component present */
    if(hour == -1) {
        ERROR_IF(earliest && iso8601_from_clocktime(earliest, 0, 0, 0));
        ERROR_IF(latest && iso8601_from_clocktime(latest, 23, 59, 59));
        if(details) details->time_prec = iso8601_prec_none;

    } else if(sec != -1) {
        if(sec == 60) {
            /* remember the request and convert as second 59 for now */
            leap_sec_req++;
            sec = 59;
        }
        ERROR_IF(earliest && iso8601_from_clocktime(earliest, hour, min, sec));
        ERROR_IF(latest && iso8601_from_clocktime(latest, hour, min, sec));
        if(nsec_dig == -1) {
            if(latest) latest->nsec = 999999999;
            if(details) details->time_prec = iso8601_prec_sec;
        } else {
            if(earliest) earliest->nsec = nsec;
            if(latest) latest->nsec = nsec;
            if(details) details->time_prec = iso8601_prec_secfrac;
        }

    } else if(min != -1) {
        if(nsec_dig == -1) {
            ERROR_IF(earliest && iso8601_from_clocktime(earliest,
                hour, min, 0));
            ERROR_IF(latest && iso8601_from_clocktime(latest, hour, min, 59));
            if(latest) latest->nsec = 999999999;
            if(details) details->time_prec = iso8601_prec_min;
        } else {
            /* fraction of a minute -> whole seconds + nanoseconds */
            frac = nsec * 60.0 / 1e9;
            sec = (int)frac;
            nsec = (frac - sec) * 1e9;
            ERROR_IF(earliest && iso8601_from_clocktime(earliest,
                hour, min, sec));
            if(earliest) earliest->nsec = nsec;
            ERROR_IF(latest && iso8601_from_clocktime(latest, hour, min, sec));
            if(latest) latest->nsec = nsec;
            if(details) details->time_prec = iso8601_prec_minfrac;
        }

    } else {
        if(nsec_dig == -1) {
            ERROR_IF(earliest && iso8601_from_clocktime(earliest, hour, 0, 0));
            ERROR_IF(latest && iso8601_from_clocktime(latest, hour, 59, 59));
            if(latest) latest->nsec = 999999999;
            if(details) details->time_prec = iso8601_prec_hour;
        } else {
            /* fraction of an hour -> minutes, then seconds + nanoseconds */
            frac = nsec * 60.0 / 1e9;
            min = (int)frac;
            frac -= min;
            frac *= 60;
            sec = (int)frac;
            nsec = (frac - sec) * 1e9;
            ERROR_IF(earliest && iso8601_from_clocktime(earliest,
                hour, min, sec));
            if(earliest) earliest->nsec = nsec;
            ERROR_IF(latest && iso8601_from_clocktime(latest, hour, min, sec));
            if(latest) latest->nsec = nsec;
            if(details) details->time_prec = iso8601_prec_hourfrac;
        }
    }

    /* correct for timezone offset (note trickery to end up at 23:59:60 iff a leap second was
     * requested), and ensure that any leap second requested was a valid leap second.
     *
     * NOTE(review): with a zero offset neither adjustment below runs, so the
     * value is still whatever iso8601_from_clocktime() produced for second 59;
     * confirm that an input such as `23:59:60Z' can pass the 86400 check. */
    if(details) details->tz_sec = tz_sec;
    if(tz_sec && earliest) {
        elapsed.sec = leap_sec_req + tz_sec;
        elapsed.nsec = 0;
        iso8601_subtract_elapsed(earliest, &elapsed);
    }
    if(tz_sec && latest) {
        elapsed.sec = leap_sec_req + tz_sec;
        elapsed.nsec = 0;
        iso8601_subtract_elapsed(latest, &elapsed);
    }
    if(leap_sec_req) {
        ERROR_IF(earliest && earliest->sec != 86400);
        ERROR_IF(latest && latest->sec != 86400);
    }

    ERROR_IF(earliest && iso8601_invalid(earliest));
    ERROR_IF(latest && iso8601_invalid(latest));

    return 0;
#undef ERROR_IF
#undef INCNUM
}
/* iso8601_invalid()
 *  Range-checks the `nsec' and `sec' fields of `date'. Returns 1 if `nsec' is
 *  outside [0, BILLION) or `sec' is outside [0, iso8601_seconds_leap(date)),
 *  and 0 otherwise. The checks run in that order and stop at the first
 *  failure, so iso8601_seconds_leap() is only consulted once the other three
 *  tests have passed.
 */
int
iso8601_invalid(const struct iso8601_date* date)
{
    if(date->nsec < 0) return 1;
    if(date->nsec >= BILLION) return 1;
    if(date->sec < 0) return 1;
    if(date->sec >= iso8601_seconds_leap(date)) return 1;

    return 0;
}
/* options for text editors
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
vim: expandtab:ts=4:sw=4
*/