Add whitespace stripping to parser

This commit is contained in:
Laurence Withers 2007-07-23 14:13:48 +00:00
parent e9d29f4792
commit 1b6343e474
5 changed files with 121 additions and 89 deletions

View File

@ -8,6 +8,7 @@
#include "iso8601.h" #include "iso8601.h"
/* Below are all the includes used throughout the library. */ /* Below are all the includes used throughout the library. */
#include <ctype.h>
#include <errno.h> #include <errno.h>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>

View File

@ -42,27 +42,31 @@ int iso8601_parse(const char* str, struct iso8601_date* earliest, struct iso8601
if(latest) memset(latest, 0, sizeof(struct iso8601_date)); if(latest) memset(latest, 0, sizeof(struct iso8601_date));
if(details) memset(details, 0, sizeof(struct iso8601_details)); if(details) memset(details, 0, sizeof(struct iso8601_details));
#define INCNUM() do { \
++dig; \
num *= 10; \
num += ch - '0'; \
if(num > BILLION) { \
return -1; \
} \
}while(0)
#define ERROR_IF(x) do { \ #define ERROR_IF(x) do { \
if(x) { \ if(x) { \
return -1; \ return -1; \
} \ } \
}while(0) }while(0)
#define INCNUM() do { \
++dig; \
num *= 10; \
num += ch - '0'; \
ERROR_IF(num > BILLION); \
}while(0)
while(1) { while(1) {
ch = *str++; ch = *str++;
if(isspace(ch)) ch = 0; /* simplify switch */
switch(state) { switch(state) {
case state_none: case state_none:
switch(ch) { switch(ch) {
case 0:
/* ignore whitespace */
ERROR_IF(!*(str - 1));
break;
case '0' ... '9': case '0' ... '9':
state = state_year; state = state_year;
num = ch - '0'; num = ch - '0';
@ -75,7 +79,7 @@ int iso8601_parse(const char* str, struct iso8601_date* earliest, struct iso8601
break; break;
default: default:
return -1; ERROR_IF(1);
} }
break; break;
@ -85,37 +89,26 @@ int iso8601_parse(const char* str, struct iso8601_date* earliest, struct iso8601
INCNUM(); INCNUM();
break; break;
case '-':
ERROR_IF(!dig);
y = num;
num = 0;
dig = 0;
state = state_date2;
if(details) details->extended = 1;
break;
case 0: case 0:
case 'T': case 'T':
switch(dig) { switch(dig) {
case 4: case 4: /* YYYY */
ERROR_IF(ch);
y = num; y = num;
goto done; goto done;
case 6: case 6: /* YYYYMM */
ERROR_IF(ch);
qr = div(num, 100); qr = div(num, 100);
y = qr.quot; y = qr.quot;
m = qr.rem; m = qr.rem;
goto done; goto done;
case 7: case 7: /* YYYYJJJ */
qr = div(num, 1000); qr = div(num, 1000);
y = qr.quot; y = qr.quot;
d = qr.rem; d = qr.rem;
break; break;
case 8: case 8: /* YYYYMMDD */
qr = div(num, 10000); qr = div(num, 10000);
y = qr.quot; y = qr.quot;
qr = div(qr.rem, 100); qr = div(qr.rem, 100);
@ -124,13 +117,29 @@ int iso8601_parse(const char* str, struct iso8601_date* earliest, struct iso8601
break; break;
default: default:
return -1; ERROR_IF(1);
} }
if(!ch) goto done;
state = state_time_basic; switch(ch) {
case '0':
goto done;
case 'T':
ERROR_IF(!d);
state = state_time_basic;
num = 0;
dig = 0;
break;
}
break;
case '-':
ERROR_IF(!dig);
y = num;
num = 0; num = 0;
dig = 0; dig = 0;
state = state_date2;
if(details) details->extended = 1;
break; break;
case 'W': case 'W':
@ -157,18 +166,18 @@ int iso8601_parse(const char* str, struct iso8601_date* earliest, struct iso8601
break; break;
case 'W': case 'W':
if(dig) { ERROR_IF(dig);
return -1;
}
state = state_week_extended; state = state_week_extended;
break; break;
case 0: case 0:
switch(dig) { switch(dig) {
case 2: m = num; goto done; case 2: m = num; break;
case 3: d = num; goto done; case 3: d = num; break;
default: ERROR_IF(1);
} }
return -1;
goto done;
case 'T': case 'T':
ERROR_IF(dig != 3); ERROR_IF(dig != 3);
@ -179,7 +188,7 @@ int iso8601_parse(const char* str, struct iso8601_date* earliest, struct iso8601
break; break;
default: default:
return -1; ERROR_IF(1);
} }
break; break;
@ -193,15 +202,21 @@ int iso8601_parse(const char* str, struct iso8601_date* earliest, struct iso8601
case 'T': case 'T':
ERROR_IF(dig != 2); ERROR_IF(dig != 2);
d = num; d = num;
if(!ch) goto done;
num = 0; switch(ch) {
dig = 0; case 0:
state = state_time_hour; goto done;
case 'T':
num = 0;
dig = 0;
state = state_time_hour;
break;
}
break; break;
default: default:
return -1; ERROR_IF(1);
} }
break; break;
@ -215,16 +230,19 @@ int iso8601_parse(const char* str, struct iso8601_date* earliest, struct iso8601
switch(dig) { switch(dig) {
case 2: case 2:
w = num; w = num;
goto done; break;
case 3: case 3:
qr = div(num, 10); qr = div(num, 10);
w = qr.quot; w = qr.quot;
wd = qr.rem; wd = qr.rem;
goto done; break;
default:
ERROR_IF(1);
} }
return -1; goto done;
case 'T': case 'T':
ERROR_IF(dig != 3); ERROR_IF(dig != 3);
@ -238,7 +256,7 @@ int iso8601_parse(const char* str, struct iso8601_date* earliest, struct iso8601
break; break;
default: default:
return -1; ERROR_IF(1);
} }
break; break;
@ -254,8 +272,10 @@ int iso8601_parse(const char* str, struct iso8601_date* earliest, struct iso8601
w = num; w = num;
num = 0; num = 0;
dig = 0; dig = 0;
if(!ch) goto done; switch(ch) {
state = state_week_day; case 0: goto done;
case '-': state = state_week_day; break;
}
break; break;
} }
break; break;
@ -267,12 +287,19 @@ int iso8601_parse(const char* str, struct iso8601_date* earliest, struct iso8601
break; break;
case state_week_done: case state_week_done:
if(!ch) goto done; switch(ch) {
ERROR_IF(ch != 'T'); case 0:
goto done;
num = 0; case 'T':
dig = 0; num = 0;
state = state_time_hour; dig = 0;
state = state_time_hour;
break;
default:
ERROR_IF(1);
}
break; break;
case state_time_basic: case state_time_basic:
@ -301,7 +328,7 @@ int iso8601_parse(const char* str, struct iso8601_date* earliest, struct iso8601
break; break;
default: default:
return -1; ERROR_IF(1);
} }
num = 0; num = 0;
@ -313,7 +340,7 @@ int iso8601_parse(const char* str, struct iso8601_date* earliest, struct iso8601
case 'Z': state = state_tz_utc; break; case 'Z': state = state_tz_utc; break;
case '+': state = state_tz_basic; break; case '+': state = state_tz_basic; break;
case '-': tz_neg = 1; state = state_tz_basic; break; case '-': tz_neg = 1; state = state_tz_basic; break;
default: return -1; default: ERROR_IF(1);
} }
break; break;
@ -335,7 +362,7 @@ int iso8601_parse(const char* str, struct iso8601_date* earliest, struct iso8601
case 'Z': state = state_tz_utc; break; case 'Z': state = state_tz_utc; break;
case '+': state = state_tz_hour; break; case '+': state = state_tz_hour; break;
case '-': tz_neg = 1; state = state_tz_hour; break; case '-': tz_neg = 1; state = state_tz_hour; break;
default: return -1; default: ERROR_IF(1);
} }
break; break;
@ -357,7 +384,7 @@ int iso8601_parse(const char* str, struct iso8601_date* earliest, struct iso8601
case 'Z': state = state_tz_utc; break; case 'Z': state = state_tz_utc; break;
case '+': state = state_tz_hour; break; case '+': state = state_tz_hour; break;
case '-': tz_neg = 1; state = state_tz_hour; break; case '-': tz_neg = 1; state = state_tz_hour; break;
default: return -1; default: ERROR_IF(1);
} }
break; break;
@ -378,7 +405,7 @@ int iso8601_parse(const char* str, struct iso8601_date* earliest, struct iso8601
case 'Z': state = state_tz_utc; break; case 'Z': state = state_tz_utc; break;
case '+': state = state_tz_hour; break; case '+': state = state_tz_hour; break;
case '-': tz_neg = 1; state = state_tz_hour; break; case '-': tz_neg = 1; state = state_tz_hour; break;
default: return -1; default: ERROR_IF(1);
} }
break; break;
@ -439,12 +466,13 @@ int iso8601_parse(const char* str, struct iso8601_date* earliest, struct iso8601
break; break;
default: default:
return -1; ERROR_IF(1);
} }
goto done; goto done;
default: default:
return -1; ERROR_IF(1);
} }
break; break;
@ -466,7 +494,7 @@ int iso8601_parse(const char* str, struct iso8601_date* earliest, struct iso8601
switch(ch) { switch(ch) {
case ':': state = state_tz_min; break; case ':': state = state_tz_min; break;
case 0: goto done; case 0: goto done;
default: return -1; default: ERROR_IF(1);
} }
break; break;
@ -484,7 +512,7 @@ int iso8601_parse(const char* str, struct iso8601_date* earliest, struct iso8601
switch(ch) { switch(ch) {
case ':': state = state_tz_sec; break; case ':': state = state_tz_sec; break;
case 0: goto done; case 0: goto done;
default: return -1; default: ERROR_IF(1);
} }
break; break;
@ -503,43 +531,45 @@ int iso8601_parse(const char* str, struct iso8601_date* earliest, struct iso8601
} }
done: done:
while(*str) ERROR_IF(!isspace(*str++));
if(neg) y *= -1; if(neg) y *= -1;
if(tz_neg) tz_sec *= -1; if(tz_neg) tz_sec *= -1;
if(m != -1) { if(m != -1) {
if(d == -1) { if(d == -1) {
if(earliest && iso8601_from_cal(earliest, y, m, 1)) return -1; ERROR_IF(earliest && iso8601_from_cal(earliest, y, m, 1));
if(latest && iso8601_from_cal(latest, y, m, _days_in_month(y, m))) return -1; ERROR_IF(latest && iso8601_from_cal(latest, y, m, _days_in_month(y, m)));
if(details) details->date_prec = iso8601_prec_month; if(details) details->date_prec = iso8601_prec_month;
} else { } else {
if(earliest && iso8601_from_cal(earliest, y, m, d)) return -1; ERROR_IF(earliest && iso8601_from_cal(earliest, y, m, d));
if(latest && iso8601_from_cal(latest, y, m, d)) return -1; ERROR_IF(latest && iso8601_from_cal(latest, y, m, d));
if(details) details->date_prec = iso8601_prec_day; if(details) details->date_prec = iso8601_prec_day;
} }
} else if(d != -1) { } else if(d != -1) {
if(earliest && iso8601_from_ord(earliest, y, d)) return -1; ERROR_IF(earliest && iso8601_from_ord(earliest, y, d));
if(latest && iso8601_from_ord(latest, y, d)) return -1; ERROR_IF(latest && iso8601_from_ord(latest, y, d));
if(details) details->date_prec = iso8601_prec_ord; if(details) details->date_prec = iso8601_prec_ord;
} else if(w != -1) { } else if(w != -1) {
if(wd == -1) { if(wd == -1) {
if(earliest && iso8601_from_week(earliest, y, w, 1)) return -1; ERROR_IF(earliest && iso8601_from_week(earliest, y, w, 1));
if(latest && iso8601_from_week(latest, y, w, 7)) return -1; ERROR_IF(latest && iso8601_from_week(latest, y, w, 7));
if(details) details->date_prec = iso8601_prec_week; if(details) details->date_prec = iso8601_prec_week;
} else { } else {
if(earliest && iso8601_from_week(earliest, y, w, wd)) return -1; ERROR_IF(earliest && iso8601_from_week(earliest, y, w, wd));
if(latest && iso8601_from_week(latest, y, w, wd)) return -1; ERROR_IF(latest && iso8601_from_week(latest, y, w, wd));
if(details) details->date_prec = iso8601_prec_wday; if(details) details->date_prec = iso8601_prec_wday;
} }
} else { } else {
if(earliest && iso8601_from_cal(earliest, y, 1, 1)) return -1; ERROR_IF(earliest && iso8601_from_cal(earliest, y, 1, 1));
if(latest && iso8601_from_cal(latest, y, 12, 31)) return -1; ERROR_IF(latest && iso8601_from_cal(latest, y, 12, 31));
if(details) details->date_prec = iso8601_prec_year; if(details) details->date_prec = iso8601_prec_year;
} }
@ -547,8 +577,8 @@ done:
if(nsec_dig != -1) while(nsec_dig++ < 9) nsec *= 10; if(nsec_dig != -1) while(nsec_dig++ < 9) nsec *= 10;
if(hour == -1) { if(hour == -1) {
if(earliest && iso8601_from_clocktime(earliest, 0, 0, 0)) return -1; ERROR_IF(earliest && iso8601_from_clocktime(earliest, 0, 0, 0));
if(latest && iso8601_from_clocktime(latest, 23, 59, 59)) return -1; ERROR_IF(latest && iso8601_from_clocktime(latest, 23, 59, 59));
if(details) details->time_prec = iso8601_prec_none; if(details) details->time_prec = iso8601_prec_none;
} else if(sec != -1) { } else if(sec != -1) {
@ -557,8 +587,8 @@ done:
sec = 59; sec = 59;
} }
if(earliest && iso8601_from_clocktime(earliest, hour, min, sec)) return -1; ERROR_IF(earliest && iso8601_from_clocktime(earliest, hour, min, sec));
if(latest && iso8601_from_clocktime(latest, hour, min, sec)) return -1; ERROR_IF(latest && iso8601_from_clocktime(latest, hour, min, sec));
if(nsec_dig == -1) { if(nsec_dig == -1) {
if(latest) latest->nsec = 999999999; if(latest) latest->nsec = 999999999;
@ -573,8 +603,8 @@ done:
} else if(min != -1) { } else if(min != -1) {
if(nsec_dig == -1) { if(nsec_dig == -1) {
if(earliest && iso8601_from_clocktime(earliest, hour, min, 0)) return -1; ERROR_IF(earliest && iso8601_from_clocktime(earliest, hour, min, 0));
if(latest && iso8601_from_clocktime(latest, hour, min, 59)) return -1; ERROR_IF(latest && iso8601_from_clocktime(latest, hour, min, 59));
if(latest) latest->nsec = 999999999; if(latest) latest->nsec = 999999999;
if(details) details->time_prec = iso8601_prec_min; if(details) details->time_prec = iso8601_prec_min;
@ -582,9 +612,9 @@ done:
frac = nsec * 60.0 / 1e9; frac = nsec * 60.0 / 1e9;
sec = (int)frac; sec = (int)frac;
nsec = (frac - sec) * 1e9; nsec = (frac - sec) * 1e9;
if(earliest && iso8601_from_clocktime(earliest, hour, min, sec)) return -1; ERROR_IF(earliest && iso8601_from_clocktime(earliest, hour, min, sec));
if(earliest) earliest->nsec = nsec; if(earliest) earliest->nsec = nsec;
if(latest && iso8601_from_clocktime(latest, hour, min, sec)) return -1; ERROR_IF(latest && iso8601_from_clocktime(latest, hour, min, sec));
if(latest) latest->nsec = nsec; if(latest) latest->nsec = nsec;
if(details) details->time_prec = iso8601_prec_minfrac; if(details) details->time_prec = iso8601_prec_minfrac;
@ -592,8 +622,8 @@ done:
} else { } else {
if(nsec_dig == -1) { if(nsec_dig == -1) {
if(earliest && iso8601_from_clocktime(earliest, hour, 0, 0)) return -1; ERROR_IF(earliest && iso8601_from_clocktime(earliest, hour, 0, 0));
if(latest && iso8601_from_clocktime(latest, hour, 59, 59)) return -1; ERROR_IF(latest && iso8601_from_clocktime(latest, hour, 59, 59));
if(latest) latest->nsec = 999999999; if(latest) latest->nsec = 999999999;
if(details) details->time_prec = iso8601_prec_hour; if(details) details->time_prec = iso8601_prec_hour;
@ -604,9 +634,9 @@ done:
frac *= 60; frac *= 60;
sec = (int)frac; sec = (int)frac;
nsec = (frac - sec) * 1e9; nsec = (frac - sec) * 1e9;
if(earliest && iso8601_from_clocktime(earliest, hour, min, sec)) return -1; ERROR_IF(earliest && iso8601_from_clocktime(earliest, hour, min, sec));
if(earliest) earliest->nsec = nsec; if(earliest) earliest->nsec = nsec;
if(latest && iso8601_from_clocktime(latest, hour, min, sec)) return -1; ERROR_IF(latest && iso8601_from_clocktime(latest, hour, min, sec));
if(latest) latest->nsec = nsec; if(latest) latest->nsec = nsec;
if(details) details->time_prec = iso8601_prec_hourfrac; if(details) details->time_prec = iso8601_prec_hourfrac;

View File

@ -22,7 +22,7 @@ to represent them, and for validating such dates/times.
\param[out] latest The latest possible time the string could represent. May be 0. \param[out] latest The latest possible time the string could represent. May be 0.
\param[out] details Stores details such as the precision to which the time/date were specified. May \param[out] details Stores details such as the precision to which the time/date were specified. May
be 0. be 0.
\retval -1 on error (and see \a errno). \retval -1 on error.
\retval 0 on success. \retval 0 on success.
Parses a string containing the ISO8601 date/time. Deals with any format of date, optionally storing Parses a string containing the ISO8601 date/time. Deals with any format of date, optionally storing

View File

@ -7,7 +7,7 @@ build_target libiso8601 || return 1
if [ -z ${tests_BUILT} ] if [ -z ${tests_BUILT} ]
then then
LIBS="${libiso8601} " LIBS="${libiso8601} "
EXTRAS="" # @TODO@ libs, cflags EXTRAS="-D_GNU_SOURCE"
echo "Building test programs..." echo "Building test programs..."
do_cmd mkdir -p obj/tests || return 1 do_cmd mkdir -p obj/tests || return 1

View File

@ -19,7 +19,7 @@ int do_date(const char* str)
char buf[100], buf2[100], buf3[100], buf4[100]; char buf[100], buf2[100], buf3[100], buf4[100];
if(iso8601_parse(str, &earliest, &latest, &details)) { if(iso8601_parse(str, &earliest, &latest, &details)) {
perror("iso8601_parse"); fputs("Failed to parse date.\n", stderr);
return 1; return 1;
} }
@ -58,7 +58,8 @@ int do_date(const char* str)
int main(int argc, char* argv[]) int main(int argc, char* argv[])
{ {
int ret = 0, i; int ret = 0, i;
char buf[100]; char* buf = 0;
size_t buf_size;
if(argc == 2 && !strcmp(argv[1], "--print-summary")) { if(argc == 2 && !strcmp(argv[1], "--print-summary")) {
printf("One line summary.\n"); printf("One line summary.\n");
@ -68,7 +69,7 @@ int main(int argc, char* argv[])
if(argc == 1) { if(argc == 1) {
while(!feof(stdin)) { while(!feof(stdin)) {
printf("date >> "); printf("date >> ");
scanf("%s", buf); if(getline(&buf, &buf_size, stdin) < 1) continue;
do_date(buf); do_date(buf);
} }