Add whitespace stripping to parser

This commit is contained in:
Laurence Withers 2007-07-23 14:13:48 +00:00
parent e9d29f4792
commit 1b6343e474
5 changed files with 121 additions and 89 deletions

View File

@ -8,6 +8,7 @@
#include "iso8601.h"
/* Below are all the includes used throughout the library. */
#include <ctype.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

View File

@ -42,27 +42,31 @@ int iso8601_parse(const char* str, struct iso8601_date* earliest, struct iso8601
if(latest) memset(latest, 0, sizeof(struct iso8601_date));
if(details) memset(details, 0, sizeof(struct iso8601_details));
#define INCNUM() do { \
++dig; \
num *= 10; \
num += ch - '0'; \
if(num > BILLION) { \
return -1; \
} \
}while(0)
#define ERROR_IF(x) do { \
if(x) { \
return -1; \
} \
}while(0)
#define INCNUM() do { \
++dig; \
num *= 10; \
num += ch - '0'; \
ERROR_IF(num > BILLION); \
}while(0)
while(1) {
ch = *str++;
if(isspace(ch)) ch = 0; /* simplify switch */
switch(state) {
case state_none:
switch(ch) {
case 0:
/* ignore whitespace */
ERROR_IF(!*(str - 1));
break;
case '0' ... '9':
state = state_year;
num = ch - '0';
@ -75,7 +79,7 @@ int iso8601_parse(const char* str, struct iso8601_date* earliest, struct iso8601
break;
default:
return -1;
ERROR_IF(1);
}
break;
@ -85,37 +89,26 @@ int iso8601_parse(const char* str, struct iso8601_date* earliest, struct iso8601
INCNUM();
break;
case '-':
ERROR_IF(!dig);
y = num;
num = 0;
dig = 0;
state = state_date2;
if(details) details->extended = 1;
break;
case 0:
case 'T':
switch(dig) {
case 4:
ERROR_IF(ch);
case 4: /* YYYY */
y = num;
goto done;
case 6:
ERROR_IF(ch);
case 6: /* YYYYMM */
qr = div(num, 100);
y = qr.quot;
m = qr.rem;
goto done;
case 7:
case 7: /* YYYYJJJ */
qr = div(num, 1000);
y = qr.quot;
d = qr.rem;
break;
case 8:
case 8: /* YYYYMMDD */
qr = div(num, 10000);
y = qr.quot;
qr = div(qr.rem, 100);
@ -124,14 +117,30 @@ int iso8601_parse(const char* str, struct iso8601_date* earliest, struct iso8601
break;
default:
return -1;
ERROR_IF(1);
}
if(!ch) goto done;
switch(ch) {
case '0':
goto done;
case 'T':
ERROR_IF(!d);
state = state_time_basic;
num = 0;
dig = 0;
break;
}
break;
case '-':
ERROR_IF(!dig);
y = num;
num = 0;
dig = 0;
state = state_date2;
if(details) details->extended = 1;
break;
case 'W':
y = num;
@ -157,18 +166,18 @@ int iso8601_parse(const char* str, struct iso8601_date* earliest, struct iso8601
break;
case 'W':
if(dig) {
return -1;
}
ERROR_IF(dig);
state = state_week_extended;
break;
case 0:
switch(dig) {
case 2: m = num; goto done;
case 3: d = num; goto done;
case 2: m = num; break;
case 3: d = num; break;
default: ERROR_IF(1);
}
return -1;
goto done;
case 'T':
ERROR_IF(dig != 3);
@ -179,7 +188,7 @@ int iso8601_parse(const char* str, struct iso8601_date* earliest, struct iso8601
break;
default:
return -1;
ERROR_IF(1);
}
break;
@ -193,15 +202,21 @@ int iso8601_parse(const char* str, struct iso8601_date* earliest, struct iso8601
case 'T':
ERROR_IF(dig != 2);
d = num;
if(!ch) goto done;
switch(ch) {
case 0:
goto done;
case 'T':
num = 0;
dig = 0;
state = state_time_hour;
break;
}
break;
default:
return -1;
ERROR_IF(1);
}
break;
@ -215,16 +230,19 @@ int iso8601_parse(const char* str, struct iso8601_date* earliest, struct iso8601
switch(dig) {
case 2:
w = num;
goto done;
break;
case 3:
qr = div(num, 10);
w = qr.quot;
wd = qr.rem;
goto done;
break;
default:
ERROR_IF(1);
}
return -1;
goto done;
case 'T':
ERROR_IF(dig != 3);
@ -238,7 +256,7 @@ int iso8601_parse(const char* str, struct iso8601_date* earliest, struct iso8601
break;
default:
return -1;
ERROR_IF(1);
}
break;
@ -254,8 +272,10 @@ int iso8601_parse(const char* str, struct iso8601_date* earliest, struct iso8601
w = num;
num = 0;
dig = 0;
if(!ch) goto done;
state = state_week_day;
switch(ch) {
case 0: goto done;
case '-': state = state_week_day; break;
}
break;
}
break;
@ -267,14 +287,21 @@ int iso8601_parse(const char* str, struct iso8601_date* earliest, struct iso8601
break;
case state_week_done:
if(!ch) goto done;
ERROR_IF(ch != 'T');
switch(ch) {
case 0:
goto done;
case 'T':
num = 0;
dig = 0;
state = state_time_hour;
break;
default:
ERROR_IF(1);
}
break;
case state_time_basic:
if(ch >= '0' && ch <= '9') {
INCNUM();
@ -301,7 +328,7 @@ int iso8601_parse(const char* str, struct iso8601_date* earliest, struct iso8601
break;
default:
return -1;
ERROR_IF(1);
}
num = 0;
@ -313,7 +340,7 @@ int iso8601_parse(const char* str, struct iso8601_date* earliest, struct iso8601
case 'Z': state = state_tz_utc; break;
case '+': state = state_tz_basic; break;
case '-': tz_neg = 1; state = state_tz_basic; break;
default: return -1;
default: ERROR_IF(1);
}
break;
@ -335,7 +362,7 @@ int iso8601_parse(const char* str, struct iso8601_date* earliest, struct iso8601
case 'Z': state = state_tz_utc; break;
case '+': state = state_tz_hour; break;
case '-': tz_neg = 1; state = state_tz_hour; break;
default: return -1;
default: ERROR_IF(1);
}
break;
@ -357,7 +384,7 @@ int iso8601_parse(const char* str, struct iso8601_date* earliest, struct iso8601
case 'Z': state = state_tz_utc; break;
case '+': state = state_tz_hour; break;
case '-': tz_neg = 1; state = state_tz_hour; break;
default: return -1;
default: ERROR_IF(1);
}
break;
@ -378,7 +405,7 @@ int iso8601_parse(const char* str, struct iso8601_date* earliest, struct iso8601
case 'Z': state = state_tz_utc; break;
case '+': state = state_tz_hour; break;
case '-': tz_neg = 1; state = state_tz_hour; break;
default: return -1;
default: ERROR_IF(1);
}
break;
@ -439,12 +466,13 @@ int iso8601_parse(const char* str, struct iso8601_date* earliest, struct iso8601
break;
default:
return -1;
ERROR_IF(1);
}
goto done;
default:
return -1;
ERROR_IF(1);
}
break;
@ -466,7 +494,7 @@ int iso8601_parse(const char* str, struct iso8601_date* earliest, struct iso8601
switch(ch) {
case ':': state = state_tz_min; break;
case 0: goto done;
default: return -1;
default: ERROR_IF(1);
}
break;
@ -484,7 +512,7 @@ int iso8601_parse(const char* str, struct iso8601_date* earliest, struct iso8601
switch(ch) {
case ':': state = state_tz_sec; break;
case 0: goto done;
default: return -1;
default: ERROR_IF(1);
}
break;
@ -503,43 +531,45 @@ int iso8601_parse(const char* str, struct iso8601_date* earliest, struct iso8601
}
done:
while(*str) ERROR_IF(!isspace(*str++));
if(neg) y *= -1;
if(tz_neg) tz_sec *= -1;
if(m != -1) {
if(d == -1) {
if(earliest && iso8601_from_cal(earliest, y, m, 1)) return -1;
if(latest && iso8601_from_cal(latest, y, m, _days_in_month(y, m))) return -1;
ERROR_IF(earliest && iso8601_from_cal(earliest, y, m, 1));
ERROR_IF(latest && iso8601_from_cal(latest, y, m, _days_in_month(y, m)));
if(details) details->date_prec = iso8601_prec_month;
} else {
if(earliest && iso8601_from_cal(earliest, y, m, d)) return -1;
if(latest && iso8601_from_cal(latest, y, m, d)) return -1;
ERROR_IF(earliest && iso8601_from_cal(earliest, y, m, d));
ERROR_IF(latest && iso8601_from_cal(latest, y, m, d));
if(details) details->date_prec = iso8601_prec_day;
}
} else if(d != -1) {
if(earliest && iso8601_from_ord(earliest, y, d)) return -1;
if(latest && iso8601_from_ord(latest, y, d)) return -1;
ERROR_IF(earliest && iso8601_from_ord(earliest, y, d));
ERROR_IF(latest && iso8601_from_ord(latest, y, d));
if(details) details->date_prec = iso8601_prec_ord;
} else if(w != -1) {
if(wd == -1) {
if(earliest && iso8601_from_week(earliest, y, w, 1)) return -1;
if(latest && iso8601_from_week(latest, y, w, 7)) return -1;
ERROR_IF(earliest && iso8601_from_week(earliest, y, w, 1));
ERROR_IF(latest && iso8601_from_week(latest, y, w, 7));
if(details) details->date_prec = iso8601_prec_week;
} else {
if(earliest && iso8601_from_week(earliest, y, w, wd)) return -1;
if(latest && iso8601_from_week(latest, y, w, wd)) return -1;
ERROR_IF(earliest && iso8601_from_week(earliest, y, w, wd));
ERROR_IF(latest && iso8601_from_week(latest, y, w, wd));
if(details) details->date_prec = iso8601_prec_wday;
}
} else {
if(earliest && iso8601_from_cal(earliest, y, 1, 1)) return -1;
if(latest && iso8601_from_cal(latest, y, 12, 31)) return -1;
ERROR_IF(earliest && iso8601_from_cal(earliest, y, 1, 1));
ERROR_IF(latest && iso8601_from_cal(latest, y, 12, 31));
if(details) details->date_prec = iso8601_prec_year;
}
@ -547,8 +577,8 @@ done:
if(nsec_dig != -1) while(nsec_dig++ < 9) nsec *= 10;
if(hour == -1) {
if(earliest && iso8601_from_clocktime(earliest, 0, 0, 0)) return -1;
if(latest && iso8601_from_clocktime(latest, 23, 59, 59)) return -1;
ERROR_IF(earliest && iso8601_from_clocktime(earliest, 0, 0, 0));
ERROR_IF(latest && iso8601_from_clocktime(latest, 23, 59, 59));
if(details) details->time_prec = iso8601_prec_none;
} else if(sec != -1) {
@ -557,8 +587,8 @@ done:
sec = 59;
}
if(earliest && iso8601_from_clocktime(earliest, hour, min, sec)) return -1;
if(latest && iso8601_from_clocktime(latest, hour, min, sec)) return -1;
ERROR_IF(earliest && iso8601_from_clocktime(earliest, hour, min, sec));
ERROR_IF(latest && iso8601_from_clocktime(latest, hour, min, sec));
if(nsec_dig == -1) {
if(latest) latest->nsec = 999999999;
@ -573,8 +603,8 @@ done:
} else if(min != -1) {
if(nsec_dig == -1) {
if(earliest && iso8601_from_clocktime(earliest, hour, min, 0)) return -1;
if(latest && iso8601_from_clocktime(latest, hour, min, 59)) return -1;
ERROR_IF(earliest && iso8601_from_clocktime(earliest, hour, min, 0));
ERROR_IF(latest && iso8601_from_clocktime(latest, hour, min, 59));
if(latest) latest->nsec = 999999999;
if(details) details->time_prec = iso8601_prec_min;
@ -582,9 +612,9 @@ done:
frac = nsec * 60.0 / 1e9;
sec = (int)frac;
nsec = (frac - sec) * 1e9;
if(earliest && iso8601_from_clocktime(earliest, hour, min, sec)) return -1;
ERROR_IF(earliest && iso8601_from_clocktime(earliest, hour, min, sec));
if(earliest) earliest->nsec = nsec;
if(latest && iso8601_from_clocktime(latest, hour, min, sec)) return -1;
ERROR_IF(latest && iso8601_from_clocktime(latest, hour, min, sec));
if(latest) latest->nsec = nsec;
if(details) details->time_prec = iso8601_prec_minfrac;
@ -592,8 +622,8 @@ done:
} else {
if(nsec_dig == -1) {
if(earliest && iso8601_from_clocktime(earliest, hour, 0, 0)) return -1;
if(latest && iso8601_from_clocktime(latest, hour, 59, 59)) return -1;
ERROR_IF(earliest && iso8601_from_clocktime(earliest, hour, 0, 0));
ERROR_IF(latest && iso8601_from_clocktime(latest, hour, 59, 59));
if(latest) latest->nsec = 999999999;
if(details) details->time_prec = iso8601_prec_hour;
@ -604,9 +634,9 @@ done:
frac *= 60;
sec = (int)frac;
nsec = (frac - sec) * 1e9;
if(earliest && iso8601_from_clocktime(earliest, hour, min, sec)) return -1;
ERROR_IF(earliest && iso8601_from_clocktime(earliest, hour, min, sec));
if(earliest) earliest->nsec = nsec;
if(latest && iso8601_from_clocktime(latest, hour, min, sec)) return -1;
ERROR_IF(latest && iso8601_from_clocktime(latest, hour, min, sec));
if(latest) latest->nsec = nsec;
if(details) details->time_prec = iso8601_prec_hourfrac;

View File

@ -22,7 +22,7 @@ to represent them, and for validating such dates/times.
\param[out] latest The latest possible time the string could represent. May be 0.
\param[out] details Stores details such as the precision to which the time/date were specified. May
be 0.
\retval -1 on error (and see \a errno).
\retval -1 on error.
\retval 0 on success.
Parses a string containing the ISO8601 date/time. Deals with any format of date, optionally storing

View File

@ -7,7 +7,7 @@ build_target libiso8601 || return 1
if [ -z ${tests_BUILT} ]
then
LIBS="${libiso8601} "
EXTRAS="" # @TODO@ libs, cflags
EXTRAS="-D_GNU_SOURCE"
echo "Building test programs..."
do_cmd mkdir -p obj/tests || return 1

View File

@ -19,7 +19,7 @@ int do_date(const char* str)
char buf[100], buf2[100], buf3[100], buf4[100];
if(iso8601_parse(str, &earliest, &latest, &details)) {
perror("iso8601_parse");
fputs("Failed to parse date.\n", stderr);
return 1;
}
@ -58,7 +58,8 @@ int do_date(const char* str)
int main(int argc, char* argv[])
{
int ret = 0, i;
char buf[100];
char* buf = 0;
size_t buf_size;
if(argc == 2 && !strcmp(argv[1], "--print-summary")) {
printf("One line summary.\n");
@ -68,7 +69,7 @@ int main(int argc, char* argv[])
if(argc == 1) {
while(!feof(stdin)) {
printf("date >> ");
scanf("%s", buf);
if(getline(&buf, &buf_size, stdin) < 1) continue;
do_date(buf);
}