Further WIP on parser

This commit is contained in:
Laurence Withers 2006-10-15 23:26:27 +01:00
parent d61e77f9dc
commit 02d47dbdf2
1 changed files with 331 additions and 83 deletions

View File

@ -20,16 +20,46 @@ int iso8601_parse(const char* str, struct iso8601_date* earliest, struct iso8601
state_week_day, // e.g. `2006-W31-'
state_week_done, // `2006-W31-1'
state_time_basic,
state_time_hour
state_time_hour,
state_time_min,
state_time_sec,
state_time_nsec_basic,
state_time_nsec,
state_tz_basic,
state_tz_hour,
state_tz_min,
state_tz_sec,
state_tz_utc
}state = state_none;
div_t qr;
char ch;
int num = 0, neg = 0, dig = 0;
int y = 0, m = 0, d = 0, w = 0, wd = 0, hour = 0, min = 0, sec = 0;
int num = 0, neg = 0, dig = 0, tz_neg = 0, nsec = -1, nsec_dig = -1;
int y = 0, m = 0, d = 0, w = 0, wd = 0, hour = -1, min = -1, sec = -1, tz_sec = 0;
if(earliest) memset(earliest, 0, sizeof(struct iso8601_date));
if(latest) memset(latest, 0, sizeof(struct iso8601_date));
if(details) memset(details, 0, sizeof(struct iso8601_details));
/*
 * INCNUM(): append the decimal digit held in `ch' to the accumulator `num'
 * and count it in `dig'.
 *
 * Expects `ch' to be in the range '0'..'9' and relies on the locals `ch',
 * `num' and `dig' of the enclosing function. If appending the digit would
 * push `num' above 1000000000, sets errno to EDOM and makes the enclosing
 * function return -1.
 *
 * The bound is tested BEFORE multiplying: testing afterwards would let an
 * accepted value such as 999999999 be multiplied by ten first, which
 * overflows a signed int (undefined behaviour) before it could be rejected.
 */
#define INCNUM() do { \
    ++dig; \
    if(num > (1000000000 - (ch - '0')) / 10) { \
        errno = EDOM; \
        return -1; \
    } \
    num = num * 10 + (ch - '0'); \
}while(0)
/*
 * ERROR_IF(cond): reject the input when `cond' is true.
 *
 * When the condition holds, sets errno to EILSEQ and makes the enclosing
 * function return -1; otherwise does nothing. The `break' only leaves the
 * macro's own do/while(0) wrapper, never a surrounding loop or switch.
 */
#define ERROR_IF(cond) do { \
    if(!(cond)) break; \
    errno = EILSEQ; \
    return -1; \
}while(0)
while(1) {
ch = *str++;
printf("ch = '%c', state = %d, num = %d, dig = %d\n", ch, state, num, dig);
switch(state) {
case state_none:
@ -54,56 +84,44 @@ printf("ch = '%c', state = %d, num = %d, dig = %d\n", ch, state, num, dig);
case state_year:
switch(ch) {
case '0' ... '9':
++dig;
num *= 10;
num += ch - '0';
if(num > 1000000000) {
errno = EDOM;
return -1;
}
INCNUM();
break;
case '-':
if(!dig) {
errno = EILSEQ;
return -1;
}
ERROR_IF(!dig);
y = num;
num = 0;
dig = 0;
state = state_date2;
if(details) details->extended = 1;
break;
case 'T':
case 0:
switch(dig) {
case 4:
if(ch) {
errno = EILSEQ;
return -1;
}
ERROR_IF(ch);
y = num;
goto done;
case 6:
if(ch) {
errno = EILSEQ;
return -1;
}
y = num / 100;
m = num - y * 100;
ERROR_IF(ch);
qr = div(num, 100);
y = qr.quot;
m = qr.rem;
goto done;
case 7:
y = num / 1000;
d = num - (y * 1000);
qr = div(num, 1000);
y = qr.quot;
d = qr.rem;
break;
case 8:
y = num / 10000;
num -= y * 10000;
m = num / 100;
d = num - m * 100;
qr = div(num, 10000);
y = qr.quot;
qr = div(qr.rem, 100);
m = qr.quot;
d = qr.rem;
break;
default:
@ -111,6 +129,7 @@ printf("ch = '%c', state = %d, num = %d, dig = %d\n", ch, state, num, dig);
return -1;
}
if(!ch) goto done;
state = state_time_basic;
num = 0;
dig = 0;
@ -128,20 +147,16 @@ printf("ch = '%c', state = %d, num = %d, dig = %d\n", ch, state, num, dig);
case state_date2:
switch(ch) {
case '0' ... '9':
num *= 10;
num += ch - '0';
++dig;
INCNUM();
break;
case '-':
if(num < 1 || num > 12 || dig != 2) {
errno = EDOM;
return -1;
}
ERROR_IF(dig != 2);
m = num;
num = 0;
dig = 0;
state = state_day;
break;
case 'W':
if(dig) {
@ -152,13 +167,16 @@ printf("ch = '%c', state = %d, num = %d, dig = %d\n", ch, state, num, dig);
break;
case 0:
case 'T':
if(dig != 3) {
switch(dig) {
case 2: m = num; goto done;
case 3: d = num; goto done;
}
errno = EILSEQ;
return -1;
}
case 'T':
ERROR_IF(dig != 3);
d = num;
if(!ch) goto done;
state = state_time_hour;
num = 0;
dig = 0;
@ -173,19 +191,17 @@ printf("ch = '%c', state = %d, num = %d, dig = %d\n", ch, state, num, dig);
case state_day:
switch(ch) {
case '0' ... '9':
num *= 10;
num += ch;
++dig;
INCNUM();
break;
case 0:
case 'T':
if(dig != 2) {
errno = EILSEQ;
return -1;
}
ERROR_IF(dig != 2);
d = num;
if(!ch) goto done;
num = 0;
dig = 0;
state = state_time_hour;
break;
@ -198,46 +214,51 @@ printf("ch = '%c', state = %d, num = %d, dig = %d\n", ch, state, num, dig);
case state_week_basic:
switch(ch) {
case '0' ... '9':
num *= 10;
num += ch - '0';
++dig;
INCNUM();
break;
case 0:
case 'T':
switch(dig) {
case 3:
w = num / 10;
wd = num % 10;
break;
case 2:
w = num;
if(!ch) break;
goto done;
case 3:
qr = div(num, 10);
w = qr.quot;
wd = qr.rem;
goto done;
}
errno = EILSEQ;
return -1;
case 'T':
ERROR_IF(dig != 3);
qr = div(num, 10);
w = qr.quot;
wd = qr.rem;
num = 0;
dig = 0;
state = state_time_basic;
break;
default:
errno = EILSEQ;
return -1;
}
if(!ch) goto done;
break;
}
break;
case state_week_extended:
switch(ch) {
case '0' ... '9':
num *= 10;
num += ch - '0';
++dig;
INCNUM();
break;
case 0:
case '-':
if(dig != 2) {
errno = EILSEQ;
return -1;
}
ERROR_IF(dig != 2);
w = num;
num = 0;
dig = 0;
@ -248,31 +269,258 @@ printf("ch = '%c', state = %d, num = %d, dig = %d\n", ch, state, num, dig);
break;
case state_week_day:
if(ch < '1' || ch > '7') {
errno = EILSEQ;
return -1;
}
ERROR_IF(ch < '1' || ch > '7');
wd = ch - '0';
state = state_week_done;
break;
case state_week_done:
if(!ch) goto done;
if(ch != 'T') {
errno = EILSEQ;
return -1;
}
ERROR_IF(ch != 'T');
num = 0;
dig = 0;
state = state_time_hour;
break;
case state_time_basic:
if(ch >= '0' && ch <= '9') {
INCNUM();
break;
}
switch(dig) {
case 2:
hour = num;
break;
case 4:
qr = div(num, 100);
hour = qr.quot;
min = qr.rem;
break;
case 6:
qr = div(num, 10000);
hour = qr.quot;
qr = div(qr.rem, 100);
min = qr.quot;
sec = qr.rem;
break;
default:
errno = EILSEQ;
return -1;
}
num = 0;
dig = 0;
switch(ch) {
case '.': state = state_time_nsec_basic; break;
case 0: goto done;
case 'Z': state = state_tz_utc; break;
case '+': state = state_tz_basic; break;
case '-': tz_neg = 1; state = state_tz_basic; break;
default: errno = EILSEQ; return -1;
}
break;
case state_time_hour:
if(ch >= '0' && ch <= '9') {
INCNUM();
break;
}
ERROR_IF(dig != 2);
hour = num;
num = 0;
dig = 0;
switch(ch) {
case ':': state = state_time_min; break;
case '.': state = state_time_nsec; break;
case 0: goto done;
case 'Z': state = state_tz_utc; break;
case '+': state = state_tz_hour; break;
case '-': tz_neg = 1; state = state_tz_hour; break;
default: errno = EILSEQ; return -1;
}
break;
case state_time_min:
if(ch >= '0' && ch <= '9') {
INCNUM();
break;
}
ERROR_IF(dig != 2);
min = num;
num = 0;
dig = 0;
switch(ch) {
case ':': state = state_time_sec; break;
case '.': state = state_time_nsec; break;
case 0: goto done;
case 'Z': state = state_tz_utc; break;
case '+': state = state_tz_hour; break;
case '-': tz_neg = 1; state = state_tz_hour; break;
default: errno = EILSEQ; return -1;
}
break;
case state_time_sec:
if(ch >= '0' && ch <= '9') {
INCNUM();
break;
}
ERROR_IF(dig != 2);
sec = num;
num = 0;
dig = 0;
switch(ch) {
case '.': state = state_time_nsec; break;
case 0: goto done;
case 'Z': state = state_tz_utc; break;
case '+': state = state_tz_hour; break;
case '-': tz_neg = 1; state = state_tz_hour; break;
default: errno = EILSEQ; return -1;
}
break;
case state_time_nsec_basic:
case state_time_nsec:
if(ch >= '0' && ch <= '9') {
if(dig < 9) INCNUM();
break;
}
nsec = num;
nsec_dig = dig;
num = 0;
dig = 0;
switch(ch) {
case 0:
goto done;
case 'Z':
state = state_tz_utc;
break;
case '-':
tz_neg = -1;
case '+':
state = (state == state_time_nsec_basic) ? state_tz_basic : state_tz_hour;
break;
}
break;
case state_tz_basic:
switch(ch) {
case '0' ... '9':
INCNUM();
break;
case 0:
switch(dig) {
case 2:
ERROR_IF(num > 23);
tz_sec = num * 3600;
break;
case 4:
qr = div(num, 100);
ERROR_IF(qr.quot > 23 || qr.rem > 59);
tz_sec = qr.quot * 3600 + qr.rem * 60;
break;
case 6:
qr = div(num, 10000);
ERROR_IF(qr.quot > 23);
tz_sec = qr.quot * 3600;
qr = div(qr.rem, 100);
ERROR_IF(qr.quot > 59 || qr.rem > 59);
tz_sec += qr.quot * 60 + qr.rem;
break;
default:
errno = EILSEQ;
return -1;
}
goto done;
default:
errno = EILSEQ;
return -1;
}
break;
case state_tz_utc:
ERROR_IF(ch);
goto done;
case state_tz_hour:
if(ch >= '0' && ch <= '9') {
INCNUM();
break;
}
ERROR_IF(dig != 2 || num > 23);
tz_sec = num * 3600;
num = 0;
dig = 0;
switch(ch) {
case ':': state = state_tz_min; break;
case 0: goto done;
default: errno = EILSEQ; return -1;
}
break;
case state_tz_min:
if(ch >= '0' && ch <= '9') {
INCNUM();
break;
}
ERROR_IF(dig != 2 || num > 59);
tz_sec += num * 60;
num = 0;
dig = 0;
switch(ch) {
case ':': state = state_tz_sec; break;
case 0: goto done;
default: errno = EILSEQ; return -1;
}
break;
case state_tz_sec:
if(ch >= '0' && ch <= '9') {
INCNUM();
break;
}
ERROR_IF(ch || dig != 2 || num > 59);
tz_sec += num;
num = 0;
dig = 0;
goto done;
}
}
#undef ERROR_IF
#undef INCNUM
done:
printf("y=%d, m=%d, d=%d, w=%d, wd=%d\n", y, m, d, w, wd);
abort();
if(neg) y *= -1;
if(tz_neg) tz_sec *= -1;
// TODO
}