Further WIP on parser

This commit is contained in:
Laurence Withers 2006-10-15 23:26:27 +01:00
parent d61e77f9dc
commit 02d47dbdf2
1 changed file with 331 additions and 83 deletions

View File

@ -20,16 +20,46 @@ int iso8601_parse(const char* str, struct iso8601_date* earliest, struct iso8601
state_week_day, // e.g. `2006-W31-' state_week_day, // e.g. `2006-W31-'
state_week_done, // `2006-W31-1' state_week_done, // `2006-W31-1'
state_time_basic, state_time_basic,
state_time_hour state_time_hour,
state_time_min,
state_time_sec,
state_time_nsec_basic,
state_time_nsec,
state_tz_basic,
state_tz_hour,
state_tz_min,
state_tz_sec,
state_tz_utc
}state = state_none; }state = state_none;
div_t qr;
char ch; char ch;
int num = 0, neg = 0, dig = 0; int num = 0, neg = 0, dig = 0, tz_neg = 0, nsec = -1, nsec_dig = -1;
int y = 0, m = 0, d = 0, w = 0, wd = 0, hour = 0, min = 0, sec = 0; int y = 0, m = 0, d = 0, w = 0, wd = 0, hour = -1, min = -1, sec = -1, tz_sec = 0;
if(earliest) memset(earliest, 0, sizeof(struct iso8601_date));
if(latest) memset(latest, 0, sizeof(struct iso8601_date));
if(details) memset(details, 0, sizeof(struct iso8601_details));
#define INCNUM() do { \
++dig; \
num *= 10; \
num += ch - '0'; \
if(num > 1000000000) { \
errno = EDOM; \
return -1; \
} \
}while(0)
#define ERROR_IF(x) do { \
if(x) { \
errno = EILSEQ; \
return -1; \
} \
}while(0)
while(1) { while(1) {
ch = *str++; ch = *str++;
printf("ch = '%c', state = %d, num = %d, dig = %d\n", ch, state, num, dig);
switch(state) { switch(state) {
case state_none: case state_none:
@ -54,56 +84,44 @@ printf("ch = '%c', state = %d, num = %d, dig = %d\n", ch, state, num, dig);
case state_year: case state_year:
switch(ch) { switch(ch) {
case '0' ... '9': case '0' ... '9':
++dig; INCNUM();
num *= 10;
num += ch - '0';
if(num > 1000000000) {
errno = EDOM;
return -1;
}
break; break;
case '-': case '-':
if(!dig) { ERROR_IF(!dig);
errno = EILSEQ;
return -1;
}
y = num; y = num;
num = 0; num = 0;
dig = 0; dig = 0;
state = state_date2; state = state_date2;
if(details) details->extended = 1;
break; break;
case 'T': case 'T':
case 0:
switch(dig) { switch(dig) {
case 4: case 4:
if(ch) { ERROR_IF(ch);
errno = EILSEQ;
return -1;
}
y = num; y = num;
goto done; goto done;
case 6: case 6:
if(ch) { ERROR_IF(ch);
errno = EILSEQ; qr = div(num, 100);
return -1; y = qr.quot;
} m = qr.rem;
y = num / 100;
m = num - y * 100;
goto done; goto done;
case 7: case 7:
y = num / 1000; qr = div(num, 1000);
d = num - (y * 1000); y = qr.quot;
d = qr.rem;
break; break;
case 8: case 8:
y = num / 10000; qr = div(num, 10000);
num -= y * 10000; y = qr.quot;
m = num / 100; qr = div(qr.rem, 100);
d = num - m * 100; m = qr.quot;
d = qr.rem;
break; break;
default: default:
@ -111,6 +129,7 @@ printf("ch = '%c', state = %d, num = %d, dig = %d\n", ch, state, num, dig);
return -1; return -1;
} }
if(!ch) goto done; if(!ch) goto done;
state = state_time_basic; state = state_time_basic;
num = 0; num = 0;
dig = 0; dig = 0;
@ -128,20 +147,16 @@ printf("ch = '%c', state = %d, num = %d, dig = %d\n", ch, state, num, dig);
case state_date2: case state_date2:
switch(ch) { switch(ch) {
case '0' ... '9': case '0' ... '9':
num *= 10; INCNUM();
num += ch - '0';
++dig;
break; break;
case '-': case '-':
if(num < 1 || num > 12 || dig != 2) { ERROR_IF(dig != 2);
errno = EDOM;
return -1;
}
m = num; m = num;
num = 0; num = 0;
dig = 0; dig = 0;
state = state_day; state = state_day;
break;
case 'W': case 'W':
if(dig) { if(dig) {
@ -152,13 +167,16 @@ printf("ch = '%c', state = %d, num = %d, dig = %d\n", ch, state, num, dig);
break; break;
case 0: case 0:
case 'T': switch(dig) {
if(dig != 3) { case 2: m = num; goto done;
errno = EILSEQ; case 3: d = num; goto done;
return -1;
} }
errno = EILSEQ;
return -1;
case 'T':
ERROR_IF(dig != 3);
d = num; d = num;
if(!ch) goto done;
state = state_time_hour; state = state_time_hour;
num = 0; num = 0;
dig = 0; dig = 0;
@ -173,19 +191,17 @@ printf("ch = '%c', state = %d, num = %d, dig = %d\n", ch, state, num, dig);
case state_day: case state_day:
switch(ch) { switch(ch) {
case '0' ... '9': case '0' ... '9':
num *= 10; INCNUM();
num += ch;
++dig;
break; break;
case 0: case 0:
case 'T': case 'T':
if(dig != 2) { ERROR_IF(dig != 2);
errno = EILSEQ;
return -1;
}
d = num; d = num;
if(!ch) goto done; if(!ch) goto done;
num = 0;
dig = 0;
state = state_time_hour; state = state_time_hour;
break; break;
@ -198,46 +214,51 @@ printf("ch = '%c', state = %d, num = %d, dig = %d\n", ch, state, num, dig);
case state_week_basic: case state_week_basic:
switch(ch) { switch(ch) {
case '0' ... '9': case '0' ... '9':
num *= 10; INCNUM();
num += ch - '0';
++dig;
break; break;
case 0: case 0:
case 'T':
switch(dig) { switch(dig) {
case 3:
w = num / 10;
wd = num % 10;
break;
case 2: case 2:
w = num; w = num;
if(!ch) break; goto done;
default: case 3:
errno = EILSEQ; qr = div(num, 10);
return -1; w = qr.quot;
wd = qr.rem;
goto done;
} }
if(!ch) goto done;
errno = EILSEQ;
return -1;
case 'T':
ERROR_IF(dig != 3);
qr = div(num, 10);
w = qr.quot;
wd = qr.rem;
num = 0;
dig = 0;
state = state_time_basic;
break; break;
default:
errno = EILSEQ;
return -1;
} }
break; break;
case state_week_extended: case state_week_extended:
switch(ch) { switch(ch) {
case '0' ... '9': case '0' ... '9':
num *= 10; INCNUM();
num += ch - '0';
++dig;
break; break;
case 0: case 0:
case '-': case '-':
if(dig != 2) { ERROR_IF(dig != 2);
errno = EILSEQ;
return -1;
}
w = num; w = num;
num = 0; num = 0;
dig = 0; dig = 0;
@ -248,31 +269,258 @@ printf("ch = '%c', state = %d, num = %d, dig = %d\n", ch, state, num, dig);
break; break;
case state_week_day: case state_week_day:
if(ch < '1' || ch > '7') { ERROR_IF(ch < '1' || ch > '7');
errno = EILSEQ;
return -1;
}
wd = ch - '0'; wd = ch - '0';
state = state_week_done; state = state_week_done;
break; break;
case state_week_done: case state_week_done:
if(!ch) goto done; if(!ch) goto done;
if(ch != 'T') { ERROR_IF(ch != 'T');
errno = EILSEQ;
return -1; num = 0;
} dig = 0;
state = state_time_hour; state = state_time_hour;
break; break;
default: case state_time_basic:
if(ch >= '0' && ch <= '9') {
INCNUM();
break;
}
switch(dig) {
case 2:
hour = num;
break;
case 4:
qr = div(num, 100);
hour = qr.quot;
min = qr.rem;
break;
case 6:
qr = div(num, 10000);
hour = qr.quot;
qr = div(qr.rem, 100);
min = qr.quot;
sec = qr.rem;
break;
default:
errno = EILSEQ;
return -1;
}
num = 0;
dig = 0;
switch(ch) {
case '.': state = state_time_nsec_basic; break;
case 0: goto done;
case 'Z': state = state_tz_utc; break;
case '+': state = state_tz_basic; break;
case '-': tz_neg = 1; state = state_tz_basic; break;
default: errno = EILSEQ; return -1;
}
break;
case state_time_hour:
if(ch >= '0' && ch <= '9') {
INCNUM();
break;
}
ERROR_IF(dig != 2);
hour = num;
num = 0;
dig = 0;
switch(ch) {
case ':': state = state_time_min; break;
case '.': state = state_time_nsec; break;
case 0: goto done;
case 'Z': state = state_tz_utc; break;
case '+': state = state_tz_hour; break;
case '-': tz_neg = 1; state = state_tz_hour; break;
default: errno = EILSEQ; return -1;
}
break;
case state_time_min:
if(ch >= '0' && ch <= '9') {
INCNUM();
break;
}
ERROR_IF(dig != 2);
min = num;
num = 0;
dig = 0;
switch(ch) {
case ':': state = state_time_sec; break;
case '.': state = state_time_nsec; break;
case 0: goto done;
case 'Z': state = state_tz_utc; break;
case '+': state = state_tz_hour; break;
case '-': tz_neg = 1; state = state_tz_hour; break;
default: errno = EILSEQ; return -1;
}
break;
case state_time_sec:
if(ch >= '0' && ch <= '9') {
INCNUM();
break;
}
ERROR_IF(dig != 2);
sec = num;
num = 0;
dig = 0;
switch(ch) {
case '.': state = state_time_nsec; break;
case 0: goto done;
case 'Z': state = state_tz_utc; break;
case '+': state = state_tz_hour; break;
case '-': tz_neg = 1; state = state_tz_hour; break;
default: errno = EILSEQ; return -1;
}
break;
case state_time_nsec_basic:
case state_time_nsec:
if(ch >= '0' && ch <= '9') {
if(dig < 9) INCNUM();
break;
}
nsec = num;
nsec_dig = dig;
num = 0;
dig = 0;
switch(ch) {
case 0:
goto done;
case 'Z':
state = state_tz_utc;
break;
case '-':
tz_neg = -1;
case '+':
state = (state == state_time_nsec_basic) ? state_tz_basic : state_tz_hour;
break;
}
break;
case state_tz_basic:
switch(ch) {
case '0' ... '9':
INCNUM();
break;
case 0:
switch(dig) {
case 2:
ERROR_IF(num > 23);
tz_sec = num * 3600;
break;
case 4:
qr = div(num, 100);
ERROR_IF(qr.quot > 23 || qr.rem > 59);
tz_sec = qr.quot * 3600 + qr.rem * 60;
break;
case 6:
qr = div(num, 10000);
ERROR_IF(qr.quot > 23);
tz_sec = qr.quot * 3600;
qr = div(qr.rem, 100);
ERROR_IF(qr.quot > 59 || qr.rem > 59);
tz_sec += qr.quot * 60 + qr.rem;
break;
default:
errno = EILSEQ;
return -1;
}
goto done;
default:
errno = EILSEQ;
return -1;
}
break;
case state_tz_utc:
ERROR_IF(ch);
goto done;
case state_tz_hour:
if(ch >= '0' && ch <= '9') {
INCNUM();
break;
}
ERROR_IF(dig != 2 || num > 23);
tz_sec = num * 3600;
num = 0;
dig = 0;
switch(ch) {
case ':': state = state_tz_min; break;
case 0: goto done;
default: errno = EILSEQ; return -1;
}
break;
case state_tz_min:
if(ch >= '0' && ch <= '9') {
INCNUM();
break;
}
ERROR_IF(dig != 2 || num > 59);
tz_sec += num * 60;
num = 0;
dig = 0;
switch(ch) {
case ':': state = state_tz_sec; break;
case 0: goto done;
default: errno = EILSEQ; return -1;
}
break;
case state_tz_sec:
if(ch >= '0' && ch <= '9') {
INCNUM();
break;
}
ERROR_IF(ch || dig != 2 || num > 59);
tz_sec += num;
num = 0;
dig = 0;
goto done; goto done;
} }
} }
#undef ERROR_IF
#undef INCNUM
done: done:
printf("y=%d, m=%d, d=%d, w=%d, wd=%d\n", y, m, d, w, wd); if(neg) y *= -1;
abort(); if(tz_neg) tz_sec *= -1;
// TODO
} }