2006-07-31 15:34:21 +01:00
|
|
|
/* libutf8/src/tests/decode.c
 *
 *  (c)2006-2009, Laurence Withers, <l@lwithers.me.uk>.
 *  Released under the GNU GPLv3. See file COPYING or
 *  http://www.gnu.org/copyleft/gpl.html for details.
*/
|
|
|
|
|
|
|
|
#include "utf8.h"
|
|
|
|
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <string.h>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* Write `amt` wide characters starting at `x` to stdout as raw wchar_t
 * values (no byte-order or width conversion is applied here).
 *
 * A count of zero or less is a no-op, so a negative `amt` can never be
 * converted into a huge size_t element count. A short write is reported on
 * stderr; the function returns void, so the caller is not otherwise told.
 */
void writeout(const wchar_t* x, int amt)
{
    if(amt <= 0) {
        return;
    }

    const size_t want = (size_t)amt;
    if(fwrite(x, sizeof *x, want, stdout) != want) {
        perror("fwrite");
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
enum utf8_decode_error_action error_callback(
|
|
|
|
const struct utf8_decode_state* ctx, enum utf8_decode_error error, wchar_t* newch)
|
|
|
|
{
|
|
|
|
fprintf(stderr, "Line %d, col %d (char %d, byte %d): ",
|
|
|
|
ctx->line + 1, ctx->col + 1, ctx->char_offset, ctx->byte_offset);
|
|
|
|
switch(error) {
|
|
|
|
case utf8_decode_error_lone_cchar:
|
|
|
|
fprintf(stderr, "a lone continuation char was encountered.\n");
|
|
|
|
break;
|
|
|
|
|
|
|
|
case utf8_decode_error_not_cchar:
|
|
|
|
fprintf(stderr, "a continuation char was expected, but not encountered.\n");
|
|
|
|
break;
|
|
|
|
|
|
|
|
case utf8_decode_error_not_schar:
|
|
|
|
fprintf(stderr, "an invalid character was encountered (not start char).\n");
|
|
|
|
break;
|
|
|
|
|
|
|
|
case utf8_decode_error_overlong:
|
|
|
|
fprintf(stderr, "an overlong character sequence was encountered.\n");
|
|
|
|
break;
|
|
|
|
|
|
|
|
case utf8_decode_error_illegal_cp:
|
|
|
|
fprintf(stderr, "an illegal code point was encountered.\n");
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
*newch = 0xFFFD;
|
|
|
|
return utf8_decode_error_action_replace;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
int main(int argc, char* argv[])
|
|
|
|
{
|
|
|
|
char inbuf[1024];
|
|
|
|
wchar_t outbuf[1024];
|
|
|
|
struct utf8_decode_state ctx;
|
|
|
|
|
|
|
|
if(argc == 2 && !strcmp(argv[1], "--print-summary")) {
|
|
|
|
printf("Decodes UTF-8 on stdin to UCS-4 on stdout.\n");
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
if(argc != 1) {
|
|
|
|
fprintf(stderr, "No parameters expected. This program decodes UTF-8 presented on stdin\n"
|
|
|
|
"and transforms it to UCS-4 on stdout.\n");
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
// set up ctx structure
|
|
|
|
memset(&ctx, 0, sizeof(ctx));
|
|
|
|
ctx.wr = outbuf;
|
|
|
|
ctx.wr_size = sizeof(outbuf) / sizeof(wchar_t);
|
|
|
|
ctx.error_callback = error_callback;
|
|
|
|
|
|
|
|
// loop over input
|
|
|
|
while(!feof(stdin)) {
|
|
|
|
// read input
|
|
|
|
ctx.rd_remain = fread(inbuf, 1, sizeof(inbuf), stdin);
|
|
|
|
ctx.rd = inbuf;
|
|
|
|
|
|
|
|
// decode it
|
|
|
|
while(ctx.rd_remain) {
|
|
|
|
if(!utf8_decoder(&ctx)) {
|
|
|
|
perror("utf8_decoder");
|
|
|
|
fprintf(stderr, "(at line %d, col %d, char %d, byte %d)\n",
|
|
|
|
ctx.line + 1, ctx.col + 1, ctx.char_offset, ctx.byte_offset);
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
// write output
|
|
|
|
writeout(outbuf, ctx.written);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if(!ctx.complete) {
|
|
|
|
fprintf(stderr, "Input did not end on a character boundary.\n");
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* options for text editors
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
vim: expandtab:ts=4:sw=4:syntax=c.doxygen
*/
|