/* libutf8/src/tests/decode.c
 *
 *  (c)2006-2009, Laurence Withers.
 *  See COPYING for more information / terms of license.
 */

#include "utf8.h"

#include <stdio.h>
#include <string.h>



/* writeout()
 *  Writes `amt' wide characters starting at `x' to stdout as raw wchar_t
 *  values, exactly as they are laid out in memory. The result of fwrite() is
 *  not examined here; callers that care can test ferror(stdout) afterwards.
 */
void writeout(const wchar_t* x, int amt)
{
    fwrite(x, sizeof(wchar_t), amt, stdout);
}



/* error_callback()
 *  Installed as ctx.error_callback in main(). Prints the position taken from
 *  `ctx' (line and column are printed with 1 added) followed by a description
 *  of `error' to stderr, then stores U+FFFD in `*newch' and returns
 *  utf8_decode_error_action_replace.
 *
 *  NOTE(review): presumably the `replace' action makes the decoder emit
 *  `*newch' in place of the bad sequence -- confirm against utf8.h.
 */
enum utf8_decode_error_action error_callback(
    const struct utf8_decode_state* ctx, enum utf8_decode_error error,
    wchar_t* newch)
{
    fprintf(stderr, "Line %d, col %d (char %d, byte %d): ",
        ctx->line + 1, ctx->col + 1, ctx->char_offset, ctx->byte_offset);

    switch(error) {
    case utf8_decode_error_lone_cchar:
        fprintf(stderr, "a lone continuation char was encountered.\n");
        break;

    case utf8_decode_error_not_cchar:
        fprintf(stderr, "a continuation char was expected, but not encountered.\n");
        break;

    case utf8_decode_error_not_schar:
        fprintf(stderr, "an invalid character was encountered (not start char).\n");
        break;

    case utf8_decode_error_overlong:
        fprintf(stderr, "an overlong character sequence was encountered.\n");
        break;

    case utf8_decode_error_illegal_cp:
        fprintf(stderr, "an illegal code point was encountered.\n");
        break;

    default:
        // keep the diagnostic line terminated even for codes not listed above
        fprintf(stderr, "an unknown error (code %d) was encountered.\n", (int)error);
        break;
    }

    *newch = 0xFFFD;
    return utf8_decode_error_action_replace;
}



/* main()
 *  Reads stdin in chunks of up to sizeof(inbuf) bytes, passes each chunk
 *  through utf8_decoder() and writes the decoded wide characters to stdout
 *  with writeout().
 *
 *  With the single argument "--print-summary", prints a one-line description
 *  and exits with 0. Any other arguments produce a usage message and exit
 *  code 1.
 *
 *  Returns 1 if utf8_decoder() reports failure, or if reading stdin or writing
 *  stdout fails; returns 0 otherwise. If ctx.complete is zero after the input
 *  has been consumed, a warning is printed but the exit code is still 0.
 */
int main(int argc, char* argv[])
{
    char inbuf[1024];
    wchar_t outbuf[1024];
    struct utf8_decode_state ctx;

    if(argc == 2 && !strcmp(argv[1], "--print-summary")) {
        printf("Decodes UTF-8 on stdin to UCS-4 on stdout.\n");
        return 0;
    }

    if(argc != 1) {
        fprintf(stderr, "No parameters expected. This program decodes UTF-8 presented on stdin\n"
            "and transforms it to UCS-4 on stdout.\n");
        return 1;
    }

    // set up ctx structure
    memset(&ctx, 0, sizeof(ctx));
    ctx.wr = outbuf;
    ctx.wr_size = sizeof(outbuf) / sizeof(wchar_t);
    ctx.error_callback = error_callback;

    // loop over input
    while(!feof(stdin)) {
        // read input
        ctx.rd_remain = fread(inbuf, 1, sizeof(inbuf), stdin);
        ctx.rd = inbuf;

        // decode it
        while(ctx.rd_remain) {
            if(!utf8_decoder(&ctx)) {
                perror("utf8_decoder");
                fprintf(stderr, "(at line %d, col %d, char %d, byte %d)\n",
                    ctx.line + 1, ctx.col + 1, ctx.char_offset, ctx.byte_offset);
                return 1;
            }

            // write output
            writeout(outbuf, ctx.written);
            if(ferror(stdout)) {
                perror("fwrite");
                return 1;
            }
        }

        // A read error sets the stream's error indicator, not its EOF
        // indicator, so without this check the outer loop would never end.
        // Any bytes fread() did return have already been decoded above.
        if(ferror(stdin)) {
            perror("fread");
            return 1;
        }
    }

    if(!ctx.complete) {
        fprintf(stderr, "Input did not end on a character boundary.\n");
    }

    return 0;
}

/* options for text editors
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
*/