libutf8/src/tests/decode.c

110 lines
2.8 KiB
C
Raw Normal View History

/* libutf8/src/tests/decode.c
 *
 *  (c)2006-2009, Laurence Withers, <l@lwithers.me.uk>.
 *  Released under the GNU GPLv3. See file COPYING or
 *  http://www.gnu.org/copyleft/gpl.html for details.
 */
#include "utf8.h"
#include <stdio.h>
#include <string.h>
/* Writes `amt` wide characters starting at `x` to stdout as raw wchar_t
 * values; no width or byte-order conversion is applied.
 *
 *  x    first wide character to write
 *  amt  number of wide characters to write; nothing is written if <= 0
 *
 * A short write is reported on stderr. The function returns nothing, so a
 * caller that needs to detect the failure can test ferror(stdout).
 */
void writeout(const wchar_t* x, int amt)
{
    size_t count;

    // A negative count would be converted to an enormous size_t when handed
    // to fwrite(), so reject it (and the trivial empty case) up front.
    if(amt <= 0) {
        return;
    }

    count = (size_t)amt;
    if(fwrite(x, sizeof(wchar_t), count, stdout) != count) {
        perror("writeout: fwrite");
    }
}
/* Decode error callback: prints the position and kind of a decoding error on
 * stderr, then requests replacement with U+FFFD.
 *
 *  ctx    decoder state; its line, col, char_offset and byte_offset fields
 *         are printed (line and col are shown incremented by one)
 *  error  the kind of decoding error being reported
 *  newch  out-parameter; always set to 0xFFFD (the Unicode replacement
 *         character)
 *
 * Always returns utf8_decode_error_action_replace.
 */
enum utf8_decode_error_action error_callback(
    const struct utf8_decode_state* ctx, enum utf8_decode_error error, wchar_t* newch)
{
    const char* what;

    switch(error) {
    case utf8_decode_error_lone_cchar:
        what = "a lone continuation char was encountered.";
        break;

    case utf8_decode_error_not_cchar:
        what = "a continuation char was expected, but not encountered.";
        break;

    case utf8_decode_error_not_schar:
        what = "an invalid character was encountered (not start char).";
        break;

    case utf8_decode_error_overlong:
        what = "an overlong character sequence was encountered.";
        break;

    case utf8_decode_error_illegal_cp:
        what = "an illegal code point was encountered.";
        break;

    default:
        // Without this, an unrecognised code would leave only the position
        // prefix on stderr, with no message and no terminating newline.
        what = "an unknown decoding error was encountered.";
        break;
    }

    // Emit the whole diagnostic in one call so the line is never left
    // half-written.
    fprintf(stderr, "Line %d, col %d (char %d, byte %d): %s\n",
        ctx->line + 1, ctx->col + 1, ctx->char_offset, ctx->byte_offset, what);

    *newch = 0xFFFD;
    return utf8_decode_error_action_replace;
}
/* Test program entry point: reads UTF-8 from stdin in fixed-size chunks,
 * runs each chunk through utf8_decoder() and writes the decoded wide
 * characters to stdout via writeout().
 *
 * Command line:
 *   (no arguments)    decode stdin to stdout
 *   --print-summary   print a one-line description and exit
 *
 * Returns 0 on success (including when the input ends part-way through a
 * character, which only produces a diagnostic), or 1 on a usage error, a
 * decoder failure, a read error on stdin or a write error on stdout.
 */
int main(int argc, char* argv[])
{
    char inbuf[1024];
    wchar_t outbuf[1024];
    struct utf8_decode_state ctx;
    size_t got;

    if(argc == 2 && !strcmp(argv[1], "--print-summary")) {
        printf("Decodes UTF-8 on stdin to UCS-4 on stdout.\n");
        return 0;
    }

    if(argc != 1) {
        fprintf(stderr, "No parameters expected. This program decodes UTF-8 presented on stdin\n"
            "and transforms it to UCS-4 on stdout.\n");
        return 1;
    }

    // set up ctx structure
    memset(&ctx, 0, sizeof(ctx));
    ctx.wr = outbuf;
    ctx.wr_size = sizeof(outbuf) / sizeof(outbuf[0]);
    ctx.error_callback = error_callback;

    // Loop on fread()'s result rather than on feof(): after a read error
    // fread() returns 0 but feof() stays false, which would spin forever.
    while((got = fread(inbuf, 1, sizeof(inbuf), stdin)) > 0) {
        ctx.rd = inbuf;
        ctx.rd_remain = got;

        // decode the chunk; one call may not consume all of it
        while(ctx.rd_remain) {
            if(!utf8_decoder(&ctx)) {
                perror("utf8_decoder");
                fprintf(stderr, "(at line %d, col %d, char %d, byte %d)\n",
                    ctx.line + 1, ctx.col + 1, ctx.char_offset, ctx.byte_offset);
                return 1;
            }

            // write output
            writeout(outbuf, (int)ctx.written);
        }
    }

    // fread() returned 0: distinguish a genuine read error from end of file
    if(ferror(stdin)) {
        perror("fread");
        return 1;
    }

    if(!ctx.complete) {
        fprintf(stderr, "Input did not end on a character boundary.\n");
    }

    // Buffered output may only fail when flushed, so check before reporting
    // success.
    if(fflush(stdout) != 0 || ferror(stdout)) {
        fprintf(stderr, "Error writing output to stdout.\n");
        return 1;
    }

    return 0;
}
/* options for text editors
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
vim: expandtab:ts=4:sw=4:syntax=c.doxygen
*/