/* libutf8++/src/lib/decoder.cpp * * (c)2006, Laurence Withers. Released under the GNU GPL. See file * COPYING for more information / terms of license. */ namespace utf8 { Decoder::Decoder(size_t hint) { memset(&ctx, 0, sizeof(ctx)); ctx.wr_size = (hint < 2) ? 2 : hint; ctx.wr = new wchar_t[ctx.wr_size]; ctx.error_callback = _exceptionOnError; ctx.data = this; } Decoder::~Decoder() { delete [] ctx.wr; } void Decoder::decode(const std::string& str) { decode(str.data(), str.size()); } void Decoder::decode(const char* str, ssize_t amt) { ctx.rd = str; ctx.rd_remain = amt; while(ctx.rd_remain) { utf8_decoder(&ctx); decoded.append(ctx.wr, ctx.written); if(ctx.rd_remain < 0 && !*(ctx.rd)) break; if(ctx.rd_remain) { ctx.wr_size *= 2; delete [] ctx.wr; ctx.wr = new wchar_t[ctx.wr_size]; } } } bool Decoder::complete() const { return ctx.complete; } void Decoder::reset() { size_t old_wr_size = ctx.wr_size; wchar_t* old_wr = ctx.wr; utf8_decode_error_callback old_error_callback = ctx.error_callback; memset(&ctx, 0, sizeof(ctx)); ctx.wr_size = old_wr_size; ctx.wr = old_wr; ctx.error_callback = old_error_callback; ctx.data = this; decoded.clear(); } void Decoder::skipOnError() { ctx.error_callback = _skipOnError; } void Decoder::replaceOnError(wchar_t ch) { replaceChar = ch; ctx.error_callback = _replaceOnError; } void Decoder::exceptionOnError() { ctx.error_callback = _exceptionOnError; } enum utf8_decode_error_action Decoder::_skipOnError (const struct utf8_decode_state *ctx, enum utf8_decode_error error, wchar_t *newch) { (void)ctx; (void)error; (void)newch; return utf8_decode_error_action_skip; } enum utf8_decode_error_action Decoder::_replaceOnError (const struct utf8_decode_state *ctx, enum utf8_decode_error error, wchar_t *newch) { (void)error; Decoder* self = (utf8::Decoder*)(ctx->data); *newch = self->replaceChar; return utf8_decode_error_action_replace; } enum utf8_decode_error_action Decoder::_exceptionOnError (const struct utf8_decode_state *ctx, enum utf8_decode_error error, wchar_t *newch) { (void)newch; const char* desc = "unknown"; switch(error) { case utf8_decode_error_lone_cchar: desc = "An invalid continuation byte was encountered while expecting a character."; break; case utf8_decode_error_not_cchar: desc = "A multi-byte sequence contained an invalid byte."; break; case utf8_decode_error_not_schar: desc = "An invalid byte was encountered while expecting a character."; break; case utf8_decode_error_overlong: desc = "An overlong encoding of a character was encountered."; break; case utf8_decode_error_illegal_cp: desc = "An illegal code point (a UTF-16 surrogate perhaps?) was encountered."; break; } throw BadUTF8Sequence(desc, ctx); } } /* options for text editors kate: replace-trailing-space-save true; space-indent true; tab-width 4; */