libutf8xx/src/libutf8++/decoder.cpp

160 lines
3.1 KiB
C++

/* libutf8++/src/lib/decoder.cpp
*
* (c)2006, Laurence Withers. Released under the GNU GPL. See file
* COPYING for more information / terms of license.
*/
namespace utf8 {
/* Build a decoder with an output buffer of `hint` wide characters.
 *
 * Hints smaller than two are raised to two. The decoder starts out with
 * the exception-throwing error policy installed.
 */
Decoder::Decoder(size_t hint)
{
    // Begin from an all-zero decoder state, then fill in the fields we set.
    memset(&ctx, 0, sizeof(ctx));

    // Never allocate fewer than two wide characters of output space.
    ctx.wr_size = hint;
    if(ctx.wr_size < 2) {
        ctx.wr_size = 2;
    }
    ctx.wr = new wchar_t[ctx.wr_size];

    // Default policy: throw on malformed input. `data` carries a pointer
    // back to this object for the static error callbacks.
    ctx.error_callback = _exceptionOnError;
    ctx.data = this;
}
/* Release the wide-character output buffer held in the decoder state. */
Decoder::~Decoder()
{
    delete[] ctx.wr;
}
/* Decode every byte held in `str`; forwards to the pointer/length overload. */
void Decoder::decode(const std::string& str)
{
    const char* const bytes = str.data();
    decode(bytes, str.size());
}
/* Feed `amt` bytes starting at `str` through the decoder and append the
 * resulting wide characters to `decoded`.
 *
 * The loop runs until the decoder reports no input remaining. When the
 * remaining count is negative and the read pointer rests on a NUL byte the
 * loop also stops (NOTE(review): this looks like libutf8's convention for
 * NUL-terminated input -- confirm against the libutf8 documentation).
 *
 * If input is still pending after a pass, the output buffer is replaced by
 * one twice as large before the next pass.
 *
 * May throw std::bad_alloc when growing the buffer, and whatever the
 * installed error callback throws.
 */
void Decoder::decode(const char* str, ssize_t amt)
{
    ctx.rd = str;
    ctx.rd_remain = amt;

    while(ctx.rd_remain) {
        utf8_decoder(&ctx);
        decoded.append(ctx.wr, ctx.written);

        if(ctx.rd_remain < 0 && !*(ctx.rd)) break;

        if(ctx.rd_remain) {
            // Allocate the replacement buffer BEFORE releasing the old one.
            // If new[] throws, ctx.wr / ctx.wr_size still describe a live
            // buffer, so the destructor will not free a dangling pointer.
            const size_t grown_size = ctx.wr_size * 2;
            wchar_t* const grown_buf = new wchar_t[grown_size];

            delete [] ctx.wr;
            ctx.wr = grown_buf;
            ctx.wr_size = grown_size;
        }
    }
}
/* Report the `complete` flag of the underlying decoder state.
 * NOTE(review): presumably true when the input so far did not end in the
 * middle of a multi-byte sequence -- confirm against libutf8.
 */
bool Decoder::complete() const
{
    const bool finished = ctx.complete;
    return finished;
}
void Decoder::reset()
{
size_t old_wr_size = ctx.wr_size;
wchar_t* old_wr = ctx.wr;
utf8_decode_error_callback old_error_callback = ctx.error_callback;
memset(&ctx, 0, sizeof(ctx));
ctx.wr_size = old_wr_size;
ctx.wr = old_wr;
ctx.error_callback = old_error_callback;
ctx.data = this;
decoded.clear();
}
/* Select the error policy whose callback answers "skip". */
void Decoder::skipOnError()
{
    ctx.error_callback = _skipOnError;
}
/* Select the error policy whose callback answers "replace", and record
 * `ch` as the character that callback hands back.
 */
void Decoder::replaceOnError(wchar_t ch)
{
    ctx.error_callback = _replaceOnError;
    replaceChar = ch;
}
/* Select the error policy whose callback throws BadUTF8Sequence. */
void Decoder::exceptionOnError()
{
    ctx.error_callback = _exceptionOnError;
}
/* Error callback: ignores all of its arguments and always answers "skip". */
enum utf8_decode_error_action Decoder::_skipOnError
    (const struct utf8_decode_state *ctx, enum utf8_decode_error error, wchar_t *newch)
{
    // None of the arguments are needed; silence unused-parameter warnings.
    static_cast<void>(newch);
    static_cast<void>(error);
    static_cast<void>(ctx);

    return utf8_decode_error_action_skip;
}
enum utf8_decode_error_action Decoder::_replaceOnError
(const struct utf8_decode_state *ctx, enum utf8_decode_error error, wchar_t *newch)
{
(void)error;
Decoder* self = (utf8::Decoder*)(ctx->data);
*newch = self->replaceChar;
return utf8_decode_error_action_replace;
}
enum utf8_decode_error_action Decoder::_exceptionOnError
(const struct utf8_decode_state *ctx, enum utf8_decode_error error, wchar_t *newch)
{
(void)newch;
const char* desc = "unknown";
switch(error) {
case utf8_decode_error_lone_cchar:
desc = "An invalid continuation byte was encountered while expecting a character.";
break;
case utf8_decode_error_not_cchar:
desc = "A multi-byte sequence contained an invalid byte.";
break;
case utf8_decode_error_not_schar:
desc = "An invalid byte was encountered while expecting a character.";
break;
case utf8_decode_error_overlong:
desc = "An overlong encoding of a character was encountered.";
break;
case utf8_decode_error_illegal_cp:
desc = "An illegal code point (a UTF-16 surrogate perhaps?) was encountered.";
break;
}
throw BadUTF8Sequence(desc, ctx);
}
}
/* options for text editors
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
*/