160 lines
3.1 KiB
C++
160 lines
3.1 KiB
C++
/* libutf8++/src/lib/decoder.cpp
 *
 *  (c)2006, Laurence Withers. Released under the GNU GPL. See file
 *  COPYING for more information / terms of license.
 */
|
|
|
|
namespace utf8 {
|
|
|
|
|
|
|
|
/* Decoder::Decoder()
 *
 *  Zeroes the decode state, allocates the output buffer and installs the
 *  throwing error handler as the initial error policy.
 *
 *  hint: requested output buffer size in wide characters; values below 2
 *        are raised to 2.
 */
Decoder::Decoder(size_t hint)
{
    memset(&ctx, 0, sizeof(ctx));

    // never work with an output buffer of fewer than two elements
    size_t capacity = hint;
    if(capacity < 2) capacity = 2;

    ctx.wr_size = capacity;
    ctx.wr = new wchar_t[ctx.wr_size];

    // the error callbacks receive only the state, so stash `this' in it
    // (_replaceOnError reads it back to find the replacement character)
    ctx.data = this;
    ctx.error_callback = &Decoder::_exceptionOnError;
}
|
|
|
|
|
|
|
|
/* Decoder::~Decoder()
 *
 *  Releases the output buffer held in the decode state.
 *
 *  NOTE(review): the buffer is a raw owning pointer, so copying a Decoder
 *  would free it twice unless copying is disabled in the class declaration
 *  -- confirm against the header.
 */
Decoder::~Decoder()
{
    delete [] this->ctx.wr;
}
|
|
|
|
|
|
|
|
void Decoder::decode(const std::string& str)
|
|
{
|
|
decode(str.data(), str.size());
|
|
}
|
|
|
|
|
|
|
|
void Decoder::decode(const char* str, ssize_t amt)
|
|
{
|
|
ctx.rd = str;
|
|
ctx.rd_remain = amt;
|
|
while(ctx.rd_remain) {
|
|
utf8_decoder(&ctx);
|
|
decoded.append(ctx.wr, ctx.written);
|
|
|
|
if(ctx.rd_remain < 0 && !*(ctx.rd)) break;
|
|
if(ctx.rd_remain) {
|
|
ctx.wr_size *= 2;
|
|
delete [] ctx.wr;
|
|
ctx.wr = new wchar_t[ctx.wr_size];
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
|
|
bool Decoder::complete() const
|
|
{
|
|
return ctx.complete;
|
|
}
|
|
|
|
|
|
|
|
void Decoder::reset()
|
|
{
|
|
size_t old_wr_size = ctx.wr_size;
|
|
wchar_t* old_wr = ctx.wr;
|
|
utf8_decode_error_callback old_error_callback = ctx.error_callback;
|
|
|
|
memset(&ctx, 0, sizeof(ctx));
|
|
ctx.wr_size = old_wr_size;
|
|
ctx.wr = old_wr;
|
|
ctx.error_callback = old_error_callback;
|
|
ctx.data = this;
|
|
decoded.clear();
|
|
}
|
|
|
|
|
|
|
|
void Decoder::skipOnError()
|
|
{
|
|
ctx.error_callback = _skipOnError;
|
|
}
|
|
|
|
|
|
|
|
void Decoder::replaceOnError(wchar_t ch)
|
|
{
|
|
replaceChar = ch;
|
|
ctx.error_callback = _replaceOnError;
|
|
}
|
|
|
|
|
|
|
|
void Decoder::exceptionOnError()
|
|
{
|
|
ctx.error_callback = _exceptionOnError;
|
|
}
|
|
|
|
|
|
|
|
enum utf8_decode_error_action Decoder::_skipOnError
|
|
(const struct utf8_decode_state *ctx, enum utf8_decode_error error, wchar_t *newch)
|
|
{
|
|
(void)ctx;
|
|
(void)error;
|
|
(void)newch;
|
|
return utf8_decode_error_action_skip;
|
|
}
|
|
|
|
|
|
|
|
enum utf8_decode_error_action Decoder::_replaceOnError
|
|
(const struct utf8_decode_state *ctx, enum utf8_decode_error error, wchar_t *newch)
|
|
{
|
|
(void)error;
|
|
Decoder* self = (utf8::Decoder*)(ctx->data);
|
|
*newch = self->replaceChar;
|
|
return utf8_decode_error_action_replace;
|
|
}
|
|
|
|
|
|
|
|
enum utf8_decode_error_action Decoder::_exceptionOnError
|
|
(const struct utf8_decode_state *ctx, enum utf8_decode_error error, wchar_t *newch)
|
|
{
|
|
(void)newch;
|
|
const char* desc = "unknown";
|
|
|
|
switch(error) {
|
|
case utf8_decode_error_lone_cchar:
|
|
desc = "An invalid continuation byte was encountered while expecting a character.";
|
|
break;
|
|
|
|
case utf8_decode_error_not_cchar:
|
|
desc = "A multi-byte sequence contained an invalid byte.";
|
|
break;
|
|
|
|
case utf8_decode_error_not_schar:
|
|
desc = "An invalid byte was encountered while expecting a character.";
|
|
break;
|
|
|
|
case utf8_decode_error_overlong:
|
|
desc = "An overlong encoding of a character was encountered.";
|
|
break;
|
|
|
|
case utf8_decode_error_illegal_cp:
|
|
desc = "An illegal code point (a UTF-16 surrogate perhaps?) was encountered.";
|
|
break;
|
|
}
|
|
|
|
throw BadUTF8Sequence(desc, ctx);
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
/* options for text editors
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
*/
|