Tidy up source formatting
Use new function-definition formatting convention and remove C++-style comments.
This commit is contained in:
parent
e98cbe5cc5
commit
26e3c57b04
|
@ -7,14 +7,16 @@
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
int utf8_isascii(wchar_t ch)
|
int
|
||||||
|
utf8_isascii(wchar_t ch)
|
||||||
{
|
{
|
||||||
return !(ch & ~0x7F);
|
return !(ch & ~0x7F);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
int utf8_isspace(wchar_t ch)
|
int
|
||||||
|
utf8_isspace(wchar_t ch)
|
||||||
{
|
{
|
||||||
return((ch >= 0x0009 && ch <= 0x000D)
|
return((ch >= 0x0009 && ch <= 0x000D)
|
||||||
|| ch == 0x0020
|
|| ch == 0x0020
|
||||||
|
@ -32,7 +34,8 @@ int utf8_isspace(wchar_t ch)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
int utf8_isucs4(wchar_t ch)
|
int
|
||||||
|
utf8_isucs4(wchar_t ch)
|
||||||
{
|
{
|
||||||
return !(ch & (~((wchar_t)0x7FFFFFFF)))
|
return !(ch & (~((wchar_t)0x7FFFFFFF)))
|
||||||
&& (ch < 0xD800 || ch > 0xDFFF)
|
&& (ch < 0xD800 || ch > 0xDFFF)
|
||||||
|
@ -41,7 +44,8 @@ int utf8_isucs4(wchar_t ch)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
int utf8_isutf32(wchar_t ch)
|
int
|
||||||
|
utf8_isutf32(wchar_t ch)
|
||||||
{
|
{
|
||||||
return ch >= 0 && ch <= 0x10FFFF
|
return ch >= 0 && ch <= 0x10FFFF
|
||||||
&& (ch < 0xD800 || ch > 0xDFFF)
|
&& (ch < 0xD800 || ch > 0xDFFF)
|
||||||
|
@ -50,7 +54,8 @@ int utf8_isutf32(wchar_t ch)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
int utf8_isutf16(wchar_t ch)
|
int
|
||||||
|
utf8_isutf16(wchar_t ch)
|
||||||
{
|
{
|
||||||
return ch >= 0 && ch <= 0xFFFD
|
return ch >= 0 && ch <= 0xFFFD
|
||||||
&& (ch < 0xD800 || ch > 0xDFFF);
|
&& (ch < 0xD800 || ch > 0xDFFF);
|
||||||
|
|
|
@ -5,14 +5,18 @@
|
||||||
* http://www.gnu.org/copyleft/gpl.html for details.
|
* http://www.gnu.org/copyleft/gpl.html for details.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
wchar_t utf8_decode_char(const char* src, size_t* used)
|
|
||||||
|
|
||||||
|
wchar_t
|
||||||
|
utf8_decode_char(const char* src, size_t* used)
|
||||||
{
|
{
|
||||||
return utf8_decode_char2(src, 6, used);
|
return utf8_decode_char2(src, 6, used);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
wchar_t utf8_decode_char2(const char* src, size_t size, size_t* used)
|
wchar_t
|
||||||
|
utf8_decode_char2(const char* src, size_t size, size_t* used)
|
||||||
{
|
{
|
||||||
uint8_t ch;
|
uint8_t ch;
|
||||||
wchar_t ret, min;
|
wchar_t ret, min;
|
||||||
|
@ -82,14 +86,17 @@ wchar_t utf8_decode_char2(const char* src, size_t size, size_t* used)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
wchar_t utf8_decode_char_force(const char* src, size_t* used, wchar_t ilseq)
|
wchar_t
|
||||||
|
utf8_decode_char_force(const char* src, size_t* used, wchar_t ilseq)
|
||||||
{
|
{
|
||||||
return utf8_decode_char2_force(src, 6, used, ilseq);
|
return utf8_decode_char2_force(src, 6, used, ilseq);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
wchar_t utf8_decode_char2_force(const char* src, size_t size, size_t* used, wchar_t ilseq)
|
wchar_t
|
||||||
|
utf8_decode_char2_force(const char* src, size_t size, size_t* used,
|
||||||
|
wchar_t ilseq)
|
||||||
{
|
{
|
||||||
uint8_t ch;
|
uint8_t ch;
|
||||||
wchar_t ret, min;
|
wchar_t ret, min;
|
||||||
|
@ -142,8 +149,8 @@ wchar_t utf8_decode_char2_force(const char* src, size_t size, size_t* used, wcha
|
||||||
}
|
}
|
||||||
return ch;
|
return ch;
|
||||||
|
|
||||||
ILSEQ:
|
ILSEQ:
|
||||||
// advance pointer to next valid char boundary
|
/* advance pointer to next valid char boundary */
|
||||||
while(1) {
|
while(1) {
|
||||||
if(!*src || !size) break;
|
if(!*src || !size) break;
|
||||||
if((*src & 0xC0) == 0x80) break;
|
if((*src & 0xC0) == 0x80) break;
|
||||||
|
@ -157,7 +164,8 @@ ILSEQ:
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
wchar_t* utf8_decode(wchar_t* dest, size_t size, const char* src)
|
wchar_t*
|
||||||
|
utf8_decode(wchar_t* dest, size_t size, const char* src)
|
||||||
{
|
{
|
||||||
struct utf8_decode_state ctx;
|
struct utf8_decode_state ctx;
|
||||||
memset(&ctx, 0, sizeof(ctx));
|
memset(&ctx, 0, sizeof(ctx));
|
||||||
|
@ -177,7 +185,9 @@ wchar_t* utf8_decode(wchar_t* dest, size_t size, const char* src)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
wchar_t* utf8_decode2(wchar_t* dest, size_t size, size_t* written, const char* src, size_t amt)
|
wchar_t*
|
||||||
|
utf8_decode2(wchar_t* dest, size_t size, size_t* written, const char* src,
|
||||||
|
size_t amt)
|
||||||
{
|
{
|
||||||
struct utf8_decode_state ctx;
|
struct utf8_decode_state ctx;
|
||||||
memset(&ctx, 0, sizeof(ctx));
|
memset(&ctx, 0, sizeof(ctx));
|
||||||
|
@ -198,7 +208,8 @@ wchar_t* utf8_decode2(wchar_t* dest, size_t size, size_t* written, const char* s
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
wchar_t* utf8_decode_force(wchar_t* dest, size_t size, const char* src)
|
wchar_t*
|
||||||
|
utf8_decode_force(wchar_t* dest, size_t size, const char* src)
|
||||||
{
|
{
|
||||||
struct utf8_decode_state ctx;
|
struct utf8_decode_state ctx;
|
||||||
memset(&ctx, 0, sizeof(ctx));
|
memset(&ctx, 0, sizeof(ctx));
|
||||||
|
@ -219,7 +230,9 @@ wchar_t* utf8_decode_force(wchar_t* dest, size_t size, const char* src)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
wchar_t* utf8_decode_force2(wchar_t* dest, size_t size, size_t* written, const char* src, size_t amt)
|
wchar_t*
|
||||||
|
utf8_decode_force2(wchar_t* dest, size_t size, size_t* written, const char* src,
|
||||||
|
size_t amt)
|
||||||
{
|
{
|
||||||
struct utf8_decode_state ctx;
|
struct utf8_decode_state ctx;
|
||||||
memset(&ctx, 0, sizeof(ctx));
|
memset(&ctx, 0, sizeof(ctx));
|
||||||
|
@ -234,6 +247,8 @@ wchar_t* utf8_decode_force2(wchar_t* dest, size_t size, size_t* written, const c
|
||||||
return dest;
|
return dest;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/* options for text editors
|
/* options for text editors
|
||||||
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
|
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
|
||||||
vim: expandtab:ts=4:sw=4:syntax=c.doxygen
|
vim: expandtab:ts=4:sw=4:syntax=c.doxygen
|
||||||
|
|
|
@ -5,7 +5,9 @@
|
||||||
* http://www.gnu.org/copyleft/gpl.html for details.
|
* http://www.gnu.org/copyleft/gpl.html for details.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*! \defgroup decode UTF-8 decoding routines.
|
|
||||||
|
|
||||||
|
/*! \defgroup decode UTF-8 decoding routines
|
||||||
|
|
||||||
These routines decode UTF-8 data into C's wide character type \c wchar_t. Errors are reported
|
These routines decode UTF-8 data into C's wide character type \c wchar_t. Errors are reported
|
||||||
through \c errno, with the following errors being of particular interest:
|
through \c errno, with the following errors being of particular interest:
|
||||||
|
@ -186,7 +188,6 @@ wchar_t* utf8_decode_force2(wchar_t* dest, size_t size, size_t* written, const c
|
||||||
|
|
||||||
|
|
||||||
/*!@}*/
|
/*!@}*/
|
||||||
|
|
||||||
/* options for text editors
|
/* options for text editors
|
||||||
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
|
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
|
||||||
vim: expandtab:ts=4:sw=4:syntax=c.doxygen
|
vim: expandtab:ts=4:sw=4:syntax=c.doxygen
|
||||||
|
|
|
@ -5,7 +5,10 @@
|
||||||
* http://www.gnu.org/copyleft/gpl.html for details.
|
* http://www.gnu.org/copyleft/gpl.html for details.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
char* utf8_encode_char(char* dest, size_t amt, wchar_t ch)
|
|
||||||
|
|
||||||
|
char*
|
||||||
|
utf8_encode_char(char* dest, size_t amt, wchar_t ch)
|
||||||
{
|
{
|
||||||
if(!dest || !amt) {
|
if(!dest || !amt) {
|
||||||
errno = EINVAL;
|
errno = EINVAL;
|
||||||
|
@ -76,7 +79,8 @@ char* utf8_encode_char(char* dest, size_t amt, wchar_t ch)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
char* utf8_encode_char_force(char* dest, size_t amt, wchar_t ch, wchar_t ilseq)
|
char*
|
||||||
|
utf8_encode_char_force(char* dest, size_t amt, wchar_t ch, wchar_t ilseq)
|
||||||
{
|
{
|
||||||
if(!utf8_isucs4(ilseq)) {
|
if(!utf8_isucs4(ilseq)) {
|
||||||
errno = EILSEQ;
|
errno = EILSEQ;
|
||||||
|
@ -88,14 +92,17 @@ char* utf8_encode_char_force(char* dest, size_t amt, wchar_t ch, wchar_t ilseq)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
char* utf8_encode(char* dest, size_t amt, const wchar_t* src)
|
char*
|
||||||
|
utf8_encode(char* dest, size_t amt, const wchar_t* src)
|
||||||
{
|
{
|
||||||
return utf8_encode2(dest, amt, 0, src, -1);
|
return utf8_encode2(dest, amt, 0, src, -1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
char* utf8_encode2(char* dest, size_t amt, size_t* written, const wchar_t* src, size_t inamt)
|
char*
|
||||||
|
utf8_encode2(char* dest, size_t amt, size_t* written, const wchar_t* src,
|
||||||
|
size_t inamt)
|
||||||
{
|
{
|
||||||
struct utf8_encode_state ctx;
|
struct utf8_encode_state ctx;
|
||||||
memset(&ctx, 0, sizeof(ctx));
|
memset(&ctx, 0, sizeof(ctx));
|
||||||
|
@ -115,14 +122,17 @@ char* utf8_encode2(char* dest, size_t amt, size_t* written, const wchar_t* src,
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
char* utf8_encode_force(char* dest, size_t amt, const wchar_t* src)
|
char*
|
||||||
|
utf8_encode_force(char* dest, size_t amt, const wchar_t* src)
|
||||||
{
|
{
|
||||||
return utf8_encode_force2(dest, amt, 0, src, -1);
|
return utf8_encode_force2(dest, amt, 0, src, -1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
char* utf8_encode_force2(char* dest, size_t amt, size_t* written, const wchar_t* src, size_t inamt)
|
char*
|
||||||
|
utf8_encode_force2(char* dest, size_t amt, size_t* written, const wchar_t* src,
|
||||||
|
size_t inamt)
|
||||||
{
|
{
|
||||||
struct utf8_encode_state ctx;
|
struct utf8_encode_state ctx;
|
||||||
memset(&ctx, 0, sizeof(ctx));
|
memset(&ctx, 0, sizeof(ctx));
|
||||||
|
@ -137,6 +147,8 @@ char* utf8_encode_force2(char* dest, size_t amt, size_t* written, const wchar_t*
|
||||||
return dest;
|
return dest;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/* options for text editors
|
/* options for text editors
|
||||||
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
|
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
|
||||||
vim: expandtab:ts=4:sw=4:syntax=c.doxygen
|
vim: expandtab:ts=4:sw=4:syntax=c.doxygen
|
||||||
|
|
|
@ -5,7 +5,9 @@
|
||||||
* http://www.gnu.org/copyleft/gpl.html for details.
|
* http://www.gnu.org/copyleft/gpl.html for details.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*! \defgroup encode UTF-8 encoding routines.
|
|
||||||
|
|
||||||
|
/*! \defgroup encode UTF-8 encoding routines
|
||||||
|
|
||||||
The functions in this module allow encoding of UTF-8 characters. Errors are reported through
|
The functions in this module allow encoding of UTF-8 characters. Errors are reported through
|
||||||
\c errno, with the following errors being of particular interest:
|
\c errno, with the following errors being of particular interest:
|
||||||
|
@ -139,7 +141,6 @@ char* utf8_encode_force2(char* dest, size_t amt, size_t* written, const wchar_t*
|
||||||
|
|
||||||
|
|
||||||
/*!@}*/
|
/*!@}*/
|
||||||
|
|
||||||
/* options for text editors
|
/* options for text editors
|
||||||
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
|
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
|
||||||
vim: expandtab:ts=4:sw=4:syntax=c.doxygen
|
vim: expandtab:ts=4:sw=4:syntax=c.doxygen
|
||||||
|
|
|
@ -5,6 +5,8 @@
|
||||||
* http://www.gnu.org/copyleft/gpl.html for details.
|
* http://www.gnu.org/copyleft/gpl.html for details.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
enum utf8_decoder_state {
|
enum utf8_decoder_state {
|
||||||
utf8_state_none,
|
utf8_state_none,
|
||||||
utf8_state_multibyte1,
|
utf8_state_multibyte1,
|
||||||
|
@ -18,7 +20,8 @@ enum utf8_decoder_state {
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
struct utf8_decode_state* utf8_decoder(struct utf8_decode_state* ctx)
|
struct utf8_decode_state*
|
||||||
|
utf8_decoder(struct utf8_decode_state* ctx)
|
||||||
{
|
{
|
||||||
wchar_t* wr;
|
wchar_t* wr;
|
||||||
size_t avail;
|
size_t avail;
|
||||||
|
@ -33,7 +36,7 @@ struct utf8_decode_state* utf8_decoder(struct utf8_decode_state* ctx)
|
||||||
ctx->written = 0;
|
ctx->written = 0;
|
||||||
avail = ctx->wr_size;
|
avail = ctx->wr_size;
|
||||||
|
|
||||||
loop:
|
loop:
|
||||||
while(ctx->rd_remain) {
|
while(ctx->rd_remain) {
|
||||||
uint8_t in = *ctx->rd;
|
uint8_t in = *ctx->rd;
|
||||||
|
|
||||||
|
@ -107,13 +110,13 @@ loop:
|
||||||
error_type = utf8_decode_error_overlong;
|
error_type = utf8_decode_error_overlong;
|
||||||
goto error;
|
goto error;
|
||||||
} else {
|
} else {
|
||||||
// validate codepoint
|
/* validate codepoint */
|
||||||
if(!utf8_isucs4(ctx->statech)) {
|
if(!utf8_isucs4(ctx->statech)) {
|
||||||
error_type = utf8_decode_error_illegal_cp;
|
error_type = utf8_decode_error_illegal_cp;
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
|
|
||||||
// add to output string
|
/* add to output string */
|
||||||
*wr++ = ctx->statech;
|
*wr++ = ctx->statech;
|
||||||
++ctx->written;
|
++ctx->written;
|
||||||
--avail;
|
--avail;
|
||||||
|
@ -142,7 +145,7 @@ loop:
|
||||||
*wr = 0;
|
*wr = 0;
|
||||||
return ctx;
|
return ctx;
|
||||||
|
|
||||||
error:
|
error:
|
||||||
if(!ctx->error_callback) {
|
if(!ctx->error_callback) {
|
||||||
errno = EILSEQ;
|
errno = EILSEQ;
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -173,15 +176,16 @@ error:
|
||||||
goto loop;
|
goto loop;
|
||||||
}
|
}
|
||||||
|
|
||||||
// shouldn't reach here
|
/* shouldn't reach here */
|
||||||
errno = EILSEQ;
|
errno = EILSEQ;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
enum utf8_decode_error_action utf8_decode_error_callback_replace(
|
enum utf8_decode_error_action
|
||||||
const struct utf8_decode_state* ctx, enum utf8_decode_error error, wchar_t* newch)
|
utf8_decode_error_callback_replace(const struct utf8_decode_state* ctx,
|
||||||
|
enum utf8_decode_error error, wchar_t* newch)
|
||||||
{
|
{
|
||||||
(void)ctx;
|
(void)ctx;
|
||||||
(void)error;
|
(void)error;
|
||||||
|
@ -191,8 +195,9 @@ enum utf8_decode_error_action utf8_decode_error_callback_replace(
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
enum utf8_decode_error_action utf8_decode_error_callback_skip(
|
enum utf8_decode_error_action
|
||||||
const struct utf8_decode_state* ctx, enum utf8_decode_error error, wchar_t* newch)
|
utf8_decode_error_callback_skip(const struct utf8_decode_state* ctx,
|
||||||
|
enum utf8_decode_error error, wchar_t* newch)
|
||||||
{
|
{
|
||||||
(void)ctx;
|
(void)ctx;
|
||||||
(void)error;
|
(void)error;
|
||||||
|
@ -200,6 +205,8 @@ enum utf8_decode_error_action utf8_decode_error_callback_skip(
|
||||||
return utf8_decode_error_action_skip;
|
return utf8_decode_error_action_skip;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/* options for text editors
|
/* options for text editors
|
||||||
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
|
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
|
||||||
vim: expandtab:ts=4:sw=4:syntax=c.doxygen
|
vim: expandtab:ts=4:sw=4:syntax=c.doxygen
|
||||||
|
|
|
@ -5,7 +5,9 @@
|
||||||
* http://www.gnu.org/copyleft/gpl.html for details.
|
* http://www.gnu.org/copyleft/gpl.html for details.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*! \defgroup decode_ctx UTF-8 stateful decoder.
|
|
||||||
|
|
||||||
|
/*! \defgroup decode_ctx UTF-8 stateful decoder
|
||||||
|
|
||||||
This UTF-8 decoder uses a structure to maintain state information between calls. This means that
|
This UTF-8 decoder uses a structure to maintain state information between calls. This means that
|
||||||
you can feed it a stream of data as it comes in without needing to store the entire document in a
|
you can feed it a stream of data as it comes in without needing to store the entire document in a
|
||||||
|
@ -33,19 +35,19 @@ to the callback function.
|
||||||
|
|
||||||
*/
|
*/
|
||||||
enum utf8_decode_error {
|
enum utf8_decode_error {
|
||||||
/// Lone continuation char encountered when start char expected.
|
/*! \brief Lone continuation char encountered when start char expected. */
|
||||||
utf8_decode_error_lone_cchar,
|
utf8_decode_error_lone_cchar,
|
||||||
|
|
||||||
/// Non-continuation char encountered within multibyte sequence.
|
/*! \brief Non-continuation char encountered within multibyte sequence. */
|
||||||
utf8_decode_error_not_cchar,
|
utf8_decode_error_not_cchar,
|
||||||
|
|
||||||
/// Invalid start char (not ASCII).
|
/*! \brief Invalid start char (not ASCII). */
|
||||||
utf8_decode_error_not_schar,
|
utf8_decode_error_not_schar,
|
||||||
|
|
||||||
/// Overlong byte sequence.
|
/*! \brief Overlong byte sequence. */
|
||||||
utf8_decode_error_overlong,
|
utf8_decode_error_overlong,
|
||||||
|
|
||||||
/// Illegal code positions (UTF-16 surrogates or 0xFFFE,0xFFFF).
|
/*! \brief Illegal code positions (UTF-16 surrogates or 0xFFFE,0xFFFF). */
|
||||||
utf8_decode_error_illegal_cp
|
utf8_decode_error_illegal_cp
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -58,13 +60,13 @@ error. These actions are specified by the error callback function's return value
|
||||||
|
|
||||||
*/
|
*/
|
||||||
enum utf8_decode_error_action {
|
enum utf8_decode_error_action {
|
||||||
/// Abort the conversion, returning EILSEQ.
|
/*! \brief Abort the conversion, returning EILSEQ. */
|
||||||
utf8_decode_error_action_abort,
|
utf8_decode_error_action_abort,
|
||||||
|
|
||||||
/// Skip the illegal byte sequence.
|
/*! \brief Skip the illegal byte sequence. */
|
||||||
utf8_decode_error_action_skip,
|
utf8_decode_error_action_skip,
|
||||||
|
|
||||||
/// Discard the illegal byte sequence and enter a replacement char.
|
/*! \brief Discard the illegal byte sequence and enter a replacement char. */
|
||||||
utf8_decode_error_action_replace
|
utf8_decode_error_action_replace
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -118,48 +120,48 @@ these variables aren't perfect, as they can be affected by errors and limitation
|
||||||
|
|
||||||
*/
|
*/
|
||||||
struct utf8_decode_state {
|
struct utf8_decode_state {
|
||||||
/// \c false if we are part-way through a multi-byte character.
|
/*! \brief 0 if we are part-way through a multi-byte character. */
|
||||||
int complete;
|
int complete;
|
||||||
|
|
||||||
/// Data to read (current read position).
|
/*! \brief Data to read (current read position). */
|
||||||
const char* rd;
|
const char* rd;
|
||||||
|
|
||||||
/// Number of bytes remaining (current).
|
/*! \brief Number of bytes remaining (current). */
|
||||||
int rd_remain;
|
int rd_remain;
|
||||||
|
|
||||||
/// Internal state; initialise to 0, don't change.
|
/*! \brief Internal state; initialise to 0, don't change. */
|
||||||
int state;
|
int state;
|
||||||
|
|
||||||
/// Error callback (may be 0).
|
/*! \brief Error callback (may be 0). */
|
||||||
utf8_decode_error_callback error_callback;
|
utf8_decode_error_callback error_callback;
|
||||||
|
|
||||||
/// Pointer to output buffer.
|
/*! \brief Pointer to output buffer. */
|
||||||
wchar_t* wr;
|
wchar_t* wr;
|
||||||
|
|
||||||
/// Number of characters that can be written.
|
/*! \brief Number of characters that can be written. */
|
||||||
size_t wr_size;
|
size_t wr_size;
|
||||||
|
|
||||||
/// Number of characters written on last call.
|
/*! \brief Number of characters written on last call. */
|
||||||
size_t written;
|
size_t written;
|
||||||
|
|
||||||
/// Arbitrary data pointer for \a error_callback.
|
/*! \brief Arbitrary data pointer for \a error_callback. */
|
||||||
void* data;
|
void* data;
|
||||||
|
|
||||||
/// Current line (starting from 0).
|
/*! \brief Current line (starting from 0). */
|
||||||
int line;
|
int line;
|
||||||
|
|
||||||
/// Current column (starting from 0).
|
/*! \brief Current column (starting from 0). */
|
||||||
int col;
|
int col;
|
||||||
|
|
||||||
/// Character offset from start of data (starting from 0).
|
/*! \brief Character offset from start of data (starting from 0). */
|
||||||
int char_offset;
|
int char_offset;
|
||||||
|
|
||||||
/// Byte offset from start of data (starting from 0).
|
/*! \brief Byte offset from start of data (starting from 0). */
|
||||||
int byte_offset;
|
int byte_offset;
|
||||||
|
|
||||||
/// Don't use this.
|
/*! \brief Don't use this. */
|
||||||
wchar_t statech;
|
wchar_t statech;
|
||||||
/// Don't use this.
|
/*! \brief Don't use this. */
|
||||||
wchar_t minch;
|
wchar_t minch;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -186,18 +188,17 @@ struct utf8_decode_state* utf8_decoder(struct utf8_decode_state* state);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/// Standard error callback: use replacement char 0xFFFD.
|
/*! \brief Standard error callback: use replacement char 0xFFFD. */
|
||||||
enum utf8_decode_error_action utf8_decode_error_callback_replace(
|
enum utf8_decode_error_action utf8_decode_error_callback_replace(
|
||||||
const struct utf8_decode_state* ctx, enum utf8_decode_error error, wchar_t* newch);
|
const struct utf8_decode_state* ctx, enum utf8_decode_error error, wchar_t* newch);
|
||||||
|
|
||||||
/// Standard error callback: skip invalid chars.
|
/*! \brief Standard error callback: skip invalid chars. */
|
||||||
enum utf8_decode_error_action utf8_decode_error_callback_skip(
|
enum utf8_decode_error_action utf8_decode_error_callback_skip(
|
||||||
const struct utf8_decode_state* ctx, enum utf8_decode_error error, wchar_t* newch);
|
const struct utf8_decode_state* ctx, enum utf8_decode_error error, wchar_t* newch);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*!@}*/
|
/*!@}*/
|
||||||
|
|
||||||
/* options for text editors
|
/* options for text editors
|
||||||
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
|
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
|
||||||
vim: expandtab:ts=4:sw=4:syntax=c.doxygen
|
vim: expandtab:ts=4:sw=4:syntax=c.doxygen
|
||||||
|
|
|
@ -5,7 +5,10 @@
|
||||||
* http://www.gnu.org/copyleft/gpl.html for details.
|
* http://www.gnu.org/copyleft/gpl.html for details.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
struct utf8_encode_state* utf8_encoder(struct utf8_encode_state* state)
|
|
||||||
|
|
||||||
|
struct utf8_encode_state*
|
||||||
|
utf8_encoder(struct utf8_encode_state* state)
|
||||||
{
|
{
|
||||||
char* wr = state->wr, * ret;
|
char* wr = state->wr, * ret;
|
||||||
char* endp = wr + state->wr_size - 1;
|
char* endp = wr + state->wr_size - 1;
|
||||||
|
@ -24,7 +27,7 @@ struct utf8_encode_state* utf8_encoder(struct utf8_encode_state* state)
|
||||||
if(!ch && state->rd_remain < 0) break;
|
if(!ch && state->rd_remain < 0) break;
|
||||||
|
|
||||||
reencoding = 0;
|
reencoding = 0;
|
||||||
reencode:
|
reencode:
|
||||||
ret = utf8_encode_char(wr, endp - wr, ch);
|
ret = utf8_encode_char(wr, endp - wr, ch);
|
||||||
if(!ret) {
|
if(!ret) {
|
||||||
if(errno == ENOMEM) break;
|
if(errno == ENOMEM) break;
|
||||||
|
@ -66,8 +69,9 @@ struct utf8_encode_state* utf8_encoder(struct utf8_encode_state* state)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
enum utf8_encode_error_action utf8_encode_error_callback_replace(
|
enum utf8_encode_error_action
|
||||||
const struct utf8_encode_state* state, wchar_t* newch)
|
utf8_encode_error_callback_replace(const struct utf8_encode_state* state,
|
||||||
|
wchar_t* newch)
|
||||||
{
|
{
|
||||||
(void)state;
|
(void)state;
|
||||||
*newch = 0xFFFD;
|
*newch = 0xFFFD;
|
||||||
|
@ -76,14 +80,17 @@ enum utf8_encode_error_action utf8_encode_error_callback_replace(
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
enum utf8_encode_error_action utf8_encode_error_callback_skip(
|
enum utf8_encode_error_action
|
||||||
const struct utf8_encode_state* state, wchar_t* newch)
|
utf8_encode_error_callback_skip(const struct utf8_encode_state* state,
|
||||||
|
wchar_t* newch)
|
||||||
{
|
{
|
||||||
(void)state;
|
(void)state;
|
||||||
(void)newch;
|
(void)newch;
|
||||||
return utf8_encode_error_action_skip;
|
return utf8_encode_error_action_skip;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/* options for text editors
|
/* options for text editors
|
||||||
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
|
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
|
||||||
vim: expandtab:ts=4:sw=4:syntax=c.doxygen
|
vim: expandtab:ts=4:sw=4:syntax=c.doxygen
|
||||||
|
|
|
@ -5,7 +5,9 @@
|
||||||
* http://www.gnu.org/copyleft/gpl.html for details.
|
* http://www.gnu.org/copyleft/gpl.html for details.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*! \defgroup encode_state UTF-8 stateful encoder.
|
|
||||||
|
|
||||||
|
/*! \defgroup encode_state UTF-8 stateful encoder
|
||||||
|
|
||||||
This UTF-8 encoder uses a structure to maintain state information between calls. This means that
|
This UTF-8 encoder uses a structure to maintain state information between calls. This means that
|
||||||
you can feed it a stream of data as it comes in without needing to store the entire source in a
|
you can feed it a stream of data as it comes in without needing to store the entire source in a
|
||||||
|
@ -34,13 +36,13 @@ function's return value.
|
||||||
|
|
||||||
*/
|
*/
|
||||||
enum utf8_encode_error_action {
|
enum utf8_encode_error_action {
|
||||||
/// Abort the conversion, returning EILSEQ.
|
/*! \brief Abort the conversion, returning EILSEQ. */
|
||||||
utf8_encode_error_action_abort,
|
utf8_encode_error_action_abort,
|
||||||
|
|
||||||
/// Skip the illegal byte sequence.
|
/*! \brief Skip the illegal byte sequence. */
|
||||||
utf8_encode_error_action_skip,
|
utf8_encode_error_action_skip,
|
||||||
|
|
||||||
/// Discard the illegal byte sequence and enter a replacement char.
|
/*! \brief Discard the illegal byte sequence and enter a replacement char. */
|
||||||
utf8_encode_error_action_replace
|
utf8_encode_error_action_replace
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -61,11 +63,11 @@ it with something else, or abort the conversion entirely.
|
||||||
typedef enum utf8_encode_error_action (*utf8_encode_error_callback)(
|
typedef enum utf8_encode_error_action (*utf8_encode_error_callback)(
|
||||||
const struct utf8_encode_state* state, wchar_t* newch);
|
const struct utf8_encode_state* state, wchar_t* newch);
|
||||||
|
|
||||||
/// Standard error callback: use replacement char 0xFFFD.
|
/*! \brief Standard error callback: use replacement char 0xFFFD. */
|
||||||
enum utf8_encode_error_action utf8_encode_error_callback_replace(
|
enum utf8_encode_error_action utf8_encode_error_callback_replace(
|
||||||
const struct utf8_encode_state* state, wchar_t* newch);
|
const struct utf8_encode_state* state, wchar_t* newch);
|
||||||
|
|
||||||
/// Standard error callback: skip invalid chars.
|
/*! \brief Standard error callback: skip invalid chars. */
|
||||||
enum utf8_encode_error_action utf8_encode_error_callback_skip(
|
enum utf8_encode_error_action utf8_encode_error_callback_skip(
|
||||||
const struct utf8_encode_state* state, wchar_t* newch);
|
const struct utf8_encode_state* state, wchar_t* newch);
|
||||||
|
|
||||||
|
@ -93,34 +95,34 @@ start of the stream, and should always be accurate.
|
||||||
|
|
||||||
*/
|
*/
|
||||||
struct utf8_encode_state {
|
struct utf8_encode_state {
|
||||||
/// Current read position.
|
/*! \brief Current read position. */
|
||||||
const wchar_t* rd;
|
const wchar_t* rd;
|
||||||
|
|
||||||
/// Number of chars remaining (-ve means to scan for null char).
|
/*! \brief Number of chars remaining (-ve means to scan for null char). */
|
||||||
int rd_remain;
|
int rd_remain;
|
||||||
|
|
||||||
/// Callback function used to handle illegal source characters.
|
/*! \brief Callback function used to handle illegal source characters. */
|
||||||
utf8_encode_error_callback error_callback;
|
utf8_encode_error_callback error_callback;
|
||||||
|
|
||||||
/// Output buffer.
|
/*! \brief Output buffer. */
|
||||||
char* wr;
|
char* wr;
|
||||||
|
|
||||||
/// Output buffer size.
|
/*! \brief Output buffer size. */
|
||||||
size_t wr_size;
|
size_t wr_size;
|
||||||
|
|
||||||
/// Number of bytes written during last call.
|
/*! \brief Number of bytes written during last call. */
|
||||||
size_t written;
|
size_t written;
|
||||||
|
|
||||||
/// Arbitrary pointer (useful for \a error_callback).
|
/*! \brief Arbitrary pointer (useful for \a error_callback). */
|
||||||
void* data;
|
void* data;
|
||||||
|
|
||||||
/// Current line (starting from 0).
|
/*! \brief Current line (starting from 0). */
|
||||||
int line;
|
int line;
|
||||||
|
|
||||||
/// Current column (starting from 0).
|
/*! \brief Current column (starting from 0). */
|
||||||
int col;
|
int col;
|
||||||
|
|
||||||
/// Character offset from start of data (starting from 0).
|
/*! \brief Character offset from start of data (starting from 0). */
|
||||||
int char_offset;
|
int char_offset;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -158,7 +160,6 @@ struct utf8_encode_state* utf8_encoder(struct utf8_encode_state* state);
|
||||||
|
|
||||||
|
|
||||||
/*!@}*/
|
/*!@}*/
|
||||||
|
|
||||||
/* options for text editors
|
/* options for text editors
|
||||||
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
|
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
|
||||||
vim: expandtab:ts=4:sw=4:syntax=c.doxygen
|
vim: expandtab:ts=4:sw=4:syntax=c.doxygen
|
||||||
|
|
Loading…
Reference in New Issue