Tidy up source formatting
Use new function-definition formatting convention and remove C++-style comments.
This commit is contained in:
parent
e98cbe5cc5
commit
26e3c57b04
|
@ -7,14 +7,16 @@
|
|||
|
||||
|
||||
|
||||
int utf8_isascii(wchar_t ch)
|
||||
int
|
||||
utf8_isascii(wchar_t ch)
|
||||
{
|
||||
return !(ch & ~0x7F);
|
||||
}
|
||||
|
||||
|
||||
|
||||
int utf8_isspace(wchar_t ch)
|
||||
int
|
||||
utf8_isspace(wchar_t ch)
|
||||
{
|
||||
return((ch >= 0x0009 && ch <= 0x000D)
|
||||
|| ch == 0x0020
|
||||
|
@ -32,7 +34,8 @@ int utf8_isspace(wchar_t ch)
|
|||
|
||||
|
||||
|
||||
int utf8_isucs4(wchar_t ch)
|
||||
int
|
||||
utf8_isucs4(wchar_t ch)
|
||||
{
|
||||
return !(ch & (~((wchar_t)0x7FFFFFFF)))
|
||||
&& (ch < 0xD800 || ch > 0xDFFF)
|
||||
|
@ -41,7 +44,8 @@ int utf8_isucs4(wchar_t ch)
|
|||
|
||||
|
||||
|
||||
int utf8_isutf32(wchar_t ch)
|
||||
int
|
||||
utf8_isutf32(wchar_t ch)
|
||||
{
|
||||
return ch >= 0 && ch <= 0x10FFFF
|
||||
&& (ch < 0xD800 || ch > 0xDFFF)
|
||||
|
@ -50,7 +54,8 @@ int utf8_isutf32(wchar_t ch)
|
|||
|
||||
|
||||
|
||||
int utf8_isutf16(wchar_t ch)
|
||||
int
|
||||
utf8_isutf16(wchar_t ch)
|
||||
{
|
||||
return ch >= 0 && ch <= 0xFFFD
|
||||
&& (ch < 0xD800 || ch > 0xDFFF);
|
||||
|
|
|
@ -5,14 +5,18 @@
|
|||
* http://www.gnu.org/copyleft/gpl.html for details.
|
||||
*/
|
||||
|
||||
wchar_t utf8_decode_char(const char* src, size_t* used)
|
||||
|
||||
|
||||
wchar_t
|
||||
utf8_decode_char(const char* src, size_t* used)
|
||||
{
|
||||
return utf8_decode_char2(src, 6, used);
|
||||
}
|
||||
|
||||
|
||||
|
||||
wchar_t utf8_decode_char2(const char* src, size_t size, size_t* used)
|
||||
wchar_t
|
||||
utf8_decode_char2(const char* src, size_t size, size_t* used)
|
||||
{
|
||||
uint8_t ch;
|
||||
wchar_t ret, min;
|
||||
|
@ -82,14 +86,17 @@ wchar_t utf8_decode_char2(const char* src, size_t size, size_t* used)
|
|||
|
||||
|
||||
|
||||
wchar_t utf8_decode_char_force(const char* src, size_t* used, wchar_t ilseq)
|
||||
wchar_t
|
||||
utf8_decode_char_force(const char* src, size_t* used, wchar_t ilseq)
|
||||
{
|
||||
return utf8_decode_char2_force(src, 6, used, ilseq);
|
||||
}
|
||||
|
||||
|
||||
|
||||
wchar_t utf8_decode_char2_force(const char* src, size_t size, size_t* used, wchar_t ilseq)
|
||||
wchar_t
|
||||
utf8_decode_char2_force(const char* src, size_t size, size_t* used,
|
||||
wchar_t ilseq)
|
||||
{
|
||||
uint8_t ch;
|
||||
wchar_t ret, min;
|
||||
|
@ -142,8 +149,8 @@ wchar_t utf8_decode_char2_force(const char* src, size_t size, size_t* used, wcha
|
|||
}
|
||||
return ch;
|
||||
|
||||
ILSEQ:
|
||||
// advance pointer to next valid char boundary
|
||||
ILSEQ:
|
||||
/* advance pointer to next valid char boundary */
|
||||
while(1) {
|
||||
if(!*src || !size) break;
|
||||
if((*src & 0xC0) == 0x80) break;
|
||||
|
@ -157,7 +164,8 @@ ILSEQ:
|
|||
|
||||
|
||||
|
||||
wchar_t* utf8_decode(wchar_t* dest, size_t size, const char* src)
|
||||
wchar_t*
|
||||
utf8_decode(wchar_t* dest, size_t size, const char* src)
|
||||
{
|
||||
struct utf8_decode_state ctx;
|
||||
memset(&ctx, 0, sizeof(ctx));
|
||||
|
@ -177,7 +185,9 @@ wchar_t* utf8_decode(wchar_t* dest, size_t size, const char* src)
|
|||
|
||||
|
||||
|
||||
wchar_t* utf8_decode2(wchar_t* dest, size_t size, size_t* written, const char* src, size_t amt)
|
||||
wchar_t*
|
||||
utf8_decode2(wchar_t* dest, size_t size, size_t* written, const char* src,
|
||||
size_t amt)
|
||||
{
|
||||
struct utf8_decode_state ctx;
|
||||
memset(&ctx, 0, sizeof(ctx));
|
||||
|
@ -198,7 +208,8 @@ wchar_t* utf8_decode2(wchar_t* dest, size_t size, size_t* written, const char* s
|
|||
|
||||
|
||||
|
||||
wchar_t* utf8_decode_force(wchar_t* dest, size_t size, const char* src)
|
||||
wchar_t*
|
||||
utf8_decode_force(wchar_t* dest, size_t size, const char* src)
|
||||
{
|
||||
struct utf8_decode_state ctx;
|
||||
memset(&ctx, 0, sizeof(ctx));
|
||||
|
@ -219,7 +230,9 @@ wchar_t* utf8_decode_force(wchar_t* dest, size_t size, const char* src)
|
|||
|
||||
|
||||
|
||||
wchar_t* utf8_decode_force2(wchar_t* dest, size_t size, size_t* written, const char* src, size_t amt)
|
||||
wchar_t*
|
||||
utf8_decode_force2(wchar_t* dest, size_t size, size_t* written, const char* src,
|
||||
size_t amt)
|
||||
{
|
||||
struct utf8_decode_state ctx;
|
||||
memset(&ctx, 0, sizeof(ctx));
|
||||
|
@ -234,6 +247,8 @@ wchar_t* utf8_decode_force2(wchar_t* dest, size_t size, size_t* written, const c
|
|||
return dest;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* options for text editors
|
||||
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
|
||||
vim: expandtab:ts=4:sw=4:syntax=c.doxygen
|
||||
|
|
|
@ -5,7 +5,9 @@
|
|||
* http://www.gnu.org/copyleft/gpl.html for details.
|
||||
*/
|
||||
|
||||
/*! \defgroup decode UTF-8 decoding routines.
|
||||
|
||||
|
||||
/*! \defgroup decode UTF-8 decoding routines
|
||||
|
||||
These routines decode UTF-8 data into C's wide character type \c wchar_t. Errors are reported
|
||||
through \c errno, with the following errors being of particular interest:
|
||||
|
@ -186,7 +188,6 @@ wchar_t* utf8_decode_force2(wchar_t* dest, size_t size, size_t* written, const c
|
|||
|
||||
|
||||
/*!@}*/
|
||||
|
||||
/* options for text editors
|
||||
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
|
||||
vim: expandtab:ts=4:sw=4:syntax=c.doxygen
|
||||
|
|
|
@ -5,7 +5,10 @@
|
|||
* http://www.gnu.org/copyleft/gpl.html for details.
|
||||
*/
|
||||
|
||||
char* utf8_encode_char(char* dest, size_t amt, wchar_t ch)
|
||||
|
||||
|
||||
char*
|
||||
utf8_encode_char(char* dest, size_t amt, wchar_t ch)
|
||||
{
|
||||
if(!dest || !amt) {
|
||||
errno = EINVAL;
|
||||
|
@ -76,7 +79,8 @@ char* utf8_encode_char(char* dest, size_t amt, wchar_t ch)
|
|||
|
||||
|
||||
|
||||
char* utf8_encode_char_force(char* dest, size_t amt, wchar_t ch, wchar_t ilseq)
|
||||
char*
|
||||
utf8_encode_char_force(char* dest, size_t amt, wchar_t ch, wchar_t ilseq)
|
||||
{
|
||||
if(!utf8_isucs4(ilseq)) {
|
||||
errno = EILSEQ;
|
||||
|
@ -88,14 +92,17 @@ char* utf8_encode_char_force(char* dest, size_t amt, wchar_t ch, wchar_t ilseq)
|
|||
|
||||
|
||||
|
||||
char* utf8_encode(char* dest, size_t amt, const wchar_t* src)
|
||||
char*
|
||||
utf8_encode(char* dest, size_t amt, const wchar_t* src)
|
||||
{
|
||||
return utf8_encode2(dest, amt, 0, src, -1);
|
||||
}
|
||||
|
||||
|
||||
|
||||
char* utf8_encode2(char* dest, size_t amt, size_t* written, const wchar_t* src, size_t inamt)
|
||||
char*
|
||||
utf8_encode2(char* dest, size_t amt, size_t* written, const wchar_t* src,
|
||||
size_t inamt)
|
||||
{
|
||||
struct utf8_encode_state ctx;
|
||||
memset(&ctx, 0, sizeof(ctx));
|
||||
|
@ -115,14 +122,17 @@ char* utf8_encode2(char* dest, size_t amt, size_t* written, const wchar_t* src,
|
|||
|
||||
|
||||
|
||||
char* utf8_encode_force(char* dest, size_t amt, const wchar_t* src)
|
||||
char*
|
||||
utf8_encode_force(char* dest, size_t amt, const wchar_t* src)
|
||||
{
|
||||
return utf8_encode_force2(dest, amt, 0, src, -1);
|
||||
}
|
||||
|
||||
|
||||
|
||||
char* utf8_encode_force2(char* dest, size_t amt, size_t* written, const wchar_t* src, size_t inamt)
|
||||
char*
|
||||
utf8_encode_force2(char* dest, size_t amt, size_t* written, const wchar_t* src,
|
||||
size_t inamt)
|
||||
{
|
||||
struct utf8_encode_state ctx;
|
||||
memset(&ctx, 0, sizeof(ctx));
|
||||
|
@ -137,6 +147,8 @@ char* utf8_encode_force2(char* dest, size_t amt, size_t* written, const wchar_t*
|
|||
return dest;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* options for text editors
|
||||
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
|
||||
vim: expandtab:ts=4:sw=4:syntax=c.doxygen
|
||||
|
|
|
@ -5,7 +5,9 @@
|
|||
* http://www.gnu.org/copyleft/gpl.html for details.
|
||||
*/
|
||||
|
||||
/*! \defgroup encode UTF-8 encoding routines.
|
||||
|
||||
|
||||
/*! \defgroup encode UTF-8 encoding routines
|
||||
|
||||
The functions in this module allow encoding of UTF-8 characters. Errors are reported through
|
||||
\c errno, with the following errors being of particular interest:
|
||||
|
@ -139,7 +141,6 @@ char* utf8_encode_force2(char* dest, size_t amt, size_t* written, const wchar_t*
|
|||
|
||||
|
||||
/*!@}*/
|
||||
|
||||
/* options for text editors
|
||||
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
|
||||
vim: expandtab:ts=4:sw=4:syntax=c.doxygen
|
||||
|
|
|
@ -5,6 +5,8 @@
|
|||
* http://www.gnu.org/copyleft/gpl.html for details.
|
||||
*/
|
||||
|
||||
|
||||
|
||||
enum utf8_decoder_state {
|
||||
utf8_state_none,
|
||||
utf8_state_multibyte1,
|
||||
|
@ -18,7 +20,8 @@ enum utf8_decoder_state {
|
|||
|
||||
|
||||
|
||||
struct utf8_decode_state* utf8_decoder(struct utf8_decode_state* ctx)
|
||||
struct utf8_decode_state*
|
||||
utf8_decoder(struct utf8_decode_state* ctx)
|
||||
{
|
||||
wchar_t* wr;
|
||||
size_t avail;
|
||||
|
@ -33,7 +36,7 @@ struct utf8_decode_state* utf8_decoder(struct utf8_decode_state* ctx)
|
|||
ctx->written = 0;
|
||||
avail = ctx->wr_size;
|
||||
|
||||
loop:
|
||||
loop:
|
||||
while(ctx->rd_remain) {
|
||||
uint8_t in = *ctx->rd;
|
||||
|
||||
|
@ -107,13 +110,13 @@ loop:
|
|||
error_type = utf8_decode_error_overlong;
|
||||
goto error;
|
||||
} else {
|
||||
// validate codepoint
|
||||
/* validate codepoint */
|
||||
if(!utf8_isucs4(ctx->statech)) {
|
||||
error_type = utf8_decode_error_illegal_cp;
|
||||
goto error;
|
||||
}
|
||||
|
||||
// add to output string
|
||||
/* add to output string */
|
||||
*wr++ = ctx->statech;
|
||||
++ctx->written;
|
||||
--avail;
|
||||
|
@ -142,7 +145,7 @@ loop:
|
|||
*wr = 0;
|
||||
return ctx;
|
||||
|
||||
error:
|
||||
error:
|
||||
if(!ctx->error_callback) {
|
||||
errno = EILSEQ;
|
||||
return 0;
|
||||
|
@ -173,15 +176,16 @@ error:
|
|||
goto loop;
|
||||
}
|
||||
|
||||
// shouldn't reach here
|
||||
/* shouldn't reach here */
|
||||
errno = EILSEQ;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
enum utf8_decode_error_action utf8_decode_error_callback_replace(
|
||||
const struct utf8_decode_state* ctx, enum utf8_decode_error error, wchar_t* newch)
|
||||
enum utf8_decode_error_action
|
||||
utf8_decode_error_callback_replace(const struct utf8_decode_state* ctx,
|
||||
enum utf8_decode_error error, wchar_t* newch)
|
||||
{
|
||||
(void)ctx;
|
||||
(void)error;
|
||||
|
@ -191,8 +195,9 @@ enum utf8_decode_error_action utf8_decode_error_callback_replace(
|
|||
|
||||
|
||||
|
||||
enum utf8_decode_error_action utf8_decode_error_callback_skip(
|
||||
const struct utf8_decode_state* ctx, enum utf8_decode_error error, wchar_t* newch)
|
||||
enum utf8_decode_error_action
|
||||
utf8_decode_error_callback_skip(const struct utf8_decode_state* ctx,
|
||||
enum utf8_decode_error error, wchar_t* newch)
|
||||
{
|
||||
(void)ctx;
|
||||
(void)error;
|
||||
|
@ -200,6 +205,8 @@ enum utf8_decode_error_action utf8_decode_error_callback_skip(
|
|||
return utf8_decode_error_action_skip;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* options for text editors
|
||||
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
|
||||
vim: expandtab:ts=4:sw=4:syntax=c.doxygen
|
||||
|
|
|
@ -5,7 +5,9 @@
|
|||
* http://www.gnu.org/copyleft/gpl.html for details.
|
||||
*/
|
||||
|
||||
/*! \defgroup decode_ctx UTF-8 stateful decoder.
|
||||
|
||||
|
||||
/*! \defgroup decode_ctx UTF-8 stateful decoder
|
||||
|
||||
This UTF-8 decoder uses a structure to maintain state information between calls. This means that
|
||||
you can feed it a stream of data as it comes in without needing to store the entire document in a
|
||||
|
@ -33,19 +35,19 @@ to the callback function.
|
|||
|
||||
*/
|
||||
enum utf8_decode_error {
|
||||
/// Lone continuation char encountered when start char expected.
|
||||
/*! \brief Lone continuation char encountered when start char expected. */
|
||||
utf8_decode_error_lone_cchar,
|
||||
|
||||
/// Non-continuation char encountered within multibyte sequence.
|
||||
/*! \brief Non-continuation char encountered within multibyte sequence. */
|
||||
utf8_decode_error_not_cchar,
|
||||
|
||||
/// Invalid start char (not ASCII).
|
||||
/*! \brief Invalid start char (not ASCII). */
|
||||
utf8_decode_error_not_schar,
|
||||
|
||||
/// Overlong byte sequence.
|
||||
/*! \brief Overlong byte sequence. */
|
||||
utf8_decode_error_overlong,
|
||||
|
||||
/// Illegal code positions (UTF-16 surrogates or 0xFFFE,0xFFFF).
|
||||
/*! \brief Illegal code positions (UTF-16 surrogates or 0xFFFE,0xFFFF). */
|
||||
utf8_decode_error_illegal_cp
|
||||
};
|
||||
|
||||
|
@ -58,13 +60,13 @@ error. These actions are specified by the error callback function's return value
|
|||
|
||||
*/
|
||||
enum utf8_decode_error_action {
|
||||
/// Abort the conversion, returning EILSEQ.
|
||||
/*! \brief Abort the conversion, reporting EILSEQ through \c errno. */
|
||||
utf8_decode_error_action_abort,
|
||||
|
||||
/// Skip the illegal byte sequence.
|
||||
/*! \brief Skip the illegal byte sequence. */
|
||||
utf8_decode_error_action_skip,
|
||||
|
||||
/// Discard the illegal byte sequence and enter a replacement char.
|
||||
/*! \brief Discard the illegal byte sequence and enter a replacement char. */
|
||||
utf8_decode_error_action_replace
|
||||
};
|
||||
|
||||
|
@ -118,48 +120,48 @@ these variables aren't perfect, as they can be affected by errors and limitation
|
|||
|
||||
*/
|
||||
struct utf8_decode_state {
|
||||
/// \c false if we are part-way through a multi-byte character.
|
||||
/*! \brief 0 if we are part-way through a multi-byte character. */
|
||||
int complete;
|
||||
|
||||
/// Data to read (current read position).
|
||||
/*! \brief Data to read (current read position). */
|
||||
const char* rd;
|
||||
|
||||
/// Number of bytes remaining (current).
|
||||
/*! \brief Number of bytes remaining (current). */
|
||||
int rd_remain;
|
||||
|
||||
/// Internal state; initialise to 0, don't change.
|
||||
/*! \brief Internal state; initialise to 0, don't change. */
|
||||
int state;
|
||||
|
||||
/// Error callback (may be 0).
|
||||
/*! \brief Error callback (may be 0). */
|
||||
utf8_decode_error_callback error_callback;
|
||||
|
||||
/// Pointer to output buffer.
|
||||
/*! \brief Pointer to output buffer. */
|
||||
wchar_t* wr;
|
||||
|
||||
/// Number of characters that can be written.
|
||||
/*! \brief Number of characters that can be written. */
|
||||
size_t wr_size;
|
||||
|
||||
/// Number of characters written on last call.
|
||||
/*! \brief Number of characters written on last call. */
|
||||
size_t written;
|
||||
|
||||
/// Arbitrary data pointer for \a error_callback.
|
||||
/*! \brief Arbitrary data pointer for \a error_callback. */
|
||||
void* data;
|
||||
|
||||
/// Current line (starting from 0).
|
||||
/*! \brief Current line (starting from 0). */
|
||||
int line;
|
||||
|
||||
/// Current column (starting from 0).
|
||||
/*! \brief Current column (starting from 0). */
|
||||
int col;
|
||||
|
||||
/// Character offset from start of data (starting from 0).
|
||||
/*! \brief Character offset from start of data (starting from 0). */
|
||||
int char_offset;
|
||||
|
||||
/// Byte offset from start of data (starting from 0).
|
||||
/*! \brief Byte offset from start of data (starting from 0). */
|
||||
int byte_offset;
|
||||
|
||||
/// Don't use this.
|
||||
/*! \brief Internal: holds the decoded code point before it is written to the output. Don't use this. */
|
||||
wchar_t statech;
|
||||
/// Don't use this.
|
||||
/*! \brief Don't use this. */
|
||||
wchar_t minch;
|
||||
};
|
||||
|
||||
|
@ -186,18 +188,17 @@ struct utf8_decode_state* utf8_decoder(struct utf8_decode_state* state);
|
|||
|
||||
|
||||
|
||||
/// Standard error callback: use replacement char 0xFFFD.
|
||||
/*! \brief Standard error callback: use replacement char 0xFFFD. */
|
||||
enum utf8_decode_error_action utf8_decode_error_callback_replace(
|
||||
const struct utf8_decode_state* ctx, enum utf8_decode_error error, wchar_t* newch);
|
||||
|
||||
/// Standard error callback: skip invalid chars.
|
||||
/*! \brief Standard error callback: skip invalid chars. */
|
||||
enum utf8_decode_error_action utf8_decode_error_callback_skip(
|
||||
const struct utf8_decode_state* ctx, enum utf8_decode_error error, wchar_t* newch);
|
||||
|
||||
|
||||
|
||||
/*!@}*/
|
||||
|
||||
/* options for text editors
|
||||
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
|
||||
vim: expandtab:ts=4:sw=4:syntax=c.doxygen
|
||||
|
|
|
@ -5,7 +5,10 @@
|
|||
* http://www.gnu.org/copyleft/gpl.html for details.
|
||||
*/
|
||||
|
||||
struct utf8_encode_state* utf8_encoder(struct utf8_encode_state* state)
|
||||
|
||||
|
||||
struct utf8_encode_state*
|
||||
utf8_encoder(struct utf8_encode_state* state)
|
||||
{
|
||||
char* wr = state->wr, * ret;
|
||||
char* endp = wr + state->wr_size - 1;
|
||||
|
@ -24,7 +27,7 @@ struct utf8_encode_state* utf8_encoder(struct utf8_encode_state* state)
|
|||
if(!ch && state->rd_remain < 0) break;
|
||||
|
||||
reencoding = 0;
|
||||
reencode:
|
||||
reencode:
|
||||
ret = utf8_encode_char(wr, endp - wr, ch);
|
||||
if(!ret) {
|
||||
if(errno == ENOMEM) break;
|
||||
|
@ -66,8 +69,9 @@ struct utf8_encode_state* utf8_encoder(struct utf8_encode_state* state)
|
|||
|
||||
|
||||
|
||||
enum utf8_encode_error_action utf8_encode_error_callback_replace(
|
||||
const struct utf8_encode_state* state, wchar_t* newch)
|
||||
enum utf8_encode_error_action
|
||||
utf8_encode_error_callback_replace(const struct utf8_encode_state* state,
|
||||
wchar_t* newch)
|
||||
{
|
||||
(void)state;
|
||||
*newch = 0xFFFD;
|
||||
|
@ -76,14 +80,17 @@ enum utf8_encode_error_action utf8_encode_error_callback_replace(
|
|||
|
||||
|
||||
|
||||
enum utf8_encode_error_action utf8_encode_error_callback_skip(
|
||||
const struct utf8_encode_state* state, wchar_t* newch)
|
||||
enum utf8_encode_error_action
|
||||
utf8_encode_error_callback_skip(const struct utf8_encode_state* state,
|
||||
wchar_t* newch)
|
||||
{
|
||||
(void)state;
|
||||
(void)newch;
|
||||
return utf8_encode_error_action_skip;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* options for text editors
|
||||
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
|
||||
vim: expandtab:ts=4:sw=4:syntax=c.doxygen
|
||||
|
|
|
@ -5,7 +5,9 @@
|
|||
* http://www.gnu.org/copyleft/gpl.html for details.
|
||||
*/
|
||||
|
||||
/*! \defgroup encode_state UTF-8 stateful encoder.
|
||||
|
||||
|
||||
/*! \defgroup encode_state UTF-8 stateful encoder
|
||||
|
||||
This UTF-8 encoder uses a structure to maintain state information between calls. This means that
|
||||
you can feed it a stream of data as it comes in without needing to store the entire source in a
|
||||
|
@ -34,13 +36,13 @@ function's return value.
|
|||
|
||||
*/
|
||||
enum utf8_encode_error_action {
|
||||
/// Abort the conversion, returning EILSEQ.
|
||||
/*! \brief Abort the conversion, reporting EILSEQ through \c errno. */
|
||||
utf8_encode_error_action_abort,
|
||||
|
||||
/// Skip the illegal byte sequence.
|
||||
/*! \brief Skip the illegal source character. */
|
||||
utf8_encode_error_action_skip,
|
||||
|
||||
/// Discard the illegal byte sequence and enter a replacement char.
|
||||
/*! \brief Discard the illegal source character and enter a replacement char. */
|
||||
utf8_encode_error_action_replace
|
||||
};
|
||||
|
||||
|
@ -61,11 +63,11 @@ it with something else, or abort the conversion entirely.
|
|||
typedef enum utf8_encode_error_action (*utf8_encode_error_callback)(
|
||||
const struct utf8_encode_state* state, wchar_t* newch);
|
||||
|
||||
/// Standard error callback: use replacement char 0xFFFD.
|
||||
/*! \brief Standard error callback: use replacement char 0xFFFD. */
|
||||
enum utf8_encode_error_action utf8_encode_error_callback_replace(
|
||||
const struct utf8_encode_state* state, wchar_t* newch);
|
||||
|
||||
/// Standard error callback: skip invalid chars.
|
||||
/*! \brief Standard error callback: skip invalid chars. */
|
||||
enum utf8_encode_error_action utf8_encode_error_callback_skip(
|
||||
const struct utf8_encode_state* state, wchar_t* newch);
|
||||
|
||||
|
@ -93,34 +95,34 @@ start of the stream, and should always be accurate.
|
|||
|
||||
*/
|
||||
struct utf8_encode_state {
|
||||
/// Current read position.
|
||||
/*! \brief Current read position. */
|
||||
const wchar_t* rd;
|
||||
|
||||
/// Number of chars remaining (-ve means to scan for null char).
|
||||
/*! \brief Number of chars remaining (a negative value means to scan for the terminating null char). */
|
||||
int rd_remain;
|
||||
|
||||
/// Callback function used to handle illegal source characters.
|
||||
/*! \brief Callback function used to handle illegal source characters. */
|
||||
utf8_encode_error_callback error_callback;
|
||||
|
||||
/// Output buffer.
|
||||
/*! \brief Output buffer. */
|
||||
char* wr;
|
||||
|
||||
/// Output buffer size.
|
||||
/*! \brief Output buffer size. */
|
||||
size_t wr_size;
|
||||
|
||||
/// Number of bytes written during last call.
|
||||
/*! \brief Number of bytes written during last call. */
|
||||
size_t written;
|
||||
|
||||
/// Arbitrary pointer (useful for \a error_callback).
|
||||
/*! \brief Arbitrary pointer (useful for \a error_callback). */
|
||||
void* data;
|
||||
|
||||
/// Current line (starting from 0).
|
||||
/*! \brief Current line (starting from 0). */
|
||||
int line;
|
||||
|
||||
/// Current column (starting from 0).
|
||||
/*! \brief Current column (starting from 0). */
|
||||
int col;
|
||||
|
||||
/// Character offset from start of data (starting from 0).
|
||||
/*! \brief Character offset from start of data (starting from 0). */
|
||||
int char_offset;
|
||||
};
|
||||
|
||||
|
@ -158,7 +160,6 @@ struct utf8_encode_state* utf8_encoder(struct utf8_encode_state* state);
|
|||
|
||||
|
||||
/*!@}*/
|
||||
|
||||
/* options for text editors
|
||||
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
|
||||
vim: expandtab:ts=4:sw=4:syntax=c.doxygen
|
||||
|
|
Loading…
Reference in New Issue