Tidy up source formatting

Put the return type on its own line in function definitions and replace
C++-style comments (`//`, `///`) with C-style comments (`/* ... */`,
`/*! \brief ... */`).
This commit is contained in:
Laurence Withers 2009-10-13 11:29:55 +00:00
parent e98cbe5cc5
commit 26e3c57b04
9 changed files with 135 additions and 85 deletions

View File

@ -7,14 +7,16 @@
int utf8_isascii(wchar_t ch)
int
utf8_isascii(wchar_t ch)
{
return !(ch & ~0x7F);
}
int utf8_isspace(wchar_t ch)
int
utf8_isspace(wchar_t ch)
{
return((ch >= 0x0009 && ch <= 0x000D)
|| ch == 0x0020
@ -32,7 +34,8 @@ int utf8_isspace(wchar_t ch)
int utf8_isucs4(wchar_t ch)
int
utf8_isucs4(wchar_t ch)
{
return !(ch & (~((wchar_t)0x7FFFFFFF)))
&& (ch < 0xD800 || ch > 0xDFFF)
@ -41,7 +44,8 @@ int utf8_isucs4(wchar_t ch)
int utf8_isutf32(wchar_t ch)
int
utf8_isutf32(wchar_t ch)
{
return ch >= 0 && ch <= 0x10FFFF
&& (ch < 0xD800 || ch > 0xDFFF)
@ -50,7 +54,8 @@ int utf8_isutf32(wchar_t ch)
int utf8_isutf16(wchar_t ch)
int
utf8_isutf16(wchar_t ch)
{
return ch >= 0 && ch <= 0xFFFD
&& (ch < 0xD800 || ch > 0xDFFF);

View File

@ -5,14 +5,18 @@
* http://www.gnu.org/copyleft/gpl.html for details.
*/
wchar_t utf8_decode_char(const char* src, size_t* used)
wchar_t
utf8_decode_char(const char* src, size_t* used)
{
return utf8_decode_char2(src, 6, used);
}
wchar_t utf8_decode_char2(const char* src, size_t size, size_t* used)
wchar_t
utf8_decode_char2(const char* src, size_t size, size_t* used)
{
uint8_t ch;
wchar_t ret, min;
@ -82,14 +86,17 @@ wchar_t utf8_decode_char2(const char* src, size_t size, size_t* used)
wchar_t utf8_decode_char_force(const char* src, size_t* used, wchar_t ilseq)
wchar_t
utf8_decode_char_force(const char* src, size_t* used, wchar_t ilseq)
{
return utf8_decode_char2_force(src, 6, used, ilseq);
}
wchar_t utf8_decode_char2_force(const char* src, size_t size, size_t* used, wchar_t ilseq)
wchar_t
utf8_decode_char2_force(const char* src, size_t size, size_t* used,
wchar_t ilseq)
{
uint8_t ch;
wchar_t ret, min;
@ -142,8 +149,8 @@ wchar_t utf8_decode_char2_force(const char* src, size_t size, size_t* used, wcha
}
return ch;
ILSEQ:
// advance pointer to next valid char boundary
ILSEQ:
/* advance pointer to next valid char boundary */
while(1) {
if(!*src || !size) break;
if((*src & 0xC0) == 0x80) break;
@ -157,7 +164,8 @@ ILSEQ:
wchar_t* utf8_decode(wchar_t* dest, size_t size, const char* src)
wchar_t*
utf8_decode(wchar_t* dest, size_t size, const char* src)
{
struct utf8_decode_state ctx;
memset(&ctx, 0, sizeof(ctx));
@ -177,7 +185,9 @@ wchar_t* utf8_decode(wchar_t* dest, size_t size, const char* src)
wchar_t* utf8_decode2(wchar_t* dest, size_t size, size_t* written, const char* src, size_t amt)
wchar_t*
utf8_decode2(wchar_t* dest, size_t size, size_t* written, const char* src,
size_t amt)
{
struct utf8_decode_state ctx;
memset(&ctx, 0, sizeof(ctx));
@ -198,7 +208,8 @@ wchar_t* utf8_decode2(wchar_t* dest, size_t size, size_t* written, const char* s
wchar_t* utf8_decode_force(wchar_t* dest, size_t size, const char* src)
wchar_t*
utf8_decode_force(wchar_t* dest, size_t size, const char* src)
{
struct utf8_decode_state ctx;
memset(&ctx, 0, sizeof(ctx));
@ -219,7 +230,9 @@ wchar_t* utf8_decode_force(wchar_t* dest, size_t size, const char* src)
wchar_t* utf8_decode_force2(wchar_t* dest, size_t size, size_t* written, const char* src, size_t amt)
wchar_t*
utf8_decode_force2(wchar_t* dest, size_t size, size_t* written, const char* src,
size_t amt)
{
struct utf8_decode_state ctx;
memset(&ctx, 0, sizeof(ctx));
@ -234,6 +247,8 @@ wchar_t* utf8_decode_force2(wchar_t* dest, size_t size, size_t* written, const c
return dest;
}
/* options for text editors
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
vim: expandtab:ts=4:sw=4:syntax=c.doxygen

View File

@ -5,7 +5,9 @@
* http://www.gnu.org/copyleft/gpl.html for details.
*/
/*! \defgroup decode UTF-8 decoding routines.
/*! \defgroup decode UTF-8 decoding routines
These routines decode UTF-8 data into C's wide character type \c wchar_t. Errors are reported
through \c errno, with the following errors being of particular interest:
@ -186,7 +188,6 @@ wchar_t* utf8_decode_force2(wchar_t* dest, size_t size, size_t* written, const c
/*!@}*/
/* options for text editors
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
vim: expandtab:ts=4:sw=4:syntax=c.doxygen

View File

@ -5,7 +5,10 @@
* http://www.gnu.org/copyleft/gpl.html for details.
*/
char* utf8_encode_char(char* dest, size_t amt, wchar_t ch)
char*
utf8_encode_char(char* dest, size_t amt, wchar_t ch)
{
if(!dest || !amt) {
errno = EINVAL;
@ -76,7 +79,8 @@ char* utf8_encode_char(char* dest, size_t amt, wchar_t ch)
char* utf8_encode_char_force(char* dest, size_t amt, wchar_t ch, wchar_t ilseq)
char*
utf8_encode_char_force(char* dest, size_t amt, wchar_t ch, wchar_t ilseq)
{
if(!utf8_isucs4(ilseq)) {
errno = EILSEQ;
@ -88,14 +92,17 @@ char* utf8_encode_char_force(char* dest, size_t amt, wchar_t ch, wchar_t ilseq)
char* utf8_encode(char* dest, size_t amt, const wchar_t* src)
char*
utf8_encode(char* dest, size_t amt, const wchar_t* src)
{
return utf8_encode2(dest, amt, 0, src, -1);
}
char* utf8_encode2(char* dest, size_t amt, size_t* written, const wchar_t* src, size_t inamt)
char*
utf8_encode2(char* dest, size_t amt, size_t* written, const wchar_t* src,
size_t inamt)
{
struct utf8_encode_state ctx;
memset(&ctx, 0, sizeof(ctx));
@ -115,14 +122,17 @@ char* utf8_encode2(char* dest, size_t amt, size_t* written, const wchar_t* src,
char* utf8_encode_force(char* dest, size_t amt, const wchar_t* src)
char*
utf8_encode_force(char* dest, size_t amt, const wchar_t* src)
{
return utf8_encode_force2(dest, amt, 0, src, -1);
}
char* utf8_encode_force2(char* dest, size_t amt, size_t* written, const wchar_t* src, size_t inamt)
char*
utf8_encode_force2(char* dest, size_t amt, size_t* written, const wchar_t* src,
size_t inamt)
{
struct utf8_encode_state ctx;
memset(&ctx, 0, sizeof(ctx));
@ -137,6 +147,8 @@ char* utf8_encode_force2(char* dest, size_t amt, size_t* written, const wchar_t*
return dest;
}
/* options for text editors
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
vim: expandtab:ts=4:sw=4:syntax=c.doxygen

View File

@ -5,7 +5,9 @@
* http://www.gnu.org/copyleft/gpl.html for details.
*/
/*! \defgroup encode UTF-8 encoding routines.
/*! \defgroup encode UTF-8 encoding routines
The functions in this module allow encoding of UTF-8 characters. Errors are reported through
\c errno, with the following errors being of particular interest:
@ -139,7 +141,6 @@ char* utf8_encode_force2(char* dest, size_t amt, size_t* written, const wchar_t*
/*!@}*/
/* options for text editors
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
vim: expandtab:ts=4:sw=4:syntax=c.doxygen

View File

@ -5,6 +5,8 @@
* http://www.gnu.org/copyleft/gpl.html for details.
*/
enum utf8_decoder_state {
utf8_state_none,
utf8_state_multibyte1,
@ -18,7 +20,8 @@ enum utf8_decoder_state {
struct utf8_decode_state* utf8_decoder(struct utf8_decode_state* ctx)
struct utf8_decode_state*
utf8_decoder(struct utf8_decode_state* ctx)
{
wchar_t* wr;
size_t avail;
@ -33,7 +36,7 @@ struct utf8_decode_state* utf8_decoder(struct utf8_decode_state* ctx)
ctx->written = 0;
avail = ctx->wr_size;
loop:
loop:
while(ctx->rd_remain) {
uint8_t in = *ctx->rd;
@ -107,13 +110,13 @@ loop:
error_type = utf8_decode_error_overlong;
goto error;
} else {
// validate codepoint
/* validate codepoint */
if(!utf8_isucs4(ctx->statech)) {
error_type = utf8_decode_error_illegal_cp;
goto error;
}
// add to output string
/* add to output string */
*wr++ = ctx->statech;
++ctx->written;
--avail;
@ -142,7 +145,7 @@ loop:
*wr = 0;
return ctx;
error:
error:
if(!ctx->error_callback) {
errno = EILSEQ;
return 0;
@ -173,15 +176,16 @@ error:
goto loop;
}
// shouldn't reach here
/* shouldn't reach here */
errno = EILSEQ;
return 0;
}
enum utf8_decode_error_action utf8_decode_error_callback_replace(
const struct utf8_decode_state* ctx, enum utf8_decode_error error, wchar_t* newch)
enum utf8_decode_error_action
utf8_decode_error_callback_replace(const struct utf8_decode_state* ctx,
enum utf8_decode_error error, wchar_t* newch)
{
(void)ctx;
(void)error;
@ -191,8 +195,9 @@ enum utf8_decode_error_action utf8_decode_error_callback_replace(
enum utf8_decode_error_action utf8_decode_error_callback_skip(
const struct utf8_decode_state* ctx, enum utf8_decode_error error, wchar_t* newch)
enum utf8_decode_error_action
utf8_decode_error_callback_skip(const struct utf8_decode_state* ctx,
enum utf8_decode_error error, wchar_t* newch)
{
(void)ctx;
(void)error;
@ -200,6 +205,8 @@ enum utf8_decode_error_action utf8_decode_error_callback_skip(
return utf8_decode_error_action_skip;
}
/* options for text editors
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
vim: expandtab:ts=4:sw=4:syntax=c.doxygen

View File

@ -5,7 +5,9 @@
* http://www.gnu.org/copyleft/gpl.html for details.
*/
/*! \defgroup decode_ctx UTF-8 stateful decoder.
/*! \defgroup decode_ctx UTF-8 stateful decoder
This UTF-8 decoder uses a structure to maintain state information between calls. This means that
you can feed it a stream of data as it comes in without needing to store the entire document in a
@ -33,19 +35,19 @@ to the callback function.
*/
enum utf8_decode_error {
/// Lone continuation char encountered when start char expected.
/*! \brief Lone continuation char encountered when start char expected. */
utf8_decode_error_lone_cchar,
/// Non-continuation char encountered within multibyte sequence.
/*! \brief Non-continuation char encountered within multibyte sequence. */
utf8_decode_error_not_cchar,
/// Invalid start char (not ASCII).
/*! \brief Invalid start char (not ASCII). */
utf8_decode_error_not_schar,
/// Overlong byte sequence.
/*! \brief Overlong byte sequence. */
utf8_decode_error_overlong,
/// Illegal code positions (UTF-16 surrogates or 0xFFFE,0xFFFF).
/*! \brief Illegal code positions (UTF-16 surrogates or 0xFFFE,0xFFFF). */
utf8_decode_error_illegal_cp
};
@ -58,13 +60,13 @@ error. These actions are specified by the error callback function's return value
*/
enum utf8_decode_error_action {
/// Abort the conversion, returning EILSEQ.
/*! \brief Abort the conversion, returning EILSEQ. */
utf8_decode_error_action_abort,
/// Skip the illegal byte sequence.
/*! \brief Skip the illegal byte sequence. */
utf8_decode_error_action_skip,
/// Discard the illegal byte sequence and enter a replacement char.
/*! \brief Discard the illegal byte sequence and enter a replacement char. */
utf8_decode_error_action_replace
};
@ -118,48 +120,48 @@ these variables aren't perfect, as they can be affected by errors and limitation
*/
struct utf8_decode_state {
/// \c false if we are part-way through a multi-byte character.
/*! \brief 0 if we are part-way through a multi-byte character. */
int complete;
/// Data to read (current read position).
/*! \brief Data to read (current read position). */
const char* rd;
/// Number of bytes remaining (current).
/*! \brief Number of bytes remaining (current). */
int rd_remain;
/// Internal state; initialise to 0, don't change.
/*! \brief Internal state; initialise to 0, don't change. */
int state;
/// Error callback (may be 0).
/*! \brief Error callback (may be 0). */
utf8_decode_error_callback error_callback;
/// Pointer to output buffer.
/*! \brief Pointer to output buffer. */
wchar_t* wr;
/// Number of characters that can be written.
/*! \brief Number of characters that can be written. */
size_t wr_size;
/// Number of characters written on last call.
/*! \brief Number of characters written on last call. */
size_t written;
/// Arbitrary data pointer for \a error_callback.
/*! \brief Arbitrary data pointer for \a error_callback. */
void* data;
/// Current line (starting from 0).
/*! \brief Current line (starting from 0). */
int line;
/// Current column (starting from 0).
/*! \brief Current column (starting from 0). */
int col;
/// Character offset from start of data (starting from 0).
/*! \brief Character offset from start of data (starting from 0). */
int char_offset;
/// Byte offset from start of data (starting from 0).
/*! \brief Byte offset from start of data (starting from 0). */
int byte_offset;
/// Don't use this.
/*! \brief Don't use this. */
wchar_t statech;
/// Don't use this.
/*! \brief Don't use this. */
wchar_t minch;
};
@ -186,18 +188,17 @@ struct utf8_decode_state* utf8_decoder(struct utf8_decode_state* state);
/// Standard error callback: use replacement char 0xFFFD.
/*! \brief Standard error callback: use replacement char 0xFFFD. */
enum utf8_decode_error_action utf8_decode_error_callback_replace(
const struct utf8_decode_state* ctx, enum utf8_decode_error error, wchar_t* newch);
/// Standard error callback: skip invalid chars.
/*! \brief Standard error callback: skip invalid chars. */
enum utf8_decode_error_action utf8_decode_error_callback_skip(
const struct utf8_decode_state* ctx, enum utf8_decode_error error, wchar_t* newch);
/*!@}*/
/* options for text editors
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
vim: expandtab:ts=4:sw=4:syntax=c.doxygen

View File

@ -5,7 +5,10 @@
* http://www.gnu.org/copyleft/gpl.html for details.
*/
struct utf8_encode_state* utf8_encoder(struct utf8_encode_state* state)
struct utf8_encode_state*
utf8_encoder(struct utf8_encode_state* state)
{
char* wr = state->wr, * ret;
char* endp = wr + state->wr_size - 1;
@ -24,7 +27,7 @@ struct utf8_encode_state* utf8_encoder(struct utf8_encode_state* state)
if(!ch && state->rd_remain < 0) break;
reencoding = 0;
reencode:
reencode:
ret = utf8_encode_char(wr, endp - wr, ch);
if(!ret) {
if(errno == ENOMEM) break;
@ -66,8 +69,9 @@ struct utf8_encode_state* utf8_encoder(struct utf8_encode_state* state)
enum utf8_encode_error_action utf8_encode_error_callback_replace(
const struct utf8_encode_state* state, wchar_t* newch)
enum utf8_encode_error_action
utf8_encode_error_callback_replace(const struct utf8_encode_state* state,
wchar_t* newch)
{
(void)state;
*newch = 0xFFFD;
@ -76,14 +80,17 @@ enum utf8_encode_error_action utf8_encode_error_callback_replace(
enum utf8_encode_error_action utf8_encode_error_callback_skip(
const struct utf8_encode_state* state, wchar_t* newch)
enum utf8_encode_error_action
utf8_encode_error_callback_skip(const struct utf8_encode_state* state,
wchar_t* newch)
{
(void)state;
(void)newch;
return utf8_encode_error_action_skip;
}
/* options for text editors
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
vim: expandtab:ts=4:sw=4:syntax=c.doxygen

View File

@ -5,7 +5,9 @@
* http://www.gnu.org/copyleft/gpl.html for details.
*/
/*! \defgroup encode_state UTF-8 stateful encoder.
/*! \defgroup encode_state UTF-8 stateful encoder
This UTF-8 encoder uses a structure to maintain state information between calls. This means that
you can feed it a stream of data as it comes in without needing to store the entire source in a
@ -34,13 +36,13 @@ function's return value.
*/
enum utf8_encode_error_action {
/// Abort the conversion, returning EILSEQ.
/*! \brief Abort the conversion, returning EILSEQ. */
utf8_encode_error_action_abort,
/// Skip the illegal byte sequence.
/*! \brief Skip the illegal byte sequence. */
utf8_encode_error_action_skip,
/// Discard the illegal byte sequence and enter a replacement char.
/*! \brief Discard the illegal byte sequence and enter a replacement char. */
utf8_encode_error_action_replace
};
@ -61,11 +63,11 @@ it with something else, or abort the conversion entirely.
typedef enum utf8_encode_error_action (*utf8_encode_error_callback)(
const struct utf8_encode_state* state, wchar_t* newch);
/// Standard error callback: use replacement char 0xFFFD.
/*! \brief Standard error callback: use replacement char 0xFFFD. */
enum utf8_encode_error_action utf8_encode_error_callback_replace(
const struct utf8_encode_state* state, wchar_t* newch);
/// Standard error callback: skip invalid chars.
/*! \brief Standard error callback: skip invalid chars. */
enum utf8_encode_error_action utf8_encode_error_callback_skip(
const struct utf8_encode_state* state, wchar_t* newch);
@ -93,34 +95,34 @@ start of the stream, and should always be accurate.
*/
struct utf8_encode_state {
/// Current read position.
/*! \brief Current read position. */
const wchar_t* rd;
/// Number of chars remaining (-ve means to scan for null char).
/*! \brief Number of chars remaining (-ve means to scan for null char). */
int rd_remain;
/// Callback function used to handle illegal source characters.
/*! \brief Callback function used to handle illegal source characters. */
utf8_encode_error_callback error_callback;
/// Output buffer.
/*! \brief Output buffer. */
char* wr;
/// Output buffer size.
/*! \brief Output buffer size. */
size_t wr_size;
/// Number of bytes written during last call.
/*! \brief Number of bytes written during last call. */
size_t written;
/// Arbitrary pointer (useful for \a error_callback).
/*! \brief Arbitrary pointer (useful for \a error_callback). */
void* data;
/// Current line (starting from 0).
/*! \brief Current line (starting from 0). */
int line;
/// Current column (starting from 0).
/*! \brief Current column (starting from 0). */
int col;
/// Character offset from start of data (starting from 0).
/*! \brief Character offset from start of data (starting from 0). */
int char_offset;
};
@ -158,7 +160,6 @@ struct utf8_encode_state* utf8_encoder(struct utf8_encode_state* state);
/*!@}*/
/* options for text editors
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
vim: expandtab:ts=4:sw=4:syntax=c.doxygen