From 355444649a3f1d463ca4fea0cb2198a69ee74aa9 Mon Sep 17 00:00:00 2001 From: Laurence Withers Date: Tue, 13 Oct 2009 11:51:49 +0000 Subject: [PATCH] Add utf8_iseol() to test for end-of-line Add a character classification function modelled after the Unicode Standard Annex 13 Unicode newline guidelines to test for end-of-line characters. --- src/docs/char_class_eol.dox | 28 ++++++++++++++++++++++++++++ src/libutf8/100_ctype.c | 11 +++++++++++ src/libutf8/100_ctype.h | 15 +++++++++++++++ 3 files changed, 54 insertions(+) create mode 100644 src/docs/char_class_eol.dox diff --git a/src/docs/char_class_eol.dox b/src/docs/char_class_eol.dox new file mode 100644 index 0000000..83ba8ce --- /dev/null +++ b/src/docs/char_class_eol.dox @@ -0,0 +1,28 @@ +/* libutf8/src/docs/char_class_eol.dox + * + * (c)2006-2009, Laurence Withers, . + * Released under the GNU GPLv3. See file COPYING or + * http://www.gnu.org/copyleft/gpl.html for details. +*/ + +/*! \page char_class_eol Character classification: end of line + +From Unicode Standard +Annex #13 (Unicode newline guidelines): + + + +*/ + +/* options for text editors +kate: replace-trailing-space-save true; space-indent true; tab-width 4; +vim: expandtab:ts=4:sw=4:syntax=doxygen +*/ diff --git a/src/libutf8/100_ctype.c b/src/libutf8/100_ctype.c index 55eba0b..6b4a2f2 100644 --- a/src/libutf8/100_ctype.c +++ b/src/libutf8/100_ctype.c @@ -34,6 +34,17 @@ utf8_isspace(wchar_t ch) +int +utf8_iseol(wchar_t ch) +{ + return (ch >= 0x000A && ch <= 0x000D) + || ch == 0x0085 + || ch == 0x2028 + || ch == 0x2029; +} + + + int utf8_isucs4(wchar_t ch) { diff --git a/src/libutf8/100_ctype.h b/src/libutf8/100_ctype.h index 61c3624..c0194f6 100644 --- a/src/libutf8/100_ctype.h +++ b/src/libutf8/100_ctype.h @@ -51,6 +51,21 @@ int utf8_isspace(wchar_t ch); +/*! \brief Test if character is end-of-line. + +\param ch Character to test. +\retval nonzero if \a ch is an EOL character. +\retval 0 if \a ch is not an EOL character. 
+ +This function tests a UCS char to see if it should be classified as +\ref char_class_eol "end-of-line". Note that both ASCII LF and CR are treated +as EOL; it is up to the application to disambiguate the line ending in use. + +*/ +int utf8_iseol(wchar_t ch); + + + /*! \brief Test if character is valid UCS-4 codepoint. \param ch The character to classify.