Add utf8_iseol() to test for end-of-line

Add a character classification function modelled after the Unicode Standard
Annex 13 Unicode newline guidelines to test for end-of-line characters.
This commit is contained in:
Laurence Withers 2009-10-13 11:51:49 +00:00
parent a46f5cc2ee
commit 355444649a
3 changed files with 54 additions and 0 deletions

View File

@ -0,0 +1,28 @@
/* libutf8/src/docs/MainPage.dox
*
* (c)2006-2009, Laurence Withers, <l@lwithers.me.uk>.
* Released under the GNU GPLv3. See file COPYING or
* http://www.gnu.org/copyleft/gpl.html for details.
*/
/*! \page char_class_eol Character classification: end of line
The following characters are classified as end-of-line, following
<a href='http://unicode.org/reports/tr13/tr13-9.html'>Unicode Standard
Annex #13 (Unicode newline guidelines)</a>:
<ul>
<li>LF (000A)</li>
<li>VT (000B)</li>
<li>FF (000C)</li>
<li>CR (000D)</li>
<li>NEL (0085)</li>
<li>LS (2028)</li>
<li>PS (2029)</li>
</ul>
*/
/* options for text editors
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
vim: expandtab:ts=4:sw=4:syntax=doxygen
*/

View File

@ -34,6 +34,17 @@ utf8_isspace(wchar_t ch)
int
utf8_iseol(wchar_t ch)
{
    /* End-of-line code points: the contiguous ASCII run LF..CR, followed by
     * the three non-ASCII line/paragraph terminators. */
    switch(ch) {
    case 0x000A: /* LF */
    case 0x000B: /* VT */
    case 0x000C: /* FF */
    case 0x000D: /* CR */
    case 0x0085: /* NEL */
    case 0x2028: /* LS */
    case 0x2029: /* PS */
        return 1;

    default:
        return 0;
    }
}
int
utf8_isucs4(wchar_t ch)
{

View File

@ -51,6 +51,21 @@ int utf8_isspace(wchar_t ch);
/*! \brief Test if character is end-of-line.
\param ch Character to test.
\retval nonzero if \a ch is an EOL character.
\retval 0 if \a ch is not an EOL character.
This function tests a single UCS char to see if it should be classified as
\ref char_class_eol "end-of-line". Note that both ASCII LF (U+000A) and CR
(U+000D) are treated as EOL, so a CR LF pair is seen as two EOL characters; it
is up to the application to disambiguate the line ending in use.
*/
int utf8_iseol(wchar_t ch);
/*! \brief Test if character is valid UCS-4 codepoint.
\param ch The character to classify.